blob: 9b91cfc276723168b5271ea677a92bed365baee5 [file] [log] [blame]
/*
* Copyright (C) 2013 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "core/html/parser/HTMLIdentifier.h"
#include "HTMLNames.h"
#include "wtf/HashMap.h"
#include "wtf/MainThread.h"
#include "wtf/text/StringHash.h"
namespace WebCore {
using namespace HTMLNames;
typedef HashMap<unsigned, StringImpl*, AlreadyHashed> IdentifierTable;
unsigned HTMLIdentifier::maxNameLength = 0;
static IdentifierTable& identifierTable()
{
DEFINE_STATIC_LOCAL(IdentifierTable, table, ());
ASSERT(isMainThread() || !table.isEmpty());
return table;
}
#ifndef NDEBUG
bool HTMLIdentifier::isKnown(const StringImpl* string)
{
const IdentifierTable& table = identifierTable();
return table.contains(string->hash());
}
#endif
StringImpl* HTMLIdentifier::findIfKnown(const UChar* characters, unsigned length)
{
// We don't need to try hashing if we know the string is too long.
if (length > maxNameLength)
return 0;
// computeHashAndMaskTop8Bits is the function StringImpl::hash() uses.
unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length);
const IdentifierTable& table = identifierTable();
ASSERT(!table.isEmpty());
IdentifierTable::const_iterator it = table.find(hash);
if (it == table.end())
return 0;
// It's possible to have hash collisions between arbitrary strings and
// known identifiers (e.g. "bvvfg" collides with "script").
// However ASSERTs in addNames() guard against there ever being collisions
// between known identifiers.
if (!equal(it->value, characters, length))
return 0;
return it->value;
}
const String& HTMLIdentifier::asString() const
{
ASSERT(isMainThread());
return m_string;
}
const StringImpl* HTMLIdentifier::asStringImpl() const
{
return m_string.impl();
}
void HTMLIdentifier::addNames(const QualifiedName* const* names, unsigned namesCount)
{
IdentifierTable& table = identifierTable();
for (unsigned i = 0; i < namesCount; ++i) {
StringImpl* name = names[i]->localName().impl();
unsigned hash = name->hash();
IdentifierTable::AddResult addResult = table.add(hash, name);
maxNameLength = std::max(maxNameLength, name->length());
// Ensure we're using the same hashing algorithm to get and set.
ASSERT_UNUSED(addResult, !addResult.isNewEntry || HTMLIdentifier::findIfKnown(String(name).charactersWithNullTermination().data(), name->length()) == name);
// We expect some hash collisions, but only for identical strings.
// Since all of these names are AtomicStrings pointers should be equal.
// Note: If you hit this ASSERT, then we had a hash collision among
// HTMLNames strings, and we need to re-design how we use this hash!
ASSERT_UNUSED(addResult, !addResult.isNewEntry || name == addResult.iterator->value);
}
}
void HTMLIdentifier::init()
{
ASSERT(isMainThread()); // Not technically necessary, but this is our current expected usage.
static bool isInitialized = false;
if (isInitialized)
return;
isInitialized = true;
// FIXME: We should atomize small whitespace (\n, \n\n, etc.)
addNames(getHTMLTags(), HTMLTagsCount);
addNames(getHTMLAttrs(), HTMLAttrsCount);
}
}