// blob: dac25b289daf21340b18149b8b4fe68a2d5d579d [file] [log] [blame]
/*
* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
* Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
* Copyright (C) 2009 Google Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef StringImpl_h
#define StringImpl_h
#include <limits.h>
#include <wtf/ASCIICType.h>
#include <wtf/CrossThreadRefCounted.h>
#include <wtf/OwnFastMallocPtr.h>
#include <wtf/PassRefPtr.h>
#include <wtf/PtrAndFlags.h>
#include <wtf/RefCounted.h>
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>
#if USE(JSC)
#include <runtime/UString.h>
#endif
#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
typedef const struct __CFString * CFStringRef;
#endif
#ifdef __OBJC__
@class NSString;
#endif
namespace WebCore {
// Forward declarations for types that StringImpl refers to only by name:
// StringBuffer is taken by reference in StringImpl::adopt(), the three
// *Translator structs are declared friends of StringImpl, and StringHash is
// named as the default hash in the WTF::DefaultHash specializations.
class StringBuffer;
struct CStringTranslator;
struct HashAndCharactersTranslator;
struct StringHash;
struct UCharBufferTranslator;
// Selects case-sensitive or case-insensitive text comparison.
// NOTE(review): not referenced in the portion of the header visible here.
enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
// Predicate over a single UChar; accepted by StringImpl::removeCharacters()
// and by the StringImpl::find() overload that takes a match function.
typedef bool (*CharacterMatchFunctionPtr)(UChar);
// Reference-counted holder for a buffer of UChar characters plus its length
// and a lazily computed hash.
//
// Instances are obtained through the static create*() / adopt() factories;
// the constructors and operator new are private. The read-only inline
// accessors are const-qualified so they can be used through a
// const StringImpl& (m_hash is mutable, so hash() may still cache its result).
class StringImpl : public RefCounted<StringImpl> {
    friend struct CStringTranslator;
    friend struct HashAndCharactersTranslator;
    friend struct UCharBufferTranslator;
private:
    friend class ThreadGlobalData;
    StringImpl();

    // This adopts the UChar* without copying the buffer.
    StringImpl(const UChar*, unsigned length);

    // For use only by AtomicString's XXXTranslator helpers.
    // Asserts that no hash has been stored yet.
    void setHash(unsigned hash) { ASSERT(!m_hash); m_hash = hash; }

    typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar;

public:
    ~StringImpl();

    // Factory functions. createUninitialized() hands back, through |data|,
    // a pointer to the character storage of the new string.
    static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
    static PassRefPtr<StringImpl> create(const char*, unsigned length);
    static PassRefPtr<StringImpl> create(const char*);
    static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
    static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
    static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
    static PassRefPtr<StringImpl> adopt(StringBuffer&);
    static PassRefPtr<StringImpl> adopt(Vector<UChar>&);
#if USE(JSC)
    static PassRefPtr<StringImpl> create(const JSC::UString&);
    JSC::UString ustring();
#endif

    SharedUChar* sharedBuffer();

    // Raw access to the character data and its length.
    const UChar* characters() const { return m_data; }
    unsigned length() const { return m_length; }

    // Flag bits stored alongside the shared-buffer pointer.
    bool hasTerminatingNullCharacter() const { return m_sharedBufferAndFlags.isFlagSet(HasTerminatingNullCharacter); }
    bool inTable() const { return m_sharedBufferAndFlags.isFlagSet(InTable); }
    void setInTable() { return m_sharedBufferAndFlags.setFlag(InTable); }

    // Returns the hash of the characters, computing and caching it on first
    // use. A stored value of 0 means "not computed yet".
    unsigned hash() const { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
    // Returns the cached hash; asserts that it has already been computed.
    unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
    static unsigned computeHash(const UChar*, unsigned len);
    static unsigned computeHash(const char*);

    // Returns a StringImpl suitable for use on another thread.
    PassRefPtr<StringImpl> crossThreadString();
    // Makes a deep copy. Helpful only if you need to use a String on another thread
    // (use crossThreadString if the method call doesn't need to be threadsafe).
    // Since StringImpl objects are immutable, there's no other reason to make a copy.
    PassRefPtr<StringImpl> threadsafeCopy() const;

    PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);

    // Indexed character access; asserts that |i| is within the string.
    UChar operator[](unsigned i) const { ASSERT(i < m_length); return m_data[i]; }
    UChar32 characterStartingAt(unsigned);

    bool containsOnlyWhitespace();

    // Numeric conversions. |ok|, when non-null, receives the success status.
    int toIntStrict(bool* ok = 0, int base = 10);
    unsigned toUIntStrict(bool* ok = 0, int base = 10);
    int64_t toInt64Strict(bool* ok = 0, int base = 10);
    uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);

    int toInt(bool* ok = 0); // ignores trailing garbage
    unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage

    double toDouble(bool* ok = 0);
    float toFloat(bool* ok = 0);

    // Transformations; each returns a StringImpl rather than modifying this one.
    PassRefPtr<StringImpl> lower();
    PassRefPtr<StringImpl> upper();
    PassRefPtr<StringImpl> secure(UChar aChar);
    PassRefPtr<StringImpl> capitalize(UChar previousCharacter);
    PassRefPtr<StringImpl> foldCase();

    PassRefPtr<StringImpl> stripWhiteSpace();
    PassRefPtr<StringImpl> simplifyWhiteSpace();

    PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);

    // Searching.
    int find(const char*, int index = 0, bool caseSensitive = true);
    int find(UChar, int index = 0);
    int find(CharacterMatchFunctionPtr, int index = 0);
    int find(StringImpl*, int index, bool caseSensitive = true);

    int reverseFind(UChar, int index);
    int reverseFind(StringImpl*, int index, bool caseSensitive = true);

    // True when a reverse search starting at index 0 finds |str| at position 0.
    bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
    bool endsWith(StringImpl*, bool caseSensitive = true);

    PassRefPtr<StringImpl> replace(UChar, UChar);
    PassRefPtr<StringImpl> replace(UChar, StringImpl*);
    PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
    PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);

    static StringImpl* empty();

    Vector<char> ascii();

    WTF::Unicode::Direction defaultWritingDirection();

#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
    CFStringRef createCFString();
#endif
#ifdef __OBJC__
    operator NSString*();
#endif

    void operator delete(void*);

private:
    // Allocation from a custom buffer is only allowed internally to avoid
    // mismatched allocators. Callers should use create().
    void* operator new(size_t size);
    void* operator new(size_t size, void* address);

    static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);

    // The StringImpl struct and its data may be allocated within a single heap block.
    // In this case, the m_data pointer is an "internal buffer", and does not need to be deallocated.
    bool bufferIsInternal() const { return m_data == reinterpret_cast<const UChar*>(this + 1); }

    // Flags kept in m_sharedBufferAndFlags.
    enum StringImplFlags {
        HasTerminatingNullCharacter,
        InTable,
    };

    const UChar* m_data;
    unsigned m_length;
    mutable unsigned m_hash; // 0 until a hash has been computed or set.
    PtrAndFlags<SharedUChar, StringImplFlags> m_sharedBufferAndFlags;
    // There is a fictitious variable-length UChar array at the end, which is used
    // as the internal buffer by the createUninitialized and create methods.
};
// Comparison helpers for StringImpl pointers and raw character buffers.
// Each overload that takes the C string as its first argument forwards to
// the matching overload with the two arguments swapped.
bool equal(StringImpl*, StringImpl*);
bool equal(StringImpl*, const char*);
inline bool equal(const char* a, StringImpl* b)
{
    return equal(b, a);
}

bool equalIgnoringCase(StringImpl*, StringImpl*);
bool equalIgnoringCase(StringImpl*, const char*);
inline bool equalIgnoringCase(const char* a, StringImpl* b)
{
    return equalIgnoringCase(b, a);
}

// Buffer variants: compare |length| characters of a UChar buffer and a char buffer.
bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length)
{
    return equalIgnoringCase(b, a, length);
}

bool equalIgnoringNullity(StringImpl*, StringImpl*);
// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
// or anything like that.
// Both StringImpl::computeHash() overloads below use it as the initial value
// of the running hash.
const unsigned phi = 0x9e3779b9U;
// Paul Hsieh's SuperFastHash
// http://www.azillionmonkeys.com/qed/hash.html
//
// Hashes the |length| characters starting at |data|. The result is never 0,
// because 0 is used to signal "hash not computed yet".
inline unsigned StringImpl::computeHash(const UChar* data, unsigned length)
{
    unsigned hash = phi;

    // Main loop: consume the input two characters at a time.
    for (unsigned pairCount = length >> 1; pairCount; pairCount--) {
        hash += data[0];
        unsigned tmp = (data[1] << 11) ^ hash;
        hash = (hash << 16) ^ tmp;
        data += 2;
        hash += hash >> 11;
    }

    // Handle end case: an odd length leaves one character after the pairs.
    if (length & 1) {
        hash += data[0];
        hash ^= hash << 11;
        hash += hash >> 17;
    }

    // Force "avalanching" of final 127 bits.
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 2;
    hash += hash >> 15;
    hash ^= hash << 10;

    // This avoids ever returning a hash code of 0, since that is used to
    // signal "hash not computed yet", using a value that is likely to be
    // effectively the same as 0 when the low bits are masked.
    // Written as an explicit unsigned assignment rather than "!hash << 31",
    // which shifts the int value 1 into the sign bit.
    if (!hash)
        hash = 0x80000000U;

    return hash;
}
// Paul Hsieh's SuperFastHash
// http://www.azillionmonkeys.com/qed/hash.html
//
// Hashes the null-terminated 8-bit string at |data|. The result is never 0,
// because 0 is used to signal "hash not computed yet".
inline unsigned StringImpl::computeHash(const char* data)
{
    // This hash is designed to work on 16-bit chunks at a time. But since the normal case
    // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
    // were 16-bit chunks, which should give matching results

    unsigned hash = phi;

    // Main loop: consume two chars per iteration until the terminator is reached.
    for (;;) {
        unsigned char b0 = data[0];
        if (!b0)
            break; // Even number of chars: nothing left over.

        unsigned char b1 = data[1];
        if (!b1) {
            // Odd number of chars: fold in the single trailing one and stop.
            hash += b0;
            hash ^= hash << 11;
            hash += hash >> 17;
            break;
        }

        hash += b0;
        unsigned tmp = (b1 << 11) ^ hash;
        hash = (hash << 16) ^ tmp;
        data += 2;
        hash += hash >> 11;
    }

    // Force "avalanching" of final 127 bits.
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 2;
    hash += hash >> 15;
    hash ^= hash << 10;

    // This avoids ever returning a hash code of 0, since that is used to
    // signal "hash not computed yet", using a value that is likely to be
    // effectively the same as 0 when the low bits are masked.
    // Written as an explicit unsigned assignment rather than "!hash << 31",
    // which shifts the int value 1 into the sign bit.
    if (!hash)
        hash = 0x80000000U;

    return hash;
}
// Whitespace test covering both the ASCII range and the rest of Unicode.
// Characters up to 0x7F are classified with isASCIISpace(); this will include
// newlines, which aren't included in Unicode DirWS. Characters above 0x7F
// count as space when their Unicode direction is WhiteSpaceNeutral.
static inline bool isSpaceOrNewline(UChar c)
{
    if (c > 0x7F)
        return WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
    return WTF::isASCIISpace(c);
}
// This is a hot function because it's used when parsing HTML.
//
// The buffer is first scanned for a null character. When none is present the
// whole buffer is passed to StringImpl::create(), which will memcpy it; that
// is faster than assigning character by character while scanning. As soon as
// a null is seen, the work is handed to createStrippingNullCharactersSlowCase().
inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
{
    ASSERT(characters);
    ASSERT(length);

    const UChar* const end = characters + length;
    for (const UChar* cursor = characters; cursor != end; ++cursor) {
        // Read into an int: more efficient than using UChar here (at least on Intel Mac OS).
        int current = *cursor;
        if (!current)
            return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
    }

    // Fast case: no null characters were found.
    return StringImpl::create(characters, length);
}
}
namespace WTF {
// WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
// The primary DefaultHash template is only declared here; the two explicit
// specializations below each expose WebCore::StringHash as their nested Hash type.
template<typename T> struct DefaultHash;
// Raw StringImpl pointer keys.
template<> struct DefaultHash<WebCore::StringImpl*> {
typedef WebCore::StringHash Hash;
};
// RefPtr<StringImpl> keys.
template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
typedef WebCore::StringHash Hash;
};
}
#endif