use SkTDynamicHash in picture recording

cleaned up SkPictureFlat.h quite a bit while working on this.

bench --match picture_record_ shows some improvement:
compare.sh bench --match picture_record_ --repeat 100
master -> usehash
N=3 p=0.001000 (corrected to 0.000333)
sig? rel. speed  bench
  y      1.0x  picture_record_dictionaries
  y      1.5x  picture_record_recurring_paint_dictionary
  y      3.8x  picture_record_unique_paint_dictionary
Overall relative speed:   1.9x

bench_pictures --record is pretty much neutral:
compare.sh bench_pictures -r ../skp --mode record --repeat 30
master -> usehash
N=63 p=0.001000 (corrected to 0.000016)
sig? rel. speed  bench
  n      0.9x  desk_pokemonwiki.skp
  y      0.9x  desk_googlespreadsheet.skp
  y      0.9x  tabl_pravda.skp
  y      1.0x  desk_googlespreadsheetdashed.skp
  n      1.0x  tabl_onlinewsj.skp
  n      1.0x  tabl_nytimes.skp
  n      1.0x  desk_googlehome.skp
  y      1.0x  desk_techcrunch.skp
  n      1.0x  tabl_slashdot.skp
  n      1.0x  tabl_techmeme.skp
  n      1.0x  desk_googleplus.skp
  n      1.0x  desk_sfgate.skp
  n      1.0x  tabl_transformice.skp
  n      1.0x  desk_espn.skp
  n      1.0x  desk_baidu.skp
  n      1.0x  tabl_worldjournal.skp
  n      1.0x  desk_chalkboard.skp
  n      1.0x  tabl_frantzen.skp
  n      1.0x  desk_gws.skp
  n      1.0x  tabl_androidpolice.skp
  n      1.0x  desk_linkedin.skp
  n      1.0x  mobi_wikipedia.skp
  n      1.0x  desk_wowwiki.skp
  n      1.0x  desk_css3gradients.skp
  n      1.0x  desk_gmailthread.skp
  n      1.0x  desk_yahoogames.skp
  n      1.0x  desk_facebook.skp
  n      1.0x  desk_wordpress.skp
  n      1.0x  tabl_vnexpress.skp
  n      1.0x  desk_br337.skp
  n      1.0x  tabl_engadget.skp
  n      1.0x  tabl_theverge.skp
  n      1.0x  desk_amazon.skp
  n      1.0x  desk_ebay.skp
  n      1.0x  tabl_hsfi.skp
  n      1.0x  tabl_sahadan.skp
  n      1.0x  desk_weather.skp
  n      1.0x  tabl_digg.skp
  n      1.0x  desk_youtubetvbrowse.skp
  n      1.0x  tabl_culturalsolutions.skp
  n      1.0x  tabl_ukwsj.skp
  n      1.0x  desk_youtube.skp
  n      1.0x  tabl_googlecalendar.skp
  y      1.0x  desk_yahooanswers.skp
  n      1.0x  desk_blogger.skp
  n      1.0x  desk_yahoonews.skp
  y      1.0x  desk_yahoosports.skp
  y      1.0x  tabl_mercurynews.skp
  n      1.0x  desk_youtubetvvideo.skp
  y      1.0x  tabl_gspro.skp
  y      1.1x  tabl_googleblog.skp
  y      1.1x  tabl_cnet.skp
  y      1.1x  tabl_mlb.skp
  y      1.1x  tabl_cuteoverload.skp
  y      1.1x  desk_booking.skp
  y      1.1x  tabl_deviantart.skp
  y      1.1x  desk_twitter.skp
  y      1.1x  tabl_cnn.skp
  y      1.1x  tabl_gamedeksiam.skp
  y      1.1x  tabl_gmail.skp
  y      1.1x  tabl_nofolo.skp
  y      1.1x  tabl_mozilla.skp
  y      1.1x  desk_pinterest.skp
Overall relative speed:   1.0x

(I'd take this to mean that the microbenches are probably drifting away from relevance.)
BUG=
R=reed@google.com

Review URL: https://codereview.chromium.org/21564008

git-svn-id: http://skia.googlecode.com/svn/trunk/src@10825 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/core/SkPictureFlat.cpp b/core/SkPictureFlat.cpp
index 2c6efa2..2a7d15a 100644
--- a/core/SkPictureFlat.cpp
+++ b/core/SkPictureFlat.cpp
@@ -94,16 +94,10 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-void SkFlatData::stampHeaderAndSentinel(int index, int32_t size) {
-    fIndex    = index;
-    fFlatSize = size;
-    fChecksum = SkChecksum::Compute(this->data32(), size);
-    this->setTopBotUnwritten();
-    this->setSentinelAsCandidate();
-}
-
-SkFlatData* SkFlatData::Create(SkFlatController* controller, const void* obj,
-        int index, void (*flattenProc)(SkOrderedWriteBuffer&, const void*)) {
+SkFlatData* SkFlatData::Create(SkFlatController* controller,
+                               const void* obj,
+                               int index,
+                               void (*flattenProc)(SkOrderedWriteBuffer&, const void*)) {
     // a buffer of 256 bytes should be sufficient for most paints, regions,
     // and matrices.
     intptr_t storage[256];
@@ -118,18 +112,14 @@
     uint32_t size = buffer.size();
     SkASSERT(SkIsAlign4(size));
 
-    /**
-     *  Allocate enough memory to hold
-     *  1. SkFlatData struct
-     *  2. flattenProc's data (4-byte aligned)
-     *  3. 4-byte sentinel
-     */
-    size_t allocSize = sizeof(SkFlatData) + size + sizeof(uint32_t);
+    // Allocate enough memory to hold SkFlatData struct and the flat data itself.
+    size_t allocSize = sizeof(SkFlatData) + size;
     SkFlatData* result = (SkFlatData*) controller->allocThrow(allocSize);
 
-    // put the serialized contents into the data section of the new allocation
+    // Put the serialized contents into the data section of the new allocation.
     buffer.writeToMemory(result->data());
-    result->stampHeaderAndSentinel(index, size);
+    // Stamp the index, size and checksum in the header.
+    result->stampHeader(index, size);
     return result;
 }
 
diff --git a/core/SkPictureFlat.h b/core/SkPictureFlat.h
index 76120f7..ac8e304 100644
--- a/core/SkPictureFlat.h
+++ b/core/SkPictureFlat.h
@@ -10,17 +10,19 @@
 
 //#define SK_DEBUG_SIZE
 
-#include "SkChunkAlloc.h"
 #include "SkBitmap.h"
 #include "SkBitmapHeap.h"
+#include "SkChecksum.h"
+#include "SkChunkAlloc.h"
+#include "SkMatrix.h"
 #include "SkOrderedReadBuffer.h"
 #include "SkOrderedWriteBuffer.h"
-#include "SkPicture.h"
-#include "SkPtrRecorder.h"
-#include "SkMatrix.h"
 #include "SkPaint.h"
 #include "SkPath.h"
+#include "SkPicture.h"
+#include "SkPtrRecorder.h"
 #include "SkRegion.h"
+#include "SkTDynamicHash.h"
 #include "SkTRefArray.h"
 #include "SkTSearch.h"
 
@@ -263,87 +265,32 @@
 
 class SkFlatData {
 public:
-    /**
-     *  Compare two SkFlatData ptrs, returning -1, 0, 1 to allow them to be
-     *  sorted.
-     *
-     *  Note: this assumes that a and b have different sentinel values, either
-     *  InCache or AsCandidate, otherwise the loop will go beyond the end of
-     *  the buffers.
-     *
-     *  dataToCompare() returns 2 fields before the flattened data:
-     *      - checksum
-     *      - size
-     *  This ensures that if we see two blocks of different length, we will
-     *  notice that right away, and not read any further. It also ensures that
-     *  we see the checksum right away, so that most of the time it is enough
-     *  to short-circuit our comparison.
-     */
-    static int Compare(const SkFlatData& a, const SkFlatData& b) {
-        const uint32_t* stop = a.dataStop();
-        const uint32_t* a_ptr = a.dataToCompare() - 1;
-        const uint32_t* b_ptr = b.dataToCompare() - 1;
-        // We use -1 above, so we can pre-increment our pointers in the loop
-        while (*++a_ptr == *++b_ptr) {}
-
-        if (a_ptr == stop) {    // sentinel
-            SkASSERT(b.dataStop() == b_ptr);
-            return 0;
-        }
-        SkASSERT(a_ptr < a.dataStop());
-        SkASSERT(b_ptr < b.dataStop());
-        return (*a_ptr < *b_ptr) ? -1 : 1;
-    }
-
-    // Adapts Compare to be used with SkTSearch
-    static bool Less(const SkFlatData& a, const SkFlatData& b) {
-        return Compare(a, b) < 0;
-    }
-
-    int index() const { return fIndex; }
-    const void* data() const { return (const char*)this + sizeof(*this); }
-    void* data() { return (char*)this + sizeof(*this); }
-    // Our data is always 32bit aligned, so we can offer this accessor
-    uint32_t* data32() { return (uint32_t*)this->data(); }
-    // Returns the size of the flattened data.
-    size_t flatSize() const { return fFlatSize; }
-
-    void setSentinelInCache() {
-        this->setSentinel(kInCache_Sentinel);
-    }
-    void setSentinelAsCandidate() {
-        this->setSentinel(kCandidate_Sentinel);
-    }
-
-    uint32_t checksum() const { return fChecksum; }
-
-#ifdef SK_DEBUG_SIZE
-    // returns the logical size of our data. Does not return any sentinel or
-    // padding we might have.
-    size_t size() const {
-        return sizeof(SkFlatData) + fFlatSize;
-    }
-#endif
-
-    static SkFlatData* Create(SkFlatController* controller, const void* obj, int index,
+    // Flatten obj into an SkFlatData with this index.  controller owns the SkFlatData*.
+    static SkFlatData* Create(SkFlatController* controller,
+                              const void* obj,
+                              int index,
                               void (*flattenProc)(SkOrderedWriteBuffer&, const void*));
 
+    // Unflatten this into result, using bitmapHeap and facePlayback for bitmaps and fonts if given.
     void unflatten(void* result,
                    void (*unflattenProc)(SkOrderedReadBuffer&, void*),
                    SkBitmapHeap* bitmapHeap = NULL,
                    SkTypefacePlayback* facePlayback = NULL) const;
 
-    // When we purge an entry, we want to reuse an old index for the new entry,
-    // so we expose this setter.
-    void setIndex(int index) { fIndex = index; }
-
-    // for unittesting
-    friend bool operator==(const SkFlatData& a, const SkFlatData& b) {
-        size_t N = (const char*)a.dataStop() - (const char*)a.dataToCompare();
-        return !memcmp(a.dataToCompare(), b.dataToCompare(), N);
+    // Do these contain the same data?  Ignores index() and topBot().
+    bool operator==(const SkFlatData& that) const {
+        if (this->checksum() != that.checksum() || this->flatSize() != that.flatSize()) {
+            return false;
+        }
+        return memcmp(this->data(), that.data(), this->flatSize()) == 0;
     }
 
-    // returns true if fTopBot[] has been recorded
+    int index() const { return fIndex; }
+    const uint8_t* data() const { return (const uint8_t*)this + sizeof(*this); }
+    size_t flatSize() const { return fFlatSize; }
+    uint32_t checksum() const { return fChecksum; }
+
+    // Returns true if fTopBot[] has been recorded.
     bool isTopBotWritten() const {
         return !SkScalarIsNaN(fTopBot[0]);
     }
@@ -355,54 +302,37 @@
         return fTopBot;
     }
 
-    // return the topbot[] after it has been recorded
+    // Return the topbot[] after it has been recorded.
     const SkScalar* topBot() const {
         SkASSERT(this->isTopBotWritten());
         return fTopBot;
     }
 
 private:
-    // This is *not* part of the key for search/sort
+    // For SkTDynamicHash.
+    static const SkFlatData& Identity(const SkFlatData& flat) { return flat; }
+    static uint32_t Hash(const SkFlatData& flat) { return flat.checksum(); }
+    static bool Equal(const SkFlatData& a, const SkFlatData& b) { return a == b; }
+
+    void setIndex(int index) { fIndex = index; }
+    uint8_t* data() { return (uint8_t*)this + sizeof(*this); }
+
+    // This assumes the payload flat data has already been written and does not modify it.
+    void stampHeader(int index, int32_t size) {
+        SkASSERT(SkIsAlign4(size));
+        fIndex     = index;
+        fFlatSize  = size;
+        fTopBot[0] = SK_ScalarNaN;  // Mark as unwritten.
+        fChecksum  = SkChecksum::Compute((uint32_t*)this->data(), size);
+    }
+
     int fIndex;
-
-    // Cache of paint's FontMetrics fTop,fBottom
-    // initialied to [NaN,NaN] as a sentinel that they have not been recorded yet
-    //
-    // This is *not* part of the key for search/sort
-    mutable SkScalar fTopBot[2];
-
-    // marks fTopBot[] as unrecorded
-    void setTopBotUnwritten() {
-        this->fTopBot[0] = SK_ScalarNaN; // initial to sentinel values
-    }
-
-    // From here down is the data we look at in the search/sort. We always begin
-    // with the checksum and then length.
+    int32_t fFlatSize;
     uint32_t fChecksum;
-    int32_t  fFlatSize;  // size of flattened data
-    // uint32_t flattenedData[]
-    // uint32_t sentinelValue
+    mutable SkScalar fTopBot[2];  // Cache of FontMetrics fTop, fBottom.  Starts as [NaN,?].
+    // uint32_t flattenedData[] implicitly hangs off the end.
 
-    const uint32_t* dataToCompare() const {
-        return (const uint32_t*)&fChecksum;
-    }
-    const uint32_t* dataStop() const {
-        SkASSERT(SkIsAlign4(fFlatSize));
-        return (const uint32_t*)((const char*)this->data() + fFlatSize);
-    }
-
-    enum {
-        kInCache_Sentinel = 0,
-        kCandidate_Sentinel = ~0U,
-    };
-    void setSentinel(uint32_t value) {
-        SkASSERT(SkIsAlign4(fFlatSize));
-        this->data32()[fFlatSize >> 2] = value;
-    }
-
-    // This does not modify the payload flat data, in case it's already been written.
-    void stampHeaderAndSentinel(int index, int32_t size);
-    template <class T> friend class SkFlatDictionary;  // For stampHeaderAndSentinel().
+    template <class T> friend class SkFlatDictionary;
 };
 
 template <class T>
@@ -417,11 +347,20 @@
     , fScratchSize(scratchSizeGuess)
     , fScratch(AllocScratch(fScratchSize))
     , fWriteBuffer(kWriteBufferGrowthBytes)
-    , fWriteBufferReady(false)
-    , fNextIndex(1)  { // set to 1 since returning a zero from find() indicates failure
-        sk_bzero(fHash, sizeof(fHash));
+    , fWriteBufferReady(false) {
+        this->reset();
+    }
+
+    /**
+     * Clears the dictionary of all entries. However, it does NOT free the
+     * memory that was allocated for each entry (that's owned by controller).
+     */
+    void reset() {
+        fIndexedData.rewind();
+        // TODO(mtklein): There's no reason to have the index start from 1.  Clean this up.
         // index 0 is always empty since it is used as a signal that find failed
         fIndexedData.push(NULL);
+        fNextIndex = 1;
     }
 
     ~SkFlatDictionary() {
@@ -429,26 +368,24 @@
     }
 
     int count() const {
-        SkASSERT(fIndexedData.count() == fSortedData.count()+1);
-        return fSortedData.count();
+        SkASSERT(fIndexedData.count() == fNextIndex);
+        SkASSERT(fHash.count() == fNextIndex - 1);
+        return fNextIndex - 1;
     }
 
-    const SkFlatData*  operator[](int index) const {
-        SkASSERT(index >= 0 && index < fSortedData.count());
-        return fSortedData[index];
+    // For testing only.  Index is zero-based.
+    const SkFlatData* operator[](int index) {
+        return fIndexedData[index+1];
     }
 
     /**
-     * Clears the dictionary of all entries. However, it does NOT free the
-     * memory that was allocated for each entry.
+     * Given an element of type T return its 1-based index in the dictionary. If
+     * the element wasn't previously in the dictionary it is automatically
+     * added.
+     *
      */
-    void reset() {
-        fSortedData.reset();
-        fIndexedData.rewind();
-        // index 0 is always empty since it is used as a signal that find failed
-        fIndexedData.push(NULL);
-        fNextIndex = 1;
-        sk_bzero(fHash, sizeof(fHash));
+    int find(const T& element) {
+        return this->findAndReturnFlat(element)->index();
     }
 
     /**
@@ -459,76 +396,64 @@
      * the entry in the dictionary, it returns the actual SkFlatData.
      */
     const SkFlatData* findAndReplace(const T& element,
-                                     const SkFlatData* toReplace, bool* added,
+                                     const SkFlatData* toReplace,
+                                     bool* added,
                                      bool* replaced) {
         SkASSERT(added != NULL && replaced != NULL);
-        int oldCount = fSortedData.count();
-        const SkFlatData* flat = this->findAndReturnFlat(element);
-        *added = fSortedData.count() == oldCount + 1;
-        *replaced = false;
-        if (*added && toReplace != NULL) {
-            // First, find the index of the one to replace
-            int indexToReplace = fSortedData.find(toReplace);
-            if (indexToReplace >= 0) {
-                // findAndReturnFlat set the index to fNextIndex and increased
-                // fNextIndex by one. Reuse the index from the one being
-                // replaced and reset fNextIndex to the proper value.
-                int oldIndex = flat->index();
-                const_cast<SkFlatData*>(flat)->setIndex(toReplace->index());
-                fIndexedData[toReplace->index()] = flat;
-                fNextIndex--;
-                // Remove from the arrays.
-                fSortedData.remove(indexToReplace);
-                fIndexedData.remove(oldIndex);
-                // Remove from the hash table.
-                int oldHash = ChecksumToHashIndex(toReplace->checksum());
-                if (fHash[oldHash] == toReplace) {
-                    fHash[oldHash] = NULL;
-                }
-                // Delete the actual object.
-                fController->unalloc((void*)toReplace);
-                *replaced = true;
-                SkASSERT(fIndexedData.count() == fSortedData.count()+1);
-            }
+
+        const int oldCount = this->count();
+        SkFlatData* flat = this->findAndReturnMutableFlat(element);
+        *added = this->count() > oldCount;
+
+        // If we don't want to replace anything, we're done.
+        if (!*added || toReplace == NULL) {
+            *replaced = false;
+            return flat;
         }
+
+        // If we don't have the thing to replace, we're done.
+        const SkFlatData* found = fHash.find(*toReplace);
+        if (found == NULL) {
+            *replaced = false;
+            return flat;
+        }
+
+        // findAndReturnMutableFlat gave us index (fNextIndex-1), but we'll use the old one.
+        fIndexedData.remove(flat->index());
+        fNextIndex--;
+        flat->setIndex(found->index());
+        fIndexedData[flat->index()] = flat;
+
+        // findAndReturnMutableFlat already called fHash.add(), so we just clean up the old entry.
+        fHash.remove(*found);
+        fController->unalloc((void*)found);
+        SkASSERT(this->count() == oldCount);
+
+        *replaced = true;
         return flat;
     }
 
     /**
-     * Given an element of type T return its 1-based index in the dictionary. If
-     * the element wasn't previously in the dictionary it is automatically
-     * added.
-     *
-     * To make the Compare function fast, we write a sentinel value at the end
-     * of each block. The blocks in our fSortedData[] all have a 0 sentinel. The
-     * newly created block we're comparing against has a -1 in the sentinel.
-     *
-     * This trick allows Compare to always loop until failure. If it fails on
-     * the sentinal value, we know the blocks are equal.
-     */
-    int find(const T& element) {
-        return this->findAndReturnFlat(element)->index();
-    }
-
-    /**
      *  Unflatten the objects and return them in SkTRefArray, or return NULL
-     *  if there no objects (instead of an empty array).
+     *  if there no objects.  Caller takes ownership of result.
      */
     SkTRefArray<T>* unflattenToArray() const {
-        int count = fSortedData.count();
-        SkTRefArray<T>* array = NULL;
-        if (count > 0) {
-            array = SkTRefArray<T>::Create(count);
-            this->unflattenIntoArray(&array->writableAt(0));
+        const int count = this->count();
+        if (count == 0) {
+            return NULL;
+        }
+        SkTRefArray<T>* array = SkTRefArray<T>::Create(count);
+        for (int i = 0; i < count; i++) {
+            this->unflatten(&array->writableAt(i), fIndexedData[i+1]);
         }
         return array;
     }
 
     /**
-     * Unflatten the specific object at the given index
+     * Unflatten the specific object at the given index.
+     * Caller takes ownership of the result.
      */
     T* unflatten(int index) const {
-        SkASSERT(fIndexedData.count() == fSortedData.count()+1);
         const SkFlatData* element = fIndexedData[index];
         SkASSERT(index == element->index());
 
@@ -537,41 +462,12 @@
         return dst;
     }
 
+    /**
+     * Find or insert a flattened version of element into the dictionary.
+     * Caller does not take ownership of the result.  This will not return NULL.
+     */
     const SkFlatData* findAndReturnFlat(const T& element) {
-        // Only valid until the next call to resetScratch().
-        const SkFlatData& scratch = this->resetScratch(element, fNextIndex);
-
-        // See if we have it in the hash?
-        const int hashIndex = ChecksumToHashIndex(scratch.checksum());
-        const SkFlatData* candidate = fHash[hashIndex];
-        if (candidate != NULL && SkFlatData::Compare(scratch, *candidate) == 0) {
-            return candidate;
-        }
-
-        // See if we have it at all?
-        const int index = SkTSearch<const SkFlatData, SkFlatData::Less>(fSortedData.begin(),
-                                                                        fSortedData.count(),
-                                                                        &scratch,
-                                                                        sizeof(&scratch));
-        if (index >= 0) {
-            // Found.  Update hash before we return.
-            fHash[hashIndex] = fSortedData[index];
-            return fSortedData[index];
-        }
-
-        // We don't have it.  Add it.
-        SkFlatData* detached = this->detachScratch();
-        // detached will live beyond the next call to resetScratch(), but is owned by fController.
-        *fSortedData.insert(~index) = detached;  // SkTSearch returned bit-not of where to insert.
-        *fIndexedData.insert(detached->index()) = detached;
-        fHash[hashIndex] = detached;
-
-        SkASSERT(detached->index() == fNextIndex);
-        SkASSERT(fSortedData.count() == fNextIndex);
-        SkASSERT(fIndexedData.count() == fNextIndex+1);
-        fNextIndex++;
-
-        return detached;
+        return this->findAndReturnMutableFlat(element);
     }
 
 protected:
@@ -579,10 +475,10 @@
     void (*fUnflattenProc)(SkOrderedReadBuffer&, void*);
 
 private:
-    // Layout: [ SkFlatData header, 20 bytes ] [ data ..., 4-byte aligned ] [ sentinel, 4 bytes]
+    // Layout: [ SkFlatData header, 20 bytes ] [ data ..., 4-byte aligned ]
     static size_t SizeWithPadding(size_t flatDataSize) {
         SkASSERT(SkIsAlign4(flatDataSize));
-        return sizeof(SkFlatData) + flatDataSize + sizeof(uint32_t);
+        return sizeof(SkFlatData) + flatDataSize;
     }
 
     // Allocate a new scratch SkFlatData.  Must be sk_freed.
@@ -605,6 +501,21 @@
         fWriteBufferReady = true;
     }
 
+    // As findAndReturnFlat, but returns a mutable pointer for internal use.
+    SkFlatData* findAndReturnMutableFlat(const T& element) {
+        // Only valid until the next call to resetScratch().
+        const SkFlatData& scratch = this->resetScratch(element, fNextIndex);
+
+        SkFlatData* candidate = fHash.find(scratch);
+        if (candidate != NULL) return candidate;
+
+        SkFlatData* detached = this->detachScratch();
+        fHash.add(detached);
+        *fIndexedData.insert(fNextIndex) = detached;
+        fNextIndex++;
+        return detached;
+    }
+
     // This reference is valid only until the next call to resetScratch() or detachScratch().
     const SkFlatData& resetScratch(const T& element, int index) {
         this->lazyWriteBufferInit();
@@ -628,8 +539,8 @@
             fScratch = larger;
         }
 
-        // The data is in fScratch now, but we need to stamp its header and trailing sentinel.
-        fScratch->stampHeaderAndSentinel(index, bytesWritten);
+        // The data is in fScratch now but we need to stamp its header.
+        fScratch->stampHeader(index, bytesWritten);
         return *fScratch;
     }
 
@@ -641,72 +552,36 @@
         const size_t paddedSize = SizeWithPadding(fScratch->flatSize());
         SkFlatData* detached = (SkFlatData*)fController->allocThrow(paddedSize);
 
-        // Copy scratch into the new SkFlatData, setting the sentinel for cache storage.
+        // Copy scratch into the new SkFlatData.
         memcpy(detached, fScratch, paddedSize);
-        detached->setSentinelInCache();
 
         // We can now reuse fScratch, and detached will live until fController dies.
         return detached;
     }
 
     void unflatten(T* dst, const SkFlatData* element) const {
-        element->unflatten(dst, fUnflattenProc,
+        element->unflatten(dst,
+                           fUnflattenProc,
                            fController->getBitmapHeap(),
                            fController->getTypefacePlayback());
     }
 
-    void unflattenIntoArray(T* array) const {
-        const int count = fSortedData.count();
-        SkASSERT(fIndexedData.count() == fSortedData.count()+1);
-        const SkFlatData* const* iter = fSortedData.begin();
-        for (int i = 0; i < count; ++i) {
-            const SkFlatData* element = iter[i];
-            int index = element->index() - 1;
-            SkASSERT((unsigned)index < (unsigned)count);
-            unflatten(&array[index], element);
-        }
-    }
-
+    // All SkFlatData* stored in fIndexedData and fHash are owned by the controller.
     SkAutoTUnref<SkFlatController> fController;
     size_t fScratchSize;  // How many bytes fScratch has allocated for data itself.
     SkFlatData* fScratch;  // Owned, must be freed with sk_free.
     SkOrderedWriteBuffer fWriteBuffer;
     bool fWriteBufferReady;
 
-    // SkFlatDictionary has two copies of the data one indexed by the
-    // SkFlatData's index and the other sorted. The sorted data is used
-    // for finding and uniquification while the indexed copy is used
-    // for standard array-style lookups based on the SkFlatData's index
-    // (as in 'unflatten').
+    // We map between SkFlatData and a 1-based integer index.
     int fNextIndex;
+
+    // For index -> SkFlatData.  fIndexedData[0] is always NULL.
     SkTDArray<const SkFlatData*> fIndexedData;
-    // fSortedData is sorted by checksum/size/data.
-    SkTDArray<const SkFlatData*> fSortedData;
 
-    enum {
-        // Determined by trying diff values on picture-recording benchmarks
-        // (e.g. PictureRecordBench.cpp), choosing the smallest value that
-        // showed a big improvement. Even better would be to benchmark diff
-        // values on recording representative web-pages or other "real" content.
-        HASH_BITS   = 7,
-        HASH_MASK   = (1 << HASH_BITS) - 1,
-        HASH_COUNT  = 1 << HASH_BITS
-    };
-    const SkFlatData* fHash[HASH_COUNT];
-
-    static int ChecksumToHashIndex(uint32_t checksum) {
-        int n = checksum;
-        if (HASH_BITS < 32) {
-            n ^= n >> 16;
-        }
-        if (HASH_BITS < 16) {
-            n ^= n >> 8;
-        }
-        if (HASH_BITS < 8) {
-            n ^= n >> 4;
-        }
-        return n & HASH_MASK;
-    }
+    // For SkFlatData -> cached SkFlatData, which has index().
+    SkTDynamicHash<SkFlatData, SkFlatData,
+                   SkFlatData::Identity, SkFlatData::Hash, SkFlatData::Equal> fHash;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/core/SkPictureRecord.cpp b/core/SkPictureRecord.cpp
index 7a85381..0ec9eaf 100644
--- a/core/SkPictureRecord.cpp
+++ b/core/SkPictureRecord.cpp
@@ -401,16 +401,13 @@
                                    SkColorGetA(saveLayerPaint->getColor()));
     dbmPaint->setColor(newColor);
 
-    const SkFlatData* data = paintDict->findAndReturnFlat(*dbmPaint);
-    if (NULL == data) {
-        return false;
-    }
+    const int paintIndex = paintDict->find(*dbmPaint);
 
     // kill the saveLayer and alter the DBMR2R's paint to be the modified one
     convert_command_to_noop(writer, saveLayerInfo.fOffset);
     uint32_t* ptr = writer->peek32(dbmInfo.fOffset+dbmPaintOffset);
     SkASSERT(dbmPaintId == *ptr);
-    *ptr = data->index();
+    *ptr = paintIndex;
     return true;
 }