Avoid re-rendering stencil clip for every draw with reducible clip stack

Fixes the cases where clip stack reduction would cause the clip to be
re-rendered to the stencil for each draw call. This causes an unneeded
slowdown.

The stencil cache would not be used because the clip stack generation id communicated
by the clip stack element list would be invalid. This happened due to

 a) clip stack reduction creating new elements in the element list.

 b) purging logic removing the generation id, but reduction logic
    selecting already purged element, and thus the generation id, as
    the representative state of the clip.

Cases of a) where reduction would flatten the stack to a single new
element were fixed by assigning the generation id of the top-most
element of the clip stack as the generation id of the new
element. This is not strictly minimal, but enables more caching than
using an invalid id.

Cases of a) where reduction would substitute a stack element with a
new element were fixed by using the generation id of the substituted element.

The b) part was fixed by removing the purging logic. It was not
exactly correct, as the previously purged states were actually
used. The purging was not used for anything.

Changes the SkClipStack API to highlight that an invalid generation id is
never returned by SkClipStack. Empty stacks are wide open. Changes the
clients to reflect this.

Fixes a crash when not passing the anti-alias out parameter to
GrReducedClip::ReduceClipStack. The crash is not exercised in the
current code.

Committed: http://code.google.com/p/skia/source/detail?r=12084

R=bsalomon@google.com, robertphillips@google.com

Author: kkinnunen@nvidia.com

Review URL: https://codereview.chromium.org/48593003

git-svn-id: http://skia.googlecode.com/svn/trunk/src@12127 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/core/SkClipStack.cpp b/core/SkClipStack.cpp
index 2c0961a..9e3f7c6 100644
--- a/core/SkClipStack.cpp
+++ b/core/SkClipStack.cpp
@@ -452,7 +452,6 @@
         if (element->fSaveCount <= saveCount) {
             break;
         }
-        this->purgeClip(element);
         element->~Element();
         fDeque.pop_back();
     }
@@ -540,7 +539,6 @@
                     return;
                 case Element::kRect_Type:
                     if (element->rectRectIntersectAllowed(rect, doAA)) {
-                        this->purgeClip(element);
                         if (!element->fRect.intersect(rect)) {
                             element->setEmpty();
                             return;
@@ -554,7 +552,6 @@
                     break;
                 case Element::kPath_Type:
                     if (!SkRect::Intersects(element->fPath.getBounds(), rect)) {
-                        this->purgeClip(element);
                         element->setEmpty();
                         return;
                     }
@@ -567,10 +564,6 @@
     }
     new (fDeque.push_back()) Element(fSaveCount, rect, op, doAA);
     ((Element*) fDeque.back())->updateBoundAndGenID(element);
-
-    if (element && element->fSaveCount == fSaveCount) {
-        this->purgeClip(element);
-    }
 }
 
 void SkClipStack::clipDevPath(const SkPath& path, SkRegion::Op op, bool doAA) {
@@ -589,14 +582,12 @@
                     return;
                 case Element::kRect_Type:
                     if (!SkRect::Intersects(element->fRect, pathBounds)) {
-                        this->purgeClip(element);
                         element->setEmpty();
                         return;
                     }
                     break;
                 case Element::kPath_Type:
                     if (!SkRect::Intersects(element->fPath.getBounds(), pathBounds)) {
-                        this->purgeClip(element);
                         element->setEmpty();
                         return;
                     }
@@ -609,10 +600,6 @@
     }
     new (fDeque.push_back()) Element(fSaveCount, path, op, doAA);
     ((Element*) fDeque.back())->updateBoundAndGenID(element);
-
-    if (element && element->fSaveCount == fSaveCount) {
-        this->purgeClip(element);
-    }
 }
 
 void SkClipStack::clipEmpty() {
@@ -626,27 +613,17 @@
                 return;
             case Element::kRect_Type:
             case Element::kPath_Type:
-                this->purgeClip(element);
                 element->setEmpty();
                 return;
         }
     }
     new (fDeque.push_back()) Element(fSaveCount);
 
-    if (element && element->fSaveCount == fSaveCount) {
-        this->purgeClip(element);
-    }
     ((Element*)fDeque.back())->fGenID = kEmptyGenID;
 }
 
 bool SkClipStack::isWideOpen() const {
-    if (0 == fDeque.count()) {
-        return true;
-    }
-
-    const Element* back = (const Element*) fDeque.back();
-    return kWideOpenGenID == back->fGenID ||
-           (kInsideOut_BoundsType == back->fFiniteBoundType && back->fFiniteBound.isEmpty());
+    return this->getTopmostGenID() == kWideOpenGenID;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -740,45 +717,20 @@
     }
 }
 
-void SkClipStack::addPurgeClipCallback(PFPurgeClipCB callback, void* data) const {
-    ClipCallbackData temp = { callback, data };
-    fCallbackData.append(1, &temp);
-}
-
-void SkClipStack::removePurgeClipCallback(PFPurgeClipCB callback, void* data) const {
-    ClipCallbackData temp = { callback, data };
-    int index = fCallbackData.find(temp);
-    if (index >= 0) {
-        fCallbackData.removeShuffle(index);
-    }
-}
-
-// The clip state represented by 'element' will never be used again. Purge it.
-void SkClipStack::purgeClip(Element* element) {
-    SkASSERT(NULL != element);
-    if (element->fGenID >= 0 && element->fGenID < kFirstUnreservedGenID) {
-        return;
-    }
-
-    for (int i = 0; i < fCallbackData.count(); ++i) {
-        (*fCallbackData[i].fCallback)(element->fGenID, fCallbackData[i].fData);
-    }
-
-    // Invalidate element's gen ID so handlers can detect already handled records
-    element->fGenID = kInvalidGenID;
-}
-
 int32_t SkClipStack::GetNextGenID() {
     // TODO: handle overflow.
     return sk_atomic_inc(&gGenID);
 }
 
 int32_t SkClipStack::getTopmostGenID() const {
-
     if (fDeque.empty()) {
-        return kInvalidGenID;
+        return kWideOpenGenID;
     }
 
-    Element* element = (Element*)fDeque.back();
-    return element->fGenID;
+    const Element* back = static_cast<const Element*>(fDeque.back());
+    if (kInsideOut_BoundsType == back->fFiniteBoundType && back->fFiniteBound.isEmpty()) {
+        return kWideOpenGenID;
+    }
+
+    return back->getGenID();
 }
diff --git a/gpu/GrClipMaskCache.h b/gpu/GrClipMaskCache.h
index 97b4b51..213e282 100644
--- a/gpu/GrClipMaskCache.h
+++ b/gpu/GrClipMaskCache.h
@@ -36,10 +36,6 @@
         SkASSERT(clipGenID != SkClipStack::kWideOpenGenID);
         SkASSERT(clipGenID != SkClipStack::kEmptyGenID);
 
-        if (SkClipStack::kInvalidGenID == clipGenID) {
-            return false;
-        }
-
         GrClipStackFrame* back = (GrClipStackFrame*) fStack.back();
 
         // We could reuse the mask if bounds is a subset of last bounds. We'd have to communicate
diff --git a/gpu/GrClipMaskManager.cpp b/gpu/GrClipMaskManager.cpp
index 0f91566..3aef3de 100644
--- a/gpu/GrClipMaskManager.cpp
+++ b/gpu/GrClipMaskManager.cpp
@@ -113,6 +113,7 @@
     fCurrClipMaskType = kNone_ClipMaskType;
 
     ElementList elements(16);
+    int32_t genID;
     InitialState initialState;
     SkIRect clipSpaceIBounds;
     bool requiresAA;
@@ -132,6 +133,7 @@
         ReduceClipStack(*clipDataIn->fClipStack,
                         clipSpaceRTIBounds,
                         &elements,
+                        &genID,
                         &initialState,
                         &clipSpaceIBounds,
                         &requiresAA);
@@ -156,7 +158,6 @@
 
     // If MSAA is enabled we can do everything in the stencil buffer.
     if (0 == rt->numSamples() && requiresAA) {
-        int32_t genID = clipDataIn->fClipStack->getTopmostGenID();
         GrTexture* result = NULL;
 
         if (this->useSWOnlyPath(elements)) {
@@ -207,7 +208,8 @@
 
     // use the stencil clip if we can't represent the clip as a rectangle.
     SkIPoint clipSpaceToStencilSpaceOffset = -clipDataIn->fOrigin;
-    this->createStencilClipMask(initialState,
+    this->createStencilClipMask(genID,
+                                initialState,
                                 elements,
                                 clipSpaceIBounds,
                                 clipSpaceToStencilSpaceOffset);
@@ -390,11 +392,11 @@
 // Handles caching & allocation (if needed) of a clip alpha-mask texture for both the sw-upload
 // or gpu-rendered cases. Returns true if there is no more work to be done (i.e., we got a cache
 // hit)
-bool GrClipMaskManager::getMaskTexture(int32_t clipStackGenID,
+bool GrClipMaskManager::getMaskTexture(int32_t elementsGenID,
                                        const SkIRect& clipSpaceIBounds,
                                        GrTexture** result,
                                        bool willUpload) {
-    bool cached = fAACache.canReuse(clipStackGenID, clipSpaceIBounds);
+    bool cached = fAACache.canReuse(elementsGenID, clipSpaceIBounds);
     if (!cached) {
 
         // There isn't a suitable entry in the cache so we create a new texture to store the mask.
@@ -412,7 +414,7 @@
             desc.fConfig = kAlpha_8_GrPixelConfig;
         }
 
-        fAACache.acquireMask(clipStackGenID, desc, clipSpaceIBounds);
+        fAACache.acquireMask(elementsGenID, desc, clipSpaceIBounds);
     }
 
     *result = fAACache.getLastMask();
@@ -421,14 +423,14 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Create a 8-bit clip mask in alpha
-GrTexture* GrClipMaskManager::createAlphaClipMask(int32_t clipStackGenID,
+GrTexture* GrClipMaskManager::createAlphaClipMask(int32_t elementsGenID,
                                                   InitialState initialState,
                                                   const ElementList& elements,
                                                   const SkIRect& clipSpaceIBounds) {
     SkASSERT(kNone_ClipMaskType == fCurrClipMaskType);
 
     GrTexture* result;
-    if (this->getMaskTexture(clipStackGenID, clipSpaceIBounds, &result, false)) {
+    if (this->getMaskTexture(elementsGenID, clipSpaceIBounds, &result, false)) {
         fCurrClipMaskType = kAlpha_ClipMaskType;
         return result;
     }
@@ -569,7 +571,8 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Create a 1-bit clip mask in the stencil buffer. 'devClipBounds' are in device
 // (as opposed to canvas) coordinates
-bool GrClipMaskManager::createStencilClipMask(InitialState initialState,
+bool GrClipMaskManager::createStencilClipMask(int32_t elementsGenID,
+                                              InitialState initialState,
                                               const ElementList& elements,
                                               const SkIRect& clipSpaceIBounds,
                                               const SkIPoint& clipSpaceToStencilOffset) {
@@ -587,11 +590,10 @@
     if (NULL == stencilBuffer) {
         return false;
     }
-    int32_t genID = elements.tail()->getGenID();
 
-    if (stencilBuffer->mustRenderClip(genID, clipSpaceIBounds, clipSpaceToStencilOffset)) {
+    if (stencilBuffer->mustRenderClip(elementsGenID, clipSpaceIBounds, clipSpaceToStencilOffset)) {
 
-        stencilBuffer->setLastClip(genID, clipSpaceIBounds, clipSpaceToStencilOffset);
+        stencilBuffer->setLastClip(elementsGenID, clipSpaceIBounds, clipSpaceToStencilOffset);
 
         // Set the matrix so that rendered clip elements are transformed from clip to stencil space.
         SkVector translate = {
@@ -921,14 +923,14 @@
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-GrTexture* GrClipMaskManager::createSoftwareClipMask(int32_t clipStackGenID,
+GrTexture* GrClipMaskManager::createSoftwareClipMask(int32_t elementsGenID,
                                                      GrReducedClip::InitialState initialState,
                                                      const GrReducedClip::ElementList& elements,
                                                      const SkIRect& clipSpaceIBounds) {
     SkASSERT(kNone_ClipMaskType == fCurrClipMaskType);
 
     GrTexture* result;
-    if (this->getMaskTexture(clipStackGenID, clipSpaceIBounds, &result, true)) {
+    if (this->getMaskTexture(elementsGenID, clipSpaceIBounds, &result, true)) {
         return result;
     }
 
diff --git a/gpu/GrClipMaskManager.h b/gpu/GrClipMaskManager.h
index 015c801..f44a8e7 100644
--- a/gpu/GrClipMaskManager.h
+++ b/gpu/GrClipMaskManager.h
@@ -103,18 +103,19 @@
     GrClipMaskCache fAACache;       // cache for the AA path
 
     // Draws the clip into the stencil buffer
-    bool createStencilClipMask(GrReducedClip::InitialState initialState,
+    bool createStencilClipMask(int32_t elementsGenID,
+                               GrReducedClip::InitialState initialState,
                                const GrReducedClip::ElementList& elements,
                                const SkIRect& clipSpaceIBounds,
                                const SkIPoint& clipSpaceToStencilOffset);
     // Creates an alpha mask of the clip. The mask is a rasterization of elements through the
     // rect specified by clipSpaceIBounds.
-    GrTexture* createAlphaClipMask(int32_t clipStackGenID,
+    GrTexture* createAlphaClipMask(int32_t elementsGenID,
                                    GrReducedClip::InitialState initialState,
                                    const GrReducedClip::ElementList& elements,
                                    const SkIRect& clipSpaceIBounds);
     // Similar to createAlphaClipMask but it rasterizes in SW and uploads to the result texture.
-    GrTexture* createSoftwareClipMask(int32_t clipStackGenID,
+    GrTexture* createSoftwareClipMask(int32_t elementsGenID,
                                       GrReducedClip::InitialState initialState,
                                       const GrReducedClip::ElementList& elements,
                                       const SkIRect& clipSpaceIBounds);
@@ -122,7 +123,7 @@
     // Gets a texture to use for the clip mask. If true is returned then a cached mask was found
     // that already contains the rasterization of the clip stack, otherwise an uninitialized texture
     // is returned. 'willUpload' is set when the alpha mask needs to be uploaded from the CPU.
-    bool getMaskTexture(int32_t clipStackGenID,
+    bool getMaskTexture(int32_t elementsGenID,
                         const SkIRect& clipSpaceIBounds,
                         GrTexture** result,
                         bool willUpload);
diff --git a/gpu/GrReducedClip.cpp b/gpu/GrReducedClip.cpp
index a5f4519..8480e04 100644
--- a/gpu/GrReducedClip.cpp
+++ b/gpu/GrReducedClip.cpp
@@ -17,6 +17,7 @@
 void reduced_stack_walker(const SkClipStack& stack,
                           const SkRect& queryBounds,
                           ElementList* result,
+                          int32_t* resultGenID,
                           InitialState* initialState,
                           bool* requiresAA);
 
@@ -30,11 +31,17 @@
 void ReduceClipStack(const SkClipStack& stack,
                      const SkIRect& queryBounds,
                      ElementList* result,
+                     int32_t* resultGenID,
                      InitialState* initialState,
                      SkIRect* tighterBounds,
                      bool* requiresAA) {
     result->reset();
 
+    // The clip established by the element list might be cached based on the last
+    // generation id. When we make early returns, we do not know what was the generation
+    // id that lead to the state. Make a conservative guess.
+    *resultGenID = stack.getTopmostGenID();
+
     if (stack.isWideOpen()) {
         *initialState = kAllIn_InitialState;
         return;
@@ -70,7 +77,9 @@
                 SkRect scalarTighterBounds = SkRect::Make(*tighterBounds);
                 if (scalarTighterBounds == isectRect) {
                     // the round-out didn't add any area outside the clip rect.
-                    *requiresAA = false;
+                    if (NULL != requiresAA) {
+                        *requiresAA = false;
+                    }
                     *initialState = kAllIn_InitialState;
                     return;
                 }
@@ -123,12 +132,17 @@
 
     // Now that we have determined the bounds to use and filtered out the trivial cases, call the
     // helper that actually walks the stack.
-    reduced_stack_walker(stack, scalarBounds, result, initialState, requiresAA);
+    reduced_stack_walker(stack, scalarBounds, result, resultGenID, initialState, requiresAA);
+
+    // The list that was computed in this function may be cached based on the gen id of the last
+    // element.
+    SkASSERT(SkClipStack::kInvalidGenID != *resultGenID);
 }
 
 void reduced_stack_walker(const SkClipStack& stack,
                           const SkRect& queryBounds,
                           ElementList* result,
+                          int32_t* resultGenID,
                           InitialState* initialState,
                           bool* requiresAA) {
 
@@ -312,6 +326,11 @@
                 break;
         }
         if (!skippable) {
+            if (0 == result->count()) {
+                // This will be the last element. Record the stricter genID.
+                *resultGenID = element->getGenID();
+            }
+
             // if it is a flip, change it to a bounds-filling rect
             if (isFlip) {
                 SkASSERT(SkRegion::kXOR_Op == element->getOp() ||
@@ -417,5 +436,13 @@
     if (NULL != requiresAA) {
         *requiresAA = numAAElements > 0;
     }
+
+    if (0 == result->count()) {
+        if (*initialState == kAllIn_InitialState) {
+            *resultGenID = SkClipStack::kWideOpenGenID;
+        } else {
+            *resultGenID = SkClipStack::kEmptyGenID;
+        }
+    }
 }
 } // namespace GrReducedClip
diff --git a/gpu/GrReducedClip.h b/gpu/GrReducedClip.h
index abfc244..0b79f2c 100644
--- a/gpu/GrReducedClip.h
+++ b/gpu/GrReducedClip.h
@@ -20,7 +20,8 @@
 
 /**
  * This function takes a clip stack and a query rectangle and it produces a reduced set of
- * SkClipStack::Elements that are equivalent to applying the full stack to the rectangle. The
+ * SkClipStack::Elements that are equivalent to applying the full stack to the rectangle. The clip
+ * stack generation id that represents the list of elements is returned in resultGenID. The
  * initial state of the query rectangle before the first clip element is applied is returned via
  * initialState. Optionally, the caller can request a tighter bounds on the clip be returned via
  * tighterBounds. If not NULL, tighterBounds will always be contained by queryBounds after return.
@@ -33,6 +34,7 @@
 void ReduceClipStack(const SkClipStack& stack,
                      const SkIRect& queryBounds,
                      ElementList* result,
+                     int32_t* resultGenID,
                      InitialState* initialState,
                      SkIRect* tighterBounds = NULL,
                      bool* requiresAA = NULL);
diff --git a/gpu/GrStencilBuffer.h b/gpu/GrStencilBuffer.h
index 3765a4c..37d40f1 100644
--- a/gpu/GrStencilBuffer.h
+++ b/gpu/GrStencilBuffer.h
@@ -43,8 +43,7 @@
     bool mustRenderClip(int32_t clipStackGenID,
                         const SkIRect& clipSpaceRect,
                         const SkIPoint clipSpaceToStencilOffset) const {
-        return SkClipStack::kInvalidGenID == clipStackGenID ||
-               fLastClipStackGenID != clipStackGenID ||
+        return fLastClipStackGenID != clipStackGenID ||
                fLastClipSpaceOffset != clipSpaceToStencilOffset ||
                !fLastClipStackRect.contains(clipSpaceRect);
     }
diff --git a/gpu/SkGpuDevice.cpp b/gpu/SkGpuDevice.cpp
index 4041c41..7630ccd 100644
--- a/gpu/SkGpuDevice.cpp
+++ b/gpu/SkGpuDevice.cpp
@@ -344,34 +344,15 @@
                                config, bitmap.getPixels(), bitmap.rowBytes(), flags);
 }
 
-namespace {
-void purgeClipCB(int genID, void* ) {
-
-    if (SkClipStack::kInvalidGenID == genID ||
-        SkClipStack::kEmptyGenID == genID ||
-        SkClipStack::kWideOpenGenID == genID) {
-        // none of these cases will have a cached clip mask
-        return;
-    }
-
-}
-};
-
 void SkGpuDevice::onAttachToCanvas(SkCanvas* canvas) {
     INHERITED::onAttachToCanvas(canvas);
 
     // Canvas promises that this ptr is valid until onDetachFromCanvas is called
     fClipData.fClipStack = canvas->getClipStack();
-
-    fClipData.fClipStack->addPurgeClipCallback(purgeClipCB, fContext);
 }
 
 void SkGpuDevice::onDetachFromCanvas() {
     INHERITED::onDetachFromCanvas();
-
-    // TODO: iterate through the clip stack and clean up any cached clip masks
-    fClipData.fClipStack->removePurgeClipCallback(purgeClipCB, fContext);
-
     fClipData.fClipStack = NULL;
 }