Combine GrTessellateStrokeOps when possible

Bug: skia:10419
Change-Id: Ifd93b6cd8acffa78675b3e45134dfa52062b4f7a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/300102
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/gn/gpu.gni b/gn/gpu.gni
index a82ee8b..9a40c93 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -189,6 +189,7 @@
   "$_src/gpu/GrSPIRVUniformHandler.h",
   "$_src/gpu/GrSPIRVVaryingHandler.cpp",
   "$_src/gpu/GrSPIRVVaryingHandler.h",
+  "$_src/gpu/GrSTArenaList.h",
   "$_src/gpu/GrSWMaskHelper.cpp",
   "$_src/gpu/GrSWMaskHelper.h",
   "$_src/gpu/GrSamplePatternDictionary.cpp",
diff --git a/src/gpu/GrSTArenaList.h b/src/gpu/GrSTArenaList.h
new file mode 100644
index 0000000..eb86c70
--- /dev/null
+++ b/src/gpu/GrSTArenaList.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2020 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrSTArenaList_DEFINED
+#define GrSTArenaList_DEFINED
+
+#include "src/core/SkArenaAlloc.h"
+
+// A singly-linked list whose head element is a "stack allocated" class member and whose subsequent
+// elements are allocated in an SkArenaAlloc.
+template<typename T> class GrSTArenaList {
+public:
+    struct Node {  // One list link: the stored element plus a pointer to the next link.
+        template <typename... Args>
+        Node(Args&&... elementArgs) : fElement(std::forward<Args>(elementArgs)...) {}
+        T fElement;
+        Node* fNext = nullptr;  // nullptr marks the end of the list.
+    };
+
+    template <typename... Args>  // Forwards headArgs to construct the head element in place.
+    GrSTArenaList(Args&&... headArgs) : fHead(std::forward<Args>(headArgs)...) {}
+
+    const T& head() const { return fHead.fElement; }
+    T& head() { return fHead.fElement; }
+
+    void concat(GrSTArenaList&& list, SkArenaAlloc* allocator) {  // Appends |list| after fTail.
+        Node* listHeadCopy = allocator->make<Node>(std::move(list.fHead));  // Keeps list's fNext.
+        fTail->fNext = listHeadCopy;
+        // If the list's fTail pointed to its locally allocated head element, then point our fTail
+        // at the copy we just made in the arena. Otherwise the list's fTail already points at an
+        // arena-allocated element, so keep it.
+        fTail = (list.fTail == &list.fHead) ? listHeadCopy : list.fTail;
+    }
+
+    struct Iter {  // Minimal forward iterator; provides what a range-based for loop needs.
+        bool operator!=(const Iter& it) const { return fCurr != it.fCurr; }
+        bool operator==(const Iter& it) const { return fCurr == it.fCurr; }
+        void operator++() { fCurr = fCurr->fNext; }
+        T& operator*() { return fCurr->fElement; }
+        Node* fCurr;  // Current node, or nullptr once iteration has passed the last node.
+    };
+
+    Iter begin() { return Iter{&fHead}; }
+    Iter end() { return Iter{nullptr}; }  // Matches the null fNext of the final node.
+
+private:
+    Node fHead;  // The head node is stored inline in the list object itself.
+    Node* fTail = &fHead;  // NOTE(review): points into this object; looks unsafe to copy/move.
+};
+
+#endif
diff --git a/src/gpu/tessellate/GrTessellateStrokeOp.cpp b/src/gpu/tessellate/GrTessellateStrokeOp.cpp
index 7c1c29c..19f5023 100644
--- a/src/gpu/tessellate/GrTessellateStrokeOp.cpp
+++ b/src/gpu/tessellate/GrTessellateStrokeOp.cpp
@@ -43,18 +43,36 @@
                                            const SkStrokeRec& stroke, GrPaint&& paint,
                                            GrAAType aaType)
         : GrDrawOp(ClassID())
-        , fDevPath(transform_path(viewMatrix, path))
-        , fDevStroke(transform_stroke(viewMatrix, stroke))
-        , fAAType(aaType)
+        , fPathStrokes(transform_path(viewMatrix, path), transform_stroke(viewMatrix, stroke))
+        , fNumVerbs(path.countVerbs())
+        , fNumPoints(path.countPoints())
         , fColor(get_paint_constant_blended_color(paint))
+        , fAAType(aaType)
         , fProcessors(std::move(paint)) {
     SkASSERT(fAAType != GrAAType::kCoverage);  // No mixed samples support yet.
-    SkRect devBounds = fDevPath.getBounds();
-    float inflationRadius = fDevStroke.getInflationRadius();
+    SkStrokeRec& headStroke = fPathStrokes.head().fStroke;
+    if (headStroke.getJoin() == SkPaint::kMiter_Join) {
+        float miter = headStroke.getMiter();
+        if (miter <= 0) {
+            headStroke.setStrokeParams(headStroke.getCap(), SkPaint::kBevel_Join, 0);
+        } else {
+            fMiterLimitOrZero = miter;
+        }
+    }
+    SkRect devBounds = fPathStrokes.head().fPath.getBounds();
+    float inflationRadius = fPathStrokes.head().fStroke.getInflationRadius();
     devBounds.outset(inflationRadius, inflationRadius);
     this->setBounds(devBounds, HasAABloat(GrAAType::kCoverage == fAAType), IsHairline::kNo);
 }
 
+GrDrawOp::FixedFunctionFlags GrTessellateStrokeOp::fixedFunctionFlags() const {
+    // Request HW antialiasing unless the AA type is kNone; no other flag is ever set here.
+    if (fAAType == GrAAType::kNone) {
+        return FixedFunctionFlags::kNone;
+    }
+    return FixedFunctionFlags::kUsesHWAA;
+}
+
 GrProcessorSet::Analysis GrTessellateStrokeOp::finalize(const GrCaps& caps,
                                                         const GrAppliedClip* clip,
                                                         bool hasMixedSampledCoverage,
@@ -64,12 +82,28 @@
                                 clampType, &fColor);
 }
 
-GrDrawOp::FixedFunctionFlags GrTessellateStrokeOp::fixedFunctionFlags() const {
-    auto flags = FixedFunctionFlags::kNone;
-    if (GrAAType::kNone != fAAType) {
-        flags |= FixedFunctionFlags::kUsesHWAA;
+GrOp::CombineResult GrTessellateStrokeOp::onCombineIfPossible(GrOp* grOp,
+                                                              GrRecordingContext::Arenas* arenas,
+                                                              const GrCaps&) {
+    auto* op = grOp->cast<GrTessellateStrokeOp>();  // Merge candidate; its strokes move into us.
+    if (fColor != op->fColor ||
+        fViewMatrix != op->fViewMatrix ||
+        fAAType != op->fAAType ||
+        (fMiterLimitOrZero != 0 && op->fMiterLimitOrZero != 0 &&  // Are both non-zero?
+         fMiterLimitOrZero != op->fMiterLimitOrZero) ||  // (A float product could underflow to 0.)
+        fProcessors != op->fProcessors) {
+        return CombineResult::kCannotCombine;
     }
-    return flags;
+
+    fPathStrokes.concat(std::move(op->fPathStrokes), arenas->recordTimeAllocator());
+    if (op->fMiterLimitOrZero != 0) {  // Adopt the other op's miter limit if we had none.
+        SkASSERT(fMiterLimitOrZero == 0 || fMiterLimitOrZero == op->fMiterLimitOrZero);
+        fMiterLimitOrZero = op->fMiterLimitOrZero;
+    }
+    fNumVerbs += op->fNumVerbs;  // Keep the totals in sync with the concatenated list.
+    fNumPoints += op->fNumPoints;
+
+    return CombineResult::kMerged;
 }
 
 void GrTessellateStrokeOp::onPrePrepare(GrRecordingContext*, const GrSurfaceProxyView* writeView,
@@ -120,41 +154,42 @@
 }
 
 void GrTessellateStrokeOp::onPrepare(GrOpFlushState* flushState) {
-    float strokeRadius = fDevStroke.getWidth() * .5f;
-
     // Rebuild the stroke using GrStrokeGeometry.
     GrStrokeGeometry strokeGeometry(flushState->caps().shaderCaps()->maxTessellationSegments(),
-                                    fDevPath.countPoints(), fDevPath.countVerbs());
-    GrStrokeGeometry::InstanceTallies tallies = GrStrokeGeometry::InstanceTallies();
-    strokeGeometry.beginPath(fDevStroke, strokeRadius * 2, &tallies);
-    SkPathVerb previousVerb = SkPathVerb::kClose;
-    for (auto [verb, pts, w] : SkPathPriv::Iterate(fDevPath)) {
-        switch (verb) {
-            case SkPathVerb::kMove:
-                if (previousVerb != SkPathVerb::kClose) {
-                    strokeGeometry.capContourAndExit();
-                }
-                strokeGeometry.moveTo(pts[0]);
-                break;
-            case SkPathVerb::kClose:
-                strokeGeometry.closeContour();
-                break;
-            case SkPathVerb::kLine:
-                strokeGeometry.lineTo(pts[1]);
-                break;
-            case SkPathVerb::kQuad:
-                strokeGeometry.quadraticTo(pts);
-                break;
-            case SkPathVerb::kCubic:
-                strokeGeometry.cubicTo(pts);
-                break;
-            case SkPathVerb::kConic:
-                SkUNREACHABLE;
+                                    fNumPoints, fNumVerbs);
+    for (auto& [path, stroke] : fPathStrokes) {
+        float strokeRadius = stroke.getWidth() * .5f;
+        GrStrokeGeometry::InstanceTallies tallies = GrStrokeGeometry::InstanceTallies();
+        strokeGeometry.beginPath(stroke, strokeRadius * 2, &tallies);
+        SkPathVerb previousVerb = SkPathVerb::kClose;
+        for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
+            switch (verb) {
+                case SkPathVerb::kMove:
+                    if (previousVerb != SkPathVerb::kClose) {
+                        strokeGeometry.capContourAndExit();
+                    }
+                    strokeGeometry.moveTo(pts[0]);
+                    break;
+                case SkPathVerb::kClose:
+                    strokeGeometry.closeContour();
+                    break;
+                case SkPathVerb::kLine:
+                    strokeGeometry.lineTo(pts[1]);
+                    break;
+                case SkPathVerb::kQuad:
+                    strokeGeometry.quadraticTo(pts);
+                    break;
+                case SkPathVerb::kCubic:
+                    strokeGeometry.cubicTo(pts);
+                    break;
+                case SkPathVerb::kConic:
+                    SkUNREACHABLE;
+            }
+            previousVerb = verb;
         }
-        previousVerb = verb;
-    }
-    if (previousVerb != SkPathVerb::kClose) {
-        strokeGeometry.capContourAndExit();
+        if (previousVerb != SkPathVerb::kClose) {
+            strokeGeometry.capContourAndExit();
+        }
     }
 
     auto vertexData = static_cast<SkPoint*>(flushState->makeVertexSpace(
@@ -176,11 +211,20 @@
     SkPoint firstJoinControlPoint = {0, 0};
     SkPoint lastJoinControlPoint = {0, 0};
     bool hasFirstControlPoint = false;
+    float currStrokeRadius = 0;
+    auto pathStrokesIter = fPathStrokes.begin();
     for (auto verb : strokeGeometry.verbs()) {
         SkPoint patch[4];
         float overrideNumSegments = 0;
         switch (verb) {
             case Verb::kBeginPath:
+                SkASSERT(pathStrokesIter != fPathStrokes.end());
+                pendingJoin = Verb::kEndContour;
+                firstJoinControlPoint = {0, 0};
+                lastJoinControlPoint = {0, 0};
+                hasFirstControlPoint = false;
+                currStrokeRadius = (*pathStrokesIter).fStroke.getWidth() * .5f;
+                ++pathStrokesIter;
                 continue;
             case Verb::kRoundJoin:
             case Verb::kInternalRoundJoin:
@@ -207,7 +251,7 @@
                 break;
             case Verb::kSquareCap: {
                 SkASSERT(pendingJoin == Verb::kEndContour);
-                write_square_cap(patch, pathPts[i], lastJoinControlPoint, strokeRadius);
+                write_square_cap(patch, pathPts[i], lastJoinControlPoint, currStrokeRadius);
                 // This cubic steps outside the cap, but if we force it to only have one segment, we
                 // will just get the rectangular cap.
                 overrideNumSegments = 1;
@@ -236,18 +280,18 @@
             vertexData[3] = patch[0];
             switch (pendingJoin) {
                 case Verb::kBevelJoin:
-                    vertexData[4].set(1, strokeRadius);
+                    vertexData[4].set(1, currStrokeRadius);
                     break;
                 case Verb::kMiterJoin:
-                    vertexData[4].set(2, strokeRadius);
+                    vertexData[4].set(2, currStrokeRadius);
                     break;
                 case Verb::kRoundJoin:
-                    vertexData[4].set(3, strokeRadius);
+                    vertexData[4].set(3, currStrokeRadius);
                     break;
                 case Verb::kInternalRoundJoin:
                 case Verb::kInternalBevelJoin:
                 default:
-                    vertexData[4].set(4, strokeRadius);
+                    vertexData[4].set(4, currStrokeRadius);
                     break;
             }
             vertexData += 5;
@@ -269,11 +313,12 @@
             hasFirstControlPoint = false;
         } else if (verb != Verb::kRotate && verb != Verb::kRoundCap) {
             memcpy(vertexData, patch, sizeof(SkPoint) * 4);
-            vertexData[4].set(-overrideNumSegments, strokeRadius);
+            vertexData[4].set(-overrideNumSegments, currStrokeRadius);
             vertexData += 5;
             fVertexCount += 5;
         }
     }
+    SkASSERT(pathStrokesIter == fPathStrokes.end());
 
     SkASSERT(fVertexCount <= strokeGeometry.verbs().count() * 2 * 5);
     flushState->putBackVertices(strokeGeometry.verbs().count() * 2 * 5 - fVertexCount,
@@ -296,7 +341,8 @@
     initArgs.fWriteSwizzle = flushState->drawOpArgs().writeSwizzle();
     GrPipeline pipeline(initArgs, std::move(fProcessors), flushState->detachAppliedClip());
 
-    GrTessellateStrokeShader strokeShader(SkMatrix::I(), fDevStroke.getMiter(), fColor);
+    SkASSERT(fViewMatrix.isIdentity());  // Only identity matrices supported for now.
+    GrTessellateStrokeShader strokeShader(fViewMatrix, fColor, fMiterLimitOrZero);
     GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline, &strokeShader);
 
     flushState->bindPipelineAndScissorClip(programInfo, this->bounds() /*chainBounds??*/);
diff --git a/src/gpu/tessellate/GrTessellateStrokeOp.h b/src/gpu/tessellate/GrTessellateStrokeOp.h
index 6dadc83..5326164 100644
--- a/src/gpu/tessellate/GrTessellateStrokeOp.h
+++ b/src/gpu/tessellate/GrTessellateStrokeOp.h
@@ -9,14 +9,17 @@
 #define GrTessellateStrokeOp_DEFINED
 
 #include "include/core/SkStrokeRec.h"
+#include "src/gpu/GrSTArenaList.h"
 #include "src/gpu/ops/GrDrawOp.h"
 
 // Renders opaque, constant-color strokes by decomposing them into standalone tessellation patches.
 // Each patch is either a "cubic" (single stroked bezier curve with butt caps) or a "join". Requires
 // MSAA if antialiasing is desired.
 class GrTessellateStrokeOp : public GrDrawOp {
+public:
     DEFINE_OP_CLASS_ID
 
+private:
     // The provided matrix must be a similarity matrix for the time being. This is so we can
     // bootstrap this Op on top of GrStrokeGeometry with minimal modifications.
     //
@@ -26,18 +29,29 @@
 
     const char* name() const override { return "GrTessellateStrokeOp"; }
     void visitProxies(const VisitProxyFunc& fn) const override { fProcessors.visitProxies(fn); }
+    FixedFunctionFlags fixedFunctionFlags() const override;
     GrProcessorSet::Analysis finalize(const GrCaps&, const GrAppliedClip*,
                                       bool hasMixedSampledCoverage, GrClampType) override;
-    FixedFunctionFlags fixedFunctionFlags() const override;
+    CombineResult onCombineIfPossible(GrOp*, GrRecordingContext::Arenas*, const GrCaps&) override;
     void onPrePrepare(GrRecordingContext*, const GrSurfaceProxyView*, GrAppliedClip*,
                       const GrXferProcessor::DstProxyView&) override;
     void onPrepare(GrOpFlushState* state) override;
     void onExecute(GrOpFlushState*, const SkRect& chainBounds) override;
 
-    const SkPath fDevPath;
-    const SkStrokeRec fDevStroke;
-    const GrAAType fAAType;
+    struct PathStroke {  // A path paired with the stroke record used to stroke it.
+        PathStroke(const SkPath& path, const SkStrokeRec& stroke) : fPath(path), fStroke(stroke) {}
+        SkPath fPath;  // Stored by value (copied from the constructor argument).
+        SkStrokeRec fStroke;  // Stored by value (copied from the constructor argument).
+    };
+
+    GrSTArenaList<PathStroke> fPathStrokes;
+    int fNumVerbs;
+    int fNumPoints;
+
     SkPMColor4f fColor;
+    const SkMatrix fViewMatrix = SkMatrix::I();
+    const GrAAType fAAType;
+    float fMiterLimitOrZero = 0;  // Zero if there is not a stroke with a miter join type.
     GrProcessorSet fProcessors;
 
     sk_sp<const GrBuffer> fVertexBuffer;
diff --git a/src/gpu/tessellate/GrTessellateStrokeShader.cpp b/src/gpu/tessellate/GrTessellateStrokeShader.cpp
index d0f4229..ccc4930 100644
--- a/src/gpu/tessellate/GrTessellateStrokeShader.cpp
+++ b/src/gpu/tessellate/GrTessellateStrokeShader.cpp
@@ -46,9 +46,9 @@
                  const CoordTransformRange& transformRange) override {
         const auto& shader = primProc.cast<GrTessellateStrokeShader>();
 
-        if (fCachedMiterLimitValue != shader.fMiterLimit) {
-            pdman.set1f(fMiterLimitUniform, shader.fMiterLimit);
-            fCachedMiterLimitValue = shader.fMiterLimit;
+        if (shader.fMiterLimitOrZero != 0 && fCachedMiterLimitValue != shader.fMiterLimitOrZero) {
+            pdman.set1f(fMiterLimitUniform, shader.fMiterLimitOrZero);
+            fCachedMiterLimitValue = shader.fMiterLimitOrZero;
         }
 
         if (fCachedColorValue != shader.fColor) {
diff --git a/src/gpu/tessellate/GrTessellateStrokeShader.h b/src/gpu/tessellate/GrTessellateStrokeShader.h
index 715e9f2..262a40d 100644
--- a/src/gpu/tessellate/GrTessellateStrokeShader.h
+++ b/src/gpu/tessellate/GrTessellateStrokeShader.h
@@ -23,6 +23,7 @@
 //   (P4.x < 0)  : The patch is still a cubic, but will be linearized into exactly |P4.x| segments.
 //   (P4.x == 1) : The patch is an outer bevel join.
 //   (P4.x == 2) : The patch is an outer miter join.
+//                 (NOTE: If miterLimitOrZero == 0, then miter join patches are illegal.)
 //   (P4.x == 3) : The patch is an outer round join.
 //   (P4.x == 4) : The patch is an inner and outer round join.
 //   P4.y        : Represents the stroke radius.
@@ -37,11 +38,11 @@
 // tessellationPatchVertexCount of 5.
 class GrTessellateStrokeShader : public GrPathShader {
 public:
-    GrTessellateStrokeShader(const SkMatrix& viewMatrix, float miterLimit, SkPMColor4f color)
+    GrTessellateStrokeShader(const SkMatrix& viewMatrix, SkPMColor4f color, float miterLimitOrZero)
             : GrPathShader(kTessellate_GrTessellateStrokeShader_ClassID, viewMatrix,
                            GrPrimitiveType::kPatches, 5)
-            , fMiterLimit(miterLimit)
-            , fColor(color) {
+            , fColor(color)
+            , fMiterLimitOrZero(miterLimitOrZero) {
         constexpr static Attribute kInputPointAttrib{"inputPoint", kFloat2_GrVertexAttribType,
                                                      kFloat2_GrSLType};
         this->setVertexAttributes(&kInputPointAttrib, 1);
@@ -63,8 +64,8 @@
                                          const GrGLSLUniformHandler&,
                                          const GrShaderCaps&) const override;
 
-    const float fMiterLimit;
     const SkPMColor4f fColor;
+    const float fMiterLimitOrZero;  // Zero if there will not be any miter join patches.
 
     class Impl;
 };