Add SSE2-based implementations of the morphology filters (dilate & erode). This gives a 3-5x speedup over the naive implementation and also mitigates a timing-based security attack in Chrome (https://code.google.com/p/chromium/issues/detail?id=251711).

NOTE: this requires a corresponding GYP change when Skia is rolled into Chrome: https://codereview.chromium.org/52453004/

R=mtklein@google.com, reed@google.com

Review URL: https://codereview.chromium.org/52603004

git-svn-id: http://skia.googlecode.com/svn/trunk/src@12038 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/effects/SkMorphologyImageFilter.cpp b/effects/SkMorphologyImageFilter.cpp
index 8705caa..52c01fe 100644
--- a/effects/SkMorphologyImageFilter.cpp
+++ b/effects/SkMorphologyImageFilter.cpp
@@ -10,6 +10,7 @@
 #include "SkColorPriv.h"
 #include "SkFlattenableBuffers.h"
 #include "SkRect.h"
+#include "SkMorphology_opts.h"
 #if SK_SUPPORT_GPU
 #include "GrContext.h"
 #include "GrTexture.h"
@@ -38,11 +39,19 @@
     buffer.writeInt(fRadius.fHeight);
 }
 
+enum MorphDirection {
+    kX, kY
+};
+
+template<MorphDirection direction>
 static void erode(const SkPMColor* src, SkPMColor* dst,
                   int radius, int width, int height,
-                  int srcStrideX, int srcStrideY,
-                  int dstStrideX, int dstStrideY)
+                  int srcStride, int dstStride)
 {
+    const int srcStrideX = direction == kX ? 1 : srcStride;
+    const int dstStrideX = direction == kX ? 1 : dstStride;
+    const int srcStrideY = direction == kX ? srcStride : 1;
+    const int dstStrideY = direction == kX ? dstStride : 1;
     radius = SkMin32(radius, width - 1);
     const SkPMColor* upperSrc = src + radius * srcStrideX;
     for (int x = 0; x < width; ++x) {
@@ -74,23 +83,35 @@
 
 static void erodeX(const SkBitmap& src, SkBitmap* dst, int radiusX, const SkIRect& bounds)
 {
-    erode(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
-          radiusX, bounds.width(), bounds.height(),
-          1, src.rowBytesAsPixels(), 1, dst->rowBytesAsPixels());
+    SkMorphologyProc erodeXProc = SkMorphologyGetPlatformProc(kErodeX_SkMorphologyProcType);
+    if (!erodeXProc) {
+        erodeXProc = erode<kX>;
+    }
+    erodeXProc(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
+               radiusX, bounds.width(), bounds.height(),
+               src.rowBytesAsPixels(), dst->rowBytesAsPixels());
 }
 
 static void erodeY(const SkBitmap& src, SkBitmap* dst, int radiusY, const SkIRect& bounds)
 {
-    erode(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
-          radiusY, bounds.height(), bounds.width(),
-          src.rowBytesAsPixels(), 1, dst->rowBytesAsPixels(), 1);
+    SkMorphologyProc erodeYProc = SkMorphologyGetPlatformProc(kErodeY_SkMorphologyProcType);
+    if (!erodeYProc) {
+        erodeYProc = erode<kY>;
+    }
+    erodeYProc(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
+               radiusY, bounds.height(), bounds.width(),
+               src.rowBytesAsPixels(), dst->rowBytesAsPixels());
 }
 
+template<MorphDirection direction>
 static void dilate(const SkPMColor* src, SkPMColor* dst,
                    int radius, int width, int height,
-                   int srcStrideX, int srcStrideY,
-                   int dstStrideX, int dstStrideY)
+                   int srcStride, int dstStride)
 {
+    const int srcStrideX = direction == kX ? 1 : srcStride;
+    const int dstStrideX = direction == kX ? 1 : dstStride;
+    const int srcStrideY = direction == kX ? srcStride : 1;
+    const int dstStrideY = direction == kX ? dstStride : 1;
     radius = SkMin32(radius, width - 1);
     const SkPMColor* upperSrc = src + radius * srcStrideX;
     for (int x = 0; x < width; ++x) {
@@ -122,16 +143,24 @@
 
 static void dilateX(const SkBitmap& src, SkBitmap* dst, int radiusX, const SkIRect& bounds)
 {
-    dilate(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
-           radiusX, bounds.width(), bounds.height(),
-           1, src.rowBytesAsPixels(), 1, dst->rowBytesAsPixels());
+    SkMorphologyProc dilateXProc = SkMorphologyGetPlatformProc(kDilateX_SkMorphologyProcType);
+    if (!dilateXProc) {
+        dilateXProc = dilate<kX>;
+    }
+    dilateXProc(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
+                radiusX, bounds.width(), bounds.height(),
+                src.rowBytesAsPixels(), dst->rowBytesAsPixels());
 }
 
 static void dilateY(const SkBitmap& src, SkBitmap* dst, int radiusY, const SkIRect& bounds)
 {
-    dilate(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
-           radiusY, bounds.height(), bounds.width(),
-           src.rowBytesAsPixels(), 1, dst->rowBytesAsPixels(), 1);
+    SkMorphologyProc dilateYProc = SkMorphologyGetPlatformProc(kDilateY_SkMorphologyProcType);
+    if (!dilateYProc) {
+        dilateYProc = dilate<kY>;
+    }
+    dilateYProc(src.getAddr32(bounds.left(), bounds.top()), dst->getAddr32(0, 0),
+                radiusY, bounds.height(), bounds.width(),
+                src.rowBytesAsPixels(), dst->rowBytesAsPixels());
 }
 
 bool SkErodeImageFilter::onFilterImage(Proxy* proxy,
diff --git a/opts/SkMorphology_opts.h b/opts/SkMorphology_opts.h
new file mode 100644
index 0000000..e3ad853
--- /dev/null
+++ b/opts/SkMorphology_opts.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <SkColor.h>
+
+/**
+ * All morphology procs have the same signature: src is the source buffer, dst the
+ * destination buffer, radius is the morphology radius, width and height are the extents
+ * (in pixels) along and across the filtered axis (so Y procs receive the bounds' height as
+ * width); srcStride and dstStride are the pixels per row of each buffer. All buffers are 8888.
+ */
+
+typedef void (*SkMorphologyProc)(const SkPMColor* src, SkPMColor* dst, int radius,
+                                 int width, int height, int srcStride, int dstStride);
+
+enum SkMorphologyProcType {
+    kDilateX_SkMorphologyProcType,
+    kDilateY_SkMorphologyProcType,
+    kErodeX_SkMorphologyProcType,
+    kErodeY_SkMorphologyProcType
+};
+
+SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType type);
diff --git a/opts/SkMorphology_opts_SSE2.cpp b/opts/SkMorphology_opts_SSE2.cpp
new file mode 100644
index 0000000..6314335
--- /dev/null
+++ b/opts/SkMorphology_opts_SSE2.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+
+#include "SkColorPriv.h"
+
+#include <emmintrin.h>
+
+/* SSE2 versions of dilateX, dilateY, erodeX and erodeY.
+ * The portable versions are in src/effects/SkMorphologyImageFilter.cpp.
+ */
+
+enum MorphType {
+    kDilate, kErode
+};
+
+enum MorphDirection {
+    kX, kY
+};
+
+template<MorphType type, MorphDirection direction>
+static void SkMorph_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                         int width, int height, int srcStride, int dstStride)
+{
+    const int srcStrideX = direction == kX ? 1 : srcStride;
+    const int dstStrideX = direction == kX ? 1 : dstStride;
+    const int srcStrideY = direction == kX ? srcStride : 1;
+    const int dstStrideY = direction == kX ? dstStride : 1;
+    radius = SkMin32(radius, width - 1);
+    const SkPMColor* upperSrc = src + radius * srcStrideX;
+    for (int x = 0; x < width; ++x) {
+        const SkPMColor* lp = src;
+        const SkPMColor* up = upperSrc;
+        SkPMColor* dptr = dst;
+        for (int y = 0; y < height; ++y) {
+            __m128i max = type == kDilate ? _mm_setzero_si128() : _mm_set1_epi32(0xFFFFFFFF);
+            for (const SkPMColor* p = lp; p <= up; p += srcStrideX) {
+                __m128i src_pixel = _mm_cvtsi32_si128(*p);
+                max = type == kDilate ? _mm_max_epu8(src_pixel, max) : _mm_min_epu8(src_pixel, max);
+            }
+            *dptr = _mm_cvtsi128_si32(max);
+            dptr += dstStrideY;
+            lp += srcStrideY;
+            up += srcStrideY;
+        }
+        if (x >= radius) src += srcStrideX;
+        if (x + radius < width - 1) upperSrc += srcStrideX;
+        dst += dstStrideX;
+    }
+}
+
+void SkDilateX_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_SSE2<kDilate, kX>(src, dst, radius, width, height, srcStride, dstStride);
+}
+
+void SkErodeX_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_SSE2<kErode, kX>(src, dst, radius, width, height, srcStride, dstStride);
+}
+
+void SkDilateY_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_SSE2<kDilate, kY>(src, dst, radius, width, height, srcStride, dstStride);
+}
+
+void SkErodeY_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_SSE2<kErode, kY>(src, dst, radius, width, height, srcStride, dstStride);
+}
diff --git a/opts/SkMorphology_opts_SSE2.h b/opts/SkMorphology_opts_SSE2.h
new file mode 100644
index 0000000..bd103e6
--- /dev/null
+++ b/opts/SkMorphology_opts_SSE2.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+void SkDilateX_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride);
+void SkDilateY_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride);
+void SkErodeX_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride);
+void SkErodeY_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride);
diff --git a/opts/SkMorphology_opts_none.cpp b/opts/SkMorphology_opts_none.cpp
new file mode 100644
index 0000000..66d58ba
--- /dev/null
+++ b/opts/SkMorphology_opts_none.cpp
@@ -0,0 +1,12 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkMorphology_opts.h"
+
+SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType) {
+    return NULL;
+}
diff --git a/opts/opts_check_SSE2.cpp b/opts/opts_check_SSE2.cpp
index 8f0bdac..b40ca9d 100644
--- a/opts/opts_check_SSE2.cpp
+++ b/opts/opts_check_SSE2.cpp
@@ -14,6 +14,8 @@
 #include "SkBlitRow_opts_SSE2.h"
 #include "SkUtils_opts_SSE2.h"
 #include "SkUtils.h"
+#include "SkMorphology_opts.h"
+#include "SkMorphology_opts_SSE2.h"
 
 #include "SkRTConf.h"
 
@@ -248,6 +250,24 @@
     }
 }
 
+SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
+    if (!cachedHasSSE2()) {
+        return NULL;
+    }
+    switch (type) {
+        case kDilateX_SkMorphologyProcType:
+            return SkDilateX_SSE2;
+        case kDilateY_SkMorphologyProcType:
+            return SkDilateY_SSE2;
+        case kErodeX_SkMorphologyProcType:
+            return SkErodeX_SSE2;
+        case kErodeY_SkMorphologyProcType:
+            return SkErodeY_SSE2;
+        default:
+            return NULL;
+    }
+}
+
 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
 
 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {