Implement a NEON version of morphology. This is good for ~2.2X speedup on Tegra3.

R=mtklein@google.com, mtklein, reed@google.com

Review URL: https://codereview.chromium.org/68123003

git-svn-id: http://skia.googlecode.com/svn/trunk/src@12219 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/opts/SkMorphology_opts_neon.cpp b/opts/SkMorphology_opts_neon.cpp
new file mode 100644
index 0000000..571b5c8
--- /dev/null
+++ b/opts/SkMorphology_opts_neon.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+
+#include "SkColorPriv.h"
+#include "SkMorphology_opts.h"
+#include "SkMorphology_opts_neon.h"
+
+#include <arm_neon.h>
+
+/* neon version of dilateX, dilateY, erodeX, erodeY.
+ * portable versions are in src/effects/SkMorphologyImageFilter.cpp.
+ */
+
+enum MorphType {
+    kDilate, kErode
+};
+
+enum MorphDirection {
+    kX, kY
+};
+
+template<MorphType type, MorphDirection direction>
+static void SkMorph_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                         int width, int height, int srcStride, int dstStride)
+{
+    const int srcStrideX = direction == kX ? 1 : srcStride;
+    const int dstStrideX = direction == kX ? 1 : dstStride;
+    const int srcStrideY = direction == kX ? srcStride : 1;
+    const int dstStrideY = direction == kX ? dstStride : 1;
+    radius = SkMin32(radius, width - 1);
+    const SkPMColor* upperSrc = src + radius * srcStrideX;
+    for (int x = 0; x < width; ++x) {
+        const SkPMColor* lp = src;
+        const SkPMColor* up = upperSrc;
+        SkPMColor* dptr = dst;
+        for (int y = 0; y < height; ++y) {
+            uint8x8_t max = vdup_n_u8(type == kDilate ? 0 : 255);
+            for (const SkPMColor* p = lp; p <= up; p += srcStrideX) {
+                uint8x8_t src_pixel = vreinterpret_u8_u32(vdup_n_u32(*p));
+                max = type == kDilate ? vmax_u8(src_pixel, max) : vmin_u8(src_pixel, max);
+            }
+            *dptr = vget_lane_u32(vreinterpret_u32_u8(max), 0);
+            dptr += dstStrideY;
+            lp += srcStrideY;
+            up += srcStrideY;
+        }
+        if (x >= radius) src += srcStrideX;
+        if (x + radius < width - 1) upperSrc += srcStrideX;
+        dst += dstStrideX;
+    }
+}
+
+void SkDilateX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_neon<kDilate, kX>(src, dst, radius, width, height, srcStride, dstStride);
+}
+
+void SkErodeX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_neon<kErode, kX>(src, dst, radius, width, height, srcStride, dstStride);
+}
+
+void SkDilateY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_neon<kDilate, kY>(src, dst, radius, width, height, srcStride, dstStride);
+}
+
+void SkErodeY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride)
+{
+    SkMorph_neon<kErode, kY>(src, dst, radius, width, height, srcStride, dstStride);
+}
diff --git a/opts/SkMorphology_opts_neon.h b/opts/SkMorphology_opts_neon.h
new file mode 100644
index 0000000..0b962bd
--- /dev/null
+++ b/opts/SkMorphology_opts_neon.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+void SkDilateX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride);
+void SkDilateY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                    int width, int height, int srcStride, int dstStride);
+void SkErodeX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride);
+void SkErodeY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
+                   int width, int height, int srcStride, int dstStride);
diff --git a/opts/opts_check_arm.cpp b/opts/opts_check_arm.cpp
index ba407d7..a9afa75 100644
--- a/opts/opts_check_arm.cpp
+++ b/opts/opts_check_arm.cpp
@@ -17,6 +17,8 @@
 #include "SkUtils.h"
 
 #include "SkUtilsArm.h"
+#include "SkMorphology_opts.h"
+#include "SkMorphology_opts_neon.h"
 
 #if defined(SK_CPU_LENDIAN) && !SK_ARM_NEON_IS_NONE
 extern "C" void memset16_neon(uint16_t dst[], uint16_t value, int count);
@@ -65,3 +67,27 @@
 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
     return NULL;
 }
+
+SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
+#if SK_ARM_NEON_IS_NONE
+    return NULL;
+#else
+#if SK_ARM_NEON_IS_DYNAMIC
+    if (!sk_cpu_arm_has_neon()) {
+        return NULL;
+    }
+#endif
+    switch (type) {
+        case kDilateX_SkMorphologyProcType:
+            return SkDilateX_neon;
+        case kDilateY_SkMorphologyProcType:
+            return SkDilateY_neon;
+        case kErodeX_SkMorphologyProcType:
+            return SkErodeX_neon;
+        case kErodeY_SkMorphologyProcType:
+            return SkErodeY_neon;
+        default:
+            return NULL;
+    }
+#endif
+}