Fix and enable advsimd_Blend.S for arm64 IntrinsicBlend.

Bug: 27153652

  - Fix address arithmetic in the assembly.
  - CTS tests pass.

(cherry picked from commit 644d5943a78b3d84a21a617e7fd7fcd8cff4500a)

Change-Id: I512d978510d00f7b964ddd39c3ccb048e2859058
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index ad5d097..5a7194a 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -49,8 +49,8 @@
     rsCpuIntrinsics_advsimd_Blur.S \
     rsCpuIntrinsics_advsimd_ColorMatrix.S \
     rsCpuIntrinsics_advsimd_Resize.S \
-    rsCpuIntrinsics_advsimd_YuvToRGB.S
-#    rsCpuIntrinsics_advsimd_Blend.S \
+    rsCpuIntrinsics_advsimd_YuvToRGB.S \
+    rsCpuIntrinsics_advsimd_Blend.S
 
 ifeq ($(ARCH_ARM_HAVE_NEON),true)
     LOCAL_CFLAGS_arm += -DARCH_ARM_HAVE_NEON
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 34bc82d..131b357 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -120,7 +120,7 @@
     uint32_t x1 = xstart;
     uint32_t x2 = xend;
 
-#if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
+#if defined(ARCH_ARM_USE_INTRINSICS)
     // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
     // been fixed.
     if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
diff --git a/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S b/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S
index 5211bb3..90dbd73 100644
--- a/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S
+++ b/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S
@@ -579,6 +579,9 @@
     BLEND_LIST(BLEND_X)
 #undef BLEND_X
 
+#define BLEND_X(d, n) .set tablesize, d+1 ;
+    BLEND_LIST(BLEND_X)
+#undef BLEND_X
 
 /*  int rsdIntrinsicBlend_K(
  *          uchar4 *out,        // x0
@@ -589,7 +592,7 @@
  */
 ENTRY(rsdIntrinsicBlend_K)
     adr     x5, 2f
-    cmp     w2, #(3f - 2f) >> 1
+    cmp     w2, tablesize >> 1
     bhs     1f
     ldrsh   x6, [x5, w2, uxtw #1]
     add     x0, x0, w3, uxtw #2