clean up aarch64-related NEON macros and fix bugs

fix two bugs:
 - build bug in libyuv.gyp: the Android dynamic-linker ldflags conditions were placed in the wrong scope
 - runtime bug in ScaleRowDown38_2_Box_NEON: the second ld4 load used operand %3 instead of %2
BUG=
TESTED=libyuv_unittest
R=fbarchard@google.com, fbarchard@chromium.org

Review URL: https://webrtc-codereview.appspot.com/23939004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1117 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 402d859..27aa04b 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -44,21 +44,13 @@
 
 // The following are available on Neon platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
 #define HAS_SCALEROWDOWN2_NEON
 #define HAS_SCALEROWDOWN4_NEON
 #define HAS_SCALEROWDOWN34_NEON
 #define HAS_SCALEROWDOWN38_NEON
 #define HAS_SCALEARGBROWDOWNEVEN_NEON
 #define HAS_SCALEARGBROWDOWN2_NEON
-#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__aarch64__) || defined(LIBYUV_NEON))
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
 #endif
 
 // The following are available on Mips platforms:
diff --git a/libyuv.gyp b/libyuv.gyp
index fdb7455..3f06d41 100644
--- a/libyuv.gyp
+++ b/libyuv.gyp
@@ -130,16 +130,6 @@
             'LIBYUV_DISABLE_X86',
           ],
         }],
-        ['OS == "android" and target_arch == "arm64"', {
-          'ldflags': [
-            '-Wl,--dynamic-linker,/system/bin/linker64',
-          ],
-        }],
-        ['OS == "android" and target_arch != "arm64"', {
-          'ldflags': [
-            '-Wl,--dynamic-linker,/system/bin/linker',
-          ],
-        }],
       ], #conditions
       'defines': [
         # Enable the following 3 macros to turn off assembly for specified CPU.
@@ -159,6 +149,18 @@
           'include',
           '.',
         ],
+        'conditions': [
+          ['OS == "android" and target_arch == "arm64"', {
+            'ldflags': [
+              '-Wl,--dynamic-linker,/system/bin/linker64',
+            ],
+          }],
+          ['OS == "android" and target_arch != "arm64"', {
+            'ldflags': [
+              '-Wl,--dynamic-linker,/system/bin/linker',
+            ],
+          }],
+        ], #conditions
       },
       'sources': [
         '<@(libyuv_sources)',
diff --git a/source/rotate.cc b/source/rotate.cc
index 34b6666..48e4806 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -42,11 +42,7 @@
 #endif
 
 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
 #define HAS_TRANSPOSE_WX8_NEON
 void TransposeWx8_NEON(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride, int width);
@@ -55,23 +51,7 @@
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width);
-//following symbol is temporally enable for aarch64, until all neon optimized
-//functions have been ported to aarch64
-#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__aarch64__) || defined(LIBYUV_NEON))
-// #define HAS_MIRRORROW_NEON
-// void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-// #define HAS_MIRRORROW_UV_NEON
-// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
-#define HAS_TRANSPOSE_WX8_NEON
-void TransposeWx8_NEON(const uint8* src, int src_stride,
-                       uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWX8_NEON
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
-                         uint8* dst_a, int dst_stride_a,
-                         uint8* dst_b, int dst_stride_b,
-                         int width);
-#endif  // defined(__ARM_NEON__)
+#endif
 
 #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
     defined(__mips__) && \
diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc
index e31a6c9..933abd4 100644
--- a/source/scale_neon64.cc
+++ b/source/scale_neon64.cc
@@ -20,7 +20,6 @@
 // This module is for GCC Neon armv8 64 bit.
 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
 
-#ifdef HAS_SCALEROWDOWN2_NEON
 // Read 32x1 throw away even pixels, and write 16x1.
 void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                         uint8* dst, int dst_width) {
@@ -40,9 +39,7 @@
   : "v0", "v1"              // Clobber List
   );
 }
-#endif //HAS_SCALEROWDOWN2_NEON
 
-#ifdef HAS_SCALEROWDOWN2_NEON
 // Read 32x2 average down and write 16x1.
 void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst, int dst_width) {
@@ -72,9 +69,7 @@
   : "v0", "v1", "v2", "v3"     // Clobber List
   );
 }
-#endif //HAS_SCALEROWDOWN2_NEON
 
-#ifdef HAS_SCALEROWDOWN4_NEON
 void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                         uint8* dst_ptr, int dst_width) {
   asm volatile (
@@ -92,9 +87,7 @@
   : "v0", "v1", "v2", "v3", "memory", "cc"
   );
 }
-#endif //HAS_SCALEROWDOWN4_NEON
 
-#ifdef HAS_SCALEROWDOWN4_NEON
 void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
   const uint8* src_ptr1 = src_ptr + src_stride;
@@ -130,9 +123,7 @@
   : "v0", "v1", "v2", "v3", "memory", "cc"
   );
 }
-#endif //HAS_SCALEROWDOWN4_NEON
 
-#ifdef HAS_SCALEROWDOWN34_NEON
 // Down scale from 4 to 3 pixels. Use the neon multilane read/write
 // to load up the every 4th pixel into a 4 different registers.
 // Point samples 32 pixels to 24 pixels.
@@ -155,9 +146,7 @@
   : "v0", "v1", "v2", "v3", "memory", "cc"
   );
 }
-#endif //HAS_SCALEROWDOWN34_NEON
 
-#ifdef HAS_SCALEROWDOWN34_NEON
 void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
                                ptrdiff_t src_stride,
                                uint8* dst_ptr, int dst_width) {
@@ -217,9 +206,7 @@
     "v20", "memory", "cc"
   );
 }
-#endif //ScaleRowDown34_0_Box_NEON
 
-#ifdef HAS_SCALEROWDOWN34_NEON
 void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
                                ptrdiff_t src_stride,
                                uint8* dst_ptr, int dst_width) {
@@ -262,9 +249,7 @@
   : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
   );
 }
-#endif //HAS_SCALEROWDOWN34_NEON
 
-#ifdef HAS_SCALEROWDOWN38_NEON
 static uvec8 kShuf38 =
   { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
 static uvec8 kShuf38_2 =
@@ -301,9 +286,6 @@
   );
 }
 
-#endif //HAS_SCALEROWDOWN38_NEON
-
-#ifdef HAS_SCALEROWDOWN38_NEON
 // 32x3 -> 12x1
 void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
                                       ptrdiff_t src_stride,
@@ -432,9 +414,7 @@
     "v30", "v31", "memory", "cc"
   );
 }
-#endif //HAS_SCALEROWDOWN38_NEON
 
-#ifdef HAS_SCALEROWDOWN38_NEON
 // 32x2 -> 12x1
 void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
                                ptrdiff_t src_stride,
@@ -456,7 +436,7 @@
     MEMACCESS(0)
     "ld4       {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32                \n"
     MEMACCESS(3)
-    "ld4       {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32                \n"
+    "ld4       {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32                \n"
     "subs      %3, %3, #12                             \n"
 
     // Shuffle the input data around to get align the data
@@ -541,7 +521,6 @@
     "v18", "v19", "v30", "v31", "memory", "cc"
   );
 }
-#endif //HAS_SCALEROWDOWN38_NEON
 
 // 16x2 -> 16x1
 void ScaleFilterRows_NEON(uint8* dst_ptr,
@@ -643,7 +622,6 @@
   );
 }
 
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
 void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                             uint8* dst, int dst_width) {
   asm volatile (
@@ -666,9 +644,7 @@
   : "memory", "cc", "v0", "v1", "v2", "v3"  // Clobber List
   );
 }
-#endif //HAS_SCALEARGBROWDOWN2_NEON
 
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
 void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                                uint8* dst, int dst_width) {
   asm volatile (
@@ -703,9 +679,7 @@
   : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
   );
 }
-#endif //HAS_SCALEARGBROWDOWN2_NEON
 
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
 // Reads 4 pixels at a time.
 // Alignment requirement: src_argb 4 byte aligned.
 void ScaleARGBRowDownEven_NEON(const uint8* src_argb,  ptrdiff_t src_stride,
@@ -731,9 +705,7 @@
   : "memory", "cc", "v0"
   );
 }
-#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
 
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
 // Reads 4 pixels at a time.
 // Alignment requirement: src_argb 4 byte aligned.
 // TODO, might be worth another optimization pass in future.
@@ -786,7 +758,6 @@
   : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
   );
 }
-#endif  // HAS_SCALEARGBROWDOWNEVEN_NEON
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
 
 #ifdef __cplusplus