Enabling building with NEON on ARM64

This patch enables NEON on ARM64 platform. Passed building both on
Android ARMv7 and Android ARM64.

BUG=3580
R=andrew@webrtc.org, jridges@masque.com

Review URL: https://webrtc-codereview.appspot.com/25069004

Patch from Zhongwei Yao <zhongwei.yao@arm.com>.

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7751 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/BUILD.gn b/webrtc/BUILD.gn
index 3f337fd..ea14644 100644
--- a/webrtc/BUILD.gn
+++ b/webrtc/BUILD.gn
@@ -122,15 +122,21 @@
 
   if (cpu_arch == "arm64") {
     defines += [ "WEBRTC_ARCH_ARM" ]
+    # TODO(zhongwei) Defining an unique WEBRTC_NEON and
+    # distinguishing ARMv7 NEON and ARM64 NEON by
+    # WEBRTC_ARCH_ARM_V7 and WEBRTC_ARCH_ARM64 should be better.
+
+    # This macro is used to distinguish ARMv7 NEON and ARM64 NEON
+    defines += [ "WEBRTC_ARCH_ARM64_NEON" ]
   }
 
   if (cpu_arch == "arm") {
     defines += [ "WEBRTC_ARCH_ARM" ]
-    if (arm_version == 7) {
+    if (arm_version >= 7) {
       defines += [ "WEBRTC_ARCH_ARM_V7" ]
       if (arm_use_neon) {
         defines += [ "WEBRTC_ARCH_ARM_NEON" ]
-      } else {
+      } else if (is_android) {
         defines += [ "WEBRTC_DETECT_ARM_NEON" ]
       }
     }
diff --git a/webrtc/build/arm_neon.gypi b/webrtc/build/arm_neon.gypi
index 30d040b..037dd70 100644
--- a/webrtc/build/arm_neon.gypi
+++ b/webrtc/build/arm_neon.gypi
@@ -24,7 +24,14 @@
     '-mfpu=vfpv3-d16',
   ],
   'cflags': [
-    '-mfpu=neon',
     '-flax-vector-conversions',
   ],
+  'conditions': [
+    # "-mfpu=neon" is not requried for arm64 in GCC.
+    ['target_arch!="arm64"', {
+      'cflags': [
+        '-mfpu=neon',
+       ],
+    }],
+  ],
 }
diff --git a/webrtc/build/common.gypi b/webrtc/build/common.gypi
index 6c3cc7b..307686d 100644
--- a/webrtc/build/common.gypi
+++ b/webrtc/build/common.gypi
@@ -153,7 +153,7 @@
         'build_libjpeg%': 0,
         'enable_protobuf%': 0,
       }],
-      ['target_arch=="arm" or target_arch=="armv7"', {
+      ['target_arch=="arm" or target_arch=="armv7" or target_arch=="arm64"', {
         'prefer_fixed_point%': 1,
       }],
       ['OS!="ios" and (target_arch!="arm" or arm_version>=7)', {
@@ -210,7 +210,7 @@
       }, {
         'conditions': [
           ['os_posix==1', {
-	    'configurations': {
+            'configurations': {
               'Debug_Base': {
                 'defines': [
                   # Chromium's build/common.gypi defines this for all posix
@@ -254,6 +254,12 @@
       ['target_arch=="arm64"', {
         'defines': [
           'WEBRTC_ARCH_ARM',
+          # TODO(zhongwei) Defining an unique WEBRTC_NEON and
+          # distinguishing ARMv7 NEON and ARM64 NEON by
+          # WEBRTC_ARCH_ARM_V7 and WEBRTC_ARCH_ARM64 should be better.
+
+          # This macro is used to distinguish ARMv7 NEON and ARM64 NEON
+          'WEBRTC_ARCH_ARM64_NEON',
         ],
       }],
       ['target_arch=="arm" or target_arch=="armv7"', {
@@ -261,12 +267,13 @@
           'WEBRTC_ARCH_ARM',
         ],
         'conditions': [
-          ['arm_version==7', {
+          ['arm_version>=7', {
             'defines': ['WEBRTC_ARCH_ARM_V7',],
             'conditions': [
               ['arm_neon==1', {
                 'defines': ['WEBRTC_ARCH_ARM_NEON',],
-              }, {
+              }],
+              ['arm_neon==0 and OS=="android"', {
                 'defines': ['WEBRTC_DETECT_ARM_NEON',],
               }],
             ],
diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn
index 8f5186b..49b5627 100644
--- a/webrtc/modules/audio_processing/BUILD.gn
+++ b/webrtc/modules/audio_processing/BUILD.gn
@@ -129,7 +129,7 @@
     deps += [ ":audio_processing_sse2" ]
   }
 
-  if (rtc_build_armv7_neon) {
+  if (rtc_build_armv7_neon || cpu_arch == "arm64") {
     deps += [ ":audio_processing_neon" ]
   }
 
@@ -187,11 +187,13 @@
   }
 }
 
-if (rtc_build_armv7_neon) {
+if (rtc_build_armv7_neon || cpu_arch == "arm64") {
   source_set("audio_processing_neon") {
     sources = [
       "aec/aec_core_neon.c",
       "aec/aec_rdft_neon.c",
+      "aecm/aecm_core_neon.c",
+      "ns/nsx_core_neon.c",
     ]
 
     configs += [ "../..:common_config" ]
@@ -199,21 +201,6 @@
 
     deps = [ "../../common_audio" ]
 
-    if (is_android || is_ios) {
-      sources += [
-        # TODO(andrew): Re-enable these once webrtc:3580 is resolved.
-        #"aecm/aecm_core_neon.S",
-        #"ns/nsx_core_neon.S",
-      ]
-
-      include_dirs = [ target_out_dir ]
-    } else {
-      sources += [
-        "aecm/aecm_core_neon.c",
-        "ns/nsx_core_neon.c",
-      ]
-    }
-
     # Enable compilation for the ARM v7 Neon instruction set. This is needed
     # since //build/config/arm.gni only enables Neon for iOS, not Android.
     # This provides the same functionality as webrtc/build/arm_neon.gypi.
@@ -223,9 +210,13 @@
     configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
     cflags = [
       "-flax-vector-conversions",
-      "-mfpu=neon",
     ]
 
+    # "-mfpu=neon" is not requried for arm64 in GCC.
+    if (cpu_arch != "arm64") {
+       cflags += [ "-mfpu=neon" ]
+    }
+
     # Disable LTO in audio_processing_neon target due to compiler bug.
     if (rtc_use_lto) {
       cflags -= [
diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c
index a21a954..f2b8a88 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_neon.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c
@@ -77,6 +77,8 @@
   }
 }
 
+// ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32.
+#if !defined (WEBRTC_ARCH_ARM64_NEON)
 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
   int i;
   float32x4_t x = vrecpeq_f32(b);
@@ -119,6 +121,8 @@
   return vmulq_f32(s, x);;
 }
 
+#endif  // WEBRTC_ARCH_ARM64_NEON
+
 static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
   const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
   const float error_threshold = aec->extended_filter_enabled ?
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c
index 03a0ecf..fc590f7 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.c
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.c
@@ -378,7 +378,8 @@
 }
 
 // Initialize function pointers for ARM Neon platform.
-#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
+#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON || \
+     defined WEBRTC_ARCH_ARM64_NEON)
 static void WebRtcAecm_InitNeon(void)
 {
   WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
@@ -532,7 +533,7 @@
     {
       WebRtcAecm_InitNeon();
     }
-#elif defined(WEBRTC_ARCH_ARM_NEON)
+#elif defined(WEBRTC_ARCH_ARM_NEON) || defined(WEBRTC_ARCH_ARM64_NEON)
     WebRtcAecm_InitNeon();
 #endif
 
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.h b/webrtc/modules/audio_processing/aecm/aecm_core.h
index e56ede6..a7f2695 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.h
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.h
@@ -416,7 +416,8 @@
 // For the above function pointers, functions for generic platforms are declared
 // and defined as static in file aecm_core.c, while those for ARM Neon platforms
 // are declared below and defined in file aecm_core_neon.s.
-#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
+#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON) || \
+     defined (WEBRTC_ARCH_ARM64_NEON)
 void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
                                        const uint16_t* far_spectrum,
                                        int32_t* echo_est,
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_neon.c b/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
index ff733b7..bc1b77b 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
@@ -43,6 +43,7 @@
   3172,  2780,  2386,  1990, 1594,  1196,  798,   399
 };
 
+#ifndef WEBRTC_ARCH_ARM64_NEON
 void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,
                                  int16_t* fft,
                                  const int16_t* time_signal,
@@ -226,6 +227,7 @@
     }
   }
 }
+#endif //WEBRTC_ARCH_ARM64_NEON
 
 static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
 #if defined(__aarch64__)
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index 3ac87b8..b2c9bb4 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -138,7 +138,7 @@
         ['target_arch=="ia32" or target_arch=="x64"', {
           'dependencies': ['audio_processing_sse2',],
         }],
-        ['(target_arch=="arm" and arm_version==7) or target_arch=="armv7"', {
+        ['(target_arch=="arm" and arm_version==7) or target_arch=="armv7" or target_arch=="arm64"', {
           'dependencies': ['audio_processing_neon',],
         }],
         ['target_arch=="mipsel" and mips_arch_variant!="r6" and android_webview_build==0', {
@@ -197,7 +197,7 @@
         },
       ],
     }],
-    ['(target_arch=="arm" and arm_version==7) or target_arch=="armv7"', {
+    ['(target_arch=="arm" and arm_version==7) or target_arch=="armv7" or target_arch=="arm64"', {
       'targets': [{
         'target_name': 'audio_processing_neon',
         'type': 'static_library',
@@ -212,7 +212,7 @@
           'ns/nsx_core_neon.c',
         ],
         'conditions': [
-          ['OS=="android" or OS=="ios"', {
+          ['(OS=="android" or OS=="ios") and target_arch!="arm64"', {
             'dependencies': [
               '<(gen_core_neon_offsets_gyp):*',
             ],
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c
index 28e9dc5..05efa3a 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core.c
@@ -557,7 +557,8 @@
 Denormalize WebRtcNsx_Denormalize;
 NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
 
-#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
+#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON || \
+     defined WEBRTC_ARCH_ARM64_NEON)
 // Initialize function pointers for ARM Neon platform.
 static void WebRtcNsx_InitNeon(void) {
   WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
@@ -775,7 +776,7 @@
   if ((features & kCPUFeatureNEON) != 0) {
       WebRtcNsx_InitNeon();
   }
-#elif defined(WEBRTC_ARCH_ARM_NEON)
+#elif defined(WEBRTC_ARCH_ARM_NEON) || defined(WEBRTC_ARCH_ARM64_NEON)
   WebRtcNsx_InitNeon();
 #endif
 
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h
index 5b3c5e7..9a619b4 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.h
+++ b/webrtc/modules/audio_processing/ns/nsx_core.h
@@ -218,10 +218,11 @@
                                uint32_t* priorLocSnr,
                                uint32_t* postLocSnr);
 
-#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
+#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON || \
+     defined WEBRTC_ARCH_ARM64_NEON)
 // For the above function pointers, functions for generic platforms are declared
 // and defined as static in file nsx_core.c, while those for ARM Neon platforms
-// are declared below and defined in file nsx_core_neon.S.
+// are declared below and defined in file nsx_core_neon.c.
 void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
                                    uint16_t* magn,
                                    uint32_t* noise,