Upgrade libopus to 794392ecd77e6fc6aafa62c3f6002780abcc2c7c
Test: make
Change-Id: I3e5232d6c6d250c69abfa71482ac91e30c1933b6
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..b9ad434
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,351 @@
+cc_library {
+ name: "libopus",
+ vendor_available: true,
+ host_supported: true,
+
+ export_include_dirs: ["include"],
+
+ local_include_dirs: [
+ "src",
+ "silk",
+ "celt",
+ "silk/fixed",
+ ],
+
+ sanitize: {
+ integer_overflow: true,
+ misc_undefined: ["bounds"],
+ blocklist: "libopus_blocklist.txt",
+ },
+
+ srcs: [
+ // CELT_SOURCES
+ "celt/bands.c",
+ "celt/celt.c",
+ "celt/celt_encoder.c",
+ "celt/celt_decoder.c",
+ "celt/cwrs.c",
+ "celt/entcode.c",
+ "celt/entdec.c",
+ "celt/entenc.c",
+ "celt/kiss_fft.c",
+ "celt/laplace.c",
+ "celt/mathops.c",
+ "celt/mdct.c",
+ "celt/modes.c",
+ "celt/pitch.c",
+ "celt/celt_lpc.c",
+ "celt/quant_bands.c",
+ "celt/rate.c",
+ "celt/vq.c",
+
+ // SILK_SOURCES
+ "silk/CNG.c",
+ "silk/code_signs.c",
+ "silk/init_decoder.c",
+ "silk/decode_core.c",
+ "silk/decode_frame.c",
+ "silk/decode_parameters.c",
+ "silk/decode_indices.c",
+ "silk/decode_pulses.c",
+ "silk/decoder_set_fs.c",
+ "silk/dec_API.c",
+ "silk/enc_API.c",
+ "silk/encode_indices.c",
+ "silk/encode_pulses.c",
+ "silk/gain_quant.c",
+ "silk/interpolate.c",
+ "silk/LP_variable_cutoff.c",
+ "silk/NLSF_decode.c",
+ "silk/NSQ.c",
+ "silk/NSQ_del_dec.c",
+ "silk/PLC.c",
+ "silk/shell_coder.c",
+ "silk/tables_gain.c",
+ "silk/tables_LTP.c",
+ "silk/tables_NLSF_CB_NB_MB.c",
+ "silk/tables_NLSF_CB_WB.c",
+ "silk/tables_other.c",
+ "silk/tables_pitch_lag.c",
+ "silk/tables_pulses_per_block.c",
+ "silk/VAD.c",
+ "silk/control_audio_bandwidth.c",
+ "silk/quant_LTP_gains.c",
+ "silk/VQ_WMat_EC.c",
+ "silk/HP_variable_cutoff.c",
+ "silk/NLSF_encode.c",
+ "silk/NLSF_VQ.c",
+ "silk/NLSF_unpack.c",
+ "silk/NLSF_del_dec_quant.c",
+ "silk/process_NLSFs.c",
+ "silk/stereo_LR_to_MS.c",
+ "silk/stereo_MS_to_LR.c",
+ "silk/check_control_input.c",
+ "silk/control_SNR.c",
+ "silk/init_encoder.c",
+ "silk/control_codec.c",
+ "silk/A2NLSF.c",
+ "silk/ana_filt_bank_1.c",
+ "silk/biquad_alt.c",
+ "silk/bwexpander_32.c",
+ "silk/bwexpander.c",
+ "silk/debug.c",
+ "silk/decode_pitch.c",
+ "silk/inner_prod_aligned.c",
+ "silk/lin2log.c",
+ "silk/log2lin.c",
+ "silk/LPC_analysis_filter.c",
+ "silk/LPC_fit.c",
+ "silk/LPC_inv_pred_gain.c",
+ "silk/table_LSF_cos.c",
+ "silk/NLSF2A.c",
+ "silk/NLSF_stabilize.c",
+ "silk/NLSF_VQ_weights_laroia.c",
+ "silk/pitch_est_tables.c",
+ "silk/resampler.c",
+ "silk/resampler_down2_3.c",
+ "silk/resampler_down2.c",
+ "silk/resampler_private_AR2.c",
+ "silk/resampler_private_down_FIR.c",
+ "silk/resampler_private_IIR_FIR.c",
+ "silk/resampler_private_up2_HQ.c",
+ "silk/resampler_rom.c",
+ "silk/sigm_Q15.c",
+ "silk/sort.c",
+ "silk/sum_sqr_shift.c",
+ "silk/stereo_decode_pred.c",
+ "silk/stereo_encode_pred.c",
+ "silk/stereo_find_predictor.c",
+ "silk/stereo_quant_pred.c",
+
+ // SILK_SOURCES_FIXED
+ "silk/fixed/LTP_analysis_filter_FIX.c",
+ "silk/fixed/LTP_scale_ctrl_FIX.c",
+ "silk/fixed/corrMatrix_FIX.c",
+ "silk/fixed/encode_frame_FIX.c",
+ "silk/fixed/find_LPC_FIX.c",
+ "silk/fixed/find_LTP_FIX.c",
+ "silk/fixed/find_pitch_lags_FIX.c",
+ "silk/fixed/find_pred_coefs_FIX.c",
+ "silk/fixed/noise_shape_analysis_FIX.c",
+ "silk/fixed/process_gains_FIX.c",
+ "silk/fixed/regularize_correlations_FIX.c",
+ "silk/fixed/residual_energy16_FIX.c",
+ "silk/fixed/residual_energy_FIX.c",
+ "silk/fixed/warped_autocorrelation_FIX.c",
+ "silk/fixed/apply_sine_window_FIX.c",
+ "silk/fixed/autocorr_FIX.c",
+ "silk/fixed/burg_modified_FIX.c",
+ "silk/fixed/k2a_FIX.c",
+ "silk/fixed/k2a_Q16_FIX.c",
+ "silk/fixed/pitch_analysis_core_FIX.c",
+ "silk/fixed/vector_ops_FIX.c",
+ "silk/fixed/schur64_FIX.c",
+ "silk/fixed/schur_FIX.c",
+
+ // OPUS_SOURCES
+ "src/mapping_matrix.c",
+ "src/opus.c",
+ "src/opus_decoder.c",
+ "src/opus_encoder.c",
+ "src/opus_multistream.c",
+ "src/opus_multistream_encoder.c",
+ "src/opus_multistream_decoder.c",
+ "src/opus_projection_encoder.c",
+ "src/opus_projection_decoder.c",
+ "src/repacketizer.c",
+
+ // OPUS_SOURCES_FLOAT
+ "src/analysis.c",
+ "src/mlp.c",
+ "src/mlp_data.c",
+ ],
+
+ cflags: [
+ "-DNULL=0",
+ "-DSOCKLEN_T=socklen_t",
+ "-DLOCALE_NOT_USED",
+ "-D_LARGEFILE_SOURCE=1",
+ "-D_FILE_OFFSET_BITS=64",
+ "-Drestrict=",
+ "-D__EMX__",
+ "-DOPUS_BUILD",
+ "-DFIXED_POINT",
+ "-DUSE_ALLOCA",
+ "-DHAVE_LRINT",
+ "-DHAVE_LRINTF",
+ "-DENABLE_HARDENING",
+ "-O2",
+ "-fno-math-errno",
+ "-Wall",
+ "-Werror",
+ ],
+ cppflags: [
+ "-DBSD=1",
+ "-ffast-math",
+ "-O2",
+ "-funroll-loops",
+ ],
+
+ arch: {
+ arm: {
+ srcs: [
+ // CELT_SOURCES_ARM
+ "celt/arm/armcpu.c",
+ "celt/arm/arm_celt_map.c",
+
+ // DSP, MEDIA and NEON instructions are in the same assembler
+ // file - thus we need to include it even if NEON is not
+ // supported on target platform.
+ // CELT_SOURCES_ARM_ASM
+ "celt/arm/celt_pitch_xcorr_arm_gnu.s",
+
+ // CELT_AM_SOURCES_ARM_ASM
+ "celt/arm/armopts_gnu.s",
+ ],
+
+ cflags: [
+ "-DOPUS_ARM_ASM",
+ "-DOPUS_ARM_INLINE_ASM",
+ "-DOPUS_ARM_MAY_HAVE_EDSP",
+ "-DOPUS_ARM_INLINE_EDSP",
+ "-DOPUS_ARM_MAY_HAVE_MEDIA",
+ "-DOPUS_ARM_INLINE_MEDIA",
+ "-DOPUS_ARM_MAY_HAVE_NEON",
+ "-DOPUS_HAVE_RTCD",
+ ],
+
+ // Note: OPUS enhanced DSP/NEON implementation is not yet
+ // compatible with arm64. Only add the appropriate defines for
+ // 32-bit arm architecture.
+ neon: {
+ srcs: [
+ // CELT_SOURCES_ARM_NEON_INTR
+ "celt/arm/celt_neon_intr.c",
+ "celt/arm/pitch_neon_intr.c",
+
+ // SILK_SOURCES_ARM_NEON_INTR,
+ "silk/arm/arm_silk_map.c",
+ "silk/arm/biquad_alt_neon_intr.c",
+ "silk/arm/LPC_inv_pred_gain_neon_intr.c",
+ "silk/arm/NSQ_del_dec_neon_intr.c",
+ "silk/arm/NSQ_neon.c",
+
+ // SILK_SOURCES_FIXED_ARM_NEON_INTR,
+ "silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c",
+ ],
+
+ cflags: [
+ "-DOPUS_ARM_MAY_HAVE_NEON",
+ "-DOPUS_ARM_MAY_HAVE_NEON_INTR",
+ "-DOPUS_ARM_PRESUME_NEON",
+ "-DOPUS_ARM_INLINE_NEON",
+ ],
+ },
+
+ },
+
+ x86: {
+ ssse3: {
+ srcs: [
+ // CELT_SOURCES_SSE
+ "celt/x86/x86cpu.c",
+ "celt/x86/x86_celt_map.c",
+ "celt/x86/pitch_sse.c",
+
+ // CELT_SOURCES_SSE2
+ "celt/x86/pitch_sse2.c",
+ "celt/x86/vq_sse2.c",
+ ],
+
+ cflags: [
+ "-DOPUS_X86_MAY_HAVE_SSE",
+ "-DOPUS_X86_PRESUME_SSE",
+ "-DOPUS_X86_MAY_HAVE_SSE2",
+ "-DOPUS_X86_PRESUME_SSE2",
+ ],
+ },
+
+ sse4_1: {
+ srcs: [
+ // CELT_SOURCES_SSE4_1
+ "celt/x86/celt_lpc_sse4_1.c",
+ "celt/x86/pitch_sse4_1.c",
+
+ // SILK_SOURCES_SSE4_1
+ "silk/x86/NSQ_sse4_1.c",
+ "silk/x86/NSQ_del_dec_sse4_1.c",
+ "silk/x86/x86_silk_map.c",
+ "silk/x86/VAD_sse4_1.c",
+ "silk/x86/VQ_WMat_EC_sse4_1.c",
+
+ // SILK_SOURCES_FIXED_SSE4_1
+ "silk/fixed/x86/vector_ops_FIX_sse4_1.c",
+ "silk/fixed/x86/burg_modified_FIX_sse4_1.c",
+ ],
+
+ cflags: [
+ "-DOPUS_X86_MAY_HAVE_SSE4_1",
+ "-DOPUS_X86_PRESUME_SSE4_1",
+ ],
+ },
+ },
+
+ x86_64: {
+ ssse3: {
+ srcs: [
+ // CELT_SOURCES_SSE
+ "celt/x86/x86cpu.c",
+ "celt/x86/x86_celt_map.c",
+ "celt/x86/pitch_sse.c",
+
+ // CELT_SOURCES_SSE2
+ "celt/x86/pitch_sse2.c",
+ "celt/x86/vq_sse2.c",
+ ],
+
+ cflags: [
+ "-DOPUS_X86_MAY_HAVE_SSE",
+ "-DOPUS_X86_PRESUME_SSE",
+ "-DOPUS_X86_MAY_HAVE_SSE2",
+ "-DOPUS_X86_PRESUME_SSE2",
+ ],
+ },
+
+ sse4_1: {
+ srcs: [
+ // CELT_SOURCES_SSE4_1
+ "celt/x86/celt_lpc_sse4_1.c",
+ "celt/x86/pitch_sse4_1.c",
+
+ // SILK_SOURCES_SSE4_1
+ "silk/x86/NSQ_sse4_1.c",
+ "silk/x86/NSQ_del_dec_sse4_1.c",
+ "silk/x86/x86_silk_map.c",
+ "silk/x86/VAD_sse4_1.c",
+ "silk/x86/VQ_WMat_EC_sse4_1.c",
+
+ // SILK_SOURCES_FIXED_SSE4_1
+ "silk/fixed/x86/vector_ops_FIX_sse4_1.c",
+ "silk/fixed/x86/burg_modified_FIX_sse4_1.c",
+ ],
+
+ cflags: [
+ "-DOPUS_X86_MAY_HAVE_SSE4_1",
+ "-DOPUS_X86_PRESUME_SSE4_1",
+ ],
+ },
+ },
+ },
+
+ target: {
+ darwin: {
+ enabled: false,
+ },
+ },
+ apex_available: [
+ "//apex_available:platform", // used by libstagefright_soft_opusdec
+ "com.android.media.swcodec",
+ ],
+ min_sdk_version: "29",
+}
diff --git a/CleanSpec.mk b/CleanSpec.mk
new file mode 100644
index 0000000..d531442
--- /dev/null
+++ b/CleanSpec.mk
@@ -0,0 +1,49 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# If you don't need to do a full clean build but would like to touch
+# a file or delete some intermediate files, add a clean step to the end
+# of the list. These steps will only be run once, if they haven't been
+# run before.
+#
+# E.g.:
+# $(call add-clean-step, touch -c external/sqlite/sqlite3.h)
+# $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libz_intermediates)
+#
+# Always use "touch -c" and "rm -f" or "rm -rf" to gracefully deal with
+# files that are missing or have been moved.
+#
+# Use $(PRODUCT_OUT) to get to the "out/target/product/blah/" directory.
+# Use $(OUT_DIR) to refer to the "out" directory.
+#
+# If you need to re-do something that's already mentioned, just copy
+# the command and add it to the bottom of the list. E.g., if a change
+# that you made last week required touching a file and a change you
+# made today requires touching the same file, just copy the old
+# touch step and add it to the end of the list.
+#
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
+
+# For example:
+#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/APPS/AndroidTests_intermediates)
+#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/JAVA_LIBRARIES/core_intermediates)
+#$(call add-clean-step, find $(OUT_DIR) -type f -name "IGTalkSession*" -print0 | xargs -0 rm -f)
+#$(call add-clean-step, rm -rf $(PRODUCT_OUT)/data/*)
+
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..eb54eee
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,15 @@
+name: "libopus"
+description: "Android fork of the opus library."
+third_party {
+ url {
+ type: GIT
+ value: "https://gitlab.xiph.org/xiph/opus.git"
+ }
+ version: "794392ecd77e6fc6aafa62c3f6002780abcc2c7c"
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2021
+ month: 1
+ day: 5
+ }
+}
diff --git a/MODULE_LICENSE_BSD b/MODULE_LICENSE_BSD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_BSD
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000..108afb2
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,27 @@
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..229a641
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,4 @@
+# Default code reviewers picked from top 3 or more developers.
+# Please update this list if you find better candidates.
+flim@google.com
+essick@google.com
diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg
new file mode 100644
index 0000000..ecf8b8e
--- /dev/null
+++ b/PREUPLOAD.cfg
@@ -0,0 +1,2 @@
+[Hook Scripts]
+mainline_hook = ${REPO_ROOT}/frameworks/av/tools/mainline_hook_project.sh
diff --git a/README.android b/README.android
new file mode 100644
index 0000000..06fdc0e
--- /dev/null
+++ b/README.android
@@ -0,0 +1,7 @@
+* current source is based on libopus 1.3 (https://git.xiph.org/?p=opus.git;a=snapshot;h=83d5155f151ca47c9d6274ded1a7481f746b9a43;sf=tgz)
+* libopus is BSD-licensed - http://www.opus-codec.org/license/
+
+Updating:
+* Run "convert_android_asm.sh" from the root of the library (external/libopus).
+ This uses 'arm2gnu.pl' included in libopus to convert ARM ASM files to GNU ASM
+ files for building under the Android NDK.
diff --git a/README.version b/README.version
new file mode 100644
index 0000000..515fea8
--- /dev/null
+++ b/README.version
@@ -0,0 +1,7 @@
+This commit is up to date with the following commit in upstream-master:
+Branch: upstream-master
+commit: b83dd52868326a401c8578041e3dbea439d53f11
+
+upstream-master is in sync with upstream project at
+URL: https://gitlab.xiph.org/xiph/opus.git
+
diff --git a/celt/arm/armopts_gnu.s b/celt/arm/armopts_gnu.s
new file mode 100644
index 0000000..c7082fc
--- /dev/null
+++ b/celt/arm/armopts_gnu.s
@@ -0,0 +1,38 @@
+ .syntax unified
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+@ Set the following to 1 if we have EDSP instructions
+@ (LDRD/STRD, etc., ARMv5E and later).
+ .set OPUS_ARM_MAY_HAVE_EDSP, 1
+
+@ Set the following to 1 if we have ARMv6 media instructions.
+ .set OPUS_ARM_MAY_HAVE_MEDIA, 1
+
+@ Set the following to 1 if we have NEON (some ARMv7)
+ .set OPUS_ARM_MAY_HAVE_NEON, 1
+
+@ END:
diff --git a/celt/arm/celt_pitch_xcorr_arm_gnu.s b/celt/arm/celt_pitch_xcorr_arm_gnu.s
new file mode 100644
index 0000000..31b0c65
--- /dev/null
+++ b/celt/arm/celt_pitch_xcorr_arm_gnu.s
@@ -0,0 +1,555 @@
+ .syntax unified
+@ Copyright (c) 2007-2008 CSIRO
+@ Copyright (c) 2007-2009 Xiph.Org Foundation
+@ Copyright (c) 2013 Parrot
+@ Written by Aurélien Zanelli
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions
+@ are met:
+@
+@ - Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@
+@ - Redistributions in binary form must reproduce the above copyright
+@ notice, this list of conditions and the following disclaimer in the
+@ documentation and/or other materials provided with the distribution.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ .text; .p2align 2; .arch armv7-a
+ .fpu neon
+ .object_arch armv4t
+
+ .include "celt/arm/armopts_gnu.s"
+
+ .if OPUS_ARM_MAY_HAVE_EDSP
+ .global celt_pitch_xcorr_edsp
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_NEON
+ .global celt_pitch_xcorr_neon
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_NEON
+
+@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+ .type xcorr_kernel_neon, %function; xcorr_kernel_neon: @ PROC
+xcorr_kernel_neon_start:
+ @ input:
+ @ r3 = int len
+ @ r4 = opus_val16 *x
+ @ r5 = opus_val16 *y
+ @ q0 = opus_val32 sum[4]
+ @ output:
+ @ q0 = opus_val32 sum[4]
+ @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+ @ internal usage:
+ @ r12 = int j
+ @ d3 = y_3|y_2|y_1|y_0
+ @ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+ @ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+ @ q8 = scratch
+ @
+ @ Load y[0...3]
+ @ This requires len>0 to always be valid (which we assert in the C code).
+ VLD1.16 {d5}, [r5]!
+ SUBS r12, r3, #8
+ BLE xcorr_kernel_neon_process4
+@ Process 8 samples at a time.
+@ This loop loads one y value more than we actually need. Therefore we have to
+@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+@ reading past the end of the array.
+xcorr_kernel_neon_process8:
+ @ This loop has 19 total instructions (10 cycles to issue, minimum), with
+ @ - 2 cycles of ARM instructions,
+ @ - 10 cycles of load/store/byte permute instructions, and
+ @ - 9 cycles of data processing instructions.
+ @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+ @ latter two categories, meaning the whole loop should run in 10 cycles per
+ @ iteration, barring cache misses.
+ @
+ @ Load x[0...7]
+ VLD1.16 {d6, d7}, [r4]!
+ @ Unlike VMOV, VAND is a data processing instruction (and doesn't get
+ @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+ VAND d3, d5, d5
+ SUBS r12, r12, #8
+ @ Load y[4...11]
+ VLD1.16 {d4, d5}, [r5]!
+ VMLAL.S16 q0, d3, d6[0]
+ VEXT.16 d16, d3, d4, #1
+ VMLAL.S16 q0, d4, d7[0]
+ VEXT.16 d17, d4, d5, #1
+ VMLAL.S16 q0, d16, d6[1]
+ VEXT.16 d16, d3, d4, #2
+ VMLAL.S16 q0, d17, d7[1]
+ VEXT.16 d17, d4, d5, #2
+ VMLAL.S16 q0, d16, d6[2]
+ VEXT.16 d16, d3, d4, #3
+ VMLAL.S16 q0, d17, d7[2]
+ VEXT.16 d17, d4, d5, #3
+ VMLAL.S16 q0, d16, d6[3]
+ VMLAL.S16 q0, d17, d7[3]
+ BGT xcorr_kernel_neon_process8
+@ Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4:
+ ADDS r12, r12, #4
+ BLE xcorr_kernel_neon_process2
+ @ Load x[0...3]
+ VLD1.16 d6, [r4]!
+ @ Use VAND since it's a data processing instruction again.
+ VAND d4, d5, d5
+ SUB r12, r12, #4
+ @ Load y[4...7]
+ VLD1.16 d5, [r5]!
+ VMLAL.S16 q0, d4, d6[0]
+ VEXT.16 d16, d4, d5, #1
+ VMLAL.S16 q0, d16, d6[1]
+ VEXT.16 d16, d4, d5, #2
+ VMLAL.S16 q0, d16, d6[2]
+ VEXT.16 d16, d4, d5, #3
+ VMLAL.S16 q0, d16, d6[3]
+@ Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2:
+ ADDS r12, r12, #2
+ BLE xcorr_kernel_neon_process1
+ @ Load x[0...1]
+ VLD2.16 {d6[],d7[]}, [r4]!
+ @ Use VAND since it's a data processing instruction again.
+ VAND d4, d5, d5
+ SUB r12, r12, #2
+ @ Load y[4...5]
+ VLD1.32 {d5[]}, [r5]!
+ VMLAL.S16 q0, d4, d6
+ VEXT.16 d16, d4, d5, #1
+ @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+ @ instead of VEXT, since it's a data-processing instruction.
+ VSRI.64 d5, d4, #32
+ VMLAL.S16 q0, d16, d7
+@ Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1:
+ @ Load next *x
+ VLD1.16 {d6[]}, [r4]!
+ ADDS r12, r12, #1
+ @ y[0...3] are left in d5 from prior iteration(s) (if any)
+ VMLAL.S16 q0, d5, d6
+ MOVLE pc, lr
+@ Now process 1 last sample, not reading ahead.
+ @ Load last *y
+ VLD1.16 {d4[]}, [r5]!
+ VSRI.64 d4, d5, #16
+ @ Load last *x
+ VLD1.16 {d6[]}, [r4]!
+ VMLAL.S16 q0, d4, d6
+ MOV pc, lr
+ .size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP
+
+@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+@ opus_val32 *xcorr, int len, int max_pitch, int arch)
+ .type celt_pitch_xcorr_neon, %function; celt_pitch_xcorr_neon: @ PROC
+ @ input:
+ @ r0 = opus_val16 *_x
+ @ r1 = opus_val16 *_y
+ @ r2 = opus_val32 *xcorr
+ @ r3 = int len
+ @ output:
+ @ r0 = int maxcorr
+ @ internal usage:
+ @ r4 = opus_val16 *x (for xcorr_kernel_neon())
+ @ r5 = opus_val16 *y (for xcorr_kernel_neon())
+ @ r6 = int max_pitch
+ @ r12 = int j
+ @ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+ @ ignored:
+ @ int arch
+ STMFD sp!, {r4-r6, lr}
+ LDR r6, [sp, #16]
+ VMOV.S32 q15, #1
+ @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+ SUBS r6, r6, #4
+ BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4:
+ @ xcorr_kernel_neon parameters:
+ @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+ MOV r4, r0
+ MOV r5, r1
+ VEOR q0, q0, q0
+ @ xcorr_kernel_neon only modifies r4, r5, r12, q0...q3, and q8 (scratch).
+ @ So we don't save/restore any other registers.
+ BL xcorr_kernel_neon_start
+ SUBS r6, r6, #4
+ VST1.32 {q0}, [r2]!
+ @ _y += 4
+ ADD r1, r1, #8
+ VMAX.S32 q15, q15, q0
+ @ if (max_pitch >= 4) goto celt_pitch_xcorr_neon_process4
+ BGE celt_pitch_xcorr_neon_process4
+@ We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done:
+ ADDS r6, r6, #4
+ @ Reduce maxcorr to a single value
+ VMAX.S32 d30, d30, d31
+ VPMAX.S32 d30, d30, d30
+ @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+ BLE celt_pitch_xcorr_neon_done
+@ Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining:
+ MOV r4, r0
+ MOV r5, r1
+ VMOV.I32 q0, #0
+ SUBS r12, r3, #8
+ BLT celt_pitch_xcorr_neon_process_remaining4
+@ Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8:
+ @ Load x[0...7]
+ VLD1.16 {q1}, [r4]!
+ @ Load y[0...7]
+ VLD1.16 {q2}, [r5]!
+ SUBS r12, r12, #8
+ VMLAL.S16 q0, d4, d2
+ VMLAL.S16 q0, d5, d3
+ BGE celt_pitch_xcorr_neon_process_remaining_loop8
+@ Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4:
+ ADDS r12, r12, #4
+ BLT celt_pitch_xcorr_neon_process_remaining4_done
+ @ Load x[0...3]
+ VLD1.16 {d2}, [r4]!
+ @ Load y[0...3]
+ VLD1.16 {d3}, [r5]!
+ SUB r12, r12, #4
+ VMLAL.S16 q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done:
+ @ Reduce the sum to a single value.
+ VADD.S32 d0, d0, d1
+ VPADDL.S32 d0, d0
+ ADDS r12, r12, #4
+ BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+@ Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1:
+ VLD1.16 {d2[]}, [r4]!
+ VLD1.16 {d3[]}, [r5]!
+ SUBS r12, r12, #1
+ VMLAL.S16 q0, d2, d3
+ BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done:
+ VST1.32 {d0[0]}, [r2]!
+ VMAX.S32 d30, d30, d0
+ SUBS r6, r6, #1
+ @ _y++
+ ADD r1, r1, #2
+ @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+ BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done:
+ VMOV.32 r0, d30[0]
+ LDMFD sp!, {r4-r6, pc}
+ .size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP
+
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_EDSP
+
+@ This will get used on ARMv7 devices without NEON, so it has been optimized
+@ to take advantage of dual-issuing where possible.
+ .type xcorr_kernel_edsp, %function; xcorr_kernel_edsp: @ PROC
+xcorr_kernel_edsp_start:
+ @ input:
+ @ r3 = int len
+ @ r4 = opus_val16 *_x (must be 32-bit aligned)
+ @ r5 = opus_val16 *_y (must be 32-bit aligned)
+ @ r6...r9 = opus_val32 sum[4]
+ @ output:
+ @ r6...r9 = opus_val32 sum[4]
+ @ preserved: r0-r5
+ @ internal usage
+ @ r2 = int j
+ @ r12,r14 = opus_val16 x[4]
+ @ r10,r11 = opus_val16 y[4]
+ STMFD sp!, {r2,r4,r5,lr}
+ LDR r10, [r5], #4 @ Load y[0...1]
+ SUBS r2, r3, #4 @ j = len-4
+ LDR r11, [r5], #4 @ Load y[2...3]
+ BLE xcorr_kernel_edsp_process4_done
+ LDR r12, [r4], #4 @ Load x[0...1]
+ @ Stall
+xcorr_kernel_edsp_process4:
+ @ The multiplies must issue from pipeline 0, and can't dual-issue with each
+ @ other. Every other instruction here dual-issues with a multiply, and is
+ @ thus "free". There should be no stalls in the body of the loop.
+ SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0)
+ LDR r14, [r4], #4 @ Load x[2...3]
+ SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1)
+ SUBS r2, r2, #4 @ j-=4
+ SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2)
+ SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3)
+ SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1)
+ LDR r10, [r5], #4 @ Load y[4...5]
+ SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2)
+ SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3)
+ SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4)
+ LDRGT r12, [r4], #4 @ Load x[0...1]
+ SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2)
+ SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3)
+ SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4)
+ SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5)
+ SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3)
+ LDR r11, [r5], #4 @ Load y[6...7]
+ SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4)
+ SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5)
+ SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6)
+ BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done:
+ ADDS r2, r2, #4
+ BLE xcorr_kernel_edsp_done
+ LDRH r12, [r4], #2 @ r12 = *x++
+ SUBS r2, r2, #1 @ j--
+ @ Stall
+ SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0)
+ LDRHGT r14, [r4], #2 @ r14 = *x++
+ SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1)
+ SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2)
+ SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3)
+ BLE xcorr_kernel_edsp_done
+ SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1)
+ SUBS r2, r2, #1 @ j--
+ SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2)
+ LDRH r10, [r5], #2 @ r10 = y_4 = *y++
+ SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3)
+ LDRHGT r12, [r4], #2 @ r12 = *x++
+ SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4)
+ BLE xcorr_kernel_edsp_done
+ SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2)
+ CMP r2, #1 @ j--
+ SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3)
+ LDRH r2, [r5], #2 @ r2 = y_5 = *y++
+ SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4)
+ LDRHGT r14, [r4] @ r14 = *x
+ SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5)
+ BLE xcorr_kernel_edsp_done
+ SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3)
+ LDRH r11, [r5] @ r11 = y_6 = *y
+ SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4)
+ SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5)
+ SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done:
+ LDMFD sp!, {r2,r4,r5,pc}
+ .size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP
+
+ .type celt_pitch_xcorr_edsp, %function; celt_pitch_xcorr_edsp: @ PROC
+ @ input:
+ @ r0 = opus_val16 *_x (must be 32-bit aligned)
+ @ r1 = opus_val16 *_y (only needs to be 16-bit aligned)
+ @ r2 = opus_val32 *xcorr
+ @ r3 = int len
+ @ output:
+ @ r0 = maxcorr
+ @ internal usage
+ @ r4 = opus_val16 *x
+ @ r5 = opus_val16 *y
+ @ r6 = opus_val32 sum0
+ @ r7 = opus_val32 sum1
+ @ r8 = opus_val32 sum2
+ @ r9 = opus_val32 sum3
+ @ r1 = int max_pitch
+ @ r12 = int j
+ @ ignored:
+ @ int arch
+ STMFD sp!, {r4-r11, lr}
+ MOV r5, r1
+ LDR r1, [sp, #36]
+ MOV r4, r0
+ TST r5, #3
+ @ maxcorr = 1
+ MOV r0, #1
+ BEQ celt_pitch_xcorr_edsp_process1u_done
+@ Compute one sum at the start to make y 32-bit aligned.
+ SUBS r12, r3, #4
+ @ r14 = sum = 0
+ MOV r14, #0
+ LDRH r8, [r5], #2
+ BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+ LDR r6, [r4], #4
+ MOV r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4:
+ LDR r9, [r5], #4
+ SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
+ LDR r7, [r4], #4
+ SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1)
+ LDR r8, [r5], #4
+ SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2)
+ SUBS r12, r12, #4 @ j-=4
+ SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3)
+ LDRGT r6, [r4], #4
+ BGT celt_pitch_xcorr_edsp_process1u_loop4
+ MOV r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done:
+ ADDS r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1:
+ LDRHGE r6, [r4], #2
+ @ Stall
+ SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y)
+ SUBSGE r12, r12, #1
+ LDRHGT r8, [r5], #2
+ BGT celt_pitch_xcorr_edsp_process1u_loop1
+ @ Restore _x
+ SUB r4, r4, r3, LSL #1
+ @ Restore and advance _y
+ SUB r5, r5, r3, LSL #1
+ @ maxcorr = max(maxcorr, sum)
+ CMP r0, r14
+ ADD r5, r5, #2
+ MOVLT r0, r14
+ SUBS r1, r1, #1
+ @ xcorr[i] = sum
+ STR r14, [r2], #4
+ BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done:
+ @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+ SUBS r1, r1, #4
+ BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4:
+ @ xcorr_kernel_edsp parameters:
+ @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+ MOV r6, #0
+ MOV r7, #0
+ MOV r8, #0
+ MOV r9, #0
+ BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+ @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+ CMP r0, r6
+ @ _y+=4
+ ADD r5, r5, #8
+ MOVLT r0, r6
+ CMP r0, r7
+ MOVLT r0, r7
+ CMP r0, r8
+ MOVLT r0, r8
+ CMP r0, r9
+ MOVLT r0, r9
+ STMIA r2!, {r6-r9}
+ SUBS r1, r1, #4
+ BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2:
+ ADDS r1, r1, #2
+ BLT celt_pitch_xcorr_edsp_process1a
+ SUBS r12, r3, #4
+ @ {r10, r11} = {sum0, sum1} = {0, 0}
+ MOV r10, #0
+ MOV r11, #0
+ LDR r8, [r5], #4
+ BLE celt_pitch_xcorr_edsp_process2_loop_done
+ LDR r6, [r4], #4
+ LDR r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4:
+ SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
+ LDR r7, [r4], #4
+ SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
+ SUBS r12, r12, #4 @ j-=4
+ SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1)
+ LDR r8, [r5], #4
+ SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2)
+ LDRGT r6, [r4], #4
+ SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2)
+ SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3)
+ SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3)
+ LDRGT r9, [r5], #4
+ SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4)
+ BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done:
+ ADDS r12, r12, #2
+ BLE celt_pitch_xcorr_edsp_process2_1
+ LDR r6, [r4], #4
+ @ Stall
+ SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
+ LDR r9, [r5], #4
+ SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
+ SUB r12, r12, #2
+ SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1)
+ MOV r8, r9
+ SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1:
+ LDRH r6, [r4], #2
+ ADDS r12, r12, #1
+ @ Stall
+ SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
+ LDRHGT r7, [r4], #2
+ SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
+ BLE celt_pitch_xcorr_edsp_process2_done
+ LDRH r9, [r5], #2
+ SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1)
+ SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done:
+ @ Restore _x
+ SUB r4, r4, r3, LSL #1
+ @ Restore and advance _y
+ SUB r5, r5, r3, LSL #1
+ @ maxcorr = max(maxcorr, sum0)
+ CMP r0, r10
+ ADD r5, r5, #2
+ MOVLT r0, r10
+ SUB r1, r1, #2
+ @ maxcorr = max(maxcorr, sum1)
+ CMP r0, r11
+ @ xcorr[i] = sum
+ STR r10, [r2], #4
+ MOVLT r0, r11
+ STR r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a:
+ ADDS r1, r1, #1
+ BLT celt_pitch_xcorr_edsp_done
+ SUBS r12, r3, #4
+ @ r14 = sum = 0
+ MOV r14, #0
+ BLT celt_pitch_xcorr_edsp_process1a_loop_done
+ LDR r6, [r4], #4
+ LDR r8, [r5], #4
+ LDR r7, [r4], #4
+ LDR r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4:
+ SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
+ SUBS r12, r12, #4 @ j-=4
+ SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1)
+ LDRGE r6, [r4], #4
+ SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2)
+ LDRGE r8, [r5], #4
+ SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3)
+ LDRGE r7, [r4], #4
+ LDRGE r9, [r5], #4
+ BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done:
+ ADDS r12, r12, #2
+ LDRGE r6, [r4], #4
+ LDRGE r8, [r5], #4
+ @ Stall
+ SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
+ SUBGE r12, r12, #2
+ SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1)
+ ADDS r12, r12, #1
+ LDRHGE r6, [r4], #2
+ LDRHGE r8, [r5], #2
+ @ Stall
+ SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y)
+ @ maxcorr = max(maxcorr, sum)
+ CMP r0, r14
+ @ xcorr[i] = sum
+ STR r14, [r2], #4
+ MOVLT r0, r14
+celt_pitch_xcorr_edsp_done:
+ LDMFD sp!, {r4-r11, pc}
+ .size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP
+
+ .endif
+
+@ END:
+ .section .note.GNU-stack,"",%progbits
diff --git a/convert_android_asm.sh b/convert_android_asm.sh
new file mode 100755
index 0000000..ea3d198
--- /dev/null
+++ b/convert_android_asm.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Converts the ARM assembly sources to GNU assembler syntax (*_gnu.s files).
+set -e
+ASM_CONVERTER="./celt/arm/arm2gnu.pl"
+
+if [[ ! -x "${ASM_CONVERTER}" ]]; then
+  echo "This script should be run from external/libopus." >&2
+  exit 1
+fi
+
+while IFS= read -r file; do
+  # This check is required because the ASM conversion script doesn't seem to be
+  # idempotent.
+  if [[ ! "${file}" =~ .*_gnu\.s$ ]]; then
+    gnu_file="${file%.s}_gnu.s"
+    ${ASM_CONVERTER} "${file}" > "${gnu_file}"
+    # The converted output may name its includes "*-gnu.S"; rewrite those
+    # occurrences to "*_gnu.s" so they match the files generated here.
+    sed -i "s/-gnu\.S/_gnu\.s/g" "${gnu_file}"
+    rm -f "${file}"
+  fi
+done < <(find . -iname '*.s')
+
+# Generate armopts_gnu.s from armopts.s.in
+sed \
+  -e "s/@OPUS_ARM_MAY_HAVE_EDSP@/1/g" \
+  -e "s/@OPUS_ARM_MAY_HAVE_MEDIA@/1/g" \
+  -e "s/@OPUS_ARM_MAY_HAVE_NEON@/1/g" \
+  -e "s/@OPUS_ARM_MAY_HAVE_NEON_INTR@/1/g" \
+  celt/arm/armopts.s.in > celt/arm/armopts.s.temp
+${ASM_CONVERTER} "celt/arm/armopts.s.temp" > "celt/arm/armopts_gnu.s"
+rm "celt/arm/armopts.s.temp"
+echo "Converted all ASM files and generated armopts.s successfully."
diff --git a/fuzzer/Android.bp b/fuzzer/Android.bp
new file mode 100644
index 0000000..35dad5c
--- /dev/null
+++ b/fuzzer/Android.bp
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+cc_fuzz { // Single-stream variant: built without -DMULTISTREAM.
+ name: "opus_dec_fuzzer",
+ host_supported:true,
+
+ static_libs: [
+ "libopus",
+ ],
+
+ srcs: [
+ "opus_dec_fuzzer.cpp", // source shared with opus_multistream_dec_fuzzer
+ ],
+
+ fuzz_config: {
+ cc: [
+ "android-media-fuzzing-reports@google.com",
+ ],
+ componentid: 155276,
+ },
+}
+
+cc_fuzz { // Multistream variant of opus_dec_fuzzer.
+ name: "opus_multistream_dec_fuzzer",
+ host_supported:true,
+
+ static_libs: [
+ "libopus",
+ ],
+
+ srcs: [
+ "opus_dec_fuzzer.cpp", // source shared with opus_dec_fuzzer
+ ],
+
+ cflags: ["-DMULTISTREAM"], // selects the OpusMSDecoder code path in opus_dec_fuzzer.cpp
+
+ fuzz_config: {
+ cc: [
+ "android-media-fuzzing-reports@google.com",
+ ],
+ componentid: 155276,
+ },
+}
diff --git a/fuzzer/README.md b/fuzzer/README.md
new file mode 100644
index 0000000..cd6a680
--- /dev/null
+++ b/fuzzer/README.md
@@ -0,0 +1,62 @@
+# Fuzzer for libopus decoder
+
+## Plugin Design Considerations
+The fuzzer plugin for opus decoder is designed based on the understanding of the
+codec and tries to achieve the following:
+
+##### Maximize code coverage
+
+This fuzzer provides support for both single stream and multi stream inputs,
+thus enabling fuzzing of the APIs provided for single stream as well as multi
+stream.
+
+The following arguments are passed to OPUS_DEC_CREATE_API:
+
+1. Sampling frequency (parameter name: `Fs`)
+2. Number of channels (parameter name: `channels`)
+
+| Parameter| Valid Values| Configured Value|
+|------------- |-------------| ----- |
+| `Fs` | `8000 ` `12000 ` `16000 ` `24000 ` `48000 ` | Derived from Byte-9 of input stream|
+| `channels` | `1 ` `2 ` | Derived from Byte-9 of input stream |
+
+##### Maximize utilization of input data
+The plugin feeds the entire input data to the codec. Frame sizes are determined only
+after a call to the extractor, so in the absence of a call to the extractor,
+we feed the entire data to the decoder.
+This ensures that the plugin tolerates any kind of input (empty, huge,
+malformed, etc.) and doesn't `exit()` on any input, thereby increasing the
+chance of identifying vulnerabilities.
+
+## Build
+
+This describes the steps to build the opus_dec_fuzzer and opus_multistream_dec_fuzzer binaries.
+
+## Android
+
+### Steps to build
+Build the fuzzer
+```
+ $ mm -j$(nproc) opus_dec_fuzzer
+ $ mm -j$(nproc) opus_multistream_dec_fuzzer
+```
+
+### Steps to run
+Create a directory CORPUS_DIR and copy some opus files to that folder.
+Push this directory to device.
+
+To run on device
+```
+ $ adb sync data
+ $ adb shell /data/fuzz/arm64/opus_dec_fuzzer/opus_dec_fuzzer CORPUS_DIR
+ $ adb shell /data/fuzz/arm64/opus_multistream_dec_fuzzer/opus_multistream_dec_fuzzer CORPUS_DIR
+```
+To run on host
+```
+ $ $ANDROID_HOST_OUT/fuzz/x86_64/opus_dec_fuzzer/opus_dec_fuzzer CORPUS_DIR
+ $ $ANDROID_HOST_OUT/fuzz/x86_64/opus_multistream_dec_fuzzer/opus_multistream_dec_fuzzer CORPUS_DIR
+```
+
+## References:
+ * http://llvm.org/docs/LibFuzzer.html
+ * https://github.com/google/oss-fuzz
diff --git a/fuzzer/opus_dec_fuzzer.cpp b/fuzzer/opus_dec_fuzzer.cpp
new file mode 100644
index 0000000..23bf69e
--- /dev/null
+++ b/fuzzer/opus_dec_fuzzer.cpp
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <opus.h>
+
+/* 4 bytes: packet length, 4 bytes: encoder final range */
+constexpr int kSetupByteOffset = 8;
+constexpr int kMaxFrameSample = 5760;
+const int kSamplingRates[] = {8000, 12000, 16000, 24000, 48000};
+constexpr int kNumberSamplingRates = sizeof(kSamplingRates) / sizeof(kSamplingRates[0]);
+
+#ifdef MULTISTREAM
+#include "opus_multistream.h"
+#define OPUS_DEC_DATA_TYPE OpusMSDecoder
+#define OPUS_DEC_DECODE_API opus_multistream_decode
+#define OPUS_DEC_CREATE_API ms_opus_decoder_create
+#define OPUS_DEC_DESTROY_API opus_multistream_decoder_destroy
+static OpusMSDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *error) {
+  const int streamCount = 1;
+  const int coupledCount = (channels == 2) ? 1 : 0;  // the single stream is coupled for stereo
+  unsigned char channelMap[256] = {0, 1};            // remaining entries are zero
+  return opus_multistream_decoder_create(Fs, channels, streamCount, coupledCount, channelMap, error);
+}
+#else
+#define OPUS_DEC_DATA_TYPE OpusDecoder
+#define OPUS_DEC_DECODE_API opus_decode
+#define OPUS_DEC_CREATE_API opus_decoder_create
+#define OPUS_DEC_DESTROY_API opus_decoder_destroy
+#endif
+
+class Codec {  // Owns one Opus decoder instance together with its PCM output buffer.
+ public:
+ Codec() = default;
+ ~Codec() { deInitDecoder(); }  // releases mDec and mPcm
+ bool initDecoder(const uint8_t *data);  // reads data[kSetupByteOffset]; returns false on failure
+ void decodeFrames(const uint8_t *data, size_t size);  // one decode call over the whole input
+ void deInitDecoder();  // destroys mDec, frees mPcm and resets both to nullptr
+
+ private:
+ int mSamplingRate;  // taken from kSamplingRates (or 8000) in initDecoder()
+ int mNoOfChannels;  // 1 or 2; assigned in initDecoder()
+ OPUS_DEC_DATA_TYPE *mDec = nullptr;
+ opus_int16 *mPcm = nullptr;  // kMaxFrameSample * mNoOfChannels samples once allocated
+};
+
+// Creates the decoder and its PCM buffer, using the TOC byte at
+// data[kSetupByteOffset] to choose the sampling rate and channel count.
+// Returns false when the decoder or the buffer cannot be created; whatever
+// was allocated up to that point is released by deInitDecoder() (run from
+// the destructor).
+bool Codec::initDecoder(const uint8_t *data) {
+  const uint8_t *toc = data + kSetupByteOffset;
+
+  // Bandwidth codes index kSamplingRates relative to OPUS_BANDWIDTH_NARROWBAND;
+  // anything outside the table falls back to 8000.
+  const int rateIdx = opus_packet_get_bandwidth(toc) - OPUS_BANDWIDTH_NARROWBAND;
+  const bool rateIdxInRange = (rateIdx >= 0) && (rateIdx < kNumberSamplingRates);
+  mSamplingRate = rateIdxInRange ? kSamplingRates[rateIdx] : 8000;
+
+  // Only mono and stereo are accepted; every other count is treated as mono.
+  const int packetChannels = opus_packet_get_nb_channels(toc);
+  mNoOfChannels = (packetChannels == 2) ? 2 : 1;
+
+  int status;
+  mDec = OPUS_DEC_CREATE_API(mSamplingRate, mNoOfChannels, &status);
+  if (mDec == nullptr || status != OPUS_OK) {
+    return false;
+  }
+
+  // calloc() hands back a zero-filled buffer of kMaxFrameSample samples per
+  // channel, or nullptr on failure.
+  const size_t sampleCount = static_cast<size_t>(kMaxFrameSample) * mNoOfChannels;
+  mPcm = static_cast<opus_int16 *>(calloc(sampleCount, sizeof(*mPcm)));
+  return mPcm != nullptr;
+}
+
+// Frees the PCM buffer and destroys the decoder, then clears both pointers
+// so that no stale address is kept.
+void Codec::deInitDecoder() {
+  free(mPcm);  // free() accepts a null pointer, so no guard is needed
+  mPcm = nullptr;
+  OPUS_DEC_DESTROY_API(mDec);
+  mDec = nullptr;
+}
+
+void Codec::decodeFrames(const uint8_t *data, size_t size) {  // single decode call over the whole input
+ (void)OPUS_DEC_DECODE_API(mDec, data, size, mPcm, kMaxFrameSample, 0 /*fec*/);  // return value deliberately discarded
+}
+
+// libFuzzer entry point: sets up a decoder from the byte at kSetupByteOffset and
+// runs one decode over the complete input. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // initDecoder() reads data[kSetupByteOffset], so that byte must be present.
+  if (size < kSetupByteOffset + 1) {
+    return 0;
+  }
+  // Automatic storage: ~Codec() releases the decoder on every exit path.
+  Codec codec;
+  if (codec.initDecoder(data)) {
+    codec.decodeFrames(data, size);
+  }
+  return 0;
+}
diff --git a/libopus_blocklist.txt b/libopus_blocklist.txt
new file mode 100644
index 0000000..fd3f398
--- /dev/null
+++ b/libopus_blocklist.txt
@@ -0,0 +1,49 @@
+[integer]
+# celt/celt_decoder.c:1055:61: 0 - 1 cannot be represented in type 'unsigned int'
+fun:celt_decode_with_ec
+# celt/celt_encoder.c:2171:75: 0 - 1 cannot be represented in type 'unsigned int'
+fun:celt_encode_with_ec
+fun:celt_lcg_rand
+# celt/entcode.h:131: negation of 100 cannot be represented in type 'opus_uint32'
+fun:celt_udiv
+# celt/mdct.c:273
+# celt/mdct.c:274
+# celt/mdct.c:304
+# celt/mdct.c:305
+# celt/mdct.c:315
+# celt/mdct.c:316
+# celt/mdct.c:336
+# celt/mdct.c:337
+fun:clt_mdct_backward_c
+fun:ec_dec_init
+# celt/entdec.c:143
+fun:ec_decode
+# celt/entdec.c:150
+fun:ec_decode_bin
+
+src:*/celt/kiss_fft.c
+
+# assembly optimizations that know what they are doing
+fun:silk_SMULWB_armv4
+fun:silk_SMULWT_armv4
+fun:silk_SMULWW_armv4
+fun:silk_SMLAWW_armv4
+#
+fun:silk_SMULWB_armv5e
+fun:silk_SMLAWB_armv5e
+fun:silk_SMULWT_armv5e
+fun:silk_SMLAWT_armv5e
+fun:silk_SMULBB_armv5e
+fun:silk_SMLABB_armv5e
+fun:silk_SMULBT_armv5e
+fun:silk_SMLABT_armv5e
+fun:silk_ADD_SAT32_armv5e
+fun:silk_SUB_SAT32_armv5e
+fun:silk_CLZ16_armv5
+fun:silk_CLZ32_armv5
+
+
+# Performance related
+fun:exp_rotation1
+fun:haar1
+fun:celt_preemphasis
diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index f9ae326..a906890 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -448,13 +448,29 @@
/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
(just standard two's complement implementation-specific behaviour) */
-#define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
+static OPUS_INLINE opus_int32 silk_ADD32_ovflw(opus_int32 a, opus_int32 b) {
+    opus_int32 wrapped;
+    (void)__builtin_add_overflow(a, b, &wrapped); /* overflow flag deliberately ignored */
+    return wrapped;
+}
+
/* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
(just standard two's complement implementation-specific behaviour) */
-#define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
+static OPUS_INLINE opus_int32 silk_SUB32_ovflw(opus_int32 a, opus_int32 b) {
+    opus_int32 wrapped;
+    (void)__builtin_sub_overflow(a, b, &wrapped); /* overflow flag deliberately ignored */
+    return wrapped;
+}
/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
-#define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
+/* .. also ignoring multiply overflows; caller has comment about this happening occasionally */
+static OPUS_INLINE opus_int32 silk_MLA_ovflw(opus_int32 a, opus_int32 b, opus_int32 c) {
+    opus_int32 acc;
+    (void)__builtin_mul_overflow(b, c, &acc);   /* acc = b * c, overflow flag ignored */
+    (void)__builtin_add_overflow(a, acc, &acc); /* acc = a + acc, overflow flag ignored */
+    return acc;
+}
+
#define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
#define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16)))
@@ -496,7 +512,12 @@
/* Add with saturation for positive input values */
#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
-#define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int32 a, opus_int32 b) {
+    opus_int32 sum;
+    const int overflowed = __builtin_add_overflow(a, b, &sum);
+    /* Clamp to silk_int32_MAX whenever the builtin reports an overflow. */
+    return overflowed ? silk_int32_MAX : sum;
+}
#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
diff --git a/silk/macros.h b/silk/macros.h
index 3c67b6e..00ccca3 100644
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -33,6 +33,7 @@
#endif
#include "opus_types.h"
+#include "typedef.h"
#include "opus_defines.h"
#include "arch.h"
@@ -96,13 +97,31 @@
#endif
/* add/subtract with output saturated */
-#define silk_ADD_SAT32(a, b) ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ? \
- ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) : \
- ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
+/* Saturating 32-bit addition built on the compiler's overflow-checking builtin.
+ * Returns a + b when the builtin reports no overflow, otherwise
+ * silk_int32_MIN or silk_int32_MAX depending on the sign of a. */
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a, opus_int32 b) {
+    opus_int32 sum;
+    if (!__builtin_add_overflow(a, b, &sum)) {
+        return sum;
+    }
+    /* A negative a can only push the sum below the range; a non-negative a
+     * can only push it above. */
+    return (a < 0) ? silk_int32_MIN : silk_int32_MAX;
+}
-#define silk_SUB_SAT32(a, b) ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ? \
- (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \
- ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
+/* Saturating 32-bit subtraction built on the compiler's overflow-checking builtin.
+ * Returns a - b when the builtin reports no overflow, otherwise
+ * silk_int32_MIN or silk_int32_MAX depending on the sign of a. */
+static OPUS_INLINE opus_int32 silk_SUB_SAT32(opus_int32 a, opus_int32 b) {
+    opus_int32 diff;
+    if (!__builtin_sub_overflow(a, b, &diff)) {
+        return diff;
+    }
+    /* A negative a can only push the difference below the range; a
+     * non-negative a can only push it above. */
+    return (a < 0) ? silk_int32_MIN : silk_int32_MAX;
+}
#if defined(MIPSr1_ASM)
#include "mips/macros_mipsr1.h"