audio_utils: Biquad refactorization am: d96c30322a
Original change: https://android-review.googlesource.com/c/platform/system/media/+/1870313
Change-Id: I8c30d8501fdee42fe6e766ed8077f9b691541829
diff --git a/audio_utils/benchmarks/biquad_filter_benchmark.cpp b/audio_utils/benchmarks/biquad_filter_benchmark.cpp
index 35aa839..b0f6f6a 100644
--- a/audio_utils/benchmarks/biquad_filter_benchmark.cpp
+++ b/audio_utils/benchmarks/biquad_filter_benchmark.cpp
@@ -85,320 +85,325 @@
/*******************************************************************
A test result running on Pixel 4XL for comparison.
- The first parameter indicates the input data is subnormal or not.
- 0 for normal input data, 1 for subnormal input data.
- The second parameter indicates the channel count.
- The third parameter indicates the occupancy of the coefficients.
+
+ Parameterized Test BM_BiquadFilter1D/A
+ <A> is 0 for normal input data, 1 for subnormal input data.
+
+ Parameterized Test BM_BiquadFilter<TYPE>/A/B/C
+ <A> is 0 for normal input data, 1 for subnormal input data.
+ <B> is the channel count, starting from 1.
+ <C> indicates the occupancy of the coefficients as a bitmask (1 - 31) representing
+ b0, b1, b2, a0, a1. 31 indicates all Biquad coefficients are non-zero.
-----------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------
-BM_BiquadFilter1D/0 556 ns 555 ns 1263112
-BM_BiquadFilter1D/1 560 ns 558 ns 1253287
-BM_BiquadFilterFloatOptimized/0/1/31 2178 ns 2172 ns 322245
-BM_BiquadFilterFloatOptimized/0/2/31 5013 ns 4999 ns 140023
-BM_BiquadFilterFloatOptimized/0/3/31 4938 ns 4924 ns 142153
-BM_BiquadFilterFloatOptimized/0/4/31 4996 ns 4981 ns 140506
-BM_BiquadFilterFloatOptimized/0/5/31 4931 ns 4917 ns 142358
-BM_BiquadFilterFloatOptimized/0/6/31 5222 ns 5208 ns 134401
-BM_BiquadFilterFloatOptimized/0/7/31 4694 ns 4681 ns 149552
-BM_BiquadFilterFloatOptimized/0/8/31 5174 ns 5159 ns 135656
-BM_BiquadFilterFloatOptimized/0/9/31 5604 ns 5589 ns 125174
-BM_BiquadFilterFloatOptimized/0/10/31 6136 ns 6118 ns 114547
-BM_BiquadFilterFloatOptimized/0/11/31 6080 ns 6065 ns 115425
-BM_BiquadFilterFloatOptimized/0/12/31 6114 ns 6098 ns 114790
-BM_BiquadFilterFloatOptimized/0/13/31 7247 ns 7229 ns 96798
-BM_BiquadFilterFloatOptimized/0/14/31 7539 ns 7515 ns 93137
-BM_BiquadFilterFloatOptimized/0/15/31 12787 ns 12748 ns 55041
-BM_BiquadFilterFloatOptimized/0/16/31 7493 ns 7470 ns 93688
-BM_BiquadFilterFloatOptimized/0/17/31 9797 ns 9766 ns 71597
-BM_BiquadFilterFloatOptimized/0/18/31 12563 ns 12524 ns 55862
-BM_BiquadFilterFloatOptimized/0/19/31 12560 ns 12521 ns 55846
-BM_BiquadFilterFloatOptimized/0/20/31 12560 ns 12523 ns 55926
-BM_BiquadFilterFloatOptimized/0/21/31 12576 ns 12543 ns 55795
-BM_BiquadFilterFloatOptimized/0/22/31 12881 ns 12845 ns 54408
-BM_BiquadFilterFloatOptimized/0/23/31 12681 ns 12635 ns 55410
-BM_BiquadFilterFloatOptimized/0/24/31 12749 ns 12712 ns 55041
-BM_BiquadFilterFloatOptimized/0/1/1 557 ns 555 ns 1260939
-BM_BiquadFilterFloatOptimized/0/1/2 652 ns 650 ns 1077181
-BM_BiquadFilterFloatOptimized/0/1/3 652 ns 650 ns 1077352
-BM_BiquadFilterFloatOptimized/0/1/4 833 ns 831 ns 840290
-BM_BiquadFilterFloatOptimized/0/1/5 835 ns 833 ns 840171
-BM_BiquadFilterFloatOptimized/0/1/6 836 ns 833 ns 840106
-BM_BiquadFilterFloatOptimized/0/1/7 835 ns 832 ns 840200
-BM_BiquadFilterFloatOptimized/0/1/8 1813 ns 1808 ns 387100
-BM_BiquadFilterFloatOptimized/0/1/9 1813 ns 1808 ns 387152
-BM_BiquadFilterFloatOptimized/0/1/10 2552 ns 2544 ns 275176
-BM_BiquadFilterFloatOptimized/0/1/11 2551 ns 2544 ns 275192
-BM_BiquadFilterFloatOptimized/0/1/12 2178 ns 2172 ns 322335
-BM_BiquadFilterFloatOptimized/0/1/13 2179 ns 2172 ns 322286
-BM_BiquadFilterFloatOptimized/0/1/14 2178 ns 2172 ns 322252
-BM_BiquadFilterFloatOptimized/0/1/15 2178 ns 2172 ns 322285
-BM_BiquadFilterFloatOptimized/0/1/16 2175 ns 2169 ns 322716
-BM_BiquadFilterFloatOptimized/0/1/17 2174 ns 2169 ns 322730
-BM_BiquadFilterFloatOptimized/0/1/18 2175 ns 2169 ns 322719
-BM_BiquadFilterFloatOptimized/0/1/19 2175 ns 2169 ns 322741
-BM_BiquadFilterFloatOptimized/0/1/20 2178 ns 2172 ns 322336
-BM_BiquadFilterFloatOptimized/0/1/21 2178 ns 2172 ns 322315
-BM_BiquadFilterFloatOptimized/0/1/22 2178 ns 2172 ns 322328
-BM_BiquadFilterFloatOptimized/0/1/23 2178 ns 2172 ns 322306
-BM_BiquadFilterFloatOptimized/0/1/24 2175 ns 2169 ns 322752
-BM_BiquadFilterFloatOptimized/0/1/25 2174 ns 2169 ns 322721
-BM_BiquadFilterFloatOptimized/0/1/26 2174 ns 2169 ns 322722
-BM_BiquadFilterFloatOptimized/0/1/27 2175 ns 2169 ns 322704
-BM_BiquadFilterFloatOptimized/0/1/28 2178 ns 2172 ns 322317
-BM_BiquadFilterFloatOptimized/0/1/29 2178 ns 2172 ns 322308
-BM_BiquadFilterFloatOptimized/0/1/30 2179 ns 2172 ns 322300
-BM_BiquadFilterFloatOptimized/0/1/31 2178 ns 2172 ns 322271
-BM_BiquadFilterFloatOptimized/0/2/1 737 ns 734 ns 953033
-BM_BiquadFilterFloatOptimized/0/2/2 1085 ns 1082 ns 647110
-BM_BiquadFilterFloatOptimized/0/2/3 1085 ns 1082 ns 646630
-BM_BiquadFilterFloatOptimized/0/2/4 1538 ns 1534 ns 456015
-BM_BiquadFilterFloatOptimized/0/2/5 1536 ns 1532 ns 456137
-BM_BiquadFilterFloatOptimized/0/2/6 1537 ns 1532 ns 456168
-BM_BiquadFilterFloatOptimized/0/2/7 1536 ns 1532 ns 456982
-BM_BiquadFilterFloatOptimized/0/2/8 1974 ns 1969 ns 355506
-BM_BiquadFilterFloatOptimized/0/2/9 1974 ns 1969 ns 355489
-BM_BiquadFilterFloatOptimized/0/2/10 4345 ns 4333 ns 161562
-BM_BiquadFilterFloatOptimized/0/2/11 4344 ns 4332 ns 161564
-BM_BiquadFilterFloatOptimized/0/2/12 5014 ns 4999 ns 140035
-BM_BiquadFilterFloatOptimized/0/2/13 5014 ns 4999 ns 139958
-BM_BiquadFilterFloatOptimized/0/2/14 5012 ns 4999 ns 139996
-BM_BiquadFilterFloatOptimized/0/2/15 5013 ns 4999 ns 140021
-BM_BiquadFilterFloatOptimized/0/2/16 3985 ns 3973 ns 176193
-BM_BiquadFilterFloatOptimized/0/2/17 3984 ns 3973 ns 176178
-BM_BiquadFilterFloatOptimized/0/2/18 3984 ns 3973 ns 176178
-BM_BiquadFilterFloatOptimized/0/2/19 3984 ns 3973 ns 176179
-BM_BiquadFilterFloatOptimized/0/2/20 5013 ns 4999 ns 140011
-BM_BiquadFilterFloatOptimized/0/2/21 5013 ns 4999 ns 140042
-BM_BiquadFilterFloatOptimized/0/2/22 5012 ns 4999 ns 140027
-BM_BiquadFilterFloatOptimized/0/2/23 5011 ns 4999 ns 140028
-BM_BiquadFilterFloatOptimized/0/2/24 3984 ns 3973 ns 176189
-BM_BiquadFilterFloatOptimized/0/2/25 3984 ns 3973 ns 176199
-BM_BiquadFilterFloatOptimized/0/2/26 3979 ns 3971 ns 176263
-BM_BiquadFilterFloatOptimized/0/2/27 3984 ns 3973 ns 176206
-BM_BiquadFilterFloatOptimized/0/2/28 5013 ns 4999 ns 140019
-BM_BiquadFilterFloatOptimized/0/2/29 5013 ns 4999 ns 140032
-BM_BiquadFilterFloatOptimized/0/2/30 5013 ns 4999 ns 140031
-BM_BiquadFilterFloatOptimized/0/2/31 5012 ns 4999 ns 140021
-BM_BiquadFilterFloatOptimized/0/3/1 1010 ns 1007 ns 695238
-BM_BiquadFilterFloatOptimized/0/3/2 1760 ns 1755 ns 409554
-BM_BiquadFilterFloatOptimized/0/3/3 1750 ns 1745 ns 391924
-BM_BiquadFilterFloatOptimized/0/3/4 2315 ns 2308 ns 303349
-BM_BiquadFilterFloatOptimized/0/3/5 2315 ns 2309 ns 303177
-BM_BiquadFilterFloatOptimized/0/3/6 2316 ns 2309 ns 303026
-BM_BiquadFilterFloatOptimized/0/3/7 2315 ns 2309 ns 303133
-BM_BiquadFilterFloatOptimized/0/3/8 3052 ns 3044 ns 229836
-BM_BiquadFilterFloatOptimized/0/3/9 3052 ns 3044 ns 229888
-BM_BiquadFilterFloatOptimized/0/3/10 4345 ns 4333 ns 161546
-BM_BiquadFilterFloatOptimized/0/3/11 4344 ns 4333 ns 161549
-BM_BiquadFilterFloatOptimized/0/3/12 4937 ns 4924 ns 142178
-BM_BiquadFilterFloatOptimized/0/3/13 4933 ns 4923 ns 142166
-BM_BiquadFilterFloatOptimized/0/3/14 4937 ns 4924 ns 142174
-BM_BiquadFilterFloatOptimized/0/3/15 4937 ns 4924 ns 142139
-BM_BiquadFilterFloatOptimized/0/3/16 4068 ns 4058 ns 172507
-BM_BiquadFilterFloatOptimized/0/3/17 4068 ns 4057 ns 172495
-BM_BiquadFilterFloatOptimized/0/3/18 4069 ns 4058 ns 172509
-BM_BiquadFilterFloatOptimized/0/3/19 4070 ns 4059 ns 172495
-BM_BiquadFilterFloatOptimized/0/3/20 4937 ns 4924 ns 142161
-BM_BiquadFilterFloatOptimized/0/3/21 4937 ns 4924 ns 142171
-BM_BiquadFilterFloatOptimized/0/3/22 4937 ns 4923 ns 142172
-BM_BiquadFilterFloatOptimized/0/3/23 4938 ns 4924 ns 142191
-BM_BiquadFilterFloatOptimized/0/3/24 4072 ns 4058 ns 172484
-BM_BiquadFilterFloatOptimized/0/3/25 4070 ns 4058 ns 172532
-BM_BiquadFilterFloatOptimized/0/3/26 4068 ns 4058 ns 172543
-BM_BiquadFilterFloatOptimized/0/3/27 4069 ns 4058 ns 172503
-BM_BiquadFilterFloatOptimized/0/3/28 4937 ns 4924 ns 142173
-BM_BiquadFilterFloatOptimized/0/3/29 4940 ns 4924 ns 142160
-BM_BiquadFilterFloatOptimized/0/3/30 4937 ns 4924 ns 142168
-BM_BiquadFilterFloatOptimized/0/3/31 4937 ns 4924 ns 142171
-BM_BiquadFilterFloatOptimized/0/4/1 555 ns 553 ns 1264721
-BM_BiquadFilterFloatOptimized/0/4/2 736 ns 734 ns 953947
-BM_BiquadFilterFloatOptimized/0/4/3 736 ns 734 ns 953825
-BM_BiquadFilterFloatOptimized/0/4/4 1357 ns 1353 ns 517334
-BM_BiquadFilterFloatOptimized/0/4/5 1357 ns 1353 ns 517339
-BM_BiquadFilterFloatOptimized/0/4/6 1357 ns 1353 ns 517307
-BM_BiquadFilterFloatOptimized/0/4/7 1357 ns 1353 ns 517153
-BM_BiquadFilterFloatOptimized/0/4/8 1901 ns 1896 ns 369069
-BM_BiquadFilterFloatOptimized/0/4/9 1902 ns 1897 ns 369100
-BM_BiquadFilterFloatOptimized/0/4/10 3984 ns 3972 ns 176207
-BM_BiquadFilterFloatOptimized/0/4/11 3984 ns 3972 ns 176209
-BM_BiquadFilterFloatOptimized/0/4/12 4998 ns 4982 ns 140517
-BM_BiquadFilterFloatOptimized/0/4/13 4996 ns 4982 ns 140523
-BM_BiquadFilterFloatOptimized/0/4/14 4996 ns 4982 ns 140527
-BM_BiquadFilterFloatOptimized/0/4/15 4995 ns 4982 ns 140510
-BM_BiquadFilterFloatOptimized/0/4/16 3984 ns 3973 ns 176180
-BM_BiquadFilterFloatOptimized/0/4/17 3985 ns 3973 ns 176195
-BM_BiquadFilterFloatOptimized/0/4/18 3985 ns 3973 ns 176206
-BM_BiquadFilterFloatOptimized/0/4/19 3984 ns 3973 ns 176193
-BM_BiquadFilterFloatOptimized/0/4/20 4999 ns 4984 ns 140465
-BM_BiquadFilterFloatOptimized/0/4/21 4997 ns 4982 ns 140518
-BM_BiquadFilterFloatOptimized/0/4/22 4997 ns 4982 ns 140541
-BM_BiquadFilterFloatOptimized/0/4/23 4995 ns 4982 ns 140518
-BM_BiquadFilterFloatOptimized/0/4/24 3984 ns 3973 ns 176197
-BM_BiquadFilterFloatOptimized/0/4/25 3983 ns 3973 ns 176182
-BM_BiquadFilterFloatOptimized/0/4/26 3984 ns 3973 ns 176193
-BM_BiquadFilterFloatOptimized/0/4/27 3985 ns 3973 ns 176205
-BM_BiquadFilterFloatOptimized/0/4/28 4997 ns 4982 ns 140507
-BM_BiquadFilterFloatOptimized/0/4/29 4996 ns 4982 ns 140515
-BM_BiquadFilterFloatOptimized/0/4/30 4996 ns 4983 ns 140517
-BM_BiquadFilterFloatOptimized/0/4/31 4998 ns 4982 ns 140519
-BM_BiquadFilterFloatOptimized/1/1/1 557 ns 555 ns 1261214
-BM_BiquadFilterFloatOptimized/1/1/2 652 ns 650 ns 1077578
-BM_BiquadFilterFloatOptimized/1/1/3 652 ns 650 ns 1077688
-BM_BiquadFilterFloatOptimized/1/1/4 834 ns 832 ns 841263
-BM_BiquadFilterFloatOptimized/1/1/5 836 ns 833 ns 840264
-BM_BiquadFilterFloatOptimized/1/1/6 836 ns 833 ns 840002
-BM_BiquadFilterFloatOptimized/1/1/7 835 ns 833 ns 840209
-BM_BiquadFilterFloatOptimized/1/1/8 1813 ns 1808 ns 387140
-BM_BiquadFilterFloatOptimized/1/1/9 1814 ns 1808 ns 387077
-BM_BiquadFilterFloatOptimized/1/1/10 2552 ns 2544 ns 275164
-BM_BiquadFilterFloatOptimized/1/1/11 2552 ns 2545 ns 275177
-BM_BiquadFilterFloatOptimized/1/1/12 2178 ns 2172 ns 322211
-BM_BiquadFilterFloatOptimized/1/1/13 2178 ns 2172 ns 322244
-BM_BiquadFilterFloatOptimized/1/1/14 2179 ns 2172 ns 322290
-BM_BiquadFilterFloatOptimized/1/1/15 2179 ns 2172 ns 322318
-BM_BiquadFilterFloatOptimized/1/1/16 2175 ns 2169 ns 322771
-BM_BiquadFilterFloatOptimized/1/1/17 2176 ns 2169 ns 322723
-BM_BiquadFilterFloatOptimized/1/1/18 2175 ns 2169 ns 322752
-BM_BiquadFilterFloatOptimized/1/1/19 2175 ns 2169 ns 322712
-BM_BiquadFilterFloatOptimized/1/1/20 2178 ns 2172 ns 322229
-BM_BiquadFilterFloatOptimized/1/1/21 2178 ns 2172 ns 322263
-BM_BiquadFilterFloatOptimized/1/1/22 2178 ns 2172 ns 322271
-BM_BiquadFilterFloatOptimized/1/1/23 2178 ns 2172 ns 322302
-BM_BiquadFilterFloatOptimized/1/1/24 2176 ns 2169 ns 322749
-BM_BiquadFilterFloatOptimized/1/1/25 2175 ns 2169 ns 322653
-BM_BiquadFilterFloatOptimized/1/1/26 2175 ns 2169 ns 322739
-BM_BiquadFilterFloatOptimized/1/1/27 2175 ns 2169 ns 322709
-BM_BiquadFilterFloatOptimized/1/1/28 2178 ns 2172 ns 322242
-BM_BiquadFilterFloatOptimized/1/1/29 2178 ns 2172 ns 322286
-BM_BiquadFilterFloatOptimized/1/1/30 2177 ns 2172 ns 322259
-BM_BiquadFilterFloatOptimized/1/1/31 2178 ns 2172 ns 322321
-BM_BiquadFilterFloatOptimized/1/2/1 737 ns 734 ns 953000
-BM_BiquadFilterFloatOptimized/1/2/2 1085 ns 1082 ns 646529
-BM_BiquadFilterFloatOptimized/1/2/3 1086 ns 1082 ns 646983
-BM_BiquadFilterFloatOptimized/1/2/4 1537 ns 1533 ns 456082
-BM_BiquadFilterFloatOptimized/1/2/5 1538 ns 1533 ns 457062
-BM_BiquadFilterFloatOptimized/1/2/6 1539 ns 1534 ns 457137
-BM_BiquadFilterFloatOptimized/1/2/7 1539 ns 1534 ns 457042
-BM_BiquadFilterFloatOptimized/1/2/8 1975 ns 1969 ns 355538
-BM_BiquadFilterFloatOptimized/1/2/9 1975 ns 1969 ns 355560
-BM_BiquadFilterFloatOptimized/1/2/10 4347 ns 4333 ns 161568
-BM_BiquadFilterFloatOptimized/1/2/11 4345 ns 4333 ns 161551
-BM_BiquadFilterFloatOptimized/1/2/12 5014 ns 4999 ns 139998
-BM_BiquadFilterFloatOptimized/1/2/13 5014 ns 4999 ns 140001
-BM_BiquadFilterFloatOptimized/1/2/14 5016 ns 5000 ns 140022
-BM_BiquadFilterFloatOptimized/1/2/15 5013 ns 4999 ns 140019
-BM_BiquadFilterFloatOptimized/1/2/16 3986 ns 3973 ns 176177
-BM_BiquadFilterFloatOptimized/1/2/17 3985 ns 3973 ns 176194
-BM_BiquadFilterFloatOptimized/1/2/18 3984 ns 3973 ns 176174
-BM_BiquadFilterFloatOptimized/1/2/19 3984 ns 3973 ns 176167
-BM_BiquadFilterFloatOptimized/1/2/20 5012 ns 4999 ns 140029
-BM_BiquadFilterFloatOptimized/1/2/21 5014 ns 4999 ns 140026
-BM_BiquadFilterFloatOptimized/1/2/22 5013 ns 4999 ns 140013
-BM_BiquadFilterFloatOptimized/1/2/23 5014 ns 5000 ns 139998
-BM_BiquadFilterFloatOptimized/1/2/24 3986 ns 3973 ns 176163
-BM_BiquadFilterFloatOptimized/1/2/25 3984 ns 3973 ns 176201
-BM_BiquadFilterFloatOptimized/1/2/26 3983 ns 3973 ns 176186
-BM_BiquadFilterFloatOptimized/1/2/27 3986 ns 3973 ns 176174
-BM_BiquadFilterFloatOptimized/1/2/28 5013 ns 4999 ns 140001
-BM_BiquadFilterFloatOptimized/1/2/29 5014 ns 4999 ns 140033
-BM_BiquadFilterFloatOptimized/1/2/30 5012 ns 4999 ns 140018
-BM_BiquadFilterFloatOptimized/1/2/31 5014 ns 4999 ns 140003
-BM_BiquadFilterFloatOptimized/1/3/1 1010 ns 1007 ns 695126
-BM_BiquadFilterFloatOptimized/1/3/2 1753 ns 1748 ns 401120
-BM_BiquadFilterFloatOptimized/1/3/3 1765 ns 1759 ns 403787
-BM_BiquadFilterFloatOptimized/1/3/4 2312 ns 2307 ns 303354
-BM_BiquadFilterFloatOptimized/1/3/5 2317 ns 2309 ns 303095
-BM_BiquadFilterFloatOptimized/1/3/6 2318 ns 2311 ns 302366
-BM_BiquadFilterFloatOptimized/1/3/7 2315 ns 2309 ns 303183
-BM_BiquadFilterFloatOptimized/1/3/8 3053 ns 3044 ns 229914
-BM_BiquadFilterFloatOptimized/1/3/9 3053 ns 3044 ns 229952
-BM_BiquadFilterFloatOptimized/1/3/10 4346 ns 4333 ns 161527
-BM_BiquadFilterFloatOptimized/1/3/11 4345 ns 4333 ns 161578
-BM_BiquadFilterFloatOptimized/1/3/12 4938 ns 4924 ns 142144
-BM_BiquadFilterFloatOptimized/1/3/13 4938 ns 4924 ns 142160
-BM_BiquadFilterFloatOptimized/1/3/14 4938 ns 4924 ns 142173
-BM_BiquadFilterFloatOptimized/1/3/15 4938 ns 4924 ns 142171
-BM_BiquadFilterFloatOptimized/1/3/16 4072 ns 4058 ns 172551
-BM_BiquadFilterFloatOptimized/1/3/17 4071 ns 4059 ns 172535
-BM_BiquadFilterFloatOptimized/1/3/18 4071 ns 4059 ns 172451
-BM_BiquadFilterFloatOptimized/1/3/19 4072 ns 4059 ns 172440
-BM_BiquadFilterFloatOptimized/1/3/20 4938 ns 4925 ns 142159
-BM_BiquadFilterFloatOptimized/1/3/21 4940 ns 4924 ns 142162
-BM_BiquadFilterFloatOptimized/1/3/22 4938 ns 4924 ns 142152
-BM_BiquadFilterFloatOptimized/1/3/23 4939 ns 4924 ns 142166
-BM_BiquadFilterFloatOptimized/1/3/24 4070 ns 4058 ns 172556
-BM_BiquadFilterFloatOptimized/1/3/25 4069 ns 4058 ns 172463
-BM_BiquadFilterFloatOptimized/1/3/26 4071 ns 4058 ns 172489
-BM_BiquadFilterFloatOptimized/1/3/27 4070 ns 4058 ns 172506
-BM_BiquadFilterFloatOptimized/1/3/28 4938 ns 4924 ns 142152
-BM_BiquadFilterFloatOptimized/1/3/29 4939 ns 4924 ns 142164
-BM_BiquadFilterFloatOptimized/1/3/30 4937 ns 4924 ns 142172
-BM_BiquadFilterFloatOptimized/1/3/31 4939 ns 4924 ns 142156
-BM_BiquadFilterFloatOptimized/1/4/1 555 ns 553 ns 1264784
-BM_BiquadFilterFloatOptimized/1/4/2 736 ns 734 ns 953628
-BM_BiquadFilterFloatOptimized/1/4/3 736 ns 734 ns 953966
-BM_BiquadFilterFloatOptimized/1/4/4 1357 ns 1353 ns 517294
-BM_BiquadFilterFloatOptimized/1/4/5 1357 ns 1353 ns 517252
-BM_BiquadFilterFloatOptimized/1/4/6 1357 ns 1353 ns 517358
-BM_BiquadFilterFloatOptimized/1/4/7 1357 ns 1353 ns 517367
-BM_BiquadFilterFloatOptimized/1/4/8 1902 ns 1896 ns 369039
-BM_BiquadFilterFloatOptimized/1/4/9 1903 ns 1897 ns 368999
-BM_BiquadFilterFloatOptimized/1/4/10 3984 ns 3972 ns 176223
-BM_BiquadFilterFloatOptimized/1/4/11 3985 ns 3972 ns 176227
-BM_BiquadFilterFloatOptimized/1/4/12 4996 ns 4982 ns 140498
-BM_BiquadFilterFloatOptimized/1/4/13 4996 ns 4982 ns 140514
-BM_BiquadFilterFloatOptimized/1/4/14 4995 ns 4982 ns 140497
-BM_BiquadFilterFloatOptimized/1/4/15 4995 ns 4982 ns 140514
-BM_BiquadFilterFloatOptimized/1/4/16 3984 ns 3973 ns 176199
-BM_BiquadFilterFloatOptimized/1/4/17 3984 ns 3973 ns 176183
-BM_BiquadFilterFloatOptimized/1/4/18 3985 ns 3973 ns 176198
-BM_BiquadFilterFloatOptimized/1/4/19 3986 ns 3973 ns 176194
-BM_BiquadFilterFloatOptimized/1/4/20 4998 ns 4984 ns 140422
-BM_BiquadFilterFloatOptimized/1/4/21 4997 ns 4982 ns 140519
-BM_BiquadFilterFloatOptimized/1/4/22 4995 ns 4982 ns 140514
-BM_BiquadFilterFloatOptimized/1/4/23 4996 ns 4982 ns 140516
-BM_BiquadFilterFloatOptimized/1/4/24 3984 ns 3973 ns 176184
-BM_BiquadFilterFloatOptimized/1/4/25 3983 ns 3972 ns 176191
-BM_BiquadFilterFloatOptimized/1/4/26 3985 ns 3973 ns 176189
-BM_BiquadFilterFloatOptimized/1/4/27 3985 ns 3973 ns 176195
-BM_BiquadFilterFloatOptimized/1/4/28 4996 ns 4982 ns 140504
-BM_BiquadFilterFloatOptimized/1/4/29 4996 ns 4982 ns 140513
-BM_BiquadFilterFloatOptimized/1/4/30 4995 ns 4982 ns 140510
-BM_BiquadFilterFloatOptimized/1/4/31 4997 ns 4982 ns 140504
-BM_BiquadFilterFloatNonOptimized/0/1/31 2178 ns 2172 ns 322337
-BM_BiquadFilterFloatNonOptimized/0/2/31 4353 ns 4342 ns 161208
-BM_BiquadFilterFloatNonOptimized/0/3/31 6529 ns 6509 ns 107546
-BM_BiquadFilterFloatNonOptimized/0/4/31 8700 ns 8677 ns 80685
-BM_BiquadFilterFloatNonOptimized/0/5/31 10874 ns 10844 ns 64535
-BM_BiquadFilterFloatNonOptimized/0/6/31 13072 ns 13030 ns 53723
-BM_BiquadFilterFloatNonOptimized/0/7/31 15226 ns 15184 ns 46111
-BM_BiquadFilterFloatNonOptimized/0/8/31 17416 ns 17371 ns 40292
-BM_BiquadFilterFloatNonOptimized/0/9/31 19595 ns 19545 ns 35814
-BM_BiquadFilterFloatNonOptimized/0/10/31 21774 ns 21713 ns 32242
-BM_BiquadFilterFloatNonOptimized/0/11/31 23971 ns 23908 ns 29279
-BM_BiquadFilterFloatNonOptimized/0/12/31 26170 ns 26092 ns 26825
-BM_BiquadFilterFloatNonOptimized/0/13/31 28384 ns 28304 ns 24732
-BM_BiquadFilterFloatNonOptimized/0/14/31 30585 ns 30495 ns 22956
-BM_BiquadFilterFloatNonOptimized/0/15/31 32811 ns 32724 ns 21391
-BM_BiquadFilterFloatNonOptimized/0/16/31 35082 ns 34987 ns 20007
-BM_BiquadFilterFloatNonOptimized/0/17/31 37629 ns 37527 ns 18653
-BM_BiquadFilterFloatNonOptimized/0/18/31 40442 ns 40328 ns 17366
-BM_BiquadFilterFloatNonOptimized/0/19/31 42448 ns 42335 ns 16532
-BM_BiquadFilterFloatNonOptimized/0/20/31 45171 ns 45045 ns 15536
-BM_BiquadFilterFloatNonOptimized/0/21/31 46966 ns 46835 ns 14950
-BM_BiquadFilterFloatNonOptimized/0/22/31 48604 ns 48466 ns 14449
-BM_BiquadFilterFloatNonOptimized/0/23/31 50446 ns 50294 ns 13915
-BM_BiquadFilterFloatNonOptimized/0/24/31 52667 ns 52495 ns 13339
-BM_BiquadFilterDoubleOptimized/0/1/31 2180 ns 2173 ns 322151
-BM_BiquadFilterDoubleOptimized/0/2/31 5002 ns 4987 ns 140369
-BM_BiquadFilterDoubleOptimized/0/3/31 4919 ns 4906 ns 142292
-BM_BiquadFilterDoubleOptimized/0/4/31 5225 ns 5210 ns 134286
-BM_BiquadFilterDoubleNonOptimized/0/1/31 2177 ns 2171 ns 322374
-BM_BiquadFilterDoubleNonOptimized/0/2/31 4353 ns 4341 ns 161217
-BM_BiquadFilterDoubleNonOptimized/0/3/31 6537 ns 6516 ns 107442
-BM_BiquadFilterDoubleNonOptimized/0/4/31 8715 ns 8691 ns 80545
+BM_BiquadFilter1D/0 558 ns 556 ns 1258922
+BM_BiquadFilter1D/1 561 ns 560 ns 1251090
+BM_BiquadFilterFloatOptimized/0/1/31 2499 ns 2493 ns 280808
+BM_BiquadFilterFloatOptimized/0/2/31 3174 ns 3166 ns 221128
+BM_BiquadFilterFloatOptimized/0/3/31 3497 ns 3487 ns 200739
+BM_BiquadFilterFloatOptimized/0/4/31 3165 ns 3157 ns 221768
+BM_BiquadFilterFloatOptimized/0/5/31 3424 ns 3415 ns 204909
+BM_BiquadFilterFloatOptimized/0/6/31 3539 ns 3530 ns 198271
+BM_BiquadFilterFloatOptimized/0/7/31 4311 ns 4300 ns 162593
+BM_BiquadFilterFloatOptimized/0/8/31 3501 ns 3492 ns 200490
+BM_BiquadFilterFloatOptimized/0/9/31 4310 ns 4299 ns 162317
+BM_BiquadFilterFloatOptimized/0/10/31 4487 ns 4476 ns 156406
+BM_BiquadFilterFloatOptimized/0/11/31 5589 ns 5575 ns 125644
+BM_BiquadFilterFloatOptimized/0/12/31 4457 ns 4445 ns 157532
+BM_BiquadFilterFloatOptimized/0/13/31 5600 ns 5586 ns 125403
+BM_BiquadFilterFloatOptimized/0/14/31 5834 ns 5819 ns 120309
+BM_BiquadFilterFloatOptimized/0/15/31 7089 ns 7070 ns 98986
+BM_BiquadFilterFloatOptimized/0/16/31 5644 ns 5627 ns 124364
+BM_BiquadFilterFloatOptimized/0/17/31 8244 ns 8223 ns 85126
+BM_BiquadFilterFloatOptimized/0/18/31 8900 ns 8874 ns 78853
+BM_BiquadFilterFloatOptimized/0/19/31 9385 ns 9360 ns 74775
+BM_BiquadFilterFloatOptimized/0/20/31 8783 ns 8760 ns 79901
+BM_BiquadFilterFloatOptimized/0/21/31 9335 ns 9305 ns 75239
+BM_BiquadFilterFloatOptimized/0/22/31 9561 ns 9535 ns 73368
+BM_BiquadFilterFloatOptimized/0/23/31 10334 ns 10307 ns 67876
+BM_BiquadFilterFloatOptimized/0/24/31 9266 ns 9241 ns 75692
+BM_BiquadFilterFloatOptimized/0/1/1 557 ns 556 ns 1259656
+BM_BiquadFilterFloatOptimized/0/1/2 651 ns 649 ns 1078575
+BM_BiquadFilterFloatOptimized/0/1/3 650 ns 648 ns 1079479
+BM_BiquadFilterFloatOptimized/0/1/4 805 ns 803 ns 918780
+BM_BiquadFilterFloatOptimized/0/1/5 984 ns 981 ns 736887
+BM_BiquadFilterFloatOptimized/0/1/6 797 ns 795 ns 882135
+BM_BiquadFilterFloatOptimized/0/1/7 792 ns 790 ns 897376
+BM_BiquadFilterFloatOptimized/0/1/8 1974 ns 1969 ns 355501
+BM_BiquadFilterFloatOptimized/0/1/9 1973 ns 1968 ns 355606
+BM_BiquadFilterFloatOptimized/0/1/10 2709 ns 2703 ns 259268
+BM_BiquadFilterFloatOptimized/0/1/11 2613 ns 2607 ns 268435
+BM_BiquadFilterFloatOptimized/0/1/12 2499 ns 2493 ns 280813
+BM_BiquadFilterFloatOptimized/0/1/13 2497 ns 2491 ns 280990
+BM_BiquadFilterFloatOptimized/0/1/14 2499 ns 2493 ns 280818
+BM_BiquadFilterFloatOptimized/0/1/15 2499 ns 2493 ns 280815
+BM_BiquadFilterFloatOptimized/0/1/16 2327 ns 2321 ns 301566
+BM_BiquadFilterFloatOptimized/0/1/17 2326 ns 2321 ns 301606
+BM_BiquadFilterFloatOptimized/0/1/18 2326 ns 2321 ns 301606
+BM_BiquadFilterFloatOptimized/0/1/19 2327 ns 2321 ns 301606
+BM_BiquadFilterFloatOptimized/0/1/20 2499 ns 2493 ns 280810
+BM_BiquadFilterFloatOptimized/0/1/21 2497 ns 2491 ns 280989
+BM_BiquadFilterFloatOptimized/0/1/22 2499 ns 2493 ns 280796
+BM_BiquadFilterFloatOptimized/0/1/23 2499 ns 2493 ns 280807
+BM_BiquadFilterFloatOptimized/0/1/24 2327 ns 2321 ns 301596
+BM_BiquadFilterFloatOptimized/0/1/25 2327 ns 2321 ns 301600
+BM_BiquadFilterFloatOptimized/0/1/26 2327 ns 2321 ns 301597
+BM_BiquadFilterFloatOptimized/0/1/27 2327 ns 2321 ns 301588
+BM_BiquadFilterFloatOptimized/0/1/28 2500 ns 2493 ns 280761
+BM_BiquadFilterFloatOptimized/0/1/29 2499 ns 2492 ns 280951
+BM_BiquadFilterFloatOptimized/0/1/30 2500 ns 2493 ns 280787
+BM_BiquadFilterFloatOptimized/0/1/31 2500 ns 2493 ns 280808
+BM_BiquadFilterFloatOptimized/0/2/1 440 ns 439 ns 1595281
+BM_BiquadFilterFloatOptimized/0/2/2 633 ns 631 ns 1108368
+BM_BiquadFilterFloatOptimized/0/2/3 633 ns 631 ns 1108778
+BM_BiquadFilterFloatOptimized/0/2/4 1523 ns 1518 ns 461120
+BM_BiquadFilterFloatOptimized/0/2/5 1523 ns 1518 ns 461075
+BM_BiquadFilterFloatOptimized/0/2/6 1522 ns 1518 ns 461059
+BM_BiquadFilterFloatOptimized/0/2/7 1523 ns 1518 ns 461068
+BM_BiquadFilterFloatOptimized/0/2/8 2854 ns 2845 ns 248471
+BM_BiquadFilterFloatOptimized/0/2/9 2809 ns 2800 ns 250019
+BM_BiquadFilterFloatOptimized/0/2/10 4412 ns 4398 ns 159164
+BM_BiquadFilterFloatOptimized/0/2/11 4413 ns 4399 ns 159138
+BM_BiquadFilterFloatOptimized/0/2/12 3177 ns 3167 ns 221023
+BM_BiquadFilterFloatOptimized/0/2/13 3164 ns 3154 ns 221972
+BM_BiquadFilterFloatOptimized/0/2/14 3225 ns 3211 ns 217654
+BM_BiquadFilterFloatOptimized/0/2/15 3178 ns 3167 ns 221055
+BM_BiquadFilterFloatOptimized/0/2/16 3726 ns 3714 ns 188557
+BM_BiquadFilterFloatOptimized/0/2/17 3726 ns 3716 ns 188151
+BM_BiquadFilterFloatOptimized/0/2/18 3734 ns 3721 ns 188243
+BM_BiquadFilterFloatOptimized/0/2/19 3723 ns 3710 ns 188560
+BM_BiquadFilterFloatOptimized/0/2/20 3178 ns 3167 ns 221083
+BM_BiquadFilterFloatOptimized/0/2/21 3163 ns 3154 ns 221947
+BM_BiquadFilterFloatOptimized/0/2/22 3224 ns 3214 ns 218373
+BM_BiquadFilterFloatOptimized/0/2/23 3177 ns 3167 ns 221028
+BM_BiquadFilterFloatOptimized/0/2/24 3727 ns 3714 ns 188443
+BM_BiquadFilterFloatOptimized/0/2/25 3735 ns 3721 ns 188131
+BM_BiquadFilterFloatOptimized/0/2/26 3732 ns 3719 ns 188374
+BM_BiquadFilterFloatOptimized/0/2/27 3721 ns 3710 ns 188619
+BM_BiquadFilterFloatOptimized/0/2/28 3176 ns 3167 ns 221067
+BM_BiquadFilterFloatOptimized/0/2/29 3164 ns 3154 ns 221953
+BM_BiquadFilterFloatOptimized/0/2/30 3225 ns 3214 ns 217988
+BM_BiquadFilterFloatOptimized/0/2/31 3176 ns 3167 ns 221015
+BM_BiquadFilterFloatOptimized/0/3/1 877 ns 874 ns 800012
+BM_BiquadFilterFloatOptimized/0/3/2 1218 ns 1214 ns 576381
+BM_BiquadFilterFloatOptimized/0/3/3 1217 ns 1214 ns 577767
+BM_BiquadFilterFloatOptimized/0/3/4 2281 ns 2274 ns 307760
+BM_BiquadFilterFloatOptimized/0/3/5 2285 ns 2278 ns 307313
+BM_BiquadFilterFloatOptimized/0/3/6 2285 ns 2278 ns 307254
+BM_BiquadFilterFloatOptimized/0/3/7 2280 ns 2273 ns 307865
+BM_BiquadFilterFloatOptimized/0/3/8 2966 ns 2957 ns 236544
+BM_BiquadFilterFloatOptimized/0/3/9 2945 ns 2936 ns 238459
+BM_BiquadFilterFloatOptimized/0/3/10 4613 ns 4597 ns 152280
+BM_BiquadFilterFloatOptimized/0/3/11 4612 ns 4597 ns 152296
+BM_BiquadFilterFloatOptimized/0/3/12 3499 ns 3489 ns 200637
+BM_BiquadFilterFloatOptimized/0/3/13 3498 ns 3486 ns 200771
+BM_BiquadFilterFloatOptimized/0/3/14 3569 ns 3557 ns 196782
+BM_BiquadFilterFloatOptimized/0/3/15 3500 ns 3489 ns 200662
+BM_BiquadFilterFloatOptimized/0/3/16 3809 ns 3797 ns 184356
+BM_BiquadFilterFloatOptimized/0/3/17 3817 ns 3804 ns 184009
+BM_BiquadFilterFloatOptimized/0/3/18 3818 ns 3804 ns 183988
+BM_BiquadFilterFloatOptimized/0/3/19 3809 ns 3797 ns 184373
+BM_BiquadFilterFloatOptimized/0/3/20 3501 ns 3489 ns 200657
+BM_BiquadFilterFloatOptimized/0/3/21 3497 ns 3486 ns 200769
+BM_BiquadFilterFloatOptimized/0/3/22 3567 ns 3556 ns 196867
+BM_BiquadFilterFloatOptimized/0/3/23 3500 ns 3489 ns 200647
+BM_BiquadFilterFloatOptimized/0/3/24 3808 ns 3796 ns 184354
+BM_BiquadFilterFloatOptimized/0/3/25 3816 ns 3805 ns 184002
+BM_BiquadFilterFloatOptimized/0/3/26 3816 ns 3804 ns 184006
+BM_BiquadFilterFloatOptimized/0/3/27 3809 ns 3797 ns 184416
+BM_BiquadFilterFloatOptimized/0/3/28 3500 ns 3488 ns 200657
+BM_BiquadFilterFloatOptimized/0/3/29 3498 ns 3486 ns 200786
+BM_BiquadFilterFloatOptimized/0/3/30 3568 ns 3557 ns 196887
+BM_BiquadFilterFloatOptimized/0/3/31 3500 ns 3488 ns 200663
+BM_BiquadFilterFloatOptimized/0/4/1 558 ns 556 ns 1257930
+BM_BiquadFilterFloatOptimized/0/4/2 652 ns 650 ns 1076427
+BM_BiquadFilterFloatOptimized/0/4/3 651 ns 648 ns 1079429
+BM_BiquadFilterFloatOptimized/0/4/4 831 ns 829 ns 844257
+BM_BiquadFilterFloatOptimized/0/4/5 829 ns 826 ns 847191
+BM_BiquadFilterFloatOptimized/0/4/6 829 ns 826 ns 847010
+BM_BiquadFilterFloatOptimized/0/4/7 832 ns 829 ns 843914
+BM_BiquadFilterFloatOptimized/0/4/8 1881 ns 1875 ns 373166
+BM_BiquadFilterFloatOptimized/0/4/9 1910 ns 1904 ns 367626
+BM_BiquadFilterFloatOptimized/0/4/10 2247 ns 2239 ns 312581
+BM_BiquadFilterFloatOptimized/0/4/11 2246 ns 2238 ns 312874
+BM_BiquadFilterFloatOptimized/0/4/12 3170 ns 3158 ns 221666
+BM_BiquadFilterFloatOptimized/0/4/13 3159 ns 3150 ns 222273
+BM_BiquadFilterFloatOptimized/0/4/14 3149 ns 3139 ns 222959
+BM_BiquadFilterFloatOptimized/0/4/15 3168 ns 3158 ns 221668
+BM_BiquadFilterFloatOptimized/0/4/16 2278 ns 2271 ns 308250
+BM_BiquadFilterFloatOptimized/0/4/17 2280 ns 2273 ns 308036
+BM_BiquadFilterFloatOptimized/0/4/18 2280 ns 2273 ns 308016
+BM_BiquadFilterFloatOptimized/0/4/19 2278 ns 2271 ns 308301
+BM_BiquadFilterFloatOptimized/0/4/20 3168 ns 3158 ns 221671
+BM_BiquadFilterFloatOptimized/0/4/21 3159 ns 3150 ns 222270
+BM_BiquadFilterFloatOptimized/0/4/22 3149 ns 3139 ns 223010
+BM_BiquadFilterFloatOptimized/0/4/23 3168 ns 3158 ns 221652
+BM_BiquadFilterFloatOptimized/0/4/24 2279 ns 2271 ns 308191
+BM_BiquadFilterFloatOptimized/0/4/25 2281 ns 2273 ns 307942
+BM_BiquadFilterFloatOptimized/0/4/26 2280 ns 2272 ns 308012
+BM_BiquadFilterFloatOptimized/0/4/27 2279 ns 2271 ns 308357
+BM_BiquadFilterFloatOptimized/0/4/28 3169 ns 3158 ns 221700
+BM_BiquadFilterFloatOptimized/0/4/29 3159 ns 3149 ns 222286
+BM_BiquadFilterFloatOptimized/0/4/30 3149 ns 3139 ns 222997
+BM_BiquadFilterFloatOptimized/0/4/31 3168 ns 3158 ns 221672
+BM_BiquadFilterFloatOptimized/1/1/1 558 ns 556 ns 1259230
+BM_BiquadFilterFloatOptimized/1/1/2 651 ns 649 ns 1078239
+BM_BiquadFilterFloatOptimized/1/1/3 651 ns 649 ns 1078731
+BM_BiquadFilterFloatOptimized/1/1/4 771 ns 768 ns 898703
+BM_BiquadFilterFloatOptimized/1/1/5 1020 ns 1017 ns 712070
+BM_BiquadFilterFloatOptimized/1/1/6 796 ns 794 ns 867607
+BM_BiquadFilterFloatOptimized/1/1/7 816 ns 814 ns 895946
+BM_BiquadFilterFloatOptimized/1/1/8 1976 ns 1970 ns 355331
+BM_BiquadFilterFloatOptimized/1/1/9 1976 ns 1969 ns 355435
+BM_BiquadFilterFloatOptimized/1/1/10 2709 ns 2700 ns 259919
+BM_BiquadFilterFloatOptimized/1/1/11 2617 ns 2608 ns 268279
+BM_BiquadFilterFloatOptimized/1/1/12 2501 ns 2494 ns 280784
+BM_BiquadFilterFloatOptimized/1/1/13 2500 ns 2492 ns 280890
+BM_BiquadFilterFloatOptimized/1/1/14 2502 ns 2493 ns 280685
+BM_BiquadFilterFloatOptimized/1/1/15 2502 ns 2493 ns 280729
+BM_BiquadFilterFloatOptimized/1/1/16 2329 ns 2322 ns 301460
+BM_BiquadFilterFloatOptimized/1/1/17 2330 ns 2322 ns 301456
+BM_BiquadFilterFloatOptimized/1/1/18 2329 ns 2322 ns 301447
+BM_BiquadFilterFloatOptimized/1/1/19 2329 ns 2322 ns 301456
+BM_BiquadFilterFloatOptimized/1/1/20 2502 ns 2494 ns 280714
+BM_BiquadFilterFloatOptimized/1/1/21 2501 ns 2492 ns 280834
+BM_BiquadFilterFloatOptimized/1/1/22 2502 ns 2494 ns 280713
+BM_BiquadFilterFloatOptimized/1/1/23 2502 ns 2494 ns 280691
+BM_BiquadFilterFloatOptimized/1/1/24 2329 ns 2322 ns 301435
+BM_BiquadFilterFloatOptimized/1/1/25 2330 ns 2322 ns 301438
+BM_BiquadFilterFloatOptimized/1/1/26 2329 ns 2322 ns 301470
+BM_BiquadFilterFloatOptimized/1/1/27 2330 ns 2322 ns 301493
+BM_BiquadFilterFloatOptimized/1/1/28 2502 ns 2493 ns 280702
+BM_BiquadFilterFloatOptimized/1/1/29 2500 ns 2492 ns 280940
+BM_BiquadFilterFloatOptimized/1/1/30 2502 ns 2494 ns 280740
+BM_BiquadFilterFloatOptimized/1/1/31 2502 ns 2494 ns 280719
+BM_BiquadFilterFloatOptimized/1/2/1 440 ns 439 ns 1595119
+BM_BiquadFilterFloatOptimized/1/2/2 634 ns 631 ns 1109077
+BM_BiquadFilterFloatOptimized/1/2/3 633 ns 631 ns 1108421
+BM_BiquadFilterFloatOptimized/1/2/4 1523 ns 1518 ns 460928
+BM_BiquadFilterFloatOptimized/1/2/5 1524 ns 1518 ns 461034
+BM_BiquadFilterFloatOptimized/1/2/6 1524 ns 1518 ns 460936
+BM_BiquadFilterFloatOptimized/1/2/7 1524 ns 1519 ns 460956
+BM_BiquadFilterFloatOptimized/1/2/8 2871 ns 2862 ns 243633
+BM_BiquadFilterFloatOptimized/1/2/9 2808 ns 2800 ns 249997
+BM_BiquadFilterFloatOptimized/1/2/10 4412 ns 4397 ns 159195
+BM_BiquadFilterFloatOptimized/1/2/11 4412 ns 4398 ns 159154
+BM_BiquadFilterFloatOptimized/1/2/12 3177 ns 3167 ns 221084
+BM_BiquadFilterFloatOptimized/1/2/13 3164 ns 3154 ns 221939
+BM_BiquadFilterFloatOptimized/1/2/14 3217 ns 3210 ns 218007
+BM_BiquadFilterFloatOptimized/1/2/15 3177 ns 3167 ns 221047
+BM_BiquadFilterFloatOptimized/1/2/16 3726 ns 3713 ns 188559
+BM_BiquadFilterFloatOptimized/1/2/17 3733 ns 3720 ns 188289
+BM_BiquadFilterFloatOptimized/1/2/18 3733 ns 3721 ns 188122
+BM_BiquadFilterFloatOptimized/1/2/19 3724 ns 3712 ns 188522
+BM_BiquadFilterFloatOptimized/1/2/20 3177 ns 3167 ns 221061
+BM_BiquadFilterFloatOptimized/1/2/21 3164 ns 3154 ns 221952
+BM_BiquadFilterFloatOptimized/1/2/22 3224 ns 3213 ns 217980
+BM_BiquadFilterFloatOptimized/1/2/23 3178 ns 3167 ns 221046
+BM_BiquadFilterFloatOptimized/1/2/24 3726 ns 3714 ns 188525
+BM_BiquadFilterFloatOptimized/1/2/25 3732 ns 3720 ns 188234
+BM_BiquadFilterFloatOptimized/1/2/26 3732 ns 3719 ns 188156
+BM_BiquadFilterFloatOptimized/1/2/27 3726 ns 3714 ns 188613
+BM_BiquadFilterFloatOptimized/1/2/28 3177 ns 3167 ns 221042
+BM_BiquadFilterFloatOptimized/1/2/29 3164 ns 3154 ns 221970
+BM_BiquadFilterFloatOptimized/1/2/30 3226 ns 3215 ns 217798
+BM_BiquadFilterFloatOptimized/1/2/31 3178 ns 3167 ns 221042
+BM_BiquadFilterFloatOptimized/1/3/1 885 ns 882 ns 795133
+BM_BiquadFilterFloatOptimized/1/3/2 1219 ns 1214 ns 576293
+BM_BiquadFilterFloatOptimized/1/3/3 1218 ns 1214 ns 576722
+BM_BiquadFilterFloatOptimized/1/3/4 2282 ns 2274 ns 307745
+BM_BiquadFilterFloatOptimized/1/3/5 2286 ns 2278 ns 307324
+BM_BiquadFilterFloatOptimized/1/3/6 2286 ns 2278 ns 307308
+BM_BiquadFilterFloatOptimized/1/3/7 2282 ns 2274 ns 307912
+BM_BiquadFilterFloatOptimized/1/3/8 2962 ns 2952 ns 237180
+BM_BiquadFilterFloatOptimized/1/3/9 2946 ns 2935 ns 238462
+BM_BiquadFilterFloatOptimized/1/3/10 4612 ns 4597 ns 152246
+BM_BiquadFilterFloatOptimized/1/3/11 4613 ns 4596 ns 152286
+BM_BiquadFilterFloatOptimized/1/3/12 3501 ns 3489 ns 200662
+BM_BiquadFilterFloatOptimized/1/3/13 3497 ns 3486 ns 200784
+BM_BiquadFilterFloatOptimized/1/3/14 3569 ns 3557 ns 196804
+BM_BiquadFilterFloatOptimized/1/3/15 3499 ns 3488 ns 200661
+BM_BiquadFilterFloatOptimized/1/3/16 3809 ns 3797 ns 184350
+BM_BiquadFilterFloatOptimized/1/3/17 3816 ns 3804 ns 184028
+BM_BiquadFilterFloatOptimized/1/3/18 3815 ns 3804 ns 184008
+BM_BiquadFilterFloatOptimized/1/3/19 3808 ns 3796 ns 184333
+BM_BiquadFilterFloatOptimized/1/3/20 3502 ns 3489 ns 200636
+BM_BiquadFilterFloatOptimized/1/3/21 3499 ns 3486 ns 200768
+BM_BiquadFilterFloatOptimized/1/3/22 3569 ns 3557 ns 196840
+BM_BiquadFilterFloatOptimized/1/3/23 3501 ns 3488 ns 200657
+BM_BiquadFilterFloatOptimized/1/3/24 3807 ns 3796 ns 184403
+BM_BiquadFilterFloatOptimized/1/3/25 3816 ns 3804 ns 184040
+BM_BiquadFilterFloatOptimized/1/3/26 3816 ns 3804 ns 184021
+BM_BiquadFilterFloatOptimized/1/3/27 3808 ns 3796 ns 184385
+BM_BiquadFilterFloatOptimized/1/3/28 3500 ns 3488 ns 200666
+BM_BiquadFilterFloatOptimized/1/3/29 3497 ns 3485 ns 200811
+BM_BiquadFilterFloatOptimized/1/3/30 3571 ns 3558 ns 196974
+BM_BiquadFilterFloatOptimized/1/3/31 3499 ns 3488 ns 200710
+BM_BiquadFilterFloatOptimized/1/4/1 558 ns 556 ns 1259007
+BM_BiquadFilterFloatOptimized/1/4/2 652 ns 650 ns 1076207
+BM_BiquadFilterFloatOptimized/1/4/3 650 ns 648 ns 1079464
+BM_BiquadFilterFloatOptimized/1/4/4 831 ns 828 ns 847251
+BM_BiquadFilterFloatOptimized/1/4/5 829 ns 826 ns 847543
+BM_BiquadFilterFloatOptimized/1/4/6 829 ns 826 ns 847037
+BM_BiquadFilterFloatOptimized/1/4/7 832 ns 829 ns 844307
+BM_BiquadFilterFloatOptimized/1/4/8 1879 ns 1873 ns 378908
+BM_BiquadFilterFloatOptimized/1/4/9 1910 ns 1905 ns 367554
+BM_BiquadFilterFloatOptimized/1/4/10 2246 ns 2240 ns 312471
+BM_BiquadFilterFloatOptimized/1/4/11 2244 ns 2238 ns 312719
+BM_BiquadFilterFloatOptimized/1/4/12 3167 ns 3157 ns 221689
+BM_BiquadFilterFloatOptimized/1/4/13 3159 ns 3149 ns 222292
+BM_BiquadFilterFloatOptimized/1/4/14 3148 ns 3138 ns 223041
+BM_BiquadFilterFloatOptimized/1/4/15 3167 ns 3157 ns 221705
+BM_BiquadFilterFloatOptimized/1/4/16 2278 ns 2271 ns 308275
+BM_BiquadFilterFloatOptimized/1/4/17 2280 ns 2273 ns 308050
+BM_BiquadFilterFloatOptimized/1/4/18 2280 ns 2272 ns 307994
+BM_BiquadFilterFloatOptimized/1/4/19 2278 ns 2270 ns 308324
+BM_BiquadFilterFloatOptimized/1/4/20 3168 ns 3157 ns 221734
+BM_BiquadFilterFloatOptimized/1/4/21 3159 ns 3149 ns 222273
+BM_BiquadFilterFloatOptimized/1/4/22 3148 ns 3139 ns 222991
+BM_BiquadFilterFloatOptimized/1/4/23 3166 ns 3157 ns 221723
+BM_BiquadFilterFloatOptimized/1/4/24 2278 ns 2271 ns 308395
+BM_BiquadFilterFloatOptimized/1/4/25 2279 ns 2272 ns 308055
+BM_BiquadFilterFloatOptimized/1/4/26 2280 ns 2272 ns 308098
+BM_BiquadFilterFloatOptimized/1/4/27 2278 ns 2271 ns 308274
+BM_BiquadFilterFloatOptimized/1/4/28 3168 ns 3157 ns 221710
+BM_BiquadFilterFloatOptimized/1/4/29 3158 ns 3149 ns 222311
+BM_BiquadFilterFloatOptimized/1/4/30 3148 ns 3138 ns 223009
+BM_BiquadFilterFloatOptimized/1/4/31 3167 ns 3157 ns 221723
+BM_BiquadFilterFloatNonOptimized/0/1/31 2500 ns 2493 ns 280839
+BM_BiquadFilterFloatNonOptimized/0/2/31 4996 ns 4983 ns 140491
+BM_BiquadFilterFloatNonOptimized/0/3/31 7491 ns 7468 ns 93734
+BM_BiquadFilterFloatNonOptimized/0/4/31 9988 ns 9955 ns 70314
+BM_BiquadFilterFloatNonOptimized/0/5/31 12475 ns 12440 ns 56266
+BM_BiquadFilterFloatNonOptimized/0/6/31 14977 ns 14927 ns 46888
+BM_BiquadFilterFloatNonOptimized/0/7/31 17540 ns 17486 ns 40039
+BM_BiquadFilterFloatNonOptimized/0/8/31 19997 ns 19937 ns 35114
+BM_BiquadFilterFloatNonOptimized/0/9/31 22510 ns 22444 ns 31185
+BM_BiquadFilterFloatNonOptimized/0/10/31 25029 ns 24949 ns 28059
+BM_BiquadFilterFloatNonOptimized/0/11/31 27520 ns 27436 ns 25514
+BM_BiquadFilterFloatNonOptimized/0/12/31 30048 ns 29959 ns 23368
+BM_BiquadFilterFloatNonOptimized/0/13/31 32524 ns 32428 ns 21586
+BM_BiquadFilterFloatNonOptimized/0/14/31 35051 ns 34949 ns 20029
+BM_BiquadFilterFloatNonOptimized/0/15/31 37546 ns 37436 ns 18697
+BM_BiquadFilterFloatNonOptimized/0/16/31 40115 ns 39978 ns 17510
+BM_BiquadFilterFloatNonOptimized/0/17/31 42624 ns 42492 ns 16473
+BM_BiquadFilterFloatNonOptimized/0/18/31 45142 ns 45008 ns 15550
+BM_BiquadFilterFloatNonOptimized/0/19/31 47667 ns 47508 ns 14732
+BM_BiquadFilterFloatNonOptimized/0/20/31 50150 ns 50005 ns 13999
+BM_BiquadFilterFloatNonOptimized/0/21/31 52661 ns 52492 ns 13336
+BM_BiquadFilterFloatNonOptimized/0/22/31 55160 ns 54977 ns 12732
+BM_BiquadFilterFloatNonOptimized/0/23/31 57717 ns 57556 ns 12194
+BM_BiquadFilterFloatNonOptimized/0/24/31 60105 ns 59986 ns 11684
+BM_BiquadFilterDoubleOptimized/0/1/31 2498 ns 2491 ns 281105
+BM_BiquadFilterDoubleOptimized/0/2/31 3123 ns 3112 ns 224898
+BM_BiquadFilterDoubleOptimized/0/3/31 3435 ns 3425 ns 204393
+BM_BiquadFilterDoubleOptimized/0/4/31 3567 ns 3556 ns 196854
+BM_BiquadFilterDoubleNonOptimized/0/1/31 2498 ns 2490 ns 281119
+BM_BiquadFilterDoubleNonOptimized/0/2/31 5019 ns 5004 ns 100000
+BM_BiquadFilterDoubleNonOptimized/0/3/31 7500 ns 7478 ns 93607
+BM_BiquadFilterDoubleNonOptimized/0/4/31 10010 ns 9981 ns 70129
*******************************************************************/
diff --git a/audio_utils/include/audio_utils/BiquadFilter.h b/audio_utils/include/audio_utils/BiquadFilter.h
index c2f481b..7a10339 100644
--- a/audio_utils/include/audio_utils/BiquadFilter.h
+++ b/audio_utils/include/audio_utils/BiquadFilter.h
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#ifndef ANDROID_AUDIO_UTILS_BIQUAD_FILTER_H
-#define ANDROID_AUDIO_UTILS_BIQUAD_FILTER_H
+#pragma once
#include "intrinsic_utils.h"
@@ -36,12 +35,146 @@
#define USE_NEON
#endif
+// Use dither to prevent subnormals for CPUs that raise an exception (x86 and x86-64).
+#pragma push_macro("USE_DITHER")
+#undef USE_DITHER
+
+#if defined(__i386__) || defined(__x86_64__)
+#define USE_DITHER
+#endif
+
namespace android::audio_utils {
static constexpr size_t kBiquadNumCoefs = 5;
static constexpr size_t kBiquadNumDelays = 2;
+/**
+ * The BiquadDirect2Transpose is a low overhead
+ * Biquad filter with coefficients b0, b1, b2, a1, a2.
+ * Per frame: y = s1 + b0 * x; s1 = s2 + b1 * x - a1 * y; s2 = b2 * x - a2 * y.
+ *
+ * This can be used by itself, but it is preferred for best data management
+ * to use the BiquadFilter abstraction below.
+ * T is the data type (scalar or vector).
+ * F is the filter coefficient type. It is either a scalar or vector (matching T).
+ */
+template <typename T, typename F>
+struct BiquadDirect2Transpose {
+ F coef_[5]; // b0, b1, b2, -a1, -a2: these are stored with the denominator a's negated.
+ T s1_; // delay state 1
+ T s2_; // delay state 2
+
+ // These are the coefficient occupancies we optimize for (from b0, b1, b2, a1, a2)
+ // as expressed by a bitmask (bit 0 is b0, up to bit 4 for a2).
+ static inline constexpr size_t required_occupancies_[] = {
+ 0x1, // constant scale
+ 0x3, // single zero
+ 0x7, // double zero
+ 0x9, // single pole
+ 0xb, // (11) first order IIR
+ 0x1b, // (27) double pole + single zero
+ 0x1f, // (31) second order IIR (full Biquad)
+ };
+
+ // Take care the order of arguments - starts with b's then goes to a's.
+ // The a's are "positive" reference, some filters take negative.
+ BiquadDirect2Transpose(const F& b0, const F& b1, const F& b2, const F& a1, const F& a2,
+ const T& s1 = {}, const T& s2 = {})
+ // Same as coef_{b0, b1, b2, -a1, -a2}, with the negation done by intrinsics::vneg().
+ : coef_{ b0,
+ b1,
+ b2,
+ intrinsics::vneg(a1),
+ intrinsics::vneg(a2) }
+ , s1_{s1}
+ , s2_{s2} {
+ }
+
+ // D is the data type. It must be the same element type of T or F.
+ // Take care the order of input and output: output comes first, then input.
+ template<typename D, size_t OCCUPANCY = 0x1f>
+ __attribute__((always_inline)) // required for 1ch speedup (30% faster)
+ void process(D* output, const D* input, size_t frames, size_t stride) {
+ using namespace intrinsics;
+ // For SSE it is possible to vdup F to T if F is scalar.
+ const F b0 = coef_[0]; // b0
+ const F b1 = coef_[1]; // b1
+ const F b2 = coef_[2]; // b2
+ const F negativeA1 = coef_[3]; // -a1
+ const F negativeA2 = coef_[4]; // -a2
+ T s1 = s1_;
+ T s2 = s2_;
+ T xn, yn; // OK to declare temps outside loop rather than at the point of initialization.
+#ifdef USE_DITHER
+ constexpr D DITHER_VALUE = std::numeric_limits<float>::min() * (1 << 24); // float's min, even if D is double
+ T dither = vdupn<T>(DITHER_VALUE); // NEON does not have vector + scalar acceleration.
+#endif
+
+ // Unroll control. Make sure the constexpr remains constexpr :-).
+ constexpr size_t CHANNELS = sizeof(T) / sizeof(D);
+ constexpr size_t UNROLL_CHANNEL_LOWER_LIMIT = 2; // below this won't be unrolled.
+ constexpr size_t UNROLL_CHANNEL_UPPER_LIMIT = 16; // above this won't be unrolled.
+ constexpr size_t UNROLL_LOOPS = (CHANNELS >= UNROLL_CHANNEL_LOWER_LIMIT &&
+ CHANNELS <= UNROLL_CHANNEL_UPPER_LIMIT) ? 2 : 1;
+ size_t remainder = 0;
+ if constexpr (UNROLL_LOOPS > 1) {
+ remainder = frames % UNROLL_LOOPS; // leftover frames, processed after the main loop
+ frames /= UNROLL_LOOPS; // frames now counts unrolled loop iterations
+ }
+
+ // For this lambda, attribute always_inline must be used to inline past CHANNELS > 4.
+ // The other alternative is to use a MACRO, but that doesn't read as well.
+ const auto KERNEL = [&]() __attribute__((always_inline)) {
+ xn = vld1<T>(input);
+ input += stride;
+#ifdef USE_DITHER
+ xn = vadd(xn, dither);
+ dither = vneg(dither); // alternate the dither sign on every frame
+#endif
+ // yn = s1 + b0 * xn (the b0 term is skipped when its OCCUPANCY bit is clear)
+ yn = s1;
+ if constexpr (OCCUPANCY >> 0 & 1) {
+ yn = vmla(yn, b0, xn);
+ }
+ vst1(output, yn);
+ output += stride;
+ // s1 = s2 - a1 * yn + b1 * xn; s2 = b2 * xn - a2 * yn (terms gated by OCCUPANCY bits)
+ s1 = s2;
+ if constexpr (OCCUPANCY >> 3 & 1) {
+ s1 = vmla(s1, negativeA1, yn);
+ }
+ if constexpr (OCCUPANCY >> 1 & 1) {
+ s1 = vmla(s1, b1, xn);
+ }
+ if constexpr (OCCUPANCY >> 2 & 1) {
+ s2 = vmul(b2, xn);
+ } else {
+ s2 = vdupn<T>(0.f);
+ }
+ if constexpr (OCCUPANCY >> 4 & 1) {
+ s2 = vmla(s2, negativeA2, yn);
+ }
+ };
+
+ while (frames > 0) {
+ #pragma unroll
+ for (size_t i = 0; i < UNROLL_LOOPS; ++i) {
+ KERNEL();
+ }
+ frames--;
+ }
+ if constexpr (UNROLL_LOOPS > 1) {
+ for (size_t i = 0; i < remainder; ++i) {
+ KERNEL();
+ }
+ }
+ s1_ = s1; // write the delay state back so the next process() call continues from it
+ s2_ = s2;
+ }
+};
+
namespace details {
+
// Helper methods for constructing a constexpr array of function pointers.
// As function pointers are efficient and have no constructor/destructor
// this is preferred over std::function.
@@ -105,67 +238,6 @@
}
}
-// For biquad_filter_fast, we template based on whether coef[i] is nonzero - this should be
-// determined in a constexpr fashion for optimization.
-
-// Helper which takes a stride to allow column processing of interleaved audio streams.
-template <size_t OCCUPANCY, bool SAME_COEF_PER_CHANNEL, typename D>
-void biquad_filter_1fast(D *out, const D *in, size_t frames, size_t stride,
- size_t channelCount, D *delays, const D *coefs, size_t localStride) {
-#if defined(__i386__) || defined(__x86_x64__)
- D delta = std::numeric_limits<float>::min() * (1 << 24);
-#endif
- D b0, b1, b2, negativeA1, negativeA2;
-
- if constexpr (SAME_COEF_PER_CHANNEL) {
- b0 = coefs[0];
- b1 = coefs[1];
- b2 = coefs[2];
- negativeA1 = -coefs[3];
- negativeA2 = -coefs[4];
- }
- for (size_t i = 0; i < channelCount; ++i) {
- if constexpr (!SAME_COEF_PER_CHANNEL) {
- b0 = coefs[0];
- b1 = coefs[localStride];
- b2 = coefs[2 * localStride];
- negativeA1 = -coefs[3 * localStride];
- negativeA2 = -coefs[4 * localStride];
- ++coefs;
- }
-
- D s1n1 = delays[0];
- D s2n1 = delays[localStride];
- const D *input = &in[i];
- D *output = &out[i];
- for (size_t j = frames; j > 0; --j) {
- // Adding a delta to avoid subnormal exception handling on the x86/x64 platform;
- // this is not a problem with the ARM platform. The delta will not affect the
- // precision of the result.
-#if defined(__i386__) || defined(__x86_x64__)
- const D xn = *input + delta;
-#else
- const D xn = *input;
-#endif
- D yn = (OCCUPANCY >> 0 & 1) * b0 * xn + s1n1;
- s1n1 = (OCCUPANCY >> 1 & 1) * b1 * xn + (OCCUPANCY >> 3 & 1) * negativeA1 * yn + s2n1;
- s2n1 = (OCCUPANCY >> 2 & 1) * b2 * xn + (OCCUPANCY >> 4 & 1) * negativeA2 * yn;
-
- input += stride;
-
- *output = yn;
- output += stride;
-
-#if defined(__i386__) || defined(__x86_x64__)
- delta = -delta;
-#endif
- }
- delays[0] = s1n1;
- delays[localStride] = s2n1;
- ++delays;
- }
-}
-
// Helper function to zero channels in the input buffer.
// This is used for the degenerate coefficient case which results in all zeroes.
template <typename D>
@@ -180,90 +252,69 @@
}
}
-template <size_t OCCUPANCY, bool SAME_COEF_PER_CHANNEL, typename D>
-void biquad_filter_fast(D *out, const D *in, size_t frames, size_t stride,
- size_t channelCount, D *delays, const D *coefs, size_t localStride) {
- if constexpr ((OCCUPANCY & 7) == 0) { // all b's are zero, output is zero.
- zeroChannels(out, frames, stride, channelCount);
- return;
- }
- biquad_filter_1fast<OCCUPANCY, SAME_COEF_PER_CHANNEL>(
- out, in, frames, stride, channelCount, delays, coefs, localStride);
-}
-
-#ifdef USE_NEON
-
-template <size_t OCCUPANCY, bool SAME_COEF_PER_CHANNEL, typename T, typename F>
-void biquad_filter_neon_impl(F *out, const F *in, size_t frames, size_t stride,
+template <template <typename, typename> typename FilterType,
+ size_t OCCUPANCY, bool SAME_COEF_PER_CHANNEL, typename T, typename F>
+void biquad_filter_func_impl(F *out, const F *in, size_t frames, size_t stride,
size_t channelCount, F *delays, const F *coefs, size_t localStride) {
using namespace android::audio_utils::intrinsics;
constexpr size_t elements = sizeof(T) / sizeof(F); // how many float elements in T.
- T b0, b1, b2, negativeA1, negativeA2;
- if constexpr (SAME_COEF_PER_CHANNEL) {
- b0 = vdupn<T>(coefs[0]);
- b1 = vdupn<T>(coefs[1]);
- b2 = vdupn<T>(coefs[2]);
- negativeA1 = vneg(vdupn<T>(coefs[3]));
- negativeA2 = vneg(vdupn<T>(coefs[4]));
- }
+ const size_t coefStride = SAME_COEF_PER_CHANNEL ? 1 : localStride;
+ using CoefType = std::conditional_t<SAME_COEF_PER_CHANNEL, F, T>;
+
for (size_t i = 0; i < channelCount; i += elements) {
- if constexpr (!SAME_COEF_PER_CHANNEL) {
- b0 = vld1<T>(coefs);
- b1 = vld1<T>(coefs + localStride);
- b2 = vld1<T>(coefs + localStride * 2);
- negativeA1 = vneg(vld1<T>(coefs + localStride * 3));
- negativeA2 = vneg(vld1<T>(coefs + localStride * 4));
- coefs += elements;
- }
T s1 = vld1<T>(&delays[0]);
T s2 = vld1<T>(&delays[localStride]);
- const F *input = &in[i];
- F *output = &out[i];
- for (size_t j = frames; j > 0; --j) {
- T xn = vld1<T>(input);
- T yn = s1;
- if constexpr (OCCUPANCY >> 0 & 1) {
- yn = vmla(yn, b0, xn);
- }
- s1 = s2;
- if constexpr (OCCUPANCY >> 3 & 1) {
- s1 = vmla(s1, negativeA1, yn);
- }
- if constexpr (OCCUPANCY >> 1 & 1) {
- s1 = vmla(s1, b1, xn);
- }
- if constexpr (OCCUPANCY >> 2 & 1) {
- s2 = vmul(b2, xn);
- } else {
- s2 = vdupn<T>(0.f);
- }
- if constexpr (OCCUPANCY >> 4 & 1) {
- s2 = vmla(s2, negativeA2, yn);
- }
-
- input += stride;
- vst1(output, yn);
- output += stride;
- }
- vst1(&delays[0], s1);
- vst1(&delays[localStride], s2);
+ FilterType<T, CoefType> kernel(
+ vld1<CoefType>(coefs), vld1<CoefType>(coefs + coefStride),
+ vld1<CoefType>(coefs + coefStride * 2), vld1<CoefType>(coefs + coefStride * 3),
+ vld1<CoefType>(coefs + coefStride * 4),
+ s1, s2);
+ if constexpr (!SAME_COEF_PER_CHANNEL) coefs += elements;
+ kernel.template process<F, OCCUPANCY>(&out[i], &in[i], frames, stride);
+ vst1(&delays[0], kernel.s1_);
+ vst1(&delays[localStride], kernel.s2_);
delays += elements;
}
}
-#define BIQUAD_FILTER_CASE(N, ... /* type */) \
+// Returns the first mask in occupancies that includes all bits of occupancy, otherwise 31.
+template <typename T, size_t N>
+static constexpr size_t nearestOccupancy(T occupancy, const T (&occupancies)[N]) {
+ if (occupancy < 32) {
+ for (auto test : occupancies) {
+ if ((occupancy & test) == occupancy) return test; // test is a superset of occupancy
+ }
+ }
+ return 31; // all five coefficient bits set
+}
+
+enum FILTER_OPTION {
+ FILTER_OPTION_SCALAR_ONLY = (1 << 0), // biquad_filter_func() jumps straight to its scalar path
+};
+
+// Default biquad type: the kernel template that biquad_filter_func() instantiates.
+template <typename T, typename F>
+using BiquadFilterType = BiquadDirect2Transpose<T, F>;
+
+#define BIQUAD_FILTER_CASE(N, FilterType, ... /* type */) \
case N: { \
- biquad_filter_neon_impl<OCCUPANCY, SAME_COEF_PER_CHANNEL, __VA_ARGS__>( \
+ using VectorType = __VA_ARGS__; \
+ biquad_filter_func_impl< \
+ FilterType, \
+ nearestOccupancy(OCCUPANCY, \
+ FilterType<VectorType, D>::required_occupancies_), \
+ SAME_COEF_PER_CHANNEL, VectorType>( \
out + offset, in + offset, frames, stride, remaining, \
delays + offset, c, localStride); \
goto exit; \
}
template <size_t OCCUPANCY, bool SAME_COEF_PER_CHANNEL, typename D>
-void biquad_filter_neon(D *out, const D *in, size_t frames, size_t stride,
- size_t channelCount, D *delays, const D *coefs, size_t localStride) {
+void biquad_filter_func(D *out, const D *in, size_t frames, size_t stride,
+ size_t channelCount, D *delays, const D *coefs, size_t localStride,
+ FILTER_OPTION filterOptions) {
if constexpr ((OCCUPANCY & 7) == 0) { // all b's are zero, output is zero.
zeroChannels(out, frames, stride, channelCount);
return;
@@ -274,41 +325,53 @@
// using alt_9_t = struct { struct { float32x4x2_t a; float b; } s; };
// using alt_15_t = struct { struct { float32x4x2_t a; struct { float v[7]; } b; } s; };
+#ifdef USE_NEON
+ // use NEON types to ensure we have the proper intrinsic acceleration.
+ using alt_16_t = float32x4x4_t;
+ using alt_8_t = float32x4x2_t;
+ using alt_4_t = float32x4_t;
+#else
+ // Use C++ types, no NEON needed.
+ using alt_16_t = intrinsics::internal_array_t<float, 16>;
+ using alt_8_t = intrinsics::internal_array_t<float, 8>;
+ using alt_4_t = intrinsics::internal_array_t<float, 4>;
+#endif
+
for (size_t offset = 0; offset < channelCount; ) {
size_t remaining = channelCount - offset;
auto *c = SAME_COEF_PER_CHANNEL ? coefs : coefs + offset;
+ if (filterOptions & FILTER_OPTION_SCALAR_ONLY) goto scalar;
if constexpr (std::is_same_v<D, float>) {
switch (remaining) {
default:
if (remaining >= 16) {
remaining &= ~15;
- biquad_filter_neon_impl<OCCUPANCY, SAME_COEF_PER_CHANNEL, float32x4x4_t>(
+ biquad_filter_func_impl<
+ BiquadFilterType,
+ nearestOccupancy(OCCUPANCY,
+ BiquadFilterType<D, D>::required_occupancies_),
+ SAME_COEF_PER_CHANNEL, alt_16_t>(
out + offset, in + offset, frames, stride, remaining,
delays + offset, c, localStride);
offset += remaining;
continue;
}
break; // case 1 handled at bottom.
- BIQUAD_FILTER_CASE(15, intrinsics::internal_array_t<float, 15>)
- BIQUAD_FILTER_CASE(14, intrinsics::internal_array_t<float, 14>)
- BIQUAD_FILTER_CASE(13, intrinsics::internal_array_t<float, 13>)
- BIQUAD_FILTER_CASE(12, intrinsics::internal_array_t<float, 12>)
- BIQUAD_FILTER_CASE(11, intrinsics::internal_array_t<float, 11>)
- BIQUAD_FILTER_CASE(10, intrinsics::internal_array_t<float, 10>)
- BIQUAD_FILTER_CASE(9, intrinsics::internal_array_t<float, 9>)
- // We choose the NEON intrinsic type over internal_array for 8 to
- // check if there is any performance difference in benchmark (should be similar).
- // BIQUAD_FILTER_CASE(8, intrinsics::internal_array_t<float, 8>)
- BIQUAD_FILTER_CASE(8, float32x4x2_t)
- BIQUAD_FILTER_CASE(7, intrinsics::internal_array_t<float, 7>)
- BIQUAD_FILTER_CASE(6, intrinsics::internal_array_t<float, 6>)
- BIQUAD_FILTER_CASE(5, intrinsics::internal_array_t<float, 5>)
- BIQUAD_FILTER_CASE(4, float32x4_t)
- // We choose the NEON intrinsic type over internal_array for 4 to
- // check if there is any performance difference in benchmark (should be similar).
- // BIQUAD_FILTER_CASE(4, intrinsics::internal_array_t<float, 4>)
- BIQUAD_FILTER_CASE(3, intrinsics::internal_array_t<float, 3>)
- BIQUAD_FILTER_CASE(2, intrinsics::internal_array_t<float, 2>)
+ BIQUAD_FILTER_CASE(15, BiquadFilterType, intrinsics::internal_array_t<float, 15>)
+ BIQUAD_FILTER_CASE(14, BiquadFilterType, intrinsics::internal_array_t<float, 14>)
+ BIQUAD_FILTER_CASE(13, BiquadFilterType, intrinsics::internal_array_t<float, 13>)
+ BIQUAD_FILTER_CASE(12, BiquadFilterType, intrinsics::internal_array_t<float, 12>)
+ BIQUAD_FILTER_CASE(11, BiquadFilterType, intrinsics::internal_array_t<float, 11>)
+ BIQUAD_FILTER_CASE(10, BiquadFilterType, intrinsics::internal_array_t<float, 10>)
+ BIQUAD_FILTER_CASE(9, BiquadFilterType, intrinsics::internal_array_t<float, 9>)
+ BIQUAD_FILTER_CASE(8, BiquadFilterType, alt_8_t)
+ BIQUAD_FILTER_CASE(7, BiquadFilterType, intrinsics::internal_array_t<float, 7>)
+ BIQUAD_FILTER_CASE(6, BiquadFilterType, intrinsics::internal_array_t<float, 6>)
+ BIQUAD_FILTER_CASE(5, BiquadFilterType, intrinsics::internal_array_t<float, 5>)
+ BIQUAD_FILTER_CASE(4, BiquadFilterType, alt_4_t)
+ BIQUAD_FILTER_CASE(3, BiquadFilterType, intrinsics::internal_array_t<float, 3>)
+ BIQUAD_FILTER_CASE(2, BiquadFilterType, intrinsics::internal_array_t<float, 2>)
+ // BIQUAD_FILTER_CASE(1, BiquadFilterType, intrinsics::internal_array_t<float, 1>)
}
} else if constexpr (std::is_same_v<D, double>) {
#if defined(__aarch64__)
@@ -316,27 +379,34 @@
default:
if (remaining >= 8) {
remaining &= ~7;
- biquad_filter_neon_impl<OCCUPANCY, SAME_COEF_PER_CHANNEL,
- intrinsics::internal_array_t<double, 8>>(
+ biquad_filter_func_impl<BiquadFilterType,
+ nearestOccupancy(OCCUPANCY,
+ BiquadFilterType<D, D>::required_occupancies_),
+ SAME_COEF_PER_CHANNEL,
+ intrinsics::internal_array_t<double, 8>>(
out + offset, in + offset, frames, stride, remaining,
delays + offset, c, localStride);
offset += remaining;
continue;
}
break; // case 1 handled at bottom.
- BIQUAD_FILTER_CASE(7, intrinsics::internal_array_t<double, 7>)
- BIQUAD_FILTER_CASE(6, intrinsics::internal_array_t<double, 6>)
- BIQUAD_FILTER_CASE(5, intrinsics::internal_array_t<double, 5>)
- BIQUAD_FILTER_CASE(4, intrinsics::internal_array_t<double, 4>)
- BIQUAD_FILTER_CASE(3, intrinsics::internal_array_t<double, 3>)
- BIQUAD_FILTER_CASE(2, intrinsics::internal_array_t<double, 2>)
+ BIQUAD_FILTER_CASE(7, BiquadFilterType, intrinsics::internal_array_t<double, 7>)
+ BIQUAD_FILTER_CASE(6, BiquadFilterType, intrinsics::internal_array_t<double, 6>)
+ BIQUAD_FILTER_CASE(5, BiquadFilterType, intrinsics::internal_array_t<double, 5>)
+ BIQUAD_FILTER_CASE(4, BiquadFilterType, intrinsics::internal_array_t<double, 4>)
+ BIQUAD_FILTER_CASE(3, BiquadFilterType, intrinsics::internal_array_t<double, 3>)
+ BIQUAD_FILTER_CASE(2, BiquadFilterType, intrinsics::internal_array_t<double, 2>)
};
#endif
}
+ scalar:
// Essentially the code below is scalar, the same as
// biquad_filter_1fast<OCCUPANCY, SAME_COEF_PER_CHANNEL>,
// but formulated with NEON intrinsic-like call pattern.
- biquad_filter_neon_impl<OCCUPANCY, SAME_COEF_PER_CHANNEL, D>(
+ biquad_filter_func_impl<BiquadFilterType,
+ nearestOccupancy(OCCUPANCY,
+ BiquadFilterType<D, D>::required_occupancies_),
+ SAME_COEF_PER_CHANNEL, D>(
out + offset, in + offset, frames, stride, remaining,
delays + offset, c, localStride);
offset += remaining;
@@ -344,8 +414,6 @@
exit:;
}
-#endif // USE_NEON
-
} // namespace details
/**
@@ -584,16 +652,14 @@
}
// Select the proper filtering function from our array.
- (void)optimized; // avoid unused variable warning.
- mFunc = mFilterFast[category]; // default if we don't have processor optimization.
-
-#ifdef USE_NEON
- /* if constexpr (std::is_same_v<D, float>) */ {
- if (optimized) {
- mFunc = mFilterNeon[category];
- }
+ if (optimized) {
+ mFilterOptions = (details::FILTER_OPTION)
+ (mFilterOptions & ~details::FILTER_OPTION_SCALAR_ONLY);
+ } else {
+ mFilterOptions = (details::FILTER_OPTION)
+ (mFilterOptions | details::FILTER_OPTION_SCALAR_ONLY);
}
-#endif
+ mFunc = mFilterFuncs[category];
}
/**
@@ -603,7 +669,7 @@
* \param in pointer to the input data
* \param frames number of audio frames to be processed
*/
- void process(D* out, const D *in, size_t frames) {
+ void process(D* out, const D* in, size_t frames) {
process(out, in, frames, mChannelCount);
}
@@ -615,10 +681,10 @@
* \param frames number of audio frames to be processed
* \param stride the total number of samples associated with a frame, if not channelCount.
*/
- void process(D* out, const D *in, size_t frames, size_t stride) {
+ void process(D* out, const D* in, size_t frames, size_t stride) {
assert(stride >= mChannelCount);
mFunc(out, in, frames, stride, mChannelCount, mDelays.data(),
- mCoefs.data(), mChannelCount);
+ mCoefs.data(), mChannelCount, mFilterOptions);
}
/**
@@ -655,7 +721,7 @@
auto coefs = mCoefs.data() + (SAME_COEF_PER_CHANNEL ? 0 : fromEnd);
auto delays = mDelays.data() + fromEnd;
mFunc(inout, inout, 1 /* frames */, 1 /* stride */, i + 1,
- delays, coefs, mChannelCount);
+ delays, coefs, mChannelCount, mFilterOptions);
}
auto delays = mDelays.data() + baseIdx;
@@ -664,13 +730,13 @@
// sliding one audio sample at a time.
mFunc(inout, inout,
frames - channelBlock + 1, 1 /* stride */, channelBlock,
- delays, coefs, mChannelCount);
+ delays, coefs, mChannelCount, mFilterOptions);
// drain data pipe.
for (size_t i = 1; i < channelBlock; ++i) {
mFunc(inout + frames - channelBlock + i, inout + frames - channelBlock + i,
1 /* frames */, 1 /* stride */, channelBlock - i,
- delays, coefs, mChannelCount);
+ delays, coefs, mChannelCount, mFilterOptions);
}
}
}
@@ -681,7 +747,7 @@
auto coefs = mCoefs.data() + (SAME_COEF_PER_CHANNEL ? 0 : fromEnd);
mFunc(inout, inout,
frames, 1 /* stride */, 1 /* channelCount */,
- mDelays.data() + fromEnd, coefs, mChannelCount);
+ mDelays.data() + fromEnd, coefs, mChannelCount, mFilterOptions);
}
}
@@ -746,121 +812,57 @@
*/
std::vector<D> mDelays;
- using filter_func = decltype(details::biquad_filter_fast<0, true, D>);
+ details::FILTER_OPTION mFilterOptions{};
- /**
- * \var filter_func* mFunc
+ // Consider making a separate delegation class.
+ /*
+ * We store an array of functions based on the occupancy.
*
- * The current filter function selected for the channel occupancy of the Biquad.
- */
- filter_func *mFunc;
-
- // Create a functional wrapper to feed "biquad_filter_fast" to
- // make_functional_array() to populate the array.
- //
- // OCCUPANCY is a bitmask corresponding to the presence of nonzero Biquad coefficients
- // b0 b1 b2 a1 a2 (from lsb to msb)
- template <size_t OCCUPANCY, bool SC> // note SC == SAME_COEF_PER_CHANNEL
- struct FuncWrap {
- template<typename T>
- static constexpr size_t nearest() {
- // Combine cases to both improve expected performance and reduce code space.
- // Some occupancy masks provide worse performance than more occupied masks.
- constexpr size_t required_occupancies[] = {
- 1, // constant scale
- 3, // single zero
- 7, // double zero
- 9, // single pole
- // 11, // first order IIR (unnecessary optimization, close enough to 31).
- 27, // double pole + single zero
- 31, // second order IIR (full Biquad)
- };
- if constexpr (OCCUPANCY < 32) {
- for (auto test : required_occupancies) {
- if ((OCCUPANCY & test) == OCCUPANCY) return test;
- }
- } else {
- static_assert(intrinsics::dependent_false_v<T>);
- }
- return 0; // never gets here.
- }
-
- static void func(D* out, const D *in, size_t frames, size_t stride,
- size_t channelCount, D *delays, const D *coef, size_t localStride) {
- constexpr size_t NEAREST_OCCUPANCY = nearest<D>();
- details::biquad_filter_fast<NEAREST_OCCUPANCY, SC>(
- out, in, frames, stride, channelCount, delays, coef, localStride);
- }
- };
-
- /**
- * \var mFilterFast
- *
- * std::array of functions based on coefficient occupancy.
+ * OCCUPANCY is a bitmask corresponding to the presence of nonzero Biquad coefficients
+ * b0 b1 b2 a1 a2 (from lsb to msb)
*
* static inline constexpr std::array<filter_func*, M> mArray = {
- * biquad_filter_fast<0>,
- * biquad_filter_fast<1>,
- * biquad_filter_fast<2>,
+ * biquad_filter_func<0>,
+ * biquad_filter_func<1>,
+ * biquad_filter_func<2>,
* ...
- * biquad_filter_fast<(1 << kBiquadNumCoefs) - 1>,
+ * biquad_filter_func<(1 << kBiquadNumCoefs) - 1>,
* };
*
* Every time the coefficients are changed, we select the processing function from
* this table.
*/
- static inline constexpr auto mFilterFast =
- details::make_functional_array<
- FuncWrap, 1 << kBiquadNumCoefs, SAME_COEF_PER_CHANNEL>();
-#ifdef USE_NEON
- // OCCUPANCY is a bitmask corresponding to the presence of nonzero Biquad coefficients
- // b0 b1 b2 a1 a2 (from lsb to msb)
-
+ // Used to build the functional array.
template <size_t OCCUPANCY, bool SC> // note SC == SAME_COEF_PER_CHANNEL
- struct FuncWrapNeon {
- template<typename T>
- static constexpr size_t nearest() {
- // combine cases to both improve expected performance and reduce code space.
- //
- // This lists the occupancies we will specialize functions for.
- constexpr size_t required_occupancies[] = {
- 1, // constant scale
- 3, // single zero
- 7, // double zero
- 9, // single pole
- 11, // first order IIR
- 27, // double pole + single zero
- 31, // second order IIR (full Biquad)
- };
- if constexpr (OCCUPANCY < 32) {
- for (auto test : required_occupancies) {
- if ((OCCUPANCY & test) == OCCUPANCY) return test;
- }
- } else {
- static_assert(intrinsics::dependent_false_v<T>);
- }
- return 0; // never gets here.
- }
-
+ struct FuncWrap {
static void func(D* out, const D *in, size_t frames, size_t stride,
- size_t channelCount, D *delays, const D *coef, size_t localStride) {
- constexpr size_t NEAREST_OCCUPANCY = nearest<D>();
- details::biquad_filter_neon<NEAREST_OCCUPANCY, SC>(
- out, in, frames, stride, channelCount, delays, coef, localStride);
+ size_t channelCount, D *delays, const D *coef, size_t localStride,
+ details::FILTER_OPTION filterOptions) {
+ constexpr size_t NEAREST_OCCUPANCY =
+ details::nearestOccupancy(
+ OCCUPANCY, details::BiquadFilterType<D, D>::required_occupancies_);
+ details::biquad_filter_func<NEAREST_OCCUPANCY, SC>(
+ out, in, frames, stride, channelCount, delays, coef, localStride,
+ filterOptions);
}
};
- // Neon optimized array of functions.
- static inline constexpr auto mFilterNeon =
+ // Vector optimized array of functions.
+ static inline constexpr auto mFilterFuncs =
details::make_functional_array<
- FuncWrapNeon, 1 << kBiquadNumCoefs, SAME_COEF_PER_CHANNEL>();
-#endif // USE_NEON
+ FuncWrap, 1 << kBiquadNumCoefs, SAME_COEF_PER_CHANNEL>();
+ /**
+ * \var mFunc
+ *
+ * The current filter function selected for the coefficient occupancy of the Biquad.
+ * It will be one of mFilterFuncs (its type is the decayed element type of that array).
+ */
+ std::decay_t<decltype(mFilterFuncs[0])> mFunc;
};
} // namespace android::audio_utils
+#pragma pop_macro("USE_DITHER")
#pragma pop_macro("USE_NEON")
-
-#endif // !ANDROID_AUDIO_UTILS_BIQUAD_FILTER_H
diff --git a/audio_utils/include/audio_utils/intrinsic_utils.h b/audio_utils/include/audio_utils/intrinsic_utils.h
index ed2b2bb..0c333e0 100644
--- a/audio_utils/include/audio_utils/intrinsic_utils.h
+++ b/audio_utils/include/audio_utils/intrinsic_utils.h
@@ -78,6 +78,45 @@
using alternative_15_t = struct { struct { float32x4x2_t a; struct { float v[7]; } b; } s; };
*/
+// Adds a + b: scalars directly, NEON vectors via intrinsics, aggregates member by member.
+template<typename T>
+static inline T vadd(T a, T b) {
+ if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
+ return a + b;
+
+#ifdef USE_NEON
+ } else if constexpr (std::is_same_v<T, float32x2_t>) {
+ return vadd_f32(a, b);
+ } else if constexpr (std::is_same_v<T, float32x4_t>) {
+ return vaddq_f32(a, b);
+#if defined(__aarch64__)
+ } else if constexpr (std::is_same_v<T, float64x2_t>) {
+ return vaddq_f64(a, b);
+#endif
+#endif // USE_NEON
+
+ } else /* constexpr: T is none of the scalar/vector types handled above */ {
+ T ret;
+ auto &[retval] = ret; // T is expected to be a single-member struct; bind to that member
+ const auto &[aval] = a;
+ const auto &[bval] = b;
+ if constexpr (std::is_array_v<decltype(retval)>) {
+#pragma unroll
+ for (size_t i = 0; i < std::size(aval); ++i) {
+ retval[i] = vadd(aval[i], bval[i]); // array member: recurse on each element
+ }
+ return ret;
+ } else /* constexpr: the member is not an array */ {
+ auto &[r1, r2] = retval; // the member must then decompose into exactly two parts
+ const auto &[a1, a2] = aval;
+ const auto &[b1, b2] = bval;
+ r1 = vadd(a1, b1);
+ r2 = vadd(a2, b2);
+ return ret;
+ }
+ }
+}
+
// duplicate float into all elements.
template<typename T, typename F>
static inline T vdupn(F f) {
@@ -156,6 +195,73 @@
}
}
+/**
+ * Returns r as follows (multiply-accumulate where one multiplicand is a scalar):
+ * r_i = a_i + b_i * c if a and b are the same vector type and c is scalar or
+ * r_i = a_i + b * c_i if a and c are the same vector type and b is scalar.
+ * This catch-all overload (three distinct types) only exists to fail the static_assert.
+ */
+template<typename T, typename S, typename F>
+static inline T vmla(T a, S b, F c) {
+ // Both types T and S are non-primitive and they are not equal. T == S handled below.
+ (void) a;
+ (void) b;
+ (void) c;
+ static_assert(dependent_false_v<T>);
+}
+// Computes a + b * c where a and b share type T and the multiplier c is a float/double scalar.
+template<typename T, typename F>
+static inline T vmla(T a, T b, F c) {
+ if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
+ if constexpr (std::is_same_v<F, float> || std::is_same_v<F, double>) {
+ return a + b * c;
+ } else {
+ static_assert(dependent_false_v<T>);
+ }
+ } else if constexpr (std::is_same_v<F, float> || std::is_same_v<F, double>) {
+ // non-scalar a, b with scalar c: use the NEON scalar-multiplier forms where available
+#ifdef USE_NEON
+ if constexpr (std::is_same_v<T, float32x2_t>) {
+ return vmla_n_f32(a, b, c);
+ } else if constexpr (std::is_same_v<T, float32x4_t>) {
+ return vmlaq_n_f32(a, b, c);
+#if defined(__aarch64__)
+ } else if constexpr (std::is_same_v<T, float64x2_t>) {
+ return vmlaq_f64(a, b, vdupq_n_f64(c)); // was vmlaq_n_f64(a, b), which dropped c
+#endif
+ } else
+#endif // USE_NEON
+ {
+ T ret;
+ auto &[retval] = ret; // T is expected to be a single-member struct; bind to that member
+ const auto &[aval] = a;
+ const auto &[bval] = b;
+ if constexpr (std::is_array_v<decltype(retval)>) {
+#pragma unroll
+ for (size_t i = 0; i < std::size(aval); ++i) {
+ retval[i] = vmla(aval[i], bval[i], c); // array member: recurse per element, same c
+ }
+ return ret;
+ } else /* constexpr: the member is not an array */ {
+ auto &[r1, r2] = retval; // the member must then decompose into exactly two parts
+ const auto &[a1, a2] = aval;
+ const auto &[b1, b2] = bval;
+ r1 = vmla(a1, b1, c);
+ r2 = vmla(a2, b2, c);
+ return ret;
+ }
+ }
+ } else {
+ // Both types T and F are non-primitive and they are not equal.
+ static_assert(dependent_false_v<T>);
+ }
+}
+
+template<typename T, typename F>
+static inline T vmla(T a, F b, T c) {
+ return vmla(a, c, b); // swap the multiplicands so the one sharing a's type comes second
+}
+
// fused multiply-add a + b * c
template<typename T>
static inline T vmla(T a, T b, T c) {
@@ -197,7 +303,57 @@
}
}
-// multiply a * b
+/**
+ * Returns c as follows:
+ * c_i = a_i * b_i if a and b are the same vector type or
+ * c_i = a_i * b if a is a vector and b is scalar or
+ * c_i = a * b_i if a is scalar and b is a vector.
+ */
+template<typename T, typename F>
+static inline auto vmul(T a, F b) {
+ if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
+ if constexpr (std::is_same_v<F, float> || std::is_same_v<F, double>) {
+ return a * b;
+ } else /* constexpr */ {
+ return vmul(b, a); // we prefer T to be the vector/struct form.
+ }
+ } else if constexpr (std::is_same_v<F, float> || std::is_same_v<F, double>) {
+ // non-scalar a with scalar b: use the NEON scalar-multiplier forms where available
+#ifdef USE_NEON
+ if constexpr (std::is_same_v<T, float32x2_t>) {
+ return vmul_n_f32(a, b);
+ } else if constexpr (std::is_same_v<T, float32x4_t>) {
+ return vmulq_n_f32(a, b);
+#if defined(__aarch64__)
+ } else if constexpr (std::is_same_v<T, float64x2_t>) {
+ return vmulq_n_f64(a, b);
+#endif
+ } else
+#endif // USE_NEON
+ {
+ T ret;
+ auto &[retval] = ret; // T is expected to be a single-member struct; bind to that member
+ const auto &[aval] = a;
+ if constexpr (std::is_array_v<decltype(retval)>) {
+#pragma unroll
+ for (size_t i = 0; i < std::size(aval); ++i) {
+ retval[i] = vmul(aval[i], b); // array member: scale each element by b
+ }
+ return ret;
+ } else /* constexpr: the member is not an array */ {
+ auto &[r1, r2] = retval; // the member must then decompose into exactly two parts
+ const auto &[a1, a2] = aval;
+ r1 = vmul(a1, b);
+ r2 = vmul(a2, b);
+ return ret;
+ }
+ }
+ } else {
+ // Both types T and F are non-primitive and they are not equal.
+ static_assert(dependent_false_v<T>);
+ }
+}
+
template<typename T>
static inline T vmul(T a, T b) {
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
@@ -308,6 +464,45 @@
}
}
+// Subtracts a - b: scalars directly, NEON vectors via intrinsics, aggregates member by member.
+template<typename T>
+static inline T vsub(T a, T b) {
+ if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
+ return a - b;
+
+#ifdef USE_NEON
+ } else if constexpr (std::is_same_v<T, float32x2_t>) {
+ return vsub_f32(a, b);
+ } else if constexpr (std::is_same_v<T, float32x4_t>) {
+ return vsubq_f32(a, b);
+#if defined(__aarch64__)
+ } else if constexpr (std::is_same_v<T, float64x2_t>) {
+ return vsubq_f64(a, b);
+#endif
+#endif // USE_NEON
+
+ } else /* constexpr: T is none of the scalar/vector types handled above */ {
+ T ret;
+ auto &[retval] = ret; // T is expected to be a single-member struct; bind to that member
+ const auto &[aval] = a;
+ const auto &[bval] = b;
+ if constexpr (std::is_array_v<decltype(retval)>) {
+#pragma unroll
+ for (size_t i = 0; i < std::size(aval); ++i) {
+ retval[i] = vsub(aval[i], bval[i]); // array member: recurse on each element
+ }
+ return ret;
+ } else /* constexpr: the member is not an array */ {
+ auto &[r1, r2] = retval; // the member must then decompose into exactly two parts
+ const auto &[a1, a2] = aval;
+ const auto &[b1, b2] = bval;
+ r1 = vsub(a1, b1);
+ r2 = vsub(a2, b2);
+ return ret;
+ }
+ }
+}
+
} // namespace android::audio_utils::intrinsics
#pragma pop_macro("USE_NEON")
diff --git a/audio_utils/tests/intrinsic_tests.cpp b/audio_utils/tests/intrinsic_tests.cpp
index 6a16747..d9686ef 100644
--- a/audio_utils/tests/intrinsic_tests.cpp
+++ b/audio_utils/tests/intrinsic_tests.cpp
@@ -25,6 +25,13 @@
using FloatTypes = ::testing::Types<float, double>;
TYPED_TEST_CASE(IntrisicUtilsTest, FloatTypes);
+TYPED_TEST(IntrisicUtilsTest, vadd) {
+ constexpr TypeParam a = 0.25f; // operands and their sum are exactly representable,
+ constexpr TypeParam b = 0.5f; // so the exact equality check below is safe
+ constexpr TypeParam result = a + b;
+ ASSERT_EQ(result, android::audio_utils::intrinsics::vadd(a, b)); // scalar path only
+}
+
TYPED_TEST(IntrisicUtilsTest, vdupn) {
constexpr TypeParam value = 1.f;
ASSERT_EQ(value, android::audio_utils::intrinsics::vdupn<TypeParam>(value));
@@ -62,3 +69,10 @@
&destination, android::audio_utils::intrinsics::vdupn<TypeParam>(value));
ASSERT_EQ(value, destination);
}
+
+TYPED_TEST(IntrisicUtilsTest, vsub) {
+ constexpr TypeParam a = 1.25f; // operands and their difference are exactly representable,
+ constexpr TypeParam b = 1.5f; // so the exact equality check below is safe
+ constexpr TypeParam result = a - b;
+ ASSERT_EQ(result, android::audio_utils::intrinsics::vsub(a, b)); // scalar path only
+}