Merge "Update num_mbs_left When mb_x is Reset." into mnc-dev am: f7cc570a1d am: 6885bab2da am: 8ffb648d7a am: eba2dd32e8 am: 0d619bbda7 am: 292b3a4f21 am: 698fb39b5b am: e71fb91532
am: 63f5d9175e
Change-Id: I49862b67a62f7aa40f2c9724fce3f8f5944318a0
diff --git a/common/arm/ideint_function_selector.c b/common/arm/ideint_function_selector.c
index 920a8eb..7f706b8 100644
--- a/common/arm/ideint_function_selector.c
+++ b/common/arm/ideint_function_selector.c
@@ -50,7 +50,6 @@
/* User include files */
#include "icv_datatypes.h"
#include "icv_macros.h"
-#include "icv_platform_macros.h"
#include "icv.h"
#include "icv_variance.h"
#include "icv_sad.h"
diff --git a/common/arm/ideint_function_selector_a9.c b/common/arm/ideint_function_selector_a9.c
index 58939c9..a5768b7 100644
--- a/common/arm/ideint_function_selector_a9.c
+++ b/common/arm/ideint_function_selector_a9.c
@@ -50,7 +50,6 @@
/* User include files */
#include "icv_datatypes.h"
#include "icv_macros.h"
-#include "icv_platform_macros.h"
#include "icv.h"
#include "icv_variance.h"
#include "icv_sad.h"
diff --git a/common/arm/ideint_function_selector_av8.c b/common/arm/ideint_function_selector_av8.c
index 7e433c4..7886be1 100644
--- a/common/arm/ideint_function_selector_av8.c
+++ b/common/arm/ideint_function_selector_av8.c
@@ -50,7 +50,6 @@
/* User include files */
#include "icv_datatypes.h"
#include "icv_macros.h"
-#include "icv_platform_macros.h"
#include "icv.h"
#include "icv_variance.h"
#include "icv_sad.h"
diff --git a/common/armv8/icv_sad_av8.s b/common/armv8/icv_sad_av8.s
index 7bc1ffd..8c868d4 100644
--- a/common/armv8/icv_sad_av8.s
+++ b/common/armv8/icv_sad_av8.s
@@ -95,6 +95,6 @@
addp v0.8h, v0.8h, v0.8h
addp v0.8h, v0.8h, v0.8h
- smov x0, v0.8h[0]
+ smov x0, v0.h[0]
ret
diff --git a/common/armv8/icv_variance_av8.s b/common/armv8/icv_variance_av8.s
index 3caa148..211e092 100644
--- a/common/armv8/icv_variance_av8.s
+++ b/common/armv8/icv_variance_av8.s
@@ -100,10 +100,10 @@
addp v20.2s, v20.2s, v20.2s
// Sum(values)
- smov x0, v4.4h[0]
+ smov x0, v4.h[0]
// SumOfSquares
- smov x1, v20.2s[0]
+ smov x1, v20.s[0]
// SquareOfSums
mul x3, x0, x0
diff --git a/common/armv8/ideint_cac_av8.s b/common/armv8/ideint_cac_av8.s
index 76c22b7..bac1382 100644
--- a/common/armv8/ideint_cac_av8.s
+++ b/common/armv8/ideint_cac_av8.s
@@ -218,7 +218,7 @@
cmhi v0.2s, v20.2s, v21.2s
uaddlp v0.1d, v0.2s
- smov x0, v0.2s[0]
+ smov x0, v0.s[0]
cmp x0, #0
mov x4, #1
csel x0, x4, x0, ne
diff --git a/common/armv8/ideint_spatial_filter_av8.s b/common/armv8/ideint_spatial_filter_av8.s
index b95e07f..5713cd3 100644
--- a/common/armv8/ideint_spatial_filter_av8.s
+++ b/common/armv8/ideint_spatial_filter_av8.s
@@ -144,9 +144,9 @@
// Compute shift for first half of the block
compute_shift_1:
- smov x5, v16.2s[0]
- smov x6, v18.2s[0]
- smov x7, v20.2s[0]
+ smov x5, v16.s[0]
+ smov x6, v18.s[0]
+ smov x7, v20.s[0]
// Compute shift
mov x8, #0
@@ -172,9 +172,9 @@
compute_shift_2:
// Compute shift for first half of the block
- smov x5, v16.2s[1]
- smov x6, v18.2s[1]
- smov x7, v20.2s[1]
+ smov x5, v16.s[1]
+ smov x6, v18.s[1]
+ smov x7, v20.s[1]
// Compute shift
mov x9, #0
diff --git a/common/armv8/impeg2_idct.s b/common/armv8/impeg2_idct.s
index 4956e54..82ff0ef 100644
--- a/common/armv8/impeg2_idct.s
+++ b/common/armv8/impeg2_idct.s
@@ -384,30 +384,30 @@
ld1 {v2.4h}, [x0], #8
ld1 {v3.4h}, [x9], #8
ld1 {v4.4h}, [x0], x5
- smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+ smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
ld1 {v5.4h}, [x9], x5
- smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+ smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
ld1 {v6.4h}, [x0], #8
ld1 {v7.4h}, [x9], #8
- smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
+ smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
ld1 {v8.4h}, [x0], x10
- smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
+ smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
ld1 {v9.4h}, [x9], x10
- smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
+ smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
ld1 {v10.4h}, [x0], #8
- smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+ smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
ld1 {v11.4h}, [x9], #8
- smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
ld1 {v12.4h}, [x0], x5
- smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
ld1 {v13.4h}, [x9], x5
- smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
ld1 {v14.4h}, [x0], #8
- smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
ld1 {v15.4h}, [x9], #8
- smull v22.4s, v10.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+ smull v22.4s, v10.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
ld1 {v16.4h}, [x0], x10
- smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+ smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
ld1 {v17.4h}, [x9], x10
///* this following was activated when alignment is not there */
@@ -431,21 +431,21 @@
- smlal v24.4s, v14.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
- smlsl v26.4s, v14.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
- smlal v28.4s, v14.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
- smlal v30.4s, v14.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+ smlal v24.4s, v14.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+ smlsl v26.4s, v14.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+ smlal v28.4s, v14.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+ smlal v30.4s, v14.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
- smlsl v18.4s, v11.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
- smlal v6.4s, v11.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+ smlsl v18.4s, v11.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+ smlal v6.4s, v11.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
add v10.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
- smlal v24.4s, v15.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
- smlsl v26.4s, v15.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
- smlal v28.4s, v15.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
- smlsl v30.4s, v15.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
+ smlal v24.4s, v15.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
+ smlsl v26.4s, v15.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
+ smlal v28.4s, v15.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
+ smlsl v30.4s, v15.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
add v14.4s, v10.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
sub v10.4s, v10.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
@@ -502,20 +502,20 @@
- smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
- smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
- smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
- smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+ smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
+ smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
+ smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
+ smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
- smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
- smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
- smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
- smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
- smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
- smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+ smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+ smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
- smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+ smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
add v14.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
@@ -554,37 +554,37 @@
cmp x12, #0xf0
bge skip_last4cols
- smull v24.4s, v8.4h, v0.4h[1] //// y1 * cos1(part of b0)
- smull v26.4s, v8.4h, v0.4h[3] //// y1 * cos3(part of b1)
- smull v28.4s, v8.4h, v1.4h[1] //// y1 * sin3(part of b2)
- smull v30.4s, v8.4h, v1.4h[3] //// y1 * sin1(part of b3)
+ smull v24.4s, v8.4h, v0.h[1] //// y1 * cos1(part of b0)
+ smull v26.4s, v8.4h, v0.h[3] //// y1 * cos3(part of b1)
+ smull v28.4s, v8.4h, v1.h[1] //// y1 * sin3(part of b2)
+ smull v30.4s, v8.4h, v1.h[3] //// y1 * sin1(part of b3)
- smlal v24.4s, v9.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
- smlsl v26.4s, v9.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
- smlsl v28.4s, v9.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
- smlsl v30.4s, v9.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smlal v24.4s, v9.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlsl v26.4s, v9.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v28.4s, v9.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v30.4s, v9.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
- smull v18.4s, v5.4h, v1.4h[2] //// y2 * sin2 (q4 is freed by this time)(part of d1)
- smull v8.4s, v5.4h, v0.4h[2] //// y2 * cos2(part of d0)
+ smull v18.4s, v5.4h, v1.h[2] //// y2 * sin2 (q4 is freed by this time)(part of d1)
+ smull v8.4s, v5.4h, v0.h[2] //// y2 * cos2(part of d0)
- smull v20.4s, v4.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
- smull v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+ smull v20.4s, v4.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
+ smull v22.4s, v12.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
- smlal v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
- smlsl v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
- smlal v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
- smlal v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+ smlal v24.4s, v16.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+ smlsl v26.4s, v16.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+ smlal v28.4s, v16.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+ smlal v30.4s, v16.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
- smlsl v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
- smlal v8.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+ smlsl v18.4s, v13.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+ smlal v8.4s, v13.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
- smlal v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of e0,e7)
- smlsl v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of e1,e6)
- smlal v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of e2,e5)
- smlsl v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of e3,e4)
+ smlal v24.4s, v17.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of e0,e7)
+ smlsl v26.4s, v17.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of e1,e6)
+ smlal v28.4s, v17.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of e2,e5)
+ smlsl v30.4s, v17.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of e3,e4)
add v16.4s, v12.4s , v8.4s //// a0 = c0 + d0(part of e0,e7)
sub v12.4s, v12.4s , v8.4s //// a3 = c0 - d0(part of e3,e4)
@@ -647,21 +647,21 @@
mov v25.d[0], x15
- smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
- smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
- smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
- smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+ smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
+ smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
+ smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
+ smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
- smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
- smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
- smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
- smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
- smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+ smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
// vmull.s16 q11,d4,d0[0] @// y4 * cos4(part of c0 and c1)
- smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
- smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+ smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+ smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
@@ -727,19 +727,19 @@
mov v25.d[0], x19
mov v25.d[1], x20
- smull v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0)
+ smull v24.4s, v14.4h, v0.h[1] //// y1 * cos1(part of b0)
- smull v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1)
- smull v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2)
- smull v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3)
+ smull v26.4s, v14.4h, v0.h[3] //// y1 * cos3(part of b1)
+ smull v28.4s, v14.4h, v1.h[1] //// y1 * sin3(part of b2)
+ smull v30.4s, v14.4h, v1.h[3] //// y1 * sin1(part of b3)
- smlal v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
- smlsl v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
- smlsl v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
- smlsl v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
- smull v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
- smull v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
- smull v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0)
+ smlal v24.4s, v15.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlsl v26.4s, v15.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v28.4s, v15.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v30.4s, v15.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smull v20.4s, v10.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
+ smull v18.4s, v11.4h, v1.h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
+ smull v14.4s, v11.4h, v0.h[2] //// y2 * cos2(part of d0)
add x4, x2, x8, lsl #1 // x4 = x2 + pred_strd * 2 => x4 points to 3rd row of pred data
@@ -908,38 +908,38 @@
//// q5 -> q2
//// q7 -> q4
- smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
- smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
- smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
- smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+ smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
+ smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
+ smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
+ smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
- smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
- smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
- smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
- smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
- smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
- smull v22.4s, v4.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+ smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
+ smull v22.4s, v4.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
- smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
- smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+ smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+ smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
- smlal v24.4s, v8.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
- smlsl v26.4s, v8.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
- smlal v28.4s, v8.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
- smlal v30.4s, v8.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+ smlal v24.4s, v8.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+ smlsl v26.4s, v8.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+ smlal v28.4s, v8.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+ smlal v30.4s, v8.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
- smlsl v18.4s, v5.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
- smlal v6.4s, v5.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+ smlsl v18.4s, v5.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+ smlal v6.4s, v5.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
add v2.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
- smlal v24.4s, v9.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
- smlsl v26.4s, v9.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
- smlal v28.4s, v9.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
- smlsl v30.4s, v9.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
+ smlal v24.4s, v9.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
+ smlsl v26.4s, v9.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
+ smlal v28.4s, v9.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
+ smlsl v30.4s, v9.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
sub v22.4s, v2.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
add v4.4s, v2.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
@@ -1004,53 +1004,53 @@
- smull v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0)
- smull v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1)
- smull v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2)
- smull v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3)
- smlal v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
- smlsl v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
- smlsl v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
- smlsl v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
- smull v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
- smull v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
- smull v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
- smull v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0)
- smlal v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+ smull v24.4s, v14.4h, v0.h[1] //// y1 * cos1(part of b0)
+ smull v26.4s, v14.4h, v0.h[3] //// y1 * cos3(part of b1)
+ smull v28.4s, v14.4h, v1.h[1] //// y1 * sin3(part of b2)
+ smull v30.4s, v14.4h, v1.h[3] //// y1 * sin1(part of b3)
+ smlal v24.4s, v15.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+ smlsl v26.4s, v15.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+ smlsl v28.4s, v15.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+ smlsl v30.4s, v15.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+ smull v20.4s, v10.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
+ smull v22.4s, v12.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
+ smull v18.4s, v11.4h, v1.h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
+ smull v14.4s, v11.4h, v0.h[2] //// y2 * cos2(part of d0)
+ smlal v24.4s, v16.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
add x4, x2, x8, lsl #1 // x4 = x2 + pred_strd * 2 => x4 points to 3rd row of pred data
- smlsl v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+ smlsl v26.4s, v16.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
add x5, x8, x8, lsl #1 //
- smlal v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+ smlal v28.4s, v16.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
add x0, x3, x7, lsl #1 // x0 points to 3rd row of dest data
- smlal v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+ smlal v30.4s, v16.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
add x10, x7, x7, lsl #1 //
- smlsl v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+ smlsl v18.4s, v13.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
- smlal v14.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+ smlal v14.4s, v13.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
- smlal v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
+ smlal v24.4s, v17.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
// swapping v3 and v6
mov v31.d[0], v3.d[0]
mov v3.d[0], v6.d[0]
mov v6.d[0], v31.d[0]
- smlsl v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
+ smlsl v26.4s, v17.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
// swapping v5 and v8
mov v31.d[0], v5.d[0]
mov v5.d[0], v8.d[0]
mov v8.d[0], v31.d[0]
- smlal v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
- smlsl v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
+ smlal v28.4s, v17.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
+ smlsl v30.4s, v17.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
sub v22.4s, v12.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
add v12.4s, v12.4s , v14.4s //// a0 = c0 + d0(part of x0,x7)
diff --git a/decoder.arm.mk b/decoder.arm.mk
index fb94969..c3af911 100644
--- a/decoder.arm.mk
+++ b/decoder.arm.mk
@@ -28,5 +28,4 @@
LOCAL_CFLAGS_arm += $(libmpeg2d_cflags_arm)
# CLANG WORKAROUNDS
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
LOCAL_CLANG_ASFLAGS_arm += $(addprefix -Wa$(comma)-I,$(libmpeg2d_inc_dir_arm))
diff --git a/decoder.arm64.mk b/decoder.arm64.mk
index a195111..6770ea0 100644
--- a/decoder.arm64.mk
+++ b/decoder.arm64.mk
@@ -6,7 +6,6 @@
libmpeg2d_srcs_c_arm64 += decoder/arm/impeg2d_function_selector.c
-ifeq ($(ARCH_ARM_HAVE_NEON),true)
libmpeg2d_srcs_c_arm64 += decoder/arm/impeg2d_function_selector_av8.c
libmpeg2d_srcs_c_arm64 += common/arm/ideint_function_selector.c
libmpeg2d_srcs_c_arm64 += common/arm/ideint_function_selector_av8.c
@@ -21,11 +20,6 @@
libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_inter_pred.s
libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_mem_func.s
libmpeg2d_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC
-else
-libmpeg2d_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
-endif
-
-
LOCAL_SRC_FILES_arm64 += $(libmpeg2d_srcs_c_arm64) $(libmpeg2d_srcs_asm_arm64)
@@ -33,5 +27,4 @@
LOCAL_CFLAGS_arm64 += $(libmpeg2d_cflags_arm64)
# CLANG WORKAROUNDS
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
LOCAL_CLANG_ASFLAGS_arm64 += $(addprefix -Wa$(comma)-I,$(libmpeg2d_inc_dir_arm64))
diff --git a/decoder.mk b/decoder.mk
index c1e5861..bf3fa03 100644
--- a/decoder.mk
+++ b/decoder.mk
@@ -10,7 +10,7 @@
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_CFLAGS += -D_LIB -DMULTICORE -fPIC
-LOCAL_CFLAGS += -O3 -DANDROID
+LOCAL_CFLAGS += -O3 -DANDROID -Werror
LOCAL_C_INCLUDES := $(LOCAL_PATH)/decoder $(LOCAL_PATH)/common
diff --git a/decoder/impeg2d_dec_hdr.c b/decoder/impeg2d_dec_hdr.c
index 2608531..aa3c70f 100644
--- a/decoder/impeg2d_dec_hdr.c
+++ b/decoder/impeg2d_dec_hdr.c
@@ -18,8 +18,9 @@
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <string.h>
+#ifdef __ANDROID__
#include <cutils/log.h>
-
+#endif
#include "iv_datatypedef.h"
#include "iv.h"
#include "ivd.h"
@@ -1005,7 +1006,9 @@
if (1 == ps_dec->i4_num_cores && 0 == ps_dec->u2_num_mbs_left)
{
i4_continue_decode = 0;
+#ifdef __ANDROID__
android_errorWriteLog(0x534e4554, "26070014");
+#endif
}
if(i4_continue_decode)
@@ -1295,10 +1298,13 @@
/* Store current slice's row position */
i4_start_row = i4_row;
- } else if (i4_prev_row > i4_row) {
+ }
+#ifdef __ANDROID__
+ else if (i4_prev_row > i4_row)
+ {
android_errorWriteLog(0x534e4554, "26070014");
}
-
+#endif
impeg2d_bit_stream_flush(&s_bitstrm, START_CODE_LEN);
diff --git a/test/decoder.mk b/test/decoder.mk
index 2aef0f9..9296620 100644
--- a/test/decoder.mk
+++ b/test/decoder.mk
@@ -9,5 +9,5 @@
LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/decoder/
LOCAL_SRC_FILES := decoder/main.c
LOCAL_STATIC_LIBRARIES := libmpeg2dec
-
+LOCAL_SHARED_LIBRARIES := liblog
include $(BUILD_EXECUTABLE)