Add push-pop for Neon D8-D15 registers

Bug: 68320413
Test: hevcdec with testbench updates to validate D8-D15

Change-Id: Ib62e68b69b016b1ded12cb00d1bb580de8a744c2
(cherry picked from commit 6acb36d25de9b2ac19aa4c34d6821c42e9a84c00)
diff --git a/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s b/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s
index a9a75cb..caf7123 100644
--- a/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s
+++ b/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s
@@ -68,13 +68,13 @@
 @*                                                                            *
 @*  Register Usage   : R0 - R14                                               *
 @*                                                                            *
-@*  Stack Usage      : 40 Bytes                                               *
+@*  Stack Usage      : 104 Bytes                                              *
 @*                                                                            *
 @*  Interruptibility : Interruptible                                          *
 @*                                                                            *
 @*  Known Limitations                                                         *
 @*       Assumptions: Image Width:     Assumed to be multiple of 16 and       *
-@*                     greater than or equal to 16                *
+@*                     greater than or equal to 16                            *
 @*                     Image Height:    Assumed to be even.                   *
 @*                                                                            *
 @*  Revision History :                                                        *
@@ -82,6 +82,7 @@
 @*         07 06 2010   Varshita        Draft                                 *
 @*         07 06 2010   Naveen Kr T     Completed                             *
 @*         05 08 2013   Naveen K P      Modified for HEVC                     *
+@*         30 10 2018   Saurabh Sood    Store D registers to stack            *
 @*****************************************************************************/
     .global ihevcd_fmt_conv_420sp_to_rgba8888_a9q
 .type ihevcd_fmt_conv_420sp_to_rgba8888_a9q, function
@@ -89,7 +90,7 @@
 
     @// push the registers on the stack
     STMFD       SP!,{R4-R12,LR}
-
+    VPUSH       {d8-d15}
 
     @//R0 - Y PTR
     @//R1 - UV PTR
@@ -131,12 +132,12 @@
 
     @//D0 HAS C1-C2-C3-C4
     @// load other parameters from stack
-    LDR         R5,[sp,#40]
+    LDR         R5,[sp,#104]
     @LDR  R4,[sp,#44]
-    LDR         R6,[sp,#44]
-    LDR         R7,[sp,#48]
+    LDR         R6,[sp,#108]
+    LDR         R7,[sp,#112]
     @LDR  R8,[sp,#52]
-    LDR         R9,[sp,#52]
+    LDR         R9,[sp,#116]
 
     @// calculate offsets, offset = stride - width
     SUB         R10,R6,R3                   @// luma offset
@@ -445,10 +446,8 @@
     BNE         LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP
 
     @//POP THE REGISTERS
+    VPOP        {d8-d15}
     LDMFD       SP!,{R4-R12,PC}
 
 
-
-
     .section .note.GNU-stack,"",%progbits
-
diff --git a/decoder/arm/ihevcd_itrans_recon_dc_chroma.s b/decoder/arm/ihevcd_itrans_recon_dc_chroma.s
index 6732ce0..9184b80 100644
--- a/decoder/arm/ihevcd_itrans_recon_dc_chroma.s
+++ b/decoder/arm/ihevcd_itrans_recon_dc_chroma.s
@@ -58,9 +58,9 @@
 
 
     push        {r0-r11,lr}
-    ldr         r4,[sp,#0x34]               @loads log2_trans_size
-    ldr         r5,[sp,#0x38]               @ loads i2_coeff_value
-
+    vpush       {d8-d15}
+    ldr         r4,[sp,#0x74]               @loads log2_trans_size
+    ldr         r5,[sp,#0x78]               @ loads i2_coeff_value
     mov         r10,#1
     lsl         r4,r10,r4                   @    trans_size = (1 << log2_trans_size)@
     mov         r6,#64 @ 1 << (shift1 - 1)@
@@ -188,6 +188,7 @@
     vst1.u32    {d8},[r1]
 
 end_loops_chroma:
+    vpop        {d8-d15}
     pop         {r0-r11,pc}
 
 
diff --git a/decoder/arm/ihevcd_itrans_recon_dc_luma.s b/decoder/arm/ihevcd_itrans_recon_dc_luma.s
index 8aee84c..da1f1ad 100644
--- a/decoder/arm/ihevcd_itrans_recon_dc_luma.s
+++ b/decoder/arm/ihevcd_itrans_recon_dc_luma.s
@@ -59,9 +59,9 @@
 
 
     push        {r0-r11,lr}
-    ldr         r4,[sp,#0x34]               @loads log2_trans_size
-    ldr         r5,[sp,#0x38]               @ loads i2_coeff_value
-
+    vpush       {d8-d15}
+    ldr         r4,[sp,#0x74]               @loads log2_trans_size
+    ldr         r5,[sp,#0x78]               @ loads i2_coeff_value
     mov         r10,#1
     lsl         r4,r10,r4                   @    trans_size = (1 << log2_trans_size)@
     mov         r6,#64 @ 1 << (shift1 - 1)@
@@ -182,6 +182,7 @@
     vst1.u32    {d5[0]},[r1]
 
 end_loops:
+    vpop        {d8-d15}
     pop         {r0-r11,pc}