Mterp: Always use macros for vreg reads/writes.

This makes it easier to find/modify the related code.

Test: No change of the assembly instructions in libart.so
Change-Id: Ia01a5cbda247bcb06fad4c1d1c903d8c53a56e93
diff --git a/runtime/interpreter/mterp/arm/arithmetic.S b/runtime/interpreter/mterp/arm/arithmetic.S
index 7a373c7..a6ba454 100644
--- a/runtime/interpreter/mterp/arm/arithmetic.S
+++ b/runtime/interpreter/mterp/arm/arithmetic.S
@@ -157,8 +157,8 @@
     VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
-    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
-    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
     .if $chkzero
     orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
     beq     common_errDivideByZero
@@ -168,7 +168,7 @@
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    SET_VREG_WIDE_BY_ADDR $result0,$result1,r9  @ vAA/vAA+1<- $result0/$result1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 14-17 instructions */
 
@@ -192,8 +192,8 @@
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
     VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
-    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     .if $chkzero
     orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
     beq     common_errDivideByZero
@@ -203,7 +203,7 @@
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    SET_VREG_WIDE_BY_ADDR $result0,$result1,r9  @ vAA/vAA+1<- $result0/$result1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 12-15 instructions */
 
@@ -243,7 +243,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ r0<- op, r0-r3 changed
@@ -265,13 +265,13 @@
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ r0/r1<- op, r2-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 10-11 instructions */
 
@@ -293,7 +293,7 @@
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 9-10 instructions */
 
@@ -345,8 +345,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
-    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
-    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
     cmp     r0, r2
     sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
     mov     ip, #0
@@ -541,8 +541,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
-    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
-    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
     mul     ip, r2, r1                  @ ip<- ZxW
     umull   r1, lr, r2, r0              @ r1/lr <- ZxX
     mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
@@ -552,7 +552,7 @@
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2 }                @ vAA/vAA+1<- r1/r2
+    SET_VREG_WIDE_BY_ADDR r1, r2 , r0   @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_mul_long_2addr():
@@ -569,8 +569,8 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
-    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
-    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, rINST @ r0/r1<- vAA/vAA+1
     mul     ip, r2, r1                  @ ip<- ZxW
     umull   r1, lr, r2, r0              @ r1/lr <- ZxX
     mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
@@ -578,7 +578,7 @@
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     add     r2, r2, lr                  @ r2<- r2 + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    SET_VREG_WIDE_BY_ADDR r1, r2, r0    @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_neg_int():
@@ -781,7 +781,7 @@
     mov     r0, r0, lsr #8              @ r0<- CC
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
@@ -793,7 +793,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_shl_long_2addr():
@@ -808,7 +808,7 @@
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     mov     r1, r1, asl r2              @ r1<- r1 << r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
@@ -817,7 +817,7 @@
     movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_shr_int():
@@ -843,7 +843,7 @@
     mov     r0, r0, lsr #8              @ r0<- CC
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
@@ -855,7 +855,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_shr_long_2addr():
@@ -870,7 +870,7 @@
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -879,7 +879,7 @@
     movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_sub_int():
@@ -917,7 +917,7 @@
     mov     r0, r0, lsr #8              @ r0<- CC
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
@@ -929,7 +929,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_ushr_long_2addr():
@@ -944,7 +944,7 @@
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -953,7 +953,7 @@
     movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_xor_int():
diff --git a/runtime/interpreter/mterp/arm/array.S b/runtime/interpreter/mterp/arm/array.S
index 88d89c5..7b3db61 100644
--- a/runtime/interpreter/mterp/arm/array.S
+++ b/runtime/interpreter/mterp/arm/array.S
@@ -87,7 +87,7 @@
     ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    SET_VREG_WIDE_BY_ADDR r2, r3, r9    @ vAA/vAA+1<- r2/r3
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_aput(store="str", shift="2", data_offset="MIRROR_INT_ARRAY_DATA_OFFSET"):
@@ -169,7 +169,7 @@
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r9    @ r2/r3<- vAA/vAA+1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/control_flow.S b/runtime/interpreter/mterp/arm/control_flow.S
index 51832e1..2299ef9 100644
--- a/runtime/interpreter/mterp/arm/control_flow.S
+++ b/runtime/interpreter/mterp/arm/control_flow.S
@@ -189,7 +189,7 @@
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
-    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1 <- vAA/vAA+1
     b       MterpReturn
 
 %def op_sparse_switch():
diff --git a/runtime/interpreter/mterp/arm/floating_point.S b/runtime/interpreter/mterp/arm/floating_point.S
index 21c386e..9e4d00c 100644
--- a/runtime/interpreter/mterp/arm/floating_point.S
+++ b/runtime/interpreter/mterp/arm/floating_point.S
@@ -13,8 +13,8 @@
     and     r2, r0, #255                @ r2<- BB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
-    flds    s1, [r3]                    @ s1<- vCC
-    flds    s0, [r2]                    @ s0<- vBB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vCC
+    GET_VREG_FLOAT_BY_ADDR s0, r2       @ s0<- vBB
 
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     $instr                              @ s2<- op
@@ -35,12 +35,12 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
-    flds    s1, [r3]                    @ s1<- vB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    flds    s0, [r9]                    @ s0<- vA
+    GET_VREG_FLOAT_BY_ADDR s0, r9       @ s0<- vA
     $instr                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    fsts    s2, [r9]                    @ vAA<- s2 No need to clear as it's 2addr
+    SET_VREG_FLOAT_BY_ADDR s2, r9       @ vAA<- s2 No need to clear as it's 2addr
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def fbinopWide(instr=""):
@@ -58,14 +58,14 @@
     and     r2, r0, #255                @ r2<- BB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
-    fldd    d1, [r3]                    @ d1<- vCC
-    fldd    d0, [r2]                    @ d0<- vBB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vCC
+    GET_VREG_DOUBLE_BY_ADDR d0, r2      @ d0<- vBB
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     $instr                              @ s2<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
-    fstd    d2, [r9]                    @ vAA<- d2
+    SET_VREG_DOUBLE_BY_ADDR d2, r9      @ vAA<- d2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def fbinopWide2addr(instr=""):
@@ -82,13 +82,13 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
-    fldd    d1, [r3]                    @ d1<- vB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    fldd    d0, [r9]                    @ d0<- vA
+    GET_VREG_DOUBLE_BY_ADDR d0, r9      @ d0<- vA
     $instr                              @ d2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    fstd    d2, [r9]                    @ vAA<- d2
+    SET_VREG_DOUBLE_BY_ADDR d2, r9      @ vAA<- d2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def funop(instr=""):
@@ -101,7 +101,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    flds    s0, [r3]                    @ s0<- vB
+    GET_VREG_FLOAT_BY_ADDR s0, r3       @ s0<- vB
     ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ s1<- op
@@ -119,7 +119,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    fldd    d0, [r3]                    @ d0<- vB
+    GET_VREG_DOUBLE_BY_ADDR d0, r3      @ d0<- vB
     ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ s0<- op
@@ -137,14 +137,14 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    flds    s0, [r3]                    @ s0<- vB
+    GET_VREG_FLOAT_BY_ADDR s0, r3       @ s0<- vB
     ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
-    fstd    d0, [r9]                    @ vA<- d0
+    SET_VREG_DOUBLE_BY_ADDR d0, r9      @ vA<- d0
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_add_double():
@@ -183,8 +183,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    fldd    d0, [r2]                    @ d0<- vBB
-    fldd    d1, [r3]                    @ d1<- vCC
+    GET_VREG_DOUBLE_BY_ADDR d0, r2      @ d0<- vBB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vCC
     vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
@@ -219,8 +219,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    flds    s0, [r2]                    @ s0<- vBB
-    flds    s1, [r3]                    @ s1<- vCC
+    GET_VREG_FLOAT_BY_ADDR s0, r2       @ s0<- vBB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vCC
     vcmpe.f32 s0, s1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
@@ -255,8 +255,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    fldd    d0, [r2]                    @ d0<- vBB
-    fldd    d1, [r3]                    @ d1<- vCC
+    GET_VREG_DOUBLE_BY_ADDR d0, r2      @ d0<- vBB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vCC
     vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
@@ -291,8 +291,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    flds    s0, [r2]                    @ s0<- vBB
-    flds    s1, [r3]                    @ s1<- vCC
+    GET_VREG_FLOAT_BY_ADDR s0, r2       @ s0<- vBB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vCC
     vcmpe.f32  s0, s1                   @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
diff --git a/runtime/interpreter/mterp/arm/main.S b/runtime/interpreter/mterp/arm/main.S
index 6d6b190..4cf65d1 100644
--- a/runtime/interpreter/mterp/arm/main.S
+++ b/runtime/interpreter/mterp/arm/main.S
@@ -298,6 +298,25 @@
     add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
 .endm
 
+.macro GET_VREG_WIDE_BY_ADDR reg0, reg1, addr
+    ldmia \addr, {\reg0, \reg1}         /* reg0/reg1<- wide vreg pair at addr */
+.endm
+.macro SET_VREG_WIDE_BY_ADDR reg0, reg1, addr
+    stmia \addr, {\reg0, \reg1}         /* wide vreg pair at addr<- reg0/reg1 */
+.endm
+.macro GET_VREG_FLOAT_BY_ADDR reg, addr
+    flds \reg, [\addr]                  /* reg<- float vreg at addr */
+.endm
+.macro SET_VREG_FLOAT_BY_ADDR reg, addr
+    fsts \reg, [\addr]                  /* float vreg at addr<- reg */
+.endm
+.macro GET_VREG_DOUBLE_BY_ADDR reg, addr
+    fldd \reg, [\addr]                  /* reg<- double vreg at addr */
+.endm
+.macro SET_VREG_DOUBLE_BY_ADDR reg, addr
+    fstd \reg, [\addr]                  /* double vreg at addr<- reg */
+.endm
+
 /*
  * Refresh handler table.
  */
diff --git a/runtime/interpreter/mterp/arm/object.S b/runtime/interpreter/mterp/arm/object.S
index 092aa9e..7736383 100644
--- a/runtime/interpreter/mterp/arm/object.S
+++ b/runtime/interpreter/mterp/arm/object.S
@@ -160,7 +160,7 @@
     VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
     CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ fp[A]<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_instance_of():
@@ -257,7 +257,7 @@
     cmp     r2, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
-    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r0    @ r0/r1<- fp[A]/fp[A+1]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     strd    r0, [r2, r3]                @ obj.field<- r0/r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/other.S b/runtime/interpreter/mterp/arm/other.S
index fcdde1e..31b9354 100644
--- a/runtime/interpreter/mterp/arm/other.S
+++ b/runtime/interpreter/mterp/arm/other.S
@@ -104,7 +104,7 @@
     FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_const_wide_16():
@@ -116,7 +116,7 @@
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_const_wide_32():
@@ -130,7 +130,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     mov     r1, r0, asr #31             @ r1<- ssssssss
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_const_wide_high16():
@@ -143,7 +143,7 @@
     CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_monitor_enter():
@@ -279,7 +279,7 @@
     ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r2    @ fp[AA]<- r0/r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -290,11 +290,11 @@
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- fp[B]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r2    @ fp[A]<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_move_wide_16():
@@ -304,10 +304,10 @@
     FETCH r2, 1                         @ r2<- AAAA
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
     VREG_INDEX_TO_ADDR lr, r2           @ r2<- &fp[AAAA]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- fp[BBBB]
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
-    stmia   lr, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, lr    @ fp[AAAA]<- r0/r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -318,11 +318,11 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
     VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- fp[BBBB]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r2    @ fp[AA]<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_nop():
diff --git a/runtime/interpreter/mterp/arm64/floating_point.S b/runtime/interpreter/mterp/arm64/floating_point.S
index 04ca694..ad42db3 100644
--- a/runtime/interpreter/mterp/arm64/floating_point.S
+++ b/runtime/interpreter/mterp/arm64/floating_point.S
@@ -1,5 +1,5 @@
 %def fbinop(instr=""):
-    /*:
+    /*
      * Generic 32-bit floating-point operation.
      *
      * For: add-float, sub-float, mul-float, div-float
@@ -15,7 +15,24 @@
     lsr     w1, wINST, #8               // r1<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
-    SET_VREG  s0, w1
+    SET_VREG_FLOAT s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+%def fbinopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2"):
+    /*
+     * Generic 64-bit floating-point binary operation: vAA<- vBB op vCC.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_DOUBLE $r2, w2             // $r2<- vCC
+    GET_VREG_DOUBLE $r1, w1             // $r1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $instr                              // $result<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_DOUBLE $result, w4         // vAA<- $result
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def fbinop2addr(instr=""):
@@ -34,7 +51,22 @@
     $instr                              // s2<- op
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
-    SET_VREG s2, w9
+    SET_VREG_FLOAT s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+%def fbinopWide2addr(instr="fadd d0, d0, d1", r0="d0", r1="d1"):
+    /*
+     * Generic 64-bit floating-point "/2addr" binary operation: vA<- vA op vB.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_DOUBLE $r1, w1             // $r1<- vB
+    GET_VREG_DOUBLE $r0, w2             // $r0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $instr                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_DOUBLE $r0, w2             // vA<- $r0 (result)
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def fcmp(wide="", r1="s1", r2="s2", cond="lt"):
@@ -47,8 +79,13 @@
     lsr     w4, wINST, #8               // w4<- AA
     and     w2, w0, #255                // w2<- BB
     lsr     w3, w0, #8                  // w3<- CC
-    GET_VREG$wide $r1, w2
-    GET_VREG$wide $r2, w3
+%  if r1.startswith("d"):
+    GET_VREG_DOUBLE $r1, w2
+    GET_VREG_DOUBLE $r2, w3
+%  else:
+    GET_VREG $r1, w2
+    GET_VREG $r2, w3
+%  #endif
     fcmp $r1, $r2
     cset w0, ne
     cneg w0, w0, $cond
@@ -72,7 +109,7 @@
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG $tgtreg, w4                // vA<- d0
+    SET_VREG_FLOAT $tgtreg, w4          // vA<- d0
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def funopNarrower(srcreg="s0", tgtreg="d0", instr=""):
@@ -85,11 +122,15 @@
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
     ubfx    w4, wINST, #8, #4           // w4<- A
+%  if srcreg.startswith("d"):
+    GET_VREG_DOUBLE $srcreg, w3
+%  else:
     GET_VREG_WIDE $srcreg, w3
+%  #endif
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG $tgtreg, w4                // vA<- d0
+    SET_VREG_FLOAT $tgtreg, w4          // vA<- d0
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def funopWide(srcreg="s0", tgtreg="d0", instr=""):
@@ -102,11 +143,19 @@
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
     ubfx    w4, wINST, #8, #4           // w4<- A
+%  if srcreg.startswith("d"):
+    GET_VREG_DOUBLE $srcreg, w3
+%  else:
     GET_VREG_WIDE $srcreg, w3
+%  #endif
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
+%  if tgtreg.startswith("d"):
+    SET_VREG_DOUBLE $tgtreg, w4         // vA<- d0
+%  else:
     SET_VREG_WIDE $tgtreg, w4           // vA<- d0
+%  #endif
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def funopWider(srcreg="s0", tgtreg="d0", instr=""):
@@ -127,10 +176,10 @@
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def op_add_double():
-%  binopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_add_double_2addr():
-%  binopWide2addr(instr="fadd     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fadd     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_add_float():
 %  fbinop(instr="fadd   s0, s0, s1")
@@ -151,10 +200,10 @@
 %  fcmp(wide="", r1="s1", r2="s2", cond="lt")
 
 %def op_div_double():
-%  binopWide(instr="fdiv d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fdiv d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_div_double_2addr():
-%  binopWide2addr(instr="fdiv     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fdiv     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_div_float():
 %  fbinop(instr="fdiv   s0, s0, s1")
@@ -193,10 +242,10 @@
 %  funopNarrower(instr="scvtf s0, x0", srcreg="x0", tgtreg="s0")
 
 %def op_mul_double():
-%  binopWide(instr="fmul d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fmul d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_mul_double_2addr():
-%  binopWide2addr(instr="fmul     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fmul     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_mul_float():
 %  fbinop(instr="fmul   s0, s0, s1")
@@ -215,8 +264,8 @@
     FETCH w0, 1                         // w0<- CCBB
     lsr     w2, w0, #8                  // w2<- CC
     and     w1, w0, #255                // w1<- BB
-    GET_VREG_WIDE d1, w2                // d1<- vCC
-    GET_VREG_WIDE d0, w1                // d0<- vBB
+    GET_VREG_DOUBLE d1, w2              // d1<- vCC
+    GET_VREG_DOUBLE d0, w1              // d0<- vBB
     bl  fmod
     lsr     w4, wINST, #8               // w4<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -229,8 +278,8 @@
     /* rem vA, vB */
     lsr     w1, wINST, #12              // w1<- B
     ubfx    w2, wINST, #8, #4           // w2<- A
-    GET_VREG_WIDE d1, w1                // d1<- vB
-    GET_VREG_WIDE d0, w2                // d0<- vA
+    GET_VREG_DOUBLE d1, w1              // d1<- vB
+    GET_VREG_DOUBLE d0, w2              // d0<- vA
     bl fmod
     ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
@@ -253,14 +302,14 @@
     ubfx    w9, wINST, #8, #4           // w9<- A
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
-    SET_VREG s0, w9
+    SET_VREG_FLOAT s0, w9
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def op_sub_double():
-%  binopWide(instr="fsub d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fsub d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_sub_double_2addr():
-%  binopWide2addr(instr="fsub     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fsub     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_sub_float():
 %  fbinop(instr="fsub   s0, s0, s1")
diff --git a/runtime/interpreter/mterp/arm64/main.S b/runtime/interpreter/mterp/arm64/main.S
index 858cb38..0cfbbff 100644
--- a/runtime/interpreter/mterp/arm64/main.S
+++ b/runtime/interpreter/mterp/arm64/main.S
@@ -259,10 +259,13 @@
     str     \reg, [xFP, \vreg, uxtw #2]
     str     \reg, [xREFS, \vreg, uxtw #2]
 .endm
+.macro SET_VREG_FLOAT reg, vreg         // store reg into slot vreg of xFP and zero the same slot of xREFS
+    str     \reg, [xFP, \vreg, uxtw #2]     // [xFP + vreg*4] <- reg (index zero-extended, scaled by 4)
+    str     wzr, [xREFS, \vreg, uxtw #2]    // [xREFS + vreg*4] <- 0 (32-bit zero store)
+.endm
 
 /*
  * Get/set the 64-bit value from a Dalvik register.
- * TUNING: can we do better here?
  */
 .macro GET_VREG_WIDE reg, vreg
     add     ip2, xFP, \vreg, lsl #2
@@ -274,6 +277,16 @@
     add     ip2, xREFS, \vreg, lsl #2
     str     xzr, [ip2]
 .endm
+.macro GET_VREG_DOUBLE reg, vreg        // load reg from the slot at xFP + vreg*4; uses ip2 as scratch
+    add     ip2, xFP, \vreg, lsl #2         // ip2 <- xFP + (vreg << 2)
+    ldr     \reg, [ip2]                     // reg <- [ip2]; load width follows the register passed as reg
+.endm
+.macro SET_VREG_DOUBLE reg, vreg        // store reg at xFP + vreg*4 and zero the matching xREFS entries; uses ip2 as scratch
+    add     ip2, xFP, \vreg, lsl #2         // ip2 <- xFP + (vreg << 2)
+    str     \reg, [ip2]                     // value slot <- reg
+    add     ip2, xREFS, \vreg, lsl #2       // ip2 <- xREFS + (vreg << 2)
+    str     xzr, [ip2]                      // 64-bit zero store: covers two adjacent 4-byte xREFS entries
+.endm
 
 /*
  * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
diff --git a/runtime/interpreter/mterp/x86/arithmetic.S b/runtime/interpreter/mterp/x86/arithmetic.S
index 3b5f0be..973e5b8 100644
--- a/runtime/interpreter/mterp/x86/arithmetic.S
+++ b/runtime/interpreter/mterp/x86/arithmetic.S
@@ -153,7 +153,7 @@
     movzbl  2(rPC), %eax                    # eax <- BB
     movzbl  3(rPC), %ecx                    # ecx <- CC
     GET_VREG %eax, %eax                     # eax <- vBB
-    $instr                                  # ex: addl    VREG_ADDRESS(%ecx),%eax
+    $instr VREG_ADDRESS(%ecx), %eax
     SET_VREG $result, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -187,7 +187,7 @@
     sarl    $$4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $$0xf, %cl                      # ecx <- A
-    $instr                                  # for ex: addl   %eax,VREG_ADDRESS(%ecx)
+    $instr %eax, VREG_ADDRESS(%ecx)
     CLEAR_REF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -240,8 +240,8 @@
     movl    rIBASE, LOCAL0(%esp)            # save rIBASE
     GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
     GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
-    $instr1                                 # ex: addl   VREG_ADDRESS(%ecx),rIBASE
-    $instr2                                 # ex: adcl   VREG_HIGH_ADDRESS(%ecx),%eax
+    $instr1 VREG_ADDRESS(%ecx), rIBASE
+    $instr2 VREG_HIGH_ADDRESS(%ecx), %eax
     SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
     movl    LOCAL0(%esp), rIBASE            # restore rIBASE
     SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
@@ -257,8 +257,8 @@
     GET_VREG %eax, %ecx                     # eax<- v[B+0]
     GET_VREG_HIGH %ecx, %ecx                # eax<- v[B+1]
     andb    $$0xF, rINSTbl                  # rINST<- A
-    $instr1                                 # ex: addl   %eax,(rFP,rINST,4)
-    $instr2                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    $instr1 %eax, VREG_ADDRESS(rINST)
+    $instr2 %ecx, VREG_HIGH_ADDRESS(rINST)
     CLEAR_WIDE_REF rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -355,10 +355,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_add_int():
-%  binop(instr="addl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="addl")
 
 %def op_add_int_2addr():
-%  binop2addr(instr="addl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="addl")
 
 %def op_add_int_lit16():
 %  binopLit16(instr="addl    %ecx, %eax")
@@ -367,16 +367,16 @@
 %  binopLit8(instr="addl    %ecx, %eax")
 
 %def op_add_long():
-%  binopWide(instr1="addl    VREG_ADDRESS(%ecx), rIBASE", instr2="adcl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="addl", instr2="adcl")
 
 %def op_add_long_2addr():
-%  binopWide2addr(instr1="addl    %eax, (rFP,rINST,4)", instr2="adcl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="addl", instr2="adcl")
 
 %def op_and_int():
-%  binop(instr="andl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="andl")
 
 %def op_and_int_2addr():
-%  binop2addr(instr="andl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="andl")
 
 %def op_and_int_lit16():
 %  binopLit16(instr="andl    %ecx, %eax")
@@ -385,10 +385,10 @@
 %  binopLit8(instr="andl    %ecx, %eax")
 
 %def op_and_long():
-%  binopWide(instr1="andl    VREG_ADDRESS(%ecx), rIBASE", instr2="andl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="andl", instr2="andl")
 
 %def op_and_long_2addr():
-%  binopWide2addr(instr1="andl    %eax, (rFP,rINST,4)", instr2="andl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="andl", instr2="andl")
 
 %def op_cmp_long():
 /*
@@ -666,10 +666,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_or_int():
-%  binop(instr="orl     VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="orl")
 
 %def op_or_int_2addr():
-%  binop2addr(instr="orl     %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="orl")
 
 %def op_or_int_lit16():
 %  binopLit16(instr="orl     %ecx, %eax")
@@ -678,10 +678,10 @@
 %  binopLit8(instr="orl     %ecx, %eax")
 
 %def op_or_long():
-%  binopWide(instr1="orl     VREG_ADDRESS(%ecx), rIBASE", instr2="orl     VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="orl", instr2="orl")
 
 %def op_or_long_2addr():
-%  binopWide2addr(instr1="orl     %eax, (rFP,rINST,4)", instr2="orl     %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="orl", instr2="orl")
 
 %def op_rem_int():
 %  bindiv(result="rIBASE", special="$0", rem="1")
@@ -845,16 +845,16 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_sub_int():
-%  binop(instr="subl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="subl")
 
 %def op_sub_int_2addr():
-%  binop2addr(instr="subl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="subl")
 
 %def op_sub_long():
-%  binopWide(instr1="subl    VREG_ADDRESS(%ecx), rIBASE", instr2="sbbl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="subl", instr2="sbbl")
 
 %def op_sub_long_2addr():
-%  binopWide2addr(instr1="subl    %eax, (rFP,rINST,4)", instr2="sbbl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="subl", instr2="sbbl")
 
 %def op_ushr_int():
 %  binop1(instr="shrl    %cl, %eax")
@@ -925,10 +925,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_xor_int():
-%  binop(instr="xorl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="xorl")
 
 %def op_xor_int_2addr():
-%  binop2addr(instr="xorl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="xorl")
 
 %def op_xor_int_lit16():
 %  binopLit16(instr="xorl    %ecx, %eax")
@@ -937,7 +937,7 @@
 %  binopLit8(instr="xorl    %ecx, %eax")
 
 %def op_xor_long():
-%  binopWide(instr1="xorl    VREG_ADDRESS(%ecx), rIBASE", instr2="xorl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="xorl", instr2="xorl")
 
 %def op_xor_long_2addr():
-%  binopWide2addr(instr1="xorl    %eax, (rFP,rINST,4)", instr2="xorl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="xorl", instr2="xorl")
diff --git a/runtime/interpreter/mterp/x86/floating_point.S b/runtime/interpreter/mterp/x86/floating_point.S
index 3de1fc8..bc7c59d 100644
--- a/runtime/interpreter/mterp/x86/floating_point.S
+++ b/runtime/interpreter/mterp/x86/floating_point.S
@@ -18,7 +18,7 @@
     /* op vAA, vBB, vCC */
     movzbl  3(rPC), %ecx                    # ecx<- CC
     movzbl  2(rPC), %eax                    # eax<- BB
-    movs${suff} VREG_ADDRESS(%eax), %xmm0
+    GET_VREG_XMM${suff} %xmm0, %eax
     xor     %eax, %eax
     ucomis${suff} VREG_ADDRESS(%ecx), %xmm0
     jp      .L${opcode}_nan_is_${nanval}
@@ -55,9 +55,9 @@
 %def sseBinop(instr="", suff=""):
     movzbl  2(rPC), %ecx                    # ecx <- BB
     movzbl  3(rPC), %eax                    # eax <- CC
-    movs${suff}   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %ecx         # %xmm0 <- vBB (1st src); macro variant picked by suff
     ${instr}${suff} VREG_ADDRESS(%eax), %xmm0
-    movs${suff}   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    SET_VREG_XMM${suff} %xmm0, rINST        # vAA <- %xmm0 (value only; ref slot is cleared below)
     pxor    %xmm0, %xmm0
     movs${suff}   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -65,10 +65,10 @@
 %def sseBinop2Addr(instr="", suff=""):
     movzx   rINSTbl, %ecx                   # ecx <- A+
     andl    $$0xf, %ecx                     # ecx <- A
-    movs${suff} VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %ecx         # %xmm0 <- vA (1st src); macro variant picked by suff
     sarl    $$4, rINST                      # rINST<- B
     ${instr}${suff} VREG_ADDRESS(rINST), %xmm0
-    movs${suff} %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    SET_VREG_XMM${suff} %xmm0, %ecx         # vA <- %xmm0. NOTE(review): the ref clear below indexes rINST (B here), not %ecx (A) - confirm intended
     pxor    %xmm0, %xmm0
     movs${suff} %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/main.S b/runtime/interpreter/mterp/x86/main.S
index 0621fb4..6eaea6f 100644
--- a/runtime/interpreter/mterp/x86/main.S
+++ b/runtime/interpreter/mterp/x86/main.S
@@ -318,6 +318,19 @@
     movl    MACRO_LITERAL(0), VREG_REF_HIGH_ADDRESS(\_vreg)
 .endm
 
+.macro GET_VREG_XMMs _xmmreg _vreg      /* single precision: _xmmreg <- scalar at VREG_ADDRESS(_vreg) */
+    movss VREG_ADDRESS(\_vreg), \_xmmreg
+.endm
+.macro GET_VREG_XMMd _xmmreg _vreg      /* double precision: _xmmreg <- scalar at VREG_ADDRESS(_vreg) */
+    movsd VREG_ADDRESS(\_vreg), \_xmmreg
+.endm
+.macro SET_VREG_XMMs _xmmreg _vreg      /* single precision: VREG_ADDRESS(_vreg) <- _xmmreg; does not write the ref slot */
+    movss \_xmmreg, VREG_ADDRESS(\_vreg)
+.endm
+.macro SET_VREG_XMMd _xmmreg _vreg      /* double precision: VREG_ADDRESS(_vreg) <- _xmmreg; does not write the ref slot */
+    movsd \_xmmreg, VREG_ADDRESS(\_vreg)
+.endm
+
 /*
  * function support macros.
  */
diff --git a/runtime/interpreter/mterp/x86_64/arithmetic.S b/runtime/interpreter/mterp/x86_64/arithmetic.S
index 263f82b..ff64b53 100644
--- a/runtime/interpreter/mterp/x86_64/arithmetic.S
+++ b/runtime/interpreter/mterp/x86_64/arithmetic.S
@@ -137,7 +137,7 @@
     movzbq  2(rPC), %rax                    # rax <- BB
     movzbq  3(rPC), %rcx                    # rcx <- CC
     GET_VREG %eax, %rax                     # eax <- vBB
-    $instr                                  # ex: addl    VREG_ADDRESS(%rcx),%eax
+    $instr VREG_ADDRESS(%rcx),%eax
     SET_VREG $result, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -177,7 +177,7 @@
     sarl    $$4, rINST                      # rINST <- B
     andb    $$0xf, %cl                      # ecx <- A
     GET_VREG %eax, rINSTq                   # eax <- vB
-    $instr                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    $instr %eax, VREG_ADDRESS(%rcx)
     CLEAR_REF %rcx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -228,7 +228,7 @@
     movzbq  2(rPC), %rax                    # eax <- BB
     movzbq  3(rPC), %rcx                    # ecx <- CC
     GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
-    $instr                                  # ex: addq   VREG_ADDRESS(%rcx),%rax
+    $instr VREG_ADDRESS(%rcx),%rax
     SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -241,7 +241,7 @@
     sarl    $$4, rINST                      # rINST <- B
     andb    $$0xf, %cl                      # ecx <- A
     GET_WIDE_VREG %rax, rINSTq              # rax <- vB
-    $instr                                  # for ex: addq   %rax,VREG_ADDRESS(%rcx)
+    $instr %rax,VREG_ADDRESS(%rcx)
     CLEAR_WIDE_REF %rcx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -255,7 +255,7 @@
     movl    rINST, %ecx                     # rcx <- A+
     sarl    $$4, rINST                      # rINST <- B
     andb    $$0xf, %cl                      # ecx <- A
-    movs${fp_suffix}   VREG_ADDRESS(rINSTq), %xmm0
+    GET_VREG_XMM${fp_suffix} %xmm0, rINSTq
     mov${i_suffix}  ${max_const}, ${result_reg}
     cvtsi2s${fp_suffix}${i_suffix} ${result_reg}, %xmm1
     comis${fp_suffix}    %xmm1, %xmm0
@@ -317,10 +317,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_add_int():
-%  binop(instr="addl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="addl")
 
 %def op_add_int_2addr():
-%  binop2addr(instr="addl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="addl")
 
 %def op_add_int_lit16():
 %  binopLit16(instr="addl    %ecx, %eax")
@@ -329,16 +329,16 @@
 %  binopLit8(instr="addl    %ecx, %eax")
 
 %def op_add_long():
-%  binopWide(instr="addq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="addq")
 
 %def op_add_long_2addr():
-%  binopWide2addr(instr="addq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="addq")
 
 %def op_and_int():
-%  binop(instr="andl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="andl")
 
 %def op_and_int_2addr():
-%  binop2addr(instr="andl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="andl")
 
 %def op_and_int_lit16():
 %  binopLit16(instr="andl    %ecx, %eax")
@@ -347,10 +347,10 @@
 %  binopLit8(instr="andl    %ecx, %eax")
 
 %def op_and_long():
-%  binopWide(instr="andq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="andq")
 
 %def op_and_long_2addr():
-%  binopWide2addr(instr="andq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="andq")
 
 %def op_cmp_long():
 /*
@@ -413,7 +413,7 @@
 %  op_move()
 
 %def op_mul_int():
-%  binop(instr="imull   VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="imull")
 
 %def op_mul_int_2addr():
     /* mul vA, vB */
@@ -432,7 +432,7 @@
 %  binopLit8(instr="imull   %ecx, %eax")
 
 %def op_mul_long():
-%  binopWide(instr="imulq   VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="imulq")
 
 %def op_mul_long_2addr():
     /* mul vA, vB */
@@ -457,10 +457,10 @@
 %  unop(instr="    notq    %rax", wide="1")
 
 %def op_or_int():
-%  binop(instr="orl     VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="orl")
 
 %def op_or_int_2addr():
-%  binop2addr(instr="orl     %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="orl")
 
 %def op_or_int_lit16():
 %  binopLit16(instr="orl     %ecx, %eax")
@@ -469,10 +469,10 @@
 %  binopLit8(instr="orl     %ecx, %eax")
 
 %def op_or_long():
-%  binopWide(instr="orq     VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="orq")
 
 %def op_or_long_2addr():
-%  binopWide2addr(instr="orq     %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="orq")
 
 %def op_rem_int():
 %  bindiv(result="%edx", second="%ecx", wide="0", suffix="l", rem="1")
@@ -530,16 +530,16 @@
 %  shop2addr(instr="sarq    %cl, %rax", wide="1")
 
 %def op_sub_int():
-%  binop(instr="subl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="subl")
 
 %def op_sub_int_2addr():
-%  binop2addr(instr="subl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="subl")
 
 %def op_sub_long():
-%  binopWide(instr="subq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="subq")
 
 %def op_sub_long_2addr():
-%  binopWide2addr(instr="subq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="subq")
 
 %def op_ushr_int():
 %  binop1(instr="shrl    %cl, %eax")
@@ -557,10 +557,10 @@
 %  shop2addr(instr="shrq    %cl, %rax", wide="1")
 
 %def op_xor_int():
-%  binop(instr="xorl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="xorl")
 
 %def op_xor_int_2addr():
-%  binop2addr(instr="xorl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="xorl")
 
 %def op_xor_int_lit16():
 %  binopLit16(instr="xorl    %ecx, %eax")
@@ -569,7 +569,7 @@
 %  binopLit8(instr="xorl    %ecx, %eax")
 
 %def op_xor_long():
-%  binopWide(instr="xorq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="xorq")
 
 %def op_xor_long_2addr():
-%  binopWide2addr(instr="xorq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="xorq")
diff --git a/runtime/interpreter/mterp/x86_64/floating_point.S b/runtime/interpreter/mterp/x86_64/floating_point.S
index b40c0e6..7fcb742 100644
--- a/runtime/interpreter/mterp/x86_64/floating_point.S
+++ b/runtime/interpreter/mterp/x86_64/floating_point.S
@@ -18,7 +18,7 @@
     /* op vAA, vBB, vCC */
     movzbq  3(rPC), %rcx                    # ecx<- CC
     movzbq  2(rPC), %rax                    # eax<- BB
-    movs${suff} VREG_ADDRESS(%rax), %xmm0
+    GET_VREG_XMM${suff} %xmm0, %rax
     xor     %eax, %eax
     ucomis${suff} VREG_ADDRESS(%rcx), %xmm0
     jp      .L${opcode}_nan_is_${nanval}
@@ -44,10 +44,10 @@
     andb    $$0xf, %cl                      # ecx <- A
     cvts${source_suffix}2s${dest_suffix}    VREG_ADDRESS(rINSTq), %xmm0
     .if $wide
-    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    SET_VREG_XMMd %xmm0, %rcx
     CLEAR_WIDE_REF %rcx
     .else
-    movss   %xmm0, VREG_ADDRESS(%rcx)
+    SET_VREG_XMMs %xmm0, %rcx
     CLEAR_REF %rcx
     .endif
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
@@ -55,9 +55,9 @@
 %def sseBinop(instr="", suff=""):
     movzbq  2(rPC), %rcx                    # ecx <- BB
     movzbq  3(rPC), %rax                    # eax <- CC
-    movs${suff}   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %rcx         # %xmm0 <- vBB (1st src); macro variant picked by suff
     ${instr}${suff} VREG_ADDRESS(%rax), %xmm0
-    movs${suff}   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    SET_VREG_XMM${suff} %xmm0, rINSTq       # vAA <- %xmm0 (value only; ref slot is cleared below)
     pxor    %xmm0, %xmm0
     movs${suff}   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -65,10 +65,10 @@
 %def sseBinop2Addr(instr="", suff=""):
     movl    rINST, %ecx                     # ecx <- A+
     andl    $$0xf, %ecx                     # ecx <- A
-    movs${suff} VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %rcx         # %xmm0 <- vA (1st src); macro variant picked by suff
     sarl    $$4, rINST                      # rINST<- B
     ${instr}${suff} VREG_ADDRESS(rINSTq), %xmm0
-    movs${suff} %xmm0, VREG_ADDRESS(%rcx)   # vAA<- %xmm0
+    SET_VREG_XMM${suff} %xmm0, %rcx         # vA <- %xmm0. NOTE(review): the ref clear below indexes rINSTq (B here), not %rcx (A) - confirm intended
     pxor    %xmm0, %xmm0
     movs${suff} %xmm0, VREG_REF_ADDRESS(rINSTq)  # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/main.S b/runtime/interpreter/mterp/x86_64/main.S
index 4609067..5900220 100644
--- a/runtime/interpreter/mterp/x86_64/main.S
+++ b/runtime/interpreter/mterp/x86_64/main.S
@@ -306,6 +306,19 @@
     movl    MACRO_LITERAL(0), VREG_REF_HIGH_ADDRESS(\_vreg)
 .endm
 
+.macro GET_VREG_XMMs _xmmreg _vreg      /* single precision: _xmmreg <- scalar at VREG_ADDRESS(_vreg) */
+    movss VREG_ADDRESS(\_vreg), \_xmmreg
+.endm
+.macro GET_VREG_XMMd _xmmreg _vreg      /* double precision: _xmmreg <- scalar at VREG_ADDRESS(_vreg) */
+    movsd VREG_ADDRESS(\_vreg), \_xmmreg
+.endm
+.macro SET_VREG_XMMs _xmmreg _vreg      /* single precision: VREG_ADDRESS(_vreg) <- _xmmreg; does not write the ref slot */
+    movss \_xmmreg, VREG_ADDRESS(\_vreg)
+.endm
+.macro SET_VREG_XMMd _xmmreg _vreg      /* double precision: VREG_ADDRESS(_vreg) <- _xmmreg; does not write the ref slot */
+    movsd \_xmmreg, VREG_ADDRESS(\_vreg)
+.endm
+
 /*
  * function support macros.
  */