ART: Enable JitProfiling for Arm Mterp

Also, add the missing shadow frame clear operation (CLEAR_SHADOW_PAIR)
to the 64-bit shift operations.

Change-Id: Icea95b3aeb1d6d36ea92336fb738cf56edd92da4
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
index dc8a679..12ea248 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
index fd7668d..4799e77 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
index c0edf90..88a13d6 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
index ffeaf9c..78d8bb7 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
index f64c861..f98ec63 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
index dbab08d..840283d 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 15745d2..8481fab 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -147,7 +147,7 @@
       Runtime::Current()->GetInstrumentation();
   bool unhandled_instrumentation;
   // TODO: enable for other targets after more extensive testing.
-  if (kRuntimeISA == kArm64) {
+  if ((kRuntimeISA == kArm64) || (kRuntimeISA == kArm)) {
     unhandled_instrumentation = instrumentation->NonJitProfilingActive();
   } else {
     unhandled_instrumentation = instrumentation->IsActive();
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 511c35b..02aa8ac 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -5161,16 +5161,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -5193,16 +5193,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -5225,16 +5225,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6300,17 +6300,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6327,17 +6327,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6354,17 +6354,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction