ARM/ARM64: Improve Mterp */lit8 assembly.

Embed the literal-extracting shift (asr #8) directly in the operation
when possible. Use UBFX to extract the shift distance: on ARM this
saves an instruction (it replaces the MOVS + AND pair), while on ARM64
it only improves readability, as the reader no longer needs to know
that the higher bits of the distance are ignored by shifts.

Test: Run ART test suite on Nexus 9.
Change-Id: I5666f3eaded618d3f6cb754c4a431fd9548e746d
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
index b8f0d92..7c9c631 100644
--- a/runtime/interpreter/mterp/arm/binopLit8.S
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -1,10 +1,14 @@
-%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+%default {"extract":"asr     r1, r3, #8", "result":"r0", "chkzero":"0"}
     /*
      * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = r0 op r1".
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -17,14 +21,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    $extract                            @ optional; typically r1<- ssssssCC (sign extended)
     .if $chkzero
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    $preinstr                           @ optional op; may set condition codes
     $instr                              @ $result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG $result, r9                @ vAA<- $result
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit8.S b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
index b84684a..035510d 100644
--- a/runtime/interpreter/mterp/arm/op_add_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"add     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"add     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit8.S b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
index d5783e5..af746b5 100644
--- a/runtime/interpreter/mterp/arm/op_and_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"and     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"and     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit8.S b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
index 2d85038..9882bfc 100644
--- a/runtime/interpreter/mterp/arm/op_or_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"orr     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"orr     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
index 2ee11e1..dc953dc 100644
--- a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"rsb     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"rsb     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
index 6a48bfc..60a1498 100644
--- a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
index 60fe5fc..c2f6cb0 100644
--- a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
index 40a4435..5554eb0 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
index 46bb712..97d0b9e 100644
--- a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"eor     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"eor     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
index 0b7c68a..dfa3169 100644
--- a/runtime/interpreter/mterp/arm64/binopLit8.S
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -1,10 +1,14 @@
-%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+%default {"extract": "asr     w1, w3, #8", "preinstr":"", "result":"w0", "chkzero":"0"}
     /*
      * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = w0 op w1".
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -17,7 +21,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    $extract                            // optional; typically w1<- ssssssCC (sign extended)
     .if $chkzero
     cbz     w1, common_errDivideByZero
     .endif
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
index 196ea99..2dfb8b9 100644
--- a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"add     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"add     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
index 167b40e..495b5cd 100644
--- a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"and     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"and     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
index 51675f8..7cb26b7 100644
--- a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"orr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"orr     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
index 17f57f9..9c19b55 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"lsl     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
index 274080c..c7b61df 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"asr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
index ff30e1f..555ed4e 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"lsr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
index 6d187b5..1d3d93e 100644
--- a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"eor     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"eor     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index df25767..02b462f 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -6473,6 +6473,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6485,15 +6489,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    add     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6511,6 +6514,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6523,15 +6530,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    rsb     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6550,6 +6556,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6562,14 +6572,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    asr     r1, r3, #8                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
     mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6657,6 +6666,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6669,15 +6682,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    and     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6695,6 +6707,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6707,15 +6723,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    orr     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6733,6 +6748,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6745,15 +6764,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    eor     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6771,6 +6789,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6783,14 +6805,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6809,6 +6830,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6821,14 +6846,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6847,6 +6871,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6859,14 +6887,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index de37e07..0a99802 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -6044,6 +6044,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6056,13 +6060,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    add     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6080,6 +6084,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6092,7 +6100,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6117,6 +6125,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6129,7 +6141,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6153,6 +6165,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6165,7 +6181,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 1
     cbz     w1, common_errDivideByZero
     .endif
@@ -6189,6 +6205,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6201,7 +6221,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 1
     cbz     w1, common_errDivideByZero
     .endif
@@ -6225,6 +6245,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6237,13 +6261,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    and     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6261,6 +6285,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6273,13 +6301,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    orr     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6297,6 +6325,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6309,13 +6341,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // literal extraction omitted: "asr #8" of w3 is embedded in the op below
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    eor     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6333,6 +6365,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6345,7 +6381,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // w1<- CC & 0x1f (shift distance; zero-extended, not sign-extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6369,6 +6405,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6381,7 +6421,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // w1<- CC & 0x1f (shift distance; zero-extended, not sign-extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6405,6 +6445,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6417,7 +6461,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // w1<- CC & 0x1f (shift distance; zero-extended, not sign-extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif