Merge "More prep work for new opcodes." into dalvik-dev
diff --git a/dx/etc/opcode-gen b/dx/etc/opcode-gen
index 518be4d..d57749f 100755
--- a/dx/etc/opcode-gen
+++ b/dx/etc/opcode-gen
@@ -20,9 +20,11 @@
 # generate code inside the given <file>, based on the directives found
 # in that file:
 #
-#     opcodes:   static final ints for each opcode
-#     dops:      static final objects for each opcode
-#     dops-init: initialization code for the "dops"
+#     opcodes:       static final ints for each opcode
+#     dops:          static final objects for each opcode
+#     dops-init:     initialization code for the "dops"
+#     first-opcodes: a comment indicating which opcodes are at the head
+#                    position of instruction fitting chains
 
 file="$1"
 tmpfile="/tmp/$$.txt"
@@ -57,7 +59,9 @@
 awk -v "bytecodeFile=$bytecodeFile" '
 
 BEGIN {
+    MAX_OPCODE = 65535;
     readBytecodes();
+    deriveOpcodeChains();
     consumeUntil = "";
 }
 
@@ -73,7 +77,8 @@
     consumeUntil = "END(opcodes)";
     print;
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if (hex[i] == "") continue;
         printf("    public static final int %s = 0x%s;\n",
                uppername[i], hex[i]);
     }
@@ -81,12 +86,25 @@
     next;
 }
 
+/BEGIN\(first-opcodes\)/ {
+    consumeUntil = "END(first-opcodes)";
+    print;
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if (isFirst[i] == "true") {
+            printf("    //     DalvOps.%s\n", uppername[i]);
+        }
+    }
+
+    next;
+}
+
 /BEGIN\(dops\)/ {
     consumeUntil = "END(dops)";
     print;
 
-    for (i = 0; i < 256; i++) {
-        if (index(name[i], "unused") != 0) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
             continue;
         }
         printf("    public static final Dop %s =\n" \
@@ -94,6 +112,7 @@
                "            Form%s.THE_ONE, %s, \"%s\");\n\n",
                uppername[i], uppername[i], family[i], format[i], hasres[i],
                name[i]);
+        # TODO: Include nextOpcode[i] in the constructor.
     }
 
     next;
@@ -103,7 +122,10 @@
     consumeUntil = "END(dops-init)";
     print;
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
         if (index(name[i], "unused") != 0) {
             continue;
         }
@@ -127,7 +149,7 @@
             exit 1;
         }
 
-        # Clean up the line and extract the command
+        # Clean up the line and extract the command.
         gsub(/  */, " ", line);
         sub(/ *#.*$/, "", line);
         sub(/ $/, "", line);
@@ -169,18 +191,81 @@
     family[idx] = toupper(parts[1]);
     gsub("-", "_", family[idx]);
 
+    # This association is used when computing "next" opcodes.
+    familyFormat[family[idx],format[idx]] = idx;
+
+    if (nextFormat[format[idx]] == "") {
+        printf("unknown format: %s\n", format[idx]);
+        return 1;
+    }
+
     return 0;
 }
 
 # Define a format family.
-function defineFormat(line, count, parts) {
-    # locals: count, parts
+function defineFormat(line, count, parts, i) {
+    # locals: count, parts, i
     count = split(line, parts);
     if (count < 1)  return -1;
     formats[parts[1]] = line;
+
+    parts[count + 1] = "none";
+    for (i = 1; i <= count; i++) {
+        nextFormat[parts[i]] = parts[i + 1];
+    }
+
     return 0;
 }
 
+# Produce the nextOpcode and isFirst arrays. The former indicates, for
+# each opcode, which one should be tried next when doing instruction
+# fitting. The latter indicates which opcodes are at the head of an
+# instruction fitting chain.
+function deriveOpcodeChains(i, op) {
+    # locals: i, op
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
+        isFirst[i] = "true";
+    }
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
+        op = findNextOpcode(i);
+        nextOpcode[i] = op;
+        if (op != -1) {
+            isFirst[op] = "false";
+        }
+    }
+}
+
+# Given an opcode by index, find the next opcode in the same family
+# (that is, with the same base name) to try when matching instructions
+# to opcodes. This simply walks the nextFormat chain looking for a
+# match. This returns the index of the matching opcode or -1 if there
+# is none.
+function findNextOpcode(idx, fam, fmt, result) {
+    # locals: fam, fmt, result
+    fam = family[idx];
+    fmt = format[idx];
+
+    # Not every opcode has a version with every possible format, so
+    # we have to iterate down the chain until we find one or run out of
+    # formats to try.
+    for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
+        result = familyFormat[fam,fmt];
+        if (result != "") {
+            return result;
+        }
+    }
+
+    return -1;
+}
+
 # Convert a hex value to an int.
 function parseHex(hex, result, chars, count, c, i) {
     # locals: result, chars, count, c, i
diff --git a/dx/etc/run-opcode-gen b/dx/etc/run-opcode-gen
index 061c048..713ed2d 100755
--- a/dx/etc/run-opcode-gen
+++ b/dx/etc/run-opcode-gen
@@ -18,3 +18,4 @@
 
 ./opcode-gen ../src/com/android/dx/dex/code/DalvOps.java
 ./opcode-gen ../src/com/android/dx/dex/code/Dops.java
+./opcode-gen ../src/com/android/dx/dex/code/RopToDop.java
diff --git a/dx/src/com/android/dx/dex/code/RopToDop.java b/dx/src/com/android/dx/dex/code/RopToDop.java
index d8fa1cc..856386b 100644
--- a/dx/src/com/android/dx/dex/code/RopToDop.java
+++ b/dx/src/com/android/dx/dex/code/RopToDop.java
@@ -45,11 +45,183 @@
         // This space intentionally left blank.
     }
 
+    /*
+     * The following comment lists each opcode that should be considered
+     * the "head" of an opcode chain, in terms of the process of fitting
+     * an instruction's arguments to an actual opcode. This list is
+     * automatically generated and may be of use in double-checking the
+     * manually-generated static initialization code for this class.
+     *
+     * TODO: Make opcode-gen produce useful code in this case instead
+     * of just a comment.
+     */
+
+    // BEGIN(first-opcodes); GENERATED AUTOMATICALLY BY opcode-gen
+    //     DalvOps.NOP
+    //     DalvOps.MOVE
+    //     DalvOps.MOVE_WIDE
+    //     DalvOps.MOVE_OBJECT
+    //     DalvOps.MOVE_RESULT
+    //     DalvOps.MOVE_RESULT_WIDE
+    //     DalvOps.MOVE_RESULT_OBJECT
+    //     DalvOps.MOVE_EXCEPTION
+    //     DalvOps.RETURN_VOID
+    //     DalvOps.RETURN
+    //     DalvOps.RETURN_WIDE
+    //     DalvOps.RETURN_OBJECT
+    //     DalvOps.CONST_4
+    //     DalvOps.CONST_WIDE_16
+    //     DalvOps.CONST_STRING
+    //     DalvOps.CONST_CLASS
+    //     DalvOps.MONITOR_ENTER
+    //     DalvOps.MONITOR_EXIT
+    //     DalvOps.CHECK_CAST
+    //     DalvOps.INSTANCE_OF
+    //     DalvOps.ARRAY_LENGTH
+    //     DalvOps.NEW_INSTANCE
+    //     DalvOps.NEW_ARRAY
+    //     DalvOps.FILLED_NEW_ARRAY
+    //     DalvOps.FILL_ARRAY_DATA
+    //     DalvOps.THROW
+    //     DalvOps.GOTO
+    //     DalvOps.PACKED_SWITCH
+    //     DalvOps.SPARSE_SWITCH
+    //     DalvOps.CMPL_FLOAT
+    //     DalvOps.CMPG_FLOAT
+    //     DalvOps.CMPL_DOUBLE
+    //     DalvOps.CMPG_DOUBLE
+    //     DalvOps.CMP_LONG
+    //     DalvOps.IF_EQ
+    //     DalvOps.IF_NE
+    //     DalvOps.IF_LT
+    //     DalvOps.IF_GE
+    //     DalvOps.IF_GT
+    //     DalvOps.IF_LE
+    //     DalvOps.IF_EQZ
+    //     DalvOps.IF_NEZ
+    //     DalvOps.IF_LTZ
+    //     DalvOps.IF_GEZ
+    //     DalvOps.IF_GTZ
+    //     DalvOps.IF_LEZ
+    //     DalvOps.AGET
+    //     DalvOps.AGET_WIDE
+    //     DalvOps.AGET_OBJECT
+    //     DalvOps.AGET_BOOLEAN
+    //     DalvOps.AGET_BYTE
+    //     DalvOps.AGET_CHAR
+    //     DalvOps.AGET_SHORT
+    //     DalvOps.APUT
+    //     DalvOps.APUT_WIDE
+    //     DalvOps.APUT_OBJECT
+    //     DalvOps.APUT_BOOLEAN
+    //     DalvOps.APUT_BYTE
+    //     DalvOps.APUT_CHAR
+    //     DalvOps.APUT_SHORT
+    //     DalvOps.IGET
+    //     DalvOps.IGET_WIDE
+    //     DalvOps.IGET_OBJECT
+    //     DalvOps.IGET_BOOLEAN
+    //     DalvOps.IGET_BYTE
+    //     DalvOps.IGET_CHAR
+    //     DalvOps.IGET_SHORT
+    //     DalvOps.IPUT
+    //     DalvOps.IPUT_WIDE
+    //     DalvOps.IPUT_OBJECT
+    //     DalvOps.IPUT_BOOLEAN
+    //     DalvOps.IPUT_BYTE
+    //     DalvOps.IPUT_CHAR
+    //     DalvOps.IPUT_SHORT
+    //     DalvOps.SGET
+    //     DalvOps.SGET_WIDE
+    //     DalvOps.SGET_OBJECT
+    //     DalvOps.SGET_BOOLEAN
+    //     DalvOps.SGET_BYTE
+    //     DalvOps.SGET_CHAR
+    //     DalvOps.SGET_SHORT
+    //     DalvOps.SPUT
+    //     DalvOps.SPUT_WIDE
+    //     DalvOps.SPUT_OBJECT
+    //     DalvOps.SPUT_BOOLEAN
+    //     DalvOps.SPUT_BYTE
+    //     DalvOps.SPUT_CHAR
+    //     DalvOps.SPUT_SHORT
+    //     DalvOps.INVOKE_VIRTUAL
+    //     DalvOps.INVOKE_SUPER
+    //     DalvOps.INVOKE_DIRECT
+    //     DalvOps.INVOKE_STATIC
+    //     DalvOps.INVOKE_INTERFACE
+    //     DalvOps.NEG_INT
+    //     DalvOps.NOT_INT
+    //     DalvOps.NEG_LONG
+    //     DalvOps.NOT_LONG
+    //     DalvOps.NEG_FLOAT
+    //     DalvOps.NEG_DOUBLE
+    //     DalvOps.INT_TO_LONG
+    //     DalvOps.INT_TO_FLOAT
+    //     DalvOps.INT_TO_DOUBLE
+    //     DalvOps.LONG_TO_INT
+    //     DalvOps.LONG_TO_FLOAT
+    //     DalvOps.LONG_TO_DOUBLE
+    //     DalvOps.FLOAT_TO_INT
+    //     DalvOps.FLOAT_TO_LONG
+    //     DalvOps.FLOAT_TO_DOUBLE
+    //     DalvOps.DOUBLE_TO_INT
+    //     DalvOps.DOUBLE_TO_LONG
+    //     DalvOps.DOUBLE_TO_FLOAT
+    //     DalvOps.INT_TO_BYTE
+    //     DalvOps.INT_TO_CHAR
+    //     DalvOps.INT_TO_SHORT
+    //     DalvOps.ADD_INT_2ADDR
+    //     DalvOps.SUB_INT_2ADDR
+    //     DalvOps.MUL_INT_2ADDR
+    //     DalvOps.DIV_INT_2ADDR
+    //     DalvOps.REM_INT_2ADDR
+    //     DalvOps.AND_INT_2ADDR
+    //     DalvOps.OR_INT_2ADDR
+    //     DalvOps.XOR_INT_2ADDR
+    //     DalvOps.SHL_INT_2ADDR
+    //     DalvOps.SHR_INT_2ADDR
+    //     DalvOps.USHR_INT_2ADDR
+    //     DalvOps.ADD_LONG_2ADDR
+    //     DalvOps.SUB_LONG_2ADDR
+    //     DalvOps.MUL_LONG_2ADDR
+    //     DalvOps.DIV_LONG_2ADDR
+    //     DalvOps.REM_LONG_2ADDR
+    //     DalvOps.AND_LONG_2ADDR
+    //     DalvOps.OR_LONG_2ADDR
+    //     DalvOps.XOR_LONG_2ADDR
+    //     DalvOps.SHL_LONG_2ADDR
+    //     DalvOps.SHR_LONG_2ADDR
+    //     DalvOps.USHR_LONG_2ADDR
+    //     DalvOps.ADD_FLOAT_2ADDR
+    //     DalvOps.SUB_FLOAT_2ADDR
+    //     DalvOps.MUL_FLOAT_2ADDR
+    //     DalvOps.DIV_FLOAT_2ADDR
+    //     DalvOps.REM_FLOAT_2ADDR
+    //     DalvOps.ADD_DOUBLE_2ADDR
+    //     DalvOps.SUB_DOUBLE_2ADDR
+    //     DalvOps.MUL_DOUBLE_2ADDR
+    //     DalvOps.DIV_DOUBLE_2ADDR
+    //     DalvOps.REM_DOUBLE_2ADDR
+    //     DalvOps.ADD_INT_LIT8
+    //     DalvOps.RSUB_INT_LIT8
+    //     DalvOps.MUL_INT_LIT8
+    //     DalvOps.DIV_INT_LIT8
+    //     DalvOps.REM_INT_LIT8
+    //     DalvOps.AND_INT_LIT8
+    //     DalvOps.OR_INT_LIT8
+    //     DalvOps.XOR_INT_LIT8
+    //     DalvOps.SHL_INT_LIT8
+    //     DalvOps.SHR_INT_LIT8
+    //     DalvOps.USHR_INT_LIT8
+    // END(first-opcodes)
+
     static {
         /*
          * Note: The choices made here are to pick the optimistically
          * smallest Dalvik opcode, and leave it to later processing to
-         * pessimize.
+         * pessimize. See the automatically-generated comment above
+         * for reference.
          */
         MAP = new HashMap<Rop, Dop>(400);
         MAP.put(Rops.NOP,               Dops.NOP);