More prep work for new opcodes.

The opcode-gen script now expects there to be up to 65536 opcodes, and
it generates opcode chain information. The chaining info isn't used
yet, but eventually it will get hooked into the instruction selection
logic in dx, meaning that dx will no longer have to do relatively
slower opcode searches; the right "next" opcode will have been
staticly generated and compiled in.

I also added a helpful generated comment in RopToDop, which indicates
which opcodes are the ones that *won't* be found by chaining, that is,
which ones are the starts of chains. This is left as a comment and not
code, though, since there are lots of quirks with how these get hooked
up. I hope to make this cleaner at some point, but today's not the day
for it.

Change-Id: I11da833cc57b383733743114c0f3e0e64903982d
diff --git a/dx/etc/opcode-gen b/dx/etc/opcode-gen
index 518be4d..d57749f 100755
--- a/dx/etc/opcode-gen
+++ b/dx/etc/opcode-gen
@@ -20,9 +20,11 @@
 # generate code inside the given <file>, based on the directives found
 # in that file:
 #
-#     opcodes:   static final ints for each opcode
-#     dops:      static final objects for each opcode
-#     dops-init: initialization code for the "dops"
+#     opcodes:       static final ints for each opcode
+#     dops:          static final objects for each opcode
+#     dops-init:     initialization code for the "dops"
+#     first-opcodes: a comment indicating which opcodes are at the head
+#                    position of instruction fitting chains
 
 file="$1"
 tmpfile="/tmp/$$.txt"
@@ -57,7 +59,9 @@
 awk -v "bytecodeFile=$bytecodeFile" '
 
 BEGIN {
+    MAX_OPCODE = 65535;
     readBytecodes();
+    deriveOpcodeChains();
     consumeUntil = "";
 }
 
@@ -73,7 +77,8 @@
     consumeUntil = "END(opcodes)";
     print;
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if (hex[i] == "") continue;
         printf("    public static final int %s = 0x%s;\n",
                uppername[i], hex[i]);
     }
@@ -81,12 +86,25 @@
     next;
 }
 
+/BEGIN\(first-opcodes\)/ {
+    consumeUntil = "END(first-opcodes)";
+    print;
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if (isFirst[i] == "true") {
+            printf("    //     DalvOps.%s\n", uppername[i]);
+        }
+    }
+
+    next;
+}
+
 /BEGIN\(dops\)/ {
     consumeUntil = "END(dops)";
     print;
 
-    for (i = 0; i < 256; i++) {
-        if (index(name[i], "unused") != 0) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
             continue;
         }
         printf("    public static final Dop %s =\n" \
@@ -94,6 +112,7 @@
                "            Form%s.THE_ONE, %s, \"%s\");\n\n",
                uppername[i], uppername[i], family[i], format[i], hasres[i],
                name[i]);
+        # TODO: Include nextOpcode[i] in the constructor.
     }
 
     next;
@@ -103,7 +122,10 @@
     consumeUntil = "END(dops-init)";
     print;
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
         if (index(name[i], "unused") != 0) {
             continue;
         }
@@ -127,7 +149,7 @@
             exit 1;
         }
 
-        # Clean up the line and extract the command
+        # Clean up the line and extract the command.
         gsub(/  */, " ", line);
         sub(/ *#.*$/, "", line);
         sub(/ $/, "", line);
@@ -169,18 +191,81 @@
     family[idx] = toupper(parts[1]);
     gsub("-", "_", family[idx]);
 
+    # This association is used when computing "next" opcodes.
+    familyFormat[family[idx],format[idx]] = idx;
+
+    if (nextFormat[format[idx]] == "") {
+        printf("unknown format: %s\n", format[idx]);
+        return 1;
+    }
+
     return 0;
 }
 
 # Define a format family.
-function defineFormat(line, count, parts) {
-    # locals: count, parts
+function defineFormat(line, count, parts, i) {
+    # locals: count, parts, i
     count = split(line, parts);
     if (count < 1)  return -1;
     formats[parts[1]] = line;
+
+    parts[count + 1] = "none";
+    for (i = 1; i <= count; i++) {
+        nextFormat[parts[i]] = parts[i + 1];
+    }
+
     return 0;
 }
 
+# Produce the nextOpcode and isFirst arrays. The former indicates, for
+# each opcode, which one should be tried next when doing instruction
+# fitting. The latter indicates which opcodes are at the head of an
+# instruction fitting chain.
+function deriveOpcodeChains(i, op) {
+    # locals: i, op
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
+        isFirst[i] = "true";
+    }
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
+        op = findNextOpcode(i);
+        nextOpcode[i] = op;
+        if (op != -1) {
+            isFirst[op] = "false";
+        }
+    }
+}
+
+# Given an opcode by index, find the next opcode in the same family
+# (that is, with the same base name) to try when matching instructions
+# to opcodes. This simply walks the nextFormat chain looking for a
+# match. This returns the index of the matching opcode or -1 if there
+# is none.
+function findNextOpcode(idx, fam, fmt, result) {
+    # locals: fam, fmt, result
+    fam = family[idx];
+    fmt = format[idx];
+
+    # Not every opcode has a version with every possible format, so
+    # we have to iterate down the chain until we find one or run out of
+    # formats to try.
+    for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
+        result = familyFormat[fam,fmt];
+        if (result != "") {
+            return result;
+        }
+    }
+
+    return -1;
+}
+
 # Convert a hex value to an int.
 function parseHex(hex, result, chars, count, c, i) {
     # locals: result, chars, count, c, i
diff --git a/dx/etc/run-opcode-gen b/dx/etc/run-opcode-gen
index 061c048..713ed2d 100755
--- a/dx/etc/run-opcode-gen
+++ b/dx/etc/run-opcode-gen
@@ -18,3 +18,4 @@
 
 ./opcode-gen ../src/com/android/dx/dex/code/DalvOps.java
 ./opcode-gen ../src/com/android/dx/dex/code/Dops.java
+./opcode-gen ../src/com/android/dx/dex/code/RopToDop.java
diff --git a/dx/src/com/android/dx/dex/code/RopToDop.java b/dx/src/com/android/dx/dex/code/RopToDop.java
index d8fa1cc..856386b 100644
--- a/dx/src/com/android/dx/dex/code/RopToDop.java
+++ b/dx/src/com/android/dx/dex/code/RopToDop.java
@@ -45,11 +45,183 @@
         // This space intentionally left blank.
     }
 
+    /*
+     * The following comment lists each opcode that should be considered
+     * the "head" of an opcode chain, in terms of the process of fitting
+     * an instruction's arguments to an actual opcode. This list is
+     * automatically generated and may be of use in double-checking the
+     * manually-generated static initialization code for this class.
+     *
+     * TODO: Make opcode-gen produce useful code in this case instead
+     * of just a comment.
+     */
+
+    // BEGIN(first-opcodes); GENERATED AUTOMATICALLY BY opcode-gen
+    //     DalvOps.NOP
+    //     DalvOps.MOVE
+    //     DalvOps.MOVE_WIDE
+    //     DalvOps.MOVE_OBJECT
+    //     DalvOps.MOVE_RESULT
+    //     DalvOps.MOVE_RESULT_WIDE
+    //     DalvOps.MOVE_RESULT_OBJECT
+    //     DalvOps.MOVE_EXCEPTION
+    //     DalvOps.RETURN_VOID
+    //     DalvOps.RETURN
+    //     DalvOps.RETURN_WIDE
+    //     DalvOps.RETURN_OBJECT
+    //     DalvOps.CONST_4
+    //     DalvOps.CONST_WIDE_16
+    //     DalvOps.CONST_STRING
+    //     DalvOps.CONST_CLASS
+    //     DalvOps.MONITOR_ENTER
+    //     DalvOps.MONITOR_EXIT
+    //     DalvOps.CHECK_CAST
+    //     DalvOps.INSTANCE_OF
+    //     DalvOps.ARRAY_LENGTH
+    //     DalvOps.NEW_INSTANCE
+    //     DalvOps.NEW_ARRAY
+    //     DalvOps.FILLED_NEW_ARRAY
+    //     DalvOps.FILL_ARRAY_DATA
+    //     DalvOps.THROW
+    //     DalvOps.GOTO
+    //     DalvOps.PACKED_SWITCH
+    //     DalvOps.SPARSE_SWITCH
+    //     DalvOps.CMPL_FLOAT
+    //     DalvOps.CMPG_FLOAT
+    //     DalvOps.CMPL_DOUBLE
+    //     DalvOps.CMPG_DOUBLE
+    //     DalvOps.CMP_LONG
+    //     DalvOps.IF_EQ
+    //     DalvOps.IF_NE
+    //     DalvOps.IF_LT
+    //     DalvOps.IF_GE
+    //     DalvOps.IF_GT
+    //     DalvOps.IF_LE
+    //     DalvOps.IF_EQZ
+    //     DalvOps.IF_NEZ
+    //     DalvOps.IF_LTZ
+    //     DalvOps.IF_GEZ
+    //     DalvOps.IF_GTZ
+    //     DalvOps.IF_LEZ
+    //     DalvOps.AGET
+    //     DalvOps.AGET_WIDE
+    //     DalvOps.AGET_OBJECT
+    //     DalvOps.AGET_BOOLEAN
+    //     DalvOps.AGET_BYTE
+    //     DalvOps.AGET_CHAR
+    //     DalvOps.AGET_SHORT
+    //     DalvOps.APUT
+    //     DalvOps.APUT_WIDE
+    //     DalvOps.APUT_OBJECT
+    //     DalvOps.APUT_BOOLEAN
+    //     DalvOps.APUT_BYTE
+    //     DalvOps.APUT_CHAR
+    //     DalvOps.APUT_SHORT
+    //     DalvOps.IGET
+    //     DalvOps.IGET_WIDE
+    //     DalvOps.IGET_OBJECT
+    //     DalvOps.IGET_BOOLEAN
+    //     DalvOps.IGET_BYTE
+    //     DalvOps.IGET_CHAR
+    //     DalvOps.IGET_SHORT
+    //     DalvOps.IPUT
+    //     DalvOps.IPUT_WIDE
+    //     DalvOps.IPUT_OBJECT
+    //     DalvOps.IPUT_BOOLEAN
+    //     DalvOps.IPUT_BYTE
+    //     DalvOps.IPUT_CHAR
+    //     DalvOps.IPUT_SHORT
+    //     DalvOps.SGET
+    //     DalvOps.SGET_WIDE
+    //     DalvOps.SGET_OBJECT
+    //     DalvOps.SGET_BOOLEAN
+    //     DalvOps.SGET_BYTE
+    //     DalvOps.SGET_CHAR
+    //     DalvOps.SGET_SHORT
+    //     DalvOps.SPUT
+    //     DalvOps.SPUT_WIDE
+    //     DalvOps.SPUT_OBJECT
+    //     DalvOps.SPUT_BOOLEAN
+    //     DalvOps.SPUT_BYTE
+    //     DalvOps.SPUT_CHAR
+    //     DalvOps.SPUT_SHORT
+    //     DalvOps.INVOKE_VIRTUAL
+    //     DalvOps.INVOKE_SUPER
+    //     DalvOps.INVOKE_DIRECT
+    //     DalvOps.INVOKE_STATIC
+    //     DalvOps.INVOKE_INTERFACE
+    //     DalvOps.NEG_INT
+    //     DalvOps.NOT_INT
+    //     DalvOps.NEG_LONG
+    //     DalvOps.NOT_LONG
+    //     DalvOps.NEG_FLOAT
+    //     DalvOps.NEG_DOUBLE
+    //     DalvOps.INT_TO_LONG
+    //     DalvOps.INT_TO_FLOAT
+    //     DalvOps.INT_TO_DOUBLE
+    //     DalvOps.LONG_TO_INT
+    //     DalvOps.LONG_TO_FLOAT
+    //     DalvOps.LONG_TO_DOUBLE
+    //     DalvOps.FLOAT_TO_INT
+    //     DalvOps.FLOAT_TO_LONG
+    //     DalvOps.FLOAT_TO_DOUBLE
+    //     DalvOps.DOUBLE_TO_INT
+    //     DalvOps.DOUBLE_TO_LONG
+    //     DalvOps.DOUBLE_TO_FLOAT
+    //     DalvOps.INT_TO_BYTE
+    //     DalvOps.INT_TO_CHAR
+    //     DalvOps.INT_TO_SHORT
+    //     DalvOps.ADD_INT_2ADDR
+    //     DalvOps.SUB_INT_2ADDR
+    //     DalvOps.MUL_INT_2ADDR
+    //     DalvOps.DIV_INT_2ADDR
+    //     DalvOps.REM_INT_2ADDR
+    //     DalvOps.AND_INT_2ADDR
+    //     DalvOps.OR_INT_2ADDR
+    //     DalvOps.XOR_INT_2ADDR
+    //     DalvOps.SHL_INT_2ADDR
+    //     DalvOps.SHR_INT_2ADDR
+    //     DalvOps.USHR_INT_2ADDR
+    //     DalvOps.ADD_LONG_2ADDR
+    //     DalvOps.SUB_LONG_2ADDR
+    //     DalvOps.MUL_LONG_2ADDR
+    //     DalvOps.DIV_LONG_2ADDR
+    //     DalvOps.REM_LONG_2ADDR
+    //     DalvOps.AND_LONG_2ADDR
+    //     DalvOps.OR_LONG_2ADDR
+    //     DalvOps.XOR_LONG_2ADDR
+    //     DalvOps.SHL_LONG_2ADDR
+    //     DalvOps.SHR_LONG_2ADDR
+    //     DalvOps.USHR_LONG_2ADDR
+    //     DalvOps.ADD_FLOAT_2ADDR
+    //     DalvOps.SUB_FLOAT_2ADDR
+    //     DalvOps.MUL_FLOAT_2ADDR
+    //     DalvOps.DIV_FLOAT_2ADDR
+    //     DalvOps.REM_FLOAT_2ADDR
+    //     DalvOps.ADD_DOUBLE_2ADDR
+    //     DalvOps.SUB_DOUBLE_2ADDR
+    //     DalvOps.MUL_DOUBLE_2ADDR
+    //     DalvOps.DIV_DOUBLE_2ADDR
+    //     DalvOps.REM_DOUBLE_2ADDR
+    //     DalvOps.ADD_INT_LIT8
+    //     DalvOps.RSUB_INT_LIT8
+    //     DalvOps.MUL_INT_LIT8
+    //     DalvOps.DIV_INT_LIT8
+    //     DalvOps.REM_INT_LIT8
+    //     DalvOps.AND_INT_LIT8
+    //     DalvOps.OR_INT_LIT8
+    //     DalvOps.XOR_INT_LIT8
+    //     DalvOps.SHL_INT_LIT8
+    //     DalvOps.SHR_INT_LIT8
+    //     DalvOps.USHR_INT_LIT8
+    // END(first-opcodes)
+
     static {
         /*
          * Note: The choices made here are to pick the optimistically
          * smallest Dalvik opcode, and leave it to later processing to
-         * pessimize.
+         * pessimize. See the automatically-generated comment above
+         * for reference.
          */
         MAP = new HashMap<Rop, Dop>(400);
         MAP.put(Rops.NOP,               Dops.NOP);