More prep work for new opcodes. The opcode-gen script now expects there to be up to 65536 opcodes, and it generates opcode chain information. The chaining info isn't used yet, but eventually it will get hooked into the instruction selection logic in dx, meaning that dx will no longer have to do relatively slower opcode searches; the right "next" opcode will have been staticly generated and compiled in. I also added a helpful generated comment in RopToDop, which indicates which opcodes are the ones that *won't* be found by chaining, that is, which ones are the starts of chains. This is left as a comment and not code, though, since there are lots of quirks with how these get hooked up. I hope to make this cleaner at some point, but today's not the day for it. Change-Id: I11da833cc57b383733743114c0f3e0e64903982d

commit: 12209c24903959d41f887c05e6a04e0b08367463 [log] [tgz]
author: Dan Bornstein <danfuzz@android.com> Thu Nov 04 10:17:14 2010 -0700
committer: Dan Bornstein <danfuzz@android.com> Thu Nov 04 10:25:31 2010 -0700
tree: 1f0c07740bc14061905dbc25a2dd50b887ebc02a
parent: e49178a6a7aba223ec27d45d0355a19959bb7f3c [diff]
diff --git a/dx/etc/opcode-gen b/dx/etc/opcode-gen
index 518be4d..d57749f 100755
--- a/dx/etc/opcode-gen
+++ b/dx/etc/opcode-gen

@@ -20,9 +20,11 @@
 # generate code inside the given <file>, based on the directives found
 # in that file:
 #
-#     opcodes:   static final ints for each opcode
-#     dops:      static final objects for each opcode
-#     dops-init: initialization code for the "dops"
+#     opcodes:       static final ints for each opcode
+#     dops:          static final objects for each opcode
+#     dops-init:     initialization code for the "dops"
+#     first-opcodes: a comment indicating which opcodes are at the head
+#                    position of instruction fitting chains
 
 file="$1"
 tmpfile="/tmp/$$.txt"
@@ -57,7 +59,9 @@
 awk -v "bytecodeFile=$bytecodeFile" '
 
 BEGIN {
+    MAX_OPCODE = 65535;
     readBytecodes();
+    deriveOpcodeChains();
     consumeUntil = "";
 }
 
@@ -73,7 +77,8 @@
     consumeUntil = "END(opcodes)";
     print;
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if (hex[i] == "") continue;
         printf("    public static final int %s = 0x%s;\n",
                uppername[i], hex[i]);
     }
@@ -81,12 +86,25 @@
     next;
 }
 
+/BEGIN\(first-opcodes\)/ {
+    consumeUntil = "END(first-opcodes)";
+    print;
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if (isFirst[i] == "true") {
+            printf("    //     DalvOps.%s\n", uppername[i]);
+        }
+    }
+
+    next;
+}
+
 /BEGIN\(dops\)/ {
     consumeUntil = "END(dops)";
     print;
 
-    for (i = 0; i < 256; i++) {
-        if (index(name[i], "unused") != 0) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
             continue;
         }
         printf("    public static final Dop %s =\n" \
@@ -94,6 +112,7 @@
                "            Form%s.THE_ONE, %s, \"%s\");\n\n",
                uppername[i], uppername[i], family[i], format[i], hasres[i],
                name[i]);
+        # TODO: Include nextOpcode[i] in the constructor.
     }
 
     next;
@@ -103,7 +122,10 @@
     consumeUntil = "END(dops-init)";
     print;
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
         if (index(name[i], "unused") != 0) {
             continue;
         }
@@ -127,7 +149,7 @@
             exit 1;
         }
 
-        # Clean up the line and extract the command
+        # Clean up the line and extract the command.
         gsub(/  */, " ", line);
         sub(/ *#.*$/, "", line);
         sub(/ $/, "", line);
@@ -169,18 +191,81 @@
     family[idx] = toupper(parts[1]);
     gsub("-", "_", family[idx]);
 
+    # This association is used when computing "next" opcodes.
+    familyFormat[family[idx],format[idx]] = idx;
+
+    if (nextFormat[format[idx]] == "") {
+        printf("unknown format: %s\n", format[idx]);
+        return 1;
+    }
+
     return 0;
 }
 
 # Define a format family.
-function defineFormat(line, count, parts) {
-    # locals: count, parts
+function defineFormat(line, count, parts, i) {
+    # locals: count, parts, i
     count = split(line, parts);
     if (count < 1)  return -1;
     formats[parts[1]] = line;
+
+    parts[count + 1] = "none";
+    for (i = 1; i <= count; i++) {
+        nextFormat[parts[i]] = parts[i + 1];
+    }
+
     return 0;
 }
 
+# Produce the nextOpcode and isFirst arrays. The former indicates, for
+# each opcode, which one should be tried next when doing instruction
+# fitting. The latter indicates which opcodes are at the head of an
+# instruction fitting chain.
+function deriveOpcodeChains(i, op) {
+    # locals: i, op
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
+        isFirst[i] = "true";
+    }
+
+    for (i = 0; i <= MAX_OPCODE; i++) {
+        if ((hex[i] == "") || (index(name[i], "unused") != 0)) {
+            continue;
+        }
+        op = findNextOpcode(i);
+        nextOpcode[i] = op;
+        if (op != -1) {
+            isFirst[op] = "false";
+        }
+    }
+}
+
+# Given an opcode by index, find the next opcode in the same family
+# (that is, with the same base name) to try when matching instructions
+# to opcodes. This simply walks the nextFormat chain looking for a
+# match. This returns the index of the matching opcode or -1 if there
+# is none.
+function findNextOpcode(idx, fam, fmt, result) {
+    # locals: fam, fmt, result
+    fam = family[idx];
+    fmt = format[idx];
+
+    # Not every opcode has a version with every possible format, so
+    # we have to iterate down the chain until we find one or run out of
+    # formats to try.
+    for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
+        result = familyFormat[fam,fmt];
+        if (result != "") {
+            return result;
+        }
+    }
+
+    return -1;
+}
+
 # Convert a hex value to an int.
 function parseHex(hex, result, chars, count, c, i) {
     # locals: result, chars, count, c, i

diff --git a/dx/etc/run-opcode-gen b/dx/etc/run-opcode-gen
index 061c048..713ed2d 100755
--- a/dx/etc/run-opcode-gen
+++ b/dx/etc/run-opcode-gen

@@ -18,3 +18,4 @@
 
 ./opcode-gen ../src/com/android/dx/dex/code/DalvOps.java
 ./opcode-gen ../src/com/android/dx/dex/code/Dops.java
+./opcode-gen ../src/com/android/dx/dex/code/RopToDop.java

diff --git a/dx/src/com/android/dx/dex/code/RopToDop.java b/dx/src/com/android/dx/dex/code/RopToDop.java
index d8fa1cc..856386b 100644
--- a/dx/src/com/android/dx/dex/code/RopToDop.java
+++ b/dx/src/com/android/dx/dex/code/RopToDop.java

@@ -45,11 +45,183 @@
         // This space intentionally left blank.
     }
 
+    /*
+     * The following comment lists each opcode that should be considered
+     * the "head" of an opcode chain, in terms of the process of fitting
+     * an instruction's arguments to an actual opcode. This list is
+     * automatically generated and may be of use in double-checking the
+     * manually-generated static initialization code for this class.
+     *
+     * TODO: Make opcode-gen produce useful code in this case instead
+     * of just a comment.
+     */
+
+    // BEGIN(first-opcodes); GENERATED AUTOMATICALLY BY opcode-gen
+    //     DalvOps.NOP
+    //     DalvOps.MOVE
+    //     DalvOps.MOVE_WIDE
+    //     DalvOps.MOVE_OBJECT
+    //     DalvOps.MOVE_RESULT
+    //     DalvOps.MOVE_RESULT_WIDE
+    //     DalvOps.MOVE_RESULT_OBJECT
+    //     DalvOps.MOVE_EXCEPTION
+    //     DalvOps.RETURN_VOID
+    //     DalvOps.RETURN
+    //     DalvOps.RETURN_WIDE
+    //     DalvOps.RETURN_OBJECT
+    //     DalvOps.CONST_4
+    //     DalvOps.CONST_WIDE_16
+    //     DalvOps.CONST_STRING
+    //     DalvOps.CONST_CLASS
+    //     DalvOps.MONITOR_ENTER
+    //     DalvOps.MONITOR_EXIT
+    //     DalvOps.CHECK_CAST
+    //     DalvOps.INSTANCE_OF
+    //     DalvOps.ARRAY_LENGTH
+    //     DalvOps.NEW_INSTANCE
+    //     DalvOps.NEW_ARRAY
+    //     DalvOps.FILLED_NEW_ARRAY
+    //     DalvOps.FILL_ARRAY_DATA
+    //     DalvOps.THROW
+    //     DalvOps.GOTO
+    //     DalvOps.PACKED_SWITCH
+    //     DalvOps.SPARSE_SWITCH
+    //     DalvOps.CMPL_FLOAT
+    //     DalvOps.CMPG_FLOAT
+    //     DalvOps.CMPL_DOUBLE
+    //     DalvOps.CMPG_DOUBLE
+    //     DalvOps.CMP_LONG
+    //     DalvOps.IF_EQ
+    //     DalvOps.IF_NE
+    //     DalvOps.IF_LT
+    //     DalvOps.IF_GE
+    //     DalvOps.IF_GT
+    //     DalvOps.IF_LE
+    //     DalvOps.IF_EQZ
+    //     DalvOps.IF_NEZ
+    //     DalvOps.IF_LTZ
+    //     DalvOps.IF_GEZ
+    //     DalvOps.IF_GTZ
+    //     DalvOps.IF_LEZ
+    //     DalvOps.AGET
+    //     DalvOps.AGET_WIDE
+    //     DalvOps.AGET_OBJECT
+    //     DalvOps.AGET_BOOLEAN
+    //     DalvOps.AGET_BYTE
+    //     DalvOps.AGET_CHAR
+    //     DalvOps.AGET_SHORT
+    //     DalvOps.APUT
+    //     DalvOps.APUT_WIDE
+    //     DalvOps.APUT_OBJECT
+    //     DalvOps.APUT_BOOLEAN
+    //     DalvOps.APUT_BYTE
+    //     DalvOps.APUT_CHAR
+    //     DalvOps.APUT_SHORT
+    //     DalvOps.IGET
+    //     DalvOps.IGET_WIDE
+    //     DalvOps.IGET_OBJECT
+    //     DalvOps.IGET_BOOLEAN
+    //     DalvOps.IGET_BYTE
+    //     DalvOps.IGET_CHAR
+    //     DalvOps.IGET_SHORT
+    //     DalvOps.IPUT
+    //     DalvOps.IPUT_WIDE
+    //     DalvOps.IPUT_OBJECT
+    //     DalvOps.IPUT_BOOLEAN
+    //     DalvOps.IPUT_BYTE
+    //     DalvOps.IPUT_CHAR
+    //     DalvOps.IPUT_SHORT
+    //     DalvOps.SGET
+    //     DalvOps.SGET_WIDE
+    //     DalvOps.SGET_OBJECT
+    //     DalvOps.SGET_BOOLEAN
+    //     DalvOps.SGET_BYTE
+    //     DalvOps.SGET_CHAR
+    //     DalvOps.SGET_SHORT
+    //     DalvOps.SPUT
+    //     DalvOps.SPUT_WIDE
+    //     DalvOps.SPUT_OBJECT
+    //     DalvOps.SPUT_BOOLEAN
+    //     DalvOps.SPUT_BYTE
+    //     DalvOps.SPUT_CHAR
+    //     DalvOps.SPUT_SHORT
+    //     DalvOps.INVOKE_VIRTUAL
+    //     DalvOps.INVOKE_SUPER
+    //     DalvOps.INVOKE_DIRECT
+    //     DalvOps.INVOKE_STATIC
+    //     DalvOps.INVOKE_INTERFACE
+    //     DalvOps.NEG_INT
+    //     DalvOps.NOT_INT
+    //     DalvOps.NEG_LONG
+    //     DalvOps.NOT_LONG
+    //     DalvOps.NEG_FLOAT
+    //     DalvOps.NEG_DOUBLE
+    //     DalvOps.INT_TO_LONG
+    //     DalvOps.INT_TO_FLOAT
+    //     DalvOps.INT_TO_DOUBLE
+    //     DalvOps.LONG_TO_INT
+    //     DalvOps.LONG_TO_FLOAT
+    //     DalvOps.LONG_TO_DOUBLE
+    //     DalvOps.FLOAT_TO_INT
+    //     DalvOps.FLOAT_TO_LONG
+    //     DalvOps.FLOAT_TO_DOUBLE
+    //     DalvOps.DOUBLE_TO_INT
+    //     DalvOps.DOUBLE_TO_LONG
+    //     DalvOps.DOUBLE_TO_FLOAT
+    //     DalvOps.INT_TO_BYTE
+    //     DalvOps.INT_TO_CHAR
+    //     DalvOps.INT_TO_SHORT
+    //     DalvOps.ADD_INT_2ADDR
+    //     DalvOps.SUB_INT_2ADDR
+    //     DalvOps.MUL_INT_2ADDR
+    //     DalvOps.DIV_INT_2ADDR
+    //     DalvOps.REM_INT_2ADDR
+    //     DalvOps.AND_INT_2ADDR
+    //     DalvOps.OR_INT_2ADDR
+    //     DalvOps.XOR_INT_2ADDR
+    //     DalvOps.SHL_INT_2ADDR
+    //     DalvOps.SHR_INT_2ADDR
+    //     DalvOps.USHR_INT_2ADDR
+    //     DalvOps.ADD_LONG_2ADDR
+    //     DalvOps.SUB_LONG_2ADDR
+    //     DalvOps.MUL_LONG_2ADDR
+    //     DalvOps.DIV_LONG_2ADDR
+    //     DalvOps.REM_LONG_2ADDR
+    //     DalvOps.AND_LONG_2ADDR
+    //     DalvOps.OR_LONG_2ADDR
+    //     DalvOps.XOR_LONG_2ADDR
+    //     DalvOps.SHL_LONG_2ADDR
+    //     DalvOps.SHR_LONG_2ADDR
+    //     DalvOps.USHR_LONG_2ADDR
+    //     DalvOps.ADD_FLOAT_2ADDR
+    //     DalvOps.SUB_FLOAT_2ADDR
+    //     DalvOps.MUL_FLOAT_2ADDR
+    //     DalvOps.DIV_FLOAT_2ADDR
+    //     DalvOps.REM_FLOAT_2ADDR
+    //     DalvOps.ADD_DOUBLE_2ADDR
+    //     DalvOps.SUB_DOUBLE_2ADDR
+    //     DalvOps.MUL_DOUBLE_2ADDR
+    //     DalvOps.DIV_DOUBLE_2ADDR
+    //     DalvOps.REM_DOUBLE_2ADDR
+    //     DalvOps.ADD_INT_LIT8
+    //     DalvOps.RSUB_INT_LIT8
+    //     DalvOps.MUL_INT_LIT8
+    //     DalvOps.DIV_INT_LIT8
+    //     DalvOps.REM_INT_LIT8
+    //     DalvOps.AND_INT_LIT8
+    //     DalvOps.OR_INT_LIT8
+    //     DalvOps.XOR_INT_LIT8
+    //     DalvOps.SHL_INT_LIT8
+    //     DalvOps.SHR_INT_LIT8
+    //     DalvOps.USHR_INT_LIT8
+    // END(first-opcodes)
+
     static {
         /*
          * Note: The choices made here are to pick the optimistically
          * smallest Dalvik opcode, and leave it to later processing to
-         * pessimize.
+         * pessimize. See the automatically-generated comment above
+         * for reference.
          */
         MAP = new HashMap<Rop, Dop>(400);
         MAP.put(Rops.NOP,               Dops.NOP);
commit	12209c24903959d41f887c05e6a04e0b08367463	[log] [tgz]
author	Dan Bornstein <danfuzz@android.com>	Thu Nov 04 10:17:14 2010 -0700
committer	Dan Bornstein <danfuzz@android.com>	Thu Nov 04 10:25:31 2010 -0700
tree	1f0c07740bc14061905dbc25a2dd50b887ebc02a
parent	e49178a6a7aba223ec27d45d0355a19959bb7f3c [diff]