Simplify lexer transition code.

This removes an if-check from the top of `get_transition`, removes an
if-check from the top of `Lexer::next()`, simplifies a bounds check, and
removes bitfields from the index array. Disappointingly, on my machine,
I can't measure any change at all; `get_transition` and `next`
stubbornly remain at about 4-5% of total nanobench time for
`sksl_large`. However, the code is still simpler and hopefully slightly
smaller.

Change-Id: If4187c01f350fe642b7af7cb6bd2c8250ca3c00e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/548396
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/SkSLLexer.cpp b/src/sksl/SkSLLexer.cpp
index 44a4de8..76a3017 100644
--- a/src/sksl/SkSLLexer.cpp
+++ b/src/sksl/SkSLLexer.cpp
@@ -12,18 +12,15 @@
 namespace SkSL {
 
 using State = uint16_t;
-static const uint8_t INVALID_CHAR = 18;
-static const int8_t kMappings[127] = {
-        0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  3,  1,  3,  3,  3,  3,  3,  3,  3,  3,
-        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  1,  4,  3,  5,  6,  7,  8,  3,  9,  10, 11, 12,
-        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 22, 22, 23, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-        31, 32, 33, 34, 31, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 35, 38, 35, 35,
-        39, 35, 35, 40, 3,  41, 42, 43, 3,  44, 45, 46, 47, 48, 49, 50, 51, 52, 35, 53, 54, 55,
-        56, 57, 58, 35, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
-struct IndexEntry {
-    uint16_t type : 2;
-    uint16_t pos : 14;
-};
+static constexpr uint8_t kInvalidChar = 18;
+static constexpr int8_t kMappings[118] = {
+        1,  2,  3,  3,  1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+        3,  3,  3,  1,  4,  3,  5,  6,  7,  8,  3,  9,  10, 11, 12, 13, 14, 15, 16, 17,
+        18, 19, 20, 21, 22, 22, 22, 23, 23, 24, 25, 26, 27, 28, 29, 30, 31, 31, 32, 33,
+        34, 31, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 35, 38, 35, 35, 39,
+        35, 35, 40, 3,  41, 42, 43, 3,  44, 45, 46, 47, 48, 49, 50, 51, 52, 35, 53, 54,
+        55, 56, 57, 58, 35, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
+using IndexEntry = int16_t;
 struct FullEntry {
     State data[72];
 };
@@ -119,6 +116,12 @@
 static constexpr CompactEntry kCompact[] = {
         {0,
          0,
+         0,
+         {
+                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         }},
+        {0,
+         0,
          3,
          {
                  60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1946,61 +1949,36 @@
          }},
 };
 static constexpr IndexEntry kIndices[] = {
-        {0, 0},   {1, 0},   {2, 0},   {2, 0},   {0, 0},   {2, 1},   {0, 0},   {2, 2},   {2, 3},
-        {2, 4},   {2, 5},   {2, 6},   {2, 7},   {2, 5},   {2, 8},   {0, 0},   {2, 9},   {0, 0},
-        {0, 0},   {0, 0},   {0, 0},   {2, 10},  {0, 0},   {2, 11},  {0, 0},   {0, 0},   {0, 0},
-        {2, 12},  {0, 0},   {0, 0},   {2, 13},  {2, 14},  {2, 15},  {2, 16},  {2, 16},  {2, 17},
-        {2, 18},  {2, 19},  {0, 0},   {2, 20},  {0, 0},   {1, 1},   {2, 21},  {2, 22},  {2, 23},
-        {2, 24},  {2, 24},  {2, 25},  {2, 26},  {2, 26},  {1, 2},   {1, 3},   {2, 27},  {2, 28},
-        {2, 28},  {0, 0},   {0, 0},   {0, 0},   {2, 29},  {2, 30},  {1, 4},   {1, 4},   {0, 0},
-        {0, 0},   {2, 31},  {2, 32},  {0, 0},   {0, 0},   {2, 33},  {0, 0},   {2, 34},  {0, 0},
-        {2, 35},  {0, 0},   {0, 0},   {2, 36},  {2, 37},  {0, 0},   {2, 38},  {2, 39},  {2, 40},
-        {2, 41},  {2, 42},  {0, 0},   {2, 5},   {0, 0},   {0, 0},   {2, 43},  {0, 0},   {0, 0},
-        {2, 44},  {2, 45},  {2, 5},   {2, 46},  {2, 47},  {2, 48},  {2, 49},  {2, 50},  {2, 51},
-        {2, 52},  {2, 53},  {2, 54},  {2, 55},  {2, 56},  {2, 5},   {1, 5},   {2, 57},  {2, 58},
-        {2, 5},   {2, 59},  {2, 60},  {2, 61},  {2, 62},  {2, 63},  {2, 64},  {2, 5},   {2, 65},
-        {2, 66},  {2, 67},  {2, 68},  {2, 5},   {1, 6},   {2, 69},  {2, 70},  {2, 71},  {2, 72},
-        {2, 73},  {2, 5},   {2, 74},  {2, 75},  {2, 76},  {2, 77},  {2, 78},  {2, 5},   {2, 79},
-        {2, 80},  {2, 81},  {2, 52},  {2, 82},  {2, 83},  {2, 84},  {1, 7},   {2, 85},  {2, 86},
-        {2, 5},   {2, 87},  {2, 45},  {2, 88},  {2, 89},  {2, 90},  {2, 91},  {2, 92},  {2, 93},
-        {1, 8},   {2, 94},  {2, 95},  {2, 96},  {2, 5},   {2, 97},  {2, 98},  {2, 99},  {2, 100},
-        {2, 101}, {2, 5},   {2, 102}, {2, 5},   {2, 103}, {2, 104}, {2, 84},  {2, 105}, {2, 106},
-        {2, 107}, {2, 108}, {2, 109}, {2, 110}, {2, 111}, {2, 112}, {2, 113}, {2, 5},   {2, 114},
-        {2, 115}, {2, 84},  {2, 116}, {2, 5},   {1, 9},   {2, 117}, {2, 118}, {2, 119}, {2, 5},
-        {2, 120}, {2, 121}, {2, 5},   {2, 122}, {2, 123}, {2, 124}, {2, 125}, {2, 126}, {2, 127},
-        {2, 128}, {2, 52},  {2, 129}, {2, 130}, {2, 131}, {2, 132}, {2, 133}, {2, 123}, {2, 134},
-        {2, 135}, {2, 136}, {2, 137}, {2, 138}, {2, 5},   {2, 139}, {2, 140}, {2, 141}, {2, 5},
-        {2, 142}, {2, 143}, {2, 144}, {2, 145}, {2, 146}, {2, 147}, {2, 5},   {2, 148}, {2, 149},
-        {2, 150}, {2, 151}, {2, 152}, {2, 153}, {2, 154}, {2, 52},  {2, 155}, {2, 156}, {2, 157},
-        {2, 158}, {2, 159}, {2, 160}, {2, 5},   {2, 161}, {2, 162}, {2, 163}, {2, 164}, {2, 165},
-        {2, 166}, {2, 167}, {2, 168}, {2, 169}, {2, 170}, {2, 5},   {2, 171}, {2, 172}, {2, 173},
-        {2, 174}, {2, 123}, {1, 10},  {2, 175}, {2, 176}, {2, 177}, {2, 99},  {2, 178}, {2, 179},
-        {2, 180}, {2, 181}, {2, 182}, {2, 183}, {2, 184}, {2, 185}, {2, 186}, {2, 187}, {2, 188},
-        {2, 189}, {2, 190}, {2, 191}, {2, 192}, {2, 193}, {2, 5},   {1, 11},  {2, 194}, {2, 195},
-        {2, 196}, {2, 197}, {2, 198}, {1, 12},  {2, 199}, {2, 200}, {2, 201}, {2, 202}, {2, 203},
-        {2, 204}, {2, 205}, {2, 206}, {2, 207}, {2, 208}, {2, 209}, {2, 210}, {2, 211}, {2, 212},
-        {2, 213}, {2, 214}, {2, 215}, {2, 205}, {2, 216}, {2, 217}, {2, 218}, {2, 219}, {2, 123},
-        {2, 220}, {2, 221}, {2, 52},  {2, 222}, {2, 223}, {2, 224}, {2, 225}, {2, 226}, {2, 227},
-        {2, 228}, {2, 229}, {2, 230}, {2, 231}, {2, 232}, {2, 233}, {2, 234}, {2, 235}, {2, 236},
-        {2, 237}, {2, 238}, {2, 239}, {2, 240}, {2, 241}, {2, 242}, {2, 5},   {2, 243}, {2, 244},
-        {2, 245}, {2, 188}, {2, 246}, {2, 247}, {2, 248}, {2, 5},   {2, 249}, {2, 250}, {2, 251},
-        {2, 252}, {2, 253}, {2, 254}, {2, 255}, {2, 256}, {2, 5},   {1, 13},  {2, 257}, {2, 258},
-        {2, 259}, {2, 260}, {2, 261}, {2, 52},  {2, 262}, {2, 61},  {2, 263}, {2, 264}, {2, 5},
-        {2, 265}, {2, 266}, {2, 267}, {2, 268}, {2, 225}, {2, 269}, {2, 270}, {2, 271}, {2, 272},
-        {2, 273}, {2, 274}, {2, 5},   {2, 184}, {2, 275}, {2, 276}, {2, 277}, {2, 278}, {2, 99},
-        {2, 279}, {2, 280}, {2, 140}, {2, 281}, {2, 282}, {2, 283}, {2, 284}, {2, 285}, {2, 140},
-        {2, 286}, {2, 287}, {2, 288}, {2, 289}, {2, 290}, {2, 52},  {2, 291}, {2, 292}, {2, 293},
-        {2, 294}, {2, 5},   {0, 0},   {2, 295}, {0, 0},   {0, 0},   {0, 0},   {0, 0},
+        0,   -1,  1,   1,   0,   2,   0,   3,   4,   5,   6,   7,   8,   6,   9,   0,   10,  0,
+        0,   0,   0,   11,  0,   12,  0,   0,   0,   13,  0,   0,   14,  15,  16,  17,  17,  18,
+        19,  20,  0,   21,  0,   -2,  22,  23,  24,  25,  25,  26,  27,  27,  -3,  -4,  28,  29,
+        29,  0,   0,   0,   30,  31,  -5,  -5,  0,   0,   32,  33,  0,   0,   34,  0,   35,  0,
+        36,  0,   0,   37,  38,  0,   39,  40,  41,  42,  43,  0,   6,   0,   0,   44,  0,   0,
+        45,  46,  6,   47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  6,   -6,  58,  59,
+        6,   60,  61,  62,  63,  64,  65,  6,   66,  67,  68,  69,  6,   -7,  70,  71,  72,  73,
+        74,  6,   75,  76,  77,  78,  79,  6,   80,  81,  82,  53,  83,  84,  85,  -8,  86,  87,
+        6,   88,  46,  89,  90,  91,  92,  93,  94,  -9,  95,  96,  97,  6,   98,  99,  100, 101,
+        102, 6,   103, 6,   104, 105, 85,  106, 107, 108, 109, 110, 111, 112, 113, 114, 6,   115,
+        116, 85,  117, 6,   -10, 118, 119, 120, 6,   121, 122, 6,   123, 124, 125, 126, 127, 128,
+        129, 53,  130, 131, 132, 133, 134, 124, 135, 136, 137, 138, 139, 6,   140, 141, 142, 6,
+        143, 144, 145, 146, 147, 148, 6,   149, 150, 151, 152, 153, 154, 155, 53,  156, 157, 158,
+        159, 160, 161, 6,   162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 6,   172, 173, 174,
+        175, 124, -11, 176, 177, 178, 100, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
+        190, 191, 192, 193, 194, 6,   -12, 195, 196, 197, 198, 199, -13, 200, 201, 202, 203, 204,
+        205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 206, 217, 218, 219, 220, 124,
+        221, 222, 53,  223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
+        238, 239, 240, 241, 242, 243, 6,   244, 245, 246, 189, 247, 248, 249, 6,   250, 251, 252,
+        253, 254, 255, 256, 257, 6,   -14, 258, 259, 260, 261, 262, 53,  263, 62,  264, 265, 6,
+        266, 267, 268, 269, 226, 270, 271, 272, 273, 274, 275, 6,   185, 276, 277, 278, 279, 100,
+        280, 281, 141, 282, 283, 284, 285, 286, 141, 287, 288, 289, 290, 291, 53,  292, 293, 294,
+        295, 6,   0,   296, 0,   0,   0,   0,
 };
 State get_transition(int transition, int state) {
     IndexEntry index = kIndices[state];
-    if (index.type == 0) {
-        return 0;
+    if (index < 0) {
+        return kFull[~index].data[transition];
     }
-    if (index.type == 1) {
-        return kFull[index.pos].data[transition];
-    }
-    const CompactEntry& entry = kCompact[index.pos];
+    const CompactEntry& entry = kCompact[index];
     int value = entry.data[transition >> 2];
     value >>= 2 * (transition & 3);
     value &= 3;
@@ -2035,20 +2013,17 @@
     // tokens. Our grammar doesn't have this property, so we can simplify the logic
     // a bit.
     int32_t startOffset = fOffset;
-    if (startOffset == (int32_t)fText.length()) {
-        return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0);
-    }
     State state = 1;
     for (;;) {
         if (fOffset >= (int32_t)fText.length()) {
-            if (kAccepts[state] == -1) {
+            if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) {
                 return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0);
             }
             break;
         }
-        uint8_t c = (uint8_t)fText[fOffset];
-        if (c <= 8 || c >= 127) {
-            c = INVALID_CHAR;
+        uint8_t c = (uint8_t)(fText[fOffset] - 9);
+        if (c >= 118) {
+            c = kInvalidChar;
         }
         State newState = get_transition(kMappings[c], state);
         if (!newState) {
diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp
index 738e386..ab4e3a6 100644
--- a/src/sksl/lex/Main.cpp
+++ b/src/sksl/lex/Main.cpp
@@ -116,13 +116,24 @@
         states = std::max(states, row.size());
     }
     out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";
-    // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual
-    // input
-    out << "static const uint8_t INVALID_CHAR = 18;";
-    out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n    ";
+
+    // Find the first character mapped in our DFA.
+    size_t startChar = 0;
+    for (; startChar < dfa.fCharMappings.size(); ++startChar) {
+        if (dfa.fCharMappings[startChar] != 0) {
+            break;
+        }
+    }
+
+    // Arbitrarily-chosen kMappings index (a character code minus startChar) which is substituted
+    // for any out-of-range character; it should correspond to a character not seen in real input.
+    SkASSERT(startChar < 18);
+    out << "static constexpr uint8_t kInvalidChar = 18;";
+    out << "static constexpr int8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {\n"
+           "    ";
     const char* separator = "";
-    for (int m : dfa.fCharMappings) {
-        out << separator << std::to_string(m);
+    for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) {
+        out << separator << std::to_string(dfa.fCharMappings[index]);
         separator = ", ";
     }
     out << "\n};\n";
@@ -148,20 +159,17 @@
     // tokens. Our grammar doesn't have this property, so we can simplify the logic
     // a bit.
     int32_t startOffset = fOffset;
-    if (startOffset == (int32_t)fText.length()) {
-        return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
-    }
-    State state = 1;
+    State   state = 1;
     for (;;) {
         if (fOffset >= (int32_t)fText.length()) {
-            if (kAccepts[state] == -1) {
-                return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0);
+            if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) {
+                return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
             }
             break;
         }
-        uint8_t c = (uint8_t) fText[fOffset];
-        if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {
-            c = INVALID_CHAR;
+        uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"();
+        if (c >= )" << dfa.fCharMappings.size() - startChar << R"() {
+            c = kInvalidChar;
         }
         State newState = get_transition(kMappings[c], state);
         if (!newState) {
diff --git a/src/sksl/lex/TransitionTable.cpp b/src/sksl/lex/TransitionTable.cpp
index 6874eb5..9e9fb25 100644
--- a/src/sksl/lex/TransitionTable.cpp
+++ b/src/sksl/lex/TransitionTable.cpp
@@ -32,9 +32,8 @@
 constexpr int kDataPerByte = 8 / kNumBits;
 
 enum IndexType {
-    kZero = 0,
+    kCompactEntry = 0,
     kFullEntry,
-    kCompactEntry,
 };
 struct IndexEntry {
     IndexType type;
@@ -130,11 +129,7 @@
         }
 
         transitionSet.erase(0);
-        if (transitionSet.empty()) {
-            // This transition table was completely empty (every value was zero). No data needed;
-            // zero pages are handled as a special index type.
-            indices.push_back(IndexEntry{kZero, 0});
-        } else if (transitionSet.size() <= kNumValues) {
+        if (transitionSet.size() <= kNumValues) {
             // This table only contained a small number of unique nonzero values.
             // Use a compact representation that squishes each value down to a few bits.
             int index = add_compact_entry(transitionSet, data, &compactEntries);
@@ -155,10 +150,7 @@
     }
 
     // Emit all the structs our transition table will use.
-    out << "struct IndexEntry {\n"
-        << "    uint16_t type : 2;\n"
-        << "    uint16_t pos : 14;\n"
-        << "};\n"
+    out << "using IndexEntry = int16_t;\n"
         << "struct FullEntry {\n"
         << "    State data[" << numTransitions << "];\n"
         << "};\n";
@@ -216,14 +208,19 @@
     out << "};\n"
         << "static constexpr IndexEntry kIndices[] = {\n";
     for (const IndexEntry& entry : indices) {
-        out << "    {" << entry.type << ", " << entry.pos << "},\n";
+        if (entry.type == kFullEntry) {
+            // Bit-not is used so that full entries start at -1 and go down from there.
+            out << ~entry.pos << ", ";
+        } else {
+            // Compact entries start at 0 and go up from there.
+            out << entry.pos << ", ";
+        }
     }
     out << "};\n"
         << "State get_transition(int transition, int state) {\n"
         << "    IndexEntry index = kIndices[state];\n"
-        << "    if (index.type == 0) { return 0; }\n"
-        << "    if (index.type == 1) { return kFull[index.pos].data[transition]; }\n"
-        << "    const CompactEntry& entry = kCompact[index.pos];\n"
+        << "    if (index < 0) { return kFull[~index].data[transition]; }\n"
+        << "    const CompactEntry& entry = kCompact[index];\n"
         << "    int value = entry.data[transition >> " << std::log2(kDataPerByte) << "];\n"
         << "    value >>= " << kNumBits << " * (transition & " << kDataPerByte - 1 << ");\n"
         << "    value &= " << kNumValues << ";\n"