Simplify lexer transition code.
This removes an if-check from the top of get_transition, removes an
if-check from the top of Lexer::next(), simplifies a bounds check, and
removes bitfields from the index array. Disappointingly, on my machine,
I can't measure any change at all; `get_transition` and `next`
stubbornly remain at about 4-5% of total nanobench time for
`sksl_large`. However, it's still simpler and hopefully slightly smaller
code.
Change-Id: If4187c01f350fe642b7af7cb6bd2c8250ca3c00e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/548396
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/SkSLLexer.cpp b/src/sksl/SkSLLexer.cpp
index 44a4de8..76a3017 100644
--- a/src/sksl/SkSLLexer.cpp
+++ b/src/sksl/SkSLLexer.cpp
@@ -12,18 +12,15 @@
namespace SkSL {
using State = uint16_t;
-static const uint8_t INVALID_CHAR = 18;
-static const int8_t kMappings[127] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 4, 3, 5, 6, 7, 8, 3, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 22, 22, 23, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 31, 32, 33, 34, 31, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 35, 38, 35, 35,
- 39, 35, 35, 40, 3, 41, 42, 43, 3, 44, 45, 46, 47, 48, 49, 50, 51, 52, 35, 53, 54, 55,
- 56, 57, 58, 35, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
-struct IndexEntry {
- uint16_t type : 2;
- uint16_t pos : 14;
-};
+static constexpr uint8_t kInvalidChar = 18;
+static constexpr int8_t kMappings[118] = {
+ 1, 2, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 1, 4, 3, 5, 6, 7, 8, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 22, 22, 23, 23, 24, 25, 26, 27, 28, 29, 30, 31, 31, 32, 33,
+ 34, 31, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 35, 38, 35, 35, 39,
+ 35, 35, 40, 3, 41, 42, 43, 3, 44, 45, 46, 47, 48, 49, 50, 51, 52, 35, 53, 54,
+ 55, 56, 57, 58, 35, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
+using IndexEntry = int16_t;
struct FullEntry {
State data[72];
};
@@ -119,6 +116,12 @@
static constexpr CompactEntry kCompact[] = {
{0,
0,
+ 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ }},
+ {0,
+ 0,
3,
{
60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1946,61 +1949,36 @@
}},
};
static constexpr IndexEntry kIndices[] = {
- {0, 0}, {1, 0}, {2, 0}, {2, 0}, {0, 0}, {2, 1}, {0, 0}, {2, 2}, {2, 3},
- {2, 4}, {2, 5}, {2, 6}, {2, 7}, {2, 5}, {2, 8}, {0, 0}, {2, 9}, {0, 0},
- {0, 0}, {0, 0}, {0, 0}, {2, 10}, {0, 0}, {2, 11}, {0, 0}, {0, 0}, {0, 0},
- {2, 12}, {0, 0}, {0, 0}, {2, 13}, {2, 14}, {2, 15}, {2, 16}, {2, 16}, {2, 17},
- {2, 18}, {2, 19}, {0, 0}, {2, 20}, {0, 0}, {1, 1}, {2, 21}, {2, 22}, {2, 23},
- {2, 24}, {2, 24}, {2, 25}, {2, 26}, {2, 26}, {1, 2}, {1, 3}, {2, 27}, {2, 28},
- {2, 28}, {0, 0}, {0, 0}, {0, 0}, {2, 29}, {2, 30}, {1, 4}, {1, 4}, {0, 0},
- {0, 0}, {2, 31}, {2, 32}, {0, 0}, {0, 0}, {2, 33}, {0, 0}, {2, 34}, {0, 0},
- {2, 35}, {0, 0}, {0, 0}, {2, 36}, {2, 37}, {0, 0}, {2, 38}, {2, 39}, {2, 40},
- {2, 41}, {2, 42}, {0, 0}, {2, 5}, {0, 0}, {0, 0}, {2, 43}, {0, 0}, {0, 0},
- {2, 44}, {2, 45}, {2, 5}, {2, 46}, {2, 47}, {2, 48}, {2, 49}, {2, 50}, {2, 51},
- {2, 52}, {2, 53}, {2, 54}, {2, 55}, {2, 56}, {2, 5}, {1, 5}, {2, 57}, {2, 58},
- {2, 5}, {2, 59}, {2, 60}, {2, 61}, {2, 62}, {2, 63}, {2, 64}, {2, 5}, {2, 65},
- {2, 66}, {2, 67}, {2, 68}, {2, 5}, {1, 6}, {2, 69}, {2, 70}, {2, 71}, {2, 72},
- {2, 73}, {2, 5}, {2, 74}, {2, 75}, {2, 76}, {2, 77}, {2, 78}, {2, 5}, {2, 79},
- {2, 80}, {2, 81}, {2, 52}, {2, 82}, {2, 83}, {2, 84}, {1, 7}, {2, 85}, {2, 86},
- {2, 5}, {2, 87}, {2, 45}, {2, 88}, {2, 89}, {2, 90}, {2, 91}, {2, 92}, {2, 93},
- {1, 8}, {2, 94}, {2, 95}, {2, 96}, {2, 5}, {2, 97}, {2, 98}, {2, 99}, {2, 100},
- {2, 101}, {2, 5}, {2, 102}, {2, 5}, {2, 103}, {2, 104}, {2, 84}, {2, 105}, {2, 106},
- {2, 107}, {2, 108}, {2, 109}, {2, 110}, {2, 111}, {2, 112}, {2, 113}, {2, 5}, {2, 114},
- {2, 115}, {2, 84}, {2, 116}, {2, 5}, {1, 9}, {2, 117}, {2, 118}, {2, 119}, {2, 5},
- {2, 120}, {2, 121}, {2, 5}, {2, 122}, {2, 123}, {2, 124}, {2, 125}, {2, 126}, {2, 127},
- {2, 128}, {2, 52}, {2, 129}, {2, 130}, {2, 131}, {2, 132}, {2, 133}, {2, 123}, {2, 134},
- {2, 135}, {2, 136}, {2, 137}, {2, 138}, {2, 5}, {2, 139}, {2, 140}, {2, 141}, {2, 5},
- {2, 142}, {2, 143}, {2, 144}, {2, 145}, {2, 146}, {2, 147}, {2, 5}, {2, 148}, {2, 149},
- {2, 150}, {2, 151}, {2, 152}, {2, 153}, {2, 154}, {2, 52}, {2, 155}, {2, 156}, {2, 157},
- {2, 158}, {2, 159}, {2, 160}, {2, 5}, {2, 161}, {2, 162}, {2, 163}, {2, 164}, {2, 165},
- {2, 166}, {2, 167}, {2, 168}, {2, 169}, {2, 170}, {2, 5}, {2, 171}, {2, 172}, {2, 173},
- {2, 174}, {2, 123}, {1, 10}, {2, 175}, {2, 176}, {2, 177}, {2, 99}, {2, 178}, {2, 179},
- {2, 180}, {2, 181}, {2, 182}, {2, 183}, {2, 184}, {2, 185}, {2, 186}, {2, 187}, {2, 188},
- {2, 189}, {2, 190}, {2, 191}, {2, 192}, {2, 193}, {2, 5}, {1, 11}, {2, 194}, {2, 195},
- {2, 196}, {2, 197}, {2, 198}, {1, 12}, {2, 199}, {2, 200}, {2, 201}, {2, 202}, {2, 203},
- {2, 204}, {2, 205}, {2, 206}, {2, 207}, {2, 208}, {2, 209}, {2, 210}, {2, 211}, {2, 212},
- {2, 213}, {2, 214}, {2, 215}, {2, 205}, {2, 216}, {2, 217}, {2, 218}, {2, 219}, {2, 123},
- {2, 220}, {2, 221}, {2, 52}, {2, 222}, {2, 223}, {2, 224}, {2, 225}, {2, 226}, {2, 227},
- {2, 228}, {2, 229}, {2, 230}, {2, 231}, {2, 232}, {2, 233}, {2, 234}, {2, 235}, {2, 236},
- {2, 237}, {2, 238}, {2, 239}, {2, 240}, {2, 241}, {2, 242}, {2, 5}, {2, 243}, {2, 244},
- {2, 245}, {2, 188}, {2, 246}, {2, 247}, {2, 248}, {2, 5}, {2, 249}, {2, 250}, {2, 251},
- {2, 252}, {2, 253}, {2, 254}, {2, 255}, {2, 256}, {2, 5}, {1, 13}, {2, 257}, {2, 258},
- {2, 259}, {2, 260}, {2, 261}, {2, 52}, {2, 262}, {2, 61}, {2, 263}, {2, 264}, {2, 5},
- {2, 265}, {2, 266}, {2, 267}, {2, 268}, {2, 225}, {2, 269}, {2, 270}, {2, 271}, {2, 272},
- {2, 273}, {2, 274}, {2, 5}, {2, 184}, {2, 275}, {2, 276}, {2, 277}, {2, 278}, {2, 99},
- {2, 279}, {2, 280}, {2, 140}, {2, 281}, {2, 282}, {2, 283}, {2, 284}, {2, 285}, {2, 140},
- {2, 286}, {2, 287}, {2, 288}, {2, 289}, {2, 290}, {2, 52}, {2, 291}, {2, 292}, {2, 293},
- {2, 294}, {2, 5}, {0, 0}, {2, 295}, {0, 0}, {0, 0}, {0, 0}, {0, 0},
+ 0, -1, 1, 1, 0, 2, 0, 3, 4, 5, 6, 7, 8, 6, 9, 0, 10, 0,
+ 0, 0, 0, 11, 0, 12, 0, 0, 0, 13, 0, 0, 14, 15, 16, 17, 17, 18,
+ 19, 20, 0, 21, 0, -2, 22, 23, 24, 25, 25, 26, 27, 27, -3, -4, 28, 29,
+ 29, 0, 0, 0, 30, 31, -5, -5, 0, 0, 32, 33, 0, 0, 34, 0, 35, 0,
+ 36, 0, 0, 37, 38, 0, 39, 40, 41, 42, 43, 0, 6, 0, 0, 44, 0, 0,
+ 45, 46, 6, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 6, -6, 58, 59,
+ 6, 60, 61, 62, 63, 64, 65, 6, 66, 67, 68, 69, 6, -7, 70, 71, 72, 73,
+ 74, 6, 75, 76, 77, 78, 79, 6, 80, 81, 82, 53, 83, 84, 85, -8, 86, 87,
+ 6, 88, 46, 89, 90, 91, 92, 93, 94, -9, 95, 96, 97, 6, 98, 99, 100, 101,
+ 102, 6, 103, 6, 104, 105, 85, 106, 107, 108, 109, 110, 111, 112, 113, 114, 6, 115,
+ 116, 85, 117, 6, -10, 118, 119, 120, 6, 121, 122, 6, 123, 124, 125, 126, 127, 128,
+ 129, 53, 130, 131, 132, 133, 134, 124, 135, 136, 137, 138, 139, 6, 140, 141, 142, 6,
+ 143, 144, 145, 146, 147, 148, 6, 149, 150, 151, 152, 153, 154, 155, 53, 156, 157, 158,
+ 159, 160, 161, 6, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 6, 172, 173, 174,
+ 175, 124, -11, 176, 177, 178, 100, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
+ 190, 191, 192, 193, 194, 6, -12, 195, 196, 197, 198, 199, -13, 200, 201, 202, 203, 204,
+ 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 206, 217, 218, 219, 220, 124,
+ 221, 222, 53, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
+ 238, 239, 240, 241, 242, 243, 6, 244, 245, 246, 189, 247, 248, 249, 6, 250, 251, 252,
+ 253, 254, 255, 256, 257, 6, -14, 258, 259, 260, 261, 262, 53, 263, 62, 264, 265, 6,
+ 266, 267, 268, 269, 226, 270, 271, 272, 273, 274, 275, 6, 185, 276, 277, 278, 279, 100,
+ 280, 281, 141, 282, 283, 284, 285, 286, 141, 287, 288, 289, 290, 291, 53, 292, 293, 294,
+ 295, 6, 0, 296, 0, 0, 0, 0,
};
State get_transition(int transition, int state) {
IndexEntry index = kIndices[state];
- if (index.type == 0) {
- return 0;
+ if (index < 0) {
+ return kFull[~index].data[transition];
}
- if (index.type == 1) {
- return kFull[index.pos].data[transition];
- }
- const CompactEntry& entry = kCompact[index.pos];
+ const CompactEntry& entry = kCompact[index];
int value = entry.data[transition >> 2];
value >>= 2 * (transition & 3);
value &= 3;
@@ -2035,20 +2013,17 @@
// tokens. Our grammar doesn't have this property, so we can simplify the logic
// a bit.
int32_t startOffset = fOffset;
- if (startOffset == (int32_t)fText.length()) {
- return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0);
- }
State state = 1;
for (;;) {
if (fOffset >= (int32_t)fText.length()) {
- if (kAccepts[state] == -1) {
+ if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) {
return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0);
}
break;
}
- uint8_t c = (uint8_t)fText[fOffset];
- if (c <= 8 || c >= 127) {
- c = INVALID_CHAR;
+ uint8_t c = (uint8_t)(fText[fOffset] - 9);
+ if (c >= 118) {
+ c = kInvalidChar;
}
State newState = get_transition(kMappings[c], state);
if (!newState) {
diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp
index 738e386..ab4e3a6 100644
--- a/src/sksl/lex/Main.cpp
+++ b/src/sksl/lex/Main.cpp
@@ -116,13 +116,24 @@
states = std::max(states, row.size());
}
out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";
- // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual
- // input
- out << "static const uint8_t INVALID_CHAR = 18;";
- out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n ";
+
+ // Find the first character mapped in our DFA.
+ size_t startChar = 0;
+ for (; startChar < dfa.fCharMappings.size(); ++startChar) {
+ if (dfa.fCharMappings[startChar] != 0) {
+ break;
+ }
+ }
+
+ // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual
+ // input.
+ SkASSERT(startChar < 18);
+ out << "static constexpr uint8_t kInvalidChar = 18;";
+ out << "static constexpr int8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {\n"
+ " ";
const char* separator = "";
- for (int m : dfa.fCharMappings) {
- out << separator << std::to_string(m);
+ for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) {
+ out << separator << std::to_string(dfa.fCharMappings[index]);
separator = ", ";
}
out << "\n};\n";
@@ -148,20 +159,17 @@
// tokens. Our grammar doesn't have this property, so we can simplify the logic
// a bit.
int32_t startOffset = fOffset;
- if (startOffset == (int32_t)fText.length()) {
- return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
- }
- State state = 1;
+ State state = 1;
for (;;) {
if (fOffset >= (int32_t)fText.length()) {
- if (kAccepts[state] == -1) {
- return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0);
+ if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) {
+ return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
}
break;
}
- uint8_t c = (uint8_t) fText[fOffset];
- if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {
- c = INVALID_CHAR;
+ uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"();
+ if (c >= )" << dfa.fCharMappings.size() - startChar << R"() {
+ c = kInvalidChar;
}
State newState = get_transition(kMappings[c], state);
if (!newState) {
diff --git a/src/sksl/lex/TransitionTable.cpp b/src/sksl/lex/TransitionTable.cpp
index 6874eb5..9e9fb25 100644
--- a/src/sksl/lex/TransitionTable.cpp
+++ b/src/sksl/lex/TransitionTable.cpp
@@ -32,9 +32,8 @@
constexpr int kDataPerByte = 8 / kNumBits;
enum IndexType {
- kZero = 0,
+ kCompactEntry = 0,
kFullEntry,
- kCompactEntry,
};
struct IndexEntry {
IndexType type;
@@ -130,11 +129,7 @@
}
transitionSet.erase(0);
- if (transitionSet.empty()) {
- // This transition table was completely empty (every value was zero). No data needed;
- // zero pages are handled as a special index type.
- indices.push_back(IndexEntry{kZero, 0});
- } else if (transitionSet.size() <= kNumValues) {
+ if (transitionSet.size() <= kNumValues) {
// This table only contained a small number of unique nonzero values.
// Use a compact representation that squishes each value down to a few bits.
int index = add_compact_entry(transitionSet, data, &compactEntries);
@@ -155,10 +150,7 @@
}
// Emit all the structs our transition table will use.
- out << "struct IndexEntry {\n"
- << " uint16_t type : 2;\n"
- << " uint16_t pos : 14;\n"
- << "};\n"
+ out << "using IndexEntry = int16_t;\n"
<< "struct FullEntry {\n"
<< " State data[" << numTransitions << "];\n"
<< "};\n";
@@ -216,14 +208,19 @@
out << "};\n"
<< "static constexpr IndexEntry kIndices[] = {\n";
for (const IndexEntry& entry : indices) {
- out << " {" << entry.type << ", " << entry.pos << "},\n";
+ if (entry.type == kFullEntry) {
+ // Bit-not is used so that full entries start at -1 and go down from there.
+ out << ~entry.pos << ", ";
+ } else {
+ // Compact entries start at 0 and go up from there.
+ out << entry.pos << ", ";
+ }
}
out << "};\n"
<< "State get_transition(int transition, int state) {\n"
<< " IndexEntry index = kIndices[state];\n"
- << " if (index.type == 0) { return 0; }\n"
- << " if (index.type == 1) { return kFull[index.pos].data[transition]; }\n"
- << " const CompactEntry& entry = kCompact[index.pos];\n"
+ << " if (index < 0) { return kFull[~index].data[transition]; }\n"
+ << " const CompactEntry& entry = kCompact[index];\n"
<< " int value = entry.data[transition >> " << std::log2(kDataPerByte) << "];\n"
<< " value >>= " << kNumBits << " * (transition & " << kDataPerByte - 1 << ");\n"
<< " value &= " << kNumValues << ";\n"