Merge changes Icc10bf0a,Ifb5ab4eb
* changes:
tp: introduce tokenizer for SQLite statements
perfetto: add payload APIs to base::Status
diff --git a/Android.bp b/Android.bp
index b65d3fe..7265be2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -8425,6 +8425,7 @@
"src/base/scoped_file_unittest.cc",
"src/base/small_vector_unittest.cc",
"src/base/status_or_unittest.cc",
+ "src/base/status_unittest.cc",
"src/base/string_splitter_unittest.cc",
"src/base/string_utils_unittest.cc",
"src/base/string_view_unittest.cc",
@@ -10379,6 +10380,7 @@
"src/trace_processor/sqlite/sql_stats_table.cc",
"src/trace_processor/sqlite/sqlite_engine.cc",
"src/trace_processor/sqlite/sqlite_table.cc",
+ "src/trace_processor/sqlite/sqlite_tokenizer.cc",
"src/trace_processor/sqlite/sqlite_utils.cc",
"src/trace_processor/sqlite/stats_table.cc",
],
@@ -10390,6 +10392,7 @@
srcs: [
"src/trace_processor/sqlite/db_sqlite_table_unittest.cc",
"src/trace_processor/sqlite/query_constraints_unittest.cc",
+ "src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc",
"src/trace_processor/sqlite/sqlite_utils_unittest.cc",
],
}
diff --git a/BUILD b/BUILD
index a62edb8..f819fce 100644
--- a/BUILD
+++ b/BUILD
@@ -2174,6 +2174,8 @@
"src/trace_processor/sqlite/sqlite_engine.h",
"src/trace_processor/sqlite/sqlite_table.cc",
"src/trace_processor/sqlite/sqlite_table.h",
+ "src/trace_processor/sqlite/sqlite_tokenizer.cc",
+ "src/trace_processor/sqlite/sqlite_tokenizer.h",
"src/trace_processor/sqlite/sqlite_utils.cc",
"src/trace_processor/sqlite/sqlite_utils.h",
"src/trace_processor/sqlite/stats_table.cc",
diff --git a/include/perfetto/base/status.h b/include/perfetto/base/status.h
index 2939357..f059184 100644
--- a/include/perfetto/base/status.h
+++ b/include/perfetto/base/status.h
@@ -17,7 +17,10 @@
#ifndef INCLUDE_PERFETTO_BASE_STATUS_H_
#define INCLUDE_PERFETTO_BASE_STATUS_H_
+#include <optional>
#include <string>
+#include <string_view>
+#include <vector>
#include "perfetto/base/compiler.h"
#include "perfetto/base/export.h"
@@ -30,6 +33,10 @@
// This can used as the return type of functions which would usually return an
// bool for success or int for errno but also wants to add some string context
// (ususally for logging).
+//
+// Similar to absl::Status, an optional "payload" can also be included with more
+// context about the error. This allows passing additional metadata about the
+// error (e.g. location of errors, potential mitigations etc).
class PERFETTO_EXPORT_COMPONENT Status {
public:
Status() : ok_(true) {}
@@ -52,9 +59,49 @@
const std::string& message() const { return message_; }
const char* c_message() const { return message_.c_str(); }
+ //////////////////////////////////////////////////////////////////////////////
+ // Payload Management APIs
+ //////////////////////////////////////////////////////////////////////////////
+
+ // Payloads can be attached to error statuses to provide additional context.
+ //
+ // Payloads are (key, value) pairs, where the key is a string acting as a
+ // unique "type URL" and the value is an opaque string. The "type URL" should
+ // be unique, follow the format of a URL and, ideally, documentation on how to
+ // interpret its associated data should be available.
+ //
+ // To attach a payload to a status object, call `Status::SetPayload()`.
+ // Similarly, to extract the payload from a status, call
+ // `Status::GetPayload()`.
+ //
+ // Note: the payload APIs are only meaningful to call when the status is an
+ // error. Otherwise, all methods are noops.
+
+ // Gets the payload for the given |type_url| if one exists.
+ //
+ // Will always return std::nullopt if |ok()|.
+ std::optional<std::string_view> GetPayload(std::string_view type_url);
+
+  // Sets the payload for the given key, replacing any existing value for it.
+ //
+ // Will always do nothing if |ok()|.
+ void SetPayload(std::string_view type_url, std::string value);
+
+ // Erases the payload for the given string and returns true if the payload
+ // existed and was erased.
+ //
+  // Will always do nothing and return false if |ok()|.
+ bool ErasePayload(std::string_view type_url);
+
private:
+ struct Payload {
+ std::string type_url;
+ std::string payload;
+ };
+
bool ok_ = false;
std::string message_;
+ std::vector<Payload> payloads_;
};
// Returns a status object which represents the Ok status.
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index 8d48bf0..7dfce6b 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -199,6 +199,7 @@
"scoped_file_unittest.cc",
"small_vector_unittest.cc",
"status_or_unittest.cc",
+ "status_unittest.cc",
"string_splitter_unittest.cc",
"string_utils_unittest.cc",
"string_view_unittest.cc",
diff --git a/src/base/status.cc b/src/base/status.cc
index 30ccc47..d3c13e8 100644
--- a/src/base/status.cc
+++ b/src/base/status.cc
@@ -17,6 +17,7 @@
#include "perfetto/base/status.h"
#include <stdarg.h>
+#include <algorithm>
namespace perfetto {
namespace base {
@@ -31,5 +32,42 @@
return status;
}
+// Returns the payload stored under |type_url|; always std::nullopt when ok().
+std::optional<std::string_view> Status::GetPayload(std::string_view type_url) {
+  if (!ok()) {
+    for (const Payload& entry : payloads_) {
+      if (entry.type_url == type_url) {
+        return std::string_view(entry.payload);
+      }
+    }
+  }
+  return std::nullopt;
+}
+
+void Status::SetPayload(std::string_view type_url, std::string value) {
+  if (ok()) {
+    return;  // Payloads are only kept on error statuses.
+  }
+  for (auto& kv : payloads_) {
+    if (kv.type_url == type_url) {
+      kv.payload = std::move(value);  // |value| is a by-value sink: no copy.
+      return;
+    }
+  }
+  payloads_.push_back(Payload{std::string(type_url), std::move(value)});
+}
+
+bool Status::ErasePayload(std::string_view type_url) {
+  if (ok())
+    return false;
+  const auto matches = [type_url](const Payload& entry) {
+    return entry.type_url == type_url;  // Keyed purely on the type URL.
+  };
+  auto tail = std::remove_if(payloads_.begin(), payloads_.end(), matches);
+  const bool removed_any = tail != payloads_.end();
+  payloads_.erase(tail, payloads_.end());  // Trim the shifted-out matches.
+  return removed_any;
+}
+
} // namespace base
} // namespace perfetto
diff --git a/src/base/status_unittest.cc b/src/base/status_unittest.cc
new file mode 100644
index 0000000..df42b31
--- /dev/null
+++ b/src/base/status_unittest.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/base/status.h"
+
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace base {
+
+TEST(StatusTest, GetMissingPayload) {
+ base::Status status = base::ErrStatus("Error");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar"), std::nullopt);
+}
+
+TEST(StatusTest, SetThenGetPayload) {
+ base::Status status = base::ErrStatus("Error");
+ status.SetPayload("test.foo.com/bar", "payload_value");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar"), "payload_value");
+}
+
+TEST(StatusTest, SetEraseGetPayload) {
+ base::Status status = base::ErrStatus("Error");
+ status.SetPayload("test.foo.com/bar", "payload_value");
+ ASSERT_TRUE(status.ErasePayload("test.foo.com/bar"));
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar"), std::nullopt);
+}
+
+TEST(StatusTest, SetOverride) {
+ base::Status status = base::ErrStatus("Error");
+ status.SetPayload("test.foo.com/bar", "payload_value");
+ status.SetPayload("test.foo.com/bar", "other_value");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar"), "other_value");
+}
+
+TEST(StatusTest, SetGetOk) {
+ base::Status status = base::OkStatus();
+ status.SetPayload("test.foo.com/bar", "payload_value");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar"), std::nullopt);
+}
+
+TEST(StatusTest, SetMultipleAndDuplicate) {
+ base::Status status = base::ErrStatus("Error");
+ status.SetPayload("test.foo.com/bar", "payload_value");
+ status.SetPayload("test.foo.com/bar1", "1");
+ status.SetPayload("test.foo.com/bar2", "2");
+ status.SetPayload("test.foo.com/bar", "other_value");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar"), "other_value");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar1"), "1");
+ ASSERT_EQ(status.GetPayload("test.foo.com/bar2"), "2");
+}
+
+} // namespace base
+} // namespace perfetto
diff --git a/src/trace_processor/sqlite/BUILD.gn b/src/trace_processor/sqlite/BUILD.gn
index 7045815..b35605a 100644
--- a/src/trace_processor/sqlite/BUILD.gn
+++ b/src/trace_processor/sqlite/BUILD.gn
@@ -28,6 +28,8 @@
"sqlite_engine.h",
"sqlite_table.cc",
"sqlite_table.h",
+ "sqlite_tokenizer.cc",
+ "sqlite_tokenizer.h",
"sqlite_utils.cc",
"sqlite_utils.h",
"sqlite_utils.h",
@@ -72,6 +74,7 @@
sources = [
"db_sqlite_table_unittest.cc",
"query_constraints_unittest.cc",
+ "sqlite_tokenizer_unittest.cc",
"sqlite_utils_unittest.cc",
]
deps = [
diff --git a/src/trace_processor/sqlite/sqlite_tokenizer.cc b/src/trace_processor/sqlite/sqlite_tokenizer.cc
new file mode 100644
index 0000000..1766baa
--- /dev/null
+++ b/src/trace_processor/sqlite/sqlite_tokenizer.cc
@@ -0,0 +1,448 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/sqlite/sqlite_tokenizer.h"
+
+#include <ctype.h>
+#include <sqlite3.h>
+#include <optional>
+#include <string_view>
+
+#include "perfetto/base/compiler.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// The contents of this file are ~copied from SQLite with some modifications to
+// minimize the amount copied: i.e. if we can call a libc function/public SQLite
+// API instead of a private one.
+//
+// The changes are as follows:
+// 1. Remove all ifdefs to only keep branches we actually use
+// 2. Change handling of |CC_KYWD0| to remove distinction between different
+//    SQLite keywords, reducing how many things we need to copy over.
+// 3. Constants are changed from being macro defines to being values in
+//    |SqliteTokenType|.
+
+namespace {
+
+const unsigned char sqlite3CtypeMap[256] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 00..07 ........ */
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 08..0f ........ */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 10..17 ........ */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 18..1f ........ */
+ 0x01, 0x00, 0x80, 0x00, 0x40, 0x00, 0x00, 0x80, /* 20..27 !"#$%&' */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 28..2f ()*+,-./ */
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, /* 30..37 01234567 */
+ 0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 38..3f 89:;<=>? */
+
+ 0x00, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x02, /* 40..47 @ABCDEFG */
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 48..4f HIJKLMNO */
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 50..57 PQRSTUVW */
+ 0x02, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x40, /* 58..5f XYZ[\]^_ */
+ 0x80, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x22, /* 60..67 `abcdefg */
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 68..6f hijklmno */
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 70..77 pqrstuvw */
+ 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, /* 78..7f xyz{|}~. */
+
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 80..87 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 88..8f ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 90..97 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 98..9f ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a0..a7 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a8..af ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b0..b7 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b8..bf ........ */
+
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c0..c7 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c8..cf ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d0..d7 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d8..df ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e0..e7 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e8..ef ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* f0..f7 ........ */
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 /* f8..ff ........ */
+};
+
+#define CC_X 0 /* The letter 'x', or start of BLOB literal */
+#define CC_KYWD0 1 /* First letter of a keyword */
+#define CC_KYWD 2 /* Alphabetics or '_'. Usable in a keyword */
+#define CC_DIGIT 3 /* Digits */
+#define CC_DOLLAR 4 /* '$' */
+#define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */
+#define CC_VARNUM 6 /* '?'. Numeric SQL variables */
+#define CC_SPACE 7 /* Space characters */
+#define CC_QUOTE 8 /* '"', '\'', or '`'. String literals, quoted ids */
+#define CC_QUOTE2 9 /* '['. [...] style quoted ids */
+#define CC_PIPE 10 /* '|'. Bitwise OR or concatenate */
+#define CC_MINUS 11 /* '-'. Minus or SQL-style comment */
+#define CC_LT 12 /* '<'. Part of < or <= or <> */
+#define CC_GT 13 /* '>'. Part of > or >= */
+#define CC_EQ 14 /* '='. Part of = or == */
+#define CC_BANG 15 /* '!'. Part of != */
+#define CC_SLASH 16 /* '/'. / or c-style comment */
+#define CC_LP 17 /* '(' */
+#define CC_RP 18 /* ')' */
+#define CC_SEMI 19 /* ';' */
+#define CC_PLUS 20 /* '+' */
+#define CC_STAR 21 /* '*' */
+#define CC_PERCENT 22 /* '%' */
+#define CC_COMMA 23 /* ',' */
+#define CC_AND 24 /* '&' */
+#define CC_TILDA 25 /* '~' */
+#define CC_DOT 26 /* '.' */
+#define CC_ID 27 /* unicode characters usable in IDs */
+#define CC_NUL 29 /* 0x00 */
+#define CC_BOM 30 /* First byte of UTF8 BOM: 0xEF 0xBB 0xBF */
+
+// clang-format off
+static const unsigned char aiClass[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
+/* 0x */ 29, 28, 28, 28, 28, 28, 28, 28, 28, 7, 7, 28, 7, 7, 28, 28,
+/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+/* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16,
+/* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6,
+/* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 9, 28, 28, 28, 2,
+/* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 28, 10, 28, 25, 28,
+/* 8x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* 9x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Ax */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Cx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Dx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Ex */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 30,
+/* Fx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27
+};
+// clang-format on
+
+#define IdChar(C) ((sqlite3CtypeMap[static_cast<unsigned char>(C)] & 0x46) != 0)
+
+// Copy of |sqlite3GetToken| for use by the PerfettoSql transpiler.
+//
+// |sqlite3GetToken| is static to sqlite3.c in most distributions of SQLite and
+// redefining SQLITE_PRIVATE is not viable everywhere trace processor is built,
+// so we keep a copy here.
+//
+// Reads one token from the NUL-terminated string |z|, stores its type in
+// |tokenType| and returns its length in bytes (0 only at the terminator).
+int GetSqliteToken(const unsigned char* z, SqliteTokenType* tokenType) {
+  int i, c;
+  switch (aiClass[*z]) { /* Switch on the character-class of the first byte
+                         ** of the token. See the comment on the CC_ defines
+                         ** above. */
+    case CC_SPACE: {
+      for (i = 1; isspace(z[i]); i++) {
+      }
+      *tokenType = SqliteTokenType::TK_SPACE;
+      return i;
+    }
+    case CC_MINUS: {
+      if (z[1] == '-') {
+        for (i = 2; (c = z[i]) != 0 && c != '\n'; i++) {
+        }
+        *tokenType = SqliteTokenType::TK_SPACE; /* IMP: R-22934-25134 */
+        return i;
+      } else if (z[1] == '>') {
+        *tokenType = SqliteTokenType::TK_PTR;
+        return 2 + (z[2] == '>');
+      }
+      *tokenType = SqliteTokenType::TK_MINUS;
+      return 1;
+    }
+    case CC_LP: {
+      *tokenType = SqliteTokenType::TK_LP;
+      return 1;
+    }
+    case CC_RP: {
+      *tokenType = SqliteTokenType::TK_RP;
+      return 1;
+    }
+    case CC_SEMI: {
+      *tokenType = SqliteTokenType::TK_SEMI;
+      return 1;
+    }
+    case CC_PLUS: {
+      *tokenType = SqliteTokenType::TK_PLUS;
+      return 1;
+    }
+    case CC_STAR: {
+      *tokenType = SqliteTokenType::TK_STAR;
+      return 1;
+    }
+    case CC_SLASH: {
+      if (z[1] != '*' || z[2] == 0) {
+        *tokenType = SqliteTokenType::TK_SLASH;
+        return 1;
+      }
+      for (i = 3, c = z[2]; (c != '*' || z[i] != '/') && (c = z[i]) != 0; i++) {
+      }
+      if (c)
+        i++;
+      *tokenType = SqliteTokenType::TK_SPACE; /* IMP: R-22934-25134 */
+      return i;
+    }
+    case CC_PERCENT: {
+      *tokenType = SqliteTokenType::TK_REM;
+      return 1;
+    }
+    case CC_EQ: {
+      *tokenType = SqliteTokenType::TK_EQ;
+      return 1 + (z[1] == '=');
+    }
+    case CC_LT: {
+      if ((c = z[1]) == '=') {
+        *tokenType = SqliteTokenType::TK_LE;
+        return 2;
+      } else if (c == '>') {
+        *tokenType = SqliteTokenType::TK_NE;
+        return 2;
+      } else if (c == '<') {
+        *tokenType = SqliteTokenType::TK_LSHIFT;
+        return 2;
+      } else {
+        *tokenType = SqliteTokenType::TK_LT;
+        return 1;
+      }
+    }
+    case CC_GT: {
+      if ((c = z[1]) == '=') {
+        *tokenType = SqliteTokenType::TK_GE;
+        return 2;
+      } else if (c == '>') {
+        *tokenType = SqliteTokenType::TK_RSHIFT;
+        return 2;
+      } else {
+        *tokenType = SqliteTokenType::TK_GT;
+        return 1;
+      }
+    }
+    case CC_BANG: {
+      if (z[1] != '=') {
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+        return 1;
+      } else {
+        *tokenType = SqliteTokenType::TK_NE;
+        return 2;
+      }
+    }
+    case CC_PIPE: {
+      if (z[1] != '|') {
+        *tokenType = SqliteTokenType::TK_BITOR;
+        return 1;
+      } else {
+        *tokenType = SqliteTokenType::TK_CONCAT;
+        return 2;
+      }
+    }
+    case CC_COMMA: {
+      *tokenType = SqliteTokenType::TK_COMMA;
+      return 1;
+    }
+    case CC_AND: {
+      *tokenType = SqliteTokenType::TK_BITAND;
+      return 1;
+    }
+    case CC_TILDA: {
+      *tokenType = SqliteTokenType::TK_BITNOT;
+      return 1;
+    }
+    case CC_QUOTE: {
+      int delim = z[0];
+      for (i = 1; (c = z[i]) != 0; i++) {
+        if (c == delim) {
+          if (z[i + 1] == delim) {
+            i++;
+          } else {
+            break;
+          }
+        }
+      }
+      if (c == '\'') {
+        *tokenType = SqliteTokenType::TK_STRING;
+        return i + 1;
+      } else if (c != 0) {
+        *tokenType = SqliteTokenType::TK_ID;
+        return i + 1;
+      } else {
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+        return i;
+      }
+    }
+    case CC_DOT: {
+      if (!isdigit(z[1])) {
+        *tokenType = SqliteTokenType::TK_DOT;
+        return 1;
+      }
+      [[fallthrough]];
+    }
+    case CC_DIGIT: {
+      *tokenType = SqliteTokenType::TK_INTEGER;
+      if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X') && isxdigit(z[2])) {
+        for (i = 3; isxdigit(z[i]); i++) {
+        }
+        return i;
+      }
+      for (i = 0; isdigit(z[i]); i++) {  // Decimal only: 'e' starts an exponent.
+      }
+      if (z[i] == '.') {
+        i++;
+        while (isdigit(z[i])) {  // Fraction digits are decimal as well.
+          i++;
+        }
+        *tokenType = SqliteTokenType::TK_FLOAT;
+      }
+      if ((z[i] == 'e' || z[i] == 'E') &&
+          (isdigit(z[i + 1]) ||
+           ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
+        i += 2;
+        while (isdigit(z[i])) {
+          i++;
+        }
+        *tokenType = SqliteTokenType::TK_FLOAT;
+      }
+      while (IdChar(z[i])) {
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+        i++;
+      }
+      return i;
+    }
+    case CC_QUOTE2: {
+      for (i = 1, c = z[0]; c != ']' && (c = z[i]) != 0; i++) {
+      }
+      *tokenType =
+          c == ']' ? SqliteTokenType::TK_ID : SqliteTokenType::TK_ILLEGAL;
+      return i;
+    }
+    case CC_VARNUM: {
+      *tokenType = SqliteTokenType::TK_VARIABLE;
+      for (i = 1; isdigit(z[i]); i++) {
+      }
+      return i;
+    }
+    case CC_DOLLAR:
+    case CC_VARALPHA: {
+      int n = 0;
+      *tokenType = SqliteTokenType::TK_VARIABLE;
+      for (i = 1; (c = z[i]) != 0; i++) {
+        if (IdChar(c)) {
+          n++;
+        } else if (c == '(' && n > 0) {
+          do {
+            i++;
+          } while ((c = z[i]) != 0 && !isspace(c) && c != ')');
+          if (c == ')') {
+            i++;
+          } else {
+            *tokenType = SqliteTokenType::TK_ILLEGAL;
+          }
+          break;
+        } else if (c == ':' && z[i + 1] == ':') {
+          i++;
+        } else {
+          break;
+        }
+      }
+      if (n == 0)
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+      return i;
+    }
+    case CC_KYWD0: {
+      for (i = 1; aiClass[z[i]] <= CC_KYWD; i++) {
+      }
+      if (IdChar(z[i])) {
+        /* This token started out using characters that can appear in keywords,
+        ** but z[i] is a character not allowed within keywords, so this must
+        ** be an identifier instead */
+        i++;
+        break;
+      }
+      if (sqlite3_keyword_check(reinterpret_cast<const char*>(z), i)) {
+        *tokenType = SqliteTokenType::TK_GENERIC_KEYWORD;
+      } else {
+        *tokenType = SqliteTokenType::TK_ID;
+      }
+      return i;
+    }
+    case CC_X: {
+      if (z[1] == '\'') {
+        *tokenType = SqliteTokenType::TK_BLOB;
+        for (i = 2; isxdigit(z[i]); i++) {  // Blob literals hold hex digits.
+        }
+        if (z[i] != '\'' || i % 2) {
+          *tokenType = SqliteTokenType::TK_ILLEGAL;
+          while (z[i] && z[i] != '\'') {
+            i++;
+          }
+        }
+        if (z[i])
+          i++;
+        return i;
+      }
+      [[fallthrough]];
+    }
+    case CC_KYWD:
+    case CC_ID: {
+      i = 1;
+      break;
+    }
+    case CC_BOM: {
+      if (z[1] == 0xbb && z[2] == 0xbf) {
+        *tokenType = SqliteTokenType::TK_SPACE;
+        return 3;
+      }
+      i = 1;
+      break;
+    }
+    case CC_NUL: {
+      *tokenType = SqliteTokenType::TK_ILLEGAL;
+      return 0;
+    }
+    default: {
+      *tokenType = SqliteTokenType::TK_ILLEGAL;
+      return 1;
+    }
+  }
+  while (IdChar(z[i])) {
+    i++;
+  }
+  *tokenType = SqliteTokenType::TK_ID;
+  return i;
+}
+
+} // namespace
+
+SqliteTokenizer::SqliteTokenizer(const char* sql) : ptr_(sql) {}
+
+// Tokenizes at |ptr_| and advances |ptr_| past the token that was read.
+SqliteTokenizer::Token SqliteTokenizer::Next() {
+  SqliteTokenType type;
+  const int length = GetSqliteToken(unsigned_ptr(), &type);
+  Token result{std::string_view(ptr_, static_cast<uint32_t>(length)), type};
+  ptr_ += length;
+  return result;
+}
+
+SqliteTokenizer::Token SqliteTokenizer::NextNonWhitespace() {
+  Token candidate = Next();
+  while (candidate.token_type == SqliteTokenType::TK_SPACE)
+    candidate = Next();  // Comments tokenize as TK_SPACE, so they are skipped.
+  return candidate;
+}
+
+} // namespace trace_processor
+} // namespace perfetto
diff --git a/src/trace_processor/sqlite/sqlite_tokenizer.h b/src/trace_processor/sqlite/sqlite_tokenizer.h
new file mode 100644
index 0000000..0d3bfb6
--- /dev/null
+++ b/src/trace_processor/sqlite/sqlite_tokenizer.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
+#define SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
+
+#include <optional>
+#include <string_view>
+
+namespace perfetto {
+namespace trace_processor {
+
+// List of token types returnable by |SqliteTokenizer|
+// 1:1 matches the definitions in SQLite.
+enum class SqliteTokenType : uint32_t {
+ TK_SEMI = 1,
+ TK_LP = 22,
+ TK_RP = 23,
+ TK_COMMA = 25,
+ TK_NE = 52,
+ TK_EQ = 53,
+ TK_GT = 54,
+ TK_LE = 55,
+ TK_LT = 56,
+ TK_GE = 57,
+ TK_ID = 59,
+ TK_BITAND = 102,
+ TK_BITOR = 103,
+ TK_LSHIFT = 104,
+ TK_RSHIFT = 105,
+ TK_PLUS = 106,
+ TK_MINUS = 107,
+ TK_STAR = 108,
+ TK_SLASH = 109,
+ TK_REM = 110,
+ TK_CONCAT = 111,
+ TK_PTR = 112,
+ TK_BITNOT = 114,
+ TK_STRING = 117,
+ TK_DOT = 141,
+ TK_FLOAT = 153,
+ TK_BLOB = 154,
+ TK_INTEGER = 155,
+ TK_VARIABLE = 156,
+ TK_SPACE = 183,
+ TK_ILLEGAL = 184,
+
+ // Generic constant which replaces all the keywords in SQLite as we do not
+ // care about the distinguishing between the vast majority of them.
+ TK_GENERIC_KEYWORD = 1000,
+};
+
+// Tokenizes SQL statements according to SQLite SQL language specification:
+// https://www2.sqlite.org/hlr40000.html
+//
+// Usage of this class:
+//  // |my_sql_string| must outlive |tzr|: tokens are views into it.
+//  SqliteTokenizer tzr(my_sql_string.c_str());
+// for (auto t = tzr.Next(); t.token_type != TK_SEMI; t = tzr.Next()) {
+// // Handle t here
+// }
+class SqliteTokenizer {
+ public:
+ // A single SQL token according to the SQLite standard.
+ struct Token {
+ // The string contents of the token.
+ std::string_view str;
+
+ // The type of the token.
+ SqliteTokenType token_type;
+
+ bool operator==(const Token& o) const {
+ return str == o.str && token_type == o.token_type;
+ }
+ };
+
+ explicit SqliteTokenizer(const char* sql);
+
+ // Returns the next SQL token.
+ Token Next();
+
+ // Returns the next SQL token which is not of type TK_SPACE.
+ Token NextNonWhitespace();
+
+ // Returns the pointer to the start of the next token which will be returned.
+ const char* ptr() const { return ptr_; }
+
+ private:
+ const unsigned char* unsigned_ptr() const {
+ return reinterpret_cast<const unsigned char*>(ptr_);
+ }
+
+ const char* ptr_ = nullptr;
+};
+
+} // namespace trace_processor
+} // namespace perfetto
+
+#endif // SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
diff --git a/src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc b/src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc
new file mode 100644
index 0000000..44946b7
--- /dev/null
+++ b/src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/sqlite/sqlite_tokenizer.h"
+#include <vector>
+
+#include "perfetto/base/logging.h"
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+
+using Token = SqliteTokenizer::Token;
+using Type = SqliteTokenType;
+
+class SqliteTokenizerTest : public ::testing::Test {
+ protected:
+ std::vector<SqliteTokenizer::Token> Tokenize(const char* ptr) {
+ SqliteTokenizer tokenizer(ptr);
+ std::vector<SqliteTokenizer::Token> tokens;
+ for (auto t = tokenizer.Next(); !t.str.empty(); t = tokenizer.Next()) {
+ tokens.push_back(t);
+ }
+ return tokens;
+ }
+};
+
+TEST_F(SqliteTokenizerTest, EmptyString) {
+ ASSERT_THAT(Tokenize(""), testing::IsEmpty());
+}
+
+TEST_F(SqliteTokenizerTest, OnlySpace) {
+ ASSERT_THAT(Tokenize(" "), testing::ElementsAre(Token{" ", Type::TK_SPACE}));
+}
+
+TEST_F(SqliteTokenizerTest, SpaceColon) {
+ ASSERT_THAT(Tokenize(" ;"), testing::ElementsAre(Token{" ", Type::TK_SPACE},
+ Token{";", Type::TK_SEMI}));
+}
+
+TEST_F(SqliteTokenizerTest, Select) {
+ ASSERT_THAT(
+ Tokenize("SELECT * FROM slice;"),
+ testing::ElementsAre(
+ Token{"SELECT", Type::TK_GENERIC_KEYWORD}, Token{" ", Type::TK_SPACE},
+ Token{"*", Type::TK_STAR}, Token{" ", Type::TK_SPACE},
+ Token{"FROM", Type::TK_GENERIC_KEYWORD}, Token{" ", Type::TK_SPACE},
+ Token{"slice", Type::TK_ID}, Token{";", Type::TK_SEMI}));
+}
+
+} // namespace
+} // namespace trace_processor
+} // namespace perfetto