Merge changes Icc10bf0a,Ifb5ab4eb

* changes:
  tp: introduce tokenizer for SQLite statements
  perfetto: add payload APIs to base::Status
diff --git a/Android.bp b/Android.bp
index b65d3fe..7265be2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -8425,6 +8425,7 @@
         "src/base/scoped_file_unittest.cc",
         "src/base/small_vector_unittest.cc",
         "src/base/status_or_unittest.cc",
+        "src/base/status_unittest.cc",
         "src/base/string_splitter_unittest.cc",
         "src/base/string_utils_unittest.cc",
         "src/base/string_view_unittest.cc",
@@ -10379,6 +10380,7 @@
         "src/trace_processor/sqlite/sql_stats_table.cc",
         "src/trace_processor/sqlite/sqlite_engine.cc",
         "src/trace_processor/sqlite/sqlite_table.cc",
+        "src/trace_processor/sqlite/sqlite_tokenizer.cc",
         "src/trace_processor/sqlite/sqlite_utils.cc",
         "src/trace_processor/sqlite/stats_table.cc",
     ],
@@ -10390,6 +10392,7 @@
     srcs: [
         "src/trace_processor/sqlite/db_sqlite_table_unittest.cc",
         "src/trace_processor/sqlite/query_constraints_unittest.cc",
+        "src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc",
         "src/trace_processor/sqlite/sqlite_utils_unittest.cc",
     ],
 }
diff --git a/BUILD b/BUILD
index a62edb8..f819fce 100644
--- a/BUILD
+++ b/BUILD
@@ -2174,6 +2174,8 @@
         "src/trace_processor/sqlite/sqlite_engine.h",
         "src/trace_processor/sqlite/sqlite_table.cc",
         "src/trace_processor/sqlite/sqlite_table.h",
+        "src/trace_processor/sqlite/sqlite_tokenizer.cc",
+        "src/trace_processor/sqlite/sqlite_tokenizer.h",
         "src/trace_processor/sqlite/sqlite_utils.cc",
         "src/trace_processor/sqlite/sqlite_utils.h",
         "src/trace_processor/sqlite/stats_table.cc",
diff --git a/include/perfetto/base/status.h b/include/perfetto/base/status.h
index 2939357..f059184 100644
--- a/include/perfetto/base/status.h
+++ b/include/perfetto/base/status.h
@@ -17,7 +17,10 @@
 #ifndef INCLUDE_PERFETTO_BASE_STATUS_H_
 #define INCLUDE_PERFETTO_BASE_STATUS_H_
 
+#include <optional>
 #include <string>
+#include <string_view>
+#include <vector>
 
 #include "perfetto/base/compiler.h"
 #include "perfetto/base/export.h"
@@ -30,6 +33,10 @@
 // This can used as the return type of functions which would usually return an
 // bool for success or int for errno but also wants to add some string context
 // (ususally for logging).
+//
+// Similar to absl::Status, an optional "payload" can also be included with more
+// context about the error. This allows passing additional metadata about the
+// error (e.g. location of errors, potential mitigations etc).
 class PERFETTO_EXPORT_COMPONENT Status {
  public:
   Status() : ok_(true) {}
@@ -52,9 +59,49 @@
   const std::string& message() const { return message_; }
   const char* c_message() const { return message_.c_str(); }
 
+  //////////////////////////////////////////////////////////////////////////////
+  // Payload Management APIs
+  //////////////////////////////////////////////////////////////////////////////
+
+  // Payloads can be attached to error statuses to provide additional context.
+  //
+  // Payloads are (key, value) pairs, where the key is a string acting as a
+  // unique "type URL" and the value is an opaque string. The "type URL" should
+  // be unique, follow the format of a URL and, ideally, documentation on how to
+  // interpret its associated data should be available.
+  //
+  // To attach a payload to a status object, call `Status::SetPayload()`.
+  // Similarly, to extract the payload from a status, call
+  // `Status::GetPayload()`.
+  //
+  // Note: the payload APIs are only meaningful to call when the status is an
+  // error. Otherwise, all methods are noops.
+
+  // Gets the payload for the given |type_url| if one exists.
+  //
+  // Will always return std::nullopt if |ok()|.
+  std::optional<std::string_view> GetPayload(std::string_view type_url);
+
+  // Sets the payload for the given |type_url|, replacing any existing value.
+  //
+  // Will always do nothing if |ok()|.
+  void SetPayload(std::string_view type_url, std::string value);
+
+  // Erases the payload for the given |type_url| and returns true if the payload
+  // existed and was erased.
+  //
+  // Will always do nothing and return false if |ok()|.
+  bool ErasePayload(std::string_view type_url);
+
  private:
+  struct Payload {
+    std::string type_url;
+    std::string payload;
+  };
+
   bool ok_ = false;
   std::string message_;
+  std::vector<Payload> payloads_;
 };
 
 // Returns a status object which represents the Ok status.
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index 8d48bf0..7dfce6b 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -199,6 +199,7 @@
     "scoped_file_unittest.cc",
     "small_vector_unittest.cc",
     "status_or_unittest.cc",
+    "status_unittest.cc",
     "string_splitter_unittest.cc",
     "string_utils_unittest.cc",
     "string_view_unittest.cc",
diff --git a/src/base/status.cc b/src/base/status.cc
index 30ccc47..d3c13e8 100644
--- a/src/base/status.cc
+++ b/src/base/status.cc
@@ -17,6 +17,7 @@
 #include "perfetto/base/status.h"
 
 #include <stdarg.h>
+#include <algorithm>
 
 namespace perfetto {
 namespace base {
@@ -31,5 +32,42 @@
   return status;
 }
 
+std::optional<std::string_view> Status::GetPayload(std::string_view type_url) {
+  if (ok())  // Payloads are only ever reported for error statuses.
+    return std::nullopt;
+  // Linear scan: each |type_url| appears at most once in |payloads_|.
+  auto it = std::find_if(
+      payloads_.begin(), payloads_.end(),
+      [type_url](const Payload& p) { return p.type_url == type_url; });
+  if (it == payloads_.end())
+    return std::nullopt;
+  return std::string_view(it->payload);  // View into |payloads_|, not a copy.
+}
+
+void Status::SetPayload(std::string_view type_url, std::string value) {
+  if (ok()) {
+    return;  // Payloads are only meaningful on error statuses.
+  }
+  for (auto& kv : payloads_) {
+    if (kv.type_url == type_url) {
+      kv.payload = std::move(value);  // |value| is a by-value sink: no copy.
+      return;
+    }
+  }
+  payloads_.push_back(Payload{std::string(type_url), std::move(value)});
+}
+
+bool Status::ErasePayload(std::string_view type_url) {
+  if (ok())  // Ok statuses have no payloads to erase.
+    return false;
+  const auto size_before = payloads_.size();
+  payloads_.erase(
+      std::remove_if(
+          payloads_.begin(), payloads_.end(),
+          [&type_url](const Payload& p) { return p.type_url == type_url; }),
+      payloads_.end());
+  return payloads_.size() != size_before;  // True iff something was removed.
+}
+
 }  // namespace base
 }  // namespace perfetto
diff --git a/src/base/status_unittest.cc b/src/base/status_unittest.cc
new file mode 100644
index 0000000..df42b31
--- /dev/null
+++ b/src/base/status_unittest.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/base/status.h"
+
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace base {
+
+TEST(StatusTest, GetMissingPayload) {
+  base::Status status = base::ErrStatus("Error");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar"), std::nullopt);
+}
+
+TEST(StatusTest, SetThenGetPayload) {
+  base::Status status = base::ErrStatus("Error");
+  status.SetPayload("test.foo.com/bar", "payload_value");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar"), "payload_value");
+}
+
+TEST(StatusTest, SetEraseGetPayload) {
+  base::Status status = base::ErrStatus("Error");
+  status.SetPayload("test.foo.com/bar", "payload_value");
+  ASSERT_TRUE(status.ErasePayload("test.foo.com/bar"));
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar"), std::nullopt);
+}
+
+TEST(StatusTest, SetOverride) {
+  base::Status status = base::ErrStatus("Error");
+  status.SetPayload("test.foo.com/bar", "payload_value");
+  status.SetPayload("test.foo.com/bar", "other_value");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar"), "other_value");
+}
+
+TEST(StatusTest, SetGetOk) {
+  base::Status status = base::OkStatus();
+  status.SetPayload("test.foo.com/bar", "payload_value");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar"), std::nullopt);
+}
+
+TEST(StatusTest, SetMultipleAndDuplicate) {
+  base::Status status = base::ErrStatus("Error");
+  status.SetPayload("test.foo.com/bar", "payload_value");
+  status.SetPayload("test.foo.com/bar1", "1");
+  status.SetPayload("test.foo.com/bar2", "2");
+  status.SetPayload("test.foo.com/bar", "other_value");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar"), "other_value");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar1"), "1");
+  ASSERT_EQ(status.GetPayload("test.foo.com/bar2"), "2");
+}
+
+}  // namespace base
+}  // namespace perfetto
diff --git a/src/trace_processor/sqlite/BUILD.gn b/src/trace_processor/sqlite/BUILD.gn
index 7045815..b35605a 100644
--- a/src/trace_processor/sqlite/BUILD.gn
+++ b/src/trace_processor/sqlite/BUILD.gn
@@ -28,6 +28,8 @@
     "sqlite_engine.h",
     "sqlite_table.cc",
     "sqlite_table.h",
+    "sqlite_tokenizer.cc",
+    "sqlite_tokenizer.h",
     "sqlite_utils.cc",
     "sqlite_utils.h",
     "sqlite_utils.h",
@@ -72,6 +74,7 @@
   sources = [
     "db_sqlite_table_unittest.cc",
     "query_constraints_unittest.cc",
+    "sqlite_tokenizer_unittest.cc",
     "sqlite_utils_unittest.cc",
   ]
   deps = [
diff --git a/src/trace_processor/sqlite/sqlite_tokenizer.cc b/src/trace_processor/sqlite/sqlite_tokenizer.cc
new file mode 100644
index 0000000..1766baa
--- /dev/null
+++ b/src/trace_processor/sqlite/sqlite_tokenizer.cc
@@ -0,0 +1,448 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/sqlite/sqlite_tokenizer.h"
+
+#include <ctype.h>
+#include <sqlite3.h>
+#include <optional>
+#include <string_view>
+
+#include "perfetto/base/compiler.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// The contents of this file are ~copied from SQLite with some modifications to
+// minimize the amount copied: i.e. if we can call a libc function/public SQLite
+// API instead of a private one.
+//
+// The changes are as follows:
+// 1. Remove all ifdefs to only keep branches we actually use
+// 2. Change handling of |CC_KYWD0| to remove distinction between different
+//    SQLite keywords, reducing how many things we need to copy over.
+// 3. Constants are changed from being macro defines to being values in
+//    |SqliteTokenType|.
+
+namespace {
+
+const unsigned char sqlite3CtypeMap[256] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 00..07    ........ */
+    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 08..0f    ........ */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 10..17    ........ */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 18..1f    ........ */
+    0x01, 0x00, 0x80, 0x00, 0x40, 0x00, 0x00, 0x80, /* 20..27     !"#$%&' */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 28..2f    ()*+,-./ */
+    0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, /* 30..37    01234567 */
+    0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 38..3f    89:;<=>? */
+
+    0x00, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x02, /* 40..47    @ABCDEFG */
+    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 48..4f    HIJKLMNO */
+    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 50..57    PQRSTUVW */
+    0x02, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x40, /* 58..5f    XYZ[\]^_ */
+    0x80, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x22, /* 60..67    `abcdefg */
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 68..6f    hijklmno */
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 70..77    pqrstuvw */
+    0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, /* 78..7f    xyz{|}~. */
+
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 80..87    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 88..8f    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 90..97    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 98..9f    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a0..a7    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a8..af    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b0..b7    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b8..bf    ........ */
+
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c0..c7    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c8..cf    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d0..d7    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d8..df    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e0..e7    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e8..ef    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* f0..f7    ........ */
+    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40  /* f8..ff    ........ */
+};
+
+#define CC_X 0        /* The letter 'x', or start of BLOB literal */
+#define CC_KYWD0 1    /* First letter of a keyword */
+#define CC_KYWD 2     /* Alphabetics or '_'.  Usable in a keyword */
+#define CC_DIGIT 3    /* Digits */
+#define CC_DOLLAR 4   /* '$' */
+#define CC_VARALPHA 5 /* '@', '#', ':'.  Alphabetic SQL variables */
+#define CC_VARNUM 6   /* '?'.  Numeric SQL variables */
+#define CC_SPACE 7    /* Space characters */
+#define CC_QUOTE 8    /* '"', '\'', or '`'.  String literals, quoted ids */
+#define CC_QUOTE2 9   /* '['.   [...] style quoted ids */
+#define CC_PIPE 10    /* '|'.   Bitwise OR or concatenate */
+#define CC_MINUS 11   /* '-'.  Minus or SQL-style comment */
+#define CC_LT 12      /* '<'.  Part of < or <= or <> */
+#define CC_GT 13      /* '>'.  Part of > or >= */
+#define CC_EQ 14      /* '='.  Part of = or == */
+#define CC_BANG 15    /* '!'.  Part of != */
+#define CC_SLASH 16   /* '/'.  / or c-style comment */
+#define CC_LP 17      /* '(' */
+#define CC_RP 18      /* ')' */
+#define CC_SEMI 19    /* ';' */
+#define CC_PLUS 20    /* '+' */
+#define CC_STAR 21    /* '*' */
+#define CC_PERCENT 22 /* '%' */
+#define CC_COMMA 23   /* ',' */
+#define CC_AND 24     /* '&' */
+#define CC_TILDA 25   /* '~' */
+#define CC_DOT 26     /* '.' */
+#define CC_ID 27      /* unicode characters usable in IDs */
+#define CC_NUL 29     /* 0x00 */
+#define CC_BOM 30     /* First byte of UTF8 BOM:  0xEF 0xBB 0xBF */
+
+// clang-format off
+static const unsigned char aiClass[] = {
+/*         x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf */
+/* 0x */   29, 28, 28, 28, 28, 28, 28, 28, 28,  7,  7, 28,  7,  7, 28, 28,
+/* 1x */   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+/* 2x */    7, 15,  8,  5,  4, 22, 24,  8, 17, 18, 21, 20, 23, 11, 26, 16,
+/* 3x */    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  5, 19, 12, 14, 13,  6,
+/* 4x */    5,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+/* 5x */    1,  1,  1,  1,  1,  1,  1,  1,  0,  2,  2,  9, 28, 28, 28,  2,
+/* 6x */    8,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+/* 7x */    1,  1,  1,  1,  1,  1,  1,  1,  0,  2,  2, 28, 10, 28, 25, 28,
+/* 8x */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* 9x */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Ax */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Bx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Cx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Dx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+/* Ex */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 30,
+/* Fx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27
+};
+// clang-format on
+
+#define IdChar(C) ((sqlite3CtypeMap[static_cast<unsigned char>(C)] & 0x46) != 0)
+
+// Copy of |sqlite3GetToken| for use by the PerfettoSql transpiler: classifies
+// the token starting at |z| (which must be NUL-terminated), writes its type to
+// |tokenType| and returns its length in bytes (0 only at the terminating NUL).
+//
+// We copy this function because |sqlite3GetToken| is static to sqlite3.c in
+// most distributions of SQLite so we cannot call it from our code. While we
+// could redefine SQLITE_PRIVATE, pragmatically that will not fly in all the
+// places we build trace processor so we need to resort to making a copy.
+int GetSqliteToken(const unsigned char* z, SqliteTokenType* tokenType) {
+  int i, c;
+  switch (aiClass[*z]) { /* Switch on the character-class of the first byte
+                         ** of the token. See the comment on the CC_ defines
+                         ** above. */
+    case CC_SPACE: {
+      for (i = 1; isspace(z[i]); i++) {
+      }
+      *tokenType = SqliteTokenType::TK_SPACE;
+      return i;
+    }
+    case CC_MINUS: {
+      if (z[1] == '-') {
+        for (i = 2; (c = z[i]) != 0 && c != '\n'; i++) {
+        }
+        *tokenType = SqliteTokenType::TK_SPACE; /* IMP: R-22934-25134 */
+        return i;
+      } else if (z[1] == '>') {
+        *tokenType = SqliteTokenType::TK_PTR;
+        return 2 + (z[2] == '>');
+      }
+      *tokenType = SqliteTokenType::TK_MINUS;
+      return 1;
+    }
+    case CC_LP: {
+      *tokenType = SqliteTokenType::TK_LP;
+      return 1;
+    }
+    case CC_RP: {
+      *tokenType = SqliteTokenType::TK_RP;
+      return 1;
+    }
+    case CC_SEMI: {
+      *tokenType = SqliteTokenType::TK_SEMI;
+      return 1;
+    }
+    case CC_PLUS: {
+      *tokenType = SqliteTokenType::TK_PLUS;
+      return 1;
+    }
+    case CC_STAR: {
+      *tokenType = SqliteTokenType::TK_STAR;
+      return 1;
+    }
+    case CC_SLASH: {
+      if (z[1] != '*' || z[2] == 0) {
+        *tokenType = SqliteTokenType::TK_SLASH;
+        return 1;
+      }
+      for (i = 3, c = z[2]; (c != '*' || z[i] != '/') && (c = z[i]) != 0; i++) {
+      }
+      if (c)
+        i++;
+      *tokenType = SqliteTokenType::TK_SPACE; /* IMP: R-22934-25134 */
+      return i;
+    }
+    case CC_PERCENT: {
+      *tokenType = SqliteTokenType::TK_REM;
+      return 1;
+    }
+    case CC_EQ: {
+      *tokenType = SqliteTokenType::TK_EQ;
+      return 1 + (z[1] == '=');
+    }
+    case CC_LT: {
+      if ((c = z[1]) == '=') {
+        *tokenType = SqliteTokenType::TK_LE;
+        return 2;
+      } else if (c == '>') {
+        *tokenType = SqliteTokenType::TK_NE;
+        return 2;
+      } else if (c == '<') {
+        *tokenType = SqliteTokenType::TK_LSHIFT;
+        return 2;
+      } else {
+        *tokenType = SqliteTokenType::TK_LT;
+        return 1;
+      }
+    }
+    case CC_GT: {
+      if ((c = z[1]) == '=') {
+        *tokenType = SqliteTokenType::TK_GE;
+        return 2;
+      } else if (c == '>') {
+        *tokenType = SqliteTokenType::TK_RSHIFT;
+        return 2;
+      } else {
+        *tokenType = SqliteTokenType::TK_GT;
+        return 1;
+      }
+    }
+    case CC_BANG: {
+      if (z[1] != '=') {
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+        return 1;
+      } else {
+        *tokenType = SqliteTokenType::TK_NE;
+        return 2;
+      }
+    }
+    case CC_PIPE: {
+      if (z[1] != '|') {
+        *tokenType = SqliteTokenType::TK_BITOR;
+        return 1;
+      } else {
+        *tokenType = SqliteTokenType::TK_CONCAT;
+        return 2;
+      }
+    }
+    case CC_COMMA: {
+      *tokenType = SqliteTokenType::TK_COMMA;
+      return 1;
+    }
+    case CC_AND: {
+      *tokenType = SqliteTokenType::TK_BITAND;
+      return 1;
+    }
+    case CC_TILDA: {
+      *tokenType = SqliteTokenType::TK_BITNOT;
+      return 1;
+    }
+    case CC_QUOTE: {
+      int delim = z[0];
+      for (i = 1; (c = z[i]) != 0; i++) {
+        if (c == delim) {
+          if (z[i + 1] == delim) {
+            i++;
+          } else {
+            break;
+          }
+        }
+      }
+      if (c == '\'') {
+        *tokenType = SqliteTokenType::TK_STRING;
+        return i + 1;
+      } else if (c != 0) {
+        *tokenType = SqliteTokenType::TK_ID;
+        return i + 1;
+      } else {
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+        return i;
+      }
+    }
+    case CC_DOT: {
+      if (!isdigit(z[1])) {
+        *tokenType = SqliteTokenType::TK_DOT;
+        return 1;
+      }
+      [[fallthrough]];
+    }
+    case CC_DIGIT: {
+      *tokenType = SqliteTokenType::TK_INTEGER;
+      if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X') && isxdigit(z[2])) {
+        for (i = 3; isxdigit(z[i]); i++) {
+        }
+        return i;
+      }
+      for (i = 0; isdigit(z[i]); i++) {  // Decimal literal: no hex digits.
+      }
+      if (z[i] == '.') {
+        i++;
+        while (isdigit(z[i])) {  // Fractional part is decimal as well.
+          i++;
+        }
+        *tokenType = SqliteTokenType::TK_FLOAT;
+      }
+      if ((z[i] == 'e' || z[i] == 'E') &&
+          (isdigit(z[i + 1]) ||
+           ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
+        i += 2;
+        while (isdigit(z[i])) {
+          i++;
+        }
+        *tokenType = SqliteTokenType::TK_FLOAT;
+      }
+      while (IdChar(z[i])) {
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+        i++;
+      }
+      return i;
+    }
+    case CC_QUOTE2: {
+      for (i = 1, c = z[0]; c != ']' && (c = z[i]) != 0; i++) {
+      }
+      *tokenType =
+          c == ']' ? SqliteTokenType::TK_ID : SqliteTokenType::TK_ILLEGAL;
+      return i;
+    }
+    case CC_VARNUM: {
+      *tokenType = SqliteTokenType::TK_VARIABLE;
+      for (i = 1; isdigit(z[i]); i++) {
+      }
+      return i;
+    }
+    case CC_DOLLAR:
+    case CC_VARALPHA: {
+      int n = 0;
+      *tokenType = SqliteTokenType::TK_VARIABLE;
+      for (i = 1; (c = z[i]) != 0; i++) {
+        if (IdChar(c)) {
+          n++;
+        } else if (c == '(' && n > 0) {
+          do {
+            i++;
+          } while ((c = z[i]) != 0 && !isspace(c) && c != ')');
+          if (c == ')') {
+            i++;
+          } else {
+            *tokenType = SqliteTokenType::TK_ILLEGAL;
+          }
+          break;
+        } else if (c == ':' && z[i + 1] == ':') {
+          i++;
+        } else {
+          break;
+        }
+      }
+      if (n == 0)
+        *tokenType = SqliteTokenType::TK_ILLEGAL;
+      return i;
+    }
+    case CC_KYWD0: {
+      for (i = 1; aiClass[z[i]] <= CC_KYWD; i++) {
+      }
+      if (IdChar(z[i])) {
+        /* This token started out using characters that can appear in keywords,
+        ** but z[i] is a character not allowed within keywords, so this must
+        ** be an identifier instead */
+        i++;
+        break;
+      }
+      if (sqlite3_keyword_check(reinterpret_cast<const char*>(z), i)) {
+        *tokenType = SqliteTokenType::TK_GENERIC_KEYWORD;
+      } else {
+        *tokenType = SqliteTokenType::TK_ID;
+      }
+      return i;
+    }
+    case CC_X: {
+      if (z[1] == '\'') {
+        *tokenType = SqliteTokenType::TK_BLOB;
+        for (i = 2; isxdigit(z[i]); i++) {  // BLOB payload is hex digits.
+        }
+        if (z[i] != '\'' || i % 2) {
+          *tokenType = SqliteTokenType::TK_ILLEGAL;
+          while (z[i] && z[i] != '\'') {
+            i++;
+          }
+        }
+        if (z[i])
+          i++;
+        return i;
+      }
+      [[fallthrough]];
+    }
+    case CC_KYWD:
+    case CC_ID: {
+      i = 1;
+      break;
+    }
+    case CC_BOM: {
+      if (z[1] == 0xbb && z[2] == 0xbf) {
+        *tokenType = SqliteTokenType::TK_SPACE;
+        return 3;
+      }
+      i = 1;
+      break;
+    }
+    case CC_NUL: {
+      *tokenType = SqliteTokenType::TK_ILLEGAL;
+      return 0;
+    }
+    default: {
+      *tokenType = SqliteTokenType::TK_ILLEGAL;
+      return 1;
+    }
+  }
+  while (IdChar(z[i])) {
+    i++;
+  }
+  *tokenType = SqliteTokenType::TK_ID;
+  return i;
+}
+
+}  // namespace
+
+SqliteTokenizer::SqliteTokenizer(const char* sql) : ptr_(sql) {}
+
+SqliteTokenizer::Token SqliteTokenizer::Next() {
+  SqliteTokenType type;
+  const int length = GetSqliteToken(unsigned_ptr(), &type);
+  // |length| is 0 only at the terminating NUL, so |ptr_| never moves past it.
+  const std::string_view text(ptr_, static_cast<uint32_t>(length));
+  ptr_ += length;
+  return Token{text, type};
+}
+
+SqliteTokenizer::Token SqliteTokenizer::NextNonWhitespace() {
+  Token token = Next();  // Comments are also reported as TK_SPACE.
+  while (token.token_type == SqliteTokenType::TK_SPACE)
+    token = Next();
+  return token;
+}
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/sqlite/sqlite_tokenizer.h b/src/trace_processor/sqlite/sqlite_tokenizer.h
new file mode 100644
index 0000000..0d3bfb6
--- /dev/null
+++ b/src/trace_processor/sqlite/sqlite_tokenizer.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
+#define SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
+
+#include <optional>
+#include <string_view>
+
+namespace perfetto {
+namespace trace_processor {
+
+// List of token types returnable by |SqliteTokenizer|.
+// 1:1 matches the definitions in SQLite.
+enum class SqliteTokenType : uint32_t {
+  TK_SEMI = 1,
+  TK_LP = 22,
+  TK_RP = 23,
+  TK_COMMA = 25,
+  TK_NE = 52,
+  TK_EQ = 53,
+  TK_GT = 54,
+  TK_LE = 55,
+  TK_LT = 56,
+  TK_GE = 57,
+  TK_ID = 59,
+  TK_BITAND = 102,
+  TK_BITOR = 103,
+  TK_LSHIFT = 104,
+  TK_RSHIFT = 105,
+  TK_PLUS = 106,
+  TK_MINUS = 107,
+  TK_STAR = 108,
+  TK_SLASH = 109,
+  TK_REM = 110,
+  TK_CONCAT = 111,
+  TK_PTR = 112,
+  TK_BITNOT = 114,
+  TK_STRING = 117,
+  TK_DOT = 141,
+  TK_FLOAT = 153,
+  TK_BLOB = 154,
+  TK_INTEGER = 155,
+  TK_VARIABLE = 156,
+  TK_SPACE = 183,
+  TK_ILLEGAL = 184,
+
+  // Generic constant which replaces all the keywords in SQLite as we do not
+  // care about distinguishing between the vast majority of them.
+  TK_GENERIC_KEYWORD = 1000,
+};
+
+// Tokenizes SQL statements according to SQLite SQL language specification:
+// https://www2.sqlite.org/hlr40000.html
+//
+// Usage of this class (|my_sql_string| must outlive the tokenizer):
+// SqliteTokenizer tzr(my_sql_string.c_str());
+// for (auto t = tzr.Next(); t.token_type != SqliteTokenType::TK_SEMI;
+//      t = tzr.Next()) {
+//   // Handle t here
+// }
+class SqliteTokenizer {
+ public:
+  // A single SQL token according to the SQLite standard.
+  struct Token {
+    // The string contents of the token; a view into the tokenized SQL.
+    std::string_view str;
+
+    // The type of the token.
+    SqliteTokenType token_type;
+
+    bool operator==(const Token& o) const {
+      return str == o.str && token_type == o.token_type;
+    }
+  };
+
+  explicit SqliteTokenizer(const char* sql);
+
+  // Returns the next SQL token; an empty token once the input is exhausted.
+  Token Next();
+
+  // Returns the next SQL token which is not of type TK_SPACE.
+  Token NextNonWhitespace();
+
+  // Returns the pointer to the start of the next token which will be returned.
+  const char* ptr() const { return ptr_; }
+
+ private:
+  const unsigned char* unsigned_ptr() const {
+    return reinterpret_cast<const unsigned char*>(ptr_);
+  }
+
+  const char* ptr_ = nullptr;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
diff --git a/src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc b/src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc
new file mode 100644
index 0000000..44946b7
--- /dev/null
+++ b/src/trace_processor/sqlite/sqlite_tokenizer_unittest.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/sqlite/sqlite_tokenizer.h"
+#include <vector>
+
+#include "perfetto/base/logging.h"
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+
+using Token = SqliteTokenizer::Token;
+using Type = SqliteTokenType;
+
+class SqliteTokenizerTest : public ::testing::Test {
+ protected:
+  std::vector<SqliteTokenizer::Token> Tokenize(const char* ptr) {
+    SqliteTokenizer tokenizer(ptr);
+    std::vector<SqliteTokenizer::Token> tokens;
+    for (auto t = tokenizer.Next(); !t.str.empty(); t = tokenizer.Next()) {
+      tokens.push_back(t);
+    }
+    return tokens;
+  }
+};
+
+TEST_F(SqliteTokenizerTest, EmptyString) {
+  ASSERT_THAT(Tokenize(""), testing::IsEmpty());
+}
+
+TEST_F(SqliteTokenizerTest, OnlySpace) {
+  ASSERT_THAT(Tokenize(" "), testing::ElementsAre(Token{" ", Type::TK_SPACE}));
+}
+
+TEST_F(SqliteTokenizerTest, SpaceColon) {
+  ASSERT_THAT(Tokenize(" ;"), testing::ElementsAre(Token{" ", Type::TK_SPACE},
+                                                   Token{";", Type::TK_SEMI}));
+}
+
+TEST_F(SqliteTokenizerTest, Select) {
+  ASSERT_THAT(
+      Tokenize("SELECT * FROM slice;"),
+      testing::ElementsAre(
+          Token{"SELECT", Type::TK_GENERIC_KEYWORD}, Token{" ", Type::TK_SPACE},
+          Token{"*", Type::TK_STAR}, Token{" ", Type::TK_SPACE},
+          Token{"FROM", Type::TK_GENERIC_KEYWORD}, Token{" ", Type::TK_SPACE},
+          Token{"slice", Type::TK_ID}, Token{";", Type::TK_SEMI}));
+}
+
+}  // namespace
+}  // namespace trace_processor
+}  // namespace perfetto