pw_tokenizer: Use datetime.isoformat

The isoformat functions are considerably faster than their format-string
equivalents, and `parse_csv` is performance-sensitive: its parsing loop
runs once for every token in a token database. This change noticeably
improved the performance of one downstream consumer.
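
For illustration only, a rough micro-benchmark sketch of the two parsing
paths (not part of this change; exact numbers vary by platform and
Python version):

    import timeit
    from datetime import datetime

    value = '2023-04-01'

    # Parse the same date string with both approaches and compare wall time.
    strptime_s = timeit.timeit(
        lambda: datetime.strptime(value, '%Y-%m-%d'), number=100_000
    )
    fromiso_s = timeit.timeit(
        lambda: datetime.fromisoformat(value), number=100_000
    )

    print(f'strptime:      {strptime_s:.3f}s')
    print(f'fromisoformat: {fromiso_s:.3f}s')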

N.B.: Token databases that recorded dates with months or days not padded
to two digits will no longer load successfully. The old format-string
approach should never have generated such databases, but its loading
path does appear to have accepted these entries.
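
For example (illustrative only; DATE_FORMAT was '%Y-%m-%d'):

    from datetime import datetime

    # The old format-string path tolerated unpadded months and days:
    datetime.strptime('1845-2-2', '%Y-%m-%d')  # -> datetime(1845, 2, 2, 0, 0)

    # fromisoformat requires zero-padded ISO 8601 dates:
    datetime.fromisoformat('1845-02-02')       # -> datetime(1845, 2, 2, 0, 0)
    datetime.fromisoformat('1845-2-2')         # raises ValueError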

Bug: b/269511233
Change-Id: I01e6ef7a944d38190cddad90cf49d841efedbd68
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/129451
Pigweed-Auto-Submit: Taylor Cramer <cramertj@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
Reviewed-by: Wyatt Hepler <hepler@google.com>
diff --git a/pw_tokenizer/py/pw_tokenizer/database.py b/pw_tokenizer/py/pw_tokenizer/database.py
index 9d69f93..ed100ed 100755
--- a/pw_tokenizer/py/pw_tokenizer/database.py
+++ b/pw_tokenizer/py/pw_tokenizer/database.py
@@ -507,7 +507,7 @@
         if value == 'today':
             return datetime.now()
 
-        return datetime.strptime(value, tokens.DATE_FORMAT)
+        return datetime.fromisoformat(value)
 
     year_month_day.__name__ = 'year-month-day (YYYY-MM-DD)'
 
diff --git a/pw_tokenizer/py/pw_tokenizer/tokens.py b/pw_tokenizer/py/pw_tokenizer/tokens.py
index b66b223..55a3e65 100644
--- a/pw_tokenizer/py/pw_tokenizer/tokens.py
+++ b/pw_tokenizer/py/pw_tokenizer/tokens.py
@@ -41,7 +41,6 @@
 )
 from uuid import uuid4
 
-DATE_FORMAT = '%Y-%m-%d'
 DEFAULT_DOMAIN = ''
 
 # The default hash length to use for C-style hashes. This value only applies
@@ -355,24 +354,26 @@
 
 def parse_csv(fd: TextIO) -> Iterable[TokenizedStringEntry]:
     """Parses TokenizedStringEntries from a CSV token database file."""
+    entries = []
     for line in csv.reader(fd):
         try:
             token_str, date_str, string_literal = line
 
             token = int(token_str, 16)
             date = (
-                datetime.strptime(date_str, DATE_FORMAT)
-                if date_str.strip()
-                else None
+                datetime.fromisoformat(date_str) if date_str.strip() else None
             )
 
-            yield TokenizedStringEntry(
-                token, string_literal, DEFAULT_DOMAIN, date
+            entries.append(
+                TokenizedStringEntry(
+                    token, string_literal, DEFAULT_DOMAIN, date
+                )
             )
         except (ValueError, UnicodeDecodeError) as err:
             _LOG.error(
                 'Failed to parse tokenized string entry %s: %s', line, err
             )
+    return entries
 
 
 def write_csv(database: Database, fd: BinaryIO) -> None:
@@ -388,9 +389,7 @@
     fd.write(
         '{:08x},{:10},"{}"\n'.format(
             entry.token,
-            entry.date_removed.strftime(DATE_FORMAT)
-            if entry.date_removed
-            else '',
+            entry.date_removed.date().isoformat() if entry.date_removed else '',
             entry.string.replace('"', '""'),
         ).encode()
     )  # escape " as ""
diff --git a/pw_tokenizer/py/tokens_test.py b/pw_tokenizer/py/tokens_test.py
index 666a0fa..ee3431b 100755
--- a/pw_tokenizer/py/tokens_test.py
+++ b/pw_tokenizer/py/tokens_test.py
@@ -87,7 +87,7 @@
 2,this is totally invalid,"Whoa there!"
 3,,"This one's OK"
 ,,"Also broken"
-5,1845-2-2,"I'm %s fine"
+5,1845-02-02,"I'm %s fine"
 6,"Missing fields"
 """
 
@@ -178,7 +178,7 @@
         db = read_db_from_csv('')
         self.assertEqual(str(db), '')
 
-        db = read_db_from_csv('abc123,2048-4-1,Fake string\n')
+        db = read_db_from_csv('abc123,2048-04-01,Fake string\n')
         self.assertEqual(str(db), '00abc123,2048-04-01,"Fake string"\n')
 
         db = read_db_from_csv(