pw_tokenizer: Encoding command line interface

Simple command line interface for encoding tokenized strings with
arguments. Helpful for debugging and experimentation.

Change-Id: I905265faf19bee457a93033e344a25f4524fabf9
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/92400
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Reviewed-by: Anthony DiGirolamo <tonymd@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 1471ce7..8efcc75 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -458,6 +458,28 @@
    arguments short or avoid encoding them as strings (e.g. encode an enum as an
    integer instead of a string). See also `Tokenized strings as %s arguments`_.
 
+Encoding command line utility
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The ``pw_tokenizer.encode`` command line tool encodes a format string and its
+arguments as a tokenized message.
+
+.. code-block:: bash
+
+  python -m pw_tokenizer.encode [-h] FORMAT_STRING [ARG ...]
+
+Example:
+
+.. code-block:: text
+
+  $ python -m pw_tokenizer.encode "There's... %d many of %s!" 2 them
+        Raw input: "There's... %d many of %s!" % (2, 'them')
+  Formatted input: There's... 2 many of them!
+            Token: 0xb6ef8b2d
+          Encoded: b'-\x8b\xef\xb6\x04\x04them' (2d 8b ef b6 04 04 74 68 65 6d) [10 bytes]
+  Prefixed Base64: $LYvvtgQEdGhlbQ==
+
+See ``--help`` for full usage details.
+
 Token generation: fixed length hashing at compile time
 ------------------------------------------------------
 String tokens are generated using a modified version of the x65599 hash used by
diff --git a/pw_tokenizer/py/pw_tokenizer/encode.py b/pw_tokenizer/py/pw_tokenizer/encode.py
index 97c62bf..4cf9f8c 100644
--- a/pw_tokenizer/py/pw_tokenizer/encode.py
+++ b/pw_tokenizer/py/pw_tokenizer/encode.py
@@ -13,9 +13,13 @@
 # the License.
 """Provides functionality for encoding tokenized messages."""
 
+import argparse
 import base64
 import struct
-from typing import Union
+import sys
+from typing import Sequence, Union
+
+from pw_tokenizer import tokens
 
 _INT32_MAX = 2**31 - 1
 _UINT32_MAX = 2**32 - 1
@@ -93,3 +97,83 @@
 def prefixed_base64(data: bytes, prefix: str = '$') -> str:
     """Encodes a tokenized message as prefixed Base64."""
     return prefix + base64.b64encode(data).decode()
+
+
+def _parse_user_input(string: str):
+    """Interprets a command line argument as a number or a literal string.
+
+    The argument is evaluated as a Python expression with no builtins. If the
+    result is an int or float, that value is returned. In every other case
+    (the text is not valid Python, evaluation fails, or the result has some
+    other type) the original text is returned unchanged, so an argument such
+    as '"9"' is returned with its quote characters.
+    """
+    try:
+        # NOTE: eval() with empty builtins is not a sandbox. It is used here
+        # only on the invoking user's own command line arguments.
+        value = eval(string, dict(__builtins__={}))  # pylint: disable=eval-used
+    except Exception:  # pylint: disable=broad-except
+        # Syntactically valid input can still fail to evaluate (e.g. "1/0"
+        # or "1 + 'a'"); fall back to the literal text for any failure.
+        return string
+
+    return value if isinstance(value, (int, float)) else string
+
+
+def _main(format_string_list: Sequence[str], raw_args: Sequence[str]) -> int:
+    """Encodes a format string with its arguments and prints the results.
+
+    Args:
+      format_string_list: one-element sequence holding the format string
+      raw_args: unparsed arguments for the format string
+
+    Returns:
+      0, which the caller uses as the process exit status.
+    """
+    format_string, = format_string_list
+    token = tokens.pw_tokenizer_65599_hash(format_string)
+    args = tuple(_parse_user_input(a) for a in raw_args)
+
+    data = encode_token_and_args(token, *args)
+    token = int.from_bytes(data[:4], 'little')
+    binary = ' '.join(f'{b:02x}' for b in data)
+
+    # Python's % operator raises if the arguments do not match the format
+    # string or if it does not support a specifier (e.g. %zu). Encoding has
+    # already succeeded at this point, so report the problem, don't crash.
+    try:
+        formatted = format_string % args
+    except (TypeError, ValueError) as err:
+        formatted = f'<cannot format with Python %-formatting: {err}>'
+
+    print(f'      Raw input: {format_string!r} % {args!r}')
+    print(f'Formatted input: {formatted}')
+    print(f'          Token: 0x{token:08x}')
+    print(f'        Encoded: {data!r} ({binary}) [{len(data)} bytes]')
+    print(f'Prefixed Base64: {prefixed_base64(data)}')
+
+    return 0
+
+
+def _parse_args() -> dict:
+    """Parses the command line; returns keyword arguments for _main."""
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('format_string_list',
+                        metavar='FORMAT_STRING',
+                        nargs=1,
+                        help='Format string with optional %%-style arguments.')
+    parser.add_argument(
+        'raw_args',
+        metavar='ARG',
+        nargs='*',
+        help=('Arguments for the format string, if any. Arguments are parsed '
+              'as Python expressions, with no builtins (e.g. 9 is the number '
+              '9 and \'"9"\' is the string "9"). Arguments that are not valid '
+              'Python are treated as string literals.'))
+    return vars(parser.parse_args())
+
+
+if __name__ == '__main__':
+    sys.exit(_main(**_parse_args()))