pw_tokenizer: Encoding command line interface
Simple command line interface for encoding tokenized strings with
arguments. Helpful for debugging and experimentation.
Change-Id: I905265faf19bee457a93033e344a25f4524fabf9
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/92400
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Reviewed-by: Anthony DiGirolamo <tonymd@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 1471ce7..8efcc75 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -458,6 +458,28 @@
arguments short or avoid encoding them as strings (e.g. encode an enum as an
integer instead of a string). See also `Tokenized strings as %s arguments`_.
+Encoding command line utility
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The ``pw_tokenizer.encode`` command line tool can be used to encode tokenized
+strings.
+
+.. code-block:: bash
+
+ python -m pw_tokenizer.encode [-h] FORMAT_STRING [ARG ...]
+
+Example:
+
+.. code-block:: text
+
+ $ python -m pw_tokenizer.encode "There's... %d many of %s!" 2 them
+ Raw input: "There's... %d many of %s!" % (2, 'them')
+ Formatted input: There's... 2 many of them!
+ Token: 0xb6ef8b2d
+ Encoded: b'-\x8b\xef\xb6\x04\x04them' (2d 8b ef b6 04 04 74 68 65 6d) [10 bytes]
+ Prefixed Base64: $LYvvtgQEdGhlbQ==
+
+See ``--help`` for full usage details.
+
Token generation: fixed length hashing at compile time
------------------------------------------------------
String tokens are generated using a modified version of the x65599 hash used by
diff --git a/pw_tokenizer/py/pw_tokenizer/encode.py b/pw_tokenizer/py/pw_tokenizer/encode.py
index 97c62bf..4cf9f8c 100644
--- a/pw_tokenizer/py/pw_tokenizer/encode.py
+++ b/pw_tokenizer/py/pw_tokenizer/encode.py
@@ -13,9 +13,13 @@
# the License.
"""Provides functionality for encoding tokenized messages."""
+import argparse
import base64
import struct
-from typing import Union
+import sys
+from typing import Sequence, Union
+
+from pw_tokenizer import tokens
_INT32_MAX = 2**31 - 1
_UINT32_MAX = 2**32 - 1
@@ -93,3 +97,54 @@
def prefixed_base64(data: bytes, prefix: str = '$') -> str:
    """Encodes a tokenized message as prefixed Base64."""
    base64_text = base64.b64encode(data).decode()
    return f'{prefix}{base64_text}'
+
+
+def _parse_user_input(string: str):
+ """Evaluates a string as Python code or returns it as a literal string."""
+ try:
+ value = eval(string, dict(__builtins__={})) # pylint: disable=eval-used
+ except (NameError, SyntaxError):
+ return string
+
+ return value if isinstance(value, (int, float)) else string
+
+
def _main(format_string_list: Sequence[str], raw_args: Sequence[str]) -> int:
    """Encodes a tokenized message from the command line and prints details.

    Args:
      format_string_list: One-element list holding the printf-style format
        string (argparse nargs=1 always yields a list).
      raw_args: The format string's arguments as raw command line strings.

    Returns:
      0, suitable for use as the process exit code.
    """
    # nargs=1 guarantees exactly one element; unpack it.
    format_string, = format_string_list
    token = tokens.pw_tokenizer_65599_hash(format_string)
    # Interpret each argument as a Python expression where possible.
    args = tuple(_parse_user_input(a) for a in raw_args)

    data = encode_token_and_args(token, *args)
    # Read the token back from the first 4 bytes of the encoding
    # (little-endian); the variable is reused for display.
    token = int.from_bytes(data[:4], 'little')
    # Hex dump of the encoded message, space-separated, one byte each.
    binary = ' '.join(f'{b:02x}' for b in data)

    # NOTE(review): the example output in docs.rst shows these labels aligned
    # in a column — the leading padding inside these f-strings may have been
    # lost when this patch was captured; confirm the literal spacing.
    print(f' Raw input: {format_string!r} % {args!r}')
    print(f'Formatted input: {format_string % args}')
    print(f' Token: 0x{token:08x}')
    print(f' Encoded: {data!r} ({binary}) [{len(data)} bytes]')
    print(f'Prefixed Base64: {prefixed_base64(data)}')

    return 0
+
+
+def _parse_args() -> dict:
+ parser = argparse.ArgumentParser(
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument('format_string_list',
+ metavar='FORMAT_STRING',
+ nargs=1,
+ help='Format string with optional %%-style arguments.')
+ parser.add_argument(
+ 'raw_args',
+ metavar='ARG',
+ nargs='*',
+ help=('Arguments for the format string, if any. Arguments are parsed '
+ 'as Python expressions, with no builtins (e.g. 9 is the number '
+ '9 and \'"9"\' is the string "9"). Arguments that are not valid '
+ 'Python are treated as string literals.'))
+ return vars(parser.parse_args())
+
+
# Script entry point: supports invocation via `python -m pw_tokenizer.encode`
# as shown in docs.rst; _main's return value becomes the exit status.
if __name__ == '__main__':
    sys.exit(_main(**_parse_args()))