pw_tokenizer: Encoding command line interface
Simple command line interface for encoding tokenized strings with
arguments. Helpful for debugging and experimentation.
Change-Id: I905265faf19bee457a93033e344a25f4524fabf9
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/92400
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Reviewed-by: Anthony DiGirolamo <tonymd@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 1471ce7..8efcc75 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -458,6 +458,28 @@
arguments short or avoid encoding them as strings (e.g. encode an enum as an
integer instead of a string). See also `Tokenized strings as %s arguments`_.
+Encoding command line utility
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The ``pw_tokenizer.encode`` command line tool can be used to encode tokenized
+strings.
+
+.. code-block:: bash
+
+ python -m pw_tokenizer.encode [-h] FORMAT_STRING [ARG ...]
+
+Example:
+
+.. code-block:: text
+
+ $ python -m pw_tokenizer.encode "There's... %d many of %s!" 2 them
+ Raw input: "There's... %d many of %s!" % (2, 'them')
+ Formatted input: There's... 2 many of them!
+ Token: 0xb6ef8b2d
+ Encoded: b'-\x8b\xef\xb6\x04\x04them' (2d 8b ef b6 04 04 74 68 65 6d) [10 bytes]
+ Prefixed Base64: $LYvvtgQEdGhlbQ==
+
+See ``--help`` for full usage details.
+
Token generation: fixed length hashing at compile time
------------------------------------------------------
String tokens are generated using a modified version of the x65599 hash used by
diff --git a/pw_tokenizer/py/pw_tokenizer/encode.py b/pw_tokenizer/py/pw_tokenizer/encode.py
index 97c62bf..4cf9f8c 100644
--- a/pw_tokenizer/py/pw_tokenizer/encode.py
+++ b/pw_tokenizer/py/pw_tokenizer/encode.py
@@ -13,9 +13,13 @@
# the License.
"""Provides functionality for encoding tokenized messages."""
+import argparse
import base64
import struct
-from typing import Union
+import sys
+from typing import Sequence, Union
+
+from pw_tokenizer import tokens
_INT32_MAX = 2**31 - 1
_UINT32_MAX = 2**32 - 1
@@ -93,3 +97,54 @@
def prefixed_base64(data: bytes, prefix: str = '$') -> str:
    """Encodes a tokenized message as prefixed Base64."""
    base64_text = base64.b64encode(data).decode()
    return f'{prefix}{base64_text}'
+
+
+def _parse_user_input(string: str):
+ """Evaluates a string as Python code or returns it as a literal string."""
+ try:
+ value = eval(string, dict(__builtins__={})) # pylint: disable=eval-used
+ except (NameError, SyntaxError):
+ return string
+
+ return value if isinstance(value, (int, float)) else string
+
+
def _main(format_string_list: Sequence[str], raw_args: Sequence[str]) -> int:
    """Encodes a tokenized message from the command line and prints details.

    Args:
      format_string_list: One-element list holding the printf-style format
        string (argparse nargs=1 always yields a list).
      raw_args: The format string's arguments as raw command line strings.

    Returns:
      0, suitable for use as the process exit code.
    """
    # nargs=1 guarantees exactly one element; unpack it.
    format_string, = format_string_list
    token = tokens.pw_tokenizer_65599_hash(format_string)
    # Interpret each argument as a Python expression where possible.
    args = tuple(_parse_user_input(a) for a in raw_args)

    data = encode_token_and_args(token, *args)
    # Read the token back from the first 4 bytes of the encoding
    # (little-endian); the variable is reused for display.
    token = int.from_bytes(data[:4], 'little')
    # Hex dump of the encoded message, space-separated, one byte each.
    binary = ' '.join(f'{b:02x}' for b in data)

    # NOTE(review): the example output in docs.rst shows these labels aligned
    # in a column — the leading padding inside these f-strings may have been
    # lost when this patch was captured; confirm the literal spacing.
    print(f' Raw input: {format_string!r} % {args!r}')
    print(f'Formatted input: {format_string % args}')
    print(f' Token: 0x{token:08x}')
    print(f' Encoded: {data!r} ({binary}) [{len(data)} bytes]')
    print(f'Prefixed Base64: {prefixed_base64(data)}')

    return 0
+
+
+def _parse_args() -> dict:
+ parser = argparse.ArgumentParser(
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument('format_string_list',
+ metavar='FORMAT_STRING',
+ nargs=1,
+ help='Format string with optional %%-style arguments.')
+ parser.add_argument(
+ 'raw_args',
+ metavar='ARG',
+ nargs='*',
+ help=('Arguments for the format string, if any. Arguments are parsed '
+ 'as Python expressions, with no builtins (e.g. 9 is the number '
+ '9 and \'"9"\' is the string "9"). Arguments that are not valid '
+ 'Python are treated as string literals.'))
+ return vars(parser.parse_args())
+
+
# Script entry point: supports invocation via `python -m pw_tokenizer.encode`
# as shown in docs.rst; _main's return value becomes the exit status.
if __name__ == '__main__':
    sys.exit(_main(**_parse_args()))