| // Copyright 2023 The Pigweed Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| // use this file except in compliance with the License. You may obtain a copy of |
| // the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| // License for the specific language governing permissions and limitations under |
| // the License. |
| |
| //! `pw_tokenizer` - Efficient string handling and printf style encoding. |
| //! |
| //! Logging is critical, but developers are often forced to choose between |
| //! additional logging or saving crucial flash space. The `pw_tokenizer` crate |
| //! helps address this by replacing printf-style strings with binary tokens |
| //! during compilation. This enables extensive logging with substantially less |
| //! memory usage. |
| //! |
| //! For a more in depth explanation of the systems design and motivations, |
| //! see [Pigweed's pw_tokenizer module documentation](https://pigweed.dev/pw_tokenizer/). |
| //! |
| //! # Example |
| //! |
| //! ``` |
| //! use pw_tokenizer::tokenize_to_buffer; |
| //! |
| //! # fn doctest() -> pw_status::Result<()> { |
| //! let mut buffer = [0u8; 1024]; |
| //! let len = tokenize_to_buffer!(&mut buffer, "The answer is %d", 42)?; |
| //! |
| //! // 4 bytes used to encode the token and one to encode the value 42. This |
| //! // is a **3.5x** reduction in size compared to the raw string! |
| //! assert_eq!(len, 5); |
| //! # Ok(()) |
| //! # } |
| //! # doctest().unwrap(); |
| //! ``` |
| |
| #![no_std] |
| #![deny(missing_docs)] |
| |
| #[doc(hidden)] |
| pub mod internal; |
| |
| #[doc(hidden)] |
| // Creating a __private namespace allows us a way to get to the modules |
| // we need from macros by doing: |
| // use $crate::__private as __pw_tokenizer_crate; |
| // |
| // This is how proc macro generated code can reliably reference back to |
| // `pw_tokenizer` while still allowing a user to import it under a different |
| // name. |
| pub mod __private { |
| pub use crate::*; |
| pub use pw_stream::{Cursor, Seek, WriteInteger, WriteVarint}; |
| pub use pw_tokenizer_macro::{_token, _tokenize_to_buffer}; |
| } |
| |
| /// Return the [`u32`] token for the specified string and add it to the token |
| /// database. |
| /// |
| /// This is where the magic happens in `pw_tokenizer`! ... and by magic |
| /// we mean hiding information in a special linker section that ends up in the |
| /// final elf binary but does not get flashed to the device. |
| /// |
| /// Two things are accomplished here: |
| /// 1) The string is hashed into its stable `u32` token. This is the value that |
| /// is returned from the macro. |
| /// 2) A [token database entry](https://pigweed.dev/pw_tokenizer/design.html#binary-database-format) |
| /// is generated, assigned to a unique static symbol, placed in a linker |
| /// section named `pw_tokenizer.entries.<TOKEN_HASH>`. A |
| /// [linker script](https://pigweed.googlesource.com/pigweed/pigweed/+/refs/heads/main/pw_tokenizer/pw_tokenizer_linker_sections.ld) |
| /// is responsible for picking these symbols up and aggregating them into a |
| /// single `.pw_tokenizer.entries` section in the final binary. |
| /// |
| /// # Example |
| /// ``` |
| /// use pw_tokenizer::token; |
| /// |
| /// let token = token!("hello, \"world\""); |
| /// assert_eq!(token, 3537412730); |
| /// ``` |
| /// |
| /// Currently there is no support for encoding tokens to specific domains |
| /// or with "fixed lengths" per [`pw_tokenizer_core::hash_bytes_fixed`]. |
| #[macro_export] |
| macro_rules! token { |
| ($string:literal) => {{ |
| $crate::__private::_token!($string) |
| }}; |
| } |
| |
| /// Tokenize a format string and arguments to an [`AsMut<u8>`] buffer and add |
| /// the format string's token to the token database. |
| /// |
| /// See [`token`] for an explanation on how strings are tokenized and entries |
| /// are added to the token database. |
| /// |
| /// Returns a [`pw_status::Result<usize>`] the number of bytes written to the buffer. |
| /// |
| /// # Errors |
| /// - [`pw_status::Error::OutOfRange`] - Buffer is not large enough to fit |
| /// tokenized data. |
| /// - [`pw_status::Error::InvalidArgument`] - Invalid buffer was provided. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use pw_tokenizer::tokenize_to_buffer; |
| /// |
| /// # fn doctest() -> pw_status::Result<()> { |
| /// let mut buffer = [0u8; 1024]; |
| /// let len = tokenize_to_buffer!(&mut buffer, "The answer is %d", 42)?; |
| /// |
| /// // 4 bytes used to encode the token and one to encode the value 42. |
| /// assert_eq!(len, 5); |
| /// # Ok(()) |
| /// # } |
| /// # doctest().unwrap(); |
| /// ``` |
| #[macro_export] |
| macro_rules! tokenize_to_buffer { |
| ($buffer:expr, $format_string:literal) => {{ |
| use $crate::__private as __pw_tokenizer_crate; |
| __pw_tokenizer_crate::_tokenize_to_buffer!($buffer, $format_string) |
| }}; |
| |
| ($buffer:expr, $format_string:literal, $($args:expr),*) => {{ |
| use $crate::__private as __pw_tokenizer_crate; |
| __pw_tokenizer_crate::_tokenize_to_buffer!($buffer, $format_string, $($args),*) |
| }}; |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| extern crate self as pw_tokenizer; |
| |
| // This is not meant to be an exhaustive test of tokenization which is |
| // covered by `pw_tokenizer_core`'s unit tests. Rather, this is testing |
| // that the `tokenize!` macro connects to that correctly. |
| #[test] |
| fn test_token() {} |
| |
| macro_rules! tokenize_to_buffer_test { |
| ($expected_data:expr, $buffer_len:expr, $fmt:expr) => { |
| { |
| let mut orig_buffer = [0u8; $buffer_len]; |
| let buffer = |
| tokenize_to_buffer!(&mut orig_buffer, $fmt).unwrap(); |
| let len = buffer.len(); |
| assert_eq!( |
| &orig_buffer[..(($buffer_len) - len)], |
| $expected_data, |
| ); |
| } |
| }; |
| |
| ($expected_data:expr, $buffer_len:expr, $fmt:expr, $($args:expr),*) => { |
| { |
| let mut buffer = [0u8; $buffer_len]; |
| let len = tokenize_to_buffer!(&mut buffer, $fmt, $($args),*).unwrap(); |
| assert_eq!( |
| &buffer[..len], |
| $expected_data, |
| ); |
| } |
| }; |
| } |
| |
| #[test] |
| fn test_decimal_format() { |
| tokenize_to_buffer_test!( |
| &[0x52, 0x1c, 0xb0, 0x4c, 0x2], // expected buffer |
| 64, // buffer size |
| "The answer is %d!", |
| 1 |
| ); |
| |
| tokenize_to_buffer_test!( |
| &[0x36, 0xd0, 0xfb, 0x69, 0x1], // expected buffer |
| 64, // buffer size |
| "No! The answer is %d!", |
| -1 |
| ); |
| |
| tokenize_to_buffer_test!( |
| &[0xa4, 0xad, 0x50, 0x54, 0x0], // expected buffer |
| 64, // buffer size |
| "I think you'll find that the answer is %d!", |
| 0 |
| ); |
| } |
| |
| #[test] |
| fn test_misc_integer_format() { |
| // %d, %i, %o, %u, %x, %X all encode integers the same. |
| tokenize_to_buffer_test!( |
| &[0x57, 0x88, 0xc5, 0xd8, 0x2], // expected buffer |
| 64, // buffer size |
| "The answer is %i!", |
| 1 |
| ); |
| |
| tokenize_to_buffer_test!( |
| &[0x5d, 0x70, 0x12, 0xb4, 0x2], // expected buffer |
| 64, // buffer size |
| "The answer is %o!", |
| 1 |
| ); |
| |
| tokenize_to_buffer_test!( |
| &[0x63, 0x58, 0x5f, 0x8f, 0x2], // expected buffer |
| 64, // buffer size |
| "The answer is %u!", |
| 1 |
| ); |
| |
| tokenize_to_buffer_test!( |
| &[0x66, 0xcc, 0x05, 0x7d, 0x2], // expected buffer |
| 64, // buffer size |
| "The answer is %x!", |
| 1 |
| ); |
| |
| tokenize_to_buffer_test!( |
| &[0x46, 0x4c, 0x16, 0x96, 0x2], // expected buffer |
| 64, // buffer size |
| "The answer is %X!", |
| 1 |
| ); |
| } |
| |
| #[test] |
| fn test_string_format() { |
| tokenize_to_buffer_test!( |
| b"\x25\xf6\x2e\x66\x07Pigweed", // expected buffer |
| 64, // buffer size |
| "Hello: %s!", |
| "Pigweed" |
| ); |
| } |
| |
| #[test] |
| fn test_string_format_overflow() { |
| tokenize_to_buffer_test!( |
| b"\x25\xf6\x2e\x66\x83Pig", // expected buffer |
| 8, // buffer size |
| "Hello: %s!", |
| "Pigweed" |
| ); |
| } |
| |
| #[test] |
| fn test_char_format() { |
| tokenize_to_buffer_test!( |
| &[0x2e, 0x52, 0xac, 0xe4, 0x50], // expected buffer |
| 64, // buffer size |
| "Hello: %cigweed", |
| "P".as_bytes()[0] |
| ); |
| } |
| } |