// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
//! `pw_tokenizer` - Efficient string handling and printf-style encoding.
//!
//! Logging is critical, but developers are often forced to choose between
//! additional logging and saving crucial flash space. The `pw_tokenizer` crate
//! helps address this by replacing printf-style strings with binary tokens
//! during compilation. This enables extensive logging with substantially less
//! memory usage.
//!
//! For a more in-depth explanation of the system's design and motivation,
//! see [Pigweed's pw_tokenizer module documentation](https://pigweed.dev/pw_tokenizer/).
//!
//! # Example
//!
//! ```
//! use pw_tokenizer::tokenize_to_buffer;
//!
//! # fn doctest() -> pw_status::Result<()> {
//! let mut buffer = [0u8; 1024];
//! let len = tokenize_to_buffer!(&mut buffer, "The answer is %d", 42)?;
//!
//! // 4 bytes encode the token and one encodes the value 42. This is more
//! // than a **3x** reduction in size compared to the 16 byte raw string!
//! assert_eq!(len, 5);
//! # Ok(())
//! # }
//! # doctest().unwrap();
//! ```
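//!
//! The encoded output is the format string's 4 byte token followed by the
//! arguments encoded per [Pigweed's tokenizer design](https://pigweed.dev/pw_tokenizer/design.html).
//! Below is a minimal sketch of pulling that encoding apart, assuming the
//! token is written little-endian and signed integers are zigzag varint
//! encoded:
//!
//! ```
//! use pw_tokenizer::{token, tokenize_to_buffer};
//!
//! # fn doctest() -> pw_status::Result<()> {
//! let mut buffer = [0u8; 8];
//! let len = tokenize_to_buffer!(&mut buffer, "The answer is %d", 42)?;
//! assert_eq!(len, 5);
//!
//! // The first four bytes are the format string's token, little-endian.
//! let token_value = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
//! assert_eq!(token_value, token!("The answer is %d"));
//!
//! // The final byte is the argument 42, zigzag encoded: 42 << 1 == 84.
//! assert_eq!(buffer[4], 84);
//! # Ok(())
//! # }
//! # doctest().unwrap();
//! ```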
#![no_std]
#![deny(missing_docs)]
#[doc(hidden)]
pub mod internal;
#[doc(hidden)]
// Creating a __private namespace gives us a way to get to the modules
// we need from macros by doing:
// use $crate::__private as __pw_tokenizer_crate;
//
// This is how proc macro generated code can reliably reference back to
// `pw_tokenizer` while still allowing a user to import it under a different
// name.
pub mod __private {
pub use crate::*;
pub use pw_stream::{Cursor, Seek, WriteInteger, WriteVarint};
pub use pw_tokenizer_macro::{_token, _tokenize_to_buffer};
}
/// Return the [`u32`] token for the specified string and add it to the token
/// database.
///
/// This is where the magic happens in `pw_tokenizer`! ... and by magic
/// we mean hiding information in a special linker section that ends up in the
/// final ELF binary but does not get flashed to the device.
///
/// Two things are accomplished here:
/// 1) The string is hashed into its stable `u32` token. This is the value that
/// is returned from the macro.
/// 2) A [token database entry](https://pigweed.dev/pw_tokenizer/design.html#binary-database-format)
/// is generated, assigned to a unique static symbol, and placed in a linker
/// section named `pw_tokenizer.entries.<TOKEN_HASH>`. A
/// [linker script](https://pigweed.googlesource.com/pigweed/pigweed/+/refs/heads/main/pw_tokenizer/pw_tokenizer_linker_sections.ld)
/// is responsible for picking these symbols up and aggregating them into a
/// single `.pw_tokenizer.entries` section in the final binary.
///
/// # Example
/// ```
/// use pw_tokenizer::token;
///
/// let token = token!("hello, \"world\"");
/// assert_eq!(token, 3537412730);
/// ```
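///
/// Tokens are stable: hashing depends only on the string's contents, so the
/// same literal always yields the same token:
/// ```
/// use pw_tokenizer::token;
///
/// assert_eq!(token!("hello, \"world\""), token!("hello, \"world\""));
/// ```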
///
/// Currently there is no support for encoding tokens to specific domains
/// or with "fixed lengths" per [`pw_tokenizer_core::hash_bytes_fixed`].
#[macro_export]
macro_rules! token {
($string:literal) => {{
$crate::__private::_token!($string)
}};
}
/// Tokenize a format string and arguments to an `AsMut<[u8]>` buffer and add
/// the format string's token to the token database.
///
/// See [`token`] for an explanation on how strings are tokenized and entries
/// are added to the token database.
///
/// Returns a [`pw_status::Result<usize>`] containing the number of bytes
/// written to the buffer.
///
/// # Errors
/// - [`pw_status::Error::OutOfRange`] - Buffer is not large enough to fit
/// tokenized data.
/// - [`pw_status::Error::InvalidArgument`] - Invalid buffer was provided.
///
/// # Example
///
/// ```
/// use pw_tokenizer::tokenize_to_buffer;
///
/// # fn doctest() -> pw_status::Result<()> {
/// let mut buffer = [0u8; 1024];
/// let len = tokenize_to_buffer!(&mut buffer, "The answer is %d", 42)?;
///
/// // 4 bytes used to encode the token and one to encode the value 42.
/// assert_eq!(len, 5);
/// # Ok(())
/// # }
/// # doctest().unwrap();
/// ```
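///
/// A buffer with no room for the 4 byte token fails outright rather than
/// writing a partial token. A minimal sketch, assuming only string
/// arguments (not the token itself) are truncated to fit:
/// ```
/// use pw_tokenizer::tokenize_to_buffer;
///
/// let mut buffer = [0u8; 2];
/// assert!(tokenize_to_buffer!(&mut buffer, "The answer is %d", 42).is_err());
/// ```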
#[macro_export]
macro_rules! tokenize_to_buffer {
($buffer:expr, $format_string:literal) => {{
use $crate::__private as __pw_tokenizer_crate;
__pw_tokenizer_crate::_tokenize_to_buffer!($buffer, $format_string)
}};
($buffer:expr, $format_string:literal, $($args:expr),*) => {{
use $crate::__private as __pw_tokenizer_crate;
__pw_tokenizer_crate::_tokenize_to_buffer!($buffer, $format_string, $($args),*)
}};
}
#[cfg(test)]
mod tests {
use super::*;
    // Allows macro-generated code, which refers to the crate as
    // `pw_tokenizer`, to resolve that name inside this crate's own tests.
    extern crate self as pw_tokenizer;
    // This is not meant to be an exhaustive test of tokenization, which is
    // covered by `pw_tokenizer_core`'s unit tests. Rather, this tests that
    // the `token!` and `tokenize_to_buffer!` macros hook into it correctly.
    #[test]
    fn test_token() {
        // Matches the example in the `token!` documentation above.
        assert_eq!(token!("hello, \"world\""), 3537412730);
    }
macro_rules! tokenize_to_buffer_test {
        ($expected_data:expr, $buffer_len:expr, $fmt:expr) => {
            {
                let mut buffer = [0u8; $buffer_len];
                let len = tokenize_to_buffer!(&mut buffer, $fmt).unwrap();
                assert_eq!(
                    &buffer[..len],
                    $expected_data,
                );
            }
        };
($expected_data:expr, $buffer_len:expr, $fmt:expr, $($args:expr),*) => {
{
let mut buffer = [0u8; $buffer_len];
let len = tokenize_to_buffer!(&mut buffer, $fmt, $($args),*).unwrap();
assert_eq!(
&buffer[..len],
$expected_data,
);
}
};
}
#[test]
fn test_decimal_format() {
tokenize_to_buffer_test!(
            &[0x52, 0x1c, 0xb0, 0x4c, 0x2], // expected buffer: token + zigzag(1) == 0x2
64, // buffer size
"The answer is %d!",
1
);
tokenize_to_buffer_test!(
            &[0x36, 0xd0, 0xfb, 0x69, 0x1], // expected buffer: token + zigzag(-1) == 0x1
64, // buffer size
"No! The answer is %d!",
-1
);
tokenize_to_buffer_test!(
            &[0xa4, 0xad, 0x50, 0x54, 0x0], // expected buffer: token + zigzag(0) == 0x0
64, // buffer size
"I think you'll find that the answer is %d!",
0
);
}
#[test]
fn test_misc_integer_format() {
// %d, %i, %o, %u, %x, %X all encode integers the same.
tokenize_to_buffer_test!(
&[0x57, 0x88, 0xc5, 0xd8, 0x2], // expected buffer
64, // buffer size
"The answer is %i!",
1
);
tokenize_to_buffer_test!(
&[0x5d, 0x70, 0x12, 0xb4, 0x2], // expected buffer
64, // buffer size
"The answer is %o!",
1
);
tokenize_to_buffer_test!(
&[0x63, 0x58, 0x5f, 0x8f, 0x2], // expected buffer
64, // buffer size
"The answer is %u!",
1
);
tokenize_to_buffer_test!(
&[0x66, 0xcc, 0x05, 0x7d, 0x2], // expected buffer
64, // buffer size
"The answer is %x!",
1
);
tokenize_to_buffer_test!(
&[0x46, 0x4c, 0x16, 0x96, 0x2], // expected buffer
64, // buffer size
"The answer is %X!",
1
);
}
#[test]
fn test_string_format() {
tokenize_to_buffer_test!(
b"\x25\xf6\x2e\x66\x07Pigweed", // expected buffer
64, // buffer size
"Hello: %s!",
"Pigweed"
);
}
#[test]
fn test_string_format_overflow() {
tokenize_to_buffer_test!(
b"\x25\xf6\x2e\x66\x83Pig", // expected buffer
8, // buffer size
"Hello: %s!",
"Pigweed"
);
}
#[test]
fn test_char_format() {
tokenize_to_buffer_test!(
            &[0x2e, 0x52, 0xac, 0xe4, 0x50], // expected buffer: token + b'P' (0x50)
64, // buffer size
"Hello: %cigweed",
"P".as_bytes()[0]
);
}
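
    #[test]
    fn test_multiple_args() {
        // A property check rather than a golden-byte comparison: with two
        // small integer arguments, the output should be the 4 byte token
        // plus one zigzag varint byte per argument. The expected length is
        // an extrapolation from the single-argument tests above.
        let mut buffer = [0u8; 64];
        let len = tokenize_to_buffer!(&mut buffer, "%d and %d", 1, -1).unwrap();
        assert_eq!(len, 6);
    }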
}