crates/base64/src/alphabet.rs - platform/external/rust/android-crates-io - Git at Google

 //! Provides [Alphabet] and constants for alphabets commonly used in the wild.

 use crate::PAD_BYTE;
 use core::{convert, fmt};
 #[cfg(any(feature = "std", test))]
 use std::error;

 /// Number of symbols in an alphabet; also the exact byte length an alphabet string must have.
 const ALPHABET_SIZE: usize = 64;

 /// An alphabet defines the 64 ASCII characters (symbols) used for base64.
 ///
 /// Common alphabets are provided as constants, and custom alphabets
 /// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
 ///
 /// # Examples
 ///
 /// Building and using a custom Alphabet:
 ///
 /// ```
 /// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
 ///
 /// let engine = base64::engine::GeneralPurpose::new(
 ///     &custom,
 ///     base64::engine::general_purpose::PAD);
 /// ```
 ///
 /// Building a const:
 ///
 /// ```
 /// use base64::alphabet::Alphabet;
 ///
 /// static CUSTOM: Alphabet = {
 ///     // Result::unwrap() isn't const yet, but panic!() is OK
 ///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
 ///         Ok(x) => x,
 ///         Err(_) => panic!("creation of alphabet failed"),
 ///     }
 /// };
 /// ```
 ///
 /// Building lazily:
 ///
 /// ```
 /// use base64::{
 ///     alphabet::Alphabet,
 ///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
 /// };
 /// use once_cell::sync::Lazy;
 ///
 /// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
 ///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
 /// );
 /// ```
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Alphabet {
     /// The 64 symbol bytes, stored in the same order as the string the alphabet was built from.
     pub(crate) symbols: [u8; ALPHABET_SIZE],
 }

 impl Alphabet {
     /// Copies the first 64 bytes of `alphabet` into a new `Alphabet` without validating them.
     ///
     /// Performs no checks so that it can be const.
     /// Used only for known-valid strings.
     ///
     /// # Panics
     ///
     /// Panics (or fails const evaluation) if `alphabet` is shorter than 64 bytes, because the
     /// copy loop indexes the source bytes directly.
     const fn from_str_unchecked(alphabet: &str) -> Self {
         let mut symbols = [0_u8; ALPHABET_SIZE];
         let source_bytes = alphabet.as_bytes();

         // a way to copy that's allowed in const fn
         let mut index = 0;
         while index < ALPHABET_SIZE {
             symbols[index] = source_bytes[index];
             index += 1;
         }

         Self { symbols }
     }

     /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
     ///
     /// The `=` byte is not allowed as it is used for padding.
     ///
     /// # Errors
     ///
     /// The length is checked first; the remaining checks run byte by byte, front to back, so
     /// the error describes the first offending byte:
     ///
     /// - [`ParseAlphabetError::InvalidLength`] if `alphabet` is not exactly 64 bytes long.
     /// - [`ParseAlphabetError::UnprintableByte`] if a byte is outside the range `[32, 126]`.
     /// - [`ParseAlphabetError::ReservedByte`] if a byte is the padding byte `=`.
     /// - [`ParseAlphabetError::DuplicatedByte`] if a byte occurs again later in the string.
     pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
         let bytes = alphabet.as_bytes();
         if bytes.len() != ALPHABET_SIZE {
             return Err(ParseAlphabetError::InvalidLength);
         }

         let mut index = 0;
         while index < ALPHABET_SIZE {
             let byte = bytes[index];

             // must be ascii printable. 127 (DEL) is commonly considered printable
             // for some reason but clearly unsuitable for base64.
             if byte < 32_u8 || byte > 126_u8 {
                 return Err(ParseAlphabetError::UnprintableByte(byte));
             }
             // = is assumed to be padding, so cannot be used as a symbol
             if byte == PAD_BYTE {
                 return Err(ParseAlphabetError::ReservedByte(byte));
             }

             // Check for duplicates while staying within what const allows.
             // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the
             // single digit microsecond range.
             //
             // Only the bytes after `index` need probing: a match with an earlier byte would
             // already have been reported when that earlier byte was the current one.
             let mut probe_index = index + 1;
             while probe_index < ALPHABET_SIZE {
                 if byte == bytes[probe_index] {
                     return Err(ParseAlphabetError::DuplicatedByte(byte));
                 }
                 probe_index += 1;
             }

             index += 1;
         }

         Ok(Self::from_str_unchecked(alphabet))
     }

     /// Create a `&str` from the symbols in the `Alphabet`
     ///
     /// # Panics
     ///
     /// Panics only if the stored symbols are not valid UTF-8, which would mean the alphabet
     /// was built from something other than a validated (or known-valid) ASCII string.
     pub fn as_str(&self) -> &str {
         // `new` only accepts printable ASCII and `from_str_unchecked` is reserved for
         // known-valid strings, so the symbols are always valid UTF-8.
         core::str::from_utf8(&self.symbols).expect("alphabet symbols must be printable ASCII")
     }
 }

 impl convert::TryFrom<&str> for Alphabet {
     type Error = ParseAlphabetError;

     fn try_from(value: &str) -> Result<Self, Self::Error> {
         Self::new(value)
     }
 }

 /// Possible errors when constructing an [Alphabet] from a `str`.
 ///
 /// The variants that hold a `u8` carry the byte that caused the rejection.
 #[derive(Debug, Eq, PartialEq)]
 pub enum ParseAlphabetError {
     /// Alphabets must be exactly 64 bytes long
     InvalidLength,
     /// All bytes must be unique; holds the byte that appears more than once
     DuplicatedByte(u8),
     /// All bytes must be printable (in the range `[32, 126]`); holds the offending byte
     UnprintableByte(u8),
     /// `=` cannot be used because it is reserved for padding; holds the reserved byte
     ReservedByte(u8),
 }

 impl fmt::Display for ParseAlphabetError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
             Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
             Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
             Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
         }
     }
 }

 // Gated exactly like the `use std::error;` import above: the `Error` trait is taken from
 // `std`, so this impl only exists when the `std` feature is on or when building tests.
 #[cfg(any(feature = "std", test))]
 impl error::Error for ParseAlphabetError {}

 /// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
 ///
 /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
 );

 /// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
 ///
 /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
 );

 /// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
 ///
 /// After `.` and `/` come the digits, then uppercase letters, then lowercase letters.
 ///
 /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
 pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
     "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
 );

 /// The bcrypt alphabet.
 ///
 /// Like [`CRYPT`] it starts with `.` and `/`, but the digits come last: uppercase letters,
 /// then lowercase letters, then digits.
 pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
     "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
 );

 /// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
 ///
 /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
 pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
 );

 /// The alphabet used in BinHex 4.0 files.
 ///
 /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
 ///
 /// The string is not a contiguous ASCII run: it has gaps such as no `7`, `O`, `W`, `g`, `n`
 /// or `o`. NOTE(review): these gaps look intentional; confirm against the linked definition
 /// before "fixing" the string, since it is built with `from_str_unchecked` and not validated.
 pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
     "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
 );

 #[cfg(test)]
 mod tests {
     use crate::alphabet::*;
     use core::convert::TryFrom as _;

     /// Asserts that `Alphabet::new(input)` is rejected with exactly `expected`.
     fn assert_rejected(expected: ParseAlphabetError, input: &str) {
         assert_eq!(Err(expected), Alphabet::new(input));
     }

     /// A symbol repeated in the first two positions is reported.
     #[test]
     fn detects_duplicate_start() {
         assert_rejected(
             ParseAlphabetError::DuplicatedByte(b'A'),
             "AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
         );
     }

     /// A symbol repeated in the last two positions is reported.
     #[test]
     fn detects_duplicate_end() {
         assert_rejected(
             ParseAlphabetError::DuplicatedByte(b'/'),
             "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//",
         );
     }

     /// A symbol repeated in the middle of the string is reported.
     #[test]
     fn detects_duplicate_middle() {
         assert_rejected(
             ParseAlphabetError::DuplicatedByte(b'Z'),
             "ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
         );
     }

     /// A string that is longer than 64 bytes is rejected before any per-byte check.
     #[test]
     fn detects_length() {
         assert_rejected(
             ParseAlphabetError::InvalidLength,
             "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
         );
     }

     /// The padding byte `=` may not be used as a symbol.
     #[test]
     fn detects_padding() {
         assert_rejected(
             ParseAlphabetError::ReservedByte(b'='),
             "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=",
         );
     }

     /// A control character (form feed) is rejected as unprintable.
     #[test]
     fn detects_unprintable() {
         assert_rejected(
             ParseAlphabetError::UnprintableByte(0xc),
             "\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
         );
     }

     /// The checked constructor yields the same value as the unchecked `STANDARD` constant.
     #[test]
     fn same_as_unchecked() {
         let checked =
             Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                 .unwrap();
         assert_eq!(STANDARD, checked);
     }

     /// `as_str` returns exactly the string the alphabet was built from.
     #[test]
     fn str_same_as_input() {
         let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
         let parsed = Alphabet::try_from(alphabet).unwrap();
         assert_eq!(alphabet, parsed.as_str())
     }
 }
	//! Provides [Alphabet] and constants for alphabets commonly used in the wild.

	use crate::PAD_BYTE;
	use core::{convert, fmt};
	#[cfg(any(feature = "std", test))]
	use std::error;

	/// Number of symbols in an alphabet; also the exact byte length an alphabet string must have.
	const ALPHABET_SIZE: usize = 64;

	/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
	///
	/// Common alphabets are provided as constants, and custom alphabets
	/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
	///
	/// # Examples
	///
	/// Building and using a custom Alphabet:
	///
	/// ```
	/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
	///
	/// let engine = base64::engine::GeneralPurpose::new(
	///     &custom,
	///     base64::engine::general_purpose::PAD);
	/// ```
	///
	/// Building a const:
	///
	/// ```
	/// use base64::alphabet::Alphabet;
	///
	/// static CUSTOM: Alphabet = {
	///     // Result::unwrap() isn't const yet, but panic!() is OK
	///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
	///         Ok(x) => x,
	///         Err(_) => panic!("creation of alphabet failed"),
	///     }
	/// };
	/// ```
	///
	/// Building lazily:
	///
	/// ```
	/// use base64::{
	///     alphabet::Alphabet,
	///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
	/// };
	/// use once_cell::sync::Lazy;
	///
	/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
	///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
	/// );
	/// ```
	#[derive(Clone, Debug, Eq, PartialEq)]
	pub struct Alphabet {
	/// The 64 symbol bytes, stored in the same order as the string the alphabet was built from.
	pub(crate) symbols: [u8; ALPHABET_SIZE],
	}

	impl Alphabet {
	    /// Copies the first 64 bytes of `alphabet` into a new `Alphabet` without validating them.
	    ///
	    /// Performs no checks so that it can be const.
	    /// Used only for known-valid strings.
	    ///
	    /// # Panics
	    ///
	    /// Panics (or fails const evaluation) if `alphabet` is shorter than 64 bytes, because the
	    /// copy loop indexes the source bytes directly.
	    const fn from_str_unchecked(alphabet: &str) -> Self {
	        let mut symbols = [0_u8; ALPHABET_SIZE];
	        let source_bytes = alphabet.as_bytes();

	        // a way to copy that's allowed in const fn
	        let mut index = 0;
	        while index < ALPHABET_SIZE {
	            symbols[index] = source_bytes[index];
	            index += 1;
	        }

	        Self { symbols }
	    }

	    /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
	    ///
	    /// The `=` byte is not allowed as it is used for padding.
	    ///
	    /// # Errors
	    ///
	    /// The length is checked first; the remaining checks run byte by byte, front to back, so
	    /// the error describes the first offending byte:
	    ///
	    /// - [`ParseAlphabetError::InvalidLength`] if `alphabet` is not exactly 64 bytes long.
	    /// - [`ParseAlphabetError::UnprintableByte`] if a byte is outside the range `[32, 126]`.
	    /// - [`ParseAlphabetError::ReservedByte`] if a byte is the padding byte `=`.
	    /// - [`ParseAlphabetError::DuplicatedByte`] if a byte occurs again later in the string.
	    pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
	        let bytes = alphabet.as_bytes();
	        if bytes.len() != ALPHABET_SIZE {
	            return Err(ParseAlphabetError::InvalidLength);
	        }

	        let mut index = 0;
	        while index < ALPHABET_SIZE {
	            let byte = bytes[index];

	            // must be ascii printable. 127 (DEL) is commonly considered printable
	            // for some reason but clearly unsuitable for base64.
	            if byte < 32_u8 || byte > 126_u8 {
	                return Err(ParseAlphabetError::UnprintableByte(byte));
	            }
	            // = is assumed to be padding, so cannot be used as a symbol
	            if byte == PAD_BYTE {
	                return Err(ParseAlphabetError::ReservedByte(byte));
	            }

	            // Check for duplicates while staying within what const allows.
	            // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the
	            // single digit microsecond range.
	            //
	            // Only the bytes after `index` need probing: a match with an earlier byte would
	            // already have been reported when that earlier byte was the current one.
	            let mut probe_index = index + 1;
	            while probe_index < ALPHABET_SIZE {
	                if byte == bytes[probe_index] {
	                    return Err(ParseAlphabetError::DuplicatedByte(byte));
	                }
	                probe_index += 1;
	            }

	            index += 1;
	        }

	        Ok(Self::from_str_unchecked(alphabet))
	    }

	    /// Create a `&str` from the symbols in the `Alphabet`
	    ///
	    /// # Panics
	    ///
	    /// Panics only if the stored symbols are not valid UTF-8, which would mean the alphabet
	    /// was built from something other than a validated (or known-valid) ASCII string.
	    pub fn as_str(&self) -> &str {
	        // `new` only accepts printable ASCII and `from_str_unchecked` is reserved for
	        // known-valid strings, so the symbols are always valid UTF-8.
	        core::str::from_utf8(&self.symbols).expect("alphabet symbols must be printable ASCII")
	    }
	}

	impl convert::TryFrom<&str> for Alphabet {
	type Error = ParseAlphabetError;

	fn try_from(value: &str) -> Result<Self, Self::Error> {
	Self::new(value)
	}
	}

	/// Possible errors when constructing an [Alphabet] from a `str`.
	///
	/// The variants that hold a `u8` carry the byte that caused the rejection.
	#[derive(Debug, Eq, PartialEq)]
	pub enum ParseAlphabetError {
	/// Alphabets must be exactly 64 bytes long
	InvalidLength,
	/// All bytes must be unique; holds the byte that appears more than once
	DuplicatedByte(u8),
	/// All bytes must be printable (in the range `[32, 126]`); holds the offending byte
	UnprintableByte(u8),
	/// `=` cannot be used because it is reserved for padding; holds the reserved byte
	ReservedByte(u8),
	}

	impl fmt::Display for ParseAlphabetError {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
	match self {
	Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
	Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
	Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
	Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
	}
	}
	}

	// Gated exactly like the `use std::error;` import above: the `Error` trait is taken from
	// `std`, so this impl only exists when the `std` feature is on or when building tests.
	#[cfg(any(feature = "std", test))]
	impl error::Error for ParseAlphabetError {}

	/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
	///
	/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
	pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
	);

	/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
	///
	/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
	pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
	);

	/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
	///
	/// After `.` and `/` come the digits, then uppercase letters, then lowercase letters.
	///
	/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
	pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
	"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
	);

	/// The bcrypt alphabet.
	///
	/// Like [`CRYPT`] it starts with `.` and `/`, but the digits come last: uppercase letters,
	/// then lowercase letters, then digits.
	pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
	"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
	);

	/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
	///
	/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
	pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
	);

	/// The alphabet used in BinHex 4.0 files.
	///
	/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
	///
	/// The string is not a contiguous ASCII run: it has gaps such as no `7`, `O`, `W`, `g`, `n`
	/// or `o`. NOTE(review): these gaps look intentional; confirm against the linked definition
	/// before "fixing" the string, since it is built with `from_str_unchecked` and not validated.
	pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
	"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
	);

	#[cfg(test)]
	mod tests {
	    use crate::alphabet::*;
	    use core::convert::TryFrom as _;

	    /// Asserts that `Alphabet::new(input)` is rejected with exactly `expected`.
	    fn assert_rejected(expected: ParseAlphabetError, input: &str) {
	        assert_eq!(Err(expected), Alphabet::new(input));
	    }

	    /// A symbol repeated in the first two positions is reported.
	    #[test]
	    fn detects_duplicate_start() {
	        assert_rejected(
	            ParseAlphabetError::DuplicatedByte(b'A'),
	            "AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
	        );
	    }

	    /// A symbol repeated in the last two positions is reported.
	    #[test]
	    fn detects_duplicate_end() {
	        assert_rejected(
	            ParseAlphabetError::DuplicatedByte(b'/'),
	            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//",
	        );
	    }

	    /// A symbol repeated in the middle of the string is reported.
	    #[test]
	    fn detects_duplicate_middle() {
	        assert_rejected(
	            ParseAlphabetError::DuplicatedByte(b'Z'),
	            "ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
	        );
	    }

	    /// A string that is longer than 64 bytes is rejected before any per-byte check.
	    #[test]
	    fn detects_length() {
	        assert_rejected(
	            ParseAlphabetError::InvalidLength,
	            "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
	        );
	    }

	    /// The padding byte `=` may not be used as a symbol.
	    #[test]
	    fn detects_padding() {
	        assert_rejected(
	            ParseAlphabetError::ReservedByte(b'='),
	            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=",
	        );
	    }

	    /// A control character (form feed) is rejected as unprintable.
	    #[test]
	    fn detects_unprintable() {
	        assert_rejected(
	            ParseAlphabetError::UnprintableByte(0xc),
	            "\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
	        );
	    }

	    /// The checked constructor yields the same value as the unchecked `STANDARD` constant.
	    #[test]
	    fn same_as_unchecked() {
	        let checked =
	            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
	                .unwrap();
	        assert_eq!(STANDARD, checked);
	    }

	    /// `as_str` returns exactly the string the alphabet was built from.
	    #[test]
	    fn str_same_as_input() {
	        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	        let parsed = Alphabet::try_from(alphabet).unwrap();
	        assert_eq!(alphabet, parsed.as_str())
	    }
	}