|  | //! Provides [Alphabet] and constants for alphabets commonly used in the wild. | 
|  |  | 
|  | use crate::PAD_BYTE; | 
|  | use core::{convert, fmt}; | 
|  | #[cfg(any(feature = "std", test))] | 
|  | use std::error; | 
|  |  | 
/// Number of symbols in a base64 alphabet (2^6 = 64).
const ALPHABET_SIZE: usize = 1 << 6;
|  |  | 
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building at compile time (in a `static` initializer):
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::alphabet::Alphabet;
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    // The 64 symbol bytes, stored in the same order as the string the alphabet was built from.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
|  |  | 
|  | impl Alphabet { | 
|  | /// Performs no checks so that it can be const. | 
|  | /// Used only for known-valid strings. | 
|  | const fn from_str_unchecked(alphabet: &str) -> Self { | 
|  | let mut symbols = [0_u8; ALPHABET_SIZE]; | 
|  | let source_bytes = alphabet.as_bytes(); | 
|  |  | 
|  | // a way to copy that's allowed in const fn | 
|  | let mut index = 0; | 
|  | while index < ALPHABET_SIZE { | 
|  | symbols[index] = source_bytes[index]; | 
|  | index += 1; | 
|  | } | 
|  |  | 
|  | Self { symbols } | 
|  | } | 
|  |  | 
|  | /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. | 
|  | /// | 
|  | /// The `=` byte is not allowed as it is used for padding. | 
|  | pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { | 
|  | let bytes = alphabet.as_bytes(); | 
|  | if bytes.len() != ALPHABET_SIZE { | 
|  | return Err(ParseAlphabetError::InvalidLength); | 
|  | } | 
|  |  | 
|  | { | 
|  | let mut index = 0; | 
|  | while index < ALPHABET_SIZE { | 
|  | let byte = bytes[index]; | 
|  |  | 
|  | // must be ascii printable. 127 (DEL) is commonly considered printable | 
|  | // for some reason but clearly unsuitable for base64. | 
|  | if !(byte >= 32_u8 && byte <= 126_u8) { | 
|  | return Err(ParseAlphabetError::UnprintableByte(byte)); | 
|  | } | 
|  | // = is assumed to be padding, so cannot be used as a symbol | 
|  | if byte == PAD_BYTE { | 
|  | return Err(ParseAlphabetError::ReservedByte(byte)); | 
|  | } | 
|  |  | 
|  | // Check for duplicates while staying within what const allows. | 
|  | // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit | 
|  | // microsecond range. | 
|  |  | 
|  | let mut probe_index = 0; | 
|  | while probe_index < ALPHABET_SIZE { | 
|  | if probe_index == index { | 
|  | probe_index += 1; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | let probe_byte = bytes[probe_index]; | 
|  |  | 
|  | if byte == probe_byte { | 
|  | return Err(ParseAlphabetError::DuplicatedByte(byte)); | 
|  | } | 
|  |  | 
|  | probe_index += 1; | 
|  | } | 
|  |  | 
|  | index += 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | Ok(Self::from_str_unchecked(alphabet)) | 
|  | } | 
|  |  | 
|  | /// Create a `&str` from the symbols in the `Alphabet` | 
|  | pub fn as_str(&self) -> &str { | 
|  | core::str::from_utf8(&self.symbols).unwrap() | 
|  | } | 
|  | } | 
|  |  | 
|  | impl convert::TryFrom<&str> for Alphabet { | 
|  | type Error = ParseAlphabetError; | 
|  |  | 
|  | fn try_from(value: &str) -> Result<Self, Self::Error> { | 
|  | Self::new(value) | 
|  | } | 
|  | } | 
|  |  | 
/// Reasons a `str` can be rejected when constructing an [Alphabet] from it.
///
/// Variants that carry a `u8` hold the offending byte.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseAlphabetError {
    /// The input was not exactly 64 bytes long.
    InvalidLength,
    /// The contained byte occurs more than once; every symbol must be unique.
    DuplicatedByte(u8),
    /// The contained byte is not printable, i.e. it lies outside the range `[32, 126]`.
    UnprintableByte(u8),
    /// The contained byte is `=`, which cannot be used as a symbol.
    ReservedByte(u8),
}
|  |  | 
|  | impl fmt::Display for ParseAlphabetError { | 
|  | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | 
|  | match self { | 
|  | Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), | 
|  | Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b), | 
|  | Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b), | 
|  | Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b), | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
// `std::error::Error` is only implemented when the `std` feature is enabled or in test builds,
// matching the cfg gate on the `std::error` import.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
|  |  | 
/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
///
/// Symbols are ordered `A-Z`, `a-z`, `0-9`, then `+` and `/`.
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
);
|  |  | 
/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
///
/// Symbols are ordered `A-Z`, `a-z`, `0-9`, then `-` and `_`.
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
);
|  |  | 
/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
///
/// Symbols are ordered `.`, `/`, `0-9`, `A-Z`, then `a-z`.
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
);
|  |  | 
/// The bcrypt alphabet (with `.` and `/` as the _first_ two characters).
///
/// Symbols are ordered `.`, `/`, `A-Z`, `a-z`, then `0-9`; unlike [`CRYPT`], the digits come
/// last rather than directly after `./`.
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
);
|  |  | 
/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
///
/// Symbols are ordered `A-Z`, `a-z`, `0-9`, then `+` and `,`.
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
);
|  |  | 
/// The alphabet used in BinHex 4.0 files.
///
/// Unlike the other predefined alphabets, this one starts with punctuation and skips some
/// letters and digits (for example `7`, `O` and `g` are absent).
///
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
);
|  |  | 
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // Input longer than 64 bytes.
    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    // Input shorter than 64 bytes, including the empty string.
    #[test]
    fn detects_length_too_short() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+")
                .unwrap_err()
        );
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("").unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // DEL (127) sits just past the upper bound of the accepted range.
    #[test]
    fn detects_unprintable_del() {
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // A two-byte UTF-8 character replaces "AB" so the input is still 64 bytes long; its
    // first byte (0xc3) is outside the printable ASCII range.
    #[test]
    fn detects_non_ascii() {
        let input = "\u{e9}CDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        assert_eq!(64, input.len());
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc3),
            Alphabet::new(input).unwrap_err()
        );
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    // The predefined alphabets are built without validation, so confirm each one would also
    // be accepted by the checked constructor and round-trips unchanged.
    #[test]
    fn predefined_alphabets_pass_validation() {
        let predefined = [
            &STANDARD,
            &URL_SAFE,
            &CRYPT,
            &BCRYPT,
            &IMAP_MUTF7,
            &BIN_HEX,
        ];
        for alphabet in predefined.iter() {
            assert_eq!(**alphabet, Alphabet::new(alphabet.as_str()).unwrap());
        }
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str());
    }
}