|  | use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::hex_digit_value}; | 
|  |  | 
|  |  | 
|  | /// Must start with `\` | 
|  | pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> { | 
|  | let first = input.as_bytes().get(1) | 
|  | .ok_or(perr(offset, UnterminatedEscape))?; | 
|  | let out = match first { | 
|  | // Quote escapes | 
|  | b'\'' => (E::from_byte(b'\''), 2), | 
|  | b'"' => (E::from_byte(b'"'), 2), | 
|  |  | 
|  | // Ascii escapes | 
|  | b'n' => (E::from_byte(b'\n'), 2), | 
|  | b'r' => (E::from_byte(b'\r'), 2), | 
|  | b't' => (E::from_byte(b'\t'), 2), | 
|  | b'\\' => (E::from_byte(b'\\'), 2), | 
|  | b'0' => (E::from_byte(b'\0'), 2), | 
|  | b'x' => { | 
|  | let hex_string = input.get(2..4) | 
|  | .ok_or(perr(offset..offset + input.len(), UnterminatedEscape))? | 
|  | .as_bytes(); | 
|  | let first = hex_digit_value(hex_string[0]) | 
|  | .ok_or(perr(offset..offset + 4, InvalidXEscape))?; | 
|  | let second = hex_digit_value(hex_string[1]) | 
|  | .ok_or(perr(offset..offset + 4, InvalidXEscape))?; | 
|  | let value = second + 16 * first; | 
|  |  | 
|  | if E::SUPPORTS_UNICODE && value > 0x7F { | 
|  | return Err(perr(offset..offset + 4, NonAsciiXEscape)); | 
|  | } | 
|  |  | 
|  | (E::from_byte(value), 4) | 
|  | }, | 
|  |  | 
|  | // Unicode escape | 
|  | b'u' => { | 
|  | if !E::SUPPORTS_UNICODE { | 
|  | return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral)); | 
|  | } | 
|  |  | 
|  | if input.as_bytes().get(2) != Some(&b'{') { | 
|  | return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace)); | 
|  | } | 
|  |  | 
|  | let closing_pos = input.bytes().position(|b| b == b'}') | 
|  | .ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?; | 
|  |  | 
|  | let inner = &input[3..closing_pos]; | 
|  | if inner.as_bytes().first() == Some(&b'_') { | 
|  | return Err(perr(4, InvalidStartOfUnicodeEscape)); | 
|  | } | 
|  |  | 
|  | let mut v: u32 = 0; | 
|  | let mut digit_count = 0; | 
|  | for (i, b) in inner.bytes().enumerate() { | 
|  | if b == b'_'{ | 
|  | continue; | 
|  | } | 
|  |  | 
|  | let digit = hex_digit_value(b) | 
|  | .ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?; | 
|  |  | 
|  | if digit_count == 6 { | 
|  | return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape)); | 
|  | } | 
|  | digit_count += 1; | 
|  | v = 16 * v + digit as u32; | 
|  | } | 
|  |  | 
|  | let c = std::char::from_u32(v) | 
|  | .ok_or(perr(offset..closing_pos + 1, InvalidUnicodeEscapeChar))?; | 
|  |  | 
|  | (E::from_char(c), closing_pos + 1) | 
|  | } | 
|  |  | 
|  | _ => return Err(perr(offset..offset + 2, UnknownEscape)), | 
|  | }; | 
|  |  | 
|  | Ok(out) | 
|  | } | 
|  |  | 
/// A value that an escape sequence can be unescaped into.
///
/// Implemented for `u8` (byte literals) and `char` (unicode literals). Every
/// implementor must be convertible into a `char`.
pub(crate) trait Escapee
where
    Self: Into<char>,
{
    /// Whether the literal kind is a unicode one: `\u{...}` escapes are only
    /// accepted if this is `true`, and non-ASCII input bytes are only
    /// rejected if this is `false`.
    const SUPPORTS_UNICODE: bool;

    /// Creates the value from a single byte (simple and `\x` escapes).
    fn from_byte(b: u8) -> Self;

    /// Creates the value from a `char` (`\u{...}` escapes).
    fn from_char(c: char) -> Self;
}
|  |  | 
|  | impl Escapee for u8 { | 
|  | const SUPPORTS_UNICODE: bool = false; | 
|  | fn from_byte(b: u8) -> Self { | 
|  | b | 
|  | } | 
|  | fn from_char(_: char) -> Self { | 
|  | panic!("bug: `<u8 as Escapee>::from_char` was called"); | 
|  | } | 
|  | } | 
|  |  | 
|  | impl Escapee for char { | 
|  | const SUPPORTS_UNICODE: bool = true; | 
|  | fn from_byte(b: u8) -> Self { | 
|  | b.into() | 
|  | } | 
|  | fn from_char(c: char) -> Self { | 
|  | c | 
|  | } | 
|  | } | 
|  |  | 
/// Returns `true` for the bytes that are skipped after the start of a
/// "string continue" (an unescaped backslash followed by `\n`): space, tab,
/// line feed and carriage return.
fn is_string_continue_skipable_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
}
|  |  | 
|  | /// Unescapes a whole string or byte string. | 
|  | #[inline(never)] | 
|  | pub(crate) fn unescape_string<E: Escapee>( | 
|  | input: &str, | 
|  | offset: usize, | 
|  | ) -> Result<Option<String>, ParseError> { | 
|  | let mut i = offset; | 
|  | let mut end_last_escape = offset; | 
|  | let mut value = String::new(); | 
|  | while i < input.len() - 1 { | 
|  | match input.as_bytes()[i] { | 
|  | // Handle "string continue". | 
|  | b'\\' if input.as_bytes()[i + 1] == b'\n' => { | 
|  | value.push_str(&input[end_last_escape..i]); | 
|  |  | 
|  | // Find the first non-whitespace character. | 
|  | let end_escape = input[i + 2..].bytes() | 
|  | .position(|b| !is_string_continue_skipable_whitespace(b)) | 
|  | .ok_or(perr(None, UnterminatedString))?; | 
|  |  | 
|  | i += 2 + end_escape; | 
|  | end_last_escape = i; | 
|  | } | 
|  | b'\\' => { | 
|  | let (c, len) = unescape::<E>(&input[i..input.len() - 1], i)?; | 
|  | value.push_str(&input[end_last_escape..i]); | 
|  | value.push(c.into()); | 
|  | i += len; | 
|  | end_last_escape = i; | 
|  | } | 
|  | b'\r' => { | 
|  | if input.as_bytes()[i + 1] == b'\n' { | 
|  | value.push_str(&input[end_last_escape..i]); | 
|  | value.push('\n'); | 
|  | i += 2; | 
|  | end_last_escape = i; | 
|  | } else { | 
|  | return Err(perr(i, IsolatedCr)) | 
|  | } | 
|  | } | 
|  | b'"' => return Err(perr(i + 1..input.len(), UnexpectedChar)), | 
|  | b if !E::SUPPORTS_UNICODE && !b.is_ascii() | 
|  | => return Err(perr(i, NonAsciiInByteLiteral)), | 
|  | _ => i += 1, | 
|  | } | 
|  | } | 
|  |  | 
|  | if input.as_bytes()[input.len() - 1] != b'"' || input.len() == offset { | 
|  | return Err(perr(None, UnterminatedString)); | 
|  | } | 
|  |  | 
|  | // `value` is only empty if there was no escape in the input string | 
|  | // (with the special case of the input being empty). This means the | 
|  | // string value basically equals the input, so we store `None`. | 
|  | let value = if value.is_empty() { | 
|  | None | 
|  | } else { | 
|  | // There was an escape in the string, so we need to push the | 
|  | // remaining unescaped part of the string still. | 
|  | value.push_str(&input[end_last_escape..input.len() - 1]); | 
|  | Some(value) | 
|  | }; | 
|  |  | 
|  | Ok(value) | 
|  | } | 
|  |  | 
|  | /// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to | 
|  | /// just `\n` sequences. Returns an optional new string (if the input contained | 
|  | /// any `\r\n`) and the number of hashes used by the literal. | 
|  | #[inline(never)] | 
|  | pub(crate) fn scan_raw_string<E: Escapee>( | 
|  | input: &str, | 
|  | offset: usize, | 
|  | ) -> Result<(Option<String>, u32), ParseError> { | 
|  | // Raw string literal | 
|  | let num_hashes = input[offset..].bytes().position(|b| b != b'#') | 
|  | .ok_or(perr(None, InvalidLiteral))?; | 
|  |  | 
|  | if input.as_bytes().get(offset + num_hashes) != Some(&b'"') { | 
|  | return Err(perr(None, InvalidLiteral)); | 
|  | } | 
|  | let start_inner = offset + num_hashes + 1; | 
|  | let hashes = &input[offset..num_hashes + offset]; | 
|  |  | 
|  | let mut closing_quote_pos = None; | 
|  | let mut i = start_inner; | 
|  | let mut end_last_escape = start_inner; | 
|  | let mut value = String::new(); | 
|  | while i < input.len() { | 
|  | let b = input.as_bytes()[i]; | 
|  | if b == b'"' && input[i + 1..].starts_with(hashes) { | 
|  | closing_quote_pos = Some(i); | 
|  | break; | 
|  | } | 
|  |  | 
|  | if b == b'\r' { | 
|  | // Convert `\r\n` into `\n`. This is currently not well documented | 
|  | // in the Rust reference, but is done even for raw strings. That's | 
|  | // because rustc simply converts all line endings when reading | 
|  | // source files. | 
|  | if input.as_bytes().get(i + 1) == Some(&b'\n') { | 
|  | value.push_str(&input[end_last_escape..i]); | 
|  | value.push('\n'); | 
|  | i += 2; | 
|  | end_last_escape = i; | 
|  | continue; | 
|  | } else if E::SUPPORTS_UNICODE { | 
|  | // If no \n follows the \r and we are scanning a raw string | 
|  | // (not raw byte string), we error. | 
|  | return Err(perr(i, IsolatedCr)) | 
|  | } | 
|  | } | 
|  |  | 
|  | if !E::SUPPORTS_UNICODE { | 
|  | if !b.is_ascii() { | 
|  | return Err(perr(i, NonAsciiInByteLiteral)); | 
|  | } | 
|  | } | 
|  |  | 
|  | i += 1; | 
|  | } | 
|  |  | 
|  | let closing_quote_pos = closing_quote_pos | 
|  | .ok_or(perr(None, UnterminatedRawString))?; | 
|  |  | 
|  | if closing_quote_pos + num_hashes != input.len() - 1 { | 
|  | return Err(perr(closing_quote_pos + num_hashes + 1..input.len(), UnexpectedChar)); | 
|  | } | 
|  |  | 
|  | // `value` is only empty if there was no \r\n in the input string (with the | 
|  | // special case of the input being empty). This means the string value | 
|  | // equals the input, so we store `None`. | 
|  | let value = if value.is_empty() { | 
|  | None | 
|  | } else { | 
|  | // There was an \r\n in the string, so we need to push the remaining | 
|  | // unescaped part of the string still. | 
|  | value.push_str(&input[end_last_escape..closing_quote_pos]); | 
|  | Some(value) | 
|  | }; | 
|  |  | 
|  | Ok((value, num_hashes as u32)) | 
|  | } |