|  | // (C) Copyright 2016 Jethro G. Beekman | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | 
|  | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|  | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | 
|  | // option. This file may not be copied, modified, or distributed | 
|  | // except according to those terms. | 
|  | //! Parsing C literals from byte slices. | 
|  | //! | 
|  | //! This will parse a representation of a C literal into a Rust type. | 
|  | //! | 
|  | //! # characters | 
|  | //! Character literals are stored into the `CChar` type, which can hold values | 
|  | //! that are not valid Unicode code points. ASCII characters are represented as | 
//! `char`, literal bytes with the high bit set are converted into the raw
|  | //! representation. Escape sequences are supported. If hex and octal escapes | 
|  | //! map to an ASCII character, that is used, otherwise, the raw encoding is | 
|  | //! used, including for values over 255. Unicode escapes are checked for | 
|  | //! validity and mapped to `char`. Character sequences are not supported. Width | 
|  | //! prefixes are ignored. | 
|  | //! | 
|  | //! # strings | 
|  | //! Strings are interpreted as byte vectors. Escape sequences are supported. If | 
|  | //! hex and octal escapes map onto multi-byte characters, they are truncated to | 
|  | //! one 8-bit character. Unicode escapes are converted into their UTF-8 | 
|  | //! encoding. Width prefixes are ignored. | 
|  | //! | 
|  | //! # integers | 
|  | //! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are | 
|  | //! all supported. If the literal value is between `i64::MAX` and `u64::MAX`, | 
|  | //! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and | 
|  | //! sign suffixes are ignored. Sign prefixes are not supported. | 
|  | //! | 
|  | //! # real numbers | 
|  | //! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are | 
|  | //! not supported in the significand. Hexadecimal floating points are not | 
|  | //! supported. | 
|  |  | 
|  | use std::char; | 
|  | use std::str::{self, FromStr}; | 
|  |  | 
|  | use nom::branch::alt; | 
|  | use nom::bytes::complete::is_not; | 
|  | use nom::bytes::complete::tag; | 
|  | use nom::character::complete::{char, one_of}; | 
|  | use nom::combinator::{complete, map, map_opt, opt, recognize}; | 
|  | use nom::multi::{fold_many0, many0, many1, many_m_n}; | 
|  | use nom::sequence::{delimited, pair, preceded, terminated, tuple}; | 
|  | use nom::*; | 
|  |  | 
|  | use crate::expr::EvalResult; | 
|  | use crate::ToCexprResult; | 
|  |  | 
/// Representation of a C character.
///
/// A C character literal can denote values that are not valid Unicode scalar
/// values, so this type keeps those apart from values that fit in a `char`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CChar {
    /// A character that can be represented as a `char`.
    Char(char),
    /// Any other character (8-bit characters, unicode surrogates, etc.),
    /// stored as its raw numeric value.
    Raw(u64),
}
|  |  | 
|  | impl From<u8> for CChar { | 
|  | fn from(i: u8) -> CChar { | 
|  | match i { | 
|  | 0..=0x7f => CChar::Char(i as u8 as char), | 
|  | _ => CChar::Raw(i as u64), | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // A non-allocating version of this would be nice... | 
|  | impl std::convert::Into<Vec<u8>> for CChar { | 
|  | fn into(self) -> Vec<u8> { | 
|  | match self { | 
|  | CChar::Char(c) => { | 
|  | let mut s = String::with_capacity(4); | 
|  | s.extend(&[c]); | 
|  | s.into_bytes() | 
|  | } | 
|  | CChar::Raw(i) => { | 
|  | let mut v = Vec::with_capacity(1); | 
|  | v.push(i as u8); | 
|  | v | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /// ensures the child parser consumes the whole input | 
|  | pub fn full<I: Clone, O, F>( | 
|  | f: F, | 
|  | ) -> impl Fn(I) -> nom::IResult<I, O> | 
|  | where | 
|  | I: nom::InputLength, | 
|  | F: Fn(I) -> nom::IResult<I, O>, | 
|  | { | 
|  | move |input| { | 
|  | let res = f(input); | 
|  | match res { | 
|  | Ok((i, o)) => { | 
|  | if i.input_len() == 0 { | 
|  | Ok((i, o)) | 
|  | } else { | 
|  | Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Complete))) | 
|  | } | 
|  | } | 
|  | r => r, | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // ================================= | 
|  | // ======== matching digits ======== | 
|  | // ================================= | 
|  |  | 
|  | macro_rules! byte { | 
|  | ($($p: pat)|* ) => {{ | 
|  | fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> { | 
|  | match i.split_first() { | 
|  | $(Some((&c @ $p,rest)))|* => Ok((rest,c)), | 
|  | Some(_) => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::OneOf))), | 
|  | None => Err(nom::Err::Incomplete(Needed::new(1))), | 
|  | } | 
|  | } | 
|  |  | 
|  | parser | 
|  | }} | 
|  | } | 
|  |  | 
|  | fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> { | 
|  | byte!(b'0'..=b'1')(i) | 
|  | } | 
|  |  | 
|  | fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> { | 
|  | byte!(b'0'..=b'7')(i) | 
|  | } | 
|  |  | 
|  | fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> { | 
|  | byte!(b'0'..=b'9')(i) | 
|  | } | 
|  |  | 
|  | fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> { | 
|  | byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')(i) | 
|  | } | 
|  |  | 
|  | // ======================================== | 
|  | // ======== characters and strings ======== | 
|  | // ======================================== | 
|  |  | 
|  | fn escape2char(c: char) -> CChar { | 
|  | CChar::Char(match c { | 
|  | 'a' => '\x07', | 
|  | 'b' => '\x08', | 
|  | 'f' => '\x0c', | 
|  | 'n' => '\n', | 
|  | 'r' => '\r', | 
|  | 't' => '\t', | 
|  | 'v' => '\x0b', | 
|  | _ => unreachable!("invalid escape {}", c), | 
|  | }) | 
|  | } | 
|  |  | 
|  | fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> { | 
|  | str::from_utf8(&n) | 
|  | .ok() | 
|  | .and_then(|i| u64::from_str_radix(i, radix).ok()) | 
|  | .map(|i| match i { | 
|  | 0..=0x7f => CChar::Char(i as u8 as char), | 
|  | _ => CChar::Raw(i), | 
|  | }) | 
|  | } | 
|  |  | 
|  | fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> { | 
|  | str::from_utf8(&n) | 
|  | .ok() | 
|  | .and_then(|i| u32::from_str_radix(i, 16).ok()) | 
|  | .and_then(char::from_u32) | 
|  | .map(CChar::Char) | 
|  | } | 
|  |  | 
|  | fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> { | 
|  | preceded( | 
|  | char('\\'), | 
|  | alt(( | 
|  | map(one_of(r#"'"?\"#), CChar::Char), | 
|  | map(one_of("abfnrtv"), escape2char), | 
|  | map_opt(many_m_n(1, 3, octal), |v| c_raw_escape(v, 8)), | 
|  | map_opt(preceded(char('x'), many1(hexadecimal)), |v| { | 
|  | c_raw_escape(v, 16) | 
|  | }), | 
|  | map_opt( | 
|  | preceded(char('u'), many_m_n(4, 4, hexadecimal)), | 
|  | c_unicode_escape, | 
|  | ), | 
|  | map_opt( | 
|  | preceded(char('U'), many_m_n(8, 8, hexadecimal)), | 
|  | c_unicode_escape, | 
|  | ), | 
|  | )), | 
|  | )(i) | 
|  | } | 
|  |  | 
|  | fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> { | 
|  | alt((tag("u8"), tag("u"), tag("U"), tag("L")))(i) | 
|  | } | 
|  |  | 
|  | fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> { | 
|  | delimited( | 
|  | terminated(opt(c_width_prefix), char('\'')), | 
|  | alt(( | 
|  | escaped_char, | 
|  | map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from), | 
|  | )), | 
|  | char('\''), | 
|  | )(i) | 
|  | } | 
|  |  | 
|  | fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> { | 
|  | delimited( | 
|  | alt((preceded(c_width_prefix, char('"')), char('"'))), | 
|  | fold_many0( | 
|  | alt(( | 
|  | map(escaped_char, |c: CChar| c.into()), | 
|  | map(is_not([b'\\', b'"']), |c: &[u8]| c.into()), | 
|  | )), | 
|  | Vec::new, | 
|  | |mut v: Vec<u8>, res: Vec<u8>| { | 
|  | v.extend_from_slice(&res); | 
|  | v | 
|  | }, | 
|  | ), | 
|  | char('"'), | 
|  | )(i) | 
|  | } | 
|  |  | 
|  | // ================================ | 
|  | // ======== parse integers ======== | 
|  | // ================================ | 
|  |  | 
/// Converts ASCII digits written in base `radix` into a `u64`.
///
/// Returns `None` if `n` is not valid UTF-8, is empty, contains a character
/// that is not a digit in `radix`, or denotes a value above `u64::MAX`.
fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
    let digits = str::from_utf8(&n).ok()?;
    u64::from_str_radix(digits, radix).ok()
}
|  |  | 
|  | fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> { | 
|  | let r = input.split_at_position(|c| c != b'u' && c != b'U' && c != b'l' && c != b'L'); | 
|  | match r { | 
|  | Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)), | 
|  | res => res, | 
|  | } | 
|  | } | 
|  |  | 
|  | fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> { | 
|  | map( | 
|  | terminated( | 
|  | alt(( | 
|  | map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |v| { | 
|  | c_int_radix(v, 16) | 
|  | }), | 
|  | map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |v| { | 
|  | c_int_radix(v, 16) | 
|  | }), | 
|  | map_opt(preceded(tag("0b"), many1(complete(binary))), |v| { | 
|  | c_int_radix(v, 2) | 
|  | }), | 
|  | map_opt(preceded(tag("0B"), many1(complete(binary))), |v| { | 
|  | c_int_radix(v, 2) | 
|  | }), | 
|  | map_opt(preceded(char('0'), many1(complete(octal))), |v| { | 
|  | c_int_radix(v, 8) | 
|  | }), | 
|  | map_opt(many1(complete(decimal)), |v| c_int_radix(v, 10)), | 
|  | |input| Err(crate::nom::Err::Error(nom::error::Error::new(input, crate::nom::ErrorKind::Fix))), | 
|  | )), | 
|  | opt(take_ul), | 
|  | ), | 
|  | |i| i as i64, | 
|  | )(i) | 
|  | } | 
|  |  | 
|  | // ============================== | 
|  | // ======== parse floats ======== | 
|  | // ============================== | 
|  |  | 
|  | fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> { | 
|  | nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i) | 
|  | } | 
|  |  | 
|  | fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> { | 
|  | preceded( | 
|  | byte!(b'e' | b'E'), | 
|  | pair(opt(byte!(b'-' | b'+')), many1(complete(decimal))), | 
|  | )(i) | 
|  | } | 
|  |  | 
|  | fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> { | 
|  | map_opt( | 
|  | alt(( | 
|  | terminated( | 
|  | recognize(tuple(( | 
|  | many1(complete(decimal)), | 
|  | byte!(b'.'), | 
|  | many0(complete(decimal)), | 
|  | ))), | 
|  | opt(float_width), | 
|  | ), | 
|  | terminated( | 
|  | recognize(tuple(( | 
|  | many0(complete(decimal)), | 
|  | byte!(b'.'), | 
|  | many1(complete(decimal)), | 
|  | ))), | 
|  | opt(float_width), | 
|  | ), | 
|  | terminated( | 
|  | recognize(tuple(( | 
|  | many0(complete(decimal)), | 
|  | opt(byte!(b'.')), | 
|  | many1(complete(decimal)), | 
|  | float_exp, | 
|  | ))), | 
|  | opt(float_width), | 
|  | ), | 
|  | terminated( | 
|  | recognize(tuple(( | 
|  | many1(complete(decimal)), | 
|  | opt(byte!(b'.')), | 
|  | many0(complete(decimal)), | 
|  | float_exp, | 
|  | ))), | 
|  | opt(float_width), | 
|  | ), | 
|  | terminated(recognize(many1(complete(decimal))), float_width), | 
|  | )), | 
|  | |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()), | 
|  | )(i) | 
|  | } | 
|  |  | 
|  | // ================================ | 
|  | // ======== main interface ======== | 
|  | // ================================ | 
|  |  | 
|  | fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> { | 
|  | alt(( | 
|  | map(full(c_char), EvalResult::Char), | 
|  | map(full(c_int), |i| EvalResult::Int(::std::num::Wrapping(i))), | 
|  | map(full(c_float), EvalResult::Float), | 
|  | map(full(c_string), EvalResult::Str), | 
|  | ))(input) | 
|  | .to_cexpr_result() | 
|  | } | 
|  |  | 
|  | /// Parse a C literal. | 
|  | /// | 
|  | /// The input must contain exactly the representation of a single literal | 
|  | /// token, and in particular no whitespace or sign prefixes. | 
|  | pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> { | 
|  | crate::assert_full_parse(one_literal(input)) | 
|  | } |