crates/cexpr/src/literal.rs - platform/external/rust/android-crates-io - Git at Google

 // (C) Copyright 2016 Jethro G. Beekman
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //! Parsing C literals from byte slices.
 //!
 //! This will parse a representation of a C literal into a Rust type.
 //!
 //! # characters
 //! Character literals are stored into the `CChar` type, which can hold values
 //! that are not valid Unicode code points. ASCII characters are represented as
 //! `char`, literal bytes with the high byte set are converted into the raw
 //! representation. Escape sequences are supported. If hex and octal escapes
 //! map to an ASCII character, that is used, otherwise, the raw encoding is
 //! used, including for values over 255. Unicode escapes are checked for
 //! validity and mapped to `char`. Character sequences are not supported. Width
 //! prefixes are ignored.
 //!
 //! # strings
 //! Strings are interpreted as byte vectors. Escape sequences are supported. If
 //! hex and octal escapes map onto multi-byte characters, they are truncated to
 //! one 8-bit character. Unicode escapes are converted into their UTF-8
 //! encoding. Width prefixes are ignored.
 //!
 //! # integers
 //! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
 //! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
 //! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
 //! sign suffixes are ignored. Sign prefixes are not supported.
 //!
 //! # real numbers
 //! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
 //! not supported in the significand. Hexadecimal floating points are not
 //! supported.

 use std::char;
 use std::str::{self, FromStr};

 use nom::branch::alt;
 use nom::bytes::complete::is_not;
 use nom::bytes::complete::tag;
 use nom::character::complete::{char, one_of};
 use nom::combinator::{complete, map, map_opt, opt, recognize};
 use nom::multi::{fold_many0, many0, many1, many_m_n};
 use nom::sequence::{delimited, pair, preceded, terminated, tuple};
 use nom::*;

 use crate::expr::EvalResult;
 use crate::ToCexprResult;

 /// Representation of a C character.
 ///
 /// C character literals can carry values that are not valid Unicode scalar
 /// values, so anything that does not fit a `char` is kept as a raw integer.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum CChar {
     /// A character that can be represented as a `char`
     Char(char),
     /// Any other character (8-bit characters, unicode surrogates, etc.)
     Raw(u64),
 }

 impl From<u8> for CChar {
     /// Converts a literal byte: ASCII bytes become `CChar::Char`, bytes with
     /// the high bit set are kept as `CChar::Raw`.
     fn from(i: u8) -> CChar {
         if i.is_ascii() {
             CChar::Char(i as char)
         } else {
             CChar::Raw(u64::from(i))
         }
     }
 }

 // A non-allocating version of this would be nice...
 //
 // Implemented as `From` rather than `Into`: the blanket impl in the standard
 // library still provides `Into<Vec<u8>> for CChar`, so existing `.into()`
 // callers keep working while `Vec::<u8>::from(c)` becomes available too.
 impl From<CChar> for Vec<u8> {
     /// Encodes the character as bytes: `Char` becomes its UTF-8 encoding,
     /// `Raw` is truncated to its low 8 bits.
     fn from(c: CChar) -> Vec<u8> {
         match c {
             CChar::Char(ch) => {
                 let mut buf = [0u8; 4];
                 ch.encode_utf8(&mut buf).as_bytes().to_vec()
             }
             // Truncation is intentional: strings hold one 8-bit unit per raw escape.
             CChar::Raw(raw) => vec![raw as u8],
         }
     }
 }

 /// ensures the child parser consumes the whole input
 pub fn full<I: Clone, O, F>(
     f: F,
 ) -> impl Fn(I) -> nom::IResult<I, O>
 where
     I: nom::InputLength,
     F: Fn(I) -> nom::IResult<I, O>,
 {
     move |input| {
         let res = f(input);
         match res {
             Ok((i, o)) => {
                 if i.input_len() == 0 {
                     Ok((i, o))
                 } else {
                     Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Complete)))
                 }
             }
             r => r,
         }
     }
 }

 // =================================
 // ======== matching digits ========
 // =================================

 /// Expands to a parser function that consumes exactly one byte matching any
 /// of the given `|`-separated patterns and returns that byte.
 ///
 /// The generated parser reports `Incomplete` on empty input, which is why
 /// callers in this file frequently wrap it in `complete`.
 macro_rules! byte {
 	($($p: pat)|* ) => {{
         fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> {
             match i.split_first() {
                 // First byte matches one of the requested patterns: consume it.
                 $(Some((&c @ $p,rest)))|* => Ok((rest,c)),
                 // A byte is present but does not match: recoverable error.
                 Some(_) => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::OneOf))),
                 // Nothing left to inspect: ask for one more byte.
                 None => Err(nom::Err::Incomplete(Needed::new(1))),
             }
         }

         parser
 	}}
 }

 /// Matches a single binary digit (`0` or `1`).
 fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> {
     let bit = byte!(b'0'..=b'1');
     bit(i)
 }

 /// Matches a single octal digit (`0` through `7`).
 fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> {
     let digit = byte!(b'0'..=b'7');
     digit(i)
 }

 /// Matches a single decimal digit (`0` through `9`).
 fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
     let digit = byte!(b'0'..=b'9');
     digit(i)
 }

 /// Matches a single hexadecimal digit in either case.
 fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
     let digit = byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F');
     digit(i)
 }

 // ========================================
 // ======== characters and strings ========
 // ========================================

 /// Maps the letter of a simple escape sequence (`\a`, `\b`, `\f`, `\n`,
 /// `\r`, `\t`, `\v`) to the control character it denotes.
 ///
 /// # Panics
 /// Panics on any other letter; the caller restricts the input beforehand.
 fn escape2char(c: char) -> CChar {
     let control = match c {
         'a' => '\u{07}',
         'b' => '\u{08}',
         'f' => '\u{0c}',
         'n' => '\u{0a}',
         'r' => '\u{0d}',
         't' => '\u{09}',
         'v' => '\u{0b}',
         other => unreachable!("invalid escape {}", other),
     };
     CChar::Char(control)
 }

 /// Interprets the digits of an octal or hex escape in the given `radix`.
 ///
 /// Values in the ASCII range become `CChar::Char`; everything else is kept
 /// as `CChar::Raw`. Returns `None` when the digits are not valid UTF-8 or do
 /// not parse as a `u64`.
 fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
     let digits = str::from_utf8(&n).ok()?;
     let value = u64::from_str_radix(digits, radix).ok()?;
     Some(if value <= 0x7f {
         CChar::Char(value as u8 as char)
     } else {
         CChar::Raw(value)
     })
 }

 /// Interprets the hex digits of a `\u`/`\U` escape as a Unicode code point.
 ///
 /// Returns `None` when the digits do not parse or the value is not a valid
 /// `char` (for example a surrogate).
 fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
     let digits = str::from_utf8(&n).ok()?;
     let code_point = u32::from_str_radix(digits, 16).ok()?;
     char::from_u32(code_point).map(CChar::Char)
 }

 /// Parses one backslash escape sequence into a `CChar`.
 ///
 /// After the backslash the alternatives are tried in this order: quoted
 /// punctuation, single-letter control escapes, 1-3 octal digits, `x` plus
 /// hex digits, `u` plus 4 hex digits, `U` plus 8 hex digits.
 fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
     let punctuation = map(one_of(r#"'"?\"#), CChar::Char);
     let control = map(one_of("abfnrtv"), escape2char);
     let octal_code = map_opt(many_m_n(1, 3, octal), |digits| c_raw_escape(digits, 8));
     let hex_code = map_opt(preceded(char('x'), many1(hexadecimal)), |digits| {
         c_raw_escape(digits, 16)
     });
     let short_unicode = map_opt(
         preceded(char('u'), many_m_n(4, 4, hexadecimal)),
         c_unicode_escape,
     );
     let long_unicode = map_opt(
         preceded(char('U'), many_m_n(8, 8, hexadecimal)),
         c_unicode_escape,
     );

     preceded(
         char('\\'),
         alt((
             punctuation,
             control,
             octal_code,
             hex_code,
             short_unicode,
             long_unicode,
         )),
     )(i)
 }

 /// Matches a character/string width prefix: `u8`, `u`, `U` or `L`.
 fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> {
     // `u8` is listed before `u` so the longer prefix wins.
     let prefixes = (tag("u8"), tag("u"), tag("U"), tag("L"));
     alt(prefixes)(i)
 }

 fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
     delimited(
         terminated(opt(c_width_prefix), char('\'')),
         alt((
             escaped_char,
             map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from),
         )),
         char('\''),
     )(i)
 }

 /// Parses a string literal (optionally width-prefixed) into its bytes.
 ///
 /// The body is a sequence of escape sequences and runs of bytes that are
 /// neither a backslash nor a double quote; all pieces are concatenated.
 fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> {
     let opening = alt((preceded(c_width_prefix, char('"')), char('"')));
     let piece = alt((
         map(escaped_char, |c: CChar| -> Vec<u8> { c.into() }),
         map(is_not([b'\\', b'"']), |raw: &[u8]| raw.to_vec()),
     ));
     let body = fold_many0(piece, Vec::new, |mut acc: Vec<u8>, next: Vec<u8>| {
         acc.extend(next);
         acc
     });
     delimited(opening, body, char('"'))(i)
 }

 // ================================
 // ======== parse integers ========
 // ================================

 /// Parses ASCII digits in the given `radix` into a `u64`.
 ///
 /// Returns `None` when the bytes are not valid UTF-8, are empty, contain a
 /// digit outside the radix, or the value does not fit in a `u64`.
 fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
     let digits = str::from_utf8(&n).ok()?;
     u64::from_str_radix(digits, radix).ok()
 }

 /// Consumes the leading run of integer suffix letters (`u`, `U`, `l`, `L`).
 ///
 /// When the whole input consists of suffix letters the underlying split
 /// reports `Incomplete`; that case is mapped to "everything consumed".
 fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {
     let is_suffix = |c: u8| matches!(c, b'u' | b'U' | b'l' | b'L');
     match input.split_at_position(|c| !is_suffix(c)) {
         Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)),
         other => other,
     }
 }

 /// Parses an integer literal and bit-casts its `u64` value to `i64`.
 ///
 /// Radix prefixes are tried in order: `0x`/`0X`, `0b`/`0B`, leading `0`
 /// (octal), then plain decimal. Trailing `u`/`l` suffix letters are skipped.
 fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> {
     let magnitude = alt((
         map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |digits| {
             c_int_radix(digits, 16)
         }),
         map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |digits| {
             c_int_radix(digits, 16)
         }),
         map_opt(preceded(tag("0b"), many1(complete(binary))), |digits| {
             c_int_radix(digits, 2)
         }),
         map_opt(preceded(tag("0B"), many1(complete(binary))), |digits| {
             c_int_radix(digits, 2)
         }),
         map_opt(preceded(char('0'), many1(complete(octal))), |digits| {
             c_int_radix(digits, 8)
         }),
         map_opt(many1(complete(decimal)), |digits| c_int_radix(digits, 10)),
         // Final alternative that always fails with `ErrorKind::Fix`.
         |input| Err(crate::nom::Err::Error(nom::error::Error::new(input, crate::nom::ErrorKind::Fix))),
     ));
     let literal = terminated(magnitude, opt(take_ul));
     map(literal, |value: u64| value as i64)(i)
 }

 // ==============================
 // ======== parse floats ========
 // ==============================

 fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> {
     nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i)
 }

 /// Parses a decimal exponent: `e`/`E`, an optional sign, one or more digits.
 ///
 /// Returns the sign byte (if any) together with the exponent digits.
 fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> {
     let marker = byte!(b'e' | b'E');
     let sign = opt(byte!(b'-' | b'+'));
     let digits = many1(complete(decimal));
     preceded(marker, pair(sign, digits))(i)
 }

 /// Parses a decimal floating-point literal into an `f64`.
 ///
 /// Accepted shapes are `1.`, `1.5`, `.5`, any of those followed by an
 /// exponent, `1e5`, and a bare integer followed by a width suffix (`1f`).
 /// A trailing width suffix is consumed and ignored.
 ///
 /// `alt` commits to the first branch that succeeds, so the exponent-bearing
 /// branches must be tried before the plain fractional ones. Otherwise `1.5e3`
 /// is matched as `1.5` with `e3` left over and the literal is rejected by the
 /// surrounding full-input check.
 fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> {
     map_opt(
         alt((
             // [digits] [.] digits exponent
             terminated(
                 recognize(tuple((
                     many0(complete(decimal)),
                     opt(byte!(b'.')),
                     many1(complete(decimal)),
                     // `complete`: a missing exponent at end of input must be a
                     // recoverable error so the later branches still get a turn.
                     complete(float_exp),
                 ))),
                 opt(float_width),
             ),
             // digits [.] [digits] exponent
             terminated(
                 recognize(tuple((
                     many1(complete(decimal)),
                     opt(byte!(b'.')),
                     many0(complete(decimal)),
                     complete(float_exp),
                 ))),
                 opt(float_width),
             ),
             // digits . [digits]
             terminated(
                 recognize(tuple((
                     many1(complete(decimal)),
                     byte!(b'.'),
                     many0(complete(decimal)),
                 ))),
                 opt(float_width),
             ),
             // [digits] . digits
             terminated(
                 recognize(tuple((
                     many0(complete(decimal)),
                     byte!(b'.'),
                     many1(complete(decimal)),
                 ))),
                 opt(float_width),
             ),
             // digits followed by a mandatory width suffix
             terminated(recognize(many1(complete(decimal))), float_width),
         )),
         |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()),
     )(i)
 }

 // ================================
 // ======== main interface ========
 // ================================

 /// Tries each literal kind in turn (character, integer, float, string),
 /// requiring the chosen parser to consume the entire input, and converts the
 /// outcome into the crate's result type.
 fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
     let char_literal = map(full(c_char), EvalResult::Char);
     let int_literal = map(full(c_int), |value| {
         EvalResult::Int(::std::num::Wrapping(value))
     });
     let float_literal = map(full(c_float), EvalResult::Float);
     let string_literal = map(full(c_string), EvalResult::Str);

     alt((char_literal, int_literal, float_literal, string_literal))(input).to_cexpr_result()
 }

 /// Parse a C literal.
 ///
 /// The input must be exactly one literal token: no surrounding whitespace
 /// and no sign prefix.
 pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
     let literal = one_literal(input);
     crate::assert_full_parse(literal)
 }
	// (C) Copyright 2016 Jethro G. Beekman
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.
	//! Parsing C literals from byte slices.
	//!
	//! This will parse a representation of a C literal into a Rust type.
	//!
	//! # characters
	//! Character literals are stored into the `CChar` type, which can hold values
	//! that are not valid Unicode code points. ASCII characters are represented as
	//! `char`, literal bytes with the high byte set are converted into the raw
	//! representation. Escape sequences are supported. If hex and octal escapes
	//! map to an ASCII character, that is used, otherwise, the raw encoding is
	//! used, including for values over 255. Unicode escapes are checked for
	//! validity and mapped to `char`. Character sequences are not supported. Width
	//! prefixes are ignored.
	//!
	//! # strings
	//! Strings are interpreted as byte vectors. Escape sequences are supported. If
	//! hex and octal escapes map onto multi-byte characters, they are truncated to
	//! one 8-bit character. Unicode escapes are converted into their UTF-8
	//! encoding. Width prefixes are ignored.
	//!
	//! # integers
	//! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
	//! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
	//! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
	//! sign suffixes are ignored. Sign prefixes are not supported.
	//!
	//! # real numbers
	//! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
	//! not supported in the significand. Hexadecimal floating points are not
	//! supported.

	use std::char;
	use std::str::{self, FromStr};

	use nom::branch::alt;
	use nom::bytes::complete::is_not;
	use nom::bytes::complete::tag;
	use nom::character::complete::{char, one_of};
	use nom::combinator::{complete, map, map_opt, opt, recognize};
	use nom::multi::{fold_many0, many0, many1, many_m_n};
	use nom::sequence::{delimited, pair, preceded, terminated, tuple};
	use nom::*;

	use crate::expr::EvalResult;
	use crate::ToCexprResult;

	/// Representation of a C character.
	///
	/// C character literals can carry values that are not valid Unicode scalar
	/// values, so anything that does not fit a `char` is kept as a raw integer.
	#[derive(Debug, Copy, Clone, PartialEq, Eq)]
	pub enum CChar {
	    /// A character that can be represented as a `char`
	    Char(char),
	    /// Any other character (8-bit characters, unicode surrogates, etc.)
	    Raw(u64),
	}

	impl From<u8> for CChar {
	    /// Converts a literal byte: ASCII bytes become `CChar::Char`, bytes with
	    /// the high bit set are kept as `CChar::Raw`.
	    fn from(i: u8) -> CChar {
	        if i.is_ascii() {
	            CChar::Char(i as char)
	        } else {
	            CChar::Raw(u64::from(i))
	        }
	    }
	}

	// A non-allocating version of this would be nice...
	//
	// Implemented as `From` rather than `Into`: the blanket impl in the standard
	// library still provides `Into<Vec<u8>> for CChar`, so existing `.into()`
	// callers keep working while `Vec::<u8>::from(c)` becomes available too.
	impl From<CChar> for Vec<u8> {
	    /// Encodes the character as bytes: `Char` becomes its UTF-8 encoding,
	    /// `Raw` is truncated to its low 8 bits.
	    fn from(c: CChar) -> Vec<u8> {
	        match c {
	            CChar::Char(ch) => {
	                let mut buf = [0u8; 4];
	                ch.encode_utf8(&mut buf).as_bytes().to_vec()
	            }
	            // Truncation is intentional: strings hold one 8-bit unit per raw escape.
	            CChar::Raw(raw) => vec![raw as u8],
	        }
	    }
	}

	/// ensures the child parser consumes the whole input
	pub fn full<I: Clone, O, F>(
	f: F,
	) -> impl Fn(I) -> nom::IResult<I, O>
	where
	I: nom::InputLength,
	F: Fn(I) -> nom::IResult<I, O>,
	{
	move \|input\| {
	let res = f(input);
	match res {
	Ok((i, o)) => {
	if i.input_len() == 0 {
	Ok((i, o))
	} else {
	Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Complete)))
	}
	}
	r => r,
	}
	}
	}

	// =================================
	// ======== matching digits ========
	// =================================

	macro_rules! byte {
	($($p: pat)\|* ) => {{
	fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> {
	match i.split_first() {
	$(Some((&c @ $p,rest)))\|* => Ok((rest,c)),
	Some(_) => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::OneOf))),
	None => Err(nom::Err::Incomplete(Needed::new(1))),
	}
	}

	parser
	}}
	}

	/// Matches a single binary digit (`0` or `1`).
	fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> {
	    let bit = byte!(b'0'..=b'1');
	    bit(i)
	}

	/// Matches a single octal digit (`0` through `7`).
	fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> {
	    let digit = byte!(b'0'..=b'7');
	    digit(i)
	}

	/// Matches a single decimal digit (`0` through `9`).
	fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
	    let digit = byte!(b'0'..=b'9');
	    digit(i)
	}

	/// Matches a single hexadecimal digit in either case.
	fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
	    // Pattern separators restored from the escaped `\|` form.
	    let digit = byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F');
	    digit(i)
	}

	// ========================================
	// ======== characters and strings ========
	// ========================================

	/// Maps the letter of a simple escape sequence (`\a`, `\b`, `\f`, `\n`,
	/// `\r`, `\t`, `\v`) to the control character it denotes.
	///
	/// # Panics
	/// Panics on any other letter; the caller restricts the input beforehand.
	fn escape2char(c: char) -> CChar {
	    let control = match c {
	        'a' => '\u{07}',
	        'b' => '\u{08}',
	        'f' => '\u{0c}',
	        'n' => '\u{0a}',
	        'r' => '\u{0d}',
	        't' => '\u{09}',
	        'v' => '\u{0b}',
	        other => unreachable!("invalid escape {}", other),
	    };
	    CChar::Char(control)
	}

	/// Interprets the digits of an octal or hex escape in the given `radix`.
	///
	/// Values in the ASCII range become `CChar::Char`; everything else is kept
	/// as `CChar::Raw`. Returns `None` when the digits are not valid UTF-8 or do
	/// not parse as a `u64`.
	fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
	    // Written with `?` instead of the closure chain whose bars were escaped as `\|`.
	    let digits = str::from_utf8(&n).ok()?;
	    let value = u64::from_str_radix(digits, radix).ok()?;
	    Some(if value <= 0x7f {
	        CChar::Char(value as u8 as char)
	    } else {
	        CChar::Raw(value)
	    })
	}

	/// Interprets the hex digits of a `\u`/`\U` escape as a Unicode code point.
	///
	/// Returns `None` when the digits do not parse or the value is not a valid
	/// `char` (for example a surrogate).
	fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
	    // Written with `?` instead of the closure chain whose bars were escaped as `\|`.
	    let digits = str::from_utf8(&n).ok()?;
	    let code_point = u32::from_str_radix(digits, 16).ok()?;
	    char::from_u32(code_point).map(CChar::Char)
	}

	/// Parses one backslash escape sequence into a `CChar`.
	///
	/// After the backslash the alternatives are tried in this order: quoted
	/// punctuation, single-letter control escapes, 1-3 octal digits, `x` plus
	/// hex digits, `u` plus 4 hex digits, `U` plus 8 hex digits.
	fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
	    // Closure bars restored from the escaped `\|` form, which is not valid Rust.
	    let punctuation = map(one_of(r#"'"?\"#), CChar::Char);
	    let control = map(one_of("abfnrtv"), escape2char);
	    let octal_code = map_opt(many_m_n(1, 3, octal), |digits| c_raw_escape(digits, 8));
	    let hex_code = map_opt(preceded(char('x'), many1(hexadecimal)), |digits| {
	        c_raw_escape(digits, 16)
	    });
	    let short_unicode = map_opt(
	        preceded(char('u'), many_m_n(4, 4, hexadecimal)),
	        c_unicode_escape,
	    );
	    let long_unicode = map_opt(
	        preceded(char('U'), many_m_n(8, 8, hexadecimal)),
	        c_unicode_escape,
	    );

	    preceded(
	        char('\\'),
	        alt((
	            punctuation,
	            control,
	            octal_code,
	            hex_code,
	            short_unicode,
	            long_unicode,
	        )),
	    )(i)
	}

	/// Matches a character/string width prefix: `u8`, `u`, `U` or `L`.
	fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> {
	    // `u8` is listed before `u` so the longer prefix wins.
	    let prefixes = (tag("u8"), tag("u"), tag("U"), tag("L"));
	    alt(prefixes)(i)
	}

	fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
	delimited(
	terminated(opt(c_width_prefix), char('\'')),
	alt((
	escaped_char,
	map(byte!(0 ..= 91 /* \=92 */ \| 93 ..= 255), CChar::from),
	)),
	char('\''),
	)(i)
	}

	/// Parses a string literal (optionally width-prefixed) into its bytes.
	///
	/// The body is a sequence of escape sequences and runs of bytes that are
	/// neither a backslash nor a double quote; all pieces are concatenated.
	fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> {
	    // Closure bars restored from the escaped `\|` form, which is not valid Rust.
	    let opening = alt((preceded(c_width_prefix, char('"')), char('"')));
	    let piece = alt((
	        map(escaped_char, |c: CChar| -> Vec<u8> { c.into() }),
	        map(is_not([b'\\', b'"']), |raw: &[u8]| raw.to_vec()),
	    ));
	    let body = fold_many0(piece, Vec::new, |mut acc: Vec<u8>, next: Vec<u8>| {
	        acc.extend(next);
	        acc
	    });
	    delimited(opening, body, char('"'))(i)
	}

	// ================================
	// ======== parse integers ========
	// ================================

	/// Parses ASCII digits in the given `radix` into a `u64`.
	///
	/// Returns `None` when the bytes are not valid UTF-8, are empty, contain a
	/// digit outside the radix, or the value does not fit in a `u64`.
	fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
	    // Written with `?` instead of the closure whose bars were escaped as `\|`.
	    let digits = str::from_utf8(&n).ok()?;
	    u64::from_str_radix(digits, radix).ok()
	}

	/// Consumes the leading run of integer suffix letters (`u`, `U`, `l`, `L`).
	///
	/// When the whole input consists of suffix letters the underlying split
	/// reports `Incomplete`; that case is mapped to "everything consumed".
	fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {
	    // Closure bars restored from the escaped `\|` form, which is not valid Rust.
	    let is_suffix = |c: u8| matches!(c, b'u' | b'U' | b'l' | b'L');
	    match input.split_at_position(|c| !is_suffix(c)) {
	        Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)),
	        other => other,
	    }
	}

	fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> {
	map(
	terminated(
	alt((
	map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), \|v\| {
	c_int_radix(v, 16)
	}),
	map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), \|v\| {
	c_int_radix(v, 16)
	}),
	map_opt(preceded(tag("0b"), many1(complete(binary))), \|v\| {
	c_int_radix(v, 2)
	}),
	map_opt(preceded(tag("0B"), many1(complete(binary))), \|v\| {
	c_int_radix(v, 2)
	}),
	map_opt(preceded(char('0'), many1(complete(octal))), \|v\| {
	c_int_radix(v, 8)
	}),
	map_opt(many1(complete(decimal)), \|v\| c_int_radix(v, 10)),
	\|input\| Err(crate::nom::Err::Error(nom::error::Error::new(input, crate::nom::ErrorKind::Fix))),
	)),
	opt(take_ul),
	),
	\|i\| i as i64,
	)(i)
	}

	// ==============================
	// ======== parse floats ========
	// ==============================

	fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> {
	nom::combinator::complete(byte!(b'f' \| b'l' \| b'F' \| b'L'))(i)
	}

	/// Parses a decimal exponent: `e`/`E`, an optional sign, one or more digits.
	///
	/// Returns the sign byte (if any) together with the exponent digits.
	fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> {
	    // Pattern separators restored from the escaped `\|` form.
	    let marker = byte!(b'e' | b'E');
	    let sign = opt(byte!(b'-' | b'+'));
	    let digits = many1(complete(decimal));
	    preceded(marker, pair(sign, digits))(i)
	}

	/// Parses a decimal floating-point literal into an `f64`.
	///
	/// Accepted shapes are `1.`, `1.5`, `.5`, any of those followed by an
	/// exponent, `1e5`, and a bare integer followed by a width suffix (`1f`).
	/// A trailing width suffix is consumed and ignored.
	///
	/// `alt` commits to the first branch that succeeds, so the exponent-bearing
	/// branches must be tried before the plain fractional ones. Otherwise `1.5e3`
	/// is matched as `1.5` with `e3` left over and the literal is rejected by the
	/// surrounding full-input check.
	fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> {
	    map_opt(
	        alt((
	            // [digits] [.] digits exponent
	            terminated(
	                recognize(tuple((
	                    many0(complete(decimal)),
	                    opt(byte!(b'.')),
	                    many1(complete(decimal)),
	                    // `complete`: a missing exponent at end of input must be a
	                    // recoverable error so the later branches still get a turn.
	                    complete(float_exp),
	                ))),
	                opt(float_width),
	            ),
	            // digits [.] [digits] exponent
	            terminated(
	                recognize(tuple((
	                    many1(complete(decimal)),
	                    opt(byte!(b'.')),
	                    many0(complete(decimal)),
	                    complete(float_exp),
	                ))),
	                opt(float_width),
	            ),
	            // digits . [digits]
	            terminated(
	                recognize(tuple((
	                    many1(complete(decimal)),
	                    byte!(b'.'),
	                    many0(complete(decimal)),
	                ))),
	                opt(float_width),
	            ),
	            // [digits] . digits
	            terminated(
	                recognize(tuple((
	                    many0(complete(decimal)),
	                    byte!(b'.'),
	                    many1(complete(decimal)),
	                ))),
	                opt(float_width),
	            ),
	            // digits followed by a mandatory width suffix
	            terminated(recognize(many1(complete(decimal))), float_width),
	        )),
	        // Closure bars restored from the escaped `\|` form.
	        |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()),
	    )(i)
	}

	// ================================
	// ======== main interface ========
	// ================================

	/// Tries each literal kind in turn (character, integer, float, string),
	/// requiring the chosen parser to consume the entire input, and converts the
	/// outcome into the crate's result type.
	fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
	    let char_literal = map(full(c_char), EvalResult::Char);
	    // Closure bars restored from the escaped `\|` form, which is not valid Rust.
	    let int_literal = map(full(c_int), |value| {
	        EvalResult::Int(::std::num::Wrapping(value))
	    });
	    let float_literal = map(full(c_float), EvalResult::Float);
	    let string_literal = map(full(c_string), EvalResult::Str);

	    alt((char_literal, int_literal, float_literal, string_literal))(input).to_cexpr_result()
	}

	/// Parse a C literal.
	///
	/// The input must be exactly one literal token: no surrounding whitespace
	/// and no sign prefix.
	pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
	    let literal = one_literal(input);
	    crate::assert_full_parse(literal)
	}