| //! Scans an input string (source file) character by character. |
| |
| use std::{io::Read, path::Path}; |
| |
/// An error produced while parsing a ninja file.
///
/// Besides the human-readable message it records where the failure happened:
/// a byte offset plus the index of the chunk that was being scanned.
#[derive(Debug)]
pub struct ParseError {
    /// Description of what went wrong.
    msg: String,
    /// Offset of the scanner at the moment the error was created.
    ofs: usize,
    /// Index of the chunk whose scanner created the error.
    chunk_index: usize,
}
| |
| impl ParseError { |
| /// Returns the index of the chunk that contained the error |
| pub fn get_chunk_index(&self) -> usize { |
| self.chunk_index |
| } |
| } |
| |
| /// ParseResult<T> is a Result<T, ParseError> |
| pub type ParseResult<T> = Result<T, ParseError>; |
| |
/// A cursor over a byte slice that makes it easy to read the slice one
/// character at a time.
pub struct Scanner<'a> {
    /// The bytes being scanned.
    buf: &'a [u8],
    /// Current position of the scanner, as an offset into the buffer.
    pub ofs: usize,
    /// Line number of the scanner's current position within its buffer.
    pub line: usize,
    /// Index of the chunk this scanner was created for.
    chunk_index: usize,
}
| |
| impl<'a> Scanner<'a> { |
| /// Creates a new scanner that scans the given buffer. The chunk index |
| /// is recorded into ParseErrors to accurately recreate the position of |
| /// the error when buf isn't the complete file. |
| pub fn new(buf: &'a [u8], chunk_index: usize) -> Self { |
| Scanner { |
| buf, |
| ofs: 0, |
| line: 1, |
| chunk_index, |
| } |
| } |
| |
| /// Returns a string slice into the scanner's buffer. |
| pub fn slice(&self, start: usize, end: usize) -> &'a str { |
| unsafe { std::str::from_utf8_unchecked(self.buf.get_unchecked(start..end)) } |
| } |
| |
| /// Returns the character that will be returned by read(), but without |
| /// advancing the scanner. |
| pub fn peek(&self) -> char { |
| unsafe { *self.buf.get_unchecked(self.ofs) as char } |
| } |
| |
| /// Returns true if there is a \n or \r\n at the scanner's current position. |
| pub fn peek_newline(&self) -> bool { |
| let peek = self.peek(); |
| if peek == '\n' { |
| return true; |
| } |
| if self.ofs >= self.buf.len() - 1 { |
| return false; |
| } |
| let peek2 = unsafe { *self.buf.get_unchecked(self.ofs + 1) as char }; |
| peek == '\r' && peek2 == '\n' |
| } |
| |
| /// Advances the scanner forward by one character. |
| pub fn next(&mut self) { |
| if self.peek() == '\n' { |
| self.line += 1; |
| } |
| #[cfg(debug_assertions)] |
| if self.ofs == self.buf.len() { |
| panic!("scanned past end") |
| } |
| self.ofs += 1; |
| } |
| |
| /// The opposite of next(), moves the scanner back one character. |
| pub fn back(&mut self) { |
| #[cfg(debug_assertions)] |
| if self.ofs == 0 { |
| panic!("back at start") |
| } |
| self.ofs -= 1; |
| if self.peek() == '\n' { |
| self.line -= 1; |
| } |
| } |
| |
| /// Consumes the next character in the scanner and returns it. |
| pub fn read(&mut self) -> char { |
| let c = self.peek(); |
| self.next(); |
| c |
| } |
| |
| /// Advances the scanner if the next character is the provided one. Returns |
| /// true if the skip was performed. |
| pub fn skip(&mut self, ch: char) -> bool { |
| if self.peek() == ch { |
| self.next(); |
| return true; |
| } |
| false |
| } |
| |
| /// Advances the scanner past 0 or more spaces. |
| pub fn skip_spaces(&mut self) { |
| while self.skip(' ') {} |
| } |
| |
| /// Reads a character, and returns an error if that character is not |
| /// the provided one. |
| pub fn expect(&mut self, ch: char) -> ParseResult<()> { |
| let r = self.read(); |
| if r != ch { |
| self.back(); |
| return self.parse_error(format!("expected {:?}, got {:?}", ch, r)); |
| } |
| Ok(()) |
| } |
| |
| /// Creates an error representing the given error message and associating |
| /// it with the scanner's current position. |
| pub fn parse_error<T, S: Into<String>>(&self, msg: S) -> ParseResult<T> { |
| Err(ParseError { |
| msg: msg.into(), |
| ofs: self.ofs, |
| chunk_index: self.chunk_index, |
| }) |
| } |
| } |
| |
| /// Returns a string representing the given ParseError, but also printing |
| /// the location of the error in the ninja file and a context line. |
| pub fn format_parse_error(mut ofs: usize, buf: &[u8], filename: &Path, err: ParseError) -> String { |
| let lines = buf.split(|&c| c == b'\n'); |
| for (line_number, line) in lines.enumerate() { |
| if ofs + line.len() >= err.ofs { |
| let mut msg = "parse error: ".to_string(); |
| msg.push_str(&err.msg); |
| msg.push('\n'); |
| |
| let prefix = format!("{}:{}: ", filename.display(), line_number + 1); |
| msg.push_str(&prefix); |
| |
| let mut context = unsafe { std::str::from_utf8_unchecked(line) }; |
| let mut col = err.ofs - ofs; |
| if col > 40 { |
| // Trim beginning of line to fit it on screen. |
| msg.push_str("..."); |
| context = &context[col - 20..]; |
| col = 3 + 20; |
| } |
| if context.len() > 40 { |
| context = &context[0..40]; |
| msg.push_str(context); |
| msg.push_str("..."); |
| } else { |
| msg.push_str(context); |
| } |
| msg.push('\n'); |
| |
| msg.push_str(&" ".repeat(prefix.len() + col)); |
| msg.push_str("^\n"); |
| return msg; |
| } |
| ofs += line.len() + 1; |
| } |
| panic!("invalid offset when formatting error") |
| } |
| |
/// Scanner wants its input buffer to end in a trailing nul.
/// This function is like std::fs::read() but appends a nul, efficiently.
///
/// # Errors
///
/// Returns any I/O error raised while opening the file, querying its
/// metadata, or reading its contents.
pub fn read_file_with_nul(path: &Path) -> std::io::Result<Vec<u8>> {
    // Using std::fs::read() to read the file and then pushing a nul on the end
    // causes us to allocate a buffer the size of the file, then grow it to push
    // the nul, copying the entire file(!). So instead create a buffer of the
    // right size up front.
    let mut file = std::fs::File::open(path)?;
    // The metadata length is used only as a capacity hint, so a truncating
    // cast or a file that changes size after this call cannot corrupt the
    // result.
    let size_hint = file.metadata()?.len() as usize;
    let mut bytes = Vec::with_capacity(size_hint.saturating_add(1));
    // read_to_end() fills the reserved capacity without ever exposing
    // uninitialized memory, and reads whatever the file actually contains.
    file.read_to_end(&mut bytes)?;
    bytes.push(0);
    Ok(bytes)
}