// src/scanner.rs - platform/external/n2 - Git at Google

 //! Scans an input string (source file) character by character.

 use std::{io::Read, path::Path};

 /// ParseError represents an error encountered while parsing the ninja file.
 /// It contains both an error message and a position in the file that the error
 /// occurred at.
 ///
 /// Values are built by `Scanner::parse_error`, which copies the scanner's
 /// current offset and chunk index into the error.
 #[derive(Debug)]
 pub struct ParseError {
     /// Human-readable description of what went wrong.
     msg: String,
     /// Offset of the scanner (into its own buffer) when the error was created.
     ofs: usize,
     /// Chunk index of the scanner that created the error; see
     /// `get_chunk_index`.
     chunk_index: usize,
 }

 impl ParseError {
     /// Reports the chunk index that was recorded when this error was created.
     pub fn get_chunk_index(&self) -> usize {
         let Self { chunk_index, .. } = self;
         *chunk_index
     }
 }

 /// ParseResult<T> is a Result<T, ParseError>; it is the return type of
 /// `Scanner::expect` and `Scanner::parse_error`.
 pub type ParseResult<T> = Result<T, ParseError>;

 /// A Scanner wraps a byte slice and provides methods for easily reading
 /// individual characters out of it.
 ///
 /// Reads use unchecked indexing, so `ofs` has to stay inside `buf`; the
 /// comment on `read_file_with_nul` notes that the scanner wants its input
 /// buffer to end in a trailing nul.
 pub struct Scanner<'a> {
     /// The bytes being scanned.
     buf: &'a [u8],
     /// The offset of the scanner in the buffer.
     pub ofs: usize,
     /// The line number of the scanner in its buffer.
     pub line: usize,
     /// Chunk index given to `new`; copied into every ParseError created by
     /// `parse_error`.
     chunk_index: usize,
 }

 impl<'a> Scanner<'a> {
     /// Creates a new scanner that scans the given buffer, starting at offset 0
     /// on line 1. The chunk index is recorded into ParseErrors to accurately
     /// recreate the position of the error when buf isn't the complete file.
     pub fn new(buf: &'a [u8], chunk_index: usize) -> Self {
         Scanner {
             buf,
             ofs: 0,
             line: 1,
             chunk_index,
         }
     }

     /// Returns bytes `start..end` of the scanner's buffer as a string slice.
     ///
     /// Neither the range nor the UTF-8 validity of the bytes is checked, so
     /// the caller must pass `start <= end <= buf.len()` over valid UTF-8.
     pub fn slice(&self, start: usize, end: usize) -> &'a str {
         // SAFETY: nothing is verified here; soundness rests entirely on the
         // caller contract described above.
         unsafe { std::str::from_utf8_unchecked(self.buf.get_unchecked(start..end)) }
     }

     /// Returns the character that will be returned by read(), but without
     /// advancing the scanner. The character is the single byte at the current
     /// offset widened to a `char`.
     pub fn peek(&self) -> char {
         // SAFETY: sound only while `self.ofs < self.buf.len()`. Presumably
         // callers stop at the trailing nul the buffer is expected to end with.
         unsafe { *self.buf.get_unchecked(self.ofs) as char }
     }

     /// Returns true if there is a \n or \r\n at the scanner's current position.
     pub fn peek_newline(&self) -> bool {
         match self.peek() {
             '\n' => true,
             // A '\r' only counts when a '\n' follows it. The checked lookup
             // returns None for a '\r' in the last byte, so no length
             // arithmetic (which could underflow) is needed.
             '\r' => self.buf.get(self.ofs + 1) == Some(&b'\n'),
             _ => false,
         }
     }

     /// Advances the scanner forward by one character, bumping the line
     /// counter when the character stepped over is a '\n'.
     ///
     /// # Panics
     /// In debug builds, panics with "scanned past end" if the scanner is
     /// already at (or beyond) the end of the buffer.
     pub fn next(&mut self) {
         // The guard must run before peek(): peek() reads without a bounds
         // check, so checking afterwards would be too late.
         debug_assert!(self.ofs < self.buf.len(), "scanned past end");
         if self.peek() == '\n' {
             self.line += 1;
         }
         self.ofs += 1;
     }

     /// The opposite of next(), moves the scanner back one character and
     /// undoes the line increment if that character is a '\n'.
     ///
     /// # Panics
     /// In debug builds, panics with "back at start" if the offset is 0.
     pub fn back(&mut self) {
         debug_assert!(self.ofs > 0, "back at start");
         self.ofs -= 1;
         if self.peek() == '\n' {
             self.line -= 1;
         }
     }

     /// Consumes the next character in the scanner and returns it.
     pub fn read(&mut self) -> char {
         let c = self.peek();
         self.next();
         c
     }

     /// Advances the scanner if the next character is the provided one. Returns
     /// true if the skip was performed.
     pub fn skip(&mut self, ch: char) -> bool {
         let matched = self.peek() == ch;
         if matched {
             self.next();
         }
         matched
     }

     /// Advances the scanner past 0 or more spaces.
     pub fn skip_spaces(&mut self) {
         while self.skip(' ') {}
     }

     /// Reads a character, and returns an error if that character is not
     /// the provided one. On a mismatch the scanner is moved back so that the
     /// error (and the scanner) point at the unexpected character.
     pub fn expect(&mut self, ch: char) -> ParseResult<()> {
         let r = self.read();
         if r == ch {
             return Ok(());
         }
         self.back();
         self.parse_error(format!("expected {:?}, got {:?}", ch, r))
     }

     /// Creates an error representing the given error message and associating
     /// it with the scanner's current position. Always returns `Err`; the `T`
     /// parameter only lets the call be used as any ParseResult.
     pub fn parse_error<T, S: Into<String>>(&self, msg: S) -> ParseResult<T> {
         Err(ParseError {
             msg: msg.into(),
             ofs: self.ofs,
             chunk_index: self.chunk_index,
         })
     }
 }

 /// Returns a string representing the given ParseError, but also printing
 /// the location of the error in the ninja file and a context line.
 ///
 /// `buf` is split into lines on '\n'. `ofs` is the running offset assigned to
 /// the start of the first line; it is advanced line by line until the line
 /// containing `err.ofs` is found. (NOTE(review): how callers pick the initial
 /// `ofs` for chunked input is not visible here; confirm against the caller.)
 ///
 /// The context line is trimmed to roughly 40 bytes around the error. Trimming
 /// is done on bytes and the result is converted lossily, so a cut that lands
 /// inside a multi-byte character (or input that is not valid UTF-8) shows a
 /// replacement character instead of panicking.
 ///
 /// # Panics
 /// Panics if no line of `buf` contains `err.ofs`.
 pub fn format_parse_error(mut ofs: usize, buf: &[u8], filename: &Path, err: ParseError) -> String {
     for (line_number, line) in buf.split(|&c| c == b'\n').enumerate() {
         if ofs + line.len() < err.ofs {
             // Not there yet: skip this line and the '\n' that ended it.
             ofs += line.len() + 1;
             continue;
         }

         let prefix = format!("{}:{}: ", filename.display(), line_number + 1);
         let mut msg = format!("parse error: {}\n{}", err.msg, prefix);

         let mut context = line;
         let mut col = err.ofs - ofs;
         if col > 40 {
             // Trim beginning of line to fit it on screen.
             msg.push_str("...");
             context = &context[col - 20..];
             col = 3 + 20;
         }
         let truncated = context.len() > 40;
         if truncated {
             context = &context[..40];
         }
         msg.push_str(&String::from_utf8_lossy(context));
         if truncated {
             msg.push_str("...");
         }
         msg.push('\n');

         // Caret line: pad past the "file:line: " prefix and the column.
         msg.push_str(&" ".repeat(prefix.len() + col));
         msg.push_str("^\n");
         return msg;
     }
     panic!("invalid offset when formatting error")
 }

 /// Scanner wants its input buffer to end in a trailing nul.
 /// This function is like std::fs::read() but appends a nul, efficiently.
 ///
 /// # Errors
 /// Returns any I/O error from opening the file, querying its metadata, or
 /// reading its contents.
 pub fn read_file_with_nul(path: &Path) -> std::io::Result<Vec<u8>> {
     // Using std::fs::read() to read the file and then pushing a nul on the end
     // causes us to allocate a buffer the size of the file, then grow it to push
     // the nul, copying the entire file(!).  So instead create a buffer of the
     // right size up front.
     let mut file = std::fs::File::open(path)?;
     let size = file.metadata()?.len() as usize;
     let mut bytes = Vec::with_capacity(size + 1);
     // read_to_end appends into the reserved capacity, so no uninitialized
     // memory is ever exposed as a slice. The reported size is only a capacity
     // hint: files that report length 0 but have content are still read fully.
     file.read_to_end(&mut bytes)?;
     bytes.push(0);
     Ok(bytes)
 }
	//! Scans an input string (source file) character by character.

	use std::{io::Read, path::Path};

	/// ParseError represents an error encountered while parsing the ninja file.
	/// It contains both an error message and a position in the file that the error
	/// occurred at.
	///
	/// Values are built by `Scanner::parse_error`, which copies the scanner's
	/// current offset and chunk index into the error.
	#[derive(Debug)]
	pub struct ParseError {
	/// Human-readable description of what went wrong.
	msg: String,
	/// Offset of the scanner (into its own buffer) when the error was created.
	ofs: usize,
	/// Chunk index of the scanner that created the error; see
	/// `get_chunk_index`.
	chunk_index: usize,
	}

	impl ParseError {
	    /// Reports the chunk index that was recorded when this error was created.
	    pub fn get_chunk_index(&self) -> usize {
	        let Self { chunk_index, .. } = self;
	        *chunk_index
	    }
	}

	/// ParseResult<T> is a Result<T, ParseError>; it is the return type of
	/// `Scanner::expect` and `Scanner::parse_error`.
	pub type ParseResult<T> = Result<T, ParseError>;

	/// A Scanner wraps a byte slice and provides methods for easily reading
	/// individual characters out of it.
	///
	/// Reads use unchecked indexing, so `ofs` has to stay inside `buf`; the
	/// comment on `read_file_with_nul` notes that the scanner wants its input
	/// buffer to end in a trailing nul.
	pub struct Scanner<'a> {
	/// The bytes being scanned.
	buf: &'a [u8],
	/// The offset of the scanner in the buffer.
	pub ofs: usize,
	/// The line number of the scanner in its buffer.
	pub line: usize,
	/// Chunk index given to `new`; copied into every ParseError created by
	/// `parse_error`.
	chunk_index: usize,
	}

	impl<'a> Scanner<'a> {
	    /// Creates a new scanner that scans the given buffer, starting at offset 0
	    /// on line 1. The chunk index is recorded into ParseErrors to accurately
	    /// recreate the position of the error when buf isn't the complete file.
	    pub fn new(buf: &'a [u8], chunk_index: usize) -> Self {
	        Scanner {
	            buf,
	            ofs: 0,
	            line: 1,
	            chunk_index,
	        }
	    }

	    /// Returns bytes `start..end` of the scanner's buffer as a string slice.
	    ///
	    /// Neither the range nor the UTF-8 validity of the bytes is checked, so
	    /// the caller must pass `start <= end <= buf.len()` over valid UTF-8.
	    pub fn slice(&self, start: usize, end: usize) -> &'a str {
	        // SAFETY: nothing is verified here; soundness rests entirely on the
	        // caller contract described above.
	        unsafe { std::str::from_utf8_unchecked(self.buf.get_unchecked(start..end)) }
	    }

	    /// Returns the character that will be returned by read(), but without
	    /// advancing the scanner. The character is the single byte at the current
	    /// offset widened to a `char`.
	    pub fn peek(&self) -> char {
	        // SAFETY: sound only while `self.ofs < self.buf.len()`. Presumably
	        // callers stop at the trailing nul the buffer is expected to end with.
	        unsafe { *self.buf.get_unchecked(self.ofs) as char }
	    }

	    /// Returns true if there is a \n or \r\n at the scanner's current position.
	    pub fn peek_newline(&self) -> bool {
	        match self.peek() {
	            '\n' => true,
	            // A '\r' only counts when a '\n' follows it. The checked lookup
	            // returns None for a '\r' in the last byte, so no length
	            // arithmetic (which could underflow) is needed.
	            '\r' => self.buf.get(self.ofs + 1) == Some(&b'\n'),
	            _ => false,
	        }
	    }

	    /// Advances the scanner forward by one character, bumping the line
	    /// counter when the character stepped over is a '\n'.
	    ///
	    /// # Panics
	    /// In debug builds, panics with "scanned past end" if the scanner is
	    /// already at (or beyond) the end of the buffer.
	    pub fn next(&mut self) {
	        // The guard must run before peek(): peek() reads without a bounds
	        // check, so checking afterwards would be too late.
	        debug_assert!(self.ofs < self.buf.len(), "scanned past end");
	        if self.peek() == '\n' {
	            self.line += 1;
	        }
	        self.ofs += 1;
	    }

	    /// The opposite of next(), moves the scanner back one character and
	    /// undoes the line increment if that character is a '\n'.
	    ///
	    /// # Panics
	    /// In debug builds, panics with "back at start" if the offset is 0.
	    pub fn back(&mut self) {
	        debug_assert!(self.ofs > 0, "back at start");
	        self.ofs -= 1;
	        if self.peek() == '\n' {
	            self.line -= 1;
	        }
	    }

	    /// Consumes the next character in the scanner and returns it.
	    pub fn read(&mut self) -> char {
	        let c = self.peek();
	        self.next();
	        c
	    }

	    /// Advances the scanner if the next character is the provided one. Returns
	    /// true if the skip was performed.
	    pub fn skip(&mut self, ch: char) -> bool {
	        let matched = self.peek() == ch;
	        if matched {
	            self.next();
	        }
	        matched
	    }

	    /// Advances the scanner past 0 or more spaces.
	    pub fn skip_spaces(&mut self) {
	        while self.skip(' ') {}
	    }

	    /// Reads a character, and returns an error if that character is not
	    /// the provided one. On a mismatch the scanner is moved back so that the
	    /// error (and the scanner) point at the unexpected character.
	    pub fn expect(&mut self, ch: char) -> ParseResult<()> {
	        let r = self.read();
	        if r == ch {
	            return Ok(());
	        }
	        self.back();
	        self.parse_error(format!("expected {:?}, got {:?}", ch, r))
	    }

	    /// Creates an error representing the given error message and associating
	    /// it with the scanner's current position. Always returns `Err`; the `T`
	    /// parameter only lets the call be used as any ParseResult.
	    pub fn parse_error<T, S: Into<String>>(&self, msg: S) -> ParseResult<T> {
	        Err(ParseError {
	            msg: msg.into(),
	            ofs: self.ofs,
	            chunk_index: self.chunk_index,
	        })
	    }
	}

	/// Returns a string representing the given ParseError, but also printing
	/// the location of the error in the ninja file and a context line.
	pub fn format_parse_error(mut ofs: usize, buf: &[u8], filename: &Path, err: ParseError) -> String {
	let lines = buf.split(\|&c\| c == b'\n');
	for (line_number, line) in lines.enumerate() {
	if ofs + line.len() >= err.ofs {
	let mut msg = "parse error: ".to_string();
	msg.push_str(&err.msg);
	msg.push('\n');

	let prefix = format!("{}:{}: ", filename.display(), line_number + 1);
	msg.push_str(&prefix);

	let mut context = unsafe { std::str::from_utf8_unchecked(line) };
	let mut col = err.ofs - ofs;
	if col > 40 {
	// Trim beginning of line to fit it on screen.
	msg.push_str("...");
	context = &context[col - 20..];
	col = 3 + 20;
	}
	if context.len() > 40 {
	context = &context[0..40];
	msg.push_str(context);
	msg.push_str("...");
	} else {
	msg.push_str(context);
	}
	msg.push('\n');

	msg.push_str(&" ".repeat(prefix.len() + col));
	msg.push_str("^\n");
	return msg;
	}
	ofs += line.len() + 1;
	}
	panic!("invalid offset when formatting error")
	}

	/// Scanner wants its input buffer to end in a trailing nul.
	/// This function is like std::fs::read() but appends a nul, efficiently.
	///
	/// # Errors
	/// Returns any I/O error from opening the file, querying its metadata, or
	/// reading its contents.
	pub fn read_file_with_nul(path: &Path) -> std::io::Result<Vec<u8>> {
	    // Using std::fs::read() to read the file and then pushing a nul on the end
	    // causes us to allocate a buffer the size of the file, then grow it to push
	    // the nul, copying the entire file(!). So instead create a buffer of the
	    // right size up front.
	    let mut file = std::fs::File::open(path)?;
	    let size = file.metadata()?.len() as usize;
	    let mut bytes = Vec::with_capacity(size + 1);
	    // read_to_end appends into the reserved capacity, so no uninitialized
	    // memory is ever exposed as a slice. The reported size is only a capacity
	    // hint: files that report length 0 but have content are still read fully.
	    file.read_to_end(&mut bytes)?;
	    bytes.push(0);
	    Ok(bytes)
	}