| //! Source file support for diagnostic reporting. |
| //! |
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing
//! [`Diagnostics`] with the [`term::emit`] function.
| //! |
//! Two simple implementations of this trait are provided:
| //! |
| //! - [`SimpleFile`]: For single-file use-cases |
| //! - [`SimpleFiles`]: For multi-file use-cases |
| //! |
| //! These data structures provide a pretty minimal API, however, |
| //! so end-users are encouraged to create their own implementations for their |
| //! own specific use-cases, such as an implementation that accesses the file |
| //! system directly (and caches the line start locations), or an implementation |
| //! using an incremental compilation library like [`salsa`]. |
| //! |
| //! [`term::emit`]: crate::term::emit |
| //! [`Diagnostics`]: crate::diagnostic::Diagnostic |
| //! [`Files`]: Files |
| //! [`SimpleFile`]: SimpleFile |
| //! [`SimpleFiles`]: SimpleFiles |
| //! |
| //! [`salsa`]: https://crates.io/crates/salsa |
| |
| use std::ops::Range; |
| |
/// An error that happened while looking up a file or a piece of content in that file.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge {
        /// The byte index that was requested.
        given: usize,
        /// The maximum byte index, as reported in the error message.
        max: usize,
    },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge {
        /// The line index that was requested.
        given: usize,
        /// The maximum line index, as reported in the error message.
        max: usize,
    },
    /// The file is present and contains the specified line index, but the line does not contain
    /// the specified column index.
    ColumnTooLarge {
        /// The column index that was requested.
        given: usize,
        /// The maximum column index, as reported in the error message.
        max: usize,
    },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary {
        /// The byte index that was requested.
        given: usize,
    },
    /// There was an error while doing IO.
    Io(std::io::Error),
}

/// Wraps an IO error in [`Error::Io`] so that `?` can be used on IO results.
impl From<std::io::Error> for Error {
    fn from(err: std::io::Error) -> Error {
        Error::Io(err)
    }
}

/// Renders a short, human-readable description of the error.
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::FileMissing => write!(f, "file missing"),
            Error::IndexTooLarge { given, max } => {
                write!(f, "invalid index {}, maximum index is {}", given, max)
            }
            Error::LineTooLarge { given, max } => {
                write!(f, "invalid line {}, maximum line is {}", given, max)
            }
            // Phrased like the index and line messages above ("maximum ... is ...").
            Error::ColumnTooLarge { given, max } => {
                write!(f, "invalid column {}, maximum column is {}", given, max)
            }
            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
            Error::Io(err) => write!(f, "{}", err),
        }
    }
}

/// Only the [`Error::Io`] variant wraps another error, so it is the only one with a source.
impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Listed exhaustively so that adding a variant forces a decision here.
        match self {
            Error::Io(err) => Some(err),
            Error::FileMissing
            | Error::IndexTooLarge { .. }
            | Error::LineTooLarge { .. }
            | Error::ColumnTooLarge { .. }
            | Error::InvalidCharBoundary { .. } => None,
        }
    }
}
| |
| /// A minimal interface for accessing source files when rendering diagnostics. |
| /// |
| /// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference. |
| /// This is to workaround the lack of higher kinded lifetime parameters. |
| /// This can be ignored if this is not needed, however. |
| pub trait Files<'a> { |
| /// A unique identifier for files in the file provider. This will be used |
| /// for rendering `diagnostic::Label`s in the corresponding source files. |
| type FileId: 'a + Copy + PartialEq; |
| /// The user-facing name of a file, to be displayed in diagnostics. |
| type Name: 'a + std::fmt::Display; |
| /// The source code of a file. |
| type Source: 'a + AsRef<str>; |
| |
| /// The user-facing name of a file. |
| fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>; |
| |
| /// The source code of a file. |
| fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>; |
| |
| /// The index of the line at the given byte index. |
| /// If the byte index is past the end of the file, returns the maximum line index in the file. |
| /// This means that this function only fails if the file is not present. |
| /// |
| /// # Note for trait implementors |
| /// |
| /// This can be implemented efficiently by performing a binary search over |
| /// a list of line starts that was computed by calling the [`line_starts`] |
| /// function that is exported from the [`files`] module. It might be useful |
| /// to pre-compute and cache these line starts. |
| /// |
| /// [`line_starts`]: crate::files::line_starts |
| /// [`files`]: crate::files |
| fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>; |
| |
| /// The user-facing line number at the given line index. |
| /// It is not necessarily checked that the specified line index |
| /// is actually in the file. |
| /// |
| /// # Note for trait implementors |
| /// |
| /// This is usually 1-indexed from the beginning of the file, but |
| /// can be useful for implementing something like the |
| /// [C preprocessor's `#line` macro][line-macro]. |
| /// |
| /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line |
| #[allow(unused_variables)] |
| fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> { |
| Ok(line_index + 1) |
| } |
| |
| /// The user-facing column number at the given line index and byte index. |
| /// |
| /// # Note for trait implementors |
| /// |
| /// This is usually 1-indexed from the the start of the line. |
| /// A default implementation is provided, based on the [`column_index`] |
| /// function that is exported from the [`files`] module. |
| /// |
| /// [`files`]: crate::files |
| /// [`column_index`]: crate::files::column_index |
| fn column_number( |
| &'a self, |
| id: Self::FileId, |
| line_index: usize, |
| byte_index: usize, |
| ) -> Result<usize, Error> { |
| let source = self.source(id)?; |
| let line_range = self.line_range(id, line_index)?; |
| let column_index = column_index(source.as_ref(), line_range, byte_index); |
| |
| Ok(column_index + 1) |
| } |
| |
| /// Convenience method for returning line and column number at the given |
| /// byte index in the file. |
| fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> { |
| let line_index = self.line_index(id, byte_index)?; |
| |
| Ok(Location { |
| line_number: self.line_number(id, line_index)?, |
| column_number: self.column_number(id, line_index, byte_index)?, |
| }) |
| } |
| |
| /// The byte range of line in the source of the file. |
| fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>; |
| } |
| |
/// A position in a source file, expressed in user-facing line and column numbers.
///
/// This is the value produced by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Location {
    /// The line number, as shown to the user.
    pub line_number: usize,
    /// The column number, as shown to the user.
    pub column_number: usize,
}
| |
/// Computes the column index of a byte index within a line of the source.
///
/// The result is the number of characters of the line that end at or before
/// the given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never scan past the end of the line or the end of the source.
    let line_end = line_range.end.min(source.len());
    let stop = byte_index.min(line_end);

    let mut columns = 0;
    for offset in line_range.start..stop {
        // A character is complete once the byte after it starts a new code point
        // (or is the end of the source).
        if source.is_char_boundary(offset + 1) {
            columns += 1;
        }
    }
    columns
}
| |
/// Yields the byte index at which each line of the source string starts.
///
/// The first item is always `0`; every following item is the index directly
/// after a `\n` in the source.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // `\n` is a single byte that never occurs inside a multi-byte UTF-8 sequence,
    // so scanning bytes finds exactly the newline characters.
    let after_newlines = source
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(newline_at, _)| newline_at + 1);

    std::iter::once(0).chain(after_newlines)
}
| |
/// A file database that holds exactly one source file.
///
/// Because there is only a single file in this database, `()` is used as the [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Clone, Debug)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The byte index at which each line of the source code starts.
    line_starts: Vec<usize>,
}
| |
| impl<Name, Source> SimpleFile<Name, Source> |
| where |
| Name: std::fmt::Display, |
| Source: AsRef<str>, |
| { |
| /// Create a new source file. |
| pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> { |
| SimpleFile { |
| name, |
| line_starts: line_starts(source.as_ref()).collect(), |
| source, |
| } |
| } |
| |
| /// Return the name of the file. |
| pub fn name(&self) -> &Name { |
| &self.name |
| } |
| |
| /// Return the source of the file. |
| pub fn source(&self) -> &Source { |
| &self.source |
| } |
| |
| /// Return the starting byte index of the line with the specified line index. |
| /// Convenience method that already generates errors if necessary. |
| fn line_start(&self, line_index: usize) -> Result<usize, Error> { |
| use std::cmp::Ordering; |
| |
| match line_index.cmp(&self.line_starts.len()) { |
| Ordering::Less => Ok(self |
| .line_starts |
| .get(line_index) |
| .cloned() |
| .expect("failed despite previous check")), |
| Ordering::Equal => Ok(self.source.as_ref().len()), |
| Ordering::Greater => Err(Error::LineTooLarge { |
| given: line_index, |
| max: self.line_starts.len() - 1, |
| }), |
| } |
| } |
| } |
| |
| impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source> |
| where |
| Name: 'a + std::fmt::Display + Clone, |
| Source: 'a + AsRef<str>, |
| { |
| type FileId = (); |
| type Name = Name; |
| type Source = &'a str; |
| |
| fn name(&self, (): ()) -> Result<Name, Error> { |
| Ok(self.name.clone()) |
| } |
| |
| fn source(&self, (): ()) -> Result<&str, Error> { |
| Ok(self.source.as_ref()) |
| } |
| |
| fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> { |
| Ok(self |
| .line_starts |
| .binary_search(&byte_index) |
| .unwrap_or_else(|next_line| next_line - 1)) |
| } |
| |
| fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> { |
| let line_start = self.line_start(line_index)?; |
| let next_line_start = self.line_start(line_index + 1)?; |
| |
| Ok(line_start..next_line_start) |
| } |
| } |
| |
| /// A file database that can store multiple source files. |
| /// |
| /// This is useful for simple language tests, but it might be worth creating a |
| /// custom implementation when a language scales beyond a certain size. |
| /// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait. |
| #[derive(Debug, Clone)] |
| pub struct SimpleFiles<Name, Source> { |
| files: Vec<SimpleFile<Name, Source>>, |
| } |
| |
| impl<Name, Source> SimpleFiles<Name, Source> |
| where |
| Name: std::fmt::Display, |
| Source: AsRef<str>, |
| { |
| /// Create a new files database. |
| pub fn new() -> SimpleFiles<Name, Source> { |
| SimpleFiles { files: Vec::new() } |
| } |
| |
| /// Add a file to the database, returning the handle that can be used to |
| /// refer to it again. |
| pub fn add(&mut self, name: Name, source: Source) -> usize { |
| let file_id = self.files.len(); |
| self.files.push(SimpleFile::new(name, source)); |
| file_id |
| } |
| |
| /// Get the file corresponding to the given id. |
| pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> { |
| self.files.get(file_id).ok_or(Error::FileMissing) |
| } |
| } |
| |
| impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source> |
| where |
| Name: 'a + std::fmt::Display + Clone, |
| Source: 'a + AsRef<str>, |
| { |
| type FileId = usize; |
| type Name = Name; |
| type Source = &'a str; |
| |
| fn name(&self, file_id: usize) -> Result<Name, Error> { |
| Ok(self.get(file_id)?.name().clone()) |
| } |
| |
| fn source(&self, file_id: usize) -> Result<&str, Error> { |
| Ok(self.get(file_id)?.source().as_ref()) |
| } |
| |
| fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> { |
| self.get(file_id)?.line_index((), byte_index) |
| } |
| |
| fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> { |
| self.get(file_id)?.line_range((), line_index) |
| } |
| } |
| |
#[cfg(test)]
mod test {
    use super::*;

    /// Fixture with a `\n` line, a `\r\n` line, an empty line, and a final
    /// line without a trailing newline.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line starts computed by `SimpleFile::new` point directly after each `\n`.
    #[test]
    fn line_starts() {
        let expected: [usize; 4] = [
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // ""
            10, // "baz"
        ];

        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(file.line_starts, expected);
    }

    /// Slicing the source by each line range yields the lines including their terminators.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let span = file.line_range((), line).unwrap();
            line_sources.push(&file.source[span]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}