| //! Detects unescaped backticks (\`) in doc comments. |
| |
| use crate::clean::Item; |
| use crate::core::DocContext; |
| use crate::html::markdown::main_body_opts; |
| use pulldown_cmark::{BrokenLink, Event, Parser}; |
| use rustc_errors::DiagnosticBuilder; |
| use rustc_lint_defs::Applicability; |
| use rustc_resolve::rustdoc::source_span_for_markdown_range; |
| use std::ops::Range; |
| |
/// Lints the documentation of `item` for unescaped backticks.
///
/// The doc string is parsed as markdown. For every markdown element that contains a stray
/// backtick, the `UNESCAPED_BACKTICKS` lint is emitted at that backtick, together with
/// suggestions for where a matching backtick may be missing and a suggestion to escape the
/// backtick with a backslash.
///
/// Items that have no local HIR id and items with empty documentation are skipped.
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
    let tcx = cx.tcx;
    let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
        // If non-local, no need to check anything.
        return;
    };

    let dox = item.doc_value();
    if dox.is_empty() {
        return;
    }

    // Broken link references are looked up in the link names known for this item; a match
    // supplies the link's href and replacement text to the parser.
    let link_names = item.link_names(&cx.cache);
    let mut replacer = |broken_link: BrokenLink<'_>| {
        link_names
            .iter()
            .find(|link| *link.original_text == *broken_link.reference)
            .map(|link| ((*link.href).into(), (*link.new_text).into()))
    };
    let parser = Parser::new_with_broken_link_callback(&dox, main_body_opts(), Some(&mut replacer))
        .into_offset_iter();

    // One `Element` per currently open markdown tag; pushed on `Start`, popped on `End`.
    let mut element_stack = Vec::new();

    // End offset of the most recently seen text event, used below to decide whether the byte
    // right before a text event belongs to a text node.
    let mut prev_text_end = 0;
    for (event, event_range) in parser {
        match event {
            Event::Start(_) => {
                element_stack.push(Element::new(event_range));
            }
            Event::End(_) => {
                let element = element_stack.pop().unwrap();

                // Nothing to report unless a stray backtick was recorded for this element.
                let Some(backtick_index) = element.backtick_index else {
                    continue;
                };

                // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
                // use the span of the entire attribute as a fallback.
                let span = source_span_for_markdown_range(
                    tcx,
                    &dox,
                    &(backtick_index..backtick_index + 1),
                    &item.attrs.doc_strings,
                )
                .unwrap_or_else(|| item.attr_span(tcx));

                tcx.node_span_lint(crate::lint::UNESCAPED_BACKTICKS, hir_id, span, "unescaped backtick", |lint| {
                    // Tracks whether any specific suggestion was attached, so that a generic
                    // help message can be emitted otherwise.
                    let mut help_emitted = false;

                    // First, suggestions derived from an earlier inline code node in the
                    // same element that looked wrong.
                    match element.prev_code_guess {
                        PrevCodeGuess::None => {}
                        PrevCodeGuess::Start { guess, .. } => {
                            // "foo` `bar`" -> "`foo` `bar`"
                            if let Some(suggest_index) = clamp_start(guess, &element.suggestible_ranges)
                                && can_suggest_backtick(&dox, suggest_index)
                            {
                                suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the opening backtick of a previous inline code may be missing");
                                help_emitted = true;
                            }
                        }
                        PrevCodeGuess::End { guess, .. } => {
                            // "`foo `bar`" -> "`foo` `bar`"
                            // Don't `clamp_end` here, because the suggestion is guaranteed to be inside
                            // an inline code node and we intentionally "break" the inline code here.
                            let suggest_index = guess;
                            if can_suggest_backtick(&dox, suggest_index) {
                                suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "a previous inline code might be longer than expected");
                                help_emitted = true;
                            }
                        }
                    }

                    // Unless the previous-code guess is confident, also guess based on the
                    // text directly around the stray backtick.
                    if !element.prev_code_guess.is_confident() {
                        // "`foo` bar`" -> "`foo` `bar`"
                        if let Some(guess) = guess_start_of_code(&dox, element.element_range.start..backtick_index)
                            && let Some(suggest_index) = clamp_start(guess, &element.suggestible_ranges)
                            && can_suggest_backtick(&dox, suggest_index)
                        {
                            suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the opening backtick of an inline code may be missing");
                            help_emitted = true;
                        }

                        // "`foo` `bar" -> "`foo` `bar`"
                        // Don't suggest closing backtick after single trailing char,
                        // if we already suggested opening backtick. For example:
                        // "foo`." -> "`foo`." or "foo`s" -> "`foo`s".
                        if let Some(guess) = guess_end_of_code(&dox, backtick_index + 1..element.element_range.end)
                            && let Some(suggest_index) = clamp_end(guess, &element.suggestible_ranges)
                            && can_suggest_backtick(&dox, suggest_index)
                            && (!help_emitted || suggest_index - backtick_index > 2)
                        {
                            suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the closing backtick of an inline code may be missing");
                            help_emitted = true;
                        }
                    }

                    if !help_emitted {
                        lint.help("the opening or closing backtick of an inline code may be missing");
                    }

                    // Always offer escaping the backtick itself as an alternative.
                    suggest_insertion(cx, item, &dox, lint, backtick_index, '\\', "if you meant to use a literal backtick, escape it");
                });
            }
            Event::Code(_) => {
                let element = element_stack
                    .last_mut()
                    .expect("expected inline code node to be inside of an element");
                assert!(
                    event_range.start >= element.element_range.start
                        && event_range.end <= element.element_range.end
                );

                // This inline code might be longer than it's supposed to be.
                // Only check single backtick inline code for now.
                if !element.prev_code_guess.is_confident()
                    && dox.as_bytes().get(event_range.start) == Some(&b'`')
                    && dox.as_bytes().get(event_range.start + 1) != Some(&b'`')
                {
                    // The code's content, without the surrounding backticks.
                    let range_inside = event_range.start + 1..event_range.end - 1;
                    let text_inside = &dox[range_inside.clone()];

                    // Inline code that starts or ends with whitespace is treated as a
                    // confident sign that its backticks are mismatched.
                    let is_confident = text_inside.starts_with(char::is_whitespace)
                        || text_inside.ends_with(char::is_whitespace);

                    if let Some(guess) = guess_end_of_code(&dox, range_inside) {
                        // Find earlier end of code.
                        element.prev_code_guess = PrevCodeGuess::End { guess, is_confident };
                    } else {
                        // Find alternate start of code.
                        let range_before = element.element_range.start..event_range.start;
                        if let Some(guess) = guess_start_of_code(&dox, range_before) {
                            element.prev_code_guess = PrevCodeGuess::Start { guess, is_confident };
                        }
                    }
                }
            }
            Event::Text(text) => {
                let element = element_stack
                    .last_mut()
                    .expect("expected inline text node to be inside of an element");
                assert!(
                    event_range.start >= element.element_range.start
                        && event_range.end <= element.element_range.end
                );

                // The first char is escaped if the prev char is \ and not part of a text node.
                let is_escaped = prev_text_end < event_range.start
                    && dox.as_bytes()[event_range.start - 1] == b'\\';

                // Don't lint backslash-escaped (\`) or html-escaped (&#96;) backticks.
                // The last comparison requires the source text itself to be a backtick.
                if *text == *"`" && !is_escaped && *text == dox[event_range.clone()] {
                    // We found a stray backtick.
                    assert!(
                        element.backtick_index.is_none(),
                        "expected at most one unescaped backtick per element",
                    );
                    element.backtick_index = Some(event_range.start);
                }

                prev_text_end = event_range.end;

                // Record where a backtick may be inserted within this element.
                if is_escaped {
                    // Ensure that we suggest "`\x" and not "\`x".
                    element.suggestible_ranges.push(event_range.start - 1..event_range.end);
                } else {
                    element.suggestible_ranges.push(event_range);
                }
            }
            _ => {}
        }
    }
}
| |
/// Describes an earlier inline code node of an element whose delimiters look misplaced.
///
/// Each non-empty variant carries `guess`, the byte position where a \` should be suggested,
/// and `is_confident`, which is set when the inline code starts or ends with whitespace.
#[derive(Debug)]
enum PrevCodeGuess {
    /// No suspicious inline code has been seen.
    None,

    /// The opening \` appears to be missing.
    ///
    /// ```markdown
    /// foo` `bar`
    /// ```
    Start {
        guess: usize,
        is_confident: bool,
    },

    /// The closing \` appears to be missing.
    ///
    /// ```markdown
    /// `foo `bar`
    /// ```
    End {
        guess: usize,
        is_confident: bool,
    },
}

impl PrevCodeGuess {
    /// Returns the `is_confident` flag of the guess; `None` is never confident.
    fn is_confident(&self) -> bool {
        match self {
            Self::Start { is_confident, .. } | Self::End { is_confident, .. } => *is_confident,
            Self::None => false,
        }
    }
}
| |
| /// A markdown [tagged element], which may or may not contain an unescaped backtick. |
| /// |
| /// [tagged element]: https://docs.rs/pulldown-cmark/0.9/pulldown_cmark/enum.Tag.html |
| #[derive(Debug)] |
| struct Element { |
| /// The full range (span) of the element in the doc string. |
| element_range: Range<usize>, |
| |
| /// The ranges where we're allowed to put backticks. |
| /// This is used to prevent breaking markdown elements like links or lists. |
| suggestible_ranges: Vec<Range<usize>>, |
| |
| /// The unescaped backtick. |
| backtick_index: Option<usize>, |
| |
| /// Suggest a different start or end of an inline code. |
| prev_code_guess: PrevCodeGuess, |
| } |
| |
| impl Element { |
| const fn new(element_range: Range<usize>) -> Self { |
| Self { |
| element_range, |
| suggestible_ranges: Vec::new(), |
| backtick_index: None, |
| prev_code_guess: PrevCodeGuess::None, |
| } |
| } |
| } |
| |
| /// Given a potentially unclosed inline code, attempt to find the start. |
| fn guess_start_of_code(dox: &str, range: Range<usize>) -> Option<usize> { |
| assert!(dox.as_bytes()[range.end] == b'`'); |
| |
| let mut braces = 0; |
| let mut guess = 0; |
| for (idx, ch) in dox[range.clone()].char_indices().rev() { |
| match ch { |
| ')' | ']' | '}' => braces += 1, |
| '(' | '[' | '{' => { |
| if braces == 0 { |
| guess = idx + 1; |
| break; |
| } |
| braces -= 1; |
| } |
| ch if ch.is_whitespace() && braces == 0 => { |
| guess = idx + 1; |
| break; |
| } |
| _ => (), |
| } |
| } |
| |
| guess += range.start; |
| |
| // Don't suggest empty inline code or duplicate backticks. |
| can_suggest_backtick(dox, guess).then_some(guess) |
| } |
| |
| /// Given a potentially unclosed inline code, attempt to find the end. |
| fn guess_end_of_code(dox: &str, range: Range<usize>) -> Option<usize> { |
| // Punctuation that should be outside of the inline code. |
| const TRAILING_PUNCTUATION: &[u8] = b".,"; |
| |
| assert!(dox.as_bytes()[range.start - 1] == b'`'); |
| |
| let text = dox[range.clone()].trim_end(); |
| let mut braces = 0; |
| let mut guess = text.len(); |
| for (idx, ch) in text.char_indices() { |
| match ch { |
| '(' | '[' | '{' => braces += 1, |
| ')' | ']' | '}' => { |
| if braces == 0 { |
| guess = idx; |
| break; |
| } |
| braces -= 1; |
| } |
| ch if ch.is_whitespace() && braces == 0 => { |
| guess = idx; |
| break; |
| } |
| _ => (), |
| } |
| } |
| |
| // Strip a single trailing punctuation. |
| if guess >= 1 |
| && TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 1]) |
| && (guess < 2 || !TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 2])) |
| { |
| guess -= 1; |
| } |
| |
| guess += range.start; |
| |
| // Don't suggest empty inline code or duplicate backticks. |
| can_suggest_backtick(dox, guess).then_some(guess) |
| } |
| |
/// Reports whether a backtick can be inserted at `dox[index]` without ending up directly
/// next to an existing backtick on either side.
fn can_suggest_backtick(dox: &str, index: usize) -> bool {
    let bytes = dox.as_bytes();
    let follows_backtick = index != 0 && bytes[index - 1] == b'`';
    !(follows_backtick || (index != dox.len() && bytes[index] == b'`'))
}
| |
/// Moves `index` forward, if necessary, so that it lies inside (or one past the end of) one of
/// `ranges`. Returns `None` when `index` is beyond the end of every range.
///
/// `ranges` is expected to be sorted; the first range that can hold the index wins.
fn clamp_start(index: usize, ranges: &[Range<usize>]) -> Option<usize> {
    ranges.iter().find_map(|range| {
        if range.start >= index {
            // The range begins at or after the index: snap forward to its start.
            Some(range.start)
        } else if index <= range.end {
            // The index already falls within this range.
            Some(index)
        } else {
            None
        }
    })
}
| |
/// Moves `index` backward, if necessary, so that it lies inside (or one past the end of) one of
/// `ranges`. Returns `None` when `index` is before the start of every range.
///
/// `ranges` is expected to be sorted; they are examined from last to first.
fn clamp_end(index: usize, ranges: &[Range<usize>]) -> Option<usize> {
    ranges.iter().rev().find_map(|range| {
        if range.end <= index {
            // The range ends at or before the index: snap back to its end.
            Some(range.end)
        } else if index >= range.start {
            // The index already falls within this range.
            Some(index)
        } else {
            None
        }
    })
}
| |
| /// Try to emit a span suggestion and fall back to help messages if we can't find a suitable span. |
| /// |
| /// This helps finding backticks in huge macro-generated docs. |
| fn suggest_insertion( |
| cx: &DocContext<'_>, |
| item: &Item, |
| dox: &str, |
| lint: &mut DiagnosticBuilder<'_, ()>, |
| insert_index: usize, |
| suggestion: char, |
| message: &'static str, |
| ) { |
| /// Maximum bytes of context to show around the insertion. |
| const CONTEXT_MAX_LEN: usize = 80; |
| |
| if let Some(span) = source_span_for_markdown_range( |
| cx.tcx, |
| &dox, |
| &(insert_index..insert_index), |
| &item.attrs.doc_strings, |
| ) { |
| lint.span_suggestion(span, message, suggestion, Applicability::MaybeIncorrect); |
| } else { |
| let line_start = dox[..insert_index].rfind('\n').map_or(0, |idx| idx + 1); |
| let line_end = dox[insert_index..].find('\n').map_or(dox.len(), |idx| idx + insert_index); |
| |
| let context_before_max_len = if insert_index - line_start < CONTEXT_MAX_LEN / 2 { |
| insert_index - line_start |
| } else if line_end - insert_index < CONTEXT_MAX_LEN / 2 { |
| CONTEXT_MAX_LEN - (line_end - insert_index) |
| } else { |
| CONTEXT_MAX_LEN / 2 |
| }; |
| let context_after_max_len = CONTEXT_MAX_LEN - context_before_max_len; |
| |
| let (prefix, context_start) = if insert_index - line_start <= context_before_max_len { |
| ("", line_start) |
| } else { |
| ("...", dox.ceil_char_boundary(insert_index - context_before_max_len)) |
| }; |
| let (suffix, context_end) = if line_end - insert_index <= context_after_max_len { |
| ("", line_end) |
| } else { |
| ("...", dox.floor_char_boundary(insert_index + context_after_max_len)) |
| }; |
| |
| let context_full = &dox[context_start..context_end].trim_end(); |
| let context_before = &dox[context_start..insert_index]; |
| let context_after = &dox[insert_index..context_end].trim_end(); |
| lint.help(format!( |
| "{message}\n change: {prefix}{context_full}{suffix}\nto this: {prefix}{context_before}{suggestion}{context_after}{suffix}" |
| )); |
| } |
| } |