| pub use CommentStyle::*; |
| |
| use crate::ast; |
| use crate::source_map::SourceMap; |
| use crate::parse::lexer::{is_block_doc_comment, is_pattern_whitespace}; |
| use crate::parse::lexer::{self, ParseSess, StringReader, TokenAndSpan}; |
| use crate::print::pprust; |
| |
| use syntax_pos::{BytePos, CharPos, Pos, FileName}; |
| use log::debug; |
| |
| use std::io::Read; |
| use std::usize; |
| |
/// Describes where a gathered comment sits relative to the code around it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// Every line of the comment stands alone: no code on either side.
    Isolated,
    /// There is code to the left of the comment.
    Trailing,
    /// There is code both before and after the comment, as in `a /* foo */ b`.
    Mixed,
    /// Not an actual comment: a manual blank line ("\n\n") kept for layout.
    BlankLine,
}
| |
| #[derive(Clone)] |
| pub struct Comment { |
| pub style: CommentStyle, |
| pub lines: Vec<String>, |
| pub pos: BytePos, |
| } |
| |
| fn is_doc_comment(s: &str) -> bool { |
| (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") || |
| (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!") |
| } |
| |
| pub fn doc_comment_style(comment: &str) -> ast::AttrStyle { |
| assert!(is_doc_comment(comment)); |
| if comment.starts_with("//!") || comment.starts_with("/*!") { |
| ast::AttrStyle::Inner |
| } else { |
| ast::AttrStyle::Outer |
| } |
| } |
| |
/// Strips the comment markers and decorative framing from `comment` and
/// returns the remaining text.
///
/// * One-line comments (`///!`, `///`, `//!`, `//`) lose only that prefix.
/// * Block comments lose their three-byte opener (`/**` or `/*!`) and the
///   closing `*/`. Decoration-only lines at the top and bottom are then
///   dropped, and if every remaining line carries a `*` in the same column,
///   preceded only by spaces or tabs, that prefix is removed from each line.
///
/// # Panics
///
/// Panics with a "not a doc-comment" message if `comment` starts with none of
/// the recognized markers, or if it is a block comment from which no body can
/// be sliced out (for example `/**/`).
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// Drops decoration-only lines from both ends of `lines`: an all-star
    /// first line, an all-star last line (optionally led by one whitespace
    /// character), and any blank lines bordering them.
    fn vertical_trim<'a, 'b>(lines: &'a [&'b str]) -> &'a [&'b str] {
        let mut start = 0;
        let mut end = lines.len();

        // A first line consisting only of stars (or nothing) is decoration.
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            start += 1;
        }
        while start < end && lines[start].trim().is_empty() {
            start += 1;
        }

        // Likewise for the last line, which commonly looks like " ****".
        // The leading character is only tolerated when it is whitespace or a
        // star; skipping it unconditionally would discard real text such as
        // a final line "b" or "x*".
        if end > start {
            let mut chars = lines[end - 1].chars();
            let lead_is_decoration = chars
                .next()
                .map_or(true, |c| c == '*' || c.is_whitespace());
            if lead_is_decoration && chars.all(|c| c == '*') {
                end -= 1;
            }
        }
        while end > start && lines[end - 1].trim().is_empty() {
            end -= 1;
        }

        &lines[start..end]
    }

    /// Looks for a common "[ \t]*\*" prefix on every line. Returns the number
    /// of bytes to cut from the front of each line, or `None` when the lines
    /// do not share such a prefix (or there are no lines at all).
    fn horizontal_trim(lines: &[&str]) -> Option<usize> {
        let mut star_col: Option<usize> = None;

        for line in lines {
            // Everything before the first non-blank character is an ASCII
            // space or tab, so this byte index is also the character column.
            let first_non_blank = line.char_indices().find(|&(_, c)| c != ' ' && c != '\t');
            match first_non_blank {
                Some((idx, '*')) => match star_col {
                    None => star_col = Some(idx),
                    Some(col) if col == idx => {}
                    Some(_) => return None,
                },
                // The line starts with real text: nothing to trim.
                Some(_) => return None,
                // A whitespace-only line is acceptable only if it is exactly
                // as long as the prefix being removed.
                None => {
                    if star_col.map_or(true, |col| line.len() != col + 1) {
                        return None;
                    }
                }
            }
        }

        star_col.map(|col| col + 1)
    }

    // One-line comments lose their prefix. Longer prefixes come first so that
    // "///!" is not mistaken for "///" or "//".
    const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];

    for prefix in ONELINERS {
        if comment.starts_with(*prefix) {
            return comment[prefix.len()..].to_string();
        }
    }

    if comment.starts_with("/*") {
        // `get` rather than indexing: a degenerate comment such as "/**/" has
        // no body to slice, and should reach the descriptive panic below
        // instead of failing with an opaque slice-index error.
        if let Some(body) = comment.get(3..comment.len() - 2) {
            let all_lines: Vec<&str> = body.lines().collect();
            let kept = vertical_trim(&all_lines);

            return match horizontal_trim(kept) {
                Some(skip) => kept
                    .iter()
                    .map(|line| &line[skip..])
                    .collect::<Vec<_>>()
                    .join("\n"),
                None => kept.join("\n"),
            };
        }
    }

    panic!("not a doc-comment: {}", comment);
}
| |
| fn push_blank_line_comment(rdr: &StringReader<'_>, comments: &mut Vec<Comment>) { |
| debug!(">>> blank-line comment"); |
| comments.push(Comment { |
| style: BlankLine, |
| lines: Vec::new(), |
| pos: rdr.pos, |
| }); |
| } |
| |
| fn consume_whitespace_counting_blank_lines( |
| rdr: &mut StringReader<'_>, |
| comments: &mut Vec<Comment> |
| ) { |
| while is_pattern_whitespace(rdr.ch) && !rdr.is_eof() { |
| if rdr.ch_is('\n') { |
| push_blank_line_comment(rdr, &mut *comments); |
| } |
| rdr.bump(); |
| } |
| } |
| |
| fn read_shebang_comment(rdr: &mut StringReader<'_>, |
| code_to_the_left: bool, |
| comments: &mut Vec<Comment>) { |
| debug!(">>> shebang comment"); |
| let p = rdr.pos; |
| debug!("<<< shebang comment"); |
| comments.push(Comment { |
| style: if code_to_the_left { Trailing } else { Isolated }, |
| lines: vec![rdr.read_one_line_comment()], |
| pos: p, |
| }); |
| } |
| |
| fn read_line_comments(rdr: &mut StringReader<'_>, |
| code_to_the_left: bool, |
| comments: &mut Vec<Comment>) { |
| debug!(">>> line comments"); |
| let p = rdr.pos; |
| let mut lines: Vec<String> = Vec::new(); |
| while rdr.ch_is('/') && rdr.nextch_is('/') { |
| let line = rdr.read_one_line_comment(); |
| debug!("{}", line); |
| // Doc comments are not put in comments. |
| if is_doc_comment(&line[..]) { |
| break; |
| } |
| lines.push(line); |
| rdr.consume_non_eol_whitespace(); |
| } |
| debug!("<<< line comments"); |
| if !lines.is_empty() { |
| comments.push(Comment { |
| style: if code_to_the_left { Trailing } else { Isolated }, |
| lines, |
| pos: p, |
| }); |
| } |
| } |
| |
| /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. |
| /// Otherwise returns `Some(k)` where `k` is first char offset after that leading |
| /// whitespace. Note that `k` may be outside bounds of `s`. |
| fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { |
| let mut idx = 0; |
| for (i, ch) in s.char_indices().take(col.to_usize()) { |
| if !ch.is_whitespace() { |
| return None; |
| } |
| idx = i + ch.len_utf8(); |
| } |
| Some(idx) |
| } |
| |
| fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) { |
| let len = s.len(); |
| let s1 = match all_whitespace(&s[..], col) { |
| Some(col) => { |
| if col < len { |
| s[col..len].to_string() |
| } else { |
| String::new() |
| } |
| } |
| None => s, |
| }; |
| debug!("pushing line: {}", s1); |
| lines.push(s1); |
| } |
| |
| fn read_block_comment(rdr: &mut StringReader<'_>, |
| code_to_the_left: bool, |
| comments: &mut Vec<Comment>) { |
| debug!(">>> block comment"); |
| let p = rdr.pos; |
| let mut lines: Vec<String> = Vec::new(); |
| |
| // Count the number of chars since the start of the line by rescanning. |
| let src_index = rdr.src_index(rdr.source_file.line_begin_pos(rdr.pos)); |
| let end_src_index = rdr.src_index(rdr.pos); |
| assert!(src_index <= end_src_index, |
| "src_index={}, end_src_index={}, line_begin_pos={}", |
| src_index, end_src_index, rdr.source_file.line_begin_pos(rdr.pos).to_u32()); |
| |
| let col = CharPos(rdr.src[src_index..end_src_index].chars().count()); |
| |
| rdr.bump(); |
| rdr.bump(); |
| |
| let mut curr_line = String::from("/*"); |
| |
| // doc-comments are not really comments, they are attributes |
| if (rdr.ch_is('*') && !rdr.nextch_is('*')) || rdr.ch_is('!') { |
| while !(rdr.ch_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() { |
| curr_line.push(rdr.ch.unwrap()); |
| rdr.bump(); |
| } |
| if !rdr.is_eof() { |
| curr_line.push_str("*/"); |
| rdr.bump(); |
| rdr.bump(); |
| } |
| if is_block_doc_comment(&curr_line[..]) { |
| return; |
| } |
| assert!(!curr_line.contains('\n')); |
| lines.push(curr_line); |
| } else { |
| let mut level: isize = 1; |
| while level > 0 { |
| debug!("=== block comment level {}", level); |
| if rdr.is_eof() { |
| rdr.fatal("unterminated block comment").raise(); |
| } |
| if rdr.ch_is('\n') { |
| trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); |
| curr_line = String::new(); |
| rdr.bump(); |
| } else { |
| curr_line.push(rdr.ch.unwrap()); |
| if rdr.ch_is('/') && rdr.nextch_is('*') { |
| rdr.bump(); |
| rdr.bump(); |
| curr_line.push('*'); |
| level += 1; |
| } else { |
| if rdr.ch_is('*') && rdr.nextch_is('/') { |
| rdr.bump(); |
| rdr.bump(); |
| curr_line.push('/'); |
| level -= 1; |
| } else { |
| rdr.bump(); |
| } |
| } |
| } |
| } |
| if !curr_line.is_empty() { |
| trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); |
| } |
| } |
| |
| let mut style = if code_to_the_left { |
| Trailing |
| } else { |
| Isolated |
| }; |
| rdr.consume_non_eol_whitespace(); |
| if !rdr.is_eof() && !rdr.ch_is('\n') && lines.len() == 1 { |
| style = Mixed; |
| } |
| debug!("<<< block comment"); |
| comments.push(Comment { |
| style, |
| lines, |
| pos: p, |
| }); |
| } |
| |
| |
| fn consume_comment(rdr: &mut StringReader<'_>, |
| comments: &mut Vec<Comment>, |
| code_to_the_left: &mut bool, |
| anything_to_the_left: &mut bool) { |
| debug!(">>> consume comment"); |
| if rdr.ch_is('/') && rdr.nextch_is('/') { |
| read_line_comments(rdr, *code_to_the_left, comments); |
| *code_to_the_left = false; |
| *anything_to_the_left = false; |
| } else if rdr.ch_is('/') && rdr.nextch_is('*') { |
| read_block_comment(rdr, *code_to_the_left, comments); |
| *anything_to_the_left = true; |
| } else if rdr.ch_is('#') && rdr.nextch_is('!') { |
| read_shebang_comment(rdr, *code_to_the_left, comments); |
| *code_to_the_left = false; |
| *anything_to_the_left = false; |
| } else { |
| panic!(); |
| } |
| debug!("<<< consume comment"); |
| } |
| |
| #[derive(Clone)] |
| pub struct Literal { |
| pub lit: String, |
| pub pos: BytePos, |
| } |
| |
| // it appears this function is called only from pprust... that's |
| // probably not a good thing. |
| pub fn gather_comments_and_literals(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) |
| -> (Vec<Comment>, Vec<Literal>) |
| { |
| let mut src = String::new(); |
| srdr.read_to_string(&mut src).unwrap(); |
| let cm = SourceMap::new(sess.source_map().path_mapping().clone()); |
| let source_file = cm.new_source_file(path, src); |
| let mut rdr = lexer::StringReader::new_raw(sess, source_file, None); |
| |
| let mut comments: Vec<Comment> = Vec::new(); |
| let mut literals: Vec<Literal> = Vec::new(); |
| let mut code_to_the_left = false; // Only code |
| let mut anything_to_the_left = false; // Code or comments |
| |
| while !rdr.is_eof() { |
| loop { |
| // Eat all the whitespace and count blank lines. |
| rdr.consume_non_eol_whitespace(); |
| if rdr.ch_is('\n') { |
| if anything_to_the_left { |
| rdr.bump(); // The line is not blank, do not count. |
| } |
| consume_whitespace_counting_blank_lines(&mut rdr, &mut comments); |
| code_to_the_left = false; |
| anything_to_the_left = false; |
| } |
| // Eat one comment group |
| if rdr.peeking_at_comment() { |
| consume_comment(&mut rdr, &mut comments, |
| &mut code_to_the_left, &mut anything_to_the_left); |
| } else { |
| break |
| } |
| } |
| |
| let bstart = rdr.pos; |
| rdr.next_token(); |
| // discard, and look ahead; we're working with internal state |
| let TokenAndSpan { tok, sp } = rdr.peek(); |
| if tok.is_lit() { |
| rdr.with_str_from(bstart, |s| { |
| debug!("tok lit: {}", s); |
| literals.push(Literal { |
| lit: s.to_string(), |
| pos: sp.lo(), |
| }); |
| }) |
| } else { |
| debug!("tok: {}", pprust::token_to_string(&tok)); |
| } |
| code_to_the_left = true; |
| anything_to_the_left = true; |
| } |
| |
| (comments, literals) |
| } |
| |
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that stripping the decoration from `comment` yields `expected`.
    fn check(comment: &str, expected: &str) {
        assert_eq!(strip_doc_comment_decoration(comment), expected);
    }

    /// A shared `*` column is removed; extra stars after it are kept.
    #[test]
    fn test_block_doc_comment_1() {
        check("/**\n * Test \n ** Test\n * Test\n*/", " Test \n* Test\n Test");
    }

    /// A shared `*` column is removed from every line.
    #[test]
    fn test_block_doc_comment_2() {
        check("/**\n * Test\n * Test\n*/", " Test\n Test");
    }

    /// Lines that do not all start with a `*` column are left untouched.
    #[test]
    fn test_block_doc_comment_3() {
        check("/**\n let a: *i32;\n *a = 5;\n*/", " let a: *i32;\n *a = 5;");
    }

    /// All-star first and last lines are dropped as decoration.
    #[test]
    fn test_block_doc_comment_4() {
        check("/*******************\n test\n *********************/", " test");
    }

    /// Each one-line prefix is stripped, with and without a following space.
    /// The `//!` cases replace a duplicated `"// test"` assertion so that
    /// every recognized prefix is exercised.
    #[test]
    fn test_line_doc_comment() {
        check("/// test", " test");
        check("///! test", " test");
        check("//! test", " test");
        check("// test", " test");
        check("///test", "test");
        check("///!test", "test");
        check("//!test", "test");
        check("//test", "test");
    }
}