| #![cfg(feature = "inline")] |
| use std::borrow::Cow; |
| use std::fmt; |
| |
| use crate::text::{DiffableStr, TextDiff}; |
| use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; |
| use crate::{capture_diff_deadline, get_diff_ratio}; |
| |
| use std::ops::Index; |
| use std::time::{Duration, Instant}; |
| |
| use super::utils::upper_seq_ratio; |
| |
| struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> { |
| strings: &'bufs [&'s T], |
| seqs: Vec<(&'s T, usize, usize)>, |
| } |
| |
| impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> { |
| fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> { |
| let mut seqs = Vec::new(); |
| for (string_idx, string) in strings.iter().enumerate() { |
| let mut offset = 0; |
| let iter = { |
| #[cfg(feature = "unicode")] |
| { |
| string.tokenize_unicode_words() |
| } |
| #[cfg(not(feature = "unicode"))] |
| { |
| string.tokenize_words() |
| } |
| }; |
| for word in iter { |
| seqs.push((word, string_idx, offset)); |
| offset += word.len(); |
| } |
| } |
| MultiLookup { strings, seqs } |
| } |
| |
| pub fn len(&self) -> usize { |
| self.seqs.len() |
| } |
| |
| fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> { |
| let mut last = None; |
| let mut rv = Vec::new(); |
| |
| for offset in 0..len { |
| let (s, str_idx, char_idx) = self.seqs[idx + offset]; |
| last = match last { |
| None => Some((str_idx, char_idx, s.len())), |
| Some((last_str_idx, start_char_idx, last_len)) => { |
| if last_str_idx == str_idx { |
| Some((str_idx, start_char_idx, last_len + s.len())) |
| } else { |
| rv.push(( |
| last_str_idx, |
| self.strings[last_str_idx] |
| .slice(start_char_idx..start_char_idx + last_len), |
| )); |
| Some((str_idx, char_idx, s.len())) |
| } |
| } |
| }; |
| } |
| |
| if let Some((str_idx, start_char_idx, len)) = last { |
| rv.push(( |
| str_idx, |
| self.strings[str_idx].slice(start_char_idx..start_char_idx + len), |
| )); |
| } |
| |
| rv |
| } |
| } |
| |
| impl<'bufs, 's, T: DiffableStr + ?Sized> Index<usize> for MultiLookup<'bufs, 's, T> { |
| type Output = T; |
| |
| fn index(&self, index: usize) -> &Self::Output { |
| self.seqs[index].0 |
| } |
| } |
| |
| fn push_values<'s, T: DiffableStr + ?Sized>( |
| v: &mut Vec<Vec<(bool, &'s T)>>, |
| idx: usize, |
| emphasized: bool, |
| s: &'s T, |
| ) { |
| v.resize_with(v.len().max(idx + 1), Vec::new); |
| // newlines cause all kinds of wacky stuff if they end up highlighted. |
| // because of this we want to unemphasize all newlines we encounter. |
| if emphasized { |
| for seg in s.tokenize_lines_and_newlines() { |
| v[idx].push((!seg.ends_with_newline(), seg)); |
| } |
| } else { |
| v[idx].push((false, s)); |
| } |
| } |
| |
| /// Represents the expanded textual change with inline highlights. |
| /// |
| /// This is like [`Change`] but with inline highlight info. |
| #[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)] |
| #[cfg_attr(feature = "serde", derive(serde::Serialize))] |
| pub struct InlineChange<'s, T: DiffableStr + ?Sized> { |
| tag: ChangeTag, |
| old_index: Option<usize>, |
| new_index: Option<usize>, |
| values: Vec<(bool, &'s T)>, |
| } |
| |
| impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> { |
| /// Returns the change tag. |
| pub fn tag(&self) -> ChangeTag { |
| self.tag |
| } |
| |
| /// Returns the old index if available. |
| pub fn old_index(&self) -> Option<usize> { |
| self.old_index |
| } |
| |
| /// Returns the new index if available. |
| pub fn new_index(&self) -> Option<usize> { |
| self.new_index |
| } |
| |
| /// Returns the changed values. |
| /// |
| /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` |
| /// is true if it should be highlighted as an inline diff. |
| /// |
| /// Depending on the type of the underlying [`DiffableStr`] this value is |
| /// more or less useful. If you always want to have a utf-8 string it's |
| /// better to use the [`InlineChange::iter_strings_lossy`] method. |
| pub fn values(&self) -> &[(bool, &'s T)] { |
| &self.values |
| } |
| |
| /// Iterates over all (potentially lossy) utf-8 decoded values. |
| /// |
| /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` |
| /// is true if it should be highlighted as an inline diff. |
| /// |
| /// By default, words are split by whitespace, which results in coarser diff. |
| /// For example: `"f(x) y"` is tokenized as `["f(x)", "y"]`. |
| /// |
| /// If you want it to be tokenized instead as `["f(", "x", ")"]`, |
| /// you should enable the `"unicode"` flag. |
| pub fn iter_strings_lossy(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> { |
| self.values() |
| .iter() |
| .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy())) |
| } |
| |
| /// Returns `true` if this change does not end in a newline and must be |
| /// followed up by one if line based diffs are used. |
| pub fn missing_newline(&self) -> bool { |
| !self.values.last().map_or(true, |x| x.1.ends_with_newline()) |
| } |
| } |
| |
| impl<'s, T: DiffableStr + ?Sized> From<Change<&'s T>> for InlineChange<'s, T> { |
| fn from(change: Change<&'s T>) -> InlineChange<'s, T> { |
| InlineChange { |
| tag: change.tag(), |
| old_index: change.old_index(), |
| new_index: change.new_index(), |
| values: vec![(false, change.value())], |
| } |
| } |
| } |
| |
| impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| for (emphasized, value) in self.iter_strings_lossy() { |
| let marker = match (emphasized, self.tag) { |
| (false, _) | (true, ChangeTag::Equal) => "", |
| (true, ChangeTag::Delete) => "-", |
| (true, ChangeTag::Insert) => "+", |
| }; |
| write!(f, "{}{}{}", marker, value, marker)?; |
| } |
| if self.missing_newline() { |
| writeln!(f)?; |
| } |
| Ok(()) |
| } |
| } |
| |
| const MIN_RATIO: f32 = 0.5; |
| const TIMEOUT_MS: u64 = 500; |
| |
| pub(crate) fn iter_inline_changes<'x, 'diff, 'old, 'new, 'bufs, T>( |
| diff: &'diff TextDiff<'old, 'new, 'bufs, T>, |
| op: &DiffOp, |
| ) -> impl Iterator<Item = InlineChange<'x, T>> + 'diff |
| where |
| T: DiffableStr + ?Sized, |
| 'x: 'diff, |
| 'old: 'x, |
| 'new: 'x, |
| { |
| let (tag, old_range, new_range) = op.as_tag_tuple(); |
| |
| if let DiffTag::Equal | DiffTag::Insert | DiffTag::Delete = tag { |
| return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>; |
| } |
| |
| let mut old_index = old_range.start; |
| let mut new_index = new_range.start; |
| let old_slices = &diff.old_slices()[old_range]; |
| let new_slices = &diff.new_slices()[new_range]; |
| |
| if upper_seq_ratio(old_slices, new_slices) < MIN_RATIO { |
| return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>; |
| } |
| |
| let old_lookup = MultiLookup::new(old_slices); |
| let new_lookup = MultiLookup::new(new_slices); |
| |
| let ops = capture_diff_deadline( |
| Algorithm::Patience, |
| &old_lookup, |
| 0..old_lookup.len(), |
| &new_lookup, |
| 0..new_lookup.len(), |
| Some(Instant::now() + Duration::from_millis(TIMEOUT_MS)), |
| ); |
| |
| if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < MIN_RATIO { |
| return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>; |
| } |
| |
| let mut old_values = Vec::<Vec<_>>::new(); |
| let mut new_values = Vec::<Vec<_>>::new(); |
| |
| for op in ops { |
| match op { |
| DiffOp::Equal { |
| old_index, |
| len, |
| new_index, |
| } => { |
| for (idx, slice) in old_lookup.get_original_slices(old_index, len) { |
| push_values(&mut old_values, idx, false, slice); |
| } |
| for (idx, slice) in new_lookup.get_original_slices(new_index, len) { |
| push_values(&mut new_values, idx, false, slice); |
| } |
| } |
| DiffOp::Delete { |
| old_index, old_len, .. |
| } => { |
| for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { |
| push_values(&mut old_values, idx, true, slice); |
| } |
| } |
| DiffOp::Insert { |
| new_index, new_len, .. |
| } => { |
| for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { |
| push_values(&mut new_values, idx, true, slice); |
| } |
| } |
| DiffOp::Replace { |
| old_index, |
| old_len, |
| new_index, |
| new_len, |
| } => { |
| for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { |
| push_values(&mut old_values, idx, true, slice); |
| } |
| for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { |
| push_values(&mut new_values, idx, true, slice); |
| } |
| } |
| } |
| } |
| |
| let mut rv = Vec::new(); |
| |
| for values in old_values { |
| rv.push(InlineChange { |
| tag: ChangeTag::Delete, |
| old_index: Some(old_index), |
| new_index: None, |
| values, |
| }); |
| old_index += 1; |
| } |
| |
| for values in new_values { |
| rv.push(InlineChange { |
| tag: ChangeTag::Insert, |
| old_index: None, |
| new_index: Some(new_index), |
| values, |
| }); |
| new_index += 1; |
| } |
| |
| Box::new(rv.into_iter()) as Box<dyn Iterator<Item = _>> |
| } |
| |
| #[test] |
| fn test_line_ops_inline() { |
| let diff = TextDiff::from_lines( |
| "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", |
| "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", |
| ); |
| assert!(diff.newline_terminated()); |
| let changes = diff |
| .ops() |
| .iter() |
| .flat_map(|op| diff.iter_inline_changes(op)) |
| .collect::<Vec<_>>(); |
| insta::assert_debug_snapshot!(&changes); |
| } |
| |
| #[test] |
| #[cfg(feature = "serde")] |
| fn test_serde() { |
| let diff = TextDiff::from_lines( |
| "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", |
| "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", |
| ); |
| assert!(diff.newline_terminated()); |
| let changes = diff |
| .ops() |
| .iter() |
| .flat_map(|op| diff.iter_inline_changes(op)) |
| .collect::<Vec<_>>(); |
| let json = serde_json::to_string_pretty(&changes).unwrap(); |
| insta::assert_snapshot!(&json); |
| } |