src/word_splitters.rs - platform/external/rust/crates/textwrap - Git at Google

 //! Word splitting functionality.
 //!
 //! To wrap text into lines, long words sometimes need to be split
 //! across lines. The [`WordSplitter`] trait defines this
 //! functionality. [`HyphenSplitter`] is the default implementation of
 //! this trait: it will simply split words on existing hyphens.

 use std::ops::Deref;

 use crate::core::{display_width, Word};

 /// The `WordSplitter` trait describes where words can be split.
 ///
 /// If the textwrap crate has been compiled with the `hyphenation`
 /// Cargo feature enabled, you will find an implementation of
 /// `WordSplitter` by the `hyphenation::Standard` struct. Use this
 /// struct for language-aware hyphenation:
 ///
 /// ```
 /// #[cfg(feature = "hyphenation")]
 /// {
 ///     use hyphenation::{Language, Load, Standard};
 ///     use textwrap::{wrap, Options};
 ///
 ///     let text = "Oxidation is the loss of electrons.";
 ///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
 ///     let options = Options::new(8).word_splitter(dictionary);
 ///     assert_eq!(wrap(text, &options), vec!["Oxida-",
 ///                                           "tion is",
 ///                                           "the loss",
 ///                                           "of elec-",
 ///                                           "trons."]);
 /// }
 /// ```
 ///
 /// Please see the documentation for the [hyphenation] crate for more
 /// details.
 ///
 /// [hyphenation]: https://docs.rs/hyphenation/
 pub trait WordSplitter: WordSplitterClone + std::fmt::Debug {
     /// Return all possible indices where `word` can be split.
     ///
     /// The indices returned must be in range `0..word.len()`. They
     /// should point to the index _after_ the split point, i.e., after
     /// `-` if splitting on hyphens. This way, `word.split_at(idx)`
     /// will break the word into two well-formed pieces.
     ///
     /// The indices are byte offsets into `word` and are expected in
     /// ascending order: [`split_words`] slices the word between
     /// consecutive indices, and string slicing panics when an index is
     /// out of order or does not fall on a `char` boundary.
     ///
     /// # Examples
     ///
     /// ```
     /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
     /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
     /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
     /// ```
     fn split_points(&self, word: &str) -> Vec<usize>;
 }

 // The internal `WordSplitterClone` trait allows us to implement
 // `Clone` for `Box<dyn WordSplitter>`. This is used in the
 // `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
 // WrapAlgo, WordSep, WordSplit>` implementation.
 #[doc(hidden)]
 pub trait WordSplitterClone {
     /// Return a copy of `self` boxed as a `WordSplitter` trait object.
     fn clone_box(&self) -> Box<dyn WordSplitter>;
 }

 // Blanket implementation: any cloneable, `'static` word splitter can hand
 // out a boxed copy of itself.
 impl<T> WordSplitterClone for T
 where
     T: WordSplitter + Clone + 'static,
 {
     fn clone_box(&self) -> Box<dyn WordSplitter> {
         let duplicate: T = self.clone();
         Box::new(duplicate)
     }
 }

 impl Clone for Box<dyn WordSplitter> {
     fn clone(&self) -> Box<dyn WordSplitter> {
         self.deref().clone_box()
     }
 }

 // A boxed splitter forwards `split_points` to the splitter it holds.
 impl WordSplitter for Box<dyn WordSplitter> {
     fn split_points(&self, word: &str) -> Vec<usize> {
         let inner = self.deref();
         inner.split_points(word)
     }
 }

 /// A word splitter that never splits a word.
 ///
 /// Use this as a [`Options.word_splitter`] to avoid any kind of
 /// hyphenation:
 ///
 /// ```
 /// use textwrap::{wrap, Options};
 /// use textwrap::word_splitters::NoHyphenation;
 ///
 /// let options = Options::new(8).word_splitter(NoHyphenation);
 /// assert_eq!(wrap("foo bar-baz", &options),
 ///            vec!["foo", "bar-baz"]);
 /// ```
 ///
 /// [`Options.word_splitter`]: super::Options::word_splitter
 #[derive(Clone, Copy, Debug)]
 pub struct NoHyphenation;

 /// `NoHyphenation` implements `WordSplitter` by reporting no split
 /// points, whatever the word is.
 impl WordSplitter for NoHyphenation {
     fn split_points(&self, _word: &str) -> Vec<usize> {
         vec![]
     }
 }

 /// Simple and default way to split words: splitting on existing
 /// hyphens only.
 ///
 /// Only hyphens with an alphanumeric character on both sides are
 /// used as split points.
 ///
 /// You probably don't need to use this type since it's already used
 /// by default by [`Options::new`](super::Options::new).
 #[derive(Clone, Copy, Debug)]
 pub struct HyphenSplitter;

 /// `HyphenSplitter` is the default `WordSplitter` used by
 /// [`Options::new`](super::Options::new). It splits words at the
 /// hyphens that already exist in the word.
 ///
 /// A hyphen only counts when the characters directly before and
 /// after it are alphanumeric, so a word like `"--foo-bar"` is not
 /// split into `"--"` and `"foo-bar"`.
 impl WordSplitter for HyphenSplitter {
     fn split_points(&self, word: &str) -> Vec<usize> {
         let is_alphanumeric = |ch: Option<char>| ch.map_or(false, char::is_alphanumeric);

         word.match_indices('-')
             .filter(|&(hyphen, _)| {
                 // Look at the neighbours of the hyphen: repeated or
                 // leading/trailing hyphens (as in --foo-bar) must not
                 // become split points.
                 let before = word[..hyphen].chars().next_back();
                 let after = word[hyphen + 1..].chars().next();
                 is_alphanumeric(before) && is_alphanumeric(after)
             })
             // The split point sits just past the one-byte '-'.
             .map(|(hyphen, _)| hyphen + 1)
             .collect()
     }
 }

 /// Language-specific hyphenation: a dictionary from the
 /// [hyphenation] crate can be used directly as a `WordSplitter`.
 ///
 /// **Note:** Only available when the `hyphenation` Cargo feature is
 /// enabled.
 ///
 /// [hyphenation]: https://docs.rs/hyphenation/
 #[cfg(feature = "hyphenation")]
 impl WordSplitter for hyphenation::Standard {
     fn split_points(&self, word: &str) -> Vec<usize> {
         use hyphenation::Hyphenator;

         // The break indices computed by the dictionary are the split points.
         let hyphenated = self.hyphenate(word);
         hyphenated.breaks
     }
 }

 /// Break every word into the pieces delimited by the split points
 /// that `word_splitter` reports for it.
 ///
 /// All words are split, regardless of their length. This keeps the
 /// business of splitting (including automatic hyphenation) cleanly
 /// separated from the business of word wrapping.
 ///
 /// Every piece except the last gets empty `whitespace`, and a `"-"`
 /// penalty unless the text up to the split point already ends with a
 /// hyphen. The last piece inherits the `whitespace` and `penalty` of
 /// the original word.
 ///
 /// # Examples
 ///
 /// ```
 /// use textwrap::core::Word;
 /// use textwrap::word_splitters::{split_words, NoHyphenation, HyphenSplitter};
 ///
 /// assert_eq!(
 ///     split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(),
 ///     vec![Word::from("foo-"), Word::from("bar")]
 /// );
 ///
 /// // The NoHyphenation splitter ignores the '-':
 /// assert_eq!(
 ///     split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(),
 ///     vec![Word::from("foo-bar")]
 /// );
 /// ```
 pub fn split_words<'a, I, WordSplit>(
     words: I,
     word_splitter: &'a WordSplit,
 ) -> impl Iterator<Item = Word<'a>>
 where
     I: IntoIterator<Item = Word<'a>>,
     WordSplit: WordSplitter,
 {
     words.into_iter().flat_map(move |word| {
         // Start of the piece that has not been emitted yet.
         let mut start = 0;
         // Set once the final piece has been handed out.
         let mut tail_emitted = false;
         let mut breaks = word_splitter.split_points(&word).into_iter();

         std::iter::from_fn(move || match breaks.next() {
             Some(end) => {
                 // No extra hyphen is needed when the text before the
                 // split point already ends with one.
                 let need_hyphen = !word.word[..end].ends_with('-');
                 let piece = &word.word[start..end];
                 start = end;
                 Some(Word {
                     word: piece,
                     width: display_width(piece),
                     whitespace: "",
                     penalty: if need_hyphen { "-" } else { "" },
                 })
             }
             // The remainder of the word. `start == 0` makes sure that a
             // word without split points (even an empty one) is emitted
             // exactly once.
             None if !tail_emitted && (start == 0 || start < word.word.len()) => {
                 let piece = &word.word[start..];
                 tail_emitted = true;
                 Some(Word {
                     word: piece,
                     width: display_width(piece),
                     whitespace: word.whitespace,
                     penalty: word.penalty,
                 })
             }
             None => None,
         })
     })
 }

 // Unit tests for `split_words`.
 #[cfg(test)]
 mod tests {
     use super::*;

     // Like assert_eq!, but the left expression is an iterator.
     macro_rules! assert_iter_eq {
         ($left:expr, $right:expr) => {
             assert_eq!($left.collect::<Vec<_>>(), $right);
         };
     }

     // No input words gives no output words.
     #[test]
     fn split_words_no_words() {
         assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]);
     }

     // A whitespace-only word passes through unchanged.
     #[test]
     fn split_words_empty_word() {
         assert_iter_eq!(
             split_words(vec![Word::from("   ")], &HyphenSplitter),
             vec![Word::from("   ")]
         );
     }

     // A word without hyphens is returned as a single piece.
     #[test]
     fn split_words_single_word() {
         assert_iter_eq!(
             split_words(vec![Word::from("foobar")], &HyphenSplitter),
             vec![Word::from("foobar")]
         );
     }

     // The hyphen stays attached to the first piece.
     #[test]
     fn split_words_hyphen_splitter() {
         assert_iter_eq!(
             split_words(vec![Word::from("foo-bar")], &HyphenSplitter),
             vec![Word::from("foo-"), Word::from("bar")]
         );
     }

     // A "-" penalty is added to a piece only when the text before the
     // split point does not already end with a hyphen.
     #[test]
     fn split_words_adds_penalty() {
         // Splitter that always reports a single split point at index 3.
         #[derive(Clone, Debug)]
         struct FixedSplitPoint;
         impl WordSplitter for FixedSplitPoint {
             fn split_points(&self, _: &str) -> Vec<usize> {
                 vec![3]
             }
         }

         // "foo" does not end with '-', so it gets the "-" penalty.
         assert_iter_eq!(
             split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint),
             vec![
                 Word {
                     word: "foo",
                     width: 3,
                     whitespace: "",
                     penalty: "-"
                 },
                 Word {
                     word: "bar",
                     width: 3,
                     whitespace: "",
                     penalty: ""
                 }
             ]
         );

         // "fo-" already ends with '-', so no penalty is added.
         assert_iter_eq!(
             split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint),
             vec![
                 Word {
                     word: "fo-",
                     width: 3,
                     whitespace: "",
                     penalty: ""
                 },
                 Word {
                     word: "bar",
                     width: 3,
                     whitespace: "",
                     penalty: ""
                 }
             ]
         );
     }
 }
	//! Word splitting functionality.
	//!
	//! To wrap text into lines, long words sometimes need to be split
	//! across lines. The [`WordSplitter`] trait defines this
	//! functionality. [`HyphenSplitter`] is the default implementation of
	//! this trait: it will simply split words on existing hyphens.

	use std::ops::Deref;

	use crate::core::{display_width, Word};

	/// The `WordSplitter` trait describes where words can be split.
	///
	/// If the textwrap crate has been compiled with the `hyphenation`
	/// Cargo feature enabled, you will find an implementation of
	/// `WordSplitter` by the `hyphenation::Standard` struct. Use this
	/// struct for language-aware hyphenation:
	///
	/// ```
	/// #[cfg(feature = "hyphenation")]
	/// {
	///     use hyphenation::{Language, Load, Standard};
	///     use textwrap::{wrap, Options};
	///
	///     let text = "Oxidation is the loss of electrons.";
	///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
	///     let options = Options::new(8).word_splitter(dictionary);
	///     assert_eq!(wrap(text, &options), vec!["Oxida-",
	///                                           "tion is",
	///                                           "the loss",
	///                                           "of elec-",
	///                                           "trons."]);
	/// }
	/// ```
	///
	/// Please see the documentation for the [hyphenation] crate for more
	/// details.
	///
	/// [hyphenation]: https://docs.rs/hyphenation/
	pub trait WordSplitter: WordSplitterClone + std::fmt::Debug {
	/// Return all possible indices where `word` can be split.
	///
	/// The indices returned must be in range `0..word.len()`. They
	/// should point to the index _after_ the split point, i.e., after
	/// `-` if splitting on hyphens. This way, `word.split_at(idx)`
	/// will break the word into two well-formed pieces.
	///
	/// The indices are byte offsets into `word` and are expected in
	/// ascending order: [`split_words`] slices the word between
	/// consecutive indices, and string slicing panics when an index is
	/// out of order or does not fall on a `char` boundary.
	///
	/// # Examples
	///
	/// ```
	/// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
	/// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
	/// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
	/// ```
	fn split_points(&self, word: &str) -> Vec<usize>;
	}

	// The internal `WordSplitterClone` trait allows us to implement
	// `Clone` for `Box<dyn WordSplitter>`. This is used in the
	// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
	// WrapAlgo, WordSep, WordSplit>` implementation.
	#[doc(hidden)]
	pub trait WordSplitterClone {
	/// Return a copy of `self` boxed as a `WordSplitter` trait object.
	fn clone_box(&self) -> Box<dyn WordSplitter>;
	}

	// Blanket implementation: any cloneable, `'static` word splitter can hand
	// out a boxed copy of itself.
	impl<T> WordSplitterClone for T
	where
	    T: WordSplitter + Clone + 'static,
	{
	    fn clone_box(&self) -> Box<dyn WordSplitter> {
	        let duplicate: T = self.clone();
	        Box::new(duplicate)
	    }
	}

	impl Clone for Box<dyn WordSplitter> {
	fn clone(&self) -> Box<dyn WordSplitter> {
	self.deref().clone_box()
	}
	}

	// A boxed splitter forwards `split_points` to the splitter it holds.
	impl WordSplitter for Box<dyn WordSplitter> {
	    fn split_points(&self, word: &str) -> Vec<usize> {
	        let inner = self.deref();
	        inner.split_points(word)
	    }
	}

	/// A word splitter that never splits a word.
	///
	/// Use this as a [`Options.word_splitter`] to avoid any kind of
	/// hyphenation:
	///
	/// ```
	/// use textwrap::{wrap, Options};
	/// use textwrap::word_splitters::NoHyphenation;
	///
	/// let options = Options::new(8).word_splitter(NoHyphenation);
	/// assert_eq!(wrap("foo bar-baz", &options),
	///            vec!["foo", "bar-baz"]);
	/// ```
	///
	/// [`Options.word_splitter`]: super::Options::word_splitter
	#[derive(Clone, Copy, Debug)]
	pub struct NoHyphenation;

	/// `NoHyphenation` implements `WordSplitter` by reporting no split
	/// points, whatever the word is.
	impl WordSplitter for NoHyphenation {
	    fn split_points(&self, _word: &str) -> Vec<usize> {
	        vec![]
	    }
	}

	/// Simple and default way to split words: splitting on existing
	/// hyphens only.
	///
	/// Only hyphens with an alphanumeric character on both sides are
	/// used as split points.
	///
	/// You probably don't need to use this type since it's already used
	/// by default by [`Options::new`](super::Options::new).
	#[derive(Clone, Copy, Debug)]
	pub struct HyphenSplitter;

	/// `HyphenSplitter` is the default `WordSplitter` used by
	/// [`Options::new`](super::Options::new). It splits words at the
	/// hyphens that already exist in the word.
	///
	/// A hyphen only counts when the characters directly before and
	/// after it are alphanumeric, so a word like `"--foo-bar"` is not
	/// split into `"--"` and `"foo-bar"`.
	impl WordSplitter for HyphenSplitter {
	    fn split_points(&self, word: &str) -> Vec<usize> {
	        let is_alphanumeric = |ch: Option<char>| ch.map_or(false, char::is_alphanumeric);

	        word.match_indices('-')
	            .filter(|&(hyphen, _)| {
	                // Look at the neighbours of the hyphen: repeated or
	                // leading/trailing hyphens (as in --foo-bar) must not
	                // become split points.
	                let before = word[..hyphen].chars().next_back();
	                let after = word[hyphen + 1..].chars().next();
	                is_alphanumeric(before) && is_alphanumeric(after)
	            })
	            // The split point sits just past the one-byte '-'.
	            .map(|(hyphen, _)| hyphen + 1)
	            .collect()
	    }
	}

	/// Language-specific hyphenation: a dictionary from the
	/// [hyphenation] crate can be used directly as a `WordSplitter`.
	///
	/// **Note:** Only available when the `hyphenation` Cargo feature is
	/// enabled.
	///
	/// [hyphenation]: https://docs.rs/hyphenation/
	#[cfg(feature = "hyphenation")]
	impl WordSplitter for hyphenation::Standard {
	    fn split_points(&self, word: &str) -> Vec<usize> {
	        use hyphenation::Hyphenator;

	        // The break indices computed by the dictionary are the split points.
	        let hyphenated = self.hyphenate(word);
	        hyphenated.breaks
	    }
	}

	/// Break every word into the pieces delimited by the split points
	/// that `word_splitter` reports for it.
	///
	/// All words are split, regardless of their length. This keeps the
	/// business of splitting (including automatic hyphenation) cleanly
	/// separated from the business of word wrapping.
	///
	/// Every piece except the last gets empty `whitespace`, and a `"-"`
	/// penalty unless the text up to the split point already ends with a
	/// hyphen. The last piece inherits the `whitespace` and `penalty` of
	/// the original word.
	///
	/// # Examples
	///
	/// ```
	/// use textwrap::core::Word;
	/// use textwrap::word_splitters::{split_words, NoHyphenation, HyphenSplitter};
	///
	/// assert_eq!(
	///     split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(),
	///     vec![Word::from("foo-"), Word::from("bar")]
	/// );
	///
	/// // The NoHyphenation splitter ignores the '-':
	/// assert_eq!(
	///     split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(),
	///     vec![Word::from("foo-bar")]
	/// );
	/// ```
	pub fn split_words<'a, I, WordSplit>(
	    words: I,
	    word_splitter: &'a WordSplit,
	) -> impl Iterator<Item = Word<'a>>
	where
	    I: IntoIterator<Item = Word<'a>>,
	    WordSplit: WordSplitter,
	{
	    words.into_iter().flat_map(move |word| {
	        // Start of the piece that has not been emitted yet.
	        let mut start = 0;
	        // Set once the final piece has been handed out.
	        let mut tail_emitted = false;
	        let mut breaks = word_splitter.split_points(&word).into_iter();

	        std::iter::from_fn(move || match breaks.next() {
	            Some(end) => {
	                // No extra hyphen is needed when the text before the
	                // split point already ends with one.
	                let need_hyphen = !word.word[..end].ends_with('-');
	                let piece = &word.word[start..end];
	                start = end;
	                Some(Word {
	                    word: piece,
	                    width: display_width(piece),
	                    whitespace: "",
	                    penalty: if need_hyphen { "-" } else { "" },
	                })
	            }
	            // The remainder of the word. `start == 0` makes sure that a
	            // word without split points (even an empty one) is emitted
	            // exactly once.
	            None if !tail_emitted && (start == 0 || start < word.word.len()) => {
	                let piece = &word.word[start..];
	                tail_emitted = true;
	                Some(Word {
	                    word: piece,
	                    width: display_width(piece),
	                    whitespace: word.whitespace,
	                    penalty: word.penalty,
	                })
	            }
	            None => None,
	        })
	    })
	}

	// Unit tests for `split_words`.
	#[cfg(test)]
	mod tests {
	use super::*;

	// Like assert_eq!, but the left expression is an iterator.
	macro_rules! assert_iter_eq {
	($left:expr, $right:expr) => {
	assert_eq!($left.collect::<Vec<_>>(), $right);
	};
	}

	// No input words gives no output words.
	#[test]
	fn split_words_no_words() {
	assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]);
	}

	// A whitespace-only word passes through unchanged.
	#[test]
	fn split_words_empty_word() {
	assert_iter_eq!(
	split_words(vec![Word::from(" ")], &HyphenSplitter),
	vec![Word::from(" ")]
	);
	}

	// A word without hyphens is returned as a single piece.
	#[test]
	fn split_words_single_word() {
	assert_iter_eq!(
	split_words(vec![Word::from("foobar")], &HyphenSplitter),
	vec![Word::from("foobar")]
	);
	}

	// The hyphen stays attached to the first piece.
	#[test]
	fn split_words_hyphen_splitter() {
	assert_iter_eq!(
	split_words(vec![Word::from("foo-bar")], &HyphenSplitter),
	vec![Word::from("foo-"), Word::from("bar")]
	);
	}

	// A "-" penalty is added to a piece only when the text before the
	// split point does not already end with a hyphen.
	#[test]
	fn split_words_adds_penalty() {
	// Splitter that always reports a single split point at index 3.
	#[derive(Clone, Debug)]
	struct FixedSplitPoint;
	impl WordSplitter for FixedSplitPoint {
	fn split_points(&self, _: &str) -> Vec<usize> {
	vec![3]
	}
	}

	// "foo" does not end with '-', so it gets the "-" penalty.
	assert_iter_eq!(
	split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint),
	vec![
	Word {
	word: "foo",
	width: 3,
	whitespace: "",
	penalty: "-"
	},
	Word {
	word: "bar",
	width: 3,
	whitespace: "",
	penalty: ""
	}
	]
	);

	// "fo-" already ends with '-', so no penalty is added.
	assert_iter_eq!(
	split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint),
	vec![
	Word {
	word: "fo-",
	width: 3,
	whitespace: "",
	penalty: ""
	},
	Word {
	word: "bar",
	width: 3,
	whitespace: "",
	penalty: ""
	}
	]
	);
	}
	}