src/windows/wtf8/code_points.rs - platform/external/rust/crates/os_str_bytes - Git at Google

 use std::iter::FusedIterator;
 use std::iter::Peekable;
 use std::mem;

 use crate::util::is_continuation;
 use crate::util::BYTE_SHIFT;
 use crate::util::CONT_MASK;

 use super::EncodingError;
 use super::Result;

 /// Decoder state for reading a stream of bytes one code point at a time.
 ///
 /// Holds the byte source plus two flags that are updated while decoding.
 pub(in super::super) struct CodePoints<I: Iterator<Item = u8>> {
     /// Byte source; peekable so the next byte can be inspected before it is
     /// taken.
     iter: Peekable<I>,
     /// Raised while decoding a leading (high) surrogate and cleared again at
     /// the start of each `next` call, so it describes the immediately
     /// preceding code point.
     surrogate: bool,
     /// Starts out `true` and is cleared as soon as any surrogate code point
     /// is decoded.
     still_utf8: bool,
 }

 impl<I> CodePoints<I>
 where
     I: Iterator<Item = u8>,
 {
     /// Creates a decoder over the bytes produced by `string`.
     ///
     /// The decoder starts with no pending surrogate and with the input still
     /// considered UTF-8.
     pub(in super::super) fn new<S>(string: S) -> Self
     where
         S: IntoIterator<IntoIter = I>,
     {
         let iter = string.into_iter().peekable();
         Self {
             iter,
             surrogate: false,
             still_utf8: true,
         }
     }

     /// Returns `true` as long as no surrogate code point has been decoded.
     pub(super) fn is_still_utf8(&self) -> bool {
         self.still_utf8
     }

     /// Folds the next continuation byte into `code_point`.
     ///
     /// # Errors
     ///
     /// - `EncodingError::End` when no bytes remain.
     /// - `EncodingError::Byte` when the next byte is not a continuation
     ///   byte; that byte is left in the iterator and the surrogate flag is
     ///   cleared.
     fn consume_next(&mut self, code_point: &mut u32) -> Result<()> {
         let byte = match self.iter.peek() {
             Some(&peeked) => peeked,
             None => return Err(EncodingError::End()),
         };

         if is_continuation(byte) {
             // Make room for the new payload bits, then append them.
             let shifted = *code_point << BYTE_SHIFT;
             let payload = u32::from(byte & CONT_MASK);
             *code_point = shifted | payload;

             // Only now is the peeked byte actually taken from the stream.
             let removed = self.iter.next();
             debug_assert_eq!(Some(byte), removed);

             Ok(())
         } else {
             self.surrogate = false;
             // Not consuming this byte will be useful if this crate ever
             // offers a way to encode lossily.
             Err(EncodingError::Byte(byte))
         }
     }

     /// Returns the size hint of the wrapped byte iterator, i.e. a count of
     /// remaining bytes rather than of code points.
     pub(super) fn inner_size_hint(&self) -> (usize, Option<usize>) {
         self.iter.size_hint()
     }
 }

 // Marks `CodePoints` as fused whenever the wrapped byte iterator is itself
 // fused.
 impl<I: FusedIterator + Iterator<Item = u8>> FusedIterator for CodePoints<I> {}

 impl<I> Iterator for CodePoints<I>
 where
     I: Iterator<Item = u8>,
 {
     type Item = Result<u32>;

     /// Decodes the next code point from the byte stream.
     ///
     /// Yields `None` when the byte source is exhausted at the start of a
     /// code point. Otherwise yields `Ok(code_point)`, an
     /// `EncodingError::Byte` for a byte that may not appear at its position,
     /// whatever error `consume_next` reports for a missing or bad
     /// continuation byte, or an `EncodingError::CodePoint` when the bytes
     /// are structurally fine but the decoded value is rejected.
     fn next(&mut self) -> Option<Self::Item> {
         let lead = self.iter.next()?;
         let mut code_point = u32::from(lead);

         // Reads one continuation byte into `code_point`; on failure, leaves
         // `next` immediately with that error.
         macro_rules! pull_continuation {
             () => {{
                 match self.consume_next(&mut code_point) {
                     Ok(()) => {}
                     Err(error) => return Some(Err(error)),
                 }
             }};
         }

         // The flag always describes the previous code point only, so it is
         // reset here and re-raised below if this one is a high surrogate.
         let after_high_surrogate = mem::replace(&mut self.surrogate, false);

         let mut rejected = false;
         match lead {
             // ASCII: the byte is the code point.
             0x00..=0x7F => {}

             // Bytes below 0xC2 are never accepted as a lead byte.
             0x80..=0xC1 => return Some(Err(EncodingError::Byte(lead))),

             // Two-byte sequence.
             0xC2..=0xDF => {
                 code_point &= 0x1F;
                 pull_continuation!();
             }

             // Three-byte sequence.
             0xE0..=0xEF => {
                 code_point &= 0x0F;
                 pull_continuation!();

                 // This condition is optimized to detect surrogate code
                 // points from the partially decoded value.
                 if code_point & 0xFE0 == 0x360 {
                     self.still_utf8 = false;
                     if code_point & 0x10 == 0 {
                         // Leading (high) surrogate.
                         self.surrogate = true;
                     } else if after_high_surrogate {
                         // Decoding a broken surrogate pair would be lossy.
                         rejected = true;
                     }
                 }

                 // Partial value too small for a sequence of this length
                 // (presumably an overlong encoding).
                 if code_point < 0x20 {
                     rejected = true;
                 }
                 pull_continuation!();
             }

             // Four-byte sequence (and lead bytes beyond the valid range,
             // which fail the range check below).
             0xF0..=0xFF => {
                 code_point &= 0x0F;
                 pull_continuation!();

                 // Accept only partial values in 0x10..0x110; assuming
                 // BYTE_SHIFT is 6, that bounds the final value to
                 // 0x10000..=0x10FFFF.
                 if code_point.wrapping_sub(0x10) >= 0x100 {
                     rejected = true;
                 }
                 pull_continuation!();

                 if code_point < 0x20 {
                     rejected = true;
                 }
                 pull_continuation!();
             }
         }

         Some(if rejected {
             Err(EncodingError::CodePoint(code_point))
         } else {
             Ok(code_point)
         })
     }
 }
	use std::iter::FusedIterator;
	use std::iter::Peekable;
	use std::mem;

	use crate::util::is_continuation;
	use crate::util::BYTE_SHIFT;
	use crate::util::CONT_MASK;

	use super::EncodingError;
	use super::Result;

	/// Decoder state for reading a stream of bytes one code point at a time.
	///
	/// Holds the byte source plus two flags that are updated while decoding.
	pub(in super::super) struct CodePoints<I: Iterator<Item = u8>> {
	    /// Byte source; peekable so the next byte can be inspected before it is
	    /// taken.
	    iter: Peekable<I>,
	    /// Raised while decoding a leading (high) surrogate and cleared again at
	    /// the start of each `next` call, so it describes the immediately
	    /// preceding code point.
	    surrogate: bool,
	    /// Starts out `true` and is cleared as soon as any surrogate code point
	    /// is decoded.
	    still_utf8: bool,
	}

	impl<I> CodePoints<I>
	where
	    I: Iterator<Item = u8>,
	{
	    /// Creates a decoder over the bytes produced by `string`.
	    ///
	    /// The decoder starts with no pending surrogate and with the input still
	    /// considered UTF-8.
	    pub(in super::super) fn new<S>(string: S) -> Self
	    where
	        S: IntoIterator<IntoIter = I>,
	    {
	        Self {
	            iter: string.into_iter().peekable(),
	            surrogate: false,
	            still_utf8: true,
	        }
	    }

	    /// Returns `true` as long as no surrogate code point has been decoded.
	    pub(super) fn is_still_utf8(&self) -> bool {
	        self.still_utf8
	    }

	    /// Folds the next continuation byte into `code_point`.
	    ///
	    /// # Errors
	    ///
	    /// - `EncodingError::End` when no bytes remain.
	    /// - `EncodingError::Byte` when the next byte is not a continuation
	    ///   byte; that byte is left in the iterator and the surrogate flag is
	    ///   cleared.
	    fn consume_next(&mut self, code_point: &mut u32) -> Result<()> {
	        let &byte = self.iter.peek().ok_or(EncodingError::End())?;

	        if !is_continuation(byte) {
	            self.surrogate = false;
	            // Not consuming this byte will be useful if this crate ever offers
	            // a way to encode lossily.
	            return Err(EncodingError::Byte(byte));
	        }
	        // Make room for the new payload bits, then append them.
	        *code_point =
	            (*code_point << BYTE_SHIFT) | u32::from(byte & CONT_MASK);

	        // Only now is the peeked byte actually taken from the stream.
	        let removed = self.iter.next();
	        debug_assert_eq!(Some(byte), removed);

	        Ok(())
	    }

	    /// Returns the size hint of the wrapped byte iterator, i.e. a count of
	    /// remaining bytes rather than of code points.
	    pub(super) fn inner_size_hint(&self) -> (usize, Option<usize>) {
	        self.iter.size_hint()
	    }
	}

	// Marks `CodePoints` as fused whenever the wrapped byte iterator is itself
	// fused.
	impl<I: FusedIterator + Iterator<Item = u8>> FusedIterator for CodePoints<I> {}

	impl<I> Iterator for CodePoints<I>
	where
	    I: Iterator<Item = u8>,
	{
	    type Item = Result<u32>;

	    /// Decodes the next code point from the byte stream.
	    ///
	    /// Yields `None` when the byte source is exhausted at the start of a
	    /// code point. Otherwise yields `Ok(code_point)`, an
	    /// `EncodingError::Byte` for a byte that may not appear at its position,
	    /// whatever error `consume_next` reports for a missing or bad
	    /// continuation byte, or an `EncodingError::CodePoint` when the bytes
	    /// are structurally fine but the decoded value is rejected.
	    fn next(&mut self) -> Option<Self::Item> {
	        let lead = self.iter.next()?;
	        let mut code_point = u32::from(lead);

	        // Reads one continuation byte into `code_point`; on failure, leaves
	        // `next` immediately with that error.
	        macro_rules! pull_continuation {
	            () => {{
	                match self.consume_next(&mut code_point) {
	                    Ok(()) => {}
	                    Err(error) => return Some(Err(error)),
	                }
	            }};
	        }

	        // The flag always describes the previous code point only, so it is
	        // reset here and re-raised below if this one is a high surrogate.
	        let after_high_surrogate = mem::replace(&mut self.surrogate, false);

	        let mut rejected = false;
	        match lead {
	            // ASCII: the byte is the code point.
	            0x00..=0x7F => {}

	            // Bytes below 0xC2 are never accepted as a lead byte.
	            0x80..=0xC1 => return Some(Err(EncodingError::Byte(lead))),

	            // Two-byte sequence.
	            0xC2..=0xDF => {
	                code_point &= 0x1F;
	                pull_continuation!();
	            }

	            // Three-byte sequence.
	            0xE0..=0xEF => {
	                code_point &= 0x0F;
	                pull_continuation!();

	                // This condition is optimized to detect surrogate code
	                // points from the partially decoded value.
	                if code_point & 0xFE0 == 0x360 {
	                    self.still_utf8 = false;
	                    if code_point & 0x10 == 0 {
	                        // Leading (high) surrogate.
	                        self.surrogate = true;
	                    } else if after_high_surrogate {
	                        // Decoding a broken surrogate pair would be lossy.
	                        rejected = true;
	                    }
	                }

	                // Partial value too small for a sequence of this length
	                // (presumably an overlong encoding).
	                if code_point < 0x20 {
	                    rejected = true;
	                }
	                pull_continuation!();
	            }

	            // Four-byte sequence (and lead bytes beyond the valid range,
	            // which fail the range check below).
	            0xF0..=0xFF => {
	                code_point &= 0x0F;
	                pull_continuation!();

	                // Accept only partial values in 0x10..0x110; assuming
	                // BYTE_SHIFT is 6, that bounds the final value to
	                // 0x10000..=0x10FFFF.
	                if code_point.wrapping_sub(0x10) >= 0x100 {
	                    rejected = true;
	                }
	                pull_continuation!();

	                if code_point < 0x20 {
	                    rejected = true;
	                }
	                pull_continuation!();
	            }
	        }

	        Some(if rejected {
	            Err(EncodingError::CodePoint(code_point))
	        } else {
	            Ok(code_point)
	        })
	    }
	}