blob: d75ebae843b8b93b40d9d086b91757890e7b0e0f [file] [log] [blame]
use super::*;
use ciborium_io::Read;
use core::marker::PhantomData;
/// A parser for incoming segments
pub trait Parser: Default {
/// The type of item that is parsed
type Item: ?Sized;
/// The parsing error that may occur
type Error;
/// The main parsing function
///
/// This function processes the incoming bytes and returns the item.
///
/// One important detail that **MUST NOT** be overlooked is that the
/// parser may save data from a previous parsing attempt. The number of
/// bytes saved is indicated by the `Parser::saved()` function. The saved
/// bytes will be copied into the beginning of the `bytes` array before
/// processing. Therefore, two requirements should be met.
///
/// First, the incoming byte slice should be larger than the saved bytes.
///
/// Second, the incoming byte slice should contain new bytes only after
/// the saved byte prefix.
///
/// If both criteria are met, this allows the parser to prepend its saved
/// bytes without any additional allocation.
fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;
/// Indicates the number of saved bytes in the parser
fn saved(&self) -> usize {
0
}
}
/// A bytes parser
///
/// No actual processing is performed and the input bytes are directly
/// returned. This implies that this parser never saves any bytes internally.
#[derive(Default)]
pub struct Bytes(());
impl Parser for Bytes {
type Item = [u8];
type Error = core::convert::Infallible;
fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
Ok(bytes)
}
}
/// A text parser
///
/// This parser converts the input bytes to a `str`. This parser preserves
/// trailing invalid UTF-8 sequences in the case that chunking fell in the
/// middle of a valid UTF-8 character.
#[derive(Default)]
pub struct Text {
stored: usize,
buffer: [u8; 3],
}
impl Parser for Text {
type Item = str;
type Error = core::str::Utf8Error;
fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
// If we cannot advance, return nothing.
if bytes.len() <= self.stored {
return Ok("");
}
// Copy previously invalid data into place.
bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
Ok(match core::str::from_utf8(bytes) {
Ok(s) => s,
Err(e) => {
let valid_len = e.valid_up_to();
let invalid_len = bytes.len() - valid_len;
// If the size of the invalid UTF-8 is large enough to hold
// all valid UTF-8 characters, we have a syntax error.
if invalid_len > self.buffer.len() {
return Err(e);
}
// Otherwise, store the invalid bytes for the next read cycle.
self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
self.stored = invalid_len;
// Decode the valid part of the string.
core::str::from_utf8(&bytes[..valid_len]).unwrap()
}
})
}
fn saved(&self) -> usize {
self.stored
}
}
/// A CBOR segment
///
/// This type represents a single bytes or text segment on the wire. It can be
/// read out in parsed chunks based on the size of the input scratch buffer.
pub struct Segment<'r, R: Read, P: Parser> {
reader: &'r mut Decoder<R>,
unread: usize,
offset: usize,
parser: P,
}
impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
/// Gets the number of unprocessed bytes
#[inline]
pub fn left(&self) -> usize {
self.unread + self.parser.saved()
}
/// Gets the next parsed chunk within the segment
///
/// Returns `Ok(None)` when all chunks have been read.
#[inline]
pub fn pull<'a>(
&mut self,
buffer: &'a mut [u8],
) -> Result<Option<&'a P::Item>, Error<R::Error>> {
use core::cmp::min;
let prev = self.parser.saved();
match self.unread {
0 if prev == 0 => return Ok(None),
0 => return Err(Error::Syntax(self.offset)),
_ => (),
}
// Determine how many bytes to read.
let size = min(buffer.len(), prev + self.unread);
let full = &mut buffer[..size];
let next = &mut full[min(size, prev)..];
// Read additional bytes.
self.reader.read_exact(next)?;
self.unread -= next.len();
self.parser
.parse(full)
.or(Err(Error::Syntax(self.offset)))
.map(Some)
}
}
/// A sequence of CBOR segments
///
/// CBOR allows for bytes or text items to be segmented. This type represents
/// the state of that segmented input stream.
pub struct Segments<'r, R: Read, P: Parser> {
reader: &'r mut Decoder<R>,
finish: bool,
nested: usize,
parser: PhantomData<P>,
unwrap: fn(Header) -> Result<Option<usize>, ()>,
}
impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
#[inline]
pub(crate) fn new(
decoder: &'r mut Decoder<R>,
unwrap: fn(Header) -> Result<Option<usize>, ()>,
) -> Self {
Self {
reader: decoder,
finish: false,
nested: 0,
parser: PhantomData,
unwrap,
}
}
/// Gets the next segment in the stream
///
/// Returns `Ok(None)` at the conclusion of the stream.
#[inline]
pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
while !self.finish {
let offset = self.reader.offset();
match self.reader.pull()? {
Header::Break if self.nested == 1 => return Ok(None),
Header::Break if self.nested > 1 => self.nested -= 1,
header => match (self.unwrap)(header) {
Err(..) => return Err(Error::Syntax(offset)),
Ok(None) => self.nested += 1,
Ok(Some(len)) => {
self.finish = self.nested == 0;
return Ok(Some(Segment {
reader: self.reader,
unread: len,
offset,
parser: P::default(),
}));
}
},
}
}
Ok(None)
}
}