blob: 21f37d14d6f959b9360cdd96e821dd439bffb367 [file] [log] [blame]
// Copyright 2016 `multipart` Crate Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//! Boundary parsing for `multipart` requests.
use ::safemem;
use super::buf_redux::BufReader;
use super::buf_redux::policy::MinBuffered;
use super::twoway;
use std::cmp;
use std::borrow::Borrow;
use std::io;
use std::io::prelude::*;
use self::State::*;
/// Default minimum buffer size, in bytes, handed to the `MinBuffered` policy by
/// `BoundaryReader::from_reader`.
pub const MIN_BUF_SIZE: usize = 1024;
/// Progress of a `BoundaryReader` through the part of the body currently being read.
#[derive(PartialEq, Eq, Debug)]
enum State {
    /// No boundary has been located in the data examined so far.
    Searching,
    /// A boundary has been located in the buffer but has not been skipped yet.
    BoundaryRead,
    /// The closing boundary (the one followed by `--`) has been consumed.
    AtEnd,
}
/// A struct implementing `Read` and `BufRead` that will yield bytes until it sees a given sequence.
///
/// Reading stops in front of each boundary; `consume_boundary()` must be called to step over it
/// before the data that follows can be read.
#[derive(Debug)]
pub struct BoundaryReader<R> {
/// Buffered wrapper around the underlying reader, configured with a `MinBuffered` policy.
source: BufReader<R, MinBuffered>,
/// The byte sequence being searched for; `from_reader` stores it with a `--` prefix.
boundary: Vec<u8>,
/// Offset into `source`'s buffer: the bytes before it are what `read_to_boundary` hands out.
/// Once a boundary has been found it marks where the boundary (or its preceding CRLF) starts.
search_idx: usize,
/// Whether a boundary is still being searched for, has been found, or the end was reached.
state: State,
}
impl<R> BoundaryReader<R> where R: Read {
/// Internal API
///
/// Wraps `reader` in a buffered reader using the `MinBuffered(MIN_BUF_SIZE)` policy and starts
/// out searching for `boundary`, which is stored with `--` prepended.
pub fn from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R> {
    // Store the search sequence with its `--` prefix so it can be matched in one pass.
    let mut delimiter: Vec<u8> = boundary.into();
    safemem::prepend(b"--", &mut delimiter);

    BoundaryReader {
        source: BufReader::new(reader).set_policy(MinBuffered(MIN_BUF_SIZE)),
        boundary: delimiter,
        search_idx: 0,
        state: Searching,
    }
}
/// Fills the underlying buffer and returns the leading part of it that may be yielded as data.
///
/// While searching, the returned slice ends where the boundary starts or, if no boundary is in
/// the buffer yet, before any trailing bytes that could be the beginning of one. A CRLF sitting
/// directly in front of that position is excluded as well. `self.search_idx` is updated to the
/// length of the returned slice, and `self.state` becomes `BoundaryRead` once a boundary is found.
///
/// Errors from the underlying reader are propagated.
fn read_to_boundary(&mut self) -> io::Result<&[u8]> {
let buf = self.source.fill_buf()?;
trace!("Buf: {:?}", String::from_utf8_lossy(buf));
debug!("Before search Buf len: {} Search idx: {} State: {:?}",
buf.len(), self.search_idx, self.state);
// The cursor is already final in these states; returning early keeps repeated calls
// idempotent instead of running the CRLF back-up below a second time.
if self.state == BoundaryRead || self.state == AtEnd {
return Ok(&buf[..self.search_idx])
}
// Only search the part of the buffer beyond the current cursor.
if self.state == Searching && self.search_idx < buf.len() {
let lookahead = &buf[self.search_idx..];
// Look for the boundary, or if it isn't found, stop near the end.
match find_boundary(lookahead, &self.boundary) {
// `found_idx` is relative to `lookahead`: the cursor now sits on the boundary itself.
Ok(found_idx) => {
self.search_idx += found_idx;
self.state = BoundaryRead;
},
// No complete boundary: advance by the number of bytes that are safe to yield.
Err(yield_len) => {
self.search_idx += yield_len;
}
}
}
debug!("After search Buf len: {} Search idx: {} State: {:?}",
buf.len(), self.search_idx, self.state);
// back up the cursor to before the boundary's preceding CRLF if we haven't already
// (this also runs when no boundary was found, so a CRLF at the very end of the yieldable
// region is held back rather than handed out as data)
if self.search_idx >= 2 && !buf[self.search_idx..].starts_with(b"\r\n") {
let two_bytes_before = &buf[self.search_idx - 2 .. self.search_idx];
trace!("Two bytes before: {:?} ({:?}) (\"\\r\\n\": {:?})",
String::from_utf8_lossy(two_bytes_before), two_bytes_before, b"\r\n");
if two_bytes_before == *b"\r\n" {
debug!("Subtract two!");
self.search_idx -= 2;
}
}
let ret_buf = &buf[..self.search_idx];
trace!("Returning buf: {:?}", String::from_utf8_lossy(ret_buf));
Ok(ret_buf)
}
pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
// ensure the minimum buf size is at least enough to find a boundary with some extra
let min_buf_size = cmp::max(self.boundary.len() * 2, min_buf_size);
self.source.policy_mut().0 = min_buf_size;
}
/// Skips past the next boundary, discarding any unread data in front of it.
///
/// Returns `Ok(true)` when the boundary is followed by CRLF and `Ok(false)` when it is followed
/// by `--` (the closing boundary) or when the closing boundary was already consumed earlier.
///
/// # Errors
///
/// * `UnexpectedEof` if nothing more can be yielded while no boundary has been found, or if the
///   buffer does not hold the two bytes that follow the boundary.
/// * `InvalidData` if the two bytes following the boundary are neither CRLF nor `--`.
/// * Any error reported by the underlying reader.
pub fn consume_boundary(&mut self) -> io::Result<bool> {
    if self.state == AtEnd {
        return Ok(false);
    }

    // Throw away data until `read_to_boundary` reports that a boundary is in the buffer.
    while self.state == Searching {
        debug!("Boundary not found yet");

        let discard_len = self.read_to_boundary()?.len();

        if discard_len == 0 && self.state == Searching {
            return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                      "unexpected end of request body"));
        }

        debug!("Discarding {} bytes", discard_len);

        self.consume(discard_len);
    }

    let total_len = {
        let buffered = self.source.fill_buf()?;
        let boundary_len = self.boundary.len();

        // only the data from the cursor onwards is of interest
        let tail = &buffered[self.search_idx..];

        // the cursor may sit on the CRLF preceding the boundary; that gets consumed as well
        let crlf_len = if tail.starts_with(b"\r\n") { 2 } else { 0 };

        // the boundary must start right after the optional CRLF
        debug_assert_eq!(*self.boundary, tail[crlf_len .. boundary_len + crlf_len]);

        // cursor + optional CRLF + boundary + the trailing CRLF or `--`
        let total_len = self.search_idx + crlf_len + boundary_len + 2;

        if buffered.len() < total_len {
            return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                      "not enough bytes to verify boundary"));
        }

        // There are enough bytes to inspect the terminator. The state goes back to `Searching`
        // unless the terminator marks the end; note this also applies to the error path below.
        self.state = Searching;

        let terminator = &buffered[total_len - 2 .. total_len];

        if terminator == b"--" {
            self.state = AtEnd;
        } else if terminator != b"\r\n" {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("unexpected bytes following multipart boundary: {:X} {:X}",
                        terminator[0], terminator[1])
            ));
        }

        total_len
    };

    trace!("Consuming {} bytes, remaining buf: {:?}",
           total_len,
           String::from_utf8_lossy(self.source.buffer()));

    self.source.consume(total_len);
    self.search_idx = 0;

    trace!("Consumed boundary (state: {:?}), remaining buf: {:?}", self.state,
           String::from_utf8_lossy(self.source.buffer()));

    Ok(self.state != AtEnd)
}
}
/// Locates `boundary` inside `buf`.
///
/// Returns `Ok(index)` with the offset of the first complete occurrence. If there is none,
/// returns `Err(len)` where `len` is the highest length that can safely be yielded: the offset
/// of the first tail of `buf` that is a prefix of `boundary`, or `buf.len()` if no tail is.
fn find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize> {
    match twoway::find_bytes(buf, boundary) {
        Some(found_at) => Ok(found_at),
        None => {
            // A partial boundary can only begin within the last `boundary.len()` bytes.
            let tail_start = buf.len().saturating_sub(boundary.len());

            let yield_len = (tail_start..buf.len())
                .find(|&offset| boundary.starts_with(&buf[offset..]))
                .unwrap_or(buf.len());

            Err(yield_len)
        }
    }
}
#[cfg(feature = "bench")]
impl<'a> BoundaryReader<io::Cursor<&'a [u8]>> {
    /// Creates a reader over an in-memory byte slice; only compiled for benchmarks.
    fn new_with_bytes(bytes: &'a [u8], boundary: &str) -> Self {
        Self::from_reader(io::Cursor::new(bytes), boundary)
    }

    /// Rewinds the reader to the start of its input so the same data can be parsed again.
    ///
    /// # Panics
    ///
    /// Panics if the seek fails; a stale reader would silently invalidate the benchmark.
    fn reset(&mut self) {
        // Dump buffer and reset cursor
        // (the result used to be dropped silently; surface a failure instead)
        self.source
            .seek(io::SeekFrom::Start(0))
            .expect("seeking the in-memory cursor back to the start failed");
        self.state = Searching;
        self.search_idx = 0;
    }
}
/// Gives shared access to the wrapped reader via `source.get_ref()`.
impl<R> Borrow<R> for BoundaryReader<R> {
fn borrow(&self) -> &R {
self.source.get_ref()
}
}
impl<R> Read for BoundaryReader<R> where R: Read {
fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
let read = {
let mut buf = self.read_to_boundary()?;
// This shouldn't ever be an error so unwrapping is fine.
buf.read(out).unwrap()
};
self.consume(read);
Ok(read)
}
}
impl<R> BufRead for BoundaryReader<R> where R: Read {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
self.read_to_boundary()
}
fn consume(&mut self, amt: usize) {
let true_amt = cmp::min(amt, self.search_idx);
debug!("Consume! amt: {} true amt: {}", amt, true_amt);
self.source.consume(true_amt);
self.search_idx -= true_amt;
}
}
#[cfg(test)]
mod test {
use super::BoundaryReader;
use std::io;
use std::io::prelude::*;
/// Boundary string handed to `BoundaryReader::from_reader` by the tests (without the `--`).
const BOUNDARY: &str = "boundary";

/// A body with two fields, each introduced by `--boundary`, terminated by `--boundary--`.
const TEST_VAL: &str = concat!(
    "--boundary\r\n",
    "dashed-value-1\r\n",
    "--boundary\r\n",
    "dashed-value-2\r\n",
    "--boundary--"
);
/// Parses `TEST_VAL` from a plain byte slice, i.e. with the whole body available at once.
#[test]
fn test_boundary() {
    ::init_log();

    debug!("Testing boundary (no split)");

    let mut remaining = TEST_VAL.as_bytes();
    let mut reader = BoundaryReader::from_reader(&mut remaining, BOUNDARY);

    let mut scratch = String::new();
    test_boundary_reader(&mut reader, &mut scratch);
}
/// A reader that serves its data in two separate pieces, so a single `read` call never returns
/// bytes from both sides of the split point.
struct SplitReader<'a> {
    /// Bytes before the split point; served first.
    left: &'a [u8],
    /// Bytes from the split point onwards; served once `left` yields nothing.
    right: &'a [u8],
}

impl<'a> SplitReader<'a> {
    /// Divides `data` at index `at` (panics if `at > data.len()`, like `split_at`).
    fn split(data: &'a [u8], at: usize) -> SplitReader<'a> {
        let (left, right) = data.split_at(at);
        SplitReader { left, right }
    }
}

impl<'a> Read for SplitReader<'a> {
    fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
        // Reading from a byte slice cannot fail; a zero-length read from `left` means it has
        // nothing (more) to give, so fall through to `right`.
        match self.left.read(dst)? {
            0 => self.right.read(dst),
            copied => Ok(copied),
        }
    }
}
/// Parses `TEST_VAL` with the input broken into two reads at every possible position.
#[test]
fn test_split_boundary() {
    ::init_log();

    debug!("Testing boundary (split)");

    let input = TEST_VAL.as_bytes();
    let mut scratch = String::new();

    // Exercise every split point, including ones in the middle of a boundary.
    for split_at in 0..input.len() {
        debug!("Testing split at: {}", split_at);

        let halves = SplitReader::split(input, split_at);
        let mut reader = BoundaryReader::from_reader(halves, BOUNDARY);
        test_boundary_reader(&mut reader, &mut scratch);
    }
}
/// Drives `reader` through the structure of `TEST_VAL`: nothing before the first boundary,
/// then the two field values, then nothing after the closing boundary.
fn test_boundary_reader<R: Read>(reader: &mut BoundaryReader<R>, buf: &mut String) {
    /// Replaces the contents of `buf` with everything readable up to the next boundary.
    fn read_section<S: Read>(reader: &mut BoundaryReader<S>, buf: &mut String) {
        buf.clear();
        let _ = reader.read_to_string(buf).unwrap();
    }

    debug!("Read 1");
    read_section(reader, buf);
    assert!(buf.is_empty(), "Buffer not empty: {:?}", buf);

    debug!("Consume 1");
    reader.consume_boundary().unwrap();

    debug!("Read 2");
    read_section(reader, buf);
    assert_eq!(buf, "dashed-value-1");

    debug!("Consume 2");
    reader.consume_boundary().unwrap();

    debug!("Read 3");
    read_section(reader, buf);
    assert_eq!(buf, "dashed-value-2");

    debug!("Consume 3");
    reader.consume_boundary().unwrap();

    debug!("Read 4");
    read_section(reader, buf);
    assert_eq!(buf, "");
}
/// A body consisting of nothing but the closing boundary reports "no more fields" right away.
#[test]
fn test_empty_body() {
    ::init_log();

    // empty body contains closing boundary only
    let mut body: &[u8] = b"--boundary--";
    let mut text = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 1");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "");
    text.clear();

    // consuming again after the end keeps reporting `false`
    debug!("Consume 2");
    assert!(!reader.consume_boundary().unwrap());
}
/// Blank lines in front of the first boundary are skipped; of the two CRLFs in front of the
/// closing boundary only the last one is treated as part of the boundary.
#[test]
fn test_leading_crlf() {
    ::init_log();

    let mut body: &[u8] = b"\r\n\r\n--boundary\r\n\
                            asdf1234\
                            \r\n\r\n--boundary--";
    let mut text = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "asdf1234\r\n");
    text.clear();

    debug!("Consume 2");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 2 (empty)");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "");
}
/// Field data ending in CRLF keeps that CRLF; only the CRLF directly before the boundary is
/// stripped, and peeking at the data repeatedly does not strip any more.
#[test]
fn test_trailing_crlf() {
    ::init_log();

    let mut body: &[u8] = b"--boundary\r\n\
                            asdf1234\
                            \r\n\r\n--boundary\r\n\
                            hjkl5678\r\n--boundary--";
    let mut text = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");
    // Repro for https://github.com/abonander/multipart/issues/93
    // Looking at the buffer twice without consuming must give the same bytes
    let first_peek = reader.read_to_boundary().unwrap().to_owned();
    let second_peek = reader.read_to_boundary().unwrap().to_owned();
    assert_eq!(first_peek, second_peek);

    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "asdf1234\r\n");
    text.clear();

    debug!("Consume 2");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 2");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "hjkl5678");
    text.clear();

    debug!("Consume 3");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 3 (empty)");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "");
}
// https://github.com/abonander/multipart/issues/93#issuecomment-343610587
/// Bare line feeds at the end of field data are preserved as data.
#[test]
fn test_trailing_lflf() {
    ::init_log();

    let mut body: &[u8] = b"--boundary\r\n\
                            asdf1234\
                            \n\n\r\n--boundary\r\n\
                            hjkl5678\r\n--boundary--";
    let mut text = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");
    // two peeks in a row, without consuming, must agree
    let first_peek = reader.read_to_boundary().unwrap().to_owned();
    let second_peek = reader.read_to_boundary().unwrap().to_owned();
    assert_eq!(first_peek, second_peek);

    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "asdf1234\n\n");
    text.clear();

    debug!("Consume 2");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 2");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "hjkl5678");
    text.clear();

    debug!("Consume 3");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 3 (empty)");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "");
}
// https://github.com/abonander/multipart/issues/104
/// A body that stops without a closing boundary still yields its data, but the attempt to
/// consume the missing boundary is an error.
#[test]
fn test_unterminated_body() {
    ::init_log();

    let mut body: &[u8] = b"--boundary\r\n\
                            asdf1234\
                            \n\n\r\n--boundary\r\n\
                            hjkl5678 ";
    let mut text = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");
    // two peeks in a row, without consuming, must agree
    let first_peek = reader.read_to_boundary().unwrap().to_owned();
    let second_peek = reader.read_to_boundary().unwrap().to_owned();
    assert_eq!(first_peek, second_peek);

    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "asdf1234\n\n");
    text.clear();

    debug!("Consume 2");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 2");
    let _ = reader.read_to_string(&mut text).unwrap();
    assert_eq!(text, "hjkl5678 ");
    text.clear();

    debug!("Consume 3 - expecting error");
    assert!(reader.consume_boundary().is_err());
}
/// A boundary with nothing after it cannot be verified and must be rejected.
#[test]
fn test_lone_boundary() {
    let mut body: &[u8] = b"--boundary";
    let mut reader = BoundaryReader::from_reader(&mut body, "boundary");

    let outcome = reader.consume_boundary();
    assert!(outcome.is_err());
}
/// A boundary followed by anything other than CRLF or `--` must be rejected.
#[test]
fn test_invalid_boundary() {
    let mut body: &[u8] = b"--boundary\x00\x00";
    let mut reader = BoundaryReader::from_reader(&mut body, "boundary");

    let outcome = reader.consume_boundary();
    assert!(outcome.is_err());
}
/// Consuming a boundary without reading the field in front of it discards that field.
#[test]
fn test_skip_field() {
    let mut body: &[u8] = b"--boundary\r\nfield1\r\n--boundary\r\nfield2\r\n--boundary--";
    let mut reader = BoundaryReader::from_reader(&mut body, "boundary");

    assert!(reader.consume_boundary().unwrap());
    // skip `field1`
    assert!(reader.consume_boundary().unwrap());

    let mut second_field = String::new();
    reader.read_to_string(&mut second_field).unwrap();
    assert_eq!(second_field, "field2");

    assert!(!reader.consume_boundary().unwrap());
}
/// Benchmarks; only built with the `bench` feature.
#[cfg(feature = "bench")]
mod bench {
    extern crate test;

    use self::test::Bencher;
    use super::*;

    /// Measures one full pass over `TEST_VAL`, rewinding the same reader for every iteration.
    #[bench]
    fn bench_boundary_reader(b: &mut Bencher) {
        // Allocate once up front so the measured loop only clears and refills the string.
        let mut scratch = String::with_capacity(256);
        let mut reader = BoundaryReader::new_with_bytes(TEST_VAL.as_bytes(), BOUNDARY);

        b.iter(|| {
            reader.reset();
            test_boundary_reader(&mut reader, &mut scratch);
        });
    }
}
}