| // Copyright 2016 `multipart` Crate Developers |
| // |
| // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or |
| // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or |
| // http://opensource.org/licenses/MIT>, at your option. This file may not be |
| // copied, modified, or distributed except according to those terms. |
| |
| //! Boundary parsing for `multipart` requests. |
| |
| use ::safemem; |
| |
| use super::buf_redux::BufReader; |
| use super::buf_redux::policy::MinBuffered; |
| use super::twoway; |
| |
| use std::cmp; |
| use std::borrow::Borrow; |
| |
| use std::io; |
| use std::io::prelude::*; |
| |
| use self::State::*; |
| |
/// Default minimum buffer size in bytes (1 KiB): the value handed to the `MinBuffered`
/// policy when a `BoundaryReader` is created with `from_reader`.
pub const MIN_BUF_SIZE: usize = 1 << 10;
| |
/// Where a `BoundaryReader` currently stands relative to the next boundary.
#[derive(Debug, PartialEq, Eq)]
enum State {
    /// No boundary has been located in the buffered data yet.
    Searching,
    /// A boundary has been located; the cursor marks the end of the data preceding it.
    BoundaryRead,
    /// A boundary followed by `--` has been consumed; no further boundaries are expected.
    AtEnd,
}
| |
| /// A struct implementing `Read` and `BufRead` that will yield bytes until it sees a given sequence. |
| #[derive(Debug)] |
| pub struct BoundaryReader<R> { |
| source: BufReader<R, MinBuffered>, |
| boundary: Vec<u8>, |
| search_idx: usize, |
| state: State, |
| } |
| |
| impl<R> BoundaryReader<R> where R: Read { |
| /// Internal API |
| pub fn from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R> { |
| let mut boundary = boundary.into(); |
| safemem::prepend(b"--", &mut boundary); |
| let source = BufReader::new(reader).set_policy(MinBuffered(MIN_BUF_SIZE)); |
| |
| BoundaryReader { |
| source, |
| boundary, |
| search_idx: 0, |
| state: Searching, |
| } |
| } |
| |
| fn read_to_boundary(&mut self) -> io::Result<&[u8]> { |
| let buf = self.source.fill_buf()?; |
| |
| trace!("Buf: {:?}", String::from_utf8_lossy(buf)); |
| |
| debug!("Before search Buf len: {} Search idx: {} State: {:?}", |
| buf.len(), self.search_idx, self.state); |
| |
| if self.state == BoundaryRead || self.state == AtEnd { |
| return Ok(&buf[..self.search_idx]) |
| } |
| |
| if self.state == Searching && self.search_idx < buf.len() { |
| let lookahead = &buf[self.search_idx..]; |
| |
| // Look for the boundary, or if it isn't found, stop near the end. |
| match find_boundary(lookahead, &self.boundary) { |
| Ok(found_idx) => { |
| self.search_idx += found_idx; |
| self.state = BoundaryRead; |
| }, |
| Err(yield_len) => { |
| self.search_idx += yield_len; |
| } |
| } |
| } |
| |
| debug!("After search Buf len: {} Search idx: {} State: {:?}", |
| buf.len(), self.search_idx, self.state); |
| |
| // back up the cursor to before the boundary's preceding CRLF if we haven't already |
| if self.search_idx >= 2 && !buf[self.search_idx..].starts_with(b"\r\n") { |
| let two_bytes_before = &buf[self.search_idx - 2 .. self.search_idx]; |
| |
| trace!("Two bytes before: {:?} ({:?}) (\"\\r\\n\": {:?})", |
| String::from_utf8_lossy(two_bytes_before), two_bytes_before, b"\r\n"); |
| |
| if two_bytes_before == *b"\r\n" { |
| debug!("Subtract two!"); |
| self.search_idx -= 2; |
| } |
| } |
| |
| let ret_buf = &buf[..self.search_idx]; |
| |
| trace!("Returning buf: {:?}", String::from_utf8_lossy(ret_buf)); |
| |
| Ok(ret_buf) |
| } |
| |
| pub fn set_min_buf_size(&mut self, min_buf_size: usize) { |
| // ensure the minimum buf size is at least enough to find a boundary with some extra |
| let min_buf_size = cmp::max(self.boundary.len() * 2, min_buf_size); |
| |
| self.source.policy_mut().0 = min_buf_size; |
| } |
| |
| pub fn consume_boundary(&mut self) -> io::Result<bool> { |
| if self.state == AtEnd { |
| return Ok(false); |
| } |
| |
| while self.state == Searching { |
| debug!("Boundary not found yet"); |
| |
| let buf_len = self.read_to_boundary()?.len(); |
| |
| if buf_len == 0 && self.state == Searching { |
| return Err(io::Error::new(io::ErrorKind::UnexpectedEof, |
| "unexpected end of request body")); |
| } |
| |
| debug!("Discarding {} bytes", buf_len); |
| |
| self.consume(buf_len); |
| } |
| |
| let consume_amt = { |
| let buf = self.source.fill_buf()?; |
| |
| // if the boundary is found we should have at least this much in-buffer |
| let mut consume_amt = self.search_idx + self.boundary.len(); |
| |
| // we don't care about data before the cursor |
| let bnd_segment = &buf[self.search_idx..]; |
| |
| if bnd_segment.starts_with(b"\r\n") { |
| // preceding CRLF needs to be consumed as well |
| consume_amt += 2; |
| |
| // assert that we've found the boundary after the CRLF |
| debug_assert_eq!(*self.boundary, bnd_segment[2 .. self.boundary.len() + 2]); |
| } else { |
| // assert that we've found the boundary |
| debug_assert_eq!(*self.boundary, bnd_segment[..self.boundary.len()]); |
| } |
| |
| // include the trailing CRLF or -- |
| consume_amt += 2; |
| |
| if buf.len() < consume_amt { |
| return Err(io::Error::new(io::ErrorKind::UnexpectedEof, |
| "not enough bytes to verify boundary")); |
| } |
| |
| // we have enough bytes to verify |
| self.state = Searching; |
| |
| let last_two = &buf[consume_amt - 2 .. consume_amt]; |
| |
| match last_two { |
| b"\r\n" => self.state = Searching, |
| b"--" => self.state = AtEnd, |
| _ => return Err(io::Error::new( |
| io::ErrorKind::InvalidData, |
| format!("unexpected bytes following multipart boundary: {:X} {:X}", |
| last_two[0], last_two[1]) |
| )), |
| } |
| |
| consume_amt |
| }; |
| |
| trace!("Consuming {} bytes, remaining buf: {:?}", |
| consume_amt, |
| String::from_utf8_lossy(self.source.buffer())); |
| |
| self.source.consume(consume_amt); |
| |
| if cfg!(debug_assertions) { |
| |
| } |
| |
| self.search_idx = 0; |
| |
| trace!("Consumed boundary (state: {:?}), remaining buf: {:?}", self.state, |
| String::from_utf8_lossy(self.source.buffer())); |
| |
| Ok(self.state != AtEnd) |
| } |
| } |
| |
| /// Find the boundary occurrence or the highest length to safely yield |
| fn find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize> { |
| if let Some(idx) = twoway::find_bytes(buf, boundary) { |
| return Ok(idx); |
| } |
| |
| let search_start = buf.len().saturating_sub(boundary.len()); |
| |
| // search for just the boundary fragment |
| for i in search_start .. buf.len() { |
| if boundary.starts_with(&buf[i..]) { |
| return Err(i); |
| } |
| } |
| |
| Err(buf.len()) |
| } |
| |
/// Benchmark-only helpers for a `BoundaryReader` over an in-memory byte slice.
#[cfg(feature = "bench")]
impl<'a> BoundaryReader<io::Cursor<&'a [u8]>> {
    /// Creates a reader over `bytes` that searches for `boundary`.
    fn new_with_bytes(bytes: &'a [u8], boundary: &str) -> Self {
        Self::from_reader(io::Cursor::new(bytes), boundary)
    }

    /// Rewinds the reader to the start of its input so it can be reused.
    ///
    /// # Panics
    ///
    /// Panics if seeking the underlying source fails. Seeking an in-memory cursor to
    /// offset 0 is not expected to fail.
    fn reset(&mut self) {
        // Dump buffer and reset cursor
        self.source.seek(io::SeekFrom::Start(0))
            .expect("seeking an in-memory cursor to its start should not fail");
        self.state = Searching;
        self.search_idx = 0;
    }
}
| |
| impl<R> Borrow<R> for BoundaryReader<R> { |
| fn borrow(&self) -> &R { |
| self.source.get_ref() |
| } |
| } |
| |
| impl<R> Read for BoundaryReader<R> where R: Read { |
| fn read(&mut self, out: &mut [u8]) -> io::Result<usize> { |
| let read = { |
| let mut buf = self.read_to_boundary()?; |
| // This shouldn't ever be an error so unwrapping is fine. |
| buf.read(out).unwrap() |
| }; |
| |
| self.consume(read); |
| Ok(read) |
| } |
| } |
| |
| impl<R> BufRead for BoundaryReader<R> where R: Read { |
| fn fill_buf(&mut self) -> io::Result<&[u8]> { |
| self.read_to_boundary() |
| } |
| |
| fn consume(&mut self, amt: usize) { |
| let true_amt = cmp::min(amt, self.search_idx); |
| |
| debug!("Consume! amt: {} true amt: {}", amt, true_amt); |
| |
| self.source.consume(true_amt); |
| self.search_idx -= true_amt; |
| } |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use super::BoundaryReader; |
| |
| use std::io; |
| use std::io::prelude::*; |
| |
/// Boundary string (without the leading `--`) shared by the tests.
const BOUNDARY: &str = "boundary";

/// A body holding two fields, each introduced by a boundary, closed by the final boundary.
const TEST_VAL: &str = concat!(
    "--boundary\r\n",
    "dashed-value-1\r\n",
    "--boundary\r\n",
    "dashed-value-2\r\n",
    "--boundary--"
);
| |
/// Runs the shared reader checks over `TEST_VAL` supplied as one contiguous slice.
#[test]
fn test_boundary() {
    ::init_log();

    debug!("Testing boundary (no split)");

    let mut input = TEST_VAL.as_bytes();
    let mut reader = BoundaryReader::from_reader(&mut input, BOUNDARY);

    let mut scratch = String::new();

    test_boundary_reader(&mut reader, &mut scratch);
}
| |
/// A reader over two slices that serves `left` until it is exhausted and only then serves
/// `right`, so a single `read()` call never returns bytes from both halves.
struct SplitReader<'a> {
    left: &'a [u8],
    right: &'a [u8],
}

impl<'a> SplitReader<'a> {
    /// Splits `data` into two halves at byte offset `at`.
    fn split(data: &'a [u8], at: usize) -> SplitReader<'a> {
        let (left, right) = data.split_at(at);

        SplitReader { left, right }
    }
}

impl<'a> Read for SplitReader<'a> {
    fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
        // Reading from an in-memory slice cannot fail.
        match self.left.read(dst).unwrap() {
            // Nothing came out of the first half: fall through to the second.
            0 => self.right.read(dst),
            copied => Ok(copied),
        }
    }
}
| |
/// Runs the shared reader checks with the input delivered in two separate reads, once for
/// every possible split offset.
#[test]
fn test_split_boundary() {
    ::init_log();

    debug!("Testing boundary (split)");

    let input = TEST_VAL.as_bytes();
    let mut scratch = String::new();

    // Every offset is exercised, so each byte of every boundary lands on a split once.
    for split_at in 0 .. input.len() {
        debug!("Testing split at: {}", split_at);

        let halves = SplitReader::split(input, split_at);
        let mut reader = BoundaryReader::from_reader(halves, BOUNDARY);
        test_boundary_reader(&mut reader, &mut scratch);
    }
}
| |
| fn test_boundary_reader<R: Read>(reader: &mut BoundaryReader<R>, buf: &mut String) { |
| buf.clear(); |
| |
| debug!("Read 1"); |
| let _ = reader.read_to_string(buf).unwrap(); |
| assert!(buf.is_empty(), "Buffer not empty: {:?}", buf); |
| buf.clear(); |
| |
| debug!("Consume 1"); |
| reader.consume_boundary().unwrap(); |
| |
| debug!("Read 2"); |
| let _ = reader.read_to_string(buf).unwrap(); |
| assert_eq!(buf, "dashed-value-1"); |
| buf.clear(); |
| |
| debug!("Consume 2"); |
| reader.consume_boundary().unwrap(); |
| |
| debug!("Read 3"); |
| let _ = reader.read_to_string(buf).unwrap(); |
| assert_eq!(buf, "dashed-value-2"); |
| buf.clear(); |
| |
| debug!("Consume 3"); |
| reader.consume_boundary().unwrap(); |
| |
| debug!("Read 4"); |
| let _ = reader.read_to_string(buf).unwrap(); |
| assert_eq!(buf, ""); |
| } |
| |
/// A body made of only the closing boundary yields no data and reports the end right away.
#[test]
fn test_empty_body() {
    ::init_log();

    // an empty body consists of nothing but the closing boundary
    let mut body: &[u8] = b"--boundary--";

    let mut buf = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 1");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "");
    buf.clear();

    // consuming again after the end keeps reporting the end
    debug!("Consume 2");
    assert!(!reader.consume_boundary().unwrap());
}
| |
/// CRLFs in front of the first boundary are skipped, and of two CRLFs in front of a later
/// boundary only the last one is treated as part of the delimiter.
#[test]
fn test_leading_crlf() {
    ::init_log();

    let mut body: &[u8] = b"\r\n\r\n--boundary\r\nasdf1234\r\n\r\n--boundary--";

    let mut buf = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "asdf1234\r\n");
    buf.clear();

    debug!("Consume 2");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 2 (empty)");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "");
}
| |
/// A field whose content ends in CRLF keeps that CRLF; only the CRLF directly in front of
/// the boundary is stripped.
#[test]
fn test_trailing_crlf() {
    ::init_log();

    let mut body: &[u8] = b"--boundary\r\nasdf1234\r\n\
                            \r\n--boundary\r\nhjkl5678\r\n--boundary--";

    let mut buf = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");

    // Repro for https://github.com/abonander/multipart/issues/93
    // Two back-to-back reads without consuming must return the same buffer
    let first = reader.read_to_boundary().unwrap().to_owned();
    let second = reader.read_to_boundary().unwrap().to_owned();
    assert_eq!(first, second);

    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "asdf1234\r\n");
    buf.clear();

    debug!("Consume 2");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 2");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "hjkl5678");
    buf.clear();

    debug!("Consume 3");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 3 (empty)");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "");
}
| |
// https://github.com/abonander/multipart/issues/93#issuecomment-343610587
/// Bare LFs at the end of a field's content are kept as data.
#[test]
fn test_trailing_lflf() {
    ::init_log();

    let mut body: &[u8] = b"--boundary\r\nasdf1234\n\n\
                            \r\n--boundary\r\nhjkl5678\r\n--boundary--";

    let mut buf = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");

    // two back-to-back reads without consuming must return the same buffer
    let first = reader.read_to_boundary().unwrap().to_owned();
    let second = reader.read_to_boundary().unwrap().to_owned();
    assert_eq!(first, second);

    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "asdf1234\n\n");
    buf.clear();

    debug!("Consume 2");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 2");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "hjkl5678");
    buf.clear();

    debug!("Consume 3");
    assert!(!reader.consume_boundary().unwrap());

    debug!("Read 3 (empty)");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "");
}
| |
// https://github.com/abonander/multipart/issues/104
/// A body that ends without a closing boundary still yields its last field's data, after
/// which consuming the (missing) boundary fails.
#[test]
fn test_unterminated_body() {
    ::init_log();

    let mut body: &[u8] = b"--boundary\r\nasdf1234\n\n\
                            \r\n--boundary\r\nhjkl5678 ";

    let mut buf = String::new();
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    debug!("Consume 1");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 1");

    // two back-to-back reads without consuming must return the same buffer
    let first = reader.read_to_boundary().unwrap().to_owned();
    let second = reader.read_to_boundary().unwrap().to_owned();
    assert_eq!(first, second);

    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "asdf1234\n\n");
    buf.clear();

    debug!("Consume 2");
    assert!(reader.consume_boundary().unwrap());

    debug!("Read 2");
    reader.read_to_string(&mut buf).unwrap();
    assert_eq!(buf, "hjkl5678 ");
    buf.clear();

    debug!("Consume 3 - expecting error");
    assert!(reader.consume_boundary().is_err());
}
| |
/// A boundary with nothing after it cannot be verified and must be rejected.
#[test]
fn test_lone_boundary() {
    let mut body: &[u8] = b"--boundary";
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    assert!(reader.consume_boundary().is_err());
}
| |
/// A boundary followed by bytes other than CRLF or `--` must be rejected.
#[test]
fn test_invalid_boundary() {
    let mut body: &[u8] = b"--boundary\x00\x00";
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    assert!(reader.consume_boundary().is_err());
}
| |
/// Consuming a boundary without first reading the field in front of it skips that field.
#[test]
fn test_skip_field() {
    let mut body: &[u8] = b"--boundary\r\nfield1\r\n--boundary\r\nfield2\r\n--boundary--";
    let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

    assert!(reader.consume_boundary().unwrap());
    // skip `field1` by going straight to the next boundary
    assert!(reader.consume_boundary().unwrap());

    let mut field = String::new();
    reader.read_to_string(&mut field).unwrap();
    assert_eq!(field, "field2");

    assert!(!reader.consume_boundary().unwrap());
}
| |
/// Benchmarks, only built when the `bench` feature is enabled.
#[cfg(feature = "bench")]
mod bench {
    extern crate test;
    use self::test::Bencher;

    use super::*;

    /// Measures one full pass of the shared reader checks over `TEST_VAL`, reusing a single
    /// reader and scratch string across iterations.
    #[bench]
    fn bench_boundary_reader(bencher: &mut Bencher) {
        let mut scratch = String::with_capacity(256);
        let mut reader = BoundaryReader::new_with_bytes(TEST_VAL.as_bytes(), BOUNDARY);

        bencher.iter(|| {
            reader.reset();
            test_boundary_reader(&mut reader, &mut scratch);
        });
    }
}
| } |