| // Copyright 2016 `multipart` Crate Developers |
| // |
| // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or |
| // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or |
| // http://opensource.org/licenses/MIT>, at your option. This file may not be |
| // copied, modified, or distributed except according to those terms. |
| |
| //! `multipart` field header parsing. |
| use mime::Mime; |
| |
| use std::error::Error; |
| use std::io::{self, BufRead, Read}; |
| use std::{fmt, str}; |
| |
| use std::sync::Arc; |
| |
| use super::httparse::{self, Error as HttparseError, Header, Status, EMPTY_HEADER}; |
| |
| use self::ReadEntryResult::*; |
| |
| use super::save::SaveBuilder; |
| |
| const EMPTY_STR_HEADER: StrHeader<'static> = StrHeader { name: "", val: "" }; |
| |
| macro_rules! invalid_cont_disp { |
| ($reason: expr, $cause: expr) => { |
| return Err(ParseHeaderError::InvalidContDisp( |
| $reason, |
| $cause.to_string(), |
| )); |
| }; |
| } |
| |
| /// Not exposed |
| #[derive(Copy, Clone, Debug)] |
| pub struct StrHeader<'a> { |
| name: &'a str, |
| val: &'a str, |
| } |
| |
| struct DisplayHeaders<'s, 'a: 's>(&'s [StrHeader<'a>]); |
| |
| impl<'s, 'a: 's> fmt::Display for DisplayHeaders<'s, 'a> { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| for hdr in self.0 { |
| writeln!(f, "{}: {}", hdr.name, hdr.val)?; |
| } |
| |
| Ok(()) |
| } |
| } |
| |
| fn with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError> |
| where |
| R: BufRead, |
| F: FnOnce(&[StrHeader]) -> Ret, |
| { |
| const HEADER_LEN: usize = 4; |
| |
| let consume; |
| let ret; |
| |
| let mut last_len = 0; |
| |
| loop { |
| // this should return a larger buffer each time |
| let buf = r.fill_buf()?; |
| |
| // buffer has stopped growing |
| if buf.len() == last_len { |
| return Err(ParseHeaderError::TooLarge); |
| } |
| |
| let mut raw_headers = [EMPTY_HEADER; HEADER_LEN]; |
| |
| match httparse::parse_headers(buf, &mut raw_headers)? { |
| // read more and try again |
| Status::Partial => last_len = buf.len(), |
| Status::Complete((consume_, raw_headers)) => { |
| let mut headers = [EMPTY_STR_HEADER; HEADER_LEN]; |
| let headers = copy_headers(raw_headers, &mut headers)?; |
| debug!("Parsed headers: {:?}", headers); |
| consume = consume_; |
| ret = closure(headers); |
| break; |
| } |
| } |
| } |
| |
| r.consume(consume); |
| Ok(ret) |
| } |
| |
| fn copy_headers<'h, 'b: 'h>( |
| raw: &[Header<'b>], |
| headers: &'h mut [StrHeader<'b>], |
| ) -> io::Result<&'h [StrHeader<'b>]> { |
| for (raw, header) in raw.iter().zip(&mut *headers) { |
| header.name = raw.name; |
| header.val = io_str_utf8(raw.value)?; |
| } |
| |
| Ok(&headers[..raw.len()]) |
| } |
| |
| /// The headers that (may) appear before a `multipart/form-data` field. |
| /// |
| /// ### Warning: Values are Client-Provided |
| /// Everything in this struct are values from the client and should be considered **untrustworthy**. |
| /// This crate makes no effort to validate or sanitize any client inputs. |
| #[derive(Clone, Debug)] |
| pub struct FieldHeaders { |
| /// The field's name from the form. |
| pub name: Arc<str>, |
| |
| /// The filename of this entry, if supplied. This is not guaranteed to match the original file |
| /// or even to be a valid filename for the current platform. |
| pub filename: Option<String>, |
| |
| /// The MIME type (`Content-Type` value) of this file, if supplied by the client. |
| /// |
| /// If this is not supplied, the content-type of the field should default to `text/plain` as |
| /// per [IETF RFC 7578, section 4.4](https://tools.ietf.org/html/rfc7578#section-4.4), but this |
| /// should not be implicitly trusted. This crate makes no attempt to identify or validate |
| /// the content-type of the actual field data. |
| pub content_type: Option<Mime>, |
| } |
| |
| impl FieldHeaders { |
| /// Parse the field headers from the passed `BufRead`, consuming the relevant bytes. |
| fn read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError> { |
| with_headers(r, Self::parse)? |
| } |
| |
| fn parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError> { |
| let cont_disp = ContentDisp::parse_required(headers)?; |
| |
| Ok(FieldHeaders { |
| name: cont_disp.field_name.into(), |
| filename: cont_disp.filename, |
| content_type: parse_content_type(headers)?, |
| }) |
| } |
| } |
| |
| /// The `Content-Disposition` header. |
| struct ContentDisp { |
| /// The name of the `multipart/form-data` field. |
| field_name: String, |
| /// The optional filename for this field. |
| filename: Option<String>, |
| } |
| |
| impl ContentDisp { |
| fn parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError> { |
| let header = if let Some(header) = find_header(headers, "Content-Disposition") { |
| header |
| } else { |
| return Err(ParseHeaderError::MissingContentDisposition( |
| DisplayHeaders(headers).to_string(), |
| )); |
| }; |
| |
| // Content-Disposition: ? |
| let after_disp_type = match split_once(header.val, ';') { |
| Some((disp_type, after_disp_type)) => { |
| // assert Content-Disposition: form-data |
| // but needs to be parsed out to trim the spaces (allowed by spec IIRC) |
| if disp_type.trim() != "form-data" { |
| invalid_cont_disp!("unexpected Content-Disposition value", disp_type); |
| } |
| after_disp_type |
| } |
| None => invalid_cont_disp!( |
| "expected additional data after Content-Disposition type", |
| header.val |
| ), |
| }; |
| |
| // Content-Disposition: form-data; name=? |
| let (field_name, filename) = match get_str_after("name=", ';', after_disp_type) { |
| None => invalid_cont_disp!( |
| "expected field name and maybe filename, got", |
| after_disp_type |
| ), |
| // Content-Disposition: form-data; name={field_name}; filename=? |
| Some((field_name, after_field_name)) => { |
| let field_name = trim_quotes(field_name); |
| let filename = get_str_after("filename=", ';', after_field_name) |
| .map(|(filename, _)| trim_quotes(filename).to_owned()); |
| (field_name, filename) |
| } |
| }; |
| |
| Ok(ContentDisp { |
| field_name: field_name.to_owned(), |
| filename, |
| }) |
| } |
| } |
| |
| fn parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError> { |
| if let Some(header) = find_header(headers, "Content-Type") { |
| // Boundary parameter will be parsed into the `Mime` |
| debug!("Found Content-Type: {:?}", header.val); |
| Ok(Some(header.val.parse::<Mime>().map_err(|_| { |
| ParseHeaderError::MimeError(header.val.into()) |
| })?)) |
| } else { |
| Ok(None) |
| } |
| } |
| |
| /// A field in a multipart request with its associated headers and data. |
| #[derive(Debug)] |
| pub struct MultipartField<M: ReadEntry> { |
| /// The headers for this field, including the name, filename, and content-type, if provided. |
| /// |
| /// ### Warning: Values are Client-Provided |
| /// Everything in this struct are values from the client and should be considered **untrustworthy**. |
| /// This crate makes no effort to validate or sanitize any client inputs. |
| pub headers: FieldHeaders, |
| |
| /// The field's data. |
| pub data: MultipartData<M>, |
| } |
| |
| impl<M: ReadEntry> MultipartField<M> { |
| /// Returns `true` if this field has no content-type or the content-type is `text/...`. |
| /// |
| /// This typically means it can be read to a string, but it could still be using an unsupported |
| /// character encoding, so decoding to `String` needs to ensure that the data is valid UTF-8. |
| /// |
| /// Note also that the field contents may be too large to reasonably fit in memory. |
| /// The `.save()` adapter can be used to enforce a size limit. |
| /// |
| /// Detecting character encodings by any means is (currently) beyond the scope of this crate. |
| pub fn is_text(&self) -> bool { |
| self.headers |
| .content_type |
| .as_ref() |
| .map_or(true, |ct| ct.type_() == mime::TEXT) |
| } |
| |
| /// Read the next entry in the request. |
| pub fn next_entry(self) -> ReadEntryResult<M> { |
| self.data.into_inner().read_entry() |
| } |
| |
| /// Update `self` as the next entry. |
| /// |
| /// Returns `Ok(Some(self))` if another entry was read, `Ok(None)` if the end of the body was |
| /// reached, and `Err(e)` for any errors that occur. |
| pub fn next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>> |
| where |
| for<'a> &'a mut M: ReadEntry, |
| { |
| let multipart = self.data.take_inner(); |
| |
| match multipart.read_entry() { |
| Entry(entry) => { |
| *self = entry; |
| Ok(Some(self)) |
| } |
| End(multipart) => { |
| self.data.give_inner(multipart); |
| Ok(None) |
| } |
| Error(multipart, err) => { |
| self.data.give_inner(multipart); |
| Err(err) |
| } |
| } |
| } |
| } |
| |
| /// The data of a field in a `multipart/form-data` request. |
| /// |
| /// You can read it to EOF, or use the `save()` adaptor to save it to disk/memory. |
| #[derive(Debug)] |
| pub struct MultipartData<M> { |
| inner: Option<M>, |
| } |
| |
| const DATA_INNER_ERR: &str = "MultipartFile::inner taken and not replaced; this is likely \ |
| caused by a logic error in `multipart` or by resuming after \ |
| a previously caught panic.\nPlease open an issue with the \ |
| relevant backtrace and debug logs at \ |
| https://github.com/abonander/multipart"; |
| |
| impl<M> MultipartData<M> |
| where |
| M: ReadEntry, |
| { |
| /// Get a builder type which can save the field with or without a size limit. |
| pub fn save(&mut self) -> SaveBuilder<&mut Self> { |
| SaveBuilder::new(self) |
| } |
| |
| /// Take the inner `Multipart` or `&mut Multipart` |
| pub fn into_inner(self) -> M { |
| self.inner.expect(DATA_INNER_ERR) |
| } |
| |
| /// Set the minimum buffer size that `BufRead::fill_buf(self)` will return |
| /// until the end of the stream is reached. Set this as small as you can tolerate |
| /// to minimize `read()` calls (`read()` won't be called again until the buffer |
| /// is smaller than this). |
| /// |
| /// This value is reset between fields. |
| pub fn set_min_buf_size(&mut self, min_buf_size: usize) { |
| self.inner_mut().set_min_buf_size(min_buf_size) |
| } |
| |
| fn inner_mut(&mut self) -> &mut M { |
| self.inner.as_mut().expect(DATA_INNER_ERR) |
| } |
| |
| fn take_inner(&mut self) -> M { |
| self.inner.take().expect(DATA_INNER_ERR) |
| } |
| |
| fn give_inner(&mut self, inner: M) { |
| self.inner = Some(inner); |
| } |
| } |
| |
| impl<M: ReadEntry> Read for MultipartData<M> { |
| fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
| self.inner_mut().source_mut().read(buf) |
| } |
| } |
| |
| /// In this implementation, `fill_buf()` can return more data with each call. |
| /// |
| /// Use `set_min_buf_size()` if you require a minimum buffer length. |
| impl<M: ReadEntry> BufRead for MultipartData<M> { |
| fn fill_buf(&mut self) -> io::Result<&[u8]> { |
| self.inner_mut().source_mut().fill_buf() |
| } |
| |
| fn consume(&mut self, amt: usize) { |
| self.inner_mut().source_mut().consume(amt) |
| } |
| } |
| |
| fn split_once(s: &str, delim: char) -> Option<(&str, &str)> { |
| s.find(delim).map(|idx| s.split_at(idx)) |
| } |
| |
| fn trim_quotes(s: &str) -> &str { |
| s.trim_matches('"') |
| } |
| |
| /// Get the string after `needle` in `haystack`, stopping before `end_val_delim` |
| fn get_str_after<'a>( |
| needle: &str, |
| end_val_delim: char, |
| haystack: &'a str, |
| ) -> Option<(&'a str, &'a str)> { |
| let val_start_idx = try_opt!(haystack.find(needle)) + needle.len(); |
| let val_end_idx = haystack[val_start_idx..] |
| .find(end_val_delim) |
| .map_or(haystack.len(), |end_idx| end_idx + val_start_idx); |
| Some(( |
| &haystack[val_start_idx..val_end_idx], |
| &haystack[val_end_idx..], |
| )) |
| } |
| |
| fn io_str_utf8(buf: &[u8]) -> io::Result<&str> { |
| str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) |
| } |
| |
| fn find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>> { |
| // Field names are case insensitive and consist of ASCII characters |
| // only (see https://tools.ietf.org/html/rfc822#section-3.2). |
| headers |
| .iter() |
| .find(|header| header.name.eq_ignore_ascii_case(name)) |
| } |
| |
| /// Common trait for `Multipart` and `&mut Multipart` |
| pub trait ReadEntry: PrivReadEntry + Sized { |
| /// Attempt to read the next entry in the multipart stream. |
| fn read_entry(mut self) -> ReadEntryResult<Self> { |
| self.set_min_buf_size(super::boundary::MIN_BUF_SIZE); |
| |
| debug!("ReadEntry::read_entry()"); |
| |
| if !try_read_entry!(self; self.consume_boundary()) { |
| return End(self); |
| } |
| |
| let field_headers: FieldHeaders = try_read_entry!(self; self.read_headers()); |
| |
| if let Some(ct) = field_headers.content_type.as_ref() { |
| if ct.type_() == mime::MULTIPART { |
| // fields of this type are sent by (supposedly) no known clients |
| // (https://tools.ietf.org/html/rfc7578#appendix-A) so I'd be fascinated |
| // to hear about any in the wild |
| info!( |
| "Found nested multipart field: {:?}:\r\n\ |
| Please report this client's User-Agent and any other available details \ |
| at https://github.com/abonander/multipart/issues/56", |
| field_headers |
| ); |
| } |
| } |
| |
| Entry(MultipartField { |
| headers: field_headers, |
| data: MultipartData { inner: Some(self) }, |
| }) |
| } |
| |
| /// Equivalent to `read_entry()` but takes `&mut self` |
| fn read_entry_mut(&mut self) -> ReadEntryResult<&mut Self> { |
| ReadEntry::read_entry(self) |
| } |
| } |
| |
| impl<T> ReadEntry for T where T: PrivReadEntry {} |
| |
| /// Public trait but not re-exported. |
| pub trait PrivReadEntry { |
| type Source: BufRead; |
| |
| fn source_mut(&mut self) -> &mut Self::Source; |
| |
| fn set_min_buf_size(&mut self, min_buf_size: usize); |
| |
| /// Consume the next boundary. |
| /// Returns `true` if a field should follow, `false` otherwise. |
| fn consume_boundary(&mut self) -> io::Result<bool>; |
| |
| fn read_headers(&mut self) -> Result<FieldHeaders, io::Error> { |
| FieldHeaders::read_from(self.source_mut()) |
| .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) |
| } |
| |
| fn read_to_string(&mut self) -> io::Result<String> { |
| let mut buf = String::new(); |
| |
| match self.source_mut().read_to_string(&mut buf) { |
| Ok(_) => Ok(buf), |
| Err(err) => Err(err), |
| } |
| } |
| } |
| |
| impl<'a, M: ReadEntry> PrivReadEntry for &'a mut M { |
| type Source = M::Source; |
| |
| fn source_mut(&mut self) -> &mut M::Source { |
| (**self).source_mut() |
| } |
| |
| fn set_min_buf_size(&mut self, min_buf_size: usize) { |
| (**self).set_min_buf_size(min_buf_size) |
| } |
| |
| fn consume_boundary(&mut self) -> io::Result<bool> { |
| (**self).consume_boundary() |
| } |
| } |
| |
| /// Ternary result type returned by `ReadEntry::next_entry()`, |
| /// `Multipart::into_entry()` and `MultipartField::next_entry()`. |
| pub enum ReadEntryResult<M: ReadEntry, Entry = MultipartField<M>> { |
| /// The next entry was found. |
| Entry(Entry), |
| /// No more entries could be read. |
| End(M), |
| /// An error occurred. |
| Error(M, io::Error), |
| } |
| |
| impl<M: ReadEntry, Entry> ReadEntryResult<M, Entry> { |
| /// Convert `self` into `Result<Option<Entry>>` as follows: |
| /// |
| /// * `Entry(entry) -> Ok(Some(entry))` |
| /// * `End(_) -> Ok(None)` |
| /// * `Error(_, err) -> Err(err)` |
| pub fn into_result(self) -> io::Result<Option<Entry>> { |
| match self { |
| ReadEntryResult::Entry(entry) => Ok(Some(entry)), |
| ReadEntryResult::End(_) => Ok(None), |
| ReadEntryResult::Error(_, err) => Err(err), |
| } |
| } |
| |
| /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`. |
| pub fn unwrap(self) -> Entry { |
| self.expect_alt( |
| "`ReadEntryResult::unwrap()` called on `End` value", |
| "`ReadEntryResult::unwrap()` called on `Error` value: {:?}", |
| ) |
| } |
| |
| /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error` |
| /// with the given message. Adds the error's message in the `Error` case. |
| pub fn expect(self, msg: &str) -> Entry { |
| self.expect_alt(msg, msg) |
| } |
| |
| /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`. |
| /// If this is `End`, panics with `end_msg`; if `Error`, panics with `err_msg` |
| /// as well as the error's message. |
| pub fn expect_alt(self, end_msg: &str, err_msg: &str) -> Entry { |
| match self { |
| Entry(entry) => entry, |
| End(_) => panic!("{}", end_msg), |
| Error(_, err) => panic!("{}: {:?}", err_msg, err), |
| } |
| } |
| |
| /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case. |
| pub fn unwrap_opt(self) -> Option<Entry> { |
| self.expect_opt("`ReadEntryResult::unwrap_opt()` called on `Error` value") |
| } |
| |
| /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case |
| /// with the given message as well as the error's message. |
| pub fn expect_opt(self, msg: &str) -> Option<Entry> { |
| match self { |
| Entry(entry) => Some(entry), |
| End(_) => None, |
| Error(_, err) => panic!("{}: {:?}", msg, err), |
| } |
| } |
| } |
| |
| const GENERIC_PARSE_ERR: &str = "an error occurred while parsing field headers"; |
| |
| quick_error! { |
| #[derive(Debug)] |
| enum ParseHeaderError { |
| /// The `Content-Disposition` header was not found |
| MissingContentDisposition(headers: String) { |
| display(x) -> ("{}:\n{}", x.description(), headers) |
| description("\"Content-Disposition\" header not found in field headers") |
| } |
| InvalidContDisp(reason: &'static str, cause: String) { |
| display(x) -> ("{}: {}: {}", x.description(), reason, cause) |
| description("invalid \"Content-Disposition\" header") |
| } |
| /// The header was found but could not be parsed |
| TokenizeError(err: HttparseError) { |
| description(GENERIC_PARSE_ERR) |
| display(x) -> ("{}: {}", x.description(), err) |
| cause(err) |
| from() |
| } |
| MimeError(cont_type: String) { |
| description("Failed to parse Content-Type") |
| display(this) -> ("{}: {}", this.description(), cont_type) |
| } |
| TooLarge { |
| description("field headers section ridiculously long or missing trailing CRLF-CRLF") |
| } |
| /// IO error |
| Io(err: io::Error) { |
| description("an io error occurred while parsing the headers") |
| display(x) -> ("{}: {}", x.description(), err) |
| cause(err) |
| from() |
| } |
| } |
| } |
| |
| #[test] |
| fn test_find_header() { |
| let headers = [ |
| StrHeader { |
| name: "Content-Type", |
| val: "text/plain", |
| }, |
| StrHeader { |
| name: "Content-disposition", |
| val: "form-data", |
| }, |
| StrHeader { |
| name: "content-transfer-encoding", |
| val: "binary", |
| }, |
| ]; |
| |
| assert_eq!( |
| find_header(&headers, "Content-Type").unwrap().val, |
| "text/plain" |
| ); |
| assert_eq!( |
| find_header(&headers, "Content-Disposition").unwrap().val, |
| "form-data" |
| ); |
| assert_eq!( |
| find_header(&headers, "Content-Transfer-Encoding") |
| .unwrap() |
| .val, |
| "binary" |
| ); |
| } |