blob: 3d66eea57c6c5cfa592806f5fd4732ec74cccb01 [file] [log] [blame]
use std::fs::File;
use std::io::{self, BufRead, Seek};
use std::marker::PhantomData;
use std::path::Path;
use std::result;
use csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder};
use serde::de::DeserializeOwned;
use crate::byte_record::{ByteRecord, Position};
use crate::error::{Error, ErrorKind, Result, Utf8Error};
use crate::string_record::StringRecord;
use crate::{Terminator, Trim};
/// Builds a CSV reader with various configuration knobs.
///
/// This builder can be used to tweak the field delimiter, record terminator
/// and more. Once a CSV `Reader` is built, its configuration cannot be
/// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    /// Capacity (in bytes) of the `io::BufReader` that wraps the underlying
    /// reader.
    capacity: usize,
    /// When enabled, records are permitted to have differing numbers of
    /// fields; when disabled, unequal lengths produce an error.
    flexible: bool,
    /// When enabled (the default), the first record is treated as a special
    /// header row and is excluded from the record iterators.
    has_headers: bool,
    /// Whitespace trimming behavior for headers and/or fields.
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}
impl Default for ReaderBuilder {
    /// Returns a builder with the conventional CSV configuration: headers
    /// enabled, strict record lengths, no trimming and an 8KB read buffer.
    fn default() -> ReaderBuilder {
        // 8 KiB of buffering for the wrapped reader.
        let capacity = 8 * (1 << 10);
        ReaderBuilder {
            builder: Box::new(CoreReaderBuilder::default()),
            has_headers: true,
            flexible: false,
            trim: Trim::default(),
            capacity,
        }
    }
}
impl ReaderBuilder {
    /// Create a new builder for configuring CSV parsing.
    ///
    /// To convert a builder into a reader, call one of the methods starting
    /// with `from_`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn new() -> ReaderBuilder {
        ReaderBuilder::default()
    }

    /// Build a CSV parser from this configuration that reads data from the
    /// given file path.
    ///
    /// If there was a problem opening the file at the given path, then this
    /// returns the corresponding error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
        Ok(Reader::new(self, File::open(path)?))
    }

    /// Build a CSV parser from this configuration that reads data from `rdr`.
    ///
    /// Note that the CSV reader is buffered automatically, so you should not
    /// wrap `rdr` in a buffered reader like `io::BufReader`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
        Reader::new(self, rdr)
    }

    /// The field delimiter to use when parsing CSV.
    ///
    /// The default is `b','`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city;country;pop
    /// Boston;United States;4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .delimiter(b';')
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
        self.builder.delimiter(delimiter);
        self
    }

    /// Whether to treat the first row as a special header row.
    ///
    /// By default, the first row is treated as a special header row, which
    /// means the header is never returned by any of the record reading methods
    /// or iterators. When this is disabled (`yes` set to `false`), the first
    /// row is not treated specially.
    ///
    /// Note that the `headers` and `byte_headers` methods are unaffected by
    /// whether this is set. Those methods always return the first record.
    ///
    /// # Example
    ///
    /// This example shows what happens when `has_headers` is disabled.
    /// Namely, the first row is treated just like any other row.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader(data.as_bytes());
    ///     let mut iter = rdr.records();
    ///
    ///     // Read the first record.
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["city", "country", "pop"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got none"));
    ///     }
    ///
    ///     // Read the second record.
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got one"))
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.has_headers = yes;
        self
    }

    /// Whether the number of fields in records is allowed to change or not.
    ///
    /// When disabled (which is the default), parsing CSV data will return an
    /// error if a record is found with a number of fields different from the
    /// number of fields in a previous record.
    ///
    /// When enabled, this error checking is turned off.
    ///
    /// # Example: flexible records enabled
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the first row is missing the population count.
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .flexible(true)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// # Example: flexible records disabled
    ///
    /// This shows the error that appears when records of unequal length
    /// are found and flexible records have been disabled (which is the
    /// default).
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ErrorKind, ReaderBuilder};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the first row is missing the population count.
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .flexible(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(Err(err)) = rdr.records().next() {
    ///         match *err.kind() {
    ///             ErrorKind::UnequalLengths { expected_len, len, .. } => {
    ///                 // The header row has 3 fields...
    ///                 assert_eq!(expected_len, 3);
    ///                 // ... but the first row has only 2 fields.
    ///                 assert_eq!(len, 2);
    ///                 Ok(())
    ///             }
    ///             ref wrong => {
    ///                 Err(From::from(format!(
    ///                     "expected UnequalLengths error but got {:?}",
    ///                     wrong)))
    ///             }
    ///         }
    ///     } else {
    ///         Err(From::from(
    ///             "expected at least one errored record but got none"))
    ///     }
    /// }
    /// ```
    pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.flexible = yes;
        self
    }

    /// Whether fields are trimmed of leading and trailing whitespace or not.
    ///
    /// By default, no trimming is performed. This method permits one to
    /// override that behavior and choose one of the following options:
    ///
    /// 1. `Trim::Headers` trims only header values.
    /// 2. `Trim::Fields` trims only non-header or "field" values.
    /// 3. `Trim::All` trims both header and non-header values.
    ///
    /// A value is only interpreted as a header value if this CSV reader is
    /// configured to read a header record (which is the default).
    ///
    /// When reading string records, characters meeting the definition of
    /// Unicode whitespace are trimmed. When reading byte records, characters
    /// meeting the definition of ASCII whitespace are trimmed. ASCII
    /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
    ///
    /// # Example
    ///
    /// This example shows what happens when all values are trimmed.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord, Trim};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city ,   country ,  pop
    /// Boston,\"
    ///    United States\",4628910
    /// Concord,   United States   ,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .trim(Trim::All)
    ///         .from_reader(data.as_bytes());
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
        self.trim = trim;
        self
    }

    /// The record terminator to use when parsing CSV.
    ///
    /// A record terminator can be any single byte. The default is a special
    /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
    /// or `\r\n` as a single record terminator.
    ///
    /// # Example: `$` as a record terminator
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, Terminator};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "city,country,pop$Boston,United States,4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .terminator(Terminator::Any(b'$'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
        self.builder.terminator(term.to_core());
        self
    }

    /// The quote character to use when parsing CSV.
    ///
    /// The default is `b'"'`.
    ///
    /// # Example: single quotes instead of double quotes
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,'United States',4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quote(b'\'')
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
        self.builder.quote(quote);
        self
    }

    /// The escape character to use when parsing CSV.
    ///
    /// In some variants of CSV, quotes are escaped using a special escape
    /// character like `\` (instead of escaping quotes by doubling them).
    ///
    /// By default, recognizing these idiosyncratic escapes is disabled.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The \\\"United\\\" States\",4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .escape(Some(b'\\'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "The \"United\" States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
        self.builder.escape(escape);
        self
    }

    /// Enable double quote escapes.
    ///
    /// This is enabled by default, but it may be disabled. When disabled,
    /// doubled quotes are not interpreted as escapes.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The \"\"United\"\" States\",4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .double_quote(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "The \"United\"\" States\"", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.double_quote(yes);
        self
    }

    /// Enable or disable quoting.
    ///
    /// This is enabled by default, but it may be disabled. When disabled,
    /// quotes are not treated specially.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quoting(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "\"The United States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.quoting(yes);
        self
    }

    /// The comment character to use when parsing CSV.
    ///
    /// If the start of a record begins with the byte given here, then that
    /// line is ignored by the CSV parser.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// #Concord,United States,42695
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .comment(Some(b'#'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
        self.builder.comment(comment);
        self
    }

    /// A convenience method for specifying a configuration to read ASCII
    /// delimited text.
    ///
    /// This sets the delimiter and record terminator to the ASCII unit
    /// separator (`\x1F`) and record separator (`\x1E`), respectively.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .ascii()
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn ascii(&mut self) -> &mut ReaderBuilder {
        self.builder.ascii();
        self
    }

    /// Set the capacity (in bytes) of the buffer used in the CSV reader.
    /// This defaults to a reasonable setting (8KB, as set by
    /// `ReaderBuilder::default`).
    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
        self.capacity = capacity;
        self
    }

    /// Enable or disable the NFA for parsing CSV.
    ///
    /// This is intended to be a debug option. The NFA is always slower than
    /// the DFA.
    #[doc(hidden)]
    pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.nfa(yes);
        self
    }
}
/// An already configured CSV reader.
///
/// A CSV reader takes as input CSV data and transforms that into standard Rust
/// values. The most flexible way to read CSV data is as a sequence of records,
/// where a record is a sequence of fields and each field is a string. However,
/// a reader can also deserialize CSV data into Rust types like `i64` or
/// `(String, f64, f64, f64)` or even a custom struct automatically using
/// Serde.
///
/// # Configuration
///
/// A CSV reader has a couple convenient constructor methods like `from_path`
/// and `from_reader`. However, if you want to configure the CSV reader to use
/// a different delimiter or quote character (among many other things), then
/// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
/// a `Reader`. For example, to change the field delimiter:
///
/// ```
/// use std::error::Error;
/// use csv::ReaderBuilder;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city;country;pop
/// Boston;United States;4628910
/// ";
///     let mut rdr = ReaderBuilder::new()
///         .delimiter(b';')
///         .from_reader(data.as_bytes());
///
///     if let Some(result) = rdr.records().next() {
///         let record = result?;
///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```
///
/// # Error handling
///
/// In general, CSV *parsing* does not ever return an error. That is, there is
/// no such thing as malformed CSV data. Instead, this reader will prioritize
/// finding a parse over rejecting CSV data that it does not understand. This
/// choice was inspired by other popular CSV parsers, but also because it is
/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
/// it might still be possible to work with the data. In the land of CSV, there
/// is no "right" or "wrong," only "right" and "less right."
///
/// With that said, a number of errors can occur while reading CSV data:
///
/// * By default, all records in CSV data must have the same number of fields.
///   If a record is found with a different number of fields than a prior
///   record, then an error is returned. This behavior can be disabled by
///   enabling flexible parsing via the `flexible` method on
///   [`ReaderBuilder`](struct.ReaderBuilder.html).
/// * When reading CSV data from a resource (like a file), it is possible for
///   reading from the underlying resource to fail. This will return an error.
///   For subsequent calls to the `Reader` after encountering such an error
///   (unless `seek` is used), it will behave as if end of file had been
///   reached, in order to avoid running into infinite loops when still
///   attempting to read the next record when one has errored.
/// * When reading CSV data into `String` or `&str` fields (e.g., via a
///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
///   you want to read invalid UTF-8, then you should use the byte oriented
///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
///   support for another encoding entirely, then you'll need to use another
///   crate to transcode your CSV data to UTF-8 before parsing it.
/// * When using Serde to deserialize CSV data into Rust types, it is possible
///   for a number of additional errors to occur. For example, deserializing
///   a field `xyz` into an `i32` field will result in an error.
///
/// For more details on the precise semantics of errors, see the
/// [`Error`](enum.Error.html) type.
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}
#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an error
    /// is reported.
    flexible: bool,
    /// Whitespace trimming behavior for headers and/or fields.
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, having the best `Debug` impl and in case they need to be
    /// treated differently at some point, we store whether the `EOF` is
    /// considered because an actual EOF happened, or because we encountered
    /// an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}
/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, having the best `Debug` impl and in case they need to be
/// treated differently at some point, we store whether the `EOF` is
/// considered because an actual EOF happened, or because we encountered
/// an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    /// The underlying reader has not yet been exhausted.
    NotEof,
    /// A true end-of-file was observed on the underlying reader.
    Eof,
    /// An IO error occurred; treated as EOF for subsequent reads.
    IOError,
}
/// Headers encapsulates any data associated with the headers of CSV data.
///
/// The headers always correspond to the first row.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    ///
    /// The result of UTF-8 validation is cached here so it is performed at
    /// most once for the header row.
    string_record: result::Result<StringRecord, Utf8Error>,
}
// NOTE: this impl was previously written on the nonsensical nested type
// `Reader<Reader<File>>`, which made `Reader::<File>::from_path` unresolvable.
// The constructor belongs on `Reader<File>`, the type it returns.
impl Reader<File> {
    /// Create a new CSV parser with a default configuration for the given
    /// file path.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = Reader::from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
        ReaderBuilder::new().from_path(path)
    }
}
impl<R: io::Read> Reader<R> {
/// Create a new CSV reader given a builder and a source of underlying
/// bytes.
fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
Reader {
core: Box::new(builder.builder.build()),
rdr: io::BufReader::with_capacity(builder.capacity, rdr),
state: ReaderState {
headers: None,
has_headers: builder.has_headers,
flexible: builder.flexible,
trim: builder.trim,
first_field_count: None,
cur_pos: Position::new(),
first: false,
seeked: false,
eof: ReaderEofState::NotEof,
},
}
}
    /// Create a new CSV parser with a default configuration for the given
    /// reader.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader(rdr: R) -> Reader<R> {
        // Delegates to a default builder; all configuration knobs keep their
        // default values.
        ReaderBuilder::new().from_reader(rdr)
    }
    /// Returns a borrowed iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record. Additionally,
    /// if `has_headers` is enabled, then deserializing into a struct will
    /// automatically align the values in each row to the fields of a struct
    /// based on the header row.
    ///
    /// # Example
    ///
    /// This shows how to deserialize CSV data into normal Rust structs. The
    /// fields of the header row are used to match up the values in each row
    /// to the fields of the struct.
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::Reader;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     #[serde(rename = "popcount")]
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             city: "Boston".to_string(),
    ///             country: "United States".to_string(),
    ///             population: 4628910,
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// # Rules
    ///
    /// For the most part, any Rust type that maps straight-forwardly to a CSV
    /// record is supported. This includes maps, structs, tuples and tuple
    /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
    /// a more complicated story. In general, when working with CSV data, one
    /// should avoid *nested sequences* as much as possible.
    ///
    /// Maps, structs, tuples and tuple structs map to CSV records in a simple
    /// way. Tuples and tuple structs decode their fields in the order that
    /// they are defined. Structs will do the same only if `has_headers` has
    /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
    /// otherwise, structs and maps are deserialized based on the fields
    /// defined in the header row. (If there is no header row, then
    /// deserializing into a map will result in an error.)
    ///
    /// Nested sequences are supported in a limited capacity. Namely, they
    /// are flattened. As a result, it's often useful to use a `Vec` to capture
    /// a "tail" of fields in a record:
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::ReaderBuilder;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     label: String,
    ///     values: Vec<i32>,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "foo,1,2,3";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: "foo".to_string(),
    ///             values: vec![1, 2, 3],
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// In the above example, adding another field to the `Row` struct after
    /// the `values` field will result in a deserialization error. This is
    /// because the deserializer doesn't know when to stop reading fields
    /// into the `values` vector, so it will consume the rest of the fields in
    /// the record leaving none left over for the additional field.
    ///
    /// Finally, simple enums in Rust can be deserialized as well. Namely,
    /// enums must either be variants with no arguments or variants with a
    /// single argument. Variants with no arguments are deserialized based on
    /// which variant name the field matches. Variants with a single argument
    /// are deserialized based on which variant can store the data. The latter
    /// is only supported when using "untagged" enum deserialization. The
    /// following example shows both forms in action:
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::Reader;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, PartialEq)]
    /// struct Row {
    ///     label: Label,
    ///     value: Number,
    /// }
    ///
    /// #[derive(Debug, Deserialize, PartialEq)]
    /// #[serde(rename_all = "lowercase")]
    /// enum Label {
    ///     Celsius,
    ///     Fahrenheit,
    /// }
    ///
    /// #[derive(Debug, Deserialize, PartialEq)]
    /// #[serde(untagged)]
    /// enum Number {
    ///     Integer(i64),
    ///     Float(f64),
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// label,value
    /// celsius,22.2222
    /// fahrenheit,72
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     // Read the first record.
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: Label::Celsius,
    ///             value: Number::Float(22.2222),
    ///         });
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got none"));
    ///     }
    ///
    ///     // Read the second record.
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: Label::Fahrenheit,
    ///             value: Number::Integer(72),
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from(
    ///             "expected at least two records but got only one"))
    ///     }
    /// }
    /// ```
    pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
    where
        D: DeserializeOwned,
    {
        // The iterator borrows this reader mutably for its lifetime.
        DeserializeRecordsIter::new(self)
    }
/// Returns an owned iterator over deserialized records.
///
/// Each item yielded by this iterator is a `Result<D, Error>`.
/// Therefore, in order to access the record, callers must handle the
/// possibility of error (typically with `try!` or `?`).
///
/// This is mostly useful when you want to return a CSV iterator or store
/// it somewhere.
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this does not include the first record. Additionally,
/// if `has_headers` is enabled, then deserializing into a struct will
/// automatically align the values in each row to the fields of a struct
/// based on the header row.
///
/// For more detailed deserialization rules, see the documentation on the
/// `deserialize` method.
///
/// # Example
///
/// ```
/// use std::error::Error;
///
/// use csv::Reader;
/// use serde::Deserialize;
///
/// #[derive(Debug, Deserialize, Eq, PartialEq)]
/// struct Row {
/// city: String,
/// country: String,
/// #[serde(rename = "popcount")]
/// population: u64,
/// }
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,popcount
/// Boston,United States,4628910
/// ";
/// let rdr = Reader::from_reader(data.as_bytes());
/// let mut iter = rdr.into_deserialize();
///
/// if let Some(result) = iter.next() {
/// let record: Row = result?;
/// assert_eq!(record, Row {
/// city: "Boston".to_string(),
/// country: "United States".to_string(),
/// population: 4628910,
/// });
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn into_deserialize<D: DeserializeOwned>(
    self,
) -> DeserializeRecordsIntoIter<R, D> {
    // Owning variant; the iterator takes the reader with it.
    DeserializeRecordsIntoIter::new(self)
}
/// Returns a borrowed iterator over all records as strings.
///
/// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
/// Therefore, in order to access the record, callers must handle the
/// possibility of error (typically with `try!` or `?`).
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this does not include the first record.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
/// let mut iter = rdr.records();
///
/// if let Some(result) = iter.next() {
/// let record = result?;
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn records(&mut self) -> StringRecordsIter<R> {
    // Borrowing iterator: the reader can be reused after it is dropped.
    StringRecordsIter::new(self)
}
/// Returns an owned iterator over all records as strings.
///
/// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
/// Therefore, in order to access the record, callers must handle the
/// possibility of error (typically with `try!` or `?`).
///
/// This is mostly useful when you want to return a CSV iterator or store
/// it somewhere.
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this does not include the first record.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let rdr = Reader::from_reader(data.as_bytes());
/// let mut iter = rdr.into_records();
///
/// if let Some(result) = iter.next() {
/// let record = result?;
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn into_records(self) -> StringRecordsIntoIter<R> {
    // Consumes the reader; use `records()` to merely borrow it.
    StringRecordsIntoIter::new(self)
}
/// Returns a borrowed iterator over all records as raw bytes.
///
/// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
/// Therefore, in order to access the record, callers must handle the
/// possibility of error (typically with `try!` or `?`).
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this does not include the first record.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
/// let mut iter = rdr.byte_records();
///
/// if let Some(result) = iter.next() {
/// let record = result?;
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
    // Yields raw `ByteRecord`s, so no UTF-8 validation is imposed.
    ByteRecordsIter::new(self)
}
/// Returns an owned iterator over all records as raw bytes.
///
/// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
/// Therefore, in order to access the record, callers must handle the
/// possibility of error (typically with `try!` or `?`).
///
/// This is mostly useful when you want to return a CSV iterator or store
/// it somewhere.
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this does not include the first record.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let rdr = Reader::from_reader(data.as_bytes());
/// let mut iter = rdr.into_byte_records();
///
/// if let Some(result) = iter.next() {
/// let record = result?;
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
    // Owning byte-record iterator; consumes the reader.
    ByteRecordsIntoIter::new(self)
}
/// Returns a reference to the first row read by this parser.
///
/// If no row has been read yet, then this will force parsing of the first
/// row.
///
/// If there was a problem parsing the row or if it wasn't valid UTF-8,
/// then this returns an error.
///
/// If the underlying reader emits EOF before any data, then this returns
/// an empty record.
///
/// Note that this method may be used regardless of whether `has_headers`
/// was enabled (but it is enabled by default).
///
/// # Example
///
/// This example shows how to get the header row of CSV data. Notice that
/// the header row does not appear as a record in the iterator!
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
///
/// // We can read the headers before iterating.
/// {
/// // `headers` borrows from the reader, so we put this in its
/// // own scope. That way, the borrow ends before we try iterating
/// // below. Alternatively, we could clone the headers.
/// let headers = rdr.headers()?;
/// assert_eq!(headers, vec!["city", "country", "pop"]);
/// }
///
/// if let Some(result) = rdr.records().next() {
/// let record = result?;
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// } else {
/// return Err(From::from(
/// "expected at least one record but got none"))
/// }
///
/// // We can also read the headers after iterating.
/// let headers = rdr.headers()?;
/// assert_eq!(headers, vec!["city", "country", "pop"]);
/// Ok(())
/// }
/// ```
pub fn headers(&mut self) -> Result<&StringRecord> {
if self.state.headers.is_none() {
let mut record = ByteRecord::new();
self.read_byte_record_impl(&mut record)?;
self.set_headers_impl(Err(record));
}
let headers = self.state.headers.as_ref().unwrap();
match headers.string_record {
Ok(ref record) => Ok(record),
Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
pos: headers.byte_record.position().map(Clone::clone),
err: err.clone(),
})),
}
}
/// Returns a reference to the first row read by this parser as raw bytes.
///
/// If no row has been read yet, then this will force parsing of the first
/// row.
///
/// If there was a problem parsing the row then this returns an error.
///
/// If the underlying reader emits EOF before any data, then this returns
/// an empty record.
///
/// Note that this method may be used regardless of whether `has_headers`
/// was enabled (but it is enabled by default).
///
/// # Example
///
/// This example shows how to get the header row of CSV data. Notice that
/// the header row does not appear as a record in the iterator!
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
///
/// // We can read the headers before iterating.
/// {
/// // `headers` borrows from the reader, so we put this in its
/// // own scope. That way, the borrow ends before we try iterating
/// // below. Alternatively, we could clone the headers.
/// let headers = rdr.byte_headers()?;
/// assert_eq!(headers, vec!["city", "country", "pop"]);
/// }
///
/// if let Some(result) = rdr.byte_records().next() {
/// let record = result?;
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// } else {
/// return Err(From::from(
/// "expected at least one record but got none"))
/// }
///
/// // We can also read the headers after iterating.
/// let headers = rdr.byte_headers()?;
/// assert_eq!(headers, vec!["city", "country", "pop"]);
/// Ok(())
/// }
/// ```
pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
    // Lazily read and cache the first record as the header row.
    if self.state.headers.is_none() {
        let mut record = ByteRecord::new();
        self.read_byte_record_impl(&mut record)?;
        self.set_headers_impl(Err(record));
    }
    // Unlike `headers()`, a stored UTF-8 error is not surfaced here.
    Ok(&self.state.headers.as_ref().unwrap().byte_record)
}
/// Set the headers of this CSV parser manually.
///
/// This overrides any other setting (including `set_byte_headers`). Any
/// automatic detection of headers is disabled. This may be called at any
/// time.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::{Reader, StringRecord};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
///
/// assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
/// rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
/// assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
///
/// Ok(())
/// }
/// ```
pub fn set_headers(&mut self, headers: StringRecord) {
    // Stores both string and byte forms via the common impl.
    self.set_headers_impl(Ok(headers));
}
/// Set the headers of this CSV parser manually as raw bytes.
///
/// This overrides any other setting (including `set_headers`). Any
/// automatic detection of headers is disabled. This may be called at any
/// time.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::{Reader, ByteRecord};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
///
/// assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
/// rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
/// assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
///
/// Ok(())
/// }
/// ```
pub fn set_byte_headers(&mut self, headers: ByteRecord) {
    // `Err` here encodes "byte record", not a failure; see
    // `set_headers_impl` for the convention.
    self.set_headers_impl(Err(headers));
}
fn set_headers_impl(
&mut self,
headers: result::Result<StringRecord, ByteRecord>,
) {
// If we have string headers, then get byte headers. But if we have
// byte headers, then get the string headers (or a UTF-8 error).
let (mut str_headers, mut byte_headers) = match headers {
Ok(string) => {
let bytes = string.clone().into_byte_record();
(Ok(string), bytes)
}
Err(bytes) => {
match StringRecord::from_byte_record(bytes.clone()) {
Ok(str_headers) => (Ok(str_headers), bytes),
Err(err) => (Err(err.utf8_error().clone()), bytes),
}
}
};
if self.state.trim.should_trim_headers() {
if let Ok(ref mut str_headers) = str_headers.as_mut() {
str_headers.trim();
}
byte_headers.trim();
}
self.state.headers = Some(Headers {
byte_record: byte_headers,
string_record: str_headers,
});
}
/// Read a single row into the given record. Returns false when no more
/// records could be read.
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this will never read the first record.
///
/// This method is useful when you want to read records as fast as
/// as possible. It's less ergonomic than an iterator, but it permits the
/// caller to reuse the `StringRecord` allocation, which usually results
/// in higher throughput.
///
/// Records read via this method are guaranteed to have a position set
/// on them, even if the reader is at EOF or if an error is returned.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::{Reader, StringRecord};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
/// let mut record = StringRecord::new();
///
/// if rdr.read_record(&mut record)? {
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
    let outcome = record.read(self);
    // Byte-level trimming only strips ASCII whitespace, so string
    // records are trimmed again here to also cover Unicode whitespace.
    if self.state.trim.should_trim_fields() {
        record.trim();
    }
    outcome
}
/// Read a single row into the given byte record. Returns false when no
/// more records could be read.
///
/// If `has_headers` was enabled via a `ReaderBuilder` (which is the
/// default), then this will never read the first record.
///
/// This method is useful when you want to read records as fast as
/// as possible. It's less ergonomic than an iterator, but it permits the
/// caller to reuse the `ByteRecord` allocation, which usually results
/// in higher throughput.
///
/// Records read via this method are guaranteed to have a position set
/// on them, even if the reader is at EOF or if an error is returned.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::{ByteRecord, Reader};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
/// let mut rdr = Reader::from_reader(data.as_bytes());
/// let mut record = ByteRecord::new();
///
/// if rdr.read_byte_record(&mut record)? {
/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn read_byte_record(
    &mut self,
    record: &mut ByteRecord,
) -> Result<bool> {
    if !self.state.seeked && !self.state.has_headers && !self.state.first {
        // If the caller indicated "no headers" and we haven't yielded the
        // first record yet, then we should yield our header row if we have
        // one.
        if let Some(ref headers) = self.state.headers {
            self.state.first = true;
            record.clone_from(&headers.byte_record);
            if self.state.trim.should_trim_fields() {
                record.trim();
            }
            return Ok(!record.is_empty());
        }
    }
    let ok = self.read_byte_record_impl(record)?;
    self.state.first = true;
    if !self.state.seeked && self.state.headers.is_none() {
        // The very first record read doubles as the header row.
        self.set_headers_impl(Err(record.clone()));
        // If the end user indicated that we have headers, then we should
        // never return the first row. Instead, we should attempt to
        // read and return the next one.
        if self.state.has_headers {
            let result = self.read_byte_record_impl(record);
            if self.state.trim.should_trim_fields() {
                record.trim();
            }
            return result;
        }
    } else if self.state.trim.should_trim_fields() {
        record.trim();
    }
    Ok(ok)
}
/// Read a byte record from the underlying CSV reader, without accounting
/// for headers.
///
/// Drives `csv_core`'s incremental parser: refills the input buffer,
/// grows the record's field/end buffers on demand, and tracks the
/// byte/line position as input is consumed.
#[inline(always)]
fn read_byte_record_impl(
    &mut self,
    record: &mut ByteRecord,
) -> Result<bool> {
    use csv_core::ReadRecordResult::*;

    record.clear();
    record.set_position(Some(self.state.cur_pos.clone()));
    if self.state.eof != ReaderEofState::NotEof {
        return Ok(false);
    }
    // Running totals of field bytes / field-end offsets already written
    // into `record` across iterations of the loop below.
    let (mut outlen, mut endlen) = (0, 0);
    loop {
        let (res, nin, nout, nend) = {
            let input_res = self.rdr.fill_buf();
            if input_res.is_err() {
                // Remember the failure so later reads report EOF-ish state.
                self.state.eof = ReaderEofState::IOError;
            }
            let input = input_res?;
            let (fields, ends) = record.as_parts();
            self.core.read_record(
                input,
                &mut fields[outlen..],
                &mut ends[endlen..],
            )
        };
        self.rdr.consume(nin);
        // Advance our position by however many input bytes were consumed.
        let byte = self.state.cur_pos.byte();
        self.state
            .cur_pos
            .set_byte(byte + nin as u64)
            .set_line(self.core.line());
        outlen += nout;
        endlen += nend;
        match res {
            // Parser needs more input: refill the buffer and retry.
            InputEmpty => continue,
            // Record buffers are full: grow them and retry.
            OutputFull => {
                record.expand_fields();
                continue;
            }
            OutputEndsFull => {
                record.expand_ends();
                continue;
            }
            Record => {
                record.set_len(endlen);
                // Bumps the record count and enforces equal field counts.
                self.state.add_record(record)?;
                return Ok(true);
            }
            End => {
                self.state.eof = ReaderEofState::Eof;
                return Ok(false);
            }
        }
    }
}
/// Return the current position of this CSV reader.
///
/// The byte offset in the position returned can be used to `seek` this
/// reader. In particular, seeking to a position returned here on the same
/// data will result in parsing the same subsequent record.
///
/// # Example: reading the position
///
/// ```
/// use std::error::Error;
/// use std::io;
/// use csv::{Reader, Position};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,popcount
/// Boston,United States,4628910
/// Concord,United States,42695
/// ";
/// let rdr = Reader::from_reader(io::Cursor::new(data));
/// let mut iter = rdr.into_records();
/// let mut pos = Position::new();
/// loop {
/// // Read the position immediately before each record.
/// let next_pos = iter.reader().position().clone();
/// if iter.next().is_none() {
/// break;
/// }
/// pos = next_pos;
/// }
///
/// // `pos` should now be the position immediately before the last
/// // record.
/// assert_eq!(pos.byte(), 51);
/// assert_eq!(pos.line(), 3);
/// assert_eq!(pos.record(), 2);
/// Ok(())
/// }
/// ```
pub fn position(&self) -> &Position {
    // `cur_pos` is advanced as input is consumed in `read_byte_record_impl`.
    &self.state.cur_pos
}
/// Returns true if and only if this reader has been exhausted.
///
/// When this returns true, no more records can be read from this reader
/// (unless it has been seeked to another position).
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use std::io;
/// use csv::{Reader, Position};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,popcount
/// Boston,United States,4628910
/// Concord,United States,42695
/// ";
/// let mut rdr = Reader::from_reader(io::Cursor::new(data));
/// assert!(!rdr.is_done());
/// for result in rdr.records() {
/// let _ = result?;
/// }
/// assert!(rdr.is_done());
/// Ok(())
/// }
/// ```
pub fn is_done(&self) -> bool {
    // True on normal EOF and also after an I/O error from the reader.
    self.state.eof != ReaderEofState::NotEof
}
/// Returns true if and only if this reader has been configured to
/// interpret the first record as a header record.
pub fn has_headers(&self) -> bool {
    // Reflects the builder's `has_headers` setting stored on the state.
    self.state.has_headers
}
/// Returns a reference to the underlying reader.
pub fn get_ref(&self) -> &R {
    self.rdr.get_ref()
}
/// Returns a mutable reference to the underlying reader.
///
/// NOTE(review): reading from the underlying reader directly would
/// presumably desynchronize this parser's position tracking — confirm
/// before using for anything but configuration.
pub fn get_mut(&mut self) -> &mut R {
    self.rdr.get_mut()
}
/// Unwraps this CSV reader, returning the underlying reader.
///
/// Note that any leftover data inside this reader's internal buffer is
/// lost.
pub fn into_inner(self) -> R {
    self.rdr.into_inner()
}
}
impl<R: io::Read + io::Seek> Reader<R> {
/// Seeks the underlying reader to the position given.
///
/// This comes with a few caveats:
///
/// * Any internal buffer associated with this reader is cleared.
/// * If the given position does not correspond to a position immediately
/// before the start of a record, then the behavior of this reader is
/// unspecified.
/// * Any special logic that skips the first record in the CSV reader
/// when reading or iterating over records is disabled.
///
/// If the given position has a byte offset equivalent to the current
/// position, then no seeking is performed.
///
/// If the header row has not already been read, then this will attempt
/// to read the header row before seeking. Therefore, it is possible that
/// this returns an error associated with reading CSV data.
///
/// Note that seeking is performed based only on the byte offset in the
/// given position. Namely, the record or line numbers in the position may
/// be incorrect, but this will cause any future position generated by
/// this CSV reader to be similarly incorrect.
///
/// # Example: seek to parse a record twice
///
/// ```
/// use std::error::Error;
/// use std::io;
/// use csv::{Reader, Position};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// city,country,popcount
/// Boston,United States,4628910
/// Concord,United States,42695
/// ";
/// let rdr = Reader::from_reader(io::Cursor::new(data));
/// let mut iter = rdr.into_records();
/// let mut pos = Position::new();
/// loop {
/// // Read the position immediately before each record.
/// let next_pos = iter.reader().position().clone();
/// if iter.next().is_none() {
/// break;
/// }
/// pos = next_pos;
/// }
///
/// // Now seek the reader back to `pos`. This will let us read the
/// // last record again.
/// iter.reader_mut().seek(pos)?;
/// let mut iter = iter.into_reader().into_records();
/// if let Some(result) = iter.next() {
/// let record = result?;
/// assert_eq!(record, vec!["Concord", "United States", "42695"]);
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn seek(&mut self, pos: Position) -> Result<()> {
    // Ensure the header row has been read and cached before moving the
    // underlying reader, since seeking disables header detection.
    self.byte_headers()?;
    self.state.seeked = true;
    // Same byte offset as current position: nothing to do.
    if pos.byte() == self.state.cur_pos.byte() {
        return Ok(());
    }
    self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
    // Reset the underlying parser's state and adopt the caller's
    // position wholesale (line/record numbers are taken on trust).
    self.core.reset();
    self.core.set_line(pos.line());
    self.state.cur_pos = pos;
    self.state.eof = ReaderEofState::NotEof;
    Ok(())
}
/// This is like `seek`, but provides direct control over how the seeking
/// operation is performed via `io::SeekFrom`.
///
/// The `pos` position given *should* correspond the position indicated
/// by `seek_from`, but there is no requirement. If the `pos` position
/// given is incorrect, then the position information returned by this
/// reader will be similarly incorrect.
///
/// If the header row has not already been read, then this will attempt
/// to read the header row before seeking. Therefore, it is possible that
/// this returns an error associated with reading CSV data.
///
/// Unlike `seek`, this will always cause an actual seek to be performed.
pub fn seek_raw(
    &mut self,
    seek_from: io::SeekFrom,
    pos: Position,
) -> Result<()> {
    // Read/cache headers first, since seeking disables header handling.
    self.byte_headers()?;
    self.state.seeked = true;
    // Unconditional seek: no same-position shortcut as in `seek`.
    self.rdr.seek(seek_from)?;
    self.core.reset();
    self.core.set_line(pos.line());
    // Trust the caller's `pos`; future positions derive from it.
    self.state.cur_pos = pos;
    self.state.eof = ReaderEofState::NotEof;
    Ok(())
}
}
impl ReaderState {
    /// Account for a freshly parsed record: bump the record counter and,
    /// unless `flexible` was enabled, enforce that its field count matches
    /// the first record seen.
    #[inline(always)]
    fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
        // Overflowing a u64 record counter indicates a bug; panic loudly.
        let i = self.cur_pos.record();
        self.cur_pos.set_record(i.checked_add(1).unwrap());
        if !self.flexible {
            match self.first_field_count {
                // First record establishes the expected field count.
                None => self.first_field_count = Some(record.len() as u64),
                Some(expected) => {
                    if record.len() as u64 != expected {
                        return Err(Error::new(ErrorKind::UnequalLengths {
                            pos: record.position().map(Clone::clone),
                            expected_len: expected,
                            len: record.len() as u64,
                        }));
                    }
                }
            }
        }
        Ok(())
    }
}
/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
pub struct DeserializeRecordsIntoIter<R, D> {
    rdr: Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
    fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
        // Snapshot the header row up front (when headers are enabled) so
        // each record can be deserialized by field name. An error reading
        // the header is swallowed here and resurfaces during iteration.
        let headers = if rdr.state.has_headers {
            rdr.headers().ok().cloned()
        } else {
            None
        };
        DeserializeRecordsIntoIter {
            rdr,
            rec: StringRecord::new(),
            headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIntoIter<R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        // Reuse the scratch record; deserialize only on a successful read.
        match self.rdr.read_record(&mut self.rec) {
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
    fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
        // Cache the header row up front (when headers are enabled); an
        // error reading it is deferred until iteration.
        let headers = if rdr.state.has_headers {
            rdr.headers().ok().cloned()
        } else {
            None
        };
        DeserializeRecordsIter {
            rdr,
            rec: StringRecord::new(),
            headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIter<'r, R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// An owned iterator over records as strings.
pub struct StringRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: StringRecord,
}

impl<R: io::Read> StringRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
        StringRecordsIntoIter { rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        // Reuse the scratch record; hand the caller a truncated clone.
        match self.rdr.read_record(&mut self.rec) {
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
}

impl<'r, R: io::Read> StringRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
        StringRecordsIter { rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        // Reuse the scratch record; hand the caller a truncated clone.
        match self.rdr.read_record(&mut self.rec) {
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: ByteRecord,
}

impl<R: io::Read> ByteRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
        ByteRecordsIntoIter { rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        // Reuse the scratch record; hand the caller a truncated clone.
        match self.rdr.read_byte_record(&mut self.rec) {
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct ByteRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: ByteRecord,
}

impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
        ByteRecordsIter { rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        // Reuse the scratch record; hand the caller a truncated clone.
        match self.rdr.read_byte_record(&mut self.rec) {
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
#[cfg(test)]
mod tests {
use std::io;
use crate::byte_record::ByteRecord;
use crate::error::ErrorKind;
use crate::string_record::StringRecord;
use super::{Position, ReaderBuilder, Trim};
/// Test shorthand: view a `&str` as raw bytes.
fn b(s: &str) -> &[u8] {
    s.as_bytes()
}
/// Test shorthand: interpret `b` as UTF-8, panicking on invalid data
/// (acceptable inside tests).
fn s(b: &[u8]) -> &str {
    // 2018-edition path; the legacy `::std::` prefix is unnecessary in a
    // file that already uses `crate::` imports.
    std::str::from_utf8(b).unwrap()
}
/// Build a `Position` with the given byte offset, line and record number.
fn newpos(byte: u64, line: u64, record: u64) -> Position {
    let mut p = Position::new();
    p.set_byte(byte).set_line(line).set_record(record);
    p
}
#[test]
fn read_byte_record() {
    // A quoted field containing the delimiter must stay intact.
    let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
    let mut rdr =
        ReaderBuilder::new().has_headers(false).from_reader(data);
    let mut rec = ByteRecord::new();
    assert!(rdr.read_byte_record(&mut rec).unwrap());
    assert_eq!(3, rec.len());
    assert_eq!("foo", s(&rec[0]));
    assert_eq!("b,ar", s(&rec[1]));
    assert_eq!("baz", s(&rec[2]));
    assert!(rdr.read_byte_record(&mut rec).unwrap());
    assert_eq!(3, rec.len());
    assert_eq!("abc", s(&rec[0]));
    assert_eq!("mno", s(&rec[1]));
    assert_eq!("xyz", s(&rec[2]));
    // Third read hits EOF.
    assert!(!rdr.read_byte_record(&mut rec).unwrap());
}
#[test]
fn read_trimmed_records_and_headers() {
    let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
    let mut rdr = ReaderBuilder::new()
        .has_headers(true)
        .trim(Trim::All)
        .from_reader(data);
    let mut rec = ByteRecord::new();
    // Trim::All strips surrounding whitespace from record fields...
    assert!(rdr.read_byte_record(&mut rec).unwrap());
    assert_eq!("1", s(&rec[0]));
    assert_eq!("2", s(&rec[1]));
    assert_eq!("3", s(&rec[2]));
    let mut rec = StringRecord::new();
    assert!(rdr.read_record(&mut rec).unwrap());
    assert_eq!("1", &rec[0]);
    assert_eq!("", &rec[1]);
    assert_eq!("3", &rec[2]);
    {
        // ...and from header fields, even when read after the records.
        let headers = rdr.headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!("foo", &headers[0]);
        assert_eq!("bar", &headers[1]);
        assert_eq!("baz", &headers[2]);
    }
}
#[test]
fn read_trimmed_header() {
    let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
    let mut rdr = ReaderBuilder::new()
        .has_headers(true)
        .trim(Trim::Headers)
        .from_reader(data);
    let mut rec = ByteRecord::new();
    // Trim::Headers must leave record fields untouched...
    assert!(rdr.read_byte_record(&mut rec).unwrap());
    assert_eq!(" 1", s(&rec[0]));
    assert_eq!(" 2", s(&rec[1]));
    assert_eq!(" 3", s(&rec[2]));
    {
        // ...while the header fields themselves are trimmed.
        let headers = rdr.headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!("foo", &headers[0]);
        assert_eq!("bar", &headers[1]);
        assert_eq!("baz", &headers[2]);
    }
}
// Renamed from `read_trimed_header_invalid_utf8` to fix the typo; test
// functions are discovered via `#[test]`, so no callers are affected.
#[test]
fn read_trimmed_header_invalid_utf8() {
    let data = &b"foo, b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
    let mut rdr = ReaderBuilder::new()
        .has_headers(true)
        .trim(Trim::Headers)
        .from_reader(data);
    let mut rec = StringRecord::new();
    // force the headers to be read
    let _ = rdr.read_record(&mut rec);
    // Check the byte headers are trimmed
    {
        let headers = rdr.byte_headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!(b"foo", &headers[0]);
        assert_eq!(b"b\xFFar", &headers[1]);
        assert_eq!(b"baz", &headers[2]);
    }
    // Requesting the headers as strings must fail with a positioned UTF-8
    // error pointing into the second field.
    match *rdr.headers().unwrap_err().kind() {
        ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
            assert_eq!(pos, &newpos(0, 1, 0));
            assert_eq!(err.field(), 1);
            assert_eq!(err.valid_up_to(), 3);
        }
        ref err => panic!("match failed, got {:?}", err),
    }
}
#[test]
fn read_trimmed_records() {
    let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .trim(Trim::Fields)
        .from_reader(data);

    // With Trim::Fields, data record fields are trimmed...
    let mut record = ByteRecord::new();
    assert!(reader.read_byte_record(&mut record).unwrap());
    assert_eq!(s(&record[0]), "1");
    assert_eq!(s(&record[1]), "2");
    assert_eq!(s(&record[2]), "3");

    // ... while the header row keeps its whitespace.
    let headers = reader.headers().unwrap();
    assert_eq!(headers.len(), 3);
    assert_eq!(&headers[0], "foo");
    assert_eq!(&headers[1], " bar");
    assert_eq!(&headers[2], "\tbaz");
}
#[test]
fn read_record_unequal_fails() {
// Two records with different numbers of fields.
let data = b("foo\nbar,baz");
let mut rdr =
ReaderBuilder::new().has_headers(false).from_reader(data);
let mut rec = ByteRecord::new();
// The first record reads fine and fixes the expected length at 1.
assert!(rdr.read_byte_record(&mut rec).unwrap());
assert_eq!(1, rec.len());
assert_eq!("foo", s(&rec[0]));
// The second record has 2 fields, so (without `flexible`) reading it must
// fail with an UnequalLengths error carrying the offending record's
// position (byte 4, line 2, record index 1).
match rdr.read_byte_record(&mut rec) {
Err(err) => match *err.kind() {
ErrorKind::UnequalLengths {
expected_len: 1,
ref pos,
len: 2,
} => {
assert_eq!(pos, &Some(newpos(4, 2, 1)));
}
ref wrong => panic!("match failed, got {:?}", wrong),
},
wrong => panic!("match failed, got {:?}", wrong),
}
}
#[test]
fn read_record_unequal_ok() {
    let data = b("foo\nbar,baz");
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(data);
    let mut record = ByteRecord::new();

    // With `flexible`, records of differing lengths all read successfully.
    assert!(reader.read_byte_record(&mut record).unwrap());
    assert_eq!(record.len(), 1);
    assert_eq!(s(&record[0]), "foo");

    assert!(reader.read_byte_record(&mut record).unwrap());
    assert_eq!(record.len(), 2);
    assert_eq!(s(&record[0]), "bar");
    assert_eq!(s(&record[1]), "baz");

    assert!(!reader.read_byte_record(&mut record).unwrap());
}
// This tests that even if we get a CSV error, we can keep reading
// subsequent records if we want.
#[test]
fn read_record_unequal_continue() {
    let data = b("foo\nbar,baz\nquux");
    let mut reader =
        ReaderBuilder::new().has_headers(false).from_reader(data);
    let mut record = ByteRecord::new();

    // The first record fixes the expected length at 1.
    assert!(reader.read_byte_record(&mut record).unwrap());
    assert_eq!(record.len(), 1);
    assert_eq!(s(&record[0]), "foo");

    // The second record has 2 fields, which is an error...
    match reader.read_byte_record(&mut record) {
        Err(err) => match *err.kind() {
            ErrorKind::UnequalLengths {
                expected_len: 1,
                ref pos,
                len: 2,
            } => {
                assert_eq!(pos, &Some(newpos(4, 2, 1)));
            }
            ref wrong => panic!("match failed, got {:?}", wrong),
        },
        wrong => panic!("match failed, got {:?}", wrong),
    }

    // ... but reading can continue past it.
    assert!(reader.read_byte_record(&mut record).unwrap());
    assert_eq!(record.len(), 1);
    assert_eq!(s(&record[0]), "quux");
    assert!(!reader.read_byte_record(&mut record).unwrap());
}
#[test]
fn read_record_headers() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f");
    let mut reader =
        ReaderBuilder::new().has_headers(true).from_reader(data);
    let mut record = StringRecord::new();

    // The header row is skipped; reading starts at the first data row.
    assert!(reader.read_record(&mut record).unwrap());
    assert_eq!(record.len(), 3);
    assert_eq!(&record[0], "a");
    assert!(reader.read_record(&mut record).unwrap());
    assert_eq!(record.len(), 3);
    assert_eq!(&record[0], "d");
    assert!(!reader.read_record(&mut record).unwrap());

    // Headers remain available afterwards, as raw bytes...
    {
        let headers = reader.byte_headers().unwrap();
        assert_eq!(headers.len(), 3);
        assert_eq!(b"foo", &headers[0]);
        assert_eq!(b"bar", &headers[1]);
        assert_eq!(b"baz", &headers[2]);
    }
    // ... and as strings.
    {
        let headers = reader.headers().unwrap();
        assert_eq!(headers.len(), 3);
        assert_eq!(&headers[0], "foo");
        assert_eq!(&headers[1], "bar");
        assert_eq!(&headers[2], "baz");
    }
}
#[test]
fn read_record_headers_invalid_utf8() {
// The second header field contains an invalid UTF-8 byte (\xFF), while all
// data rows are valid UTF-8.
let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
let mut rec = StringRecord::new();
// Data rows read fine as strings.
assert!(rdr.read_record(&mut rec).unwrap());
assert_eq!(3, rec.len());
assert_eq!("a", &rec[0]);
assert!(rdr.read_record(&mut rec).unwrap());
assert_eq!(3, rec.len());
assert_eq!("d", &rec[0]);
assert!(!rdr.read_record(&mut rec).unwrap());
// Check that we can read the headers as raw bytes, but that
// if we read them as strings, we get an appropriate UTF-8 error.
{
let headers = rdr.byte_headers().unwrap();
assert_eq!(3, headers.len());
assert_eq!(b"foo", &headers[0]);
assert_eq!(b"b\xFFar", &headers[1]);
assert_eq!(b"baz", &headers[2]);
}
// The error identifies the offending field (index 1) and how many bytes of
// that field were valid UTF-8 before the bad byte (1: just "b").
match *rdr.headers().unwrap_err().kind() {
ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
assert_eq!(pos, &newpos(0, 1, 0));
assert_eq!(err.field(), 1);
assert_eq!(err.valid_up_to(), 1);
}
ref err => panic!("match failed, got {:?}", err),
}
}
#[test]
fn read_record_no_headers_before() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f");
    let mut reader =
        ReaderBuilder::new().has_headers(false).from_reader(data);

    // Asking for headers with has_headers(false) yields the first row...
    {
        let headers = reader.headers().unwrap();
        assert_eq!(headers.len(), 3);
        assert_eq!(&headers[0], "foo");
        assert_eq!(&headers[1], "bar");
        assert_eq!(&headers[2], "baz");
    }

    // ... and that same row is still returned when reading records.
    let mut record = StringRecord::new();
    for &first_field in &["foo", "a", "d"] {
        assert!(reader.read_record(&mut record).unwrap());
        assert_eq!(record.len(), 3);
        assert_eq!(&record[0], first_field);
    }
    assert!(!reader.read_record(&mut record).unwrap());
}
#[test]
fn read_record_no_headers_after() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f");
    let mut reader =
        ReaderBuilder::new().has_headers(false).from_reader(data);

    // With has_headers(false), the first row comes back as a record...
    let mut record = StringRecord::new();
    for &first_field in &["foo", "a", "d"] {
        assert!(reader.read_record(&mut record).unwrap());
        assert_eq!(record.len(), 3);
        assert_eq!(&record[0], first_field);
    }
    assert!(!reader.read_record(&mut record).unwrap());

    // ... yet it is still reported as the headers after reading everything.
    let headers = reader.headers().unwrap();
    assert_eq!(headers.len(), 3);
    assert_eq!(&headers[0], "foo");
    assert_eq!(&headers[1], "bar");
    assert_eq!(&headers[2], "baz");
}
#[test]
fn seek() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let mut reader =
        ReaderBuilder::new().from_reader(io::Cursor::new(data));

    // Jump straight to the third record ("d,e,f").
    reader.seek(newpos(18, 3, 2)).unwrap();
    assert_eq!(reader.position().byte(), 18);

    let mut record = StringRecord::new();
    assert!(reader.read_record(&mut record).unwrap());
    assert_eq!(record.len(), 3);
    assert_eq!(&record[0], "d");

    // Position now points at the start of the fourth record.
    assert_eq!(reader.position().byte(), 24);
    assert_eq!(reader.position().line(), 4);
    assert_eq!(reader.position().record(), 3);

    assert!(reader.read_record(&mut record).unwrap());
    assert_eq!(record.len(), 3);
    assert_eq!(&record[0], "g");
    assert!(!reader.read_record(&mut record).unwrap());
}
// Test that we can read headers after seeking even if the headers weren't
// explicitly read before seeking.
#[test]
fn seek_headers_after() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let mut reader =
        ReaderBuilder::new().from_reader(io::Cursor::new(data));
    reader.seek(newpos(18, 3, 2)).unwrap();
    assert_eq!(reader.headers().unwrap(), vec!["foo", "bar", "baz"]);
}
// Test that we can read headers after seeking if the headers were read
// before seeking.
#[test]
fn seek_headers_before_after() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let mut reader =
        ReaderBuilder::new().from_reader(io::Cursor::new(data));
    // Read (and copy) the headers before seeking...
    let headers = reader.headers().unwrap().clone();
    reader.seek(newpos(18, 3, 2)).unwrap();
    // ... and they are still reported unchanged afterwards.
    assert_eq!(&headers, reader.headers().unwrap());
}
// Test that even if we didn't read headers before seeking, if we seek to
// the current byte offset, then no seeking is done and therefore we can
// still read headers after seeking.
#[test]
fn seek_headers_no_actual_seek() {
    let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let mut reader =
        ReaderBuilder::new().from_reader(io::Cursor::new(data));
    // Seeking to the start (the current offset) is a no-op.
    reader.seek(Position::new()).unwrap();
    assert_eq!(&reader.headers().unwrap()[0], "foo");
}
// Test that position info is reported correctly in absence of headers.
#[test]
fn positions_no_headers() {
    let mut records = ReaderBuilder::new()
        .has_headers(false)
        .from_reader("a,b,c\nx,y,z".as_bytes())
        .into_records();

    // The first record starts at the very beginning of the input.
    let pos = records.next().unwrap().unwrap().position().unwrap().clone();
    assert_eq!((pos.byte(), pos.line(), pos.record()), (0, 1, 0));

    // The second record starts right after the first line.
    let pos = records.next().unwrap().unwrap().position().unwrap().clone();
    assert_eq!((pos.byte(), pos.line(), pos.record()), (6, 2, 1));
}
// Test that position info is reported correctly with headers.
#[test]
fn positions_headers() {
    let mut records = ReaderBuilder::new()
        .has_headers(true)
        .from_reader("a,b,c\nx,y,z".as_bytes())
        .into_records();

    // The header row is skipped, so the first record yielded is the second
    // line of input.
    let pos = records.next().unwrap().unwrap().position().unwrap().clone();
    assert_eq!((pos.byte(), pos.line(), pos.record()), (6, 2, 1));
}
// Test that reading headers on empty data yields an empty record.
#[test]
fn headers_on_empty_data() {
    let mut reader = ReaderBuilder::new().from_reader("".as_bytes());
    let headers = reader.byte_headers().unwrap();
    assert_eq!(headers.len(), 0);
}
// Test that reading the first record on empty data works.
#[test]
fn no_headers_on_empty_data() {
    let mut reader =
        ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
    // With no headers expected, empty input yields zero records.
    assert_eq!(reader.records().count(), 0);
}
// Test that reading the first record on empty data works, even if
// we've tried to read headers before hand.
#[test]
fn no_headers_on_empty_data_after_headers() {
    let mut reader =
        ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
    // Reading headers first yields an empty record...
    assert_eq!(reader.headers().unwrap().len(), 0);
    // ... and the record iterator is still empty afterwards.
    assert_eq!(reader.records().count(), 0);
}
}