webvtt/webvttparser.cc - platform/external/libwebm - Git at Google

 // Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 //
 // Use of this source code is governed by a BSD-style license
 // that can be found in the LICENSE file in the root of the source
 // tree. An additional intellectual property rights grant can be found
 // in the file PATENTS.  All contributing project authors may
 // be found in the AUTHORS file in the root of the source tree.

 #include "webvttparser.h"

 #include <ctype.h>

 #include <climits>
 #include <cstddef>

 namespace libwebvtt {

 // NOLINT'ing this enum because clang-format puts it in a single line which
 // makes it look really unreadable.
 enum {
   kNUL = '\x00',
   kSPACE = ' ',
   kTAB = '\x09',
   kLF = '\x0A',
   kCR = '\x0D'
 };  // NOLINT

 Reader::~Reader() {}

 LineReader::~LineReader() {}

 int LineReader::GetLine(std::string* line_ptr) {
   if (line_ptr == NULL)
     return -1;

   std::string& ln = *line_ptr;
   ln.clear();

   // Consume characters from the stream, until we
   // reach end-of-line (or end-of-stream).

   // The WebVTT spec states that lines may be
   // terminated in any of these three ways:
   //  LF
   //  CR
   //  CR LF

   // We interrogate each character as we read it from the stream.
   // If we detect an end-of-line character, we consume the full
   // end-of-line indication, and we're done; otherwise, accumulate
   // the character and repeat.

   for (;;) {
     char c;
     const int e = GetChar(&c);

     if (e < 0)  // error
       return e;

     if (e > 0)  // EOF
       return (ln.empty()) ? 1 : 0;

     // We have a character, so we must first determine
     // whether we have reached end-of-line.

     if (c == kLF)
       return 0;  // handle the easy end-of-line case immediately

     if (c == kCR)
       break;  // handle the hard end-of-line case outside of loop

     if (c == '\xFE' || c == '\xFF')  // not UTF-8
       return -1;

     // To defend against pathological or malicious streams, we
     // cap the line length at some arbitrarily-large value:
     enum { kMaxLineLength = 10000 };  // arbitrary

     if (ln.length() >= kMaxLineLength)
       return -1;

     // We don't have an end-of-line character, so accumulate
     // the character in our line buffer.
     ln.push_back(c);
   }

   // We detected a CR.  We must interrogate the next character
   // in the stream, to determine whether we have a LF (which
   // would make it part of this same line).

   char c;
   const int e = GetChar(&c);

   if (e < 0)  // error
     return e;

   if (e > 0)  // EOF
     return 0;

   // If next character in the stream is not a LF, return it
   // to the stream (because it's part of the next line).
   if (c != kLF)
     UngetChar(c);

   return 0;
 }

 Parser::Parser(Reader* r) : reader_(r), unget_(-1) {}

 Parser::~Parser() {}

 int Parser::Init() {
   int e = ParseBOM();

   if (e < 0)  // error
     return e;

   if (e > 0)  // EOF
     return -1;

   // Parse "WEBVTT".  We read from the stream one character at-a-time, in
   // order to defend against non-WebVTT streams (e.g. binary files) that don't
   // happen to comprise lines of text demarcated with line terminators.

   const char kId[] = "WEBVTT";

   for (const char* p = kId; *p; ++p) {
     char c;
     e = GetChar(&c);

     if (e < 0)  // error
       return e;

     if (e > 0)  // EOF
       return -1;

     if (c != *p)
       return -1;
   }

   std::string line;

   e = GetLine(&line);

   if (e < 0)  // error
     return e;

   if (e > 0)  // EOF
     return 0;  // weird but valid

   if (!line.empty()) {
     // Parse optional characters that follow "WEBVTT"

     const char c = line[0];

     if (c != kSPACE && c != kTAB)
       return -1;
   }

   // The WebVTT spec requires that the "WEBVTT" line
   // be followed by an empty line (to separate it from
   // first cue).

   e = GetLine(&line);

   if (e < 0)  // error
     return e;

   if (e > 0)  // EOF
     return 0;  // weird but we allow it

   if (!line.empty())
     return -1;

   return 0;  // success
 }

 int Parser::Parse(Cue* cue) {
   if (cue == NULL)
     return -1;

   // Parse first non-blank line

   std::string line;
   int e;

   for (;;) {
     e = GetLine(&line);

     if (e)  // EOF is OK here
       return e;

     if (!line.empty())
       break;
   }

   // A WebVTT cue comprises an optional cue identifier line followed
   // by a (non-optional) timings line.  You determine whether you have
   // a timings line by scanning for the arrow token, the lexeme of which
   // may not appear in the cue identifier line.

   const char kArrow[] = "-->";
   std::string::size_type arrow_pos = line.find(kArrow);

   if (arrow_pos != std::string::npos) {
     // We found a timings line, which implies that we don't have a cue
     // identifier.

     cue->identifier.clear();
   } else {
     // We did not find a timings line, so we assume that we have a cue
     // identifier line, and then try again to find the cue timings on
     // the next line.

     cue->identifier.swap(line);

     e = GetLine(&line);

     if (e < 0)  // error
       return e;

     if (e > 0)  // EOF
       return -1;

     arrow_pos = line.find(kArrow);

     if (arrow_pos == std::string::npos)  // not a timings line
       return -1;
   }

   e = ParseTimingsLine(&line, arrow_pos, &cue->start_time, &cue->stop_time,
                        &cue->settings);

   if (e)  // error
     return e;

   // The cue payload comprises all the non-empty
   // lines that follow the timings line.

   Cue::payload_t& p = cue->payload;
   p.clear();

   for (;;) {
     e = GetLine(&line);

     if (e < 0)  // error
       return e;

     if (line.empty())
       break;

     p.push_back(line);
   }

   if (p.empty())
     return -1;

   return 0;  // success
 }

 int Parser::GetChar(char* c) {
   if (unget_ >= 0) {
     *c = static_cast<char>(unget_);
     unget_ = -1;
     return 0;
   }

   return reader_->GetChar(c);
 }

 void Parser::UngetChar(char c) { unget_ = static_cast<unsigned char>(c); }

 int Parser::ParseBOM() {
   // Explanation of UTF-8 BOM:
   // http://en.wikipedia.org/wiki/Byte_order_mark

   static const char BOM[] = "\xEF\xBB\xBF";  // UTF-8 BOM

   for (int i = 0; i < 3; ++i) {
     char c;
     int e = GetChar(&c);

     if (e < 0)  // error
       return e;

     if (e > 0)  // EOF
       return 1;

     if (c != BOM[i]) {
       if (i == 0) {  // we don't have a BOM
         UngetChar(c);
         return 0;  // success
       }

       // We started a BOM, so we must finish the BOM.
       return -1;  // error
     }
   }

   return 0;  // success
 }

 int Parser::ParseTimingsLine(std::string* line_ptr,
                              std::string::size_type arrow_pos, Time* start_time,
                              Time* stop_time, Cue::settings_t* settings) {
   if (line_ptr == NULL)
     return -1;

   std::string& line = *line_ptr;

   if (arrow_pos == std::string::npos || arrow_pos >= line.length())
     return -1;

   // Place a NUL character at the start of the arrow token, in
   // order to demarcate the start time from remainder of line.
   line[arrow_pos] = kNUL;
   std::string::size_type idx = 0;

   int e = ParseTime(line, &idx, start_time);
   if (e)  // error
     return e;

   // Detect any junk that follows the start time,
   // but precedes the arrow symbol.

   while (char c = line[idx]) {
     if (c != kSPACE && c != kTAB)
       return -1;
     ++idx;
   }

   // Place a NUL character at the end of the line,
   // so the scanner has a place to stop, and begin
   // the scan just beyond the arrow token.

   line.push_back(kNUL);
   idx = arrow_pos + 3;

   e = ParseTime(line, &idx, stop_time);
   if (e)  // error
     return e;

   e = ParseSettings(line, idx, settings);
   if (e)  // error
     return e;

   return 0;  // success
 }

 int Parser::ParseTime(const std::string& line, std::string::size_type* idx_ptr,
                       Time* time) {
   if (idx_ptr == NULL)
     return -1;

   std::string::size_type& idx = *idx_ptr;

   if (idx == std::string::npos || idx >= line.length())
     return -1;

   if (time == NULL)
     return -1;

   // Consume any whitespace that precedes the timestamp.

   while (char c = line[idx]) {
     if (c != kSPACE && c != kTAB)
       break;
     ++idx;
   }

   // WebVTT timestamp syntax comes in three flavors:
   //  SS[.sss]
   //  MM:SS[.sss]
   //  HH:MM:SS[.sss]

   // Parse a generic number value.  We don't know which component
   // of the time we have yet, until we do more parsing.

   int val = ParseNumber(line, &idx);

   if (val < 0)  // error
     return val;

   Time& t = *time;

   // The presence of a colon character indicates that we have
   // an [HH:]MM:SS style syntax.

   if (line[idx] == ':') {
     // We have either HH:MM:SS or MM:SS

     // The value we just parsed is either the hours or minutes.
     // It must be followed by another number value (that is
     // either minutes or seconds).

     const int first_val = val;

     ++idx;  // consume colon

     // Parse second value

     val = ParseNumber(line, &idx);

     if (val < 0)
       return val;

     if (val >= 60)  // either MM or SS
       return -1;

     if (line[idx] == ':') {
       // We have HH:MM:SS

       t.hours = first_val;
       t.minutes = val;  // vetted above

       ++idx;  // consume MM:SS colon

       // We have parsed the hours and minutes.
       // We must now parse the seconds.

       val = ParseNumber(line, &idx);

       if (val < 0)
         return val;

       if (val >= 60)  // SS part of HH:MM:SS
         return -1;

       t.seconds = val;
     } else {
       // We have MM:SS

       // The implication here is that the hour value was omitted
       // from the timestamp (because it was 0).

       if (first_val >= 60)  // minutes
         return -1;

       t.hours = 0;
       t.minutes = first_val;
       t.seconds = val;  // vetted above
     }
   } else {
     // We have SS (only)

     // The time is expressed as total number of seconds,
     // so the seconds value has no upper bound.

     t.seconds = val;

     // Convert SS to HH:MM:SS

     t.minutes = t.seconds / 60;
     t.seconds -= t.minutes * 60;

     t.hours = t.minutes / 60;
     t.minutes -= t.hours * 60;
   }

   // We have parsed the hours, minutes, and seconds.
   // We must now parse the milliseconds.

   char c = line[idx];

   // TODO(matthewjheaney): one option here is to slightly relax the
   // syntax rules for WebVTT timestamps, to permit the comma character
   // to also be used as the seconds/milliseconds separator.  This
   // would handle streams that use localization conventions for
   // countries in Western Europe.  For now we obey the rules specified
   // in the WebVTT spec (allow "full stop" only).

   const bool have_milliseconds = (c == '.');

   if (!have_milliseconds) {
     t.milliseconds = 0;
   } else {
     ++idx;  // consume FULL STOP

     val = ParseNumber(line, &idx);

     if (val < 0)
       return val;

     if (val >= 1000)
       return -1;

     if (val < 10)
       t.milliseconds = val * 100;
     else if (val < 100)
       t.milliseconds = val * 10;
     else
       t.milliseconds = val;
   }

   // We have parsed the time proper.  We must check for any
   // junk that immediately follows the time specifier.

   c = line[idx];

   if (c != kNUL && c != kSPACE && c != kTAB)
     return -1;

   return 0;  // success
 }

 int Parser::ParseSettings(const std::string& line, std::string::size_type idx,
                           Cue::settings_t* settings) {
   settings->clear();

   if (idx == std::string::npos || idx >= line.length())
     return -1;

   for (;;) {
     // We must parse a line comprising a sequence of 0 or more
     // NAME:VALUE pairs, separated by whitespace.  The line iself is
     // terminated with a NUL char (indicating end-of-line).

     for (;;) {
       const char c = line[idx];

       if (c == kNUL)  // end-of-line
         return 0;  // success

       if (c != kSPACE && c != kTAB)
         break;

       ++idx;  // consume whitespace
     }

     // We have consumed the whitespace, and have not yet reached
     // end-of-line, so there is something on the line for us to parse.

     settings->push_back(Setting());
     Setting& s = settings->back();

     // Parse the NAME part of the settings pair.

     for (;;) {
       const char c = line[idx];

       if (c == ':')  // we have reached end of NAME part
         break;

       if (c == kNUL || c == kSPACE || c == kTAB)
         return -1;

       s.name.push_back(c);

       ++idx;
     }

     if (s.name.empty())
       return -1;

     ++idx;  // consume colon

     // Parse the VALUE part of the settings pair.

     for (;;) {
       const char c = line[idx];

       if (c == kNUL || c == kSPACE || c == kTAB)
         break;

       if (c == ':')  // suspicious when part of VALUE
         return -1;  // TODO(matthewjheaney): verify this behavior

       s.value.push_back(c);

       ++idx;
     }

     if (s.value.empty())
       return -1;
   }
 }

 int Parser::ParseNumber(const std::string& line,
                         std::string::size_type* idx_ptr) {
   if (idx_ptr == NULL)
     return -1;

   std::string::size_type& idx = *idx_ptr;

   if (idx == std::string::npos || idx >= line.length())
     return -1;

   if (!isdigit(line[idx]))
     return -1;

   int result = 0;

   while (isdigit(line[idx])) {
     const char c = line[idx];
     const int i = c - '0';

     if (result > INT_MAX / 10)
       return -1;

     result *= 10;

     if (result > INT_MAX - i)
       return -1;

     result += i;

     ++idx;
   }

   return result;
 }

 bool Time::operator==(const Time& rhs) const {
   if (hours != rhs.hours)
     return false;

   if (minutes != rhs.minutes)
     return false;

   if (seconds != rhs.seconds)
     return false;

   return (milliseconds == rhs.milliseconds);
 }

 bool Time::operator<(const Time& rhs) const {
   if (hours < rhs.hours)
     return true;

   if (hours > rhs.hours)
     return false;

   if (minutes < rhs.minutes)
     return true;

   if (minutes > rhs.minutes)
     return false;

   if (seconds < rhs.seconds)
     return true;

   if (seconds > rhs.seconds)
     return false;

   return (milliseconds < rhs.milliseconds);
 }

 bool Time::operator>(const Time& rhs) const { return rhs.operator<(*this); }

 bool Time::operator<=(const Time& rhs) const { return !this->operator>(rhs); }

 bool Time::operator>=(const Time& rhs) const { return !this->operator<(rhs); }

 presentation_t Time::presentation() const {
   const presentation_t h = 1000LL * 3600LL * presentation_t(hours);
   const presentation_t m = 1000LL * 60LL * presentation_t(minutes);
   const presentation_t s = 1000LL * presentation_t(seconds);
   const presentation_t result = h + m + s + milliseconds;
   return result;
 }

 Time& Time::presentation(presentation_t d) {
   if (d < 0) {  // error
     hours = 0;
     minutes = 0;
     seconds = 0;
     milliseconds = 0;

     return *this;
   }

   seconds = static_cast<int>(d / 1000);
   milliseconds = static_cast<int>(d - 1000 * seconds);

   minutes = seconds / 60;
   seconds -= 60 * minutes;

   hours = minutes / 60;
   minutes -= 60 * hours;

   return *this;
 }

 Time& Time::operator+=(presentation_t rhs) {
   const presentation_t d = this->presentation();
   const presentation_t dd = d + rhs;
   this->presentation(dd);
   return *this;
 }

 Time Time::operator+(presentation_t d) const {
   Time t(*this);
   t += d;
   return t;
 }

 Time& Time::operator-=(presentation_t d) { return this->operator+=(-d); }

 presentation_t Time::operator-(const Time& t) const {
   const presentation_t rhs = t.presentation();
   const presentation_t lhs = this->presentation();
   const presentation_t result = lhs - rhs;
   return result;
 }

 }  // namespace libwebvtt
	// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
	//
	// Use of this source code is governed by a BSD-style license
	// that can be found in the LICENSE file in the root of the source
	// tree. An additional intellectual property rights grant can be found
	// in the file PATENTS. All contributing project authors may
	// be found in the AUTHORS file in the root of the source tree.

	#include "webvttparser.h"

	#include <ctype.h>

	#include <climits>
	#include <cstddef>

	namespace libwebvtt {

	// NOLINT'ing this enum because clang-format puts it in a single line which
	// makes it look really unreadable.
	enum {
	kNUL = '\x00',
	kSPACE = ' ',
	kTAB = '\x09',
	kLF = '\x0A',
	kCR = '\x0D'
	}; // NOLINT

	Reader::~Reader() {}

	LineReader::~LineReader() {}

	int LineReader::GetLine(std::string* line_ptr) {
	if (line_ptr == NULL)
	return -1;

	std::string& ln = *line_ptr;
	ln.clear();

	// Consume characters from the stream, until we
	// reach end-of-line (or end-of-stream).

	// The WebVTT spec states that lines may be
	// terminated in any of these three ways:
	// LF
	// CR
	// CR LF

	// We interrogate each character as we read it from the stream.
	// If we detect an end-of-line character, we consume the full
	// end-of-line indication, and we're done; otherwise, accumulate
	// the character and repeat.

	for (;;) {
	char c;
	const int e = GetChar(&c);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return (ln.empty()) ? 1 : 0;

	// We have a character, so we must first determine
	// whether we have reached end-of-line.

	if (c == kLF)
	return 0; // handle the easy end-of-line case immediately

	if (c == kCR)
	break; // handle the hard end-of-line case outside of loop

	if (c == '\xFE' \|\| c == '\xFF') // not UTF-8
	return -1;

	// To defend against pathological or malicious streams, we
	// cap the line length at some arbitrarily-large value:
	enum { kMaxLineLength = 10000 }; // arbitrary

	if (ln.length() >= kMaxLineLength)
	return -1;

	// We don't have an end-of-line character, so accumulate
	// the character in our line buffer.
	ln.push_back(c);
	}

	// We detected a CR. We must interrogate the next character
	// in the stream, to determine whether we have a LF (which
	// would make it part of this same line).

	char c;
	const int e = GetChar(&c);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return 0;

	// If next character in the stream is not a LF, return it
	// to the stream (because it's part of the next line).
	if (c != kLF)
	UngetChar(c);

	return 0;
	}

	Parser::Parser(Reader* r) : reader_(r), unget_(-1) {}

	Parser::~Parser() {}

	int Parser::Init() {
	int e = ParseBOM();

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return -1;

	// Parse "WEBVTT". We read from the stream one character at-a-time, in
	// order to defend against non-WebVTT streams (e.g. binary files) that don't
	// happen to comprise lines of text demarcated with line terminators.

	const char kId[] = "WEBVTT";

	for (const char* p = kId; *p; ++p) {
	char c;
	e = GetChar(&c);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return -1;

	if (c != *p)
	return -1;
	}

	std::string line;

	e = GetLine(&line);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return 0; // weird but valid

	if (!line.empty()) {
	// Parse optional characters that follow "WEBVTT"

	const char c = line[0];

	if (c != kSPACE && c != kTAB)
	return -1;
	}

	// The WebVTT spec requires that the "WEBVTT" line
	// be followed by an empty line (to separate it from
	// first cue).

	e = GetLine(&line);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return 0; // weird but we allow it

	if (!line.empty())
	return -1;

	return 0; // success
	}

	int Parser::Parse(Cue* cue) {
	if (cue == NULL)
	return -1;

	// Parse first non-blank line

	std::string line;
	int e;

	for (;;) {
	e = GetLine(&line);

	if (e) // EOF is OK here
	return e;

	if (!line.empty())
	break;
	}

	// A WebVTT cue comprises an optional cue identifier line followed
	// by a (non-optional) timings line. You determine whether you have
	// a timings line by scanning for the arrow token, the lexeme of which
	// may not appear in the cue identifier line.

	const char kArrow[] = "-->";
	std::string::size_type arrow_pos = line.find(kArrow);

	if (arrow_pos != std::string::npos) {
	// We found a timings line, which implies that we don't have a cue
	// identifier.

	cue->identifier.clear();
	} else {
	// We did not find a timings line, so we assume that we have a cue
	// identifier line, and then try again to find the cue timings on
	// the next line.

	cue->identifier.swap(line);

	e = GetLine(&line);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return -1;

	arrow_pos = line.find(kArrow);

	if (arrow_pos == std::string::npos) // not a timings line
	return -1;
	}

	e = ParseTimingsLine(&line, arrow_pos, &cue->start_time, &cue->stop_time,
	&cue->settings);

	if (e) // error
	return e;

	// The cue payload comprises all the non-empty
	// lines that follow the timings line.

	Cue::payload_t& p = cue->payload;
	p.clear();

	for (;;) {
	e = GetLine(&line);

	if (e < 0) // error
	return e;

	if (line.empty())
	break;

	p.push_back(line);
	}

	if (p.empty())
	return -1;

	return 0; // success
	}

	int Parser::GetChar(char* c) {
	if (unget_ >= 0) {
	*c = static_cast<char>(unget_);
	unget_ = -1;
	return 0;
	}

	return reader_->GetChar(c);
	}

	void Parser::UngetChar(char c) { unget_ = static_cast<unsigned char>(c); }

	int Parser::ParseBOM() {
	// Explanation of UTF-8 BOM:
	// http://en.wikipedia.org/wiki/Byte_order_mark

	static const char BOM[] = "\xEF\xBB\xBF"; // UTF-8 BOM

	for (int i = 0; i < 3; ++i) {
	char c;
	int e = GetChar(&c);

	if (e < 0) // error
	return e;

	if (e > 0) // EOF
	return 1;

	if (c != BOM[i]) {
	if (i == 0) { // we don't have a BOM
	UngetChar(c);
	return 0; // success
	}

	// We started a BOM, so we must finish the BOM.
	return -1; // error
	}
	}

	return 0; // success
	}

	int Parser::ParseTimingsLine(std::string* line_ptr,
	std::string::size_type arrow_pos, Time* start_time,
	Time* stop_time, Cue::settings_t* settings) {
	if (line_ptr == NULL)
	return -1;

	std::string& line = *line_ptr;

	if (arrow_pos == std::string::npos \|\| arrow_pos >= line.length())
	return -1;

	// Place a NUL character at the start of the arrow token, in
	// order to demarcate the start time from remainder of line.
	line[arrow_pos] = kNUL;
	std::string::size_type idx = 0;

	int e = ParseTime(line, &idx, start_time);
	if (e) // error
	return e;

	// Detect any junk that follows the start time,
	// but precedes the arrow symbol.

	while (char c = line[idx]) {
	if (c != kSPACE && c != kTAB)
	return -1;
	++idx;
	}

	// Place a NUL character at the end of the line,
	// so the scanner has a place to stop, and begin
	// the scan just beyond the arrow token.

	line.push_back(kNUL);
	idx = arrow_pos + 3;

	e = ParseTime(line, &idx, stop_time);
	if (e) // error
	return e;

	e = ParseSettings(line, idx, settings);
	if (e) // error
	return e;

	return 0; // success
	}

	int Parser::ParseTime(const std::string& line, std::string::size_type* idx_ptr,
	Time* time) {
	if (idx_ptr == NULL)
	return -1;

	std::string::size_type& idx = *idx_ptr;

	if (idx == std::string::npos \|\| idx >= line.length())
	return -1;

	if (time == NULL)
	return -1;

	// Consume any whitespace that precedes the timestamp.

	while (char c = line[idx]) {
	if (c != kSPACE && c != kTAB)
	break;
	++idx;
	}

	// WebVTT timestamp syntax comes in three flavors:
	// SS[.sss]
	// MM:SS[.sss]
	// HH:MM:SS[.sss]

	// Parse a generic number value. We don't know which component
	// of the time we have yet, until we do more parsing.

	int val = ParseNumber(line, &idx);

	if (val < 0) // error
	return val;

	Time& t = *time;

	// The presence of a colon character indicates that we have
	// an [HH:]MM:SS style syntax.

	if (line[idx] == ':') {
	// We have either HH:MM:SS or MM:SS

	// The value we just parsed is either the hours or minutes.
	// It must be followed by another number value (that is
	// either minutes or seconds).

	const int first_val = val;

	++idx; // consume colon

	// Parse second value

	val = ParseNumber(line, &idx);

	if (val < 0)
	return val;

	if (val >= 60) // either MM or SS
	return -1;

	if (line[idx] == ':') {
	// We have HH:MM:SS

	t.hours = first_val;
	t.minutes = val; // vetted above

	++idx; // consume MM:SS colon

	// We have parsed the hours and minutes.
	// We must now parse the seconds.

	val = ParseNumber(line, &idx);

	if (val < 0)
	return val;

	if (val >= 60) // SS part of HH:MM:SS
	return -1;

	t.seconds = val;
	} else {
	// We have MM:SS

	// The implication here is that the hour value was omitted
	// from the timestamp (because it was 0).

	if (first_val >= 60) // minutes
	return -1;

	t.hours = 0;
	t.minutes = first_val;
	t.seconds = val; // vetted above
	}
	} else {
	// We have SS (only)

	// The time is expressed as total number of seconds,
	// so the seconds value has no upper bound.

	t.seconds = val;

	// Convert SS to HH:MM:SS

	t.minutes = t.seconds / 60;
	t.seconds -= t.minutes * 60;

	t.hours = t.minutes / 60;
	t.minutes -= t.hours * 60;
	}

	// We have parsed the hours, minutes, and seconds.
	// We must now parse the milliseconds.

	char c = line[idx];

	// TODO(matthewjheaney): one option here is to slightly relax the
	// syntax rules for WebVTT timestamps, to permit the comma character
	// to also be used as the seconds/milliseconds separator. This
	// would handle streams that use localization conventions for
	// countries in Western Europe. For now we obey the rules specified
	// in the WebVTT spec (allow "full stop" only).

	const bool have_milliseconds = (c == '.');

	if (!have_milliseconds) {
	t.milliseconds = 0;
	} else {
	++idx; // consume FULL STOP

	val = ParseNumber(line, &idx);

	if (val < 0)
	return val;

	if (val >= 1000)
	return -1;

	if (val < 10)
	t.milliseconds = val * 100;
	else if (val < 100)
	t.milliseconds = val * 10;
	else
	t.milliseconds = val;
	}

	// We have parsed the time proper. We must check for any
	// junk that immediately follows the time specifier.

	c = line[idx];

	if (c != kNUL && c != kSPACE && c != kTAB)
	return -1;

	return 0; // success
	}

	int Parser::ParseSettings(const std::string& line, std::string::size_type idx,
	Cue::settings_t* settings) {
	settings->clear();

	if (idx == std::string::npos \|\| idx >= line.length())
	return -1;

	for (;;) {
	// We must parse a line comprising a sequence of 0 or more
	// NAME:VALUE pairs, separated by whitespace. The line iself is
	// terminated with a NUL char (indicating end-of-line).

	for (;;) {
	const char c = line[idx];

	if (c == kNUL) // end-of-line
	return 0; // success

	if (c != kSPACE && c != kTAB)
	break;

	++idx; // consume whitespace
	}

	// We have consumed the whitespace, and have not yet reached
	// end-of-line, so there is something on the line for us to parse.

	settings->push_back(Setting());
	Setting& s = settings->back();

	// Parse the NAME part of the settings pair.

	for (;;) {
	const char c = line[idx];

	if (c == ':') // we have reached end of NAME part
	break;

	if (c == kNUL \|\| c == kSPACE \|\| c == kTAB)
	return -1;

	s.name.push_back(c);

	++idx;
	}

	if (s.name.empty())
	return -1;

	++idx; // consume colon

	// Parse the VALUE part of the settings pair.

	for (;;) {
	const char c = line[idx];

	if (c == kNUL \|\| c == kSPACE \|\| c == kTAB)
	break;

	if (c == ':') // suspicious when part of VALUE
	return -1; // TODO(matthewjheaney): verify this behavior

	s.value.push_back(c);

	++idx;
	}

	if (s.value.empty())
	return -1;
	}
	}

	int Parser::ParseNumber(const std::string& line,
	std::string::size_type* idx_ptr) {
	if (idx_ptr == NULL)
	return -1;

	std::string::size_type& idx = *idx_ptr;

	if (idx == std::string::npos \|\| idx >= line.length())
	return -1;

	if (!isdigit(line[idx]))
	return -1;

	int result = 0;

	while (isdigit(line[idx])) {
	const char c = line[idx];
	const int i = c - '0';

	if (result > INT_MAX / 10)
	return -1;

	result *= 10;

	if (result > INT_MAX - i)
	return -1;

	result += i;

	++idx;
	}

	return result;
	}

	bool Time::operator==(const Time& rhs) const {
	if (hours != rhs.hours)
	return false;

	if (minutes != rhs.minutes)
	return false;

	if (seconds != rhs.seconds)
	return false;

	return (milliseconds == rhs.milliseconds);
	}

	bool Time::operator<(const Time& rhs) const {
	if (hours < rhs.hours)
	return true;

	if (hours > rhs.hours)
	return false;

	if (minutes < rhs.minutes)
	return true;

	if (minutes > rhs.minutes)
	return false;

	if (seconds < rhs.seconds)
	return true;

	if (seconds > rhs.seconds)
	return false;

	return (milliseconds < rhs.milliseconds);
	}

	bool Time::operator>(const Time& rhs) const { return rhs.operator<(*this); }

	bool Time::operator<=(const Time& rhs) const { return !this->operator>(rhs); }

	bool Time::operator>=(const Time& rhs) const { return !this->operator<(rhs); }

	presentation_t Time::presentation() const {
	const presentation_t h = 1000LL * 3600LL * presentation_t(hours);
	const presentation_t m = 1000LL * 60LL * presentation_t(minutes);
	const presentation_t s = 1000LL * presentation_t(seconds);
	const presentation_t result = h + m + s + milliseconds;
	return result;
	}

	Time& Time::presentation(presentation_t d) {
	if (d < 0) { // error
	hours = 0;
	minutes = 0;
	seconds = 0;
	milliseconds = 0;

	return *this;
	}

	seconds = static_cast<int>(d / 1000);
	milliseconds = static_cast<int>(d - 1000 * seconds);

	minutes = seconds / 60;
	seconds -= 60 * minutes;

	hours = minutes / 60;
	minutes -= 60 * hours;

	return *this;
	}

	Time& Time::operator+=(presentation_t rhs) {
	const presentation_t d = this->presentation();
	const presentation_t dd = d + rhs;
	this->presentation(dd);
	return *this;
	}

	Time Time::operator+(presentation_t d) const {
	Time t(*this);
	t += d;
	return t;
	}

	Time& Time::operator-=(presentation_t d) { return this->operator+=(-d); }

	presentation_t Time::operator-(const Time& t) const {
	const presentation_t rhs = t.presentation();
	const presentation_t lhs = this->presentation();
	const presentation_t result = lhs - rhs;
	return result;
	}

	} // namespace libwebvtt