| # -*- coding: utf-8 -*- |
| """ |
| This module offers a generic date/time string parser which is able to parse |
| most known formats to represent a date and/or time. |
| |
| This module attempts to be forgiving with regards to unlikely input formats, |
| returning a datetime object even for dates which are ambiguous. If an element |
| of a date/time stamp is omitted, the following rules are applied: |
| |
| - If AM or PM is left unspecified, a 24-hour clock is assumed; however, an hour |
| on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is |
| specified. |
| - If a time zone is omitted, a timezone-naive datetime is returned. |
| |
| If any other elements are missing, they are taken from the |
| :class:`datetime.datetime` object passed to the parameter ``default``. If this |
| results in a day number exceeding the valid number of days per month, the |
| value falls back to the end of the month. |
| |
| Additional resources about date/time string formats can be found below: |
| |
| - `A summary of the international standard date and time notation |
| <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ |
| - `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ |
| - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ |
| - `CPAN ParseDate module |
| <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ |
| - `Java SimpleDateFormat Class |
| <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ |
| """ |
| from __future__ import unicode_literals |
| |
| import datetime |
| import re |
| import string |
| import time |
| import warnings |
| |
| from calendar import monthrange |
| from io import StringIO |
| |
| import six |
| from six import integer_types, text_type |
| |
| from decimal import Decimal |
| |
| from warnings import warn |
| |
| from .. import relativedelta |
| from .. import tz |
| |
| __all__ = ["parse", "parserinfo"] |
| |
| |
| # TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth |
| # making public and/or figuring out if there is something we can |
| # take off their plate. |
| class _timelex(object): |
| # Fractional seconds are sometimes split by a comma |
| _split_decimal = re.compile("([.,])") |
| |
| def __init__(self, instream): |
| if six.PY2: |
| # In Python 2, we can't duck type properly because unicode has |
| # a 'decode' function, and we'd be double-decoding |
| if isinstance(instream, (bytes, bytearray)): |
| instream = instream.decode() |
| else: |
| if getattr(instream, 'decode', None) is not None: |
| instream = instream.decode() |
| |
| if isinstance(instream, text_type): |
| instream = StringIO(instream) |
| elif getattr(instream, 'read', None) is None: |
| raise TypeError('Parser must be a string or character stream, not ' |
| '{itype}'.format(itype=instream.__class__.__name__)) |
| |
| self.instream = instream |
| self.charstack = [] |
| self.tokenstack = [] |
| self.eof = False |
| |
| def get_token(self): |
| """ |
| This function breaks the time string into lexical units (tokens), which |
| can be parsed by the parser. Lexical units are demarcated by changes in |
| the character set, so any continuous string of letters is considered |
| one unit, any continuous string of numbers is considered one unit. |
| |
| The main complication arises from the fact that dots ('.') can be used |
| both as separators (e.g. "Sep.20.2009") or decimal points (e.g. |
| "4:30:21.447"). As such, it is necessary to read the full context of |
| any dot-separated strings before breaking it into tokens; as such, this |
| function maintains a "token stack", for when the ambiguous context |
| demands that multiple tokens be parsed at once. |
| """ |
| if self.tokenstack: |
| return self.tokenstack.pop(0) |
| |
| seenletters = False |
| token = None |
| state = None |
| |
| while not self.eof: |
| # We only realize that we've reached the end of a token when we |
| # find a character that's not part of the current token - since |
| # that character may be part of the next token, it's stored in the |
| # charstack. |
| if self.charstack: |
| nextchar = self.charstack.pop(0) |
| else: |
| nextchar = self.instream.read(1) |
| while nextchar == '\x00': |
| nextchar = self.instream.read(1) |
| |
| if not nextchar: |
| self.eof = True |
| break |
| elif not state: |
| # First character of the token - determines if we're starting |
| # to parse a word, a number or something else. |
| token = nextchar |
| if self.isword(nextchar): |
| state = 'a' |
| elif self.isnum(nextchar): |
| state = '0' |
| elif self.isspace(nextchar): |
| token = ' ' |
| break # emit token |
| else: |
| break # emit token |
| elif state == 'a': |
| # If we've already started reading a word, we keep reading |
| # letters until we find something that's not part of a word. |
| seenletters = True |
| if self.isword(nextchar): |
| token += nextchar |
| elif nextchar == '.': |
| token += nextchar |
| state = 'a.' |
| else: |
| self.charstack.append(nextchar) |
| break # emit token |
| elif state == '0': |
| # If we've already started reading a number, we keep reading |
| # numbers until we find something that doesn't fit. |
| if self.isnum(nextchar): |
| token += nextchar |
| elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): |
| token += nextchar |
| state = '0.' |
| else: |
| self.charstack.append(nextchar) |
| break # emit token |
| elif state == 'a.': |
| # If we've seen some letters and a dot separator, continue |
| # parsing, and the tokens will be broken up later. |
| seenletters = True |
| if nextchar == '.' or self.isword(nextchar): |
| token += nextchar |
| elif self.isnum(nextchar) and token[-1] == '.': |
| token += nextchar |
| state = '0.' |
| else: |
| self.charstack.append(nextchar) |
| break # emit token |
| elif state == '0.': |
| # If we've seen at least one dot separator, keep going, we'll |
| # break up the tokens later. |
| if nextchar == '.' or self.isnum(nextchar): |
| token += nextchar |
| elif self.isword(nextchar) and token[-1] == '.': |
| token += nextchar |
| state = 'a.' |
| else: |
| self.charstack.append(nextchar) |
| break # emit token |
| |
| if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or |
| token[-1] in '.,')): |
| l = self._split_decimal.split(token) |
| token = l[0] |
| for tok in l[1:]: |
| if tok: |
| self.tokenstack.append(tok) |
| |
| if state == '0.' and token.count('.') == 0: |
| token = token.replace(',', '.') |
| |
| return token |
| |
| def __iter__(self): |
| return self |
| |
| def __next__(self): |
| token = self.get_token() |
| if token is None: |
| raise StopIteration |
| |
| return token |
| |
| def next(self): |
| return self.__next__() # Python 2.x support |
| |
| @classmethod |
| def split(cls, s): |
| return list(cls(s)) |
| |
| @classmethod |
| def isword(cls, nextchar): |
| """ Whether or not the next character is part of a word """ |
| return nextchar.isalpha() |
| |
| @classmethod |
| def isnum(cls, nextchar): |
| """ Whether the next character is part of a number """ |
| return nextchar.isdigit() |
| |
| @classmethod |
| def isspace(cls, nextchar): |
| """ Whether the next character is whitespace """ |
| return nextchar.isspace() |
| |
| |
| class _resultbase(object): |
| |
| def __init__(self): |
| for attr in self.__slots__: |
| setattr(self, attr, None) |
| |
| def _repr(self, classname): |
| l = [] |
| for attr in self.__slots__: |
| value = getattr(self, attr) |
| if value is not None: |
| l.append("%s=%s" % (attr, repr(value))) |
| return "%s(%s)" % (classname, ", ".join(l)) |
| |
| def __len__(self): |
| return (sum(getattr(self, attr) is not None |
| for attr in self.__slots__)) |
| |
| def __repr__(self): |
| return self._repr(self.__class__.__name__) |
| |
| |
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year/century used by convertyear() for 2-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased name: position}`` mapping from ``lst``, whose
        entries are either single strings or tuples of aliases that all map
        to the same position.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """Whether ``name`` is one of the skippable ``JUMP`` tokens."""
        return name.lower() in self._jump

    def weekday(self, name):
        """Return the index of ``name`` in ``WEEKDAYS``, or ``None``."""
        try:
            return self._weekdays[name.lower()]
        except KeyError:
            pass
        return None

    def month(self, name):
        """Return the 1-based month number for ``name``, or ``None``."""
        try:
            return self._months[name.lower()] + 1
        except KeyError:
            pass
        return None

    def hms(self, name):
        """Return the index of ``name`` in ``HMS`` (0/1/2), or ``None``."""
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        """Return the index of ``name`` in ``AMPM`` (0 or 1), or ``None``."""
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        """Whether ``name`` is one of the ``PERTAIN`` words."""
        return name.lower() in self._pertain

    def utczone(self, name):
        """Whether ``name`` (case-insensitively) is listed in ``UTCZONE``."""
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the offset for the time zone ``name``: ``0`` if it is listed
        in ``UTCZONE`` (case-insensitively), otherwise whatever ``TZOFFSET``
        holds for ``name`` (``None`` if it has no entry).
        """
        # self._utczone is keyed by lower-cased names, so the lookup has to
        # be lower-cased too; otherwise "UTC", "GMT" and "Z" never match.
        lowered = name.lower() if hasattr(name, 'lower') else name
        if lowered in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)

        Years of 100 or more, or any year when ``century_specified`` is
        true, are returned unchanged.
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalize the parse result ``res`` in place (year expansion and UTC
        name/offset consistency) and return ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
| |
| |
| class _ymd(list): |
| def __init__(self, *args, **kwargs): |
| super(self.__class__, self).__init__(*args, **kwargs) |
| self.century_specified = False |
| self.dstridx = None |
| self.mstridx = None |
| self.ystridx = None |
| |
| @property |
| def has_year(self): |
| return self.ystridx is not None |
| |
| @property |
| def has_month(self): |
| return self.mstridx is not None |
| |
| @property |
| def has_day(self): |
| return self.dstridx is not None |
| |
| def could_be_day(self, value): |
| if self.has_day: |
| return False |
| elif not self.has_month: |
| return 1 <= value <= 31 |
| elif not self.has_year: |
| # Be permissive, assume leapyear |
| month = self[self.mstridx] |
| return 1 <= value <= monthrange(2000, month)[1] |
| else: |
| month = self[self.mstridx] |
| year = self[self.ystridx] |
| return 1 <= value <= monthrange(year, month)[1] |
| |
| def append(self, val, label=None): |
| if hasattr(val, '__len__'): |
| if val.isdigit() and len(val) > 2: |
| self.century_specified = True |
| if label not in [None, 'Y']: # pragma: no cover |
| raise ValueError(label) |
| label = 'Y' |
| elif val > 100: |
| self.century_specified = True |
| if label not in [None, 'Y']: # pragma: no cover |
| raise ValueError(label) |
| label = 'Y' |
| |
| super(self.__class__, self).append(int(val)) |
| |
| if label == 'M': |
| if self.has_month: |
| raise ValueError('Month is already set') |
| self.mstridx = len(self) - 1 |
| elif label == 'D': |
| if self.has_day: |
| raise ValueError('Day is already set') |
| self.dstridx = len(self) - 1 |
| elif label == 'Y': |
| if self.has_year: |
| raise ValueError('Year is already set') |
| self.ystridx = len(self) - 1 |
| |
| def _resolve_from_stridxs(self, strids): |
| """ |
| Try to resolve the identities of year/month/day elements using |
| ystridx, mstridx, and dstridx, if enough of these are specified. |
| """ |
| if len(self) == 3 and len(strids) == 2: |
| # we can back out the remaining stridx value |
| missing = [x for x in range(3) if x not in strids.values()] |
| key = [x for x in ['y', 'm', 'd'] if x not in strids] |
| assert len(missing) == len(key) == 1 |
| key = key[0] |
| val = missing[0] |
| strids[key] = val |
| |
| assert len(self) == len(strids) # otherwise this should not be called |
| out = {key: self[strids[key]] for key in strids} |
| return (out.get('y'), out.get('m'), out.get('d')) |
| |
| def resolve_ymd(self, yearfirst, dayfirst): |
| len_ymd = len(self) |
| year, month, day = (None, None, None) |
| |
| strids = (('y', self.ystridx), |
| ('m', self.mstridx), |
| ('d', self.dstridx)) |
| |
| strids = {key: val for key, val in strids if val is not None} |
| if (len(self) == len(strids) > 0 or |
| (len(self) == 3 and len(strids) == 2)): |
| return self._resolve_from_stridxs(strids) |
| |
| mstridx = self.mstridx |
| |
| if len_ymd > 3: |
| raise ValueError("More than three YMD values") |
| elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): |
| # One member, or two members with a month string |
| if mstridx is not None: |
| month = self[mstridx] |
| # since mstridx is 0 or 1, self[mstridx-1] always |
| # looks up the other element |
| other = self[mstridx - 1] |
| else: |
| other = self[0] |
| |
| if len_ymd > 1 or mstridx is None: |
| if other > 31: |
| year = other |
| else: |
| day = other |
| |
| elif len_ymd == 2: |
| # Two members with numbers |
| if self[0] > 31: |
| # 99-01 |
| year, month = self |
| elif self[1] > 31: |
| # 01-99 |
| month, year = self |
| elif dayfirst and self[1] <= 12: |
| # 13-01 |
| day, month = self |
| else: |
| # 01-13 |
| month, day = self |
| |
| elif len_ymd == 3: |
| # Three members |
| if mstridx == 0: |
| if self[1] > 31: |
| # Apr-2003-25 |
| month, year, day = self |
| else: |
| month, day, year = self |
| elif mstridx == 1: |
| if self[0] > 31 or (yearfirst and self[2] <= 31): |
| # 99-Jan-01 |
| year, month, day = self |
| else: |
| # 01-Jan-01 |
| # Give precendence to day-first, since |
| # two-digit years is usually hand-written. |
| day, month, year = self |
| |
| elif mstridx == 2: |
| # WTF!? |
| if self[1] > 31: |
| # 01-99-Jan |
| day, year, month = self |
| else: |
| # 99-01-Jan |
| year, day, month = self |
| |
| else: |
| if (self[0] > 31 or |
| self.ystridx == 0 or |
| (yearfirst and self[1] <= 12 and self[2] <= 31)): |
| # 99-01-01 |
| if dayfirst and self[2] <= 12: |
| year, day, month = self |
| else: |
| year, month, day = self |
| elif self[0] > 12 or (dayfirst and self[1] <= 12): |
| # 13-01-01 |
| day, month, year = self |
| else: |
| # 01-13-01 |
| month, day, year = self |
| |
| return year, month, day |
| |
| |
| class parser(object): |
def __init__(self, info=None):
    """
    :param info:
        The ``parserinfo`` object to use; a default ``parserinfo()`` is
        created when ``info`` is ``None`` (or otherwise falsy).
    """
    if not info:
        info = parserinfo()
    self.info = info
| |
def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Parse the date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        A datetime object supplying every element that ``timestr`` does
        not specify. When ``None``, today's date at midnight is used.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime.datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """

    if default is None:
        midnight = dict(hour=0, minute=0, second=0, microsecond=0)
        default = datetime.datetime.now().replace(**midnight)

    res, skipped_tokens = self._parse(timestr, **kwargs)

    if res is None:
        raise ValueError("Unknown string format:", timestr)

    if len(res) == 0:
        raise ValueError("String does not contain a date:", timestr)

    parsed = self._build_naive(res, default)
    if not ignoretz:
        parsed = self._build_tzaware(parsed, res, tzinfos)

    want_tokens = kwargs.get('fuzzy_with_tokens', False)
    return (parsed, skipped_tokens) if want_tokens else parsed
| |
class _result(_resultbase):
    # Fields filled in by _parse(); every slot starts out as None.
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset", "ampm", "any_unused_tokens",
    ]
| |
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
           fuzzy_with_tokens=False):
    """
    Private method which performs the heavy lifting of parsing, called from
    ``parse()``, which passes on its ``kwargs`` to this function.

    :param timestr:
        The string to parse.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 

    :return:
        A 2-tuple ``(res, skipped)``. ``res`` is the populated ``_result``
        and ``skipped`` is a tuple of the ignored substrings when
        ``fuzzy_with_tokens`` is true, otherwise ``None``. ``(None, None)``
        is returned when an ``IndexError`` or ``ValueError`` is raised while
        processing the tokens, or when ``info.validate`` returns false.
    """
    if fuzzy_with_tokens:
        fuzzy = True

    info = self.info

    if dayfirst is None:
        dayfirst = info.dayfirst

    if yearfirst is None:
        yearfirst = info.yearfirst

    res = self._result()
    l = _timelex.split(timestr)         # Splits the timestr into tokens

    # Indices into ``l`` of tokens that were ignored
    skipped_idxs = []

    # year/month/day list
    ymd = _ymd()

    len_l = len(l)
    i = 0
    # Each branch below leaves ``i`` on the last token it consumed; the
    # shared ``i += 1`` at the bottom of the loop then moves to the next one.
    try:
        while i < len_l:

            # Check if it's a number
            value_repr = l[i]
            try:
                value = float(value_repr)
            except ValueError:
                value = None

            if value is not None:
                # Numeric token
                i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

            # Check weekday
            elif info.weekday(l[i]) is not None:
                value = info.weekday(l[i])
                res.weekday = value

            # Check month name
            elif info.month(l[i]) is not None:
                value = info.month(l[i])
                ymd.append(value, 'M')

                if i + 1 < len_l:
                    if l[i + 1] in ('-', '/'):
                        # Jan-01[-99]
                        sep = l[i + 1]
                        ymd.append(l[i + 2])

                        if i + 3 < len_l and l[i + 3] == sep:
                            # Jan-01-99
                            ymd.append(l[i + 4])
                            i += 2

                        i += 2

                    elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                          info.pertain(l[i + 2])):
                        # Jan of 01
                        # In this case, 01 is clearly year
                        if l[i + 4].isdigit():
                            # Convert it here to become unambiguous
                            value = int(l[i + 4])
                            year = str(info.convertyear(value))
                            ymd.append(year, 'Y')
                        else:
                            # Wrong guess
                            pass
                            # TODO: not hit in tests
                        i += 4

            # Check am/pm
            elif info.ampm(l[i]) is not None:
                value = info.ampm(l[i])
                val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                if val_is_ampm:
                    res.hour = self._adjust_ampm(res.hour, value)
                    res.ampm = value

                elif fuzzy:
                    skipped_idxs.append(i)

            # Check for a timezone name
            elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                res.tzname = l[i]
                res.tzoffset = info.tzoffset(res.tzname)

                # Check for something like GMT+3, or BRST+3. Notice
                # that it doesn't mean "I am 3 hours after GMT", but
                # "my time +3 is GMT". If found, we reverse the
                # logic so that timezone parsing code will get it
                # right.
                if i + 1 < len_l and l[i + 1] in ('+', '-'):
                    l[i + 1] = ('+', '-')[l[i + 1] == '+']
                    res.tzoffset = None
                    if info.utczone(res.tzname):
                        # With something like GMT+3, the timezone
                        # is *not* GMT.
                        res.tzname = None

            # Check for a numbered timezone
            elif res.hour is not None and l[i] in ('+', '-'):
                signal = (-1, 1)[l[i] == '+']
                # An IndexError here (sign is the last token) is caught
                # by the enclosing try block.
                len_li = len(l[i + 1])

                # TODO: check that l[i + 1] is integer?
                if len_li == 4:
                    # -0300
                    hour_offset = int(l[i + 1][:2])
                    min_offset = int(l[i + 1][2:])
                elif i + 2 < len_l and l[i + 2] == ':':
                    # -03:00
                    hour_offset = int(l[i + 1])
                    min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                    i += 2
                elif len_li <= 2:
                    # -[0]3
                    hour_offset = int(l[i + 1][:2])
                    min_offset = 0
                else:
                    raise ValueError(timestr)

                # Offset is stored in seconds
                res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                # Look for a timezone name between parenthesis
                if (i + 5 < len_l and
                        info.jump(l[i + 2]) and l[i + 3] == '(' and
                        l[i + 5] == ')' and
                        3 <= len(l[i + 4]) and
                        self._could_be_tzname(res.hour, res.tzname,
                                              None, l[i + 4])):
                    # -0300 (BRST)
                    res.tzname = l[i + 4]
                    i += 4

                i += 1

            # Check jumps
            elif not (info.jump(l[i]) or fuzzy):
                raise ValueError(timestr)

            else:
                skipped_idxs.append(i)
            i += 1

        # Process year/month/day
        year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

        res.century_specified = ymd.century_specified
        res.year = year
        res.month = month
        res.day = day

    except (IndexError, ValueError):
        # Any out-of-range token lookup or failed conversion above means
        # the string could not be parsed.
        return None, None

    if not info.validate(res):
        return None, None

    if fuzzy_with_tokens:
        skipped_tokens = self._recombine_skipped(l, skipped_idxs)
        return res, tuple(skipped_tokens)
    else:
        return res, None
| |
def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """
    Interpret the numeric token ``tokens[idx]`` (and, where the pattern
    requires it, some of the tokens that follow), storing what was found
    in ``ymd`` and/or ``res``.

    :return:
        The index of the last token consumed; the caller advances past it.

    :raises ValueError:
        If the token cannot be converted by ``_to_decimal``, or if it fits
        none of the recognised patterns and ``fuzzy`` is false.
    """
    # Token is a number
    value_repr = tokens[idx]
    try:
        value = self._to_decimal(value_repr)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    # Length of the token's text, used to recognise packed formats
    len_li = len(value_repr)

    len_l = len(tokens)

    if (len(ymd) == 3 and len_li in (2, 4) and
        res.hour is None and
        (idx + 1 >= len_l or
         (tokens[idx + 1] != ':' and
          info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        s = tokens[idx]
        res.hour = int(s[:2])

        if len_li == 4:
            res.minute = int(s[2:])

    elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        s = tokens[idx]

        if not ymd and '.' not in tokens[idx]:
            ymd.append(s[:2])
            ymd.append(s[2:4])
            ymd.append(s[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(s[:2])
            res.minute = int(s[2:4])
            res.second, res.microsecond = self._parsems(s[4:])

    elif len_li in (8, 12, 14):
        # YYYYMMDD[hhmm[ss]]
        s = tokens[idx]
        ymd.append(s[:4], 'Y')
        ymd.append(s[4:6])
        ymd.append(s[6:8])

        if len_li > 8:
            res.hour = int(s[8:10])
            res.minute = int(s[10:12])

            if len_li > 12:
                res.second = int(s[12:])

    elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
        (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
        if hms is not None:
            # TODO: checking that hour/minute/second are not
            # already set?
            self._assign_hms(res, value_repr, hms)

    elif idx + 2 < len_l and tokens[idx + 1] == ':':
        # HH:MM[:SS[.ss]]
        res.hour = int(value)
        value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
        (res.minute, res.second) = self._parse_min_sec(value)

        if idx + 4 < len_l and tokens[idx + 3] == ':':
            res.second, res.microsecond = self._parsems(tokens[idx + 4])

            idx += 2

        idx += 2

    elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
        # Separator-delimited date; the third member must use the same
        # separator as the first two.
        sep = tokens[idx + 1]
        ymd.append(value_repr)

        if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
            if tokens[idx + 2].isdigit():
                # 01-01[-01]
                ymd.append(tokens[idx + 2])
            else:
                # 01-Jan[-01]
                value = info.month(tokens[idx + 2])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    raise ValueError()

            if idx + 3 < len_l and tokens[idx + 3] == sep:
                # We have three members
                value = info.month(tokens[idx + 4])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    ymd.append(tokens[idx + 4])
                idx += 2

            idx += 1
        idx += 1

    elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
        if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
            # 12 am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
            idx += 1
        else:
            # Year, month or day
            ymd.append(value)
        idx += 1

    elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
        # 12am
        hour = int(value)
        res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
        idx += 1

    elif ymd.could_be_day(value):
        ymd.append(value)

    elif not fuzzy:
        raise ValueError()

    return idx
| |
| def _find_hms_idx(self, idx, tokens, info, allow_jump): |
| len_l = len(tokens) |
| |
| if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: |
| # There is an "h", "m", or "s" label following this token. We take |
| # assign the upcoming label to the current token. |
| # e.g. the "12" in 12h" |
| hms_idx = idx + 1 |
| |
| elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and |
| info.hms(tokens[idx+2]) is not None): |
| # There is a space and then an "h", "m", or "s" label. |
| # e.g. the "12" in "12 h" |
| hms_idx = idx + 2 |
| |
| elif idx > 0 and info.hms(tokens[idx-1]) is not None: |
| # There is a "h", "m", or "s" preceeding this token. Since neither |
| # of the previous cases was hit, there is no label following this |
| # token, so we use the previous label. |
| # e.g. the "04" in "12h04" |
| hms_idx = idx-1 |
| |
| elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and |
| info.hms(tokens[idx-2]) is not None): |
| # If we are looking at the final token, we allow for a |
| # backward-looking check to skip over a space. |
| # TODO: Are we sure this is the right condition here? |
| hms_idx = idx - 2 |
| |
| else: |
| hms_idx = None |
| |
| return hms_idx |
| |
def _assign_hms(self, res, value_repr, hms):
    """
    Store the number written as ``value_repr`` on ``res`` as an hour
    (``hms == 0``), a minute (``hms == 1``) or a second (``hms == 2``).
    Any other ``hms`` value leaves ``res`` untouched.
    """
    # See GH issue #427, fixing float rounding
    value = self._to_decimal(value_repr)

    if hms == 0:
        # Hour; a fractional part becomes minutes
        res.hour = int(value)
        fraction = value % 1
        if fraction:
            res.minute = int(60 * fraction)
        return

    if hms == 1:
        # Minute; a fractional part becomes seconds
        minute, second = self._parse_min_sec(value)
        res.minute = minute
        res.second = second
        return

    if hms == 2:
        # Second; a fractional part becomes microseconds
        second, microsecond = self._parsems(value_repr)
        res.second = second
        res.microsecond = microsecond
| |
| def _could_be_tzname(self, hour, tzname, tzoffset, token): |
| return (hour is not None and |
| tzname is None and |
| tzoffset is None and |
| len(token) <= 5 and |
| (all(x in string.ascii_uppercase for x in token) |
| or token in self.info.UTCZONE)) |
| |
| def _ampm_valid(self, hour, ampm, fuzzy): |
| """ |
| For fuzzy parsing, 'a' or 'am' (both valid English words) |
| may erroneously trigger the AM/PM flag. Deal with that |
| here. |
| """ |
| val_is_ampm = True |
| |
| # If there's already an AM/PM flag, this one isn't one. |
| if fuzzy and ampm is not None: |
| val_is_ampm = False |
| |
| # If AM/PM is found and hour is not, raise a ValueError |
| if hour is None: |
| if fuzzy: |
| val_is_ampm = False |
| else: |
| raise ValueError('No hour specified with AM or PM flag.') |
| elif not 0 <= hour <= 12: |
| # If AM/PM is found, it's a 12 hour clock, so raise |
| # an error for invalid range |
| if fuzzy: |
| val_is_ampm = False |
| else: |
| raise ValueError('Invalid hour specified for 12-hour clock.') |
| |
| return val_is_ampm |
| |
| def _adjust_ampm(self, hour, ampm): |
| if hour < 12 and ampm == 1: |
| hour += 12 |
| elif hour == 12 and ampm == 0: |
| hour = 0 |
| return hour |
| |
| def _parse_min_sec(self, value): |
| # TODO: Every usage of this function sets res.second to the return |
| # value. Are there any cases where second will be returned as None and |
| # we *dont* want to set res.second = None? |
| minute = int(value) |
| second = None |
| |
| sec_remainder = value % 1 |
| if sec_remainder: |
| second = int(60 * sec_remainder) |
| return (minute, second) |
| |
| def _parsems(self, value): |
| """Parse a I[.F] seconds value into (seconds, microseconds).""" |
| if "." not in value: |
| return int(value), 0 |
| else: |
| i, f = value.split(".") |
| return int(i), int(f.ljust(6, "0")[:6]) |
| |
| def _parse_hms(self, idx, tokens, info, hms_idx): |
| # TODO: Is this going to admit a lot of false-positives for when we |
| # just happen to have digits and "h", "m" or "s" characters in non-date |
| # text? I guess hex hashes won't have that problem, but there's plenty |
| # of random junk out there. |
| if hms_idx is None: |
| hms = None |
| new_idx = idx |
| elif hms_idx > idx: |
| hms = info.hms(tokens[hms_idx]) |
| new_idx = hms_idx |
| else: |
| # Looking backwards, increment one. |
| hms = info.hms(tokens[hms_idx]) + 1 |
| new_idx = idx |
| |
| return (new_idx, hms) |
| |
| def _recombine_skipped(self, tokens, skipped_idxs): |
| """ |
| >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] |
| >>> skipped_idxs = [0, 1, 2, 5] |
| >>> _recombine_skipped(tokens, skipped_idxs) |
| ["foo bar", "baz"] |
| """ |
| skipped_tokens = [] |
| for i, idx in enumerate(sorted(skipped_idxs)): |
| if i > 0 and idx - 1 == skipped_idxs[i - 1]: |
| skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] |
| else: |
| skipped_tokens.append(tokens[idx]) |
| |
| return skipped_tokens |
| |
| def _build_tzinfo(self, tzinfos, tzname, tzoffset): |
| if callable(tzinfos): |
| tzdata = tzinfos(tzname, tzoffset) |
| else: |
| tzdata = tzinfos.get(tzname) |
| # handle case where tzinfo is paased an options that returns None |
| # eg tzinfos = {'BRST' : None} |
| if isinstance(tzdata, datetime.tzinfo) or tzdata is None: |
| tzinfo = tzdata |
| elif isinstance(tzdata, text_type): |
| tzinfo = tz.tzstr(tzdata) |
| elif isinstance(tzdata, integer_types): |
| tzinfo = tz.tzoffset(tzname, tzdata) |
| return tzinfo |
| |
| def _build_tzaware(self, naive, res, tzinfos): |
| if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): |
| tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) |
| aware = naive.replace(tzinfo=tzinfo) |
| aware = self._assign_tzname(aware, res.tzname) |
| |
| elif res.tzname and res.tzname in time.tzname: |
| aware = naive.replace(tzinfo=tz.tzlocal()) |
| |
| # Handle ambiguous local datetime |
| aware = self._assign_tzname(aware, res.tzname) |
| |
| # This is mostly relevant for winter GMT zones parsed in the UK |
| if (aware.tzname() != res.tzname and |
| res.tzname in self.info.UTCZONE): |
| aware = aware.replace(tzinfo=tz.tzutc()) |
| |
| elif res.tzoffset == 0: |
| aware = naive.replace(tzinfo=tz.tzutc()) |
| |
| elif res.tzoffset: |
| aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) |
| |
| elif not res.tzname and not res.tzoffset: |
| # i.e. no timezone information was found. |
| aware = naive |
| |
| elif res.tzname: |
| # tz-like string was parsed but we don't know what to do |
| # with it |
| warnings.warn("tzname {tzname} identified but not understood. " |
| "Pass `tzinfos` argument in order to correctly " |
| "return a timezone-aware datetime. In a future " |
| "version, this will raise an " |
| "exception.".format(tzname=res.tzname), |
| category=UnknownTimezoneWarning) |
| aware = naive |
| |
| return aware |
| |
| def _build_naive(self, res, default): |
| repl = {} |
| for attr in ("year", "month", "day", "hour", |
| "minute", "second", "microsecond"): |
| value = getattr(res, attr) |
| if value is not None: |
| repl[attr] = value |
| |
| if 'day' not in repl: |
| # If the default day exceeds the last day of the month, fall back |
| # to the end of the month. |
| cyear = default.year if res.year is None else res.year |
| cmonth = default.month if res.month is None else res.month |
| cday = default.day if res.day is None else res.day |
| |
| if cday > monthrange(cyear, cmonth)[1]: |
| repl['day'] = monthrange(cyear, cmonth)[1] |
| |
| naive = default.replace(**repl) |
| |
| if res.weekday is not None and not res.day: |
| naive = naive + relativedelta.relativedelta(weekday=res.weekday) |
| |
| return naive |
| |
| def _assign_tzname(self, dt, tzname): |
| if dt.tzname() != tzname: |
| new_dt = tz.enfold(dt, fold=1) |
| if new_dt.tzname() == tzname: |
| return new_dt |
| |
| return dt |
| |
| def _to_decimal(self, val): |
| try: |
| decimal_value = Decimal(val) |
| # See GH 662, edge case, infinite value should not be converted via `_to_decimal` |
| if not decimal_value.is_finite(): |
| raise ValueError("Converted decimal value is infinite or NaN") |
| except Exception as e: |
| msg = "Could not convert %s to decimal" % val |
| six.raise_from(ValueError(msg), e) |
| else: |
| return decimal_value |
| |
| |
# Module-level parser instance created with no arguments; ``parse()`` uses
# it whenever the caller does not supply a ``parserinfo``.
DEFAULTPARSER = parser()


def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string written in one of the supported formats.

    When ``parserinfo`` is supplied, a new parser configured with it does
    the work; otherwise the shared module-level ``DEFAULTPARSER`` is used.
    All remaining keyword arguments are passed straight through to that
    parser's ``parse`` method.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If omitted or ``None``, the default arguments to the
        :class:`parserinfo` constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace the
        corresponding elements of the default object.

    :param ignoretz:
        If set to ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be either a
        dictionary mapping time zone names to time zones, or a function
        taking two parameters (``tzname`` and ``tzoffset``) and returning
        a time zone.

        The time zones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer
        date (e.g. 01/05/09) as the day (``True``) or month (``False``).
        If ``yearfirst`` is set to ``True``, this distinguishes between
        YDM and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer
        date (e.g. 01/05/09) as the year. If ``True``, the first number is
        taken to be the year, otherwise the last number is taken to be the
        year. If this is set to ``None``, the value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for strings like "Today
        is January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the
        parser returns a tuple whose first element is the parsed
        :class:`datetime.datetime` and whose second element is a tuple
        containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, a tuple whose first
        element is a :class:`datetime.datetime` object and whose second
        element is a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    if not parserinfo:
        return DEFAULTPARSER.parse(timestr, **kwargs)

    return parser(parserinfo).parse(timestr, **kwargs)
| |
| |
class _tzparser(object):
    """
    Parser for ``TZ``-variable style time zone strings of the form
    ``BRST+3[BRDT[+2]]``, optionally followed by daylight saving rules.
    """

    class _result(_resultbase):
        """
        Outcome of parsing one time zone string: the standard and DST
        abbreviations and offsets, plus the ``start`` and ``end`` rules
        of the daylight saving period.
        """

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """Fields describing a single DST transition rule."""
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a ``_result``.

        The string starts with ``STD[offset][DST[offset]]``. Following the
        TZ variable convention the sign is inverted: an offset written
        with ``+`` (or without a sign) is stored as a negative number of
        seconds. It may be followed by transition rules in one of two
        notations:

        - the deprecated dateutil-specific comma list, e.g.
          ``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``, which triggers a
          ``tz.DeprecatedTzFormatWarning``;
        - ``,start[/time],end[/time]`` where each date is ``Jn`` (1-based
          day of a non-leap year), ``Mm.w.d`` (month, week, weekday) or a
          zero-based day of the year.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells
            whether tokens other than ``","`` and ``":"`` were left
            unconsumed, or ``None`` if the string could not be parsed.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = []
        len_l = len(l)

        # Malformed input is signalled with explicit ValueErrors rather
        # than `assert`, because assertions are removed under `python -O`
        # and the input would then silently be accepted.
        try:
            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    used_idxs.extend(range(j))
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # Accept ';' as an alternative to ',' between the rules.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                if l[i] != ',':
                    raise ValueError("expected ',' after the zone "
                                     "abbreviations and offsets")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after "
                                             "the month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after "
                                             "the week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if i != len_l and l[i] != ',':
                        raise ValueError("expected ',' or end of string "
                                         "after a transition rule")

                    i += 1

                if i < len_l:
                    raise ValueError("unexpected trailing tokens after "
                                     "the transition rules")

        except (IndexError, ValueError):
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",", ":"})
        return res
| |
| |
# Module-level ``_tzparser`` instance that ``_parsetz()`` delegates to.
DEFAULTTZPARSER = _tzparser()


def _parsetz(tzstr):
    """Parse ``tzstr`` with ``DEFAULTTZPARSER`` and return its result."""
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed
| |
class UnknownTimezoneWarning(RuntimeWarning):
    """
    Warning issued when the parser identifies a time zone name that it
    cannot turn into a ``tzinfo`` object.
    """
| # vim:ts=4:sw=4:et |