blob: 28086e334b35831ecc354effa9254c2b9b45006d [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* apr_date.c: date parsing utility routines
* These routines are (hopefully) platform independent.
*
* 27 Oct 1996 Roy Fielding
* Extracted (with many modifications) from mod_proxy.c and
* tested with over 50,000 randomly chosen valid date strings
* and several hundred variations of invalid date strings.
*
*/
#include "apr.h"
#include "apr_lib.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if APR_HAVE_CTYPE_H
#include <ctype.h>
#endif
#include "apr_date.h"
/*
* Compare a string to a mask
* Mask characters (arbitrary maximum is 256 characters, just in case):
* @ - uppercase letter
* $ - lowercase letter
* & - hex digit
* # - digit
* ~ - digit or space
* * - swallow remaining characters
* <x> - exact match for any other character
*/
APU_DECLARE(int) apr_date_checkmask(const char *data, const char *mask)
{
int i;
char d;
for (i = 0; i < 256; i++) {
d = data[i];
switch (mask[i]) {
case '\0':
return (d == '\0');
case '*':
return 1;
case '@':
if (!apr_isupper(d))
return 0;
break;
case '$':
if (!apr_islower(d))
return 0;
break;
case '#':
if (!apr_isdigit(d))
return 0;
break;
case '&':
if (!apr_isxdigit(d))
return 0;
break;
case '~':
if ((d != ' ') && !apr_isdigit(d))
return 0;
break;
default:
if (mask[i] != d)
return 0;
break;
}
}
return 0; /* We only get here if mask is corrupted (exceeds 256) */
}
/*
* Parses an HTTP date in one of three standard forms:
*
* Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
* Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
* Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
*
* and returns the apr_time_t number of microseconds since 1 Jan 1970 GMT,
* or APR_DATE_BAD if this would be out of range or if the date is invalid.
*
* The restricted HTTP syntax is
*
* HTTP-date = rfc1123-date | rfc850-date | asctime-date
*
* rfc1123-date = wkday "," SP date1 SP time SP "GMT"
* rfc850-date = weekday "," SP date2 SP time SP "GMT"
* asctime-date = wkday SP date3 SP time SP 4DIGIT
*
* date1 = 2DIGIT SP month SP 4DIGIT
* ; day month year (e.g., 02 Jun 1982)
* date2 = 2DIGIT "-" month "-" 2DIGIT
* ; day-month-year (e.g., 02-Jun-82)
* date3 = month SP ( 2DIGIT | ( SP 1DIGIT ))
* ; month day (e.g., Jun 2)
*
* time = 2DIGIT ":" 2DIGIT ":" 2DIGIT
* ; 00:00:00 - 23:59:59
*
* wkday = "Mon" | "Tue" | "Wed"
* | "Thu" | "Fri" | "Sat" | "Sun"
*
* weekday = "Monday" | "Tuesday" | "Wednesday"
* | "Thursday" | "Friday" | "Saturday" | "Sunday"
*
* month = "Jan" | "Feb" | "Mar" | "Apr"
* | "May" | "Jun" | "Jul" | "Aug"
* | "Sep" | "Oct" | "Nov" | "Dec"
*
* However, for the sake of robustness (and Netscapeness), we ignore the
* weekday and anything after the time field (including the timezone).
*
* This routine is intended to be very fast; 10x faster than using sscanf.
*
* Originally from Andrew Daviel <andrew@vancouver-webpages.com>, 29 Jul 96
* but many changes since then.
*
*/
APU_DECLARE(apr_time_t) apr_date_parse_http(const char *date)
{
apr_time_exp_t ds;
apr_time_t result;
int mint, mon;
const char *monstr, *timstr;
static const int months[12] =
{
('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c'};
if (!date)
return APR_DATE_BAD;
while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
++date;
if (*date == '\0')
return APR_DATE_BAD;
if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
return APR_DATE_BAD;
++date; /* Now pointing to first char after space, which should be */
/* start of the actual date information for all 4 formats. */
if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) {
/* RFC 1123 format with two days */
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
}
else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {
/* RFC 850 format */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
}
else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
/* asctime format */
ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
if (date[4] == ' ')
ds.tm_mday = 0;
else
ds.tm_mday = (date[4] - '0') * 10;
ds.tm_mday += (date[5] - '0');
monstr = date;
timstr = date + 7;
}
else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
/* RFC 1123 format with one day */
ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 11;
}
else
return APR_DATE_BAD;
if (ds.tm_mday <= 0 || ds.tm_mday > 31)
return APR_DATE_BAD;
ds.tm_hour = ((timstr[0] - '0') * 10) + (timstr[1] - '0');
ds.tm_min = ((timstr[3] - '0') * 10) + (timstr[4] - '0');
ds.tm_sec = ((timstr[6] - '0') * 10) + (timstr[7] - '0');
if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
return APR_DATE_BAD;
mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
for (mon = 0; mon < 12; mon++)
if (mint == months[mon])
break;
if (mon == 12)
return APR_DATE_BAD;
if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
return APR_DATE_BAD;
/* February gets special check for leapyear */
if ((mon == 1) &&
((ds.tm_mday > 29) ||
((ds.tm_mday == 29)
&& ((ds.tm_year & 3)
|| (((ds.tm_year % 100) == 0)
&& (((ds.tm_year % 400) != 100)))))))
return APR_DATE_BAD;
ds.tm_mon = mon;
/* ap_mplode_time uses tm_usec and tm_gmtoff fields, but they haven't
* been set yet.
* It should be safe to just zero out these values.
* tm_usec is the number of microseconds into the second. HTTP only
* cares about second granularity.
* tm_gmtoff is the number of seconds off of GMT the time is. By
* definition all times going through this function are in GMT, so this
* is zero.
*/
ds.tm_usec = 0;
ds.tm_gmtoff = 0;
if (apr_time_exp_get(&result, &ds) != APR_SUCCESS)
return APR_DATE_BAD;
return result;
}
/*
* Parses a string resembling an RFC 822 date. This is meant to be
* leinent in its parsing of dates. Hence, this will parse a wider
* range of dates than apr_date_parse_http.
*
* The prominent mailer (or poster, if mailer is unknown) that has
* been seen in the wild is included for the unknown formats.
*
* Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
* Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
* Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
* Sun, 6 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
* Sun, 06 Nov 94 08:49:37 GMT ; RFC 822
* Sun, 6 Nov 94 08:49:37 GMT ; RFC 822
* Sun, 06 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
* Sun, 6 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
* Sun, 06 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
* Sun, 6 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
* Mon, 7 Jan 2002 07:21:22 GMT ; Unknown [Postfix]
* Sun, 06-Nov-1994 08:49:37 GMT ; RFC 850 with four digit years
*
*/
#define TIMEPARSE(ds,hr10,hr1,min10,min1,sec10,sec1) \
{ \
ds.tm_hour = ((hr10 - '0') * 10) + (hr1 - '0'); \
ds.tm_min = ((min10 - '0') * 10) + (min1 - '0'); \
ds.tm_sec = ((sec10 - '0') * 10) + (sec1 - '0'); \
}
#define TIMEPARSE_STD(ds,timstr) \
{ \
TIMEPARSE(ds, timstr[0],timstr[1], \
timstr[3],timstr[4], \
timstr[6],timstr[7]); \
}
APU_DECLARE(apr_time_t) apr_date_parse_rfc(const char *date)
{
apr_time_exp_t ds;
apr_time_t result;
int mint, mon;
const char *monstr, *timstr, *gmtstr;
static const int months[12] =
{
('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c' };
if (!date)
return APR_DATE_BAD;
/* Not all dates have text days at the beginning. */
if (!apr_isdigit(date[0]))
{
while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
++date;
if (*date == '\0')
return APR_DATE_BAD;
if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
return APR_DATE_BAD;
++date; /* Now pointing to first char after space, which should be */ }
/* start of the actual date information for all 11 formats. */
if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) { /* RFC 1123 format */
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
gmtstr = date + 21;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {/* RFC 850 format */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = date + 19;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
/* asctime format */
ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
if (date[4] == ' ')
ds.tm_mday = 0;
else
ds.tm_mday = (date[4] - '0') * 10;
ds.tm_mday += (date[5] - '0');
monstr = date;
timstr = date + 7;
gmtstr = NULL;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
/* RFC 1123 format*/
ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 11;
gmtstr = date + 20;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "## @$$ ## ##:##:## *")) {
/* This is the old RFC 1123 date format - many many years ago, people
* used two-digit years. Oh, how foolish.
*
* Two-digit day, two-digit year version. */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = date + 19;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, " # @$$ ## ##:##:## *")) {
/* This is the old RFC 1123 date format - many many years ago, people
* used two-digit years. Oh, how foolish.
*
* Space + one-digit day, two-digit year version.*/
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = date + 19;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "# @$$ ## ##:##:## *")) {
/* This is the old RFC 1123 date format - many many years ago, people
* used two-digit years. Oh, how foolish.
*
* One-digit day, two-digit year version. */
ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 9;
gmtstr = date + 18;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "## @$$ ## ##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = NULL;
TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
}
else if (apr_date_checkmask(date, "# @$$ ## ##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 9;
gmtstr = NULL;
TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
}
else if (apr_date_checkmask(date, "## @$$ ## #:##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 9;
gmtstr = date + 18;
TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
}
else if (apr_date_checkmask(date, "# @$$ ## #:##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 8;
gmtstr = date + 17;
TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
}
else if (apr_date_checkmask(date, " # @$$ #### ##:##:## *")) {
/* RFC 1123 format with a space instead of a leading zero. */
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
gmtstr = date + 21;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "##-@$$-#### ##:##:## *")) {
/* RFC 1123 with dashes instead of spaces between date/month/year
* This also looks like RFC 850 with four digit years.
*/
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
gmtstr = date + 21;
TIMEPARSE_STD(ds, timstr);
}
else
return APR_DATE_BAD;
if (ds.tm_mday <= 0 || ds.tm_mday > 31)
return APR_DATE_BAD;
if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
return APR_DATE_BAD;
mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
for (mon = 0; mon < 12; mon++)
if (mint == months[mon])
break;
if (mon == 12)
return APR_DATE_BAD;
if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
return APR_DATE_BAD;
/* February gets special check for leapyear */
if ((mon == 1) &&
((ds.tm_mday > 29)
|| ((ds.tm_mday == 29)
&& ((ds.tm_year & 3)
|| (((ds.tm_year % 100) == 0)
&& (((ds.tm_year % 400) != 100)))))))
return APR_DATE_BAD;
ds.tm_mon = mon;
/* tm_gmtoff is the number of seconds off of GMT the time is.
*
* We only currently support: [+-]ZZZZ where Z is the offset in
* hours from GMT.
*
* If there is any confusion, tm_gmtoff will remain 0.
*/
ds.tm_gmtoff = 0;
/* Do we have a timezone ? */
if (gmtstr) {
int offset;
switch (*gmtstr) {
case '-':
offset = atoi(gmtstr+1);
ds.tm_gmtoff -= (offset / 100) * 60 * 60;
ds.tm_gmtoff -= (offset % 100) * 60;
break;
case '+':
offset = atoi(gmtstr+1);
ds.tm_gmtoff += (offset / 100) * 60 * 60;
ds.tm_gmtoff += (offset % 100) * 60;
break;
}
}
/* apr_time_exp_get uses tm_usec field, but it hasn't been set yet.
* It should be safe to just zero out this value.
* tm_usec is the number of microseconds into the second. HTTP only
* cares about second granularity.
*/
ds.tm_usec = 0;
if (apr_time_exp_gmt_get(&result, &ds) != APR_SUCCESS)
return APR_DATE_BAD;
return result;
}