blob: 6a1f2e5930339a304a5677546c507849c03742a5 [file] [log] [blame]
/*
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
FILE_LICENCE ( GPL2_OR_LATER );
/** @file
*
* Uniform Resource Identifiers
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <ctype.h>
#include <gpxe/vsprintf.h>
#include <gpxe/uri.h>
/**
* Dump URI for debugging
*
* @v uri URI
*/
static void dump_uri ( struct uri *uri ) {
if ( ! uri )
return;
if ( uri->scheme )
DBG ( " scheme \"%s\"", uri->scheme );
if ( uri->opaque )
DBG ( " opaque \"%s\"", uri->opaque );
if ( uri->user )
DBG ( " user \"%s\"", uri->user );
if ( uri->password )
DBG ( " password \"%s\"", uri->password );
if ( uri->host )
DBG ( " host \"%s\"", uri->host );
if ( uri->port )
DBG ( " port \"%s\"", uri->port );
if ( uri->path )
DBG ( " path \"%s\"", uri->path );
if ( uri->query )
DBG ( " query \"%s\"", uri->query );
if ( uri->fragment )
DBG ( " fragment \"%s\"", uri->fragment );
}
/**
* Parse URI
*
* @v uri_string URI as a string
* @ret uri URI
*
* Splits a URI into its component parts. The return URI structure is
* dynamically allocated and must eventually be freed by calling
* uri_put().
*/
struct uri * parse_uri ( const char *uri_string ) {
struct uri *uri;
char *raw;
char *tmp;
char *path = NULL;
char *authority = NULL;
int i;
size_t raw_len;
/* Allocate space for URI struct and a copy of the string */
raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
uri = zalloc ( sizeof ( *uri ) + raw_len );
if ( ! uri )
return NULL;
raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
/* Copy in the raw string */
memcpy ( raw, uri_string, raw_len );
/* Start by chopping off the fragment, if it exists */
if ( ( tmp = strchr ( raw, '#' ) ) ) {
*(tmp++) = '\0';
uri->fragment = tmp;
}
/* Identify absolute/relative URI. We ignore schemes that are
* apparently only a single character long, since otherwise we
* misinterpret a DOS-style path name ("C:\path\to\file") as a
* URI with scheme="C",opaque="\path\to\file".
*/
if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
/* Absolute URI: identify hierarchical/opaque */
uri->scheme = raw;
*(tmp++) = '\0';
if ( *tmp == '/' ) {
/* Absolute URI with hierarchical part */
path = tmp;
} else {
/* Absolute URI with opaque part */
uri->opaque = tmp;
}
} else {
/* Relative URI */
path = raw;
}
/* If we don't have a path (i.e. we have an absolute URI with
* an opaque portion, we're already finished processing
*/
if ( ! path )
goto done;
/* Chop off the query, if it exists */
if ( ( tmp = strchr ( path, '?' ) ) ) {
*(tmp++) = '\0';
uri->query = tmp;
}
/* Identify net/absolute/relative path */
if ( strncmp ( path, "//", 2 ) == 0 ) {
/* Net path. If this is terminated by the first '/'
* of an absolute path, then we have no space for a
* terminator after the authority field, so shuffle
* the authority down by one byte, overwriting one of
* the two slashes.
*/
authority = ( path + 2 );
if ( ( tmp = strchr ( authority, '/' ) ) ) {
/* Shuffle down */
uri->path = tmp;
memmove ( ( authority - 1 ), authority,
( tmp - authority ) );
authority--;
*(--tmp) = '\0';
}
} else {
/* Absolute/relative path */
uri->path = path;
}
/* Split authority into user[:password] and host[:port] portions */
if ( ( tmp = strchr ( authority, '@' ) ) ) {
/* Has user[:password] */
*(tmp++) = '\0';
uri->host = tmp;
uri->user = authority;
if ( ( tmp = strchr ( authority, ':' ) ) ) {
/* Has password */
*(tmp++) = '\0';
uri->password = tmp;
}
} else {
/* No user:password */
uri->host = authority;
}
/* Split host into host[:port] */
if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
*(tmp++) = '\0';
uri->port = tmp;
}
/* Decode fields that should be decoded */
for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
const char *field = uri_get_field ( uri, i );
if ( field && ( URI_ENCODED & ( 1 << i ) ) )
uri_decode ( field, ( char * ) field,
strlen ( field ) + 1 /* NUL */ );
}
done:
DBG ( "URI \"%s\" split into", uri_string );
dump_uri ( uri );
DBG ( "\n" );
return uri;
}
/**
* Get port from URI
*
* @v uri URI, or NULL
* @v default_port Default port to use if none specified in URI
* @ret port Port
*/
unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
if ( ( ! uri ) || ( ! uri->port ) )
return default_port;
return ( strtoul ( uri->port, NULL, 0 ) );
}
/**
* Unparse URI
*
* @v buf Buffer to fill with URI string
* @v size Size of buffer
* @v uri URI to write into buffer, or NULL
* @v fields Bitmask of fields to include in URI string, or URI_ALL
* @ret len Length of URI string
*/
int unparse_uri ( char *buf, size_t size, struct uri *uri,
unsigned int fields ) {
/* List of characters that typically go before certain fields */
static char separators[] = { /* scheme */ 0, /* opaque */ ':',
/* user */ 0, /* password */ ':',
/* host */ '@', /* port */ ':',
/* path */ 0, /* query */ '?',
/* fragment */ '#' };
int used = 0;
int i;
DBG ( "URI unparsing" );
dump_uri ( uri );
DBG ( "\n" );
/* Ensure buffer is NUL-terminated */
if ( size )
buf[0] = '\0';
/* Special-case NULL URI */
if ( ! uri )
return 0;
/* Iterate through requested fields */
for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
const char *field = uri_get_field ( uri, i );
char sep = separators[i];
/* Ensure `fields' only contains bits for fields that exist */
if ( ! field )
fields &= ~( 1 << i );
/* Store this field if we were asked to */
if ( fields & ( 1 << i ) ) {
/* Print :// if we're non-opaque and had a scheme */
if ( ( fields & URI_SCHEME_BIT ) &&
( i > URI_OPAQUE ) ) {
used += ssnprintf ( buf + used, size - used,
"://" );
/* Only print :// once */
fields &= ~URI_SCHEME_BIT;
}
/* Only print separator if an earlier field exists */
if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
used += ssnprintf ( buf + used, size - used,
"%c", sep );
/* Print contents of field, possibly encoded */
if ( URI_ENCODED & ( 1 << i ) )
used += uri_encode ( field, buf + used,
size - used, i );
else
used += ssnprintf ( buf + used, size - used,
"%s", field );
}
}
return used;
}
/**
* Duplicate URI
*
* @v uri URI
* @ret uri Duplicate URI
*
* Creates a modifiable copy of a URI.
*/
struct uri * uri_dup ( struct uri *uri ) {
size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
char buf[len];
unparse_uri ( buf, len, uri, URI_ALL );
return parse_uri ( buf );
}
/**
* Resolve base+relative path
*
* @v base_uri Base path
* @v relative_uri Relative path
* @ret resolved_uri Resolved path
*
* Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz" and a relative
* path (e.g. "initrd.gz") and produces a new path
* (e.g. "/var/lib/tftpboot/initrd.gz"). Note that any non-directory
* portion of the base path will automatically be stripped; this
* matches the semantics used when resolving the path component of
* URIs.
*/
char * resolve_path ( const char *base_path,
const char *relative_path ) {
size_t base_len = ( strlen ( base_path ) + 1 );
char base_path_copy[base_len];
char *base_tmp = base_path_copy;
char *resolved;
/* If relative path is absolute, just re-use it */
if ( relative_path[0] == '/' )
return strdup ( relative_path );
/* Create modifiable copy of path for dirname() */
memcpy ( base_tmp, base_path, base_len );
base_tmp = dirname ( base_tmp );
/* Process "./" and "../" elements */
while ( *relative_path == '.' ) {
relative_path++;
if ( *relative_path == 0 ) {
/* Do nothing */
} else if ( *relative_path == '/' ) {
relative_path++;
} else if ( *relative_path == '.' ) {
relative_path++;
if ( *relative_path == 0 ) {
base_tmp = dirname ( base_tmp );
} else if ( *relative_path == '/' ) {
base_tmp = dirname ( base_tmp );
relative_path++;
} else {
relative_path -= 2;
break;
}
} else {
relative_path--;
break;
}
}
/* Create and return new path */
if ( asprintf ( &resolved, "%s%s%s", base_tmp,
( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
"" : "/" ), relative_path ) < 0 )
return NULL;
return resolved;
}
/**
* Resolve base+relative URI
*
* @v base_uri Base URI, or NULL
* @v relative_uri Relative URI
* @ret resolved_uri Resolved URI
*
* Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
* relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
* (e.g. "http://etherboot.org/initrds/initrd.gz").
*/
struct uri * resolve_uri ( struct uri *base_uri,
struct uri *relative_uri ) {
struct uri tmp_uri;
char *tmp_path = NULL;
struct uri *new_uri;
/* If relative URI is absolute, just re-use it */
if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
return uri_get ( relative_uri );
/* Mangle URI */
memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
if ( relative_uri->path ) {
tmp_path = resolve_path ( ( base_uri->path ?
base_uri->path : "/" ),
relative_uri->path );
tmp_uri.path = tmp_path;
tmp_uri.query = relative_uri->query;
tmp_uri.fragment = relative_uri->fragment;
} else if ( relative_uri->query ) {
tmp_uri.query = relative_uri->query;
tmp_uri.fragment = relative_uri->fragment;
} else if ( relative_uri->fragment ) {
tmp_uri.fragment = relative_uri->fragment;
}
/* Create demangled URI */
new_uri = uri_dup ( &tmp_uri );
free ( tmp_path );
return new_uri;
}
/**
* Test for unreserved URI characters
*
* @v c Character to test
* @v field Field of URI in which character lies
* @ret is_unreserved Character is an unreserved character
*/
static int is_unreserved_uri_char ( int c, int field ) {
/* According to RFC3986, the unreserved character set is
*
* A-Z a-z 0-9 - _ . ~
*
* but we also pass & ; = in queries, / in paths,
* and everything in opaques
*/
int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
( c == '-' ) || ( c == '_' ) ||
( c == '.' ) || ( c == '~' ) );
if ( field == URI_QUERY )
ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
if ( field == URI_PATH )
ok = ok || ( c == '/' );
if ( field == URI_OPAQUE )
ok = 1;
return ok;
}
/**
* URI-encode string
*
* @v raw_string String to be URI-encoded
* @v buf Buffer to contain encoded string
* @v len Length of buffer
* @v field Field of URI in which string lies
* @ret len Length of encoded string (excluding NUL)
*/
size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
int field ) {
ssize_t remaining = len;
size_t used;
unsigned char c;
if ( len > 0 )
buf[0] = '\0';
while ( ( c = *(raw_string++) ) ) {
if ( is_unreserved_uri_char ( c, field ) ) {
used = ssnprintf ( buf, remaining, "%c", c );
} else {
used = ssnprintf ( buf, remaining, "%%%02X", c );
}
buf += used;
remaining -= used;
}
return ( len - remaining );
}
/**
* Decode URI-encoded string
*
* @v encoded_string URI-encoded string
* @v buf Buffer to contain decoded string
* @v len Length of buffer
* @ret len Length of decoded string (excluding NUL)
*
* This function may be used in-place, with @a buf the same as
* @a encoded_string.
*/
size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
ssize_t remaining;
char hexbuf[3];
char *hexbuf_end;
unsigned char c;
for ( remaining = len; *encoded_string; remaining-- ) {
if ( *encoded_string == '%' ) {
encoded_string++;
snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
encoded_string );
c = strtoul ( hexbuf, &hexbuf_end, 16 );
encoded_string += ( hexbuf_end - hexbuf );
} else {
c = *(encoded_string++);
}
if ( remaining > 1 )
*buf++ = c;
}
if ( len )
*buf = 0;
return ( len - remaining );
}