blob: f8e7358aa285f6a9409cdaabf25daee0c7822d20 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: XMLUri.hpp 568078 2007-08-21 11:43:25Z amassari $
*/
#if !defined(XMLURI_HPP)
#define XMLURI_HPP
#include <xercesc/util/XMemory.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/internal/XSerializable.hpp>
#include <xercesc/framework/XMLBuffer.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/*
* This class is a direct port of Java's URI class. To distinguish
* it from XMLURL, we use the name XMLUri instead of
* XMLURI.
*
* TODO: how to relate XMLUri and XMLURL since URL is part of URI.
*
*/
class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
{
public:
// -----------------------------------------------------------------------
// Constructors and Destructor
// -----------------------------------------------------------------------
/**
* Construct a new URI from a URI specification string.
*
* If the specification follows the "generic URI" syntax, (two slashes
* following the first colon), the specification will be parsed
* accordingly - setting the
* scheme,
* userinfo,
* host,
* port,
* path,
* querystring and
* fragment
* fields as necessary.
*
* If the specification does not follow the "generic URI" syntax,
* the specification is parsed into a
* scheme and
* scheme-specific part (stored as the path) only.
*
* @param uriSpec the URI specification string (cannot be null or empty)
*
* @param manager Pointer to the memory manager to be used to
* allocate objects.
*
* ctor# 2
*
*/
XMLUri(const XMLCh* const uriSpec,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
/**
* Construct a new URI from a base URI and a URI specification string.
* The URI specification string may be a relative URI.
*
* @param baseURI the base URI (cannot be null if uriSpec is null or
* empty)
*
* @param uriSpec the URI specification string (cannot be null or
* empty if base is null)
*
* @param manager Pointer to the memory manager to be used to
* allocate objects.
*
* ctor# 7 relative ctor
*
*/
XMLUri(const XMLUri* const baseURI
, const XMLCh* const uriSpec
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
/**
* Copy constructor.
*
* @param toCopy the URI to copy from
*/
XMLUri(const XMLUri& toCopy);
/**
* Assignment operator.
*
* @param toAssign the URI to assign from
*
* @return an XMLUri reference (presumably this object - confirm in
* the implementation file)
*/
XMLUri& operator=(const XMLUri& toAssign);
/**
* Destructor (virtual).
*/
virtual ~XMLUri();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
/**
* Get the URI as a string specification. See RFC 2396 Section 5.2.
*
* The text is built on demand: the inline definition that follows this
* class calls buildFullText() when no text has been built yet.
*
* @return the URI string specification
*/
const XMLCh* getUriText() const;
/**
* Get the scheme for this URI.
*
* @return the scheme for this URI
*/
const XMLCh* getScheme() const;
/**
* Get the userinfo for this URI.
*
* @return the userinfo for this URI (null if not specified).
*/
const XMLCh* getUserInfo() const;
/**
* Get the host for this URI.
*
* @return the host for this URI (null if not specified).
*/
const XMLCh* getHost() const;
/**
* Get the port for this URI.
*
* @return the port for this URI (-1 if not specified).
*/
int getPort() const;
/**
* Get the registry based authority for this URI.
*
* @return the registry based authority (null if not specified).
*/
const XMLCh* getRegBasedAuthority() const;
/**
* Get the path for this URI. Note that the value returned is the path
* only and does not include the query string or fragment.
*
* @return the path for this URI.
*/
const XMLCh* getPath() const;
/**
* Get the query string for this URI.
*
* @return the query string for this URI. Null is returned if there
* was no "?" in the URI spec, empty string if there was a
* "?" but no query string following it.
*/
const XMLCh* getQueryString() const;
/**
* Get the fragment for this URI.
*
* @return the fragment for this URI. Null is returned if there
* was no "#" in the URI spec, empty string if there was a
* "#" but no fragment following it.
*/
const XMLCh* getFragment() const;
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
/**
* Set the scheme for this URI. The scheme is converted to lowercase
* before it is set.
*
* @param newScheme the scheme for this URI (cannot be null)
*
*/
void setScheme(const XMLCh* const newScheme);
/**
* Set the userinfo for this URI. If a non-null value is passed in and
* the host value is null, then an exception is thrown.
*
* @param newUserInfo the userinfo for this URI
*
*/
void setUserInfo(const XMLCh* const newUserInfo);
/**
* Set the host for this URI. If null is passed in, the userinfo
* field is also set to null and the port is set to -1.
*
* Note: This method overwrites registry based authority if it
* previously existed in this URI.
*
* @param newHost the host for this URI
*
*/
void setHost(const XMLCh* const newHost);
/**
* Set the port for this URI. -1 is used to indicate that the port is
* not specified, otherwise valid port numbers are between 0 and 65535.
* If a valid port number is passed in and the host field is null,
* an exception is thrown.
*
* @param newPort the port number for this URI
*
*/
void setPort(int newPort);
/**
* Sets the registry based authority for this URI.
*
* Note: This method overwrites server based authority
* if it previously existed in this URI.
*
* @param newRegAuth the registry based authority for this URI
*/
void setRegBasedAuthority(const XMLCh* const newRegAuth);
/**
* Set the path for this URI.
*
* If the supplied path is null, then the
* query string and fragment are set to null as well.
*
* If the supplied path includes a query string and/or fragment,
* these fields will be parsed and set as well.
*
* Note:
*
* For URIs following the "generic URI" syntax, the path
* specified should start with a slash.
*
* For URIs that do not follow the generic URI syntax, this method
* sets the scheme-specific part.
*
* @param newPath the path for this URI (may be null)
*
*/
void setPath(const XMLCh* const newPath);
/**
* Set the query string for this URI. A non-null value is valid only
* if this is a URI conforming to the generic URI syntax and
* the path value is not null.
*
* @param newQueryString the query string for this URI
*
*/
void setQueryString(const XMLCh* const newQueryString);
/**
* Set the fragment for this URI. A non-null value is valid only
* if this is a URI conforming to the generic URI syntax and
* the path value is not null.
*
* @param newFragment the fragment for this URI
*
*/
void setFragment(const XMLCh* const newFragment);
// -----------------------------------------------------------------------
// Miscellaneous methods
// -----------------------------------------------------------------------
/**
* Determine whether a given string contains only URI characters (also
* called "uric" in RFC 2396). uric consist of all reserved
* characters, unreserved characters and escaped characters.
*
* @param uric the string to be checked
*
* @return true if the string is comprised of uric, false otherwise
*/
static bool isURIString(const XMLCh* const uric);
/**
* Determine whether a given string is a valid URI
*
* @param baseURI the base URI to validate against (NOTE(review):
* presumably may be null when uriStr is absolute -
* confirm in the implementation file)
*
* @param uriStr the URI string to be checked
*
* @return true if the string is a valid URI, false otherwise
*/
static bool isValidURI( const XMLUri* const baseURI
, const XMLCh* const uriStr);
/**
* Determine whether a given string is a valid URI
*
* @param haveBaseURI NOTE(review): presumably indicates whether a base
* URI is available to resolve uriStr against -
* confirm in the implementation file
*
* @param uriStr the URI string to be checked
*
* @return true if the string is a valid URI, false otherwise
*/
static bool isValidURI( bool haveBaseURI
, const XMLCh* const uriStr);
/**
* Normalize a system URI, delivering the result through a buffer.
*
* NOTE(review): the normalization rules are not visible in this header;
* see the implementation file for what is actually rewritten.
*
* @param systemURI the URI string to normalize
*
* @param normalizedURI the buffer that receives the normalized URI
*/
static void normalizeURI(const XMLCh* const systemURI,
XMLBuffer& normalizedURI);
/***
* Support for Serialization/De-serialization
***/
DECL_XSERIALIZABLE(XMLUri)
/**
* Construct a URI from a memory manager only; no URI specification is
* supplied. It is declared alongside the serialization support above.
*
* @param manager Pointer to the memory manager to be used to
* allocate objects.
*/
XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
private:
//
// Character tables. MARK_OR_RESERVED_CHARACTERS, RESERVED_CHARACTERS,
// PATH_CHARACTERS and MARK_CHARACTERS are searched by the inline
// character classification helpers defined after this class; the
// remaining tables are presumably used by the implementation file.
//
static const XMLCh MARK_OR_RESERVED_CHARACTERS[];
static const XMLCh RESERVED_CHARACTERS[];
static const XMLCh MARK_CHARACTERS[];
static const XMLCh SCHEME_CHARACTERS[];
static const XMLCh USERINFO_CHARACTERS[];
static const XMLCh REG_NAME_CHARACTERS[];
static const XMLCh PATH_CHARACTERS[];
// Helper method for getUriText(): called when no URI text has been built yet.
void buildFullText();
// -----------------------------------------------------------------------
// Private helper methods
// -----------------------------------------------------------------------
/**
* Determine whether a character is a reserved character.
*
* @param theChar the character to be checked
*
* @return true if the character is a reserved character
*/
static bool isReservedCharacter(const XMLCh theChar);
/**
* Determine whether a character is a path character.
*
* @param theChar the character to be checked
*
* @return true if the character is a path character
*/
static bool isPathCharacter(const XMLCh theChar);
/**
* Determine whether a char is an unreserved character.
*
* @param theChar the character to be checked
*
* @return true if the char is unreserved, false otherwise
*/
static bool isUnreservedCharacter(const XMLCh theChar);
/**
* Determine whether a char is a reserved or unreserved character.
*
* @param theChar the character to be checked
*
* @return true if the char is reserved or unreserved, false otherwise
*/
static bool isReservedOrUnreservedCharacter(const XMLCh theChar);
/**
* Determine whether a scheme conforms to the rules for a scheme name.
* A scheme is conformant if it starts with an alphanumeric, and
* contains only alphanumerics, '+','-' and '.'.
*
* @param scheme the scheme name to be checked
*
* @return true if the scheme is conformant, false otherwise
*/
static bool isConformantSchemeName(const XMLCh* const scheme);
/**
* Determine whether a userInfo conforms to the rules for a userinfo.
*
* This function is declared void, so the outcome is not reported
* through a return value. NOTE(review): presumably a non-conformant
* userinfo is reported by throwing an exception - confirm in the
* implementation file.
*
* @param userInfo the userinfo string to be checked
*
* @param manager Pointer to the memory manager
*/
static void isConformantUserInfo(const XMLCh* const userInfo
, MemoryManager* const manager);
/**
* Determines whether the components host, port, and user info
* are valid as a server authority. This overload takes explicit
* lengths for the host and userinfo strings.
*
* @param host the host component
* @param hostLen the length of the host component
* @param port the port number
* @param userinfo the userinfo component
* @param userLen the length of the userinfo component
*
* @return true if the given host, port, and userinfo compose
* a valid server authority
*/
static bool isValidServerBasedAuthority(const XMLCh* const host
, const int hostLen
, const int port
, const XMLCh* const userinfo
, const int userLen);
/**
* Determines whether the components host, port, and user info
* are valid as a server authority.
*
* @param host the host component
* @param port the port number
* @param userinfo the userinfo component
* @param manager Pointer to the memory manager
*
* @return true if the given host, port, and userinfo compose
* a valid server authority
*/
static bool isValidServerBasedAuthority(const XMLCh* const host
, const int port
, const XMLCh* const userinfo
, MemoryManager* const manager);
/**
* Determines whether the given string is a registry based authority.
* This overload takes an explicit length for the authority string.
*
* @param authority the authority component of a URI
* @param authLen the length of the authority component
*
* @return true if the given string is a registry based authority
*/
static bool isValidRegistryBasedAuthority(const XMLCh* const authority
, const int authLen);
/**
* Determines whether the given string is a registry based authority.
*
* @param authority the authority component of a URI
*
* @return true if the given string is a registry based authority
*/
static bool isValidRegistryBasedAuthority(const XMLCh* const authority);
/**
* Determine whether a string is syntactically capable of representing
* a valid IPv4 address, IPv6 reference or the domain name of a network host.
*
* A valid IPv4 address consists of four decimal digit groups
* separated by a '.'.
*
* See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the
* definition of IPv6 references.
*
* A hostname consists of domain labels (each of which must begin and
* end with an alphanumeric but may contain '-') separated by a '.'.
* See RFC 2396 Section 3.2.2.
*
* @param addr the address string to be checked
* @param manager Pointer to the memory manager
*
* @return true if the string is a syntactically valid IPv4 address,
* IPv6 reference or hostname
*/
static bool isWellFormedAddress(const XMLCh* const addr
, MemoryManager* const manager);
/**
* Determines whether a string is an IPv4 address as defined by
* RFC 2373, and under the further constraint that it must be a 32-bit
* address. Though not expressed in the grammar, in order to satisfy
* the 32-bit address constraint, each segment of the address cannot
* be greater than 255 (8 bits of information).
*
* @param addr the string to be checked
* @param length the length of the string
*
* @return true if the string is a syntactically valid IPv4 address
*/
static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length);
/**
* Determines whether a string is an IPv6 reference as defined
* by RFC 2732, where IPv6address is defined in RFC 2373. The
* IPv6 address is parsed according to Section 2.2 of RFC 2373,
* with the additional constraint that the address be composed of
* 128 bits of information.
*
* Note: The BNF expressed in RFC 2373 Appendix B does not
* accurately describe section 2.2, and was in fact removed from
* RFC 3513, the successor of RFC 2373.
*
* @param addr the string to be checked
* @param length the length of the string
*
* @return true if the string is a syntactically valid IPv6 reference
*/
static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length);
/**
* Helper function for isWellFormedIPv6Reference which scans the
* hex sequences of an IPv6 address. It returns the index of the
* next character to scan in the address, or -1 if the string
* cannot match a valid IPv6 address.
*
* @param addr the string to be scanned
* @param index the beginning index (inclusive)
* @param end the ending index (exclusive)
* @param counter a counter for the number of 16-bit sections read
* in the address (passed by reference)
*
* @return the index of the next character to scan, or -1 if the
* string cannot match a valid IPv6 address
*/
static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter);
/**
* Get the indicator as to whether this URI uses the "generic URI"
* syntax.
*
* NOTE(review): this method is not declared const, so it cannot be
* called on a const XMLUri.
*
* @return true if this URI uses the "generic URI" syntax, false
* otherwise
*/
bool isGenericURI();
// -----------------------------------------------------------------------
// Initialization and cleanup methods
// -----------------------------------------------------------------------
/**
* Initialize all fields of this URI from another URI.
*
* @param toCopy the URI to copy (cannot be null)
*/
void initialize(const XMLUri& toCopy);
/**
* Initializes this URI from a base URI and a URI specification string.
* See RFC 2396 Section 4 and Appendix B for specifications on parsing
* the URI and Section 5 for specifications on resolving relative URIs
* and relative paths.
*
* @param baseURI the base URI (may be null if uriSpec is an absolute
* URI)
*
* @param uriSpec the URI spec string which may be an absolute or
* relative URI (can only be null/empty if base
* is not null)
*
*/
void initialize(const XMLUri* const baseURI
, const XMLCh* const uriSpec);
/**
* Initialize the scheme for this URI from a URI string spec.
*
* @param uriSpec the URI specification (cannot be null)
*
*/
void initializeScheme(const XMLCh* const uriSpec);
/**
* Initialize the authority (userinfo, host and port) for this
* URI from a URI string spec.
*
* @param uriSpec the URI specification (cannot be null)
*
*/
void initializeAuthority(const XMLCh* const uriSpec);
/**
* Initialize the path for this URI from a URI string spec.
*
* @param uriSpec the URI specification (cannot be null)
*
*/
void initializePath(const XMLCh* const uriSpec);
/**
* Clean up the data variables.
*
*/
void cleanUp();
//
// Static helpers that work on raw strings with explicit lengths and
// report their result as a bool. They are not documented in this header.
// NOTE(review): presumably these back the static isValidURI() checks -
// confirm in the implementation file.
//
// Overload of isConformantSchemeName() taking an explicit scheme length.
static bool isConformantSchemeName(const XMLCh* const scheme,
const int schemeLen);
// Takes the URI string and an index passed by non-const reference.
static bool processScheme(const XMLCh* const uriStr, int& index);
// Takes the URI string and an authority length.
static bool processAuthority(const XMLCh* const uriStr, const int authLen);
// Overload of isWellFormedAddress() taking an explicit address length
// instead of a memory manager.
static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen);
// Takes the path string, its length and a flag saying whether a scheme
// is present.
static bool processPath(const XMLCh* const pathStr, const int pathStrLen,
const bool isSchemePresent);
// -----------------------------------------------------------------------
// Data members
//
// We own all of the data members below and are responsible for
// creating and/or deleting the memory allocated for them.
//
// -----------------------------------------------------------------------
int fPort;
XMLCh* fScheme;
XMLCh* fUserInfo;
XMLCh* fHost;
XMLCh* fRegAuth;
XMLCh* fPath;
XMLCh* fQueryString;
XMLCh* fFragment;
XMLCh* fURIText;
MemoryManager* fMemoryManager;
};
// ---------------------------------------------------------------------------
// XMLUri: Getter methods
// ---------------------------------------------------------------------------
// Accessor: hands back the stored scheme string (fScheme) without copying it.
inline const XMLCh* XMLUri::getScheme() const
{
    return this->fScheme;
}
// Accessor: hands back the stored userinfo string (fUserInfo) without copying it.
inline const XMLCh* XMLUri::getUserInfo() const
{
    return this->fUserInfo;
}
// Accessor: hands back the stored host string (fHost) without copying it.
inline const XMLCh* XMLUri::getHost() const
{
    return this->fHost;
}
inline int XMLUri::getPort() const
{
return fPort;
}
// Accessor: hands back the stored registry based authority (fRegAuth)
// without copying it.
inline const XMLCh* XMLUri::getRegBasedAuthority() const
{
    return this->fRegAuth;
}
// Accessor: hands back the stored path string (fPath) without copying it.
inline const XMLCh* XMLUri::getPath() const
{
    return this->fPath;
}
// Accessor: hands back the stored query string (fQueryString) without
// copying it.
inline const XMLCh* XMLUri::getQueryString() const
{
    return this->fQueryString;
}
// Accessor: hands back the stored fragment string (fFragment) without
// copying it.
inline const XMLCh* XMLUri::getFragment() const
{
    return this->fFragment;
}
// Returns the full URI text, building it via buildFullText() if no text
// has been built yet.
inline const XMLCh* XMLUri::getUriText() const
{
    // The text has already been built: return it as is.
    if (fURIText)
        return fURIText;

    //
    //  Nothing built yet, so fault it in now. This is a const method and
    //  mutable members are not an option with the compilers we have to
    //  support, so the constness of 'this' is cast away in order to call
    //  the non-const builder.
    //
    XMLUri* const nonConstThis = (XMLUri*)this;
    nonConstThis->buildFullText();
    return fURIText;
}
// ---------------------------------------------------------------------------
// XMLUri: Helper methods
// ---------------------------------------------------------------------------
// True when theChar is alphanumeric or is found in
// MARK_OR_RESERVED_CHARACTERS; false otherwise.
inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar)
{
    // Alphanumerics qualify without consulting the table.
    if (XMLString::isAlphaNum(theChar))
        return true;

    if (XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) == -1)
        return false;
    return true;
}
// True when theChar is found in RESERVED_CHARACTERS; false otherwise.
inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
{
    // indexOf() yielding -1 is treated as "not in the table".
    if (XMLString::indexOf(RESERVED_CHARACTERS, theChar) == -1)
        return false;
    return true;
}
// True when theChar is found in PATH_CHARACTERS; false otherwise.
inline bool XMLUri::isPathCharacter(const XMLCh theChar)
{
    // indexOf() yielding -1 is treated as "not in the table".
    if (XMLString::indexOf(PATH_CHARACTERS, theChar) == -1)
        return false;
    return true;
}
// True when theChar is alphanumeric or is found in MARK_CHARACTERS;
// false otherwise.
inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
{
    // Alphanumerics qualify without consulting the table.
    if (XMLString::isAlphaNum(theChar))
        return true;

    if (XMLString::indexOf(MARK_CHARACTERS, theChar) == -1)
        return false;
    return true;
}
XERCES_CPP_NAMESPACE_END
#endif