| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * $Id: XMLUri.hpp 568078 2007-08-21 11:43:25Z amassari $ |
| */ |
| |
| #if !defined(XMLURI_HPP) |
| #define XMLURI_HPP |
| |
| #include <xercesc/util/XMemory.hpp> |
| #include <xercesc/util/XMLString.hpp> |
| |
| #include <xercesc/internal/XSerializable.hpp> |
| #include <xercesc/framework/XMLBuffer.hpp> |
| |
| XERCES_CPP_NAMESPACE_BEGIN |
| |
| /* |
| * This class is a direct port of Java's URI class. To distinguish it |
| * from XMLURL, the name XMLUri is used instead of XMLURI. |
| * |
| * TODO: decide how XMLUri and XMLURL should relate, since a URL is a |
| * kind of URI. |
| * |
| */ |
| |
| class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory |
| { |
| public: |
| |
| // ----------------------------------------------------------------------- |
| // Constructors and Destructor |
| // ----------------------------------------------------------------------- |
| |
| /** |
| * Construct a new URI from a URI specification string. |
| * |
| * If the specification follows the "generic URI" syntax (two slashes |
| * following the first colon), the specification will be parsed |
| * accordingly, setting the scheme, userinfo, host, port, path, |
| * querystring and fragment fields as necessary. |
| * |
| * If the specification does not follow the "generic URI" syntax, |
| * the specification is parsed into a scheme and a scheme-specific |
| * part (stored as the path) only. |
| * |
| * @param uriSpec the URI specification string (cannot be null or empty) |
| * |
| * @param manager Pointer to the memory manager to be used to |
| * allocate objects. |
| * |
| * ctor# 2 |
| * |
| */ |
| XMLUri(const XMLCh* const uriSpec, |
| MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); |
| |
| /** |
| * Construct a new URI from a base URI and a URI specification string. |
| * The URI specification string may be a relative URI. |
| * |
| * @param baseURI the base URI (cannot be null if uriSpec is null or |
| * empty) |
| * |
| * @param uriSpec the URI specification string (cannot be null or |
| * empty if base is null) |
| * |
| * @param manager Pointer to the memory manager to be used to |
| * allocate objects. |
| * |
| * ctor# 7 relative ctor |
| * |
| */ |
| XMLUri(const XMLUri* const baseURI |
| , const XMLCh* const uriSpec |
| , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); |
| |
| /** |
| * Copy constructor, immediately followed by the copy-assignment |
| * operator. |
| * |
| * @param toCopy the URI to copy from |
| */ |
| XMLUri(const XMLUri& toCopy); |
| XMLUri& operator=(const XMLUri& toAssign); |
| |
| virtual ~XMLUri(); |
| |
| // ----------------------------------------------------------------------- |
| // Getter methods |
| // ----------------------------------------------------------------------- |
| /** |
| * Get the URI as a string specification. See RFC 2396 Section 5.2. |
| * |
| * The inline definition calls buildFullText() when the stored text |
| * (fURIText) is still null and then returns fURIText. |
| * |
| * @return the URI string specification |
| */ |
| const XMLCh* getUriText() const; |
| |
| /** |
| * Get the scheme for this URI. |
| * |
| * @return the scheme for this URI |
| */ |
| const XMLCh* getScheme() const; |
| |
| /** |
| * Get the userinfo for this URI. |
| * |
| * @return the userinfo for this URI (null if not specified). |
| */ |
| const XMLCh* getUserInfo() const; |
| |
| |
| /** |
| * Get the host for this URI. |
| * |
| * @return the host for this URI (null if not specified). |
| */ |
| const XMLCh* getHost() const; |
| |
| /** |
| * Get the port for this URI. |
| * |
| * @return the port for this URI (-1 if not specified). |
| */ |
| int getPort() const; |
| |
| /** |
| * Get the registry based authority for this URI. |
| * |
| * @return the registry based authority (null if not specified). |
| */ |
| const XMLCh* getRegBasedAuthority() const; |
| |
| /** |
| * Get the path for this URI. Note that the value returned is the path |
| * only and does not include the query string or fragment. |
| * |
| * @return the path for this URI. |
| */ |
| const XMLCh* getPath() const; |
| |
| /** |
| * Get the query string for this URI. |
| * |
| * @return the query string for this URI. Null is returned if there |
| * was no "?" in the URI spec, empty string if there was a |
| * "?" but no query string following it. |
| */ |
| const XMLCh* getQueryString() const; |
| |
| /** |
| * Get the fragment for this URI. |
| * |
| * @return the fragment for this URI. Null is returned if there |
| * was no "#" in the URI spec, empty string if there was a |
| * "#" but no fragment following it. |
| */ |
| const XMLCh* getFragment() const; |
| |
| // ----------------------------------------------------------------------- |
| // Setter methods |
| // ----------------------------------------------------------------------- |
| |
| /** |
| * Set the scheme for this URI. The scheme is converted to lowercase |
| * before it is set. |
| * |
| * @param newScheme the scheme for this URI (cannot be null) |
| * |
| */ |
| void setScheme(const XMLCh* const newScheme); |
| |
| /** |
| * Set the userinfo for this URI. If a non-null value is passed in and |
| * the host value is null, then an exception is thrown. |
| * |
| * @param newUserInfo the userinfo for this URI |
| * |
| */ |
| void setUserInfo(const XMLCh* const newUserInfo); |
| |
| /** |
| * Set the host for this URI. If null is passed in, the userinfo |
| * field is also set to null and the port is set to -1. |
| * |
| * Note: This method overwrites registry based authority if it |
| * previously existed in this URI. |
| * |
| * @param newHost the host for this URI |
| * |
| */ |
| void setHost(const XMLCh* const newHost); |
| |
| /** |
| * Set the port for this URI. -1 is used to indicate that the port is |
| * not specified, otherwise valid port numbers are between 0 and 65535. |
| * If a valid port number is passed in and the host field is null, |
| * an exception is thrown. |
| * |
| * @param newPort the port number for this URI |
| * |
| */ |
| void setPort(int newPort); |
| |
| /** |
| * Sets the registry based authority for this URI. |
| * |
| * Note: This method overwrites server based authority |
| * if it previously existed in this URI. |
| * |
| * @param newRegAuth the registry based authority for this URI |
| */ |
| void setRegBasedAuthority(const XMLCh* const newRegAuth); |
| |
| /** |
| * Set the path for this URI. |
| * |
| * If the supplied path is null, then the |
| * query string and fragment are set to null as well. |
| * |
| * If the supplied path includes a query string and/or fragment, |
| * these fields will be parsed and set as well. |
| * |
| * Note: |
| * |
| * For URIs following the "generic URI" syntax, the path |
| * specified should start with a slash. |
| * |
| * For URIs that do not follow the generic URI syntax, this method |
| * sets the scheme-specific part. |
| * |
| * @param newPath the path for this URI (may be null) |
| * |
| */ |
| void setPath(const XMLCh* const newPath); |
| |
| /** |
| * Set the query string for this URI. A non-null value is valid only |
| * if this is a URI conforming to the generic URI syntax and |
| * the path value is not null. |
| * |
| * @param newQueryString the query string for this URI |
| * |
| */ |
| void setQueryString(const XMLCh* const newQueryString); |
| |
| /** |
| * Set the fragment for this URI. A non-null value is valid only |
| * if this is a URI conforming to the generic URI syntax and |
| * the path value is not null. |
| * |
| * @param newFragment the fragment for this URI |
| * |
| */ |
| void setFragment(const XMLCh* const newFragment); |
| |
| // ----------------------------------------------------------------------- |
| // Miscellaneous methods |
| // ----------------------------------------------------------------------- |
| |
| /** |
| * Determine whether a given string contains only URI characters (also |
| * called "uric" in RFC 2396). uric consist of all reserved |
| * characters, unreserved characters and escaped characters. |
| * |
| * @param uric the string to check |
| * |
| * @return true if the string is comprised of uric, false otherwise |
| */ |
| static bool isURIString(const XMLCh* const uric); |
| |
| /** |
| * Determine whether a given string is a valid URI. This overload |
| * receives the base URI itself. |
| * |
| * @param baseURI the base URI, passed as a pointer |
| * (NOTE(review): presumably may be null when uriStr is |
| * absolute - confirm in the implementation) |
| * @param uriStr the string to validate |
| * |
| * @return true if uriStr is a valid URI, false otherwise |
| */ |
| static bool isValidURI( const XMLUri* const baseURI |
| , const XMLCh* const uriStr); |
| /** |
| * Determine whether a given string is a valid URI. This overload |
| * receives only a flag instead of a base URI object. |
| * |
| * @param haveBaseURI flag taking the place of the base URI of the |
| * other overload (NOTE(review): presumably true |
| * when a base URI exists - confirm) |
| * @param uriStr the string to validate |
| * |
| * @return true if uriStr is a valid URI, false otherwise |
| */ |
| static bool isValidURI( bool haveBaseURI |
| , const XMLCh* const uriStr); |
| |
| /** |
| * Normalize a system URI. |
| * |
| * NOTE(review): the exact normalization rules live in the |
| * implementation and are not visible in this header. |
| * |
| * @param systemURI the URI text to normalize |
| * @param normalizedURI buffer passed by reference; presumably receives |
| * the normalized text - confirm |
| */ |
| static void normalizeURI(const XMLCh* const systemURI, |
| XMLBuffer& normalizedURI); |
| |
| /*** |
| * Support for Serialization/De-serialization |
| ***/ |
| DECL_XSERIALIZABLE(XMLUri) |
| /** |
| * Constructor taking only a memory manager. |
| * |
| * NOTE(review): declared alongside the serialization support, so it |
| * is presumably meant for de-serialization - confirm. |
| * |
| * @param manager Pointer to the memory manager to be used to |
| * allocate objects. |
| */ |
| XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); |
| |
| private: |
| /* |
| * Character tables. MARK_OR_RESERVED_CHARACTERS, RESERVED_CHARACTERS, |
| * MARK_CHARACTERS and PATH_CHARACTERS are searched by the inline |
| * character-classification helpers defined after this class; the use |
| * of the remaining tables is not visible in this header. |
| */ |
| static const XMLCh MARK_OR_RESERVED_CHARACTERS[]; |
| static const XMLCh RESERVED_CHARACTERS[]; |
| static const XMLCh MARK_CHARACTERS[]; |
| static const XMLCh SCHEME_CHARACTERS[]; |
| static const XMLCh USERINFO_CHARACTERS[]; |
| static const XMLCh REG_NAME_CHARACTERS[]; |
| static const XMLCh PATH_CHARACTERS[]; |
| |
| // Helper for getUriText(): called there when fURIText is still null. |
| void buildFullText(); |
| |
| // ----------------------------------------------------------------------- |
| // Private helper methods |
| // ----------------------------------------------------------------------- |
| |
| /** |
| * Determine whether a character is a reserved character, i.e. whether |
| * it occurs in RESERVED_CHARACTERS. |
| * |
| * @param theChar the character to test |
| * |
| * @return true if the character is a reserved character |
| */ |
| static bool isReservedCharacter(const XMLCh theChar); |
| |
| /** |
| * Determine whether a character is a path character, i.e. whether |
| * it occurs in PATH_CHARACTERS. |
| * |
| * @param theChar the character to test |
| * |
| * @return true if the character is a path character |
| */ |
| static bool isPathCharacter(const XMLCh theChar); |
| |
| /** |
| * Determine whether a char is an unreserved character, i.e. an |
| * alphanumeric or a character found in MARK_CHARACTERS. |
| * |
| * @param theChar the character to test |
| * |
| * @return true if the char is unreserved, false otherwise |
| */ |
| static bool isUnreservedCharacter(const XMLCh theChar); |
| |
| /** |
| * Determine whether a char is a reserved or unreserved character, |
| * i.e. an alphanumeric or a character found in |
| * MARK_OR_RESERVED_CHARACTERS. |
| * |
| * @param theChar the character to test |
| * |
| * @return true if the char is reserved or unreserved, false otherwise |
| */ |
| static bool isReservedOrUnreservedCharacter(const XMLCh theChar); |
| |
| /** |
| * Determine whether a scheme conforms to the rules for a scheme name. |
| * A scheme is conformant if it starts with an alphanumeric, and |
| * contains only alphanumerics, '+','-' and '.'. |
| * |
| * @param scheme the scheme name to check |
| * |
| * @return true if the scheme is conformant, false otherwise |
| */ |
| static bool isConformantSchemeName(const XMLCh* const scheme); |
| |
| /** |
| * Check whether a userInfo conforms to the rules for a userinfo. |
| * |
| * This function is declared void, so conformance is not reported |
| * through a return value. NOTE(review): presumably a non-conformant |
| * userinfo is reported by throwing - confirm in the implementation. |
| * |
| * @param userInfo the userinfo string to check |
| * @param manager memory manager made available to the check (how it |
| * is used is not visible in this header) |
| */ |
| static void isConformantUserInfo(const XMLCh* const userInfo |
| , MemoryManager* const manager); |
| |
| /** |
| * Determines whether the components host, port, and user info |
| * are valid as a server authority. This overload takes explicit |
| * lengths for the two strings. |
| * |
| * @param host the host component |
| * @param hostLen length of host (presumably in XMLCh units - confirm) |
| * @param port the port number |
| * @param userinfo the userinfo component |
| * @param userLen length of userinfo (presumably in XMLCh units - |
| * confirm) |
| * |
| * @return true if the given host, port, and userinfo compose |
| * a valid server authority |
| */ |
| static bool isValidServerBasedAuthority(const XMLCh* const host |
| , const int hostLen |
| , const int port |
| , const XMLCh* const userinfo |
| , const int userLen); |
| |
| /** |
| * Determines whether the components host, port, and user info |
| * are valid as a server authority. This overload takes no explicit |
| * lengths and receives a memory manager instead. |
| * |
| * @param host the host component |
| * @param port the port number |
| * @param userinfo the userinfo component |
| * @param manager memory manager made available to the check |
| * |
| * @return true if the given host, port, and userinfo compose |
| * a valid server authority |
| */ |
| static bool isValidServerBasedAuthority(const XMLCh* const host |
| , const int port |
| , const XMLCh* const userinfo |
| , MemoryManager* const manager); |
| |
| /** |
| * Determines whether the given string is a registry based authority. |
| * This overload takes an explicit length. |
| * |
| * @param authority the authority component of a URI |
| * @param authLen length of authority (presumably in XMLCh units - |
| * confirm) |
| * |
| * @return true if the given string is a registry based authority |
| */ |
| static bool isValidRegistryBasedAuthority(const XMLCh* const authority |
| , const int authLen); |
| |
| /** |
| * Determines whether the given string is a registry based authority. |
| * |
| * @param authority the authority component of a URI |
| * |
| * @return true if the given string is a registry based authority |
| */ |
| static bool isValidRegistryBasedAuthority(const XMLCh* const authority); |
| |
| /** |
| * Determine whether a string is syntactically capable of representing |
| * a valid IPv4 address, IPv6 reference or the domain name of a network host. |
| * |
| * A valid IPv4 address consists of four decimal digit groups |
| * separated by a '.'. |
| * |
| * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the |
| * definition of IPv6 references. |
| * |
| * A hostname consists of domain labels (each of which must begin and |
| * end with an alphanumeric but may contain '-') separated by a '.'. |
| * See RFC 2396 Section 3.2.2. |
| * |
| * @param addr the address string to check |
| * @param manager memory manager made available to the check |
| * |
| * @return true if the string is a syntactically valid IPv4 address, |
| * IPv6 reference or hostname |
| */ |
| static bool isWellFormedAddress(const XMLCh* const addr |
| , MemoryManager* const manager); |
| |
| /** |
| * Determines whether a string is an IPv4 address as defined by |
| * RFC 2373, and under the further constraint that it must be a 32-bit |
| * address. Though not expressed in the grammar, in order to satisfy |
| * the 32-bit address constraint, each segment of the address cannot |
| * be greater than 255 (8 bits of information). |
| * |
| * @param addr the address string to check |
| * @param length length of addr (presumably in XMLCh units - confirm) |
| * |
| * @return true if the string is a syntactically valid IPv4 address |
| */ |
| static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length); |
| |
| /** |
| * Determines whether a string is an IPv6 reference as defined |
| * by RFC 2732, where IPv6address is defined in RFC 2373. The |
| * IPv6 address is parsed according to Section 2.2 of RFC 2373, |
| * with the additional constraint that the address be composed of |
| * 128 bits of information. |
| * |
| * Note: The BNF expressed in RFC 2373 Appendix B does not |
| * accurately describe section 2.2, and was in fact removed from |
| * RFC 3513, the successor of RFC 2373. |
| * |
| * @param addr the address string to check |
| * @param length length of addr (presumably in XMLCh units - confirm) |
| * |
| * @return true if the string is a syntactically valid IPv6 reference |
| */ |
| static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length); |
| |
| /** |
| * Helper function for isWellFormedIPv6Reference which scans the |
| * hex sequences of an IPv6 address. It returns the index of the |
| * next character to scan in the address, or -1 if the string |
| * cannot match a valid IPv6 address. |
| * |
| * @param addr the string to be scanned |
| * @param index the beginning index (inclusive) |
| * @param end the ending index (exclusive) |
| * @param counter a counter for the number of 16-bit sections read |
| * in the address; taken by reference |
| * |
| * @return the index of the next character to scan, or -1 if the |
| * string cannot match a valid IPv6 address |
| */ |
| static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter); |
| |
| /** |
| * Get the indicator as to whether this URI uses the "generic URI" |
| * syntax. |
| * |
| * NOTE(review): although documented as a getter, this member is not |
| * declared const, so it cannot be called on a const XMLUri. |
| * |
| * @return true if this URI uses the "generic URI" syntax, false |
| * otherwise |
| */ |
| bool isGenericURI(); |
| |
| // ----------------------------------------------------------------------- |
| // Initialization, clean-up and static validation helpers |
| // ----------------------------------------------------------------------- |
| |
| /** |
| * Initialize all fields of this URI from another URI. |
| * |
| * @param toCopy the URI to copy (passed by reference) |
| */ |
| void initialize(const XMLUri& toCopy); |
| |
| /** |
| * Initializes this URI from a base URI and a URI specification string. |
| * See RFC 2396 Section 4 and Appendix B for specifications on parsing |
| * the URI and Section 5 for specifications on resolving relative URIs |
| * and relative paths. |
| * |
| * @param baseURI the base URI (may be null if uriSpec is an absolute |
| * URI) |
| * |
| * @param uriSpec the URI spec string which may be an absolute or |
| * relative URI (can only be null/empty if base |
| * is not null) |
| * |
| */ |
| void initialize(const XMLUri* const baseURI |
| , const XMLCh* const uriSpec); |
| |
| /** |
| * Initialize the scheme for this URI from a URI string spec. |
| * |
| * @param uriSpec the URI specification (cannot be null) |
| * |
| */ |
| void initializeScheme(const XMLCh* const uriSpec); |
| |
| /** |
| * Initialize the authority (userinfo, host and port) for this |
| * URI from a URI string spec. |
| * |
| * @param uriSpec the URI specification (cannot be null) |
| * |
| */ |
| void initializeAuthority(const XMLCh* const uriSpec); |
| |
| /** |
| * Initialize the path for this URI from a URI string spec. |
| * |
| * @param uriSpec the URI specification (cannot be null) |
| * |
| */ |
| void initializePath(const XMLCh* const uriSpec); |
| |
| /** |
| * Clean up the data variables. |
| * |
| */ |
| void cleanUp(); |
| /* |
| * Static helpers that work on raw strings with explicit lengths or |
| * indices and need no XMLUri instance. |
| * |
| * NOTE(review): presumably the building blocks of the static |
| * isValidURI() overloads - confirm in the implementation. |
| */ |
| static bool isConformantSchemeName(const XMLCh* const scheme, |
| const int schemeLen); |
| static bool processScheme(const XMLCh* const uriStr, int& index); |
| static bool processAuthority(const XMLCh* const uriStr, const int authLen); |
| static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen); |
| static bool processPath(const XMLCh* const pathStr, const int pathStrLen, |
| const bool isSchemePresent); |
| |
| // ----------------------------------------------------------------------- |
| // Data members |
| // |
| // This object owns the memory behind its data members and is |
| // responsible for creating and/or deleting it. NOTE(review): |
| // fMemoryManager is presumably borrowed rather than owned - confirm. |
| // |
| // ----------------------------------------------------------------------- |
| int fPort; |
| XMLCh* fScheme; |
| XMLCh* fUserInfo; |
| XMLCh* fHost; |
| XMLCh* fRegAuth; |
| XMLCh* fPath; |
| XMLCh* fQueryString; |
| XMLCh* fFragment; |
| XMLCh* fURIText; |
| MemoryManager* fMemoryManager; |
| }; |
| |
| // --------------------------------------------------------------------------- |
| // XMLUri: Getter methods |
| // --------------------------------------------------------------------------- |
| // Returns the stored scheme pointer (fScheme) as-is; no copy is made. |
| inline const XMLCh* XMLUri::getScheme() const |
| { |
| return this->fScheme; |
| } |
| |
| // Returns the stored userinfo pointer (fUserInfo) as-is; no copy is made. |
| inline const XMLCh* XMLUri::getUserInfo() const |
| { |
| return this->fUserInfo; |
| } |
| |
| // Returns the stored host pointer (fHost) as-is; no copy is made. |
| inline const XMLCh* XMLUri::getHost() const |
| { |
| return this->fHost; |
| } |
| |
| // Returns the stored port number (fPort). |
| inline int XMLUri::getPort() const |
| { |
| return this->fPort; |
| } |
| |
| // Returns the stored registry based authority pointer (fRegAuth) as-is. |
| inline const XMLCh* XMLUri::getRegBasedAuthority() const |
| { |
| return this->fRegAuth; |
| } |
| |
| // Returns the stored path pointer (fPath) as-is; no copy is made. |
| inline const XMLCh* XMLUri::getPath() const |
| { |
| return this->fPath; |
| } |
| |
| // Returns the stored query string pointer (fQueryString) as-is. |
| inline const XMLCh* XMLUri::getQueryString() const |
| { |
| return this->fQueryString; |
| } |
| |
| // Returns the stored fragment pointer (fFragment) as-is; no copy is made. |
| inline const XMLCh* XMLUri::getFragment() const |
| { |
| return this->fFragment; |
| } |
| |
| // Returns the full URI text (fURIText), asking buildFullText() to |
| // produce it first when it is still null. |
| inline const XMLCh* XMLUri::getUriText() const |
| { |
| // Fast path: the text is already available. |
| if (fURIText) |
| { |
| return fURIText; |
| } |
| |
| // |
| // Fault it in. This is a const method and mutable members cannot be |
| // used because of the compilers that have to be supported, so the |
| // constness is cast off before calling the non-const helper, which |
| // is expected to populate fURIText. |
| // |
| ((XMLUri*)this)->buildFullText(); |
| return fURIText; |
| } |
| |
| // --------------------------------------------------------------------------- |
| // XMLUri: Helper methods |
| // --------------------------------------------------------------------------- |
| // True when theChar is alphanumeric (per XMLString::isAlphaNum) or is |
| // listed in MARK_OR_RESERVED_CHARACTERS. |
| inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar) |
| { |
| // Alphanumerics qualify without consulting the table. |
| if (XMLString::isAlphaNum(theChar)) |
| { |
| return true; |
| } |
| |
| // Otherwise the character has to appear in the table. |
| return XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1; |
| } |
| |
| // True when theChar is listed in RESERVED_CHARACTERS. |
| inline bool XMLUri::isReservedCharacter(const XMLCh theChar) |
| { |
| // indexOf reports -1 when the character is absent from the table. |
| const bool isListed = (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1); |
| return isListed; |
| } |
| |
| // True when theChar is listed in PATH_CHARACTERS. |
| inline bool XMLUri::isPathCharacter(const XMLCh theChar) |
| { |
| // indexOf reports -1 when the character is absent from the table. |
| const bool isListed = (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1); |
| return isListed; |
| } |
| |
| // True when theChar is alphanumeric (per XMLString::isAlphaNum) or is |
| // listed in MARK_CHARACTERS. |
| inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar) |
| { |
| // Alphanumerics qualify without consulting the table. |
| if (XMLString::isAlphaNum(theChar)) |
| { |
| return true; |
| } |
| |
| // Otherwise the character has to appear in the table. |
| return XMLString::indexOf(MARK_CHARACTERS, theChar) != -1; |
| } |
| |
| XERCES_CPP_NAMESPACE_END |
| |
| #endif |