| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * $Id: XMLChar.hpp 568078 2007-08-21 11:43:25Z amassari $ |
| */ |
| |
| #if !defined(XMLCHAR_HPP) |
| #define XMLCHAR_HPP |
| |
| #include <xercesc/util/XMLUniDefs.hpp> |
| |
| XERCES_CPP_NAMESPACE_BEGIN |
| |
| // --------------------------------------------------------------------------- |
//  This file defines character-classification utilities that conform to
//  XML 1.0 and XML 1.1
| // --------------------------------------------------------------------------- |
| // Masks for the fgCharCharsTable1_0 array |
| const XMLByte gNCNameCharMask = 0x1; |
| const XMLByte gFirstNameCharMask = 0x2; |
| const XMLByte gNameCharMask = 0x4; |
| const XMLByte gPlainContentCharMask = 0x8; |
| const XMLByte gSpecialStartTagCharMask = 0x10; |
| const XMLByte gControlCharMask = 0x20; |
| const XMLByte gXMLCharMask = 0x40; |
| const XMLByte gWhitespaceCharMask = 0x80; |
| |
| // --------------------------------------------------------------------------- |
| // This class is for XML 1.0 |
| // --------------------------------------------------------------------------- |
| class XMLUTIL_EXPORT XMLChar1_0 |
| { |
| public: |
| // ----------------------------------------------------------------------- |
| // Public, static methods, check the string |
| // ----------------------------------------------------------------------- |
| static bool isAllSpaces |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool containsWhiteSpace |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidNmtoken |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidName |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidName |
| ( |
| const XMLCh* const toCheck |
| ); |
| |
| static bool isValidNCName |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidQName |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| // ----------------------------------------------------------------------- |
| // Public, static methods, check the XMLCh |
| // surrogate pair is assumed if second parameter is not null |
| // ----------------------------------------------------------------------- |
| static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isWhitespace(const XMLCh toCheck); |
| static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2); |
| static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| |
| static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| |
| // ----------------------------------------------------------------------- |
| // Special Non-conformant Public, static methods |
| // ----------------------------------------------------------------------- |
| /** |
| * Return true if NEL (0x85) and LSEP (0x2028) to be treated as white space char. |
| */ |
| static bool isNELRecognized(); |
| |
| /** |
| * Method to enable NEL (0x85) and LSEP (0x2028) to be treated as white space char. |
| */ |
| static void enableNELWS(); |
| |
| private: |
| // ----------------------------------------------------------------------- |
| // Unimplemented constructors and operators |
| // ----------------------------------------------------------------------- |
| XMLChar1_0(); |
| |
| // ----------------------------------------------------------------------- |
| // Static data members |
| // |
| // fgCharCharsTable1_0 |
| // The character characteristics table. Bits in each byte, represent |
| // the characteristics of each character. It is generated via some |
| // code and then hard coded into the cpp file for speed. |
| // |
| // fNEL |
| // Flag to respresents whether NEL and LSEP newline recognition is enabled |
| // or disabled |
| // ----------------------------------------------------------------------- |
| static XMLByte fgCharCharsTable1_0[0x10000]; |
| static bool enableNEL; |
| |
| friend class XMLReader; |
| }; |
| |
| |
| // --------------------------------------------------------------------------- |
//  XMLChar1_0: Public, static, inline methods
| // --------------------------------------------------------------------------- |
| inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| // An XML letter is a FirstNameChar minus ':' and '_'. |
| if (!toCheck2) { |
| return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) |
| && (toCheck != chColon) && (toCheck != chUnderscore)); |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) { |
| return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon)); |
| } |
| |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| |
| inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck) |
| { |
| return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0); |
| } |
| |
| inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_0::isNELRecognized() { |
| |
| return enableNEL; |
| } |
| |
| |
| // --------------------------------------------------------------------------- |
| // This class is for XML 1.1 |
| // --------------------------------------------------------------------------- |
| class XMLUTIL_EXPORT XMLChar1_1 |
| { |
| public: |
| // ----------------------------------------------------------------------- |
| // Public, static methods, check the string |
| // ----------------------------------------------------------------------- |
| static bool isAllSpaces |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool containsWhiteSpace |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidNmtoken |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidName |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidName |
| ( |
| const XMLCh* const toCheck |
| ); |
| |
| static bool isValidNCName |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| static bool isValidQName |
| ( |
| const XMLCh* const toCheck |
| , const unsigned int count |
| ); |
| |
| // ----------------------------------------------------------------------- |
| // Public, static methods, check the XMLCh |
| // ----------------------------------------------------------------------- |
| static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| |
| static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0); |
| |
| private: |
| // ----------------------------------------------------------------------- |
| // Unimplemented constructors and operators |
| // ----------------------------------------------------------------------- |
| XMLChar1_1(); |
| |
| // ----------------------------------------------------------------------- |
| // Static data members |
| // |
| // fgCharCharsTable1_1 |
| // The character characteristics table. Bits in each byte, represent |
| // the characteristics of each character. It is generated via some |
| // code and then hard coded into the cpp file for speed. |
| // |
| // ----------------------------------------------------------------------- |
| static XMLByte fgCharCharsTable1_1[0x10000]; |
| |
| friend class XMLReader; |
| }; |
| |
| |
| // --------------------------------------------------------------------------- |
//  XMLChar1_1: Public, static, inline methods
| // --------------------------------------------------------------------------- |
| inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| /** XML 1.1 does not define a letter, so we use the 1.0 definition */ |
| return XMLChar1_0::isXMLLetter(toCheck, toCheck2); |
| } |
| |
| inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) { |
| return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon)); |
| } |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| |
| inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0); |
| else { |
| if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF)) |
| if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF)) |
| return true; |
| } |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0); |
| return false; |
| } |
| |
| inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2) |
| { |
| if (!toCheck2) |
| return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0); |
| return false; |
| } |
| |
| |
| XERCES_CPP_NAMESPACE_END |
| |
| #endif |