| /************************************************* |
| * Perl-Compatible Regular Expressions * |
| *************************************************/ |
| |
| /* PCRE is a library of functions to support regular expressions whose syntax |
| and semantics are as close as possible to those of the Perl 5 language. |
| |
| Written by Philip Hazel |
| Original API code Copyright (c) 1997-2012 University of Cambridge |
| New API code Copyright (c) 2016-2018 University of Cambridge |
| |
| ----------------------------------------------------------------------------- |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| * Neither the name of the University of Cambridge nor the names of its |
| contributors may be used to endorse or promote products derived from |
| this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| POSSIBILITY OF SUCH DAMAGE. |
| ----------------------------------------------------------------------------- |
| */ |
| |
| |
| #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD |
| #define PCRE2_UCP_H_IDEMPOTENT_GUARD |
| |
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */
| |
/* General character categories (the one-letter Unicode category groups).

The numeric values are part of a contract: they have to match the values
generated by the maint/MultiStage2.py script, and they are assumed by the
catposstab table in pcre2_compile.c. They are therefore written out
explicitly; do not renumber or reorder them. */

enum {
  ucp_C = 0,     /* Other */
  ucp_L = 1,     /* Letter */
  ucp_M = 2,     /* Mark */
  ucp_N = 3,     /* Number */
  ucp_P = 4,     /* Punctuation */
  ucp_S = 5,     /* Symbol */
  ucp_Z = 6      /* Separator */
};
| |
/* Particular character categories (the two-letter Unicode categories),
grouped by their general category letter.

The numeric values are part of a contract: they have to match the values
generated by the maint/MultiStage2.py script, and they are assumed by the
catposstab table in pcre2_compile.c. They are therefore written out
explicitly; do not renumber or reorder them. */

enum {
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};
| |
/* Grapheme break properties. The Extended Pictographic property comes from
the emoji-data.txt file.

The numeric values have to match the values generated by the
maint/MultiStage2.py script, so they are written out explicitly rather than
being left to implicit numbering. Any new value must be added at the end. */

enum {
  ucp_gbCR = 0,
  ucp_gbLF = 1,
  ucp_gbControl = 2,
  ucp_gbExtend = 3,
  ucp_gbPrepend = 4,
  ucp_gbSpacingMark = 5,
  ucp_gbL = 6,                       /* Hangul syllable type L */
  ucp_gbV = 7,                       /* Hangul syllable type V */
  ucp_gbT = 8,                       /* Hangul syllable type T */
  ucp_gbLV = 9,                      /* Hangul syllable type LV */
  ucp_gbLVT = 10,                    /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther = 12,
  ucp_gbZWJ = 13,
  ucp_gbExtended_Pictographic = 14
};
| |
/* Script identifications.

The enumerators are numbered implicitly, starting from 0 for ucp_Unknown and
increasing by one in declaration order (ucp_Sogdian, the last one, is 148).
These numbers have to match the values generated by the
maint/MultiStage2.py script, so existing entries must never be inserted
into, removed, or reordered: scripts that arrive with a new Unicode release
are appended at the end, under a comment naming that release. */

enum {
  ucp_Unknown,
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0 */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1 */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2 */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0 */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0 */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0 */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0 */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  /* New for Unicode 10.0.0 (no update since 8.0.0) */
  ucp_Adlam,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  /* New for Unicode 11.0.0 */
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Sogdian
};
| |
| #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ |
| |
| /* End of pcre2_ucp.h */ |