blob: 6d6950b5cbdaeac973fd549af2b112c54f529527 [file] [log] [blame]
/* Properties of Unicode characters.
Copyright (C) 2007, 2011-2020 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2007.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unictype.h"
#include <stdlib.h>
#include <string.h>
/* Indices stored in the 'struct named_property' elements of the perfect hash
   table.  We don't use uc_property_t values or their addresses directly,
   because this would introduce load-time relocations.
   Each UC_PROPERTY_INDEX_xxx constant is translated to the corresponding
   UC_PROPERTY_xxx object by the 'switch' statement in uc_property_byname;
   a constant added here needs a matching 'case' there, otherwise the
   'default' branch of that switch calls abort ().  */
enum
{
/* General. */
UC_PROPERTY_INDEX_WHITE_SPACE,
UC_PROPERTY_INDEX_ALPHABETIC,
UC_PROPERTY_INDEX_OTHER_ALPHABETIC,
UC_PROPERTY_INDEX_NOT_A_CHARACTER,
UC_PROPERTY_INDEX_DEFAULT_IGNORABLE_CODE_POINT,
UC_PROPERTY_INDEX_OTHER_DEFAULT_IGNORABLE_CODE_POINT,
UC_PROPERTY_INDEX_DEPRECATED,
UC_PROPERTY_INDEX_LOGICAL_ORDER_EXCEPTION,
UC_PROPERTY_INDEX_VARIATION_SELECTOR,
UC_PROPERTY_INDEX_PRIVATE_USE,
UC_PROPERTY_INDEX_UNASSIGNED_CODE_VALUE,
/* Case. */
UC_PROPERTY_INDEX_UPPERCASE,
UC_PROPERTY_INDEX_OTHER_UPPERCASE,
UC_PROPERTY_INDEX_LOWERCASE,
UC_PROPERTY_INDEX_OTHER_LOWERCASE,
UC_PROPERTY_INDEX_TITLECASE,
UC_PROPERTY_INDEX_CASED,
UC_PROPERTY_INDEX_CASE_IGNORABLE,
UC_PROPERTY_INDEX_CHANGES_WHEN_LOWERCASED,
UC_PROPERTY_INDEX_CHANGES_WHEN_UPPERCASED,
UC_PROPERTY_INDEX_CHANGES_WHEN_TITLECASED,
UC_PROPERTY_INDEX_CHANGES_WHEN_CASEFOLDED,
UC_PROPERTY_INDEX_CHANGES_WHEN_CASEMAPPED,
UC_PROPERTY_INDEX_SOFT_DOTTED,
/* Identifiers. */
UC_PROPERTY_INDEX_ID_START,
UC_PROPERTY_INDEX_OTHER_ID_START,
UC_PROPERTY_INDEX_ID_CONTINUE,
UC_PROPERTY_INDEX_OTHER_ID_CONTINUE,
UC_PROPERTY_INDEX_XID_START,
UC_PROPERTY_INDEX_XID_CONTINUE,
UC_PROPERTY_INDEX_PATTERN_WHITE_SPACE,
UC_PROPERTY_INDEX_PATTERN_SYNTAX,
/* Shaping and rendering. */
UC_PROPERTY_INDEX_JOIN_CONTROL,
UC_PROPERTY_INDEX_GRAPHEME_BASE,
UC_PROPERTY_INDEX_GRAPHEME_EXTEND,
UC_PROPERTY_INDEX_OTHER_GRAPHEME_EXTEND,
UC_PROPERTY_INDEX_GRAPHEME_LINK,
/* Bidi. */
UC_PROPERTY_INDEX_BIDI_CONTROL,
UC_PROPERTY_INDEX_BIDI_LEFT_TO_RIGHT,
UC_PROPERTY_INDEX_BIDI_HEBREW_RIGHT_TO_LEFT,
UC_PROPERTY_INDEX_BIDI_ARABIC_RIGHT_TO_LEFT,
UC_PROPERTY_INDEX_BIDI_EUROPEAN_DIGIT,
UC_PROPERTY_INDEX_BIDI_EUR_NUM_SEPARATOR,
UC_PROPERTY_INDEX_BIDI_EUR_NUM_TERMINATOR,
UC_PROPERTY_INDEX_BIDI_ARABIC_DIGIT,
UC_PROPERTY_INDEX_BIDI_COMMON_SEPARATOR,
UC_PROPERTY_INDEX_BIDI_BLOCK_SEPARATOR,
UC_PROPERTY_INDEX_BIDI_SEGMENT_SEPARATOR,
UC_PROPERTY_INDEX_BIDI_WHITESPACE,
UC_PROPERTY_INDEX_BIDI_NON_SPACING_MARK,
UC_PROPERTY_INDEX_BIDI_BOUNDARY_NEUTRAL,
UC_PROPERTY_INDEX_BIDI_PDF,
UC_PROPERTY_INDEX_BIDI_EMBEDDING_OR_OVERRIDE,
UC_PROPERTY_INDEX_BIDI_OTHER_NEUTRAL,
/* Numeric. */
UC_PROPERTY_INDEX_HEX_DIGIT,
UC_PROPERTY_INDEX_ASCII_HEX_DIGIT,
/* CJK. */
UC_PROPERTY_INDEX_IDEOGRAPHIC,
UC_PROPERTY_INDEX_UNIFIED_IDEOGRAPH,
UC_PROPERTY_INDEX_RADICAL,
UC_PROPERTY_INDEX_IDS_BINARY_OPERATOR,
UC_PROPERTY_INDEX_IDS_TRINARY_OPERATOR,
/* Misc. */
UC_PROPERTY_INDEX_ZERO_WIDTH,
UC_PROPERTY_INDEX_SPACE,
UC_PROPERTY_INDEX_NON_BREAK,
UC_PROPERTY_INDEX_ISO_CONTROL,
UC_PROPERTY_INDEX_FORMAT_CONTROL,
UC_PROPERTY_INDEX_DASH,
UC_PROPERTY_INDEX_HYPHEN,
UC_PROPERTY_INDEX_PUNCTUATION,
UC_PROPERTY_INDEX_LINE_SEPARATOR,
UC_PROPERTY_INDEX_PARAGRAPH_SEPARATOR,
UC_PROPERTY_INDEX_QUOTATION_MARK,
UC_PROPERTY_INDEX_SENTENCE_TERMINAL,
UC_PROPERTY_INDEX_TERMINAL_PUNCTUATION,
UC_PROPERTY_INDEX_CURRENCY_SYMBOL,
UC_PROPERTY_INDEX_MATH,
UC_PROPERTY_INDEX_OTHER_MATH,
UC_PROPERTY_INDEX_PAIRED_PUNCTUATION,
UC_PROPERTY_INDEX_LEFT_OF_PAIR,
UC_PROPERTY_INDEX_COMBINING,
UC_PROPERTY_INDEX_COMPOSITE,
UC_PROPERTY_INDEX_DECIMAL_DIGIT,
UC_PROPERTY_INDEX_NUMERIC,
UC_PROPERTY_INDEX_DIACRITIC,
UC_PROPERTY_INDEX_EXTENDER,
UC_PROPERTY_INDEX_IGNORABLE_CONTROL
};
/* Get gperf generated lookup function. */
#include "unictype/pr_byname.h"
/* Value returned by uc_property_byname when the given name is rejected or
   not found by the lookup.  Its first member is initialized to NULL.  */
static const uc_property_t UC_PROPERTY_NONE = { NULL };
uc_property_t
uc_property_byname (const char *property_name)
{
char buf[MAX_WORD_LENGTH + 1];
const char *cp;
char *bp;
unsigned int count;
const struct named_property *found;
for (cp = property_name, bp = buf, count = MAX_WORD_LENGTH + 1; ; cp++, bp++)
{
unsigned char c = (unsigned char) *cp;
if (c >= 0x80)
goto invalid;
if (c >= 'A' && c <= 'Z')
c += 'a' - 'A';
else if (c == ' ' || c == '-')
c = '_';
*bp = c;
if (c == '\0')
break;
if (--count == 0)
goto invalid;
}
found = uc_property_lookup (buf, bp - buf);
if (found != NULL)
/* Use a 'switch' statement here, because a table would introduce load-time
relocations. */
switch (found->property_index)
{
case UC_PROPERTY_INDEX_WHITE_SPACE:
return UC_PROPERTY_WHITE_SPACE;
case UC_PROPERTY_INDEX_ALPHABETIC:
return UC_PROPERTY_ALPHABETIC;
case UC_PROPERTY_INDEX_OTHER_ALPHABETIC:
return UC_PROPERTY_OTHER_ALPHABETIC;
case UC_PROPERTY_INDEX_NOT_A_CHARACTER:
return UC_PROPERTY_NOT_A_CHARACTER;
case UC_PROPERTY_INDEX_DEFAULT_IGNORABLE_CODE_POINT:
return UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT;
case UC_PROPERTY_INDEX_OTHER_DEFAULT_IGNORABLE_CODE_POINT:
return UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
case UC_PROPERTY_INDEX_DEPRECATED:
return UC_PROPERTY_DEPRECATED;
case UC_PROPERTY_INDEX_LOGICAL_ORDER_EXCEPTION:
return UC_PROPERTY_LOGICAL_ORDER_EXCEPTION;
case UC_PROPERTY_INDEX_VARIATION_SELECTOR:
return UC_PROPERTY_VARIATION_SELECTOR;
case UC_PROPERTY_INDEX_PRIVATE_USE:
return UC_PROPERTY_PRIVATE_USE;
case UC_PROPERTY_INDEX_UNASSIGNED_CODE_VALUE:
return UC_PROPERTY_UNASSIGNED_CODE_VALUE;
case UC_PROPERTY_INDEX_UPPERCASE:
return UC_PROPERTY_UPPERCASE;
case UC_PROPERTY_INDEX_OTHER_UPPERCASE:
return UC_PROPERTY_OTHER_UPPERCASE;
case UC_PROPERTY_INDEX_LOWERCASE:
return UC_PROPERTY_LOWERCASE;
case UC_PROPERTY_INDEX_OTHER_LOWERCASE:
return UC_PROPERTY_OTHER_LOWERCASE;
case UC_PROPERTY_INDEX_TITLECASE:
return UC_PROPERTY_TITLECASE;
case UC_PROPERTY_INDEX_CASED:
return UC_PROPERTY_CASED;
case UC_PROPERTY_INDEX_CASE_IGNORABLE:
return UC_PROPERTY_CASE_IGNORABLE;
case UC_PROPERTY_INDEX_CHANGES_WHEN_LOWERCASED:
return UC_PROPERTY_CHANGES_WHEN_LOWERCASED;
case UC_PROPERTY_INDEX_CHANGES_WHEN_UPPERCASED:
return UC_PROPERTY_CHANGES_WHEN_UPPERCASED;
case UC_PROPERTY_INDEX_CHANGES_WHEN_TITLECASED:
return UC_PROPERTY_CHANGES_WHEN_TITLECASED;
case UC_PROPERTY_INDEX_CHANGES_WHEN_CASEFOLDED:
return UC_PROPERTY_CHANGES_WHEN_CASEFOLDED;
case UC_PROPERTY_INDEX_CHANGES_WHEN_CASEMAPPED:
return UC_PROPERTY_CHANGES_WHEN_CASEMAPPED;
case UC_PROPERTY_INDEX_SOFT_DOTTED:
return UC_PROPERTY_SOFT_DOTTED;
case UC_PROPERTY_INDEX_ID_START:
return UC_PROPERTY_ID_START;
case UC_PROPERTY_INDEX_OTHER_ID_START:
return UC_PROPERTY_OTHER_ID_START;
case UC_PROPERTY_INDEX_ID_CONTINUE:
return UC_PROPERTY_ID_CONTINUE;
case UC_PROPERTY_INDEX_OTHER_ID_CONTINUE:
return UC_PROPERTY_OTHER_ID_CONTINUE;
case UC_PROPERTY_INDEX_XID_START:
return UC_PROPERTY_XID_START;
case UC_PROPERTY_INDEX_XID_CONTINUE:
return UC_PROPERTY_XID_CONTINUE;
case UC_PROPERTY_INDEX_PATTERN_WHITE_SPACE:
return UC_PROPERTY_PATTERN_WHITE_SPACE;
case UC_PROPERTY_INDEX_PATTERN_SYNTAX:
return UC_PROPERTY_PATTERN_SYNTAX;
case UC_PROPERTY_INDEX_JOIN_CONTROL:
return UC_PROPERTY_JOIN_CONTROL;
case UC_PROPERTY_INDEX_GRAPHEME_BASE:
return UC_PROPERTY_GRAPHEME_BASE;
case UC_PROPERTY_INDEX_GRAPHEME_EXTEND:
return UC_PROPERTY_GRAPHEME_EXTEND;
case UC_PROPERTY_INDEX_OTHER_GRAPHEME_EXTEND:
return UC_PROPERTY_OTHER_GRAPHEME_EXTEND;
case UC_PROPERTY_INDEX_GRAPHEME_LINK:
return UC_PROPERTY_GRAPHEME_LINK;
case UC_PROPERTY_INDEX_BIDI_CONTROL:
return UC_PROPERTY_BIDI_CONTROL;
case UC_PROPERTY_INDEX_BIDI_LEFT_TO_RIGHT:
return UC_PROPERTY_BIDI_LEFT_TO_RIGHT;
case UC_PROPERTY_INDEX_BIDI_HEBREW_RIGHT_TO_LEFT:
return UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT;
case UC_PROPERTY_INDEX_BIDI_ARABIC_RIGHT_TO_LEFT:
return UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT;
case UC_PROPERTY_INDEX_BIDI_EUROPEAN_DIGIT:
return UC_PROPERTY_BIDI_EUROPEAN_DIGIT;
case UC_PROPERTY_INDEX_BIDI_EUR_NUM_SEPARATOR:
return UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR;
case UC_PROPERTY_INDEX_BIDI_EUR_NUM_TERMINATOR:
return UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR;
case UC_PROPERTY_INDEX_BIDI_ARABIC_DIGIT:
return UC_PROPERTY_BIDI_ARABIC_DIGIT;
case UC_PROPERTY_INDEX_BIDI_COMMON_SEPARATOR:
return UC_PROPERTY_BIDI_COMMON_SEPARATOR;
case UC_PROPERTY_INDEX_BIDI_BLOCK_SEPARATOR:
return UC_PROPERTY_BIDI_BLOCK_SEPARATOR;
case UC_PROPERTY_INDEX_BIDI_SEGMENT_SEPARATOR:
return UC_PROPERTY_BIDI_SEGMENT_SEPARATOR;
case UC_PROPERTY_INDEX_BIDI_WHITESPACE:
return UC_PROPERTY_BIDI_WHITESPACE;
case UC_PROPERTY_INDEX_BIDI_NON_SPACING_MARK:
return UC_PROPERTY_BIDI_NON_SPACING_MARK;
case UC_PROPERTY_INDEX_BIDI_BOUNDARY_NEUTRAL:
return UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL;
case UC_PROPERTY_INDEX_BIDI_PDF:
return UC_PROPERTY_BIDI_PDF;
case UC_PROPERTY_INDEX_BIDI_EMBEDDING_OR_OVERRIDE:
return UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE;
case UC_PROPERTY_INDEX_BIDI_OTHER_NEUTRAL:
return UC_PROPERTY_BIDI_OTHER_NEUTRAL;
case UC_PROPERTY_INDEX_HEX_DIGIT:
return UC_PROPERTY_HEX_DIGIT;
case UC_PROPERTY_INDEX_ASCII_HEX_DIGIT:
return UC_PROPERTY_ASCII_HEX_DIGIT;
case UC_PROPERTY_INDEX_IDEOGRAPHIC:
return UC_PROPERTY_IDEOGRAPHIC;
case UC_PROPERTY_INDEX_UNIFIED_IDEOGRAPH:
return UC_PROPERTY_UNIFIED_IDEOGRAPH;
case UC_PROPERTY_INDEX_RADICAL:
return UC_PROPERTY_RADICAL;
case UC_PROPERTY_INDEX_IDS_BINARY_OPERATOR:
return UC_PROPERTY_IDS_BINARY_OPERATOR;
case UC_PROPERTY_INDEX_IDS_TRINARY_OPERATOR:
return UC_PROPERTY_IDS_TRINARY_OPERATOR;
case UC_PROPERTY_INDEX_ZERO_WIDTH:
return UC_PROPERTY_ZERO_WIDTH;
case UC_PROPERTY_INDEX_SPACE:
return UC_PROPERTY_SPACE;
case UC_PROPERTY_INDEX_NON_BREAK:
return UC_PROPERTY_NON_BREAK;
case UC_PROPERTY_INDEX_ISO_CONTROL:
return UC_PROPERTY_ISO_CONTROL;
case UC_PROPERTY_INDEX_FORMAT_CONTROL:
return UC_PROPERTY_FORMAT_CONTROL;
case UC_PROPERTY_INDEX_DASH:
return UC_PROPERTY_DASH;
case UC_PROPERTY_INDEX_HYPHEN:
return UC_PROPERTY_HYPHEN;
case UC_PROPERTY_INDEX_PUNCTUATION:
return UC_PROPERTY_PUNCTUATION;
case UC_PROPERTY_INDEX_LINE_SEPARATOR:
return UC_PROPERTY_LINE_SEPARATOR;
case UC_PROPERTY_INDEX_PARAGRAPH_SEPARATOR:
return UC_PROPERTY_PARAGRAPH_SEPARATOR;
case UC_PROPERTY_INDEX_QUOTATION_MARK:
return UC_PROPERTY_QUOTATION_MARK;
case UC_PROPERTY_INDEX_SENTENCE_TERMINAL:
return UC_PROPERTY_SENTENCE_TERMINAL;
case UC_PROPERTY_INDEX_TERMINAL_PUNCTUATION:
return UC_PROPERTY_TERMINAL_PUNCTUATION;
case UC_PROPERTY_INDEX_CURRENCY_SYMBOL:
return UC_PROPERTY_CURRENCY_SYMBOL;
case UC_PROPERTY_INDEX_MATH:
return UC_PROPERTY_MATH;
case UC_PROPERTY_INDEX_OTHER_MATH:
return UC_PROPERTY_OTHER_MATH;
case UC_PROPERTY_INDEX_PAIRED_PUNCTUATION:
return UC_PROPERTY_PAIRED_PUNCTUATION;
case UC_PROPERTY_INDEX_LEFT_OF_PAIR:
return UC_PROPERTY_LEFT_OF_PAIR;
case UC_PROPERTY_INDEX_COMBINING:
return UC_PROPERTY_COMBINING;
case UC_PROPERTY_INDEX_COMPOSITE:
return UC_PROPERTY_COMPOSITE;
case UC_PROPERTY_INDEX_DECIMAL_DIGIT:
return UC_PROPERTY_DECIMAL_DIGIT;
case UC_PROPERTY_INDEX_NUMERIC:
return UC_PROPERTY_NUMERIC;
case UC_PROPERTY_INDEX_DIACRITIC:
return UC_PROPERTY_DIACRITIC;
case UC_PROPERTY_INDEX_EXTENDER:
return UC_PROPERTY_EXTENDER;
case UC_PROPERTY_INDEX_IGNORABLE_CONTROL:
return UC_PROPERTY_IGNORABLE_CONTROL;
default:
abort ();
}
invalid:
return UC_PROPERTY_NONE;
}