blob: 4103b29fd8e50c5d0fec638211630d0925b8541d [file] [log] [blame]
/*
******************************************************************************
*
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ubidiimp.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999aug06
* created by: Markus W. Scherer, updated by Matitiahu Allouche
*/
#ifndef UBIDIIMP_H
#define UBIDIIMP_H
/* set import/export definitions */
#ifdef U_COMMON_IMPLEMENTATION
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "ubidi_props.h"
/* miscellaneous definitions ---------------------------------------------- */
/* One directional property per byte: a BiDi class value (L, R, EN, ... as
   aliased in this header), possibly ORed with the CONTEXT_RTL bit. */
typedef uint8_t DirProp;
/* Bit set of directional properties, built with the DIRPROP_FLAG() macros. */
typedef uint32_t Flags;
/*
 * Short aliases for the UCharDirection values; see UCharDirection in uchar.h .
 * Comparing the description of the BiDi algorithm with this implementation
 * is easier when the code uses the same names for the BiDi types as the
 * description does.
 * dirPropCount is not a BiDi type: as the last enumerator it equals BN+1.
 */
enum {
L= U_LEFT_TO_RIGHT,
R= U_RIGHT_TO_LEFT,
EN= U_EUROPEAN_NUMBER,
ES= U_EUROPEAN_NUMBER_SEPARATOR,
ET= U_EUROPEAN_NUMBER_TERMINATOR,
AN= U_ARABIC_NUMBER,
CS= U_COMMON_NUMBER_SEPARATOR,
B= U_BLOCK_SEPARATOR,
S= U_SEGMENT_SEPARATOR,
WS= U_WHITE_SPACE_NEUTRAL,
ON= U_OTHER_NEUTRAL,
LRE=U_LEFT_TO_RIGHT_EMBEDDING,
LRO=U_LEFT_TO_RIGHT_OVERRIDE,
AL= U_RIGHT_TO_LEFT_ARABIC,
RLE=U_RIGHT_TO_LEFT_EMBEDDING,
RLO=U_RIGHT_TO_LEFT_OVERRIDE,
PDF=U_POP_DIRECTIONAL_FORMAT,
NSM=U_DIR_NON_SPACING_MARK,
BN= U_BOUNDARY_NEUTRAL,
dirPropCount
};
/*
 * Directional properties are often easier to handle as bit values.
 * DIRPROP_FLAG(dir) is the single bit standing for property dir; the MASK_xxx
 * macros are unions of such bits. The abbreviations in the macro names are
 * the ones used in the description of the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL << (dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL << 31)

/* explicit embedding codes */
#define MASK_LRX         (DIRPROP_FLAG(LRE) | DIRPROP_FLAG(LRO))
#define MASK_RLX         (DIRPROP_FLAG(RLE) | DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE    (DIRPROP_FLAG(LRO) | DIRPROP_FLAG(RLO))
#define MASK_EXPLICIT    (MASK_LRX | MASK_RLX | DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN) | MASK_EXPLICIT)

/* are there any characters that are LTR or RTL? */
#define MASK_R_AL (DIRPROP_FLAG(R) | DIRPROP_FLAG(AL))
#define MASK_LTR  (DIRPROP_FLAG(L) | DIRPROP_FLAG(EN) | DIRPROP_FLAG(AN) | MASK_LRX)
#define MASK_RTL  (MASK_R_AL | MASK_RLX)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B) | DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S | DIRPROP_FLAG(WS) | MASK_BN_EXPLICIT)
#define MASK_N  (DIRPROP_FLAG(ON) | MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET) | DIRPROP_FLAG(NSM) | MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS) | DIRPROP_FLAG(ES) | DIRPROP_FLAG(ET) | MASK_N)

/*
 * These types may be changed to "e", the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2).
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM) | MASK_POSSIBLE_N)
/*
 * UCharDirection defines L as 0 and R as 1, so the low bit of an embedding
 * level is the matching directional property.
 */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level) & 1))

/*
 * True for level values of 0xfe and above.
 * NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels - confirm against ubidi.h.
 */
#define IS_DEFAULT_LEVEL(level) ((level) >= 0xfe)

/*
 * When paraLevel is contextual, this bit is ORed into the property of every
 * character that belongs to a paragraph with contextual RTL direction.
 */
#define CONTEXT_RTL 0x80

/* the property value with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir) & ~CONTEXT_RTL)

/* a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit */
#define DIRPROP_FLAG_NC(dir) (1UL << NO_CONTEXT_RTL(dir))

/*
 * Paragraph level that applies at text position index: with a contextual
 * (non-zero) defaultParaLevel it is bit 7 of that character's dirProps entry,
 * otherwise it is the object's paraLevel.
 */
#define GET_PARALEVEL(ubidi, index) \
    ((UBiDiLevel)((ubidi)->defaultParaLevel \
                      ? (ubidi)->dirProps[index] >> 7 \
                      : (ubidi)->paraLevel))
/* Paragraph type for multiple paragraph support ---------------------------- */
/* One entry per paragraph; the UBiDi paras array holds the limits of paragraphs. */
typedef int32_t Para;
/* code points U+000D (carriage return) and U+000A (line feed) */
#define CR 0x000D
#define LF 0x000A
/* Run structure for reordering --------------------------------------------- */
/*
 * Single-bit flags telling which directional mark (LRM or RLM) to insert and
 * whether it goes before or after; the values are distinct bits, so they can
 * be combined. Run.insertRemove carries them when it is positive.
 */
enum {
LRM_BEFORE=1,
LRM_AFTER=2,
RLM_BEFORE=4,
RLM_AFTER=8
};
/* One directional run, as used for reordering. */
typedef struct Run {
int32_t logicalStart, /* first character of the run; b31 indicates even/odd level
                         (set and read with the INDEX_ODD_BIT macros) */
visualLimit, /* last visual position of the run +1 */
insertRemove; /* if >0, flags for inserting LRM/RLM before/after run
                 (LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER),
if <0, count of bidi controls within run */
} Run;
/*
 * In a Run, logicalStart will get bit 31 set if the run level is odd.
 *
 * Only the low bit of level is used, and the shift is done on an unsigned
 * 32-bit value: shifting a non-zero int32_t left by 31 overflows, which is
 * undefined behavior in C.
 *
 * MAKE_INDEX_ODD_PAIR(index, level)  index with bit 31 set iff level is odd
 * ADD_ODD_BIT_FROM_LEVEL(x, level)   ORs that bit into the lvalue x
 * REMOVE_ODD_BIT(x)                  clears bit 31 in the lvalue x
 * GET_INDEX(x)                       x without bit 31, as an int32_t
 *                                    (no wider unsigned result where long is 64 bits)
 * GET_ODD_BIT(x)                     bit 31 of x as 0 or 1
 * IS_ODD_RUN(x) / IS_EVEN_RUN(x)     UBool tests of bit 31
 */
#define INDEX_ODD_BIT (1UL<<31)
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)((level)&1)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
#define REMOVE_ODD_BIT(x) ((x)&=(int32_t)~(uint32_t)INDEX_ODD_BIT)
#define GET_INDEX(x) ((int32_t)((uint32_t)(x)&~(uint32_t)INDEX_ODD_BIT))
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
/*
 * Declaration only; the definition is not in this header.
 * Takes the UBiDi object and an in/out error code and returns a UBool.
 * NOTE(review): presumably this sets up pBiDi->runs and pBiDi->runCount
 * (runCount==-1 means "runs not set up yet") - confirm against the definition.
 */
U_CFUNC UBool
ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
/** BiDi control code points: U+200C..U+200F and U+202A..U+202E */
enum {
    ZWNJ_CHAR = 0x200c,
    ZWJ_CHAR  = 0x200d,
    LRM_CHAR  = 0x200e,
    RLM_CHAR  = 0x200f,
    LRE_CHAR  = 0x202a,
    RLE_CHAR  = 0x202b,
    PDF_CHAR  = 0x202c,
    LRO_CHAR  = 0x202d,
    RLO_CHAR  = 0x202e
};

/*
 * True if c is one of the code points listed above.
 * The first test clears the two low bits and so matches the four values
 * starting at ZWNJ_CHAR; the second is an unsigned range check for the five
 * values starting at LRE_CHAR. The argument c may be evaluated twice.
 */
#define IS_BIDI_CONTROL_CHAR(c) \
    ((((uint32_t)(c)) & 0xfffffffc) == ZWNJ_CHAR || \
     ((uint32_t)((c) - LRE_CHAR)) < 5)
/* InsertPoints structure for noting where to put BiDi marks ---------------- */
/* One noted insertion: a text position plus the mark flags that apply there. */
typedef struct Point {
int32_t pos; /* position in text */
int32_t flag; /* flag for LRM/RLM, before/after */
} Point;
/* Array of Point entries together with its bookkeeping counters. */
typedef struct InsertPoints {
int32_t capacity; /* number of points allocated */
int32_t size; /* number of points used */
int32_t confirmed; /* number of points confirmed */
UErrorCode errorCode; /* for eventual memory shortage */
Point *points; /* pointer to array of points */
} InsertPoints;
/* UBiDi structure ----------------------------------------------------------- */
/*
 * State of one BiDi object. Per the field comments below, an object is either
 * a paragraph object (pParaBiDi points to itself) or refers to a parent
 * paragraph object through pParaBiDi.
 */
struct UBiDi {
/* pointer to parent paragraph object (pointer to self if this object is
 * a paragraph object); set to NULL in a newly opened object; set to a
 * real value after a successful execution of ubidi_setPara or ubidi_setLine
 */
const UBiDi * pParaBiDi;
/* NOTE(review): presumably the BiDi properties data declared in ubidi_props.h - confirm */
const UBiDiProps *bdp;
/* alias pointer to the current text */
const UChar *text;
/* length of the current text */
int32_t originalLength;
/* if the UBIDI_OPTION_STREAMING option is set, this is the length
 * of text actually processed by ubidi_setPara, which may be shorter than
 * the original length.
 * Otherwise, it is identical to the original length.
 */
int32_t length;
/* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
 * marks are allowed to be inserted in one of the reordering modes, the
 * length of the result string may be different from the processed length.
 */
int32_t resultLength;
/* memory sizes in bytes, one per *Memory pointer below */
int32_t dirPropsSize, levelsSize, parasSize, runsSize;
/* allocated memory; managed through ubidi_getMemory() via the get*Memory macros */
DirProp *dirPropsMemory;
UBiDiLevel *levelsMemory;
Para *parasMemory;
Run *runsMemory;
/* indicators for whether memory may be allocated after ubidi_open() */
UBool mayAllocateText, mayAllocateRuns;
/* arrays with one value per text-character */
const DirProp *dirProps;
UBiDiLevel *levels;
/* are we performing an approximation of the "inverse BiDi" algorithm? */
UBool isInverse;
/* are we using the basic algorithm or its variation? */
UBiDiReorderingMode reorderingMode;
/* UBIDI_REORDER_xxx values must be ordered so that all the regular
 * logical to visual modes come first, and all inverse BiDi modes
 * come last.
 */
#define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
/* bitmask for reordering options */
uint32_t reorderingOptions;
/* must block separators receive level 0? */
UBool orderParagraphsLTR;
/* the paragraph level */
UBiDiLevel paraLevel;
/* original paraLevel when contextual */
/* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
UBiDiLevel defaultParaLevel;
/* context data: text before (prologue) and after (epilogue), with lengths */
const UChar *prologue;
int32_t proLength;
const UChar *epilogue;
int32_t epiLength;
/* the following is set in ubidi_setPara, used in processPropertySeq */
const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
/* the overall paragraph or line directionality - see UBiDiDirection */
UBiDiDirection direction;
/* flags is a bit set for which directional properties are in the text */
Flags flags;
/* lastArabicPos is index to the last AL in the text, -1 if none */
int32_t lastArabicPos;
/* characters after trailingWSStart are WS and are */
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
int32_t trailingWSStart;
/* fields for paragraph handling */
int32_t paraCount; /* set in getDirProps() */
Para *paras; /* limits of paragraphs, filled in
ResolveExplicitLevels() or CheckExplicitLevels() */
/* for single paragraph text, we only need a tiny array of paras (no malloc()) */
Para simpleParas[1];
/* fields for line reordering */
int32_t runCount; /* ==-1: runs not set up yet */
Run *runs;
/* for non-mixed text, we only need a tiny array of runs (no malloc()) */
Run simpleRuns[1];
/* for inverse Bidi with insertion of directional marks */
InsertPoints insertPoints;
/* for option UBIDI_OPTION_REMOVE_CONTROLS */
int32_t controlCount;
/* for Bidi class callback */
UBiDiClassCallback *fnClassCallback; /* action pointer */
const void *coClassCallback; /* context pointer */
};
/*
 * IS_VALID_PARA(x): x is non-NULL and is its own paragraph object
 * (its pParaBiDi points back to x itself).
 */
#define IS_VALID_PARA(x) \
    ((x) != NULL && ((x)->pParaBiDi == (x)))

/*
 * IS_VALID_PARA_OR_LINE(x): x is non-NULL and either passes the paragraph
 * test above, or has a non-NULL parent whose own pParaBiDi points to that
 * same parent. The argument x is evaluated several times.
 */
#define IS_VALID_PARA_OR_LINE(x) \
    ((x) != NULL && \
     ((x)->pParaBiDi == (x) || \
      (((x)->pParaBiDi) != NULL && \
       (x)->pParaBiDi->pParaBiDi == (x)->pParaBiDi)))
/*
 * Union of the four pointer types that ubidi_getMemory() is asked to manage.
 * The get*Memory macros cast the address of the matching UBiDi *Memory field
 * to a pointer to this union.
 */
typedef union {
DirProp *dirPropsMemory;
UBiDiLevel *levelsMemory;
Para *parasMemory;
Run *runsMemory;
} BidiMemoryForAllocation;
/*
 * Macros for initial checks at function entry.
 *
 * Each macro expands to an if statement that returns from the calling
 * function when its check fails; the caller writes the trailing semicolon.
 * The RETURN_IF_xxx forms return retvalue, the RETURN_VOID_IF_xxx forms are
 * for functions returning void.
 *
 *   xxx_NULL_OR_FAILING_ERRCODE  pErrcode is NULL or already holds a failure
 *   xxx_NOT_VALID_PARA           IS_VALID_PARA(bidi) is false; sets errcode
 *                                to U_INVALID_STATE_ERROR
 *   xxx_NOT_VALID_PARA_OR_LINE   same, using IS_VALID_PARA_OR_LINE(bidi)
 *   xxx_BAD_RANGE                arg is outside [start, limit); sets errcode
 *                                to U_ILLEGAL_ARGUMENT_ERROR
 *
 * errcode must be an lvalue (typically *pErrorCode). Every macro parameter is
 * parenthesized in the expansion so that argument expressions containing
 * operators are used as a whole. Because these are bare if statements, do not
 * use one as the unbraced body of an if that has an else.
 */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \
if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return retvalue
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \
if(!IS_VALID_PARA(bidi)) { \
(errcode)=U_INVALID_STATE_ERROR; \
return retvalue; \
}
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \
if(!IS_VALID_PARA_OR_LINE(bidi)) { \
(errcode)=U_INVALID_STATE_ERROR; \
return retvalue; \
}
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \
if((arg)<(start) || (arg)>=(limit)) { \
(errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
return retvalue; \
}
#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \
if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \
if(!IS_VALID_PARA(bidi)) { \
(errcode)=U_INVALID_STATE_ERROR; \
return; \
}
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \
if(!IS_VALID_PARA_OR_LINE(bidi)) { \
(errcode)=U_INVALID_STATE_ERROR; \
return; \
}
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \
if((arg)<(start) || (arg)>=(limit)) { \
(errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
return; \
}
/*
 * Helper function to (re)allocate memory if allowed.
 *   pMemory     - address of the memory pointer to manage, viewed through
 *                 the BidiMemoryForAllocation union
 *   pSize       - address of the size recorded for that memory (the UBiDi
 *                 xxxSize fields hold sizes in bytes)
 *   mayAllocate - whether allocation is permitted for this call
 *   sizeNeeded  - requested size; the get*Memory macros scale element counts
 *                 by sizeof for Run and Para arrays
 * NOTE(review): the UBool result presumably reports whether enough memory is
 * available - confirm against the definition.
 */
U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
/*
 * Shared expansion for the helper macros below: calls ubidi_getMemory() on
 * the UBiDi field memField, with its recorded size in sizeField.
 */
#define BIDI_GET_MEMORY_(pBiDi, memField, sizeField, mayAllocate, sizeNeeded) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->memField, \
                    &(pBiDi)->sizeField, (mayAllocate), (sizeNeeded))

/*
 * helper macros for each allocated array in UBiDi;
 * allocation is governed by the object's mayAllocateText/mayAllocateRuns.
 * The dirProps and levels variants pass length through unchanged, the runs
 * variant passes length*sizeof(Run).
 */
#define getDirPropsMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, dirPropsMemory, dirPropsSize, \
                     (pBiDi)->mayAllocateText, (length))
#define getLevelsMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, levelsMemory, levelsSize, \
                     (pBiDi)->mayAllocateText, (length))
#define getRunsMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, runsMemory, runsSize, \
                     (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, dirPropsMemory, dirPropsSize, TRUE, (length))
#define getInitialLevelsMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, levelsMemory, levelsSize, TRUE, (length))
#define getInitialParasMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, parasMemory, parasSize, TRUE, (length)*sizeof(Para))
#define getInitialRunsMemory(pBiDi, length) \
    BIDI_GET_MEMORY_(pBiDi, runsMemory, runsSize, TRUE, (length)*sizeof(Run))
#endif
#endif