#!/bin/sh

# vim: indentexpr= nosmartindent autoindent
# vim: tabstop=2 shiftwidth=2 softtabstop=2

# Prints a regex that matches a single grapheme cluster to stdout.
#
# Usage: run with no arguments; the pattern is written to stdout, followed by
# one blank line (the template ends in a newline and printf adds another).
#
# This regex was manually written, derived from the rules in UAX #29.
# Particularly, from Table 1c, which lays out a regex for grapheme clusters.

# Fail fast. In particular, -u turns a mistyped class name in the template
# below into a hard error instead of silently expanding to an empty string
# and emitting a wrong regex.
set -eu

# One character class per Grapheme_Cluster_Break (gcb) property value used
# by the pattern. Single quotes keep the backslash of each \p{...} escape
# literal without relying on double-quote backslash rules.
CR='\p{gcb=CR}'
LF='\p{gcb=LF}'
Control='\p{gcb=Control}'
Prepend='\p{gcb=Prepend}'
L='\p{gcb=L}'
V='\p{gcb=V}'
LV='\p{gcb=LV}'
LVT='\p{gcb=LVT}'
T='\p{gcb=T}'
RI='\p{gcb=RI}'
Extend='\p{gcb=Extend}'
ZWJ='\p{gcb=ZWJ}'
SpacingMark='\p{gcb=SpacingMark}'

# Classes that are not gcb values: any code point, and the emoji property
# used by the pictographic-sequence alternative.
Any='\p{any}'
ExtendPict='\p{Extended_Pictographic}'

# The pattern, top-level alternatives in order:
#   1. a CR LF pair;
#   2. a single Control character;
#   3. any number of Prepend characters, then one "core" (an L/V/LV/LVT/T
#      syllable sequence, a pair of RI characters, an Extended_Pictographic
#      sequence joined by ZWJ, or any one character that is not
#      Control/CR/LF), then any number of Extend/ZWJ/SpacingMark characters;
#   4. as a last resort, any single code point.
# NOTE(review): the leading (?x) presumably selects the consuming engine's
# verbose mode, making the spaces and newlines below pure layout -- confirm
# against whatever reads this output.
# Braces around every name keep adjacent quantifiers and look-alike names
# (L / LV / LVT) unambiguous.
grapheme_regex="(?x)
${CR} ${LF}
|
${Control}
|
${Prepend}*
(
(
(${L}* (${V}+ | ${LV} ${V}* | ${LVT}) ${T}*)
|
${L}+
|
${T}+
)
|
${RI} ${RI}
|
${ExtendPict} (${Extend}* ${ZWJ} ${ExtendPict})*
|
[^${Control} ${CR} ${LF}]
)
[${Extend} ${ZWJ} ${SpacingMark}]*
|
${Any}
"

# printf instead of echo: the output is full of backslashes, and echo's
# treatment of backslash sequences differs between shells.
printf '%s\n' "$grapheme_regex"