| // |
| // Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved. |
| // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| // |
| // This code is free software; you can redistribute it and/or modify it |
| // under the terms of the GNU General Public License version 2 only, as |
| // published by the Free Software Foundation. |
| // |
| // This code is distributed in the hope that it will be useful, but WITHOUT |
| // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // version 2 for more details (a copy is included in the LICENSE file that |
| // accompanied this code). |
| // |
| // You should have received a copy of the GNU General Public License version |
| // 2 along with this work; if not, write to the Free Software Foundation, |
| // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| // |
| // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| // or visit www.oracle.com if you need additional information or have any |
| // questions. |
| // |
| // -------------------------------------------------------- |
| // This file contains test cases with supplementary characters for regular expressions. |
| // A test case consists of three lines: |
| // The first line is a pattern used in the test |
| // The second line is the input to search for the pattern in |
| // The third line is a concatenation of the match, the number of groups, |
| // and the contents of the first four subexpressions. |
| // Empty lines and lines beginning with comment slashes are ignored. |
| |
| // Test unsetting of backed off groups |
| ^(\ud800\udc61)?\ud800\udc61 |
| \ud800\udc61 |
| true \ud800\udc61 1 |
| |
| ^(\ud800\udc61\ud800)?\ud800\udc61\ud800 |
| \ud800\udc61\ud800 |
| true \ud800\udc61\ud800 1 |
| |
| ^(\ud800\udc61\ud800\udc61(\ud800\udc62\ud800\udc62)?)+$ |
| \ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61 2 \ud800\udc61\ud800\udc61 \ud800\udc62\ud800\udc62 |
| |
| ^(\ud800\udc61\ud800\udc61\ud800(\ud800\udc62\ud800\udc62\ud800)?)+$ |
| \ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800 |
| true \ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800 2 \ud800\udc61\ud800\udc61\ud800 \ud800\udc62\ud800\udc62\ud800 |
| |
| ((\ud800\udc61|\ud800\udc62)?\ud800\udc62)+ |
| \ud800\udc62 |
| true \ud800\udc62 2 \ud800\udc62 |
| |
| ((\ud800|\ud800\udc62)?\ud800\udc62)+ |
| \ud800\udc62 |
| true \ud800\udc62 2 \ud800\udc62 |
| |
| (\ud800\udc61\ud800\udc61\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\udc61 |
| \ud800\udc61\ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61\ud800\udc61 1 |
| |
| (\ud800\udc61\ud800\udc61\ud800\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\ud800\udc61 |
| \ud800\udc61\ud800\udc61\ud800\ud800\udc61 |
| true \ud800\udc61\ud800\udc61\ud800\ud800\udc61 1 |
| |
| ^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$ |
| \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 |
| true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800 |
| |
| ^(\ud800\udc61(\ud800\udc62)?)+$ |
| \ud800\udc61\ud800\udc62\ud800\udc61 |
| true \ud800\udc61\ud800\udc62\ud800\udc61 2 \ud800\udc61 \ud800\udc62 |
| |
| ^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$ |
| \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 |
| true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800 |
| |
| ^(\ud800\udc61(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\udc62\ud800\udc63 |
| \ud800\udc61\ud800\udc62\ud800\udc63 |
| true \ud800\udc61\ud800\udc62\ud800\udc63 3 |
| |
| ^(\ud800\udc61\ud800(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\ud800\udc62\ud800\udc63 |
| \ud800\udc61\ud800\ud800\udc62\ud800\udc63 |
| true \ud800\udc61\ud800\ud800\udc62\ud800\udc63 3 |
| |
| ^(\ud800\udc61(\ud800\udc02(\ud800\udc63))).* |
| \ud800\udc61\ud800\udc02\ud800\udc63 |
| true \ud800\udc61\ud800\udc02\ud800\udc63 3 \ud800\udc61\ud800\udc02\ud800\udc63 \ud800\udc02\ud800\udc63 \ud800\udc63 |
| |
| ^(\ud800\udc61(\ud800(\ud800\udc63))).* |
| \ud800\udc61\ud800\ud800\udc63 |
| true \ud800\udc61\ud800\ud800\udc63 3 \ud800\udc61\ud800\ud800\udc63 \ud800\ud800\udc63 \ud800\udc63 |
| |
| // Patterns including no surrogates |
| (.)([^a])xyz |
| \ud801\ud800\udc00xyz |
| true \ud801\ud800\udc00xyz 2 \ud801 \ud800\udc00 |
| |
| [^a-z].. |
| \ud801\ud800\udc00xyz |
| true \ud801\ud800\udc00x 0 |
| |
| .$ |
| \ud801\ud800\udc00 |
| true \ud800\udc00 0 |
| |
| .$ |
| \ud801\udc01\ud800\udc00 |
| true \ud800\udc00 0 |
| |
| .$ |
| \ud801\udc01\ud800\udc00\udcff |
| true \udcff 0 |
| |
| [^x-\uffff][^y-\uffff] |
| \ud800\udc00pqr |
| true \ud800\udc00p 0 |
| |
| [^x-\uffff]+ |
| \ud800\udc00pqrx |
| true \ud800\udc00pqr 0 |
| |
| /// The following test cases fail due to use of Start rather than |
| /// StartS. Disabled for now. |
| ///[a-\uffff] |
| ///\ud800\udc00x |
| ///true x 0 |
| /// |
| ///[a-\uffff] |
| ///\ud800\udc00 |
| ///false 0 |
| |
| // use of x modifier |
| \ud800\udc61bc(?x)bl\ud800\udc61h |
| \ud800\udc61bcbl\ud800\udc61h |
| true \ud800\udc61bcbl\ud800\udc61h 0 |
| |
| \ud800\udc61bc(?x) bl\ud800\udc61h |
| \ud800\udc61bcbl\ud800\udc61h |
| true \ud800\udc61bcbl\ud800\udc61h 0 |
| |
| \ud800\udc61bc(?x) bl\ud800\udc61h blech |
| \ud800\udc61bcbl\ud800\udc61hblech |
| true \ud800\udc61bcbl\ud800\udc61hblech 0 |
| |
| \ud800\udc61bc(?x) bl\ud800\udc61h # ignore comment |
| \ud800\udc61bcbl\ud800\udc61h |
| true \ud800\udc61bcbl\ud800\udc61h 0 |
| |
| // Simple alternation |
| \ud800\udc61|\ud800\udc62 |
| \ud800\udc61 |
| true \ud800\udc61 0 |
| |
| \ud800\udc61|\ud800\udc62|\ud800 |
| \ud800\udc61 |
| true \ud800\udc61 0 |
| |
| \ud800\udc61|\ud800 |
| \ud800\udc62 |
| false 0 |
| |
| \ud800\udc62|\ud800 |
| \ud800 |
| true \ud800 0 |
| |
| \ud800\udc61|\ud802\udc02 |
| z |
| false 0 |
| |
| \ud800\udc61|\ud802\udc02 |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| \ud800\udc61|\ud802\udc02|\ud803\udc03\ud804\udc04 |
| \ud803\udc03\ud804\udc04 |
| true \ud803\udc03\ud804\udc04 0 |
| |
| \ud800\udc61|\ud800\udc61d |
| \ud800\udc61d |
| true \ud800\udc61 0 |
| |
| z(\ud800\udc61|\ud800\udc61c)\ud802\udc02 |
| z\ud800\udc61c\ud802\udc02 |
| true z\ud800\udc61c\ud802\udc02 1 \ud800\udc61c |
| |
| z(\ud800\udc61|\ud800\udc61c|\udc61c)\ud802\udc02 |
| z\udc61c\ud802\udc02 |
| true z\udc61c\ud802\udc02 1 \udc61c |
| |
| // Simple codepoint class |
| [\ud800\udc61\ud802\udc02c]+ |
| \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0 |
| |
| [\ud800\udc61\ud802\udc02c]+ |
| \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0 |
| |
| [\ud800\udc61\ud802\udc02c\ud800]+ |
| \ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0 |
| |
| [\ud800\udc61bc]+ |
| d\ud800\udc62fg |
| false 0 |
| |
| [\ud800\udc61bc]+[\ud804\udc04ef]+[\ud807\udc07hi]+ |
| zzz\ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07zzz |
| true \ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07 0 |
| |
| // Range codepoint class |
| [\ud801\udc01-\ud807\udc07]+ |
| \ud8ff\udcff\ud8ff\udcff\ud8ff\udcff\ud807\udc07\ud807\udc07\ud807\udc07 |
| true \ud807\udc07\ud807\udc07\ud807\udc07 0 |
| |
| [\ud801\udc01-\ud807\udc07]+ |
| mmm |
| false 0 |
| |
| [\ud800\udc61-]+ |
| z\ud800\udc61-9z |
| true \ud800\udc61- 0 |
| |
| // Negated char class |
| [^\ud800\udc61\ud802\udc02c]+ |
| \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 |
| false 0 |
| |
| [^\ud800\udc61\ud802\udc02\ud803\udc03]+ |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg |
| true \ud804\udc04efg 0 |
| |
| [^\ud800\udc61\ud802\udc02\ud803\udc03\ud800]+ |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg |
| true \ud804\udc04efg 0 |
| |
| // Making sure a ^ not in first position matches literal ^ |
| [\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02] |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| [\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02] |
| ^ |
| true ^ 0 |
| |
| // Class union and intersection |
| [\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]] |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]] |
| \ud805\udc05 |
| true \ud805\udc05 0 |
| |
| [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| \ud80c\udc0c |
| true \ud80c\udc0c 0 |
| |
| [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| 4 |
| true 4 0 |
| |
| [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| \ud805\udc05 |
| false 0 |
| |
| [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| \ud816\udc16 |
| false 0 |
| |
| [[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| [[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]] |
| \ud81a\udc1a |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] |
| \ud805\udc05 |
| true \ud805\udc05 0 |
| |
| [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] |
| \ud808\udc08 |
| true \ud808\udc08 0 |
| |
| [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] |
| \ud80d\udc0d |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]\ud80d\udc0d] |
| \ud80d\udc0d |
| true \ud80d\udc0d 0 |
| |
| [\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] |
| \ud804\udc04 |
| true \ud804\udc04 0 |
| |
| [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] |
| \ud808\udc08 |
| true \ud808\udc08 0 |
| |
| [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] |
| \ud816\udc16 |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]] |
| \ud801\udc01 |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]] |
| \ud805\udc05 |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]] |
| \ud81a\udc1a |
| false 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]] |
| \ud801\udc01 |
| false 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]] |
| \ud805\udc05 |
| false 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]] |
| \ud81a\udc1a |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03&&\ud804\udc04-\ud806\udc06] |
| \ud801\udc01 |
| false 0 |
| |
| [\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a] |
| \ud80d\udc0d |
| true \ud80d\udc0d 0 |
| |
| [\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud803\udc03] |
| \ud80d\udc0d |
| false 0 |
| |
| [\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud81a\udc1a] |
| \ud80d\udc0d |
| true \ud80d\udc0d 0 |
| |
| [[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]] |
| \ud801\udc01 |
| false 0 |
| |
| [[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]] |
| \ud80d\udc0d |
| true \ud80d\udc0d 0 |
| |
| [[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]] |
| \ud81a\udc1a |
| false 0 |
| |
| [[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]] |
| \ud801\udc01 |
| false 0 |
| |
| [[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]] |
| \ud804\udc04 |
| true \ud804\udc04 0 |
| |
| [\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]] |
| \ud801\udc01 |
| false 0 |
| |
| [\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]] |
| \ud804\udc04 |
| true \ud804\udc04 0 |
| |
| [\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]] |
| \ud801\udc01 |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]] |
| \ud805\udc05 |
| true \ud805\udc05 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&\ud804\udc04-\ud806\udc06\ud801\udc01-\ud803\udc03] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06][\ud801\udc01-\ud803\udc03]] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]] |
| \ud805\udc05 |
| true \ud805\udc05 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]] |
| \ud801\udc01 |
| false 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]] |
| \ud803\udc03 |
| true \ud803\udc03 0 |
| |
| [[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04][\ud803\udc03-\ud805\udc05]&&[\ud815\udc15-\ud81a\udc1a]] |
| \ud803\udc03 |
| false 0 |
| |
| [\ud801\udc01\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| [\ud800\udc61\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]] |
| \ud804\udc04 |
| false 0 |
| |
| [\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09] |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| [\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09] |
| \ud807\udc07 |
| false 0 |
| |
| [[\ud801\udc01[\ud802\udc02]]&&[\ud802\udc02[\ud801\udc01]]] |
| \ud801\udc01 |
| true \ud801\udc01 0 |
| |
| // Unicode isn't supported in clazz() |
| [[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]] |
| \ud800\udc61 |
| true \ud800\udc61 0 |
| |
| [[\ud800\udc61]&&[\ud802\udc02][\ud800][\ud800\udc61]&&[^\ud804\udc04]] |
| \ud800\udc61 |
| true \ud800\udc61 0 |
| |
| [[\ud800\udc61]&&[b][\ud800][\ud800\udc61]&&[^\ud804\udc04]] |
| \ud804\udc04 |
| false 0 |
| |
| [[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]] |
| d |
| false 0 |
| |
| [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]] |
| \ud800\udc01 |
| false 0 |
| |
| [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]] |
| \ud800\udc03 |
| true \ud800\udc03 0 |
| |
| [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]] |
| \ud800\udc03 |
| true \ud800\udc03 0 |
| |
| [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03] |
| \ud800\udc03 |
| true \ud800\udc03 0 |
| |
| [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&\ud800\udc03] |
| \ud800\udc03 |
| true \ud800\udc03 0 |
| |
| [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&[\ud800\udc03\ud800\udc04\ud800\udc05]] |
| \ud800\udc03 |
| true \ud800\udc03 0 |
| |
| [z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]] |
| \ud800\udc03 |
| true \ud800\udc03 0 |
| |
| [z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]&&[u-z]] |
| z |
| true z 0 |
| |
| [x[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]] |
| z |
| false 0 |
| |
| [x[[wz]\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]] |
| z |
| true z 0 |
| |
| [[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]\ud800\udc61b\ud800\udc03] |
| \ud800\udc61 |
| true \ud800\udc61 0 |
| |
| [[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]xyz[\ud800\udc61b\ud800\udc03]] |
| \ud800\udc61 |
| true \ud800\udc61 0 |
| |
| \pL |
| \ud800\udc00 |
| true \ud800\udc00 0 |
| |
| \p{IsASCII} |
| \ud800\udc00 |
| false 0 |
| |
| \pLbc |
| \ud800\udc00bc |
| true \ud800\udc00bc 0 |
| |
| \ud800\udc61[r\p{InGreek}]c |
| \ud800\udc61\u0370c |
| true \ud800\udc61\u0370c 0 |
| |
| \ud800\udc61\p{InGreek} |
| \ud800\udc61\u0370 |
| true \ud800\udc61\u0370 0 |
| |
| \ud800\udc61\P{InGreek} |
| \ud800\udc61\u0370 |
| false 0 |
| |
| \ud800\udc61\P{InGreek} |
| \ud800\udc61b |
| true \ud800\udc61b 0 |
| |
| \ud800\udc61{^InGreek} |
| - |
| error |
| |
| \ud800\udc61\p{^InGreek} |
| - |
| error |
| |
| \ud800\udc61\P{^InGreek} |
| - |
| error |
| |
| \ud800\udc61\p{InGreek} |
| \ud800\udc61\u0370 |
| true \ud800\udc61\u0370 0 |
| |
| \ud800\udc61[\p{InGreek}]c |
| \ud800\udc61\u0370c |
| true \ud800\udc61\u0370c 0 |
| |
| \ud800\udc61[\P{InGreek}]c |
| \ud800\udc61\u0370c |
| false 0 |
| |
| \ud800\udc61[\P{InGreek}]c |
| \ud800\udc61bc |
| true \ud800\udc61bc 0 |
| |
| \ud800\udc61[{^InGreek}]c |
| \ud800\udc61nc |
| true \ud800\udc61nc 0 |
| |
| \ud800\udc61[{^InGreek}]c |
| \ud800\udc61zc |
| false 0 |
| |
| \ud800\udc61[\p{^InGreek}]c |
| - |
| error |
| |
| \ud800\udc61[\P{^InGreek}]c |
| - |
| error |
| |
| \ud800\udc61[\p{InGreek}] |
| \ud800\udc61\u0370 |
| true \ud800\udc61\u0370 0 |
| |
| \ud800\udc61[r\p{InGreek}]c |
| \ud800\udc61rc |
| true \ud800\udc61rc 0 |
| |
| \ud800\udc61[\p{InGreek}r]c |
| \ud800\udc61rc |
| true \ud800\udc61rc 0 |
| |
| \ud800\udc61[r\p{InGreek}]c |
| \ud800\udc61rc |
| true \ud800\udc61rc 0 |
| |
| \ud800\udc61[^\p{InGreek}]c |
| \ud800\udc61\u0370c |
| false 0 |
| |
| \ud800\udc61[^\P{InGreek}]c |
| \ud800\udc61\u0370c |
| true \ud800\udc61\u0370c 0 |
| |
| \ud800\udc61[\p{InGreek}&&[^\u0370]]c |
| \ud800\udc61\u0370c |
| false 0 |
| |
| // Test the dot metacharacter |
| \ud800\udc61.c.+ |
| \ud800\udc61#c%& |
| true \ud800\udc61#c%& 0 |
| |
| \ud800\udc61b. |
| \ud800\udc61b\n |
| false 0 |
| |
| (?s)\ud800\udc61b. |
| \ud800\udc61b\n |
| true \ud800\udc61b\n 0 |
| |
| \ud800\udc61[\p{L}&&[\P{InGreek}]]c |
| \ud800\udc61\u6000c |
| true \ud800\udc61\u6000c 0 |
| |
| \ud800\udc61[\p{L}&&[\P{InGreek}]]c |
| \ud800\udc61rc |
| true \ud800\udc61rc 0 |
| |
| \ud800\udc61[\p{L}&&[\P{InGreek}]]c |
| \ud800\udc61\u0370c |
| false 0 |
| |
| \ud800\udc61\p{InGreek}c |
| \ud800\udc61\u0370c |
| true \ud800\udc61\u0370c 0 |
| |
| \ud800\udc61\p{Sc} |
| \ud800\udc61$ |
| true \ud800\udc61$ 0 |
| |
| // Test \p{L} |
| \p{L} |
| \ud800\udf1e |
| true \ud800\udf1e 0 |
| |
| ^a\p{L}z$ |
| a\ud800\udf1ez |
| true a\ud800\udf1ez 0 |
| |
| // Test \P{InDeseret} |
| |
| \ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret} |
| \ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00 |
| true \ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00 0 |
| |
| \ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret} |
| \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00 |
| true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00 0 |
| |
| // Test \p{InDeseret} |
| \ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\p{InDeseret} |
| \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00 |
| true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00 0 |
| |
| // Test the word char escape sequence |
| \ud800\udc61b\wc |
| \ud800\udc61bcc |
| true \ud800\udc61bcc 0 |
| |
| \ud800\udc61bc[\w] |
| \ud800\udc61bcd |
| true \ud800\udc61bcd 0 |
| |
| \ud800\udc61bc[\sdef]* |
| \ud800\udc61bc def |
| true \ud800\udc61bc def 0 |
| |
| \ud800\udc61bc[\sy-z]* |
| \ud800\udc61bc y z |
| true \ud800\udc61bc y z 0 |
| |
| \ud800\udc01bc[\ud800\udc01-\ud800\udc04\sm-p]* |
| \ud800\udc01bc\ud800\udc01\ud800\udc01 mn p |
| true \ud800\udc01bc\ud800\udc01\ud800\udc01 mn p 0 |
| |
| // Test the whitespace escape sequence |
| \ud800\udc61b\s\ud800\udc03 |
| \ud800\udc61b \ud800\udc03 |
| true \ud800\udc61b \ud800\udc03 0 |
| |
| \s\s\s |
| bl\ud800\udc61h err |
| false 0 |
| |
| \S\S\s |
| bl\ud800\udc61h err |
| true \ud800\udc61h 0 |
| |
| // Test the digit escape sequence |
| \ud800\udc61b\d\ud800\udc03 |
| \ud800\udc61b9\ud800\udc03 |
| true \ud800\udc61b9\ud800\udc03 0 |
| |
| \d\d\d |
| bl\ud800\udc61h45 |
| false 0 |
| |
| // Test the caret metacharacter |
| ^\ud800\udc61bc |
| \ud800\udc61bcdef |
| true \ud800\udc61bc 0 |
| |
| ^\ud800\udc61bc |
| bcd\ud800\udc61bc |
| false 0 |
| |
| // Greedy ? metacharacter |
| \ud800\udc61?\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc02 0 |
| |
| \udc61?\ud800\udc02 |
| \ud800\udc61\udc61\udc61\ud800\udc02 |
| true \udc61\ud800\udc02 0 |
| |
| \ud800\udc61?\ud800\udc02 |
| \ud800\udc02 |
| true \ud800\udc02 0 |
| |
| \ud800?\ud800\udc02 |
| \ud800\udc02 |
| true \ud800\udc02 0 |
| |
| \ud800\udc61?\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc03\ud800\udc03\ud800\udc03 |
| false 0 |
| |
| .?\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc02 0 |
| |
| // Reluctant ? metacharacter |
| \ud800\udc61??\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc02 0 |
| |
| \ud800??\ud800\udc02 |
| \ud800\ud800\ud8001\ud800\ud800\udc02 |
| true \ud800\ud800\udc02 0 |
| |
| \ud800\udc61??\ud800\udc02 |
| \ud800\udc02 |
| true \ud800\udc02 0 |
| |
| \ud800??\ud800\udc02 |
| \ud800\udc02 |
| true \ud800\udc02 0 |
| |
| \ud800\udc61??\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .??\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc02 0 |
| |
| // Possessive ? metacharacter |
| \ud800\udc61?+\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc02 0 |
| |
| \ud800\udc61?+\ud800\udc02 |
| \ud800\udc02 |
| true \ud800\udc02 0 |
| |
| \ud800\udc61?+\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .?+\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc02 0 |
| |
| // Greedy + metacharacter |
| \ud800\udc61+\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 |
| |
| \udc61+\ud800\udc02 |
| \ud800\udc61\udc61\udc61\udc61\ud800\udc02 |
| true \udc61\udc61\udc61\ud800\udc02 0 |
| |
| \ud800\udc61+\ud800\udc02 |
| \ud800\udc02 |
| false 0 |
| |
| \ud800+\ud800\udc02 |
| \ud800\udc02 |
| false 0 |
| |
| \ud800\udc61+\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .+\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 |
| |
| .+\ud800\udc02 |
| \ud800\udc61\udc61\udc61\udc61\ud800\udc02 |
| true \ud800\udc61\udc61\udc61\udc61\ud800\udc02 0 |
| |
| // Reluctant + metacharacter |
| \ud800\udc61+?\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 |
| |
| \udc61+?\ud800\udc02 |
| \udc61\udc61\udc61\udc61\ud800\udc02 |
| true \udc61\udc61\udc61\udc61\ud800\udc02 0 |
| |
| \ud800\udc61+?\ud800\udc02 |
| \ud800\udc02 |
| false 0 |
| |
| \ud800+?\ud800\udc02 |
| \ud800\udc02 |
| false 0 |
| |
| \ud800\udc61+?\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .+?\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 |
| |
| // Possessive + metacharacter |
| \ud800\udc61++\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 |
| |
| \ud800\udc61++\ud800\udc02 |
| \ud800\udc02 |
| false 0 |
| |
| \ud800\udc61++\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .++\ud800\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 |
| false 0 |
| |
| // Greedy Repetition |
| \ud800\udc61{2,3} |
| \ud800\udc61 |
| false 0 |
| |
| \ud800\udc61{2,3} |
| \ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61 0 |
| |
| \ud800\udc61{2,3} |
| \ud800\udc61\ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61\ud800\udc61 0 |
| |
| \ud800\udc61{2,3} |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61\ud800\udc61 0 |
| |
| \ud800\udc61{3,} |
| zzz\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61zzz |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 0 |
| |
| \ud800\udc61{3,} |
| zzz\ud800\udc61\ud800\udc61zzz |
| false 0 |
| |
| // Reluctant Repetition |
| \ud800\udc61{2,3}? |
| \ud800\udc61 |
| false 0 |
| |
| \ud800\udc61{2,3}? |
| \ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61 0 |
| |
| \ud800\udc61{2,3}? |
| \ud800\udc61\ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61 0 |
| |
| \ud800\udc61{2,3}? |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 |
| true \ud800\udc61\ud800\udc61 0 |
| |
| // Zero width Positive lookahead |
| \ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04) |
| zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04 |
| true \ud800\udc61\ud802\udc02\ud803\udc03 0 |
| |
| \ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04) |
| zzz\ud800\udc61\ud802\udc02\ud803\udc03e\ud804\udc04 |
| false 0 |
| |
| \ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04) |
| zzz\ud800\udc61\ud802\udc02\ud803\udc03\udcff\ud804\udc04 |
| true \ud800\udc61\ud802\udc02\ud803\udc03 0 |
| |
| \ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04) |
| zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud8ff\udcff\ud804\udc04 |
| false 0 |
| |
| // Zero width Negative lookahead |
| \ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04) |
| zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04 |
| false 0 |
| |
| a\ud802\udc02\ud803\udc03(?!\ud804\udc04) |
| zza\ud802\udc02\ud803\udc03\udc04\ud804\udc04 |
| true a\ud802\udc02\ud803\udc03 0 |
| |
| \ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff) |
| zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04\ud8ffX |
| false 0 |
| |
| a\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff) |
| zza\ud802\udc02\ud803\udc03e\ud804\udc04\ud8ff\udcff |
| true a\ud802\udc02\ud803\udc03 0 |
| |
| // Zero width Positive lookbehind |
| (?<=\ud801\udc01\ud802\udc02)\ud803\udc03 |
| \ud801\udc01\ud802\udc02\ud803\udc03 |
| true \ud803\udc03 0 |
| |
| // Zero width Negative lookbehind |
| (?<!\ud801\udc01)\ud802\udc02\ud803\udc03 |
| ###\ud800\udc00\ud802\udc02\ud803\udc03 |
| true \ud802\udc02\ud803\udc03 0 |
| |
| (?<![\ud801\udc01\ud802\udc02])\ud803\udc03. |
| \ud801\udc01\ud803\udc03x\ud800\udc00\ud803\udc03y |
| true \ud803\udc03y 0 |
| |
| (?<!\ud801\udc01)\ud803\udc03 |
| \ud801\udc01\ud803\udc03 |
| false 0 |
| |
| // Nondeterministic group |
| (\ud800\udc61+\ud802)+ |
| \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 |
| true \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 1 \ud800\udc61\ud802 |
| |
| (\ud800\udc61|\ud802)+ |
| \ud800\ud802\udc61\ud803\ud802\udc61 |
| false 1 |
| |
| // Deterministic group |
| (\ud800\udc61\ud802)+ |
| \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 |
| true \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 1 \ud800\udc61\ud802 |
| |
| (\ud800\udc61\ud802)+ |
| \ud800\udc61ccccd |
| false 1 |
| |
| (\ud800\udc61\ud802)* |
| \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 |
| true \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 1 \ud800\udc61\ud802 |
| |
| (\ud800\udc61b)(cd*) |
| zzz\ud800\udc61bczzz |
| true \ud800\udc61bc 2 \ud800\udc61b c |
| |
| \ud800\udc61bc(\ud804\udc04)*\ud800\udc61bc |
| \ud800\udc61bc\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud800\udc61bc |
| true \ud800\udc61bc\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud800\udc61bc 1 \ud804\udc04 |
| |
| // Back references |
| (\ud800\udc61*)\ud802\udc02c\1 |
| zzz\ud800\udc61\ud800\udc61\ud802\udc02c\ud800\udc61\ud800\udc61zzz |
| true \ud800\udc61\ud800\udc61\ud802\udc02c\ud800\udc61\ud800\udc61 1 \ud800\udc61\ud800\udc61 |
| |
| (\ud800\udc61*)\ud802\udc02c\1 |
| zzz\ud800\udc61\ud800\udc61\ud802\udc02c\ud800\udc61zzz |
| true \ud800\udc61\ud802\udc02c\ud800\udc61 1 \ud800\udc61 |
| |
| (\ud800\udc07\ud800\udc14*)(\ud804\udc04\ud804\udc04e)*(yu)\1\3(vv) |
| zzz\ud800\udc07\ud800\udc14\ud800\udc14\ud804\udc04\ud804\udc04e\ud804\udc04\ud804\udc04eyu\ud800\udc07\ud800\udc14\ud800\udc14yuvvzzz |
| true \ud800\udc07\ud800\udc14\ud800\udc14\ud804\udc04\ud804\udc04e\ud804\udc04\ud804\udc04eyu\ud800\udc07\ud800\udc14\ud800\udc14yuvv 4 \ud800\udc07\ud800\udc14\ud800\udc14 \ud804\udc04\ud804\udc04e yu vv |
| |
| // Greedy * metacharacter |
| \ud800\udc61*\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0 |
| |
| \ud800\udc61*\ud802\udc02 |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| \ud800\udc61*\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .*\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0 |
| |
| // Reluctant * metacharacter |
| \ud800\udc61*?\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0 |
| |
| \ud800\udc61*?\ud802\udc02 |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| \ud800\udc61*?\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .*?\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0 |
| |
| // Possessive * metacharacter |
| \ud800\udc61*+\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 |
| true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0 |
| |
| \ud800\udc61*+\ud802\udc02 |
| \ud802\udc02 |
| true \ud802\udc02 0 |
| |
| \ud800\udc61*+\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61ccc |
| false 0 |
| |
| .*+\ud802\udc02 |
| \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 |
| false 0 |
| |
| // Case insensitivity |
| (?iu)\ud801\udc00\ud801\udc01\ud801\udc02x |
| \ud801\udc28\ud801\udc29\ud801\udc2aX |
| true \ud801\udc28\ud801\udc29\ud801\udc2aX 0 |
| |
| \ud801\udc00(?iu)\ud801\udc01\ud801\udc02 |
| \ud801\udc00\ud801\udc29\ud801\udc2a |
| true \ud801\udc00\ud801\udc29\ud801\udc2a 0 |
| |
| \ud801\udc00(?iu)\ud801\udc01\ud801\udc02 |
| \ud801\udc28\ud801\udc29\ud801\udc2a |
| false 0 |
| |
| (?iu)\ud801\udc00[\ud801\udc01\ud801\udc02]+ |
| \ud801\udc28\ud801\udc29\ud801\udc2a |
| true \ud801\udc28\ud801\udc29\ud801\udc2a 0 |
| |
| (?iu)[\ud801\udc00-\ud801\udc02]+ |
| \ud801\udc28\ud801\udc29\ud801\udc2a |
| true \ud801\udc28\ud801\udc29\ud801\udc2a 0 |
| |
| // Disable metacharacters - test both length <= 3 and > 3 |
| // so that the BM optimization is part of the test |
| \Q***\E\ud801\udc01\ud802\udc02\ud800\udc03 |
| ***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true ***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| \ud802\udc02l\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03 |
| \ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true \ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| \Q***\ud801\udc01\ud802\udc02\ud800\udc03 |
| ***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true ***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| \ud802\udc02l\ud801\udc01h\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03 |
| \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| \Q***\ud801\udc01\ud802\udc02\ud800\udc03 |
| ***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true ***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| \Q*\ud801\udc01\ud802\udc02 |
| *\ud801\udc01\ud802\udc02 |
| true *\ud801\udc01\ud802\udc02 0 |
| |
| \ud802\udc02l\ud801\udc01h\Q***\ud801\udc01\ud802\udc02\ud800\udc03 |
| \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| \ud802\udc02l\ud801\udc01\Q***\ud801\udc01\ud802\udc02\ud800\udc03 |
| \ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03 |
| true \ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03 0 |
| |
| // Test cases below were copied from i18n QE's RegexSupplementaryTests.txt |
| \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 |
| \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 |
| true \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 0 |
| |
| \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 |
| \u1000\uD801\uDFF1\uDB00\uDC00 |
| false 0 |
| |
| \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 |
| \uD800\uDFFF\uFFFF\uDB00\uDC00 |
| false 0 |
| |
| \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 |
| \uD800\uDFFF\uD801\uDFF1\uFFFF |
| false 0 |
| |
| \u1000.\uFFFF |
| \u1000\uD800\uDFFF\uFFFF |
| true \u1000\uD800\uDFFF\uFFFF 0 |
| |
| //======= |
| // Ranges |
| //======= |
| [a-\uD800\uDFFF] |
| \uDFFF |
| true \uDFFF 0 |
| |
| [a-\uD800\uDFFF] |
| \uD800 |
| true \uD800 0 |
| |
| [a-\uD800\uDFFF] |
| \uD800\uDFFF |
| true \uD800\uDFFF 0 |
| |
| [\uD800\uDC00-\uDBFF\uDFFF] |
| \uDBFF |
| false 0 |
| |
| [\uD800\uDC00-\uDBFF\uDFFF] |
| \uDC00 |
| false 0 |
| |
| [\uD800-\uDFFF] |
| \uD800\uDFFF |
| false 0 |
| |
| [\uD800-\uDFFF] |
| \uDFFF\uD800 |
| true \uDFFF 0 |
| |
| foo[^\uD800-\uDFFF] |
| foo\uD800\uDFFF |
| true foo\uD800\uDFFF 0 |
| |
| foo[^\uD800-\uDFFF] |
| foo\uDFFF\uD800 |
| false 0 |
| |
| //fo\uD800[\uDC00-\uDFFF] |
| |
| //================== |
| // Character Classes |
| //================== |
| // Simple class |
| [ab\uD800\uDFFFcd]at |
| \uD800at |
| false 0 |
| |
| [ab\uD800\uDFFFcd]at |
| \uD800\uDFFFat |
| true \uD800\uDFFFat 0 |
| |
| // Negation |
| [^\uD800\uDFFFcd]at |
| \uD800at |
| true \uD800at 0 |
| |
| [^\uD800\uDFFFcd]at |
| \uDFFFat |
| true \uDFFFat 0 |
| |
| // Inclusive range |
| [\u0000-\uD800\uDFFF-\uFFFF] |
| \uD800\uDFFF |
| true \uD800\uDFFF 0 |
| |
| // Unions |
| [\u0000-\uD800[\uDFFF-\uFFFF]] |
| \uD800\uDFFF |
| false 0 |
| |
| |
| // Intersection |
| [\u0000-\uFFFF&&[\uD800\uDFFF]] |
| \uD800\uDFFF |
| false 0 |
| |
| [\u0000-\uFFFF&&[\uD800\uDFFF]] |
| \uD800 |
| false 0 |
| |
| [\u0000-\uFFFF&&[\uDFFF\uD800]] |
| \uD800 |
| true \uD800 0 |
| |
| [\u0000-\uFFFF&&[\uDFFF\uD800\uDC00]] |
| \uDC00 |
| false 0 |
| |
| [\u0000-\uDFFF&&[\uD800-\uFFFF]] |
| \uD800\uDFFF |
| false 0 |
| |
| [\u0000-\uDFFF&&[\uD800-\uFFFF]] |
| \uDFFF\uD800 |
| true \uDFFF 0 |
| |
| // Subtraction |
| [\u0000-\uD800\uDFFF&&[^\uD800\uDC00]] |
| \uD800 |
| true \uD800 0 |
| |
| [\u0000-\uD800\uDFFF&&[^\uD800\uDC00]] |
| \uDC00 |
| true \uDC00 0 |
| |
| [\u0000-\uD800\uDFFF&&[^\uD800\uDC00]] |
| \uD800\uDFFF |
| true \uD800\uDFFF 0 |
| |
| [\u0000-\uD800\uDFFF&&[^\uD800\uDBFF\uDC00]] |
| \uD800 |
| false 0 |
| |
| [\u0000-\uD800\uDFFF&&[^\uDC00\uD800\uDBFF]] |
| \uD800\uDC00 |
| true \uD800\uDC00 0 |
| |
| // Quantifiers |
| a\uD800\uDFFF? |
| a\uD800 |
| true a 0 |
| |
| a\uD800\uDFFF? |
| a\uDFFF |
| true a 0 |
| |
| a\uD800\uDFFF? |
| a\uD800\uDFFF |
| true a\uD800\uDFFF 0 |
| |
| a\uDFFF\uD800? |
| a\uDFFF |
| true a\uDFFF 0 |
| |
| a\uDFFF\uD800? |
| a\uD800 |
| false 0 |
| |
| \uD800\uDFFF\uDC00? |
| \uD800 |
| false 0 |
| |
| \uD800\uDFFF\uDC00? |
| \uD800\uDFFF |
| true \uD800\uDFFF 0 |
| |
| a\uD800\uDFFF?? |
| a\uDFFF |
| true a 0 |
| |
| a\uD800\uDFFF* |
| a |
| true a 0 |
| |
| a\uD800\uDFFF* |
| a\uD800 |
| true a 0 |
| |
| \uD800\uDFFF* |
| \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF |
| true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 |
| |
| \uD800\uDFFF* |
| \uD800\uDFFF\uDFFF\uDFFF\uDFFF |
| true \uD800\uDFFF 0 |
| |
| \uD800*\uDFFF |
| \uD800\uDFFF |
| false 0 |
| |
| a\uD800\uDFFF* |
| a\uD800 |
| true a 0 |
| |
| \uDFFF\uD800* |
| \uDFFF |
| true \uDFFF 0 |
| |
| \uDFFF\uD800* |
| \uDFFF\uD800\uD800\uD800 |
| true \uDFFF\uD800\uD800\uD800 0 |
| |
| \uD800\uDFFF+ |
| \uD800\uDFFF\uDFFF\uDFFF |
| true \uD800\uDFFF 0 |
| |
| \uD800\uDFFF+ |
| \uD800 |
| false 0 |
| |
| \uD800\uDFFF+ |
| \uD800\uDFFF |
| true \uD800\uDFFF 0 |
| |
| \uD800\uDFFF+ |
| \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF |
| true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 |
| |
| \uDFFF\uD800+ |
| \uDFFF\uD800\uDFFF\uD800 |
| false 0 |
| |
| \uD800+\uDFFF |
| \uD800\uDFFF |
| false 0 |
| |
| \uD800+\uDFFF |
| \uD800 |
| false 0 |
| |
| \uDFFF+\uD800 |
| \uD800 |
| false 0 |
| |
| \uDFFF+\uD800 |
| \uDFFF\uD800 |
| true \uDFFF\uD800 0 |
| |
| \uD800\uDFFF{3} |
| \uD800\uDFFF\uDFFF\uDFFF |
| false 0 |
| |
| \uD800\uDFFF{3} |
| \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF |
| true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 |
| |
| \uDFFF\uD800{3} |
| \uDFFF\uD800\uDFFF\uD800\uDFFF\uD800 |
| false 0 |
| |
| \uDFFF\uD800{3} |
| \uDFFF\uD800\uD800\uD800 |
| true \uDFFF\uD800\uD800\uD800 0 |
| |
| \uD800\uDFFF{2,} |
| \uD800\uDFFF |
| false 0 |
| |
| \uD800\uDFFF{2,} |
| \uD800\uDFFF\uDFFF |
| false 0 |
| |
| \uD800\uDFFF{2,} |
| \uD800\uDFFF\uD800\uDFFF |
| true \uD800\uDFFF\uD800\uDFFF 0 |
| |
| \uDFFF\uD800{2,} |
| \uDFFF\uD800\uDFFF\uD800 |
| false 0 |
| |
| \uDFFF\uD800{2,} |
| \uDFFF\uD800\uD800\uD800 |
| true \uDFFF\uD800\uD800\uD800 0 |
| |
| \uD800\uDFFF{3,4} |
| \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF |
| true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 |
| |
| \uD800\uDFFF{3,4} |
| \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800 |
| true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 |
| |
| \uD800\uDFFF{3,4} |
| \uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF |
| false 0 |
| |
| \uDFFF\uD800{3,5} |
| \uDFFF\uD800\uD800\uD800\uD800\uD800\uD800\uD800 |
| true \uDFFF\uD800\uD800\uD800\uD800\uD800 0 |
| |
| \uD800\uDFFF{3,5} |
| \uD800\uDFFF\uDFFF\uDFFF |
| false 0 |
| |
| \uD800\uDFFF{3,5} |
| \uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF |
| true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 |
| |
| // Groupings |
| (\uD800(\uDFFF)) |
| \uD800\uDFFF |
| false 2 |
| |
| (\uD800(\uDC00)(\uDFFF)) |
| \uD800\uDC00\uDFFF |
| false 3 |
| |
| ((\uD800)(\uDFFF)) |
| \uD800\uDFFF |
| false 3 |
| |
| (\uD800(\uDFFF)\uDFFF) |
| \uD800\uDFFF |
| false 2 |
| |
| (\uDFFF(\uD800)(\uDBFF)) |
| \uDFFF\uD800\uDBFF |
| true \uDFFF\uD800\uDBFF 3 \uDFFF\uD800\uDBFF \uD800 \uDBFF |
| |
| (\uDFFF(\uD800)(\uDC00)) |
| \uDFFF\uD800\uDC00 |
| false 3 |
| |
| (\uDFFF\uD800(\uDC00\uDBFF)) |
| \uDFFF\uD800\uDC00\uDBFF |
| false 2 |
| |
| (\uD800\uDFFF(\uDBFF)(\uDC00)) |
| \uD800\uDFFF\uDBFF\uDC00 |
| false 3 |
| |
| (\uD800\uDFFF(\uDBFF\uDC00)) |
| \uD800\uDFFF\uDBFF\uDC00 |
| true \uD800\uDFFF\uDBFF\uDC00 2 \uD800\uDFFF\uDBFF\uDC00 \uDBFF\uDC00 |