| #!/usr/bin/perl |
| # Copyright 2008 The RE2 Authors. All Rights Reserved. |
| # Use of this source code is governed by a BSD-style |
| # license that can be found in the LICENSE file. |
| |
| # Generate table entries giving character ranges |
| # for POSIX/Perl character classes. Rather than |
| # figure out what the definition is, it is easier to ask |
| # Perl about each letter from 0-128 and write down |
| # its answer. |
| |
# POSIX bracket-class names, spelled the way they appear inside a
# bracket expression.  One range table is generated per entry.
@posixclasses = qw(
  [:alnum:]
  [:alpha:]
  [:ascii:]
  [:blank:]
  [:cntrl:]
  [:digit:]
  [:graph:]
  [:lower:]
  [:print:]
  [:punct:]
  [:space:]
  [:upper:]
  [:word:]
  [:xdigit:]
);
| |
# Perl escape classes.  Each element is a backslash followed by one
# letter; single quotes keep the backslash literal.
@perlclasses = ('\d', '\s', '\w');
| |
# Forced membership answers, keyed "<class>:<code>".  A defined value
# here is used instead of asking the regexp engine.
%overrides = (
  # Code 11 is vertical tab.  Prior to Perl 5.18, \s did not match it,
  # and RE2 preserves that original behaviour.
  '\s:11' => 0,
);
| |
# Work out which character codes in 0..128 belong to a class.
#
#   $cname - class as written inside a bracket expression,
#            e.g. "[:alpha:]" or "\d".
#
# Returns a list of [lo, hi] array refs, one per maximal run of
# consecutive member codes, in ascending order.  A defined entry in
# %overrides under the key "<cname>:<code>" replaces the regexp answer
# for that code.
sub ComputeClass($) {
  my ($cname) = @_;
  my $matcher = qr/[$cname]/;
  my @ranges;
  my $run_start;    # undef while no run is open

  foreach my $code (0 .. 128) {
    my $forced = $overrides{"$cname:$code"};
    my $member = defined($forced) ? $forced : (chr($code) =~ $matcher);
    if ($member) {
      $run_start = $code unless defined $run_start;
      next;
    }
    # A non-member closes whatever run was in progress.
    push @ranges, [$run_start, $code - 1] if defined $run_start;
    undef $run_start;
  }

  # A run that is still open after code 128 is closed at 255.
  push @ranges, [$run_start, 255] if defined $run_start;

  return @ranges;
}
| |
# Emit the C++ range table for one character class and build the two
# UGroup initializers that refer to it.
#
#   $cnum   - number used to name the emitted array ("code<cnum>").
#   $cname  - class name as Perl spells it, e.g. "[:alpha:]" or "\d".
#   @ranges - list of [lo, hi] array refs giving the member ranges.
#
# Prints "static const URange16 code<cnum>[] = { ... };" to the currently
# selected output handle.  Returns a two-element list of strings: the
# initializer for the class itself (sign +1) and the initializer for its
# negation (sign -1).  Both name the same table and the same range count.
sub PrintClass($$@) {
  my ($cnum, $cname, @ranges) = @_;

  print "static const URange16 code${cnum}[] = { /* $cname */\n";
  foreach my $range (@ranges) {
    printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
  }
  print "};\n";

  my $n = @ranges;

  # Double every backslash so the name survives inside a C++ string literal.
  my $escname = $cname;
  $escname =~ s/\\/\\\\/g;

  # Keep the negated name lexical so calls leave no package-level state.
  my $negname = $escname;
  if ($negname =~ /:/) {
    # POSIX form: "[:name:]" becomes "[:^name:]" (first colon only).
    $negname =~ s/:/:^/;
  } else {
    # Perl form: the negation is the upper-case letter, "\d" -> "\D".
    $negname =~ y/a-z/A-Z/;
  }

  return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
}
| |
# Running table number shared by every PrintClasses call, so that each
# emitted "code<N>" array gets a distinct name across groups.
my $cnum = 0;

# Emit the range tables for a family of classes, followed by the UGroup
# array that indexes them and a constant holding the array length.
#
#   $pname   - family prefix used in the emitted identifiers
#              ("<pname>_groups", "num_<pname>_groups").
#   @classes - class names to process, in output order.
#
# All output goes to the currently selected handle.
sub PrintClasses($@) {
  my ($pname, @classes) = @_;

  my @rows;
  for my $class (@classes) {
    $cnum++;
    push @rows, PrintClass($cnum, $class, ComputeClass($class));
  }

  print "const UGroup ${pname}_groups[] = {\n";
  print join('', map { "\t$_,\n" } @rows);
  print "};\n";
  print "const int num_${pname}_groups = " . scalar(@rows) . ";\n";
}
| |
# Prologue of the generated C++ file.  The text contains nothing to
# interpolate, so a literal heredoc is used.
print <<'HEADER';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

HEADER

# Perl escape classes are emitted first, then the POSIX classes; the
# order determines the code<N> numbering of the tables.
for my $family (["perl", \@perlclasses], ["posix", \@posixclasses]) {
  my ($prefix, $names) = @{$family};
  PrintClasses($prefix, @{$names});
}

# Epilogue: close the namespace opened in the prologue.
print <<'FOOTER';

} // namespace re2
FOOTER