| #!/usr/bin/perl |
| # Copyright 2008 The RE2 Authors. All Rights Reserved. |
| # Use of this source code is governed by a BSD-style |
| # license that can be found in the LICENSE file. |
| |
| # Generate table entries giving character ranges |
| # for POSIX/Perl character classes. Rather than |
| # figure out what the definition is, it is easier to ask |
| # Perl about each letter from 0-128 and write down |
| # its answer. |
| |
# POSIX bracket-class names to tabulate.  Each entry is the text that goes
# inside an outer [...] in a regexp, e.g. "[:alnum:]".  The bare names are
# listed once and wrapped in "[:" ... ":]" here.
@posixclasses = map { "[:$_:]" } qw(
    alnum
    alpha
    ascii
    blank
    cntrl
    digit
    graph
    lower
    print
    punct
    space
    upper
    word
    xdigit
);
| |
# Perl shorthand classes to tabulate.  Each entry is a two-character
# string: a literal backslash followed by the class letter.
@perlclasses = ('\d', '\s', '\w');
| |
# ComputeClass($class)
#
# Asks Perl which code points in 0..128 match the character class
# "[$class]" and returns the answer as a list of [lo, hi] array refs,
# one per maximal run of consecutive matching code points, in
# ascending order.
#
# A run that is still open after code point 128 has been examined is
# closed at 255 rather than 128.
sub ComputeClass($) {
    my ($class) = @_;

    my $pattern = "[$class]";
    my $matcher = qr/$pattern/;

    my @ranges;
    my $run_start;    # first code point of the run in progress, if any

    foreach my $cp (0 .. 128) {
        if (chr($cp) =~ $matcher) {
            $run_start = $cp unless defined $run_start;
            next;
        }

        # Non-matching code point: close the run in progress, if there is one.
        next unless defined $run_start;
        push @ranges, [$run_start, $cp - 1];
        undef $run_start;
    }

    # Anything still matching at 128 is extended through 0xFF.
    push @ranges, [$run_start, 255] if defined $run_start;

    return @ranges;
}
| |
# PrintClass($cname, $name, @ranges)
#
# Prints to the currently selected output handle a C array definition
#
#     static URange16 code<cname>[] = { /* <name> */
#         { 0x<lo>, 0x<hi> },
#         ...
#     };
#
# with one initializer per element of @ranges; each element is an array
# ref whose first two items are the low and high code points.
#
# Returns a list of two strings, each a UGroup initializer that refers to
# the array just printed:
#   - the class itself, with sign +1;
#   - its negation, with sign -1.
# Backslashes in $name are doubled so the text can sit inside a C string
# literal.  The negated name is formed by inserting "^" after the first
# ":" when the name contains a colon ([:alpha:] -> [:^alpha:]); otherwise
# its lowercase letters are upper-cased (\d -> \D).
sub PrintClass($$@) {
    my ($cname, $name, @ranges) = @_;

    print "static URange16 code${cname}[] = { /* $name */\n";
    foreach my $range (@ranges) {
        printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
    }
    print "};\n";

    my $n = @ranges;

    my $escname = $name;
    $escname =~ s/\\/\\\\/g;

    # Declared with "my": left undeclared, this would be a package global
    # that leaks out of every call and breaks under "use strict".
    my $negname = $escname;
    if ($negname =~ /:/) {
        $negname =~ s/:/:^/;
    } else {
        $negname =~ y/a-z/A-Z/;
    }

    return "{ \"$escname\", +1, code$cname, $n }",
           "{ \"$negname\", -1, code$cname, $n }";
}
| |
# Running counter shared by all PrintClasses calls.  It is incremented
# once per class and passed to PrintClass as the table's name suffix.
my $gen = 0;

# PrintClasses($cname, @classes)
#
# For each class in @classes, computes its ranges with ComputeClass and
# prints its range table with PrintClass, collecting the UGroup
# initializer strings that PrintClass returns.  Then prints
#
#     UGroup <cname>_groups[] = { ...one line per initializer... };
#     int num_<cname>_groups = <number of initializers>;
sub PrintClasses($@) {
    my ($cname, @classes) = @_;

    my @entries;
    for my $class (@classes) {
        push @entries, PrintClass(++$gen, $class, ComputeClass($class));
    }

    print "UGroup ${cname}_groups[] = {\n";
    print map { "\t$_,\n" } @entries;
    print "};\n";

    print "int num_${cname}_groups = " . scalar(@entries) . ";\n";
}
| |
# Driver: write the generated-file banner and open the namespace, emit the
# Perl and POSIX group tables, then close the namespace.

print <<'HEADER';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

HEADER

PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

print <<'FOOTER';

} // namespace re2
FOOTER