blob: 5f1d91fcb585f6062c1d9444b45acb01400063c7 [file] [log] [blame]
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function
import argparse
import os.path as path
def read_tests(f):
    """Parse an AT&T POSIX regex test file into a list of match tests.

    Each line of the file is split on tabs; empty fields are dropped and the
    remaining fields are stripped of surrounding whitespace. A line is used
    only when it has 4 or 5 fields, its first (option) field contains ``E``
    and that field does not start with ``#``. The first four fields are read
    as ``options, pattern, text, expected-groups``.

    Special field values handled here:

    * expected-groups ``NOMATCH`` -> no match spans are recorded.
    * expected-groups containing a comma -> only the FIRST ``(start,end)``
      span is recorded; later spans on the line are ignored.
    * any other expected-groups value -> the line is skipped.
    * text ``NULL`` -> the empty string.
    * pattern ``SAME`` -> the (already processed) pattern of the most
      recent line that produced a test. Skipped lines do not update it.
    * ``$`` in the options -> escape sequences in pattern and text are
      expanded with the ``unicode_escape`` codec before re-escaping.
    * ``i`` in the options -> ``'case-insensitive'`` is added to the
      test's options.

    Args:
        f: Path of the test file to read.

    Returns:
        A list of dicts with the keys ``name``, ``options``, ``pattern``,
        ``input`` and ``matches``. Every value is an already formatted
        string: ``name`` is wrapped in double quotes, ``pattern`` and
        ``input`` are wrapped in triple single quotes, and ``options`` /
        ``matches`` are Python list representations.

    Raises:
        AttributeError: If ``SAME`` is used before any line has produced a
            test (there is no previous pattern to reuse).
        ValueError: If the first expected span is not two comma-separated
            integers.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    prev_pattern = None

    # Open via a context manager so the handle is closed deterministically,
    # even if a malformed line raises part-way through the file.
    with open(f) as datfile:
        for lineno, line in enumerate(datfile, 1):
            fields = list(filter(None, map(str.strip, line.split('\t'))))
            if not (4 <= len(fields) <= 5) \
               or 'E' not in fields[0] or fields[0][0] == '#':
                continue

            terse_opts, pat, text, sgroups = fields[0:4]
            if sgroups == 'NOMATCH':
                groups = []  # groups as integer ranges
            elif ',' in sgroups:
                # Spans look like "(s,e)(s,e)..."; only the first one is
                # kept, so there is no need to walk the remaining spans.
                first_span = sgroups.split(')(')[0].strip('()')
                s, e = map(str.strip, first_span.split(','))
                groups = [[int(s), int(e)]]
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            # Every generated test carries the 'escaped' option.
            opts = ['escaped']
            if text == "NULL":
                text = ""
            if pat == 'SAME':
                pat = prev_pattern
            if '$' in terse_opts:
                # Expand the escape sequences written in the file first.
                pat = pat.encode('utf-8').decode('unicode_escape')
                text = text.encode('utf-8').decode('unicode_escape')
            # Re-escape the text so it is emitted in escaped form.
            text = text.encode('unicode_escape').decode('utf-8')
            if 'i' in terse_opts:
                opts.append('case-insensitive')

            # Escaping doubles every backslash; collapse the pairs again so
            # backslashes written in the pattern stay single.
            pat = pat.encode('unicode_escape').decode('utf-8')
            pat = pat.replace('\\\\', '\\')
            tests.append({
                'name': '"%s%d"' % (basename, lineno),
                'options': repr(opts),
                'pattern': "'''%s'''" % pat,
                'input': "'''%s'''" % text,
                'matches': str(groups),
            })
            prev_pattern = pat
    return tests
if __name__ == '__main__':
    # Script entry point: take the path of one test file on the command
    # line, convert it with read_tests() and print every resulting test to
    # stdout as a '[[tests]]' header followed by one 'key = value' line per
    # field and a blank separator line.
    cli = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    cli.add_argument('datfile', help='A dat AT&T POSIX test file.')
    options = cli.parse_args()

    for record in read_tests(options.datfile):
        print('[[tests]]')
        for key, value in record.items():
            print('%s = %s' % (key, value))
        print('')