#! /usr/bin/env python | |
# Fix Python source files to use the new equality test operator, i.e., | |
# if x = y: ... | |
# is changed to | |
# if x == y: ... | |
# The script correctly tokenizes the Python program to reliably | |
# distinguish between assignments and equality tests. | |
# | |
# Command line arguments are files or directories to be processed. | |
# Directories are searched recursively for files whose name looks | |
# like a python module. | |
# Symbolic links are always ignored (except as explicit directory | |
# arguments). Of course, the original file is kept as a back-up | |
# (with a "~" attached to its name). | |
# It complains about binaries (files containing null bytes) | |
# and about files that are ostensibly not Python files: if the first | |
# line starts with '#!' and does not contain the string 'python'. | |
# | |
# Changes made are reported to stdout in a diff-like format. | |
# | |
# Undoubtedly you can do this using find and sed or perl, but this is | |
# a nice example of Python code that recurses down a directory tree | |
# and uses regular expressions. Also note several subtleties like | |
# preserving the file's mode and avoiding to even write a temp file | |
# when no changes are needed for a file. | |
# | |
# NB: by changing only the function fixline() you can turn this | |
# into a program for a different change to Python programs... | |
import sys | |
import re | |
import os | |
from stat import * | |
import string | |
err = sys.stderr.write | |
dbg = err | |
rep = sys.stdout.write | |
def main(): | |
bad = 0 | |
if not sys.argv[1:]: # No arguments | |
err('usage: ' + sys.argv[0] + ' file-or-directory ...\n') | |
sys.exit(2) | |
for arg in sys.argv[1:]: | |
if os.path.isdir(arg): | |
if recursedown(arg): bad = 1 | |
elif os.path.islink(arg): | |
err(arg + ': will not process symbolic links\n') | |
bad = 1 | |
else: | |
if fix(arg): bad = 1 | |
sys.exit(bad) | |
ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$') | |
def ispython(name): | |
return ispythonprog.match(name) >= 0 | |
def recursedown(dirname): | |
dbg('recursedown(%r)\n' % (dirname,)) | |
bad = 0 | |
try: | |
names = os.listdir(dirname) | |
except os.error, msg: | |
err('%s: cannot list directory: %r\n' % (dirname, msg)) | |
return 1 | |
names.sort() | |
subdirs = [] | |
for name in names: | |
if name in (os.curdir, os.pardir): continue | |
fullname = os.path.join(dirname, name) | |
if os.path.islink(fullname): pass | |
elif os.path.isdir(fullname): | |
subdirs.append(fullname) | |
elif ispython(name): | |
if fix(fullname): bad = 1 | |
for fullname in subdirs: | |
if recursedown(fullname): bad = 1 | |
return bad | |
def fix(filename): | |
## dbg('fix(%r)\n' % (dirname,)) | |
try: | |
f = open(filename, 'r') | |
except IOError, msg: | |
err('%s: cannot open: %r\n' % (filename, msg)) | |
return 1 | |
head, tail = os.path.split(filename) | |
tempname = os.path.join(head, '@' + tail) | |
g = None | |
# If we find a match, we rewind the file and start over but | |
# now copy everything to a temp file. | |
lineno = 0 | |
while 1: | |
line = f.readline() | |
if not line: break | |
lineno = lineno + 1 | |
if g is None and '\0' in line: | |
# Check for binary files | |
err(filename + ': contains null bytes; not fixed\n') | |
f.close() | |
return 1 | |
if lineno == 1 and g is None and line[:2] == '#!': | |
# Check for non-Python scripts | |
words = string.split(line[2:]) | |
if words and re.search('[pP]ython', words[0]) < 0: | |
msg = filename + ': ' + words[0] | |
msg = msg + ' script; not fixed\n' | |
err(msg) | |
f.close() | |
return 1 | |
while line[-2:] == '\\\n': | |
nextline = f.readline() | |
if not nextline: break | |
line = line + nextline | |
lineno = lineno + 1 | |
newline = fixline(line) | |
if newline != line: | |
if g is None: | |
try: | |
g = open(tempname, 'w') | |
except IOError, msg: | |
f.close() | |
err('%s: cannot create: %r\n' % (tempname, msg)) | |
return 1 | |
f.seek(0) | |
lineno = 0 | |
rep(filename + ':\n') | |
continue # restart from the beginning | |
rep(repr(lineno) + '\n') | |
rep('< ' + line) | |
rep('> ' + newline) | |
if g is not None: | |
g.write(newline) | |
# End of file | |
f.close() | |
if not g: return 0 # No changes | |
# Finishing touch -- move files | |
# First copy the file's mode to the temp file | |
try: | |
statbuf = os.stat(filename) | |
os.chmod(tempname, statbuf[ST_MODE] & 07777) | |
except os.error, msg: | |
err('%s: warning: chmod failed (%r)\n' % (tempname, msg)) | |
# Then make a backup of the original file as filename~ | |
try: | |
os.rename(filename, filename + '~') | |
except os.error, msg: | |
err('%s: warning: backup failed (%r)\n' % (filename, msg)) | |
# Now move the temp file to the original file | |
try: | |
os.rename(tempname, filename) | |
except os.error, msg: | |
err('%s: rename failed (%r)\n' % (filename, msg)) | |
return 1 | |
# Return succes | |
return 0 | |
from tokenize import tokenprog | |
match = {'if':':', 'elif':':', 'while':':', 'return':'\n', \ | |
'(':')', '[':']', '{':'}', '`':'`'} | |
def fixline(line): | |
# Quick check for easy case | |
if '=' not in line: return line | |
i, n = 0, len(line) | |
stack = [] | |
while i < n: | |
j = tokenprog.match(line, i) | |
if j < 0: | |
# A bad token; forget about the rest of this line | |
print '(Syntax error:)' | |
print line, | |
return line | |
a, b = tokenprog.regs[3] # Location of the token proper | |
token = line[a:b] | |
i = i+j | |
if stack and token == stack[-1]: | |
del stack[-1] | |
elif match.has_key(token): | |
stack.append(match[token]) | |
elif token == '=' and stack: | |
line = line[:a] + '==' + line[b:] | |
i, n = a + len('=='), len(line) | |
elif token == '==' and not stack: | |
print '(Warning: \'==\' at top level:)' | |
print line, | |
return line | |
if __name__ == "__main__": | |
main() |