#! /usr/bin/env python
""" | |
combinerefs path
A helper for analyzing PYTHONDUMPREFS output.
When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown
time Py_Finalize() prints the list of all live objects twice: first it
prints the repr() of each object while the interpreter is still fully intact.
After cleaning up everything it can, it prints all remaining live objects
again, but the second time just prints their addresses, refcounts, and type
names (because the interpreter has been torn down, calling repr methods at
this point can get into infinite loops or blow up).
Save all this output into a file, then run this script passing the path to
that file. The script finds both output chunks, combines them, then prints
a line of output for each object still alive at the end:
address refcnt typename repr
address is the address of the object, in whatever format the platform C
produces for a %p format code.
refcnt is of the form
"[" ref "]"
when the object's refcount is the same in both PYTHONDUMPREFS output blocks,
or
"[" ref_before "->" ref_after "]"
if the refcount changed.
typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS
output block.
repr is repr(object), extracted from the first PYTHONDUMPREFS output block.
CAUTION: If object is a container type, it may not actually contain all the
objects shown in the repr: the repr was captured from the first output block,
and some of the containees may have been released since then. For example,
it's common for the line showing the dict of interned strings to display
strings that no longer exist at the end of Py_Finalize; this can be recognized
(albeit painfully) because such containees don't have a line of their own.
The objects are listed in allocation order, with most-recently allocated
printed first, and the first object allocated printed last.
Simple examples:
00857060 [14] str '__len__'
The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS
output blocks said there were 14 references to it. This is probably due to
C modules that intern the string "__len__" and keep a reference to it in a
file static.
00857038 [46->5] tuple ()
46-5 = 41 references to the empty tuple were removed by the cleanup actions
between the times PYTHONDUMPREFS produced output.
00858028 [1025->1456] str '<dummy key>'
The string '<dummy key>', which is used in dictobject.c to overwrite a real
key that gets deleted, grew several hundred references during cleanup. It
suggests that stuff did get removed from dicts by cleanup, but that the dicts
themselves are staying alive for some reason. """
import re | |
import sys | |
# Generate lines from fileiter. If whilematch is true, continue reading
# while the regexp object pat matches line. If whilematch is false, lines
# are read so long as pat doesn't match them. In any case, the first line
# that doesn't match pat (when whilematch is true), or that does match pat
# (when whilematch is false), is lost, and fileiter will resume at the line
# following it.
def read(fileiter, pat, whilematch):
    """Yield lines from *fileiter* until the stop condition is reached.

    A line is yielded when the truth value of ``pat.match(line)`` equals
    *whilematch*.  The first line for which that is not the case ends the
    generator; that line has already been taken from *fileiter* and is not
    yielded, so later reads of *fileiter* resume at the line after it.
    """
    for text in fileiter:
        matched = bool(pat.match(text))
        if matched != whilematch:
            # Stop here; the offending line is consumed and dropped.
            return
        yield text
def combine(fname):
    """Combine the two PYTHONDUMPREFS output blocks stored in file *fname*.

    Lines up to and including the "Remaining objects:" header are skipped.
    The lines that follow, up to the "Remaining object addresses:" header,
    form the first block: each is expected to look like
    ``address [refcount] repr``; lines that do not are reported as skipped.
    The second block is the run of lines after that header that still match
    the same pattern; there the trailing text is the type name.

    For every second-block address that also appeared in the first block,
    one line ``address [refcnt] typename repr`` is printed, where the
    refcount is shown as ``[before->after]`` if it differs between blocks.
    Addresses seen only in the second block are reported separately.  A
    final summary line gives the number of objects in each block.

    Output goes to stdout; nothing is returned.
    """
    # "<address> [<refcount>] <rest of line>"
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')
    addr2rc = {}      # address -> refcount text from the first block
    addr2guts = {}    # address -> repr text from the first block
    before = 0
    after = 0

    # open() replaces the Python-2-only file() builtin, and the with block
    # closes the file even when parsing raises part-way through.
    with open(fname) as f:
        fi = iter(f)

        # Discard everything through the "Remaining objects:" header line.
        for line in read(fi, re.compile(r'^Remaining objects:$'), False):
            pass

        # First block: remember refcount and repr for each address.
        for line in read(fi, re.compile(r'^Remaining object addresses:$'), False):
            m = crack.match(line)
            if m:
                # Targets are assigned left to right, so addr is bound
                # before it is used as the key for the two dicts.
                addr, addr2rc[addr], addr2guts[addr] = m.groups()
                before += 1
            else:
                # line keeps its newline, so a blank line follows, exactly
                # as with the original print statement.
                print('??? skipped: %s' % line)

        # Second block: read() only yields lines that crack matches.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            assert m
            addr, rc, guts = m.groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down: %s'
                      % line.rstrip())
                continue
            if rc == addr2rc[addr]:
                rc_field = '[%s]' % rc
            else:
                rc_field = '[%s->%s]' % (addr2rc[addr], rc)
            # One single-argument print emits the same text on Python 2
            # and Python 3 as the original comma-separated print statements.
            print('%s %s %s %s' % (addr, rc_field, guts, addr2guts[addr]))

    print("%d objects before, %d after" % (before, after))
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the path of the
    # file holding the saved PYTHONDUMPREFS output.
    dump_path = sys.argv[1]
    combine(dump_path)