| # The code here consists of hacks for pre-populating the known.tsv file. |
| |
| from c_analyzer.parser.preprocessor import _iter_clean_lines |
| from c_analyzer.parser.naive import ( |
| iter_variables, parse_variable_declaration, find_variables, |
| ) |
| from c_analyzer.common.known import HEADER as KNOWN_HEADER |
| from c_analyzer.common.info import UNKNOWN, ID |
| from c_analyzer.variables import Variable |
| from c_analyzer.util import write_tsv |
| |
| from . import SOURCE_DIRS, REPO_ROOT |
| from .known import DATA_FILE as KNOWN_FILE |
| from .files import iter_cpython_files |
| |
| |
| POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ') |
| POTS += tuple('const ' + v for v in POTS) |
| STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar') |
| |
| |
| def _parse_global(line, funcname=None): |
| line = line.strip() |
| if line.startswith('static '): |
| if '(' in line and '[' not in line and ' = ' not in line: |
| return None, None |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith('_Py_static_string('): |
| decl = line.strip(';').strip() |
| name = line.split('(')[1].split(',')[0].strip() |
| elif line.startswith('_Py_IDENTIFIER('): |
| decl = line.strip(';').strip() |
| name = 'PyId_' + line.split('(')[1].split(')')[0].strip() |
| elif funcname: |
| return None, None |
| |
| # global-only |
| elif line.startswith('PyAPI_DATA('): # only in .h files |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith('extern '): # only in .h files |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith('PyDoc_VAR('): |
| decl = line.strip(';').strip() |
| name = line.split('(')[1].split(')')[0].strip() |
| elif line.startswith(POTS): # implied static |
| if '(' in line and '[' not in line and ' = ' not in line: |
| return None, None |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static |
| name, decl = parse_variable_declaration(line) |
| elif line.startswith('struct '): |
| if not line.endswith(' = {'): |
| return None, None |
| if not line.partition(' ')[2].startswith(STRUCTS): |
| return None, None |
| # implied static |
| name, decl = parse_variable_declaration(line) |
| |
| # file-specific |
| elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): |
| # Objects/typeobject.c |
| funcname = line.split('(')[1].split(',')[0] |
| return [ |
| ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'), |
| ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'), |
| ] |
| elif line.startswith('WRAP_METHOD('): |
| # Objects/weakrefobject.c |
| funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(',')) |
| return [ |
| ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'), |
| ] |
| |
| else: |
| return None, None |
| return name, decl |
| |
| |
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the cached variable matching the given ID parts.

    "varcache" maps each filename to a dict of that file's variables,
    keyed by variable ID.  A file is scanned (and its entries printed)
    the first time it is requested.  Returns None if nothing matches.
    """
    # Look for the file.
    try:
        per_file = varcache[filename]
    except KeyError:
        # First request for this file: register the dict, then fill it.
        per_file = varcache[filename] = {}
        scanned = _iter_variables(filename, parse_variable=_parse_global)
        for variable in scanned:
            variable._isglobal = True
            per_file[variable.id] = variable
        for key in per_file:
            print(' ', key)

    # Look for the variable.
    if funcname != UNKNOWN:
        return per_file.pop((filename, funcname, name), None)

    # Unknown function: fall back to the first entry with the same name.
    match = next((key for key in per_file if key.name == name), None)
    if match is None:
        return None
    return per_file.pop(match)
| |
| |
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the cached variable matching "varid" (or None).

    If the ID has a known filename then only that file is searched;
    otherwise every file in "allfilenames" is tried in order.  When a
    global (funcname is None) with a known filename is not found there,
    the ".h" files in "allfilenames" are searched as a fallback.
    """
    has_filename = bool(varid.filename) and varid.filename != UNKNOWN
    candidates = [varid.filename] if has_filename else allfilenames

    for candidate in candidates:
        match = _pop_cached(varcache, candidate,
                            varid.funcname, varid.name)
        if match is not None:
            return match

    # The header fallback applies only to globals with a known filename.
    if not has_filename or varid.funcname is not None:
        return None
    for header in allfilenames:
        if header.endswith('.h'):
            match = _pop_cached(varcache, header, None, varid.name)
            if match is not None:
                return match
    return None
| |
| |
# Hard-coded declarations, keyed by variable name.  A value that does not
# already contain the variable's name is a prefix to which the name gets
# appended (see _known()).

MULTILINE = {
    # Python/Python-ast.c
    # NOTE(review): these first eight omit "static", unlike the rest of
    # the singletons below -- confirm that is intentional.
    **{
        f'{node}_singleton': 'PyObject *'
        for node in (
            'Load', 'Store', 'Del', 'AugLoad', 'AugStore', 'Param',
            'And', 'Or',
        )
    },
    **{
        f'{node}_singleton': 'static PyObject *'
        for node in (
            'Add', 'Sub', 'Mult', 'MatMult', 'Div', 'Mod', 'Pow',
            'LShift', 'RShift', 'BitOr', 'BitXor', 'BitAnd', 'FloorDiv',
            'Invert', 'Not', 'UAdd', 'USub',
            'Eq', 'NotEq', 'Lt', 'LtE', 'Gt', 'GtE',
            'Is', 'IsNot', 'In', 'NotIn',
        )
    },
    # Python/symtable.c
    **dict.fromkeys(
        ('top', 'lambda', 'genexpr', 'listcomp', 'setcomp', 'dictcomp',
         '__class__'),
        'static identifier ',
    ),
    # Python/compile.c
    '__doc__': 'static PyObject *',
    '__annotations__': 'static PyObject *',
    # Objects/floatobject.c
    **dict.fromkeys(
        ('double_format', 'float_format',
         'detected_double_format', 'detected_float_format'),
        'static float_format_type ',
    ),
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}

INLINE = {
    # Modules/_tracemalloc.c
    'allocators': (
        'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; '
        'PyMemAllocatorEx obj; } '
    ),
    # Modules/faulthandler.c
    'fatal_error': (
        'static struct { int enabled; PyObject *file; int fd; '
        'int all_threads; PyInterpreterState *interp; void *exc_handler; } '
    ),
    'thread': (
        'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; '
        'int repeat; PyInterpreterState *interp; int exit; char *header; '
        'size_t header_len; PyThread_type_lock cancel_event; '
        'PyThread_type_lock running; } '
    ),
    # Modules/signalmodule.c
    'Handlers': (
        'static volatile struct { _Py_atomic_int tripped; PyObject *func; } '
        'Handlers[NSIG]'
    ),
    'wakeup': (
        'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; '
        'int use_send; } '
    ),
    # Python/dynload_shlib.c
    'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
    # Objects/obmalloc.c
    '_PyMem_Debug': (
        'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; '
        'debug_alloc_api_t obj; } '
    ),
    # Python/bootstrap_hash.c
    'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
}

FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': (
        'int (*_PyOS_mystrnicmp_hack)'
        '(const char *, const char *, Py_ssize_t)'
    ),
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': (
        'char *(*PyOS_ReadlineFunctionPointer)'
        '(FILE *, FILE *, const char *)'
    ),
}

IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}

# All of the global tables combined; later tables win on duplicate names.
GLOBALS = {**MULTILINE, **INLINE, **FUNC, **IMPLIED}

# Function-local statics: name -> (filename, funcname, declaration).
LOCALS = {
    'buildinfo': (
        'Modules/getbuildinfo.c',
        'Py_GetBuildInfo',
        ('static char buildinfo[50 + sizeof(GITVERSION) + '
         '((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : '
         'sizeof(GITBRANCH))]'),
    ),
    'methods': (
        'Python/codecs.c',
        '_PyCodecRegistry_Init',
        'static struct { char *name; PyMethodDef def; } methods[]',
    ),
}
| |
| |
def _known(symbol):
    """Return the hard-coded Variable for the given symbol.

    A local symbol (one with a funcname) is resolved through LOCALS, but
    only when both its funcname and filename are UNKNOWN.  A global
    symbol must have a known filename; it is resolved through GLOBALS,
    falling back to a generic declaration for "*_methods" names and for
    the PyExc_*/_PyExc_* names in Objects/exceptions.c.

    Raises KeyError (with the symbol's name) when the symbol is not
    covered by any of the hard-coded tables.
    """
    import re  # local import: only needed for the name check below

    if symbol.funcname:
        if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
            raise KeyError(symbol.name)
        filename, funcname, decl = LOCALS[symbol.name]
        varid = ID(filename, funcname, symbol.name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(symbol.name)
    else:
        varid = symbol.id
        try:
            decl = GLOBALS[symbol.name]
        except KeyError:
            if symbol.name.endswith('_methods'):
                decl = 'static PyMethodDef '
            elif (symbol.filename == 'Objects/exceptions.c'
                  and symbol.name.startswith(('PyExc_', '_PyExc_'))):
                decl = 'static PyTypeObject '
            else:
                raise

    # A table entry may be just a prefix (e.g. "static PyObject *"), in
    # which case the name still has to be appended.  Match the name as a
    # whole identifier: a plain substring test wrongly treats
    # "float_format" as already present in "static float_format_type ".
    if not re.search(r'\b' + re.escape(symbol.name) + r'\b', decl):
        decl = decl + symbol.name
    return Variable(varid, 'static', decl)
| |
| |
def known_row(varid, decl):
    """Return the row tuple for the given variable ID and declaration.

    The row is (filename, funcname, name, 'variable', decl), where a
    missing (falsy) funcname is written as '-'.
    """
    funcname = varid.funcname if varid.funcname else '-'
    return (varid.filename, funcname, varid.name, 'variable', decl)
| |
| |
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield one row for each of the given symbols.

    Each symbol is resolved from the hard-coded tables first and then by
    searching the source files.  A symbol found in neither still gets a
    row, with UNKNOWN as its declaration.

    Only cached=True is supported; otherwise NotImplementedError is
    raised once iteration starts.
    """
    filenames = list(_get_filenames())
    cache = {}

    if not cached:
        raise NotImplementedError  # XXX incorporate KNOWN
        # Unreachable draft of the uncached approach, kept for reference.
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype == UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)

    for symbol in symbols:
        try:
            resolved = _known(symbol)
        except KeyError:
            # Not hard-coded, so search the source files instead.
            resolved = _find_match(symbol, cache, filenames)
            if resolved is None:
                resolved = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield _as_known(resolved.id, resolved.vartype)
| |
| |
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows generated for the given symbols to a TSV file.

    If no filename is given, the output goes to KNOWN_FILE with ".new"
    appended.
    """
    target = filename or KNOWN_FILE + '.new'
    _write_tsv(target, KNOWN_HEADER, _generate_rows(symbols))
| |
| |
if __name__ == '__main__':
    # Script entry point: generate the rows from the symbols that
    # c_symbols.binary reports for binary.PYTHON.
    from c_symbols import binary

    generate(binary.iter_symbols(binary.PYTHON, find_local_symbol=None))