| #include <stdbool.h> |
| |
| #include "Python.h" |
| #include "pycore_flowgraph.h" |
| #include "pycore_compile.h" |
| #include "pycore_pymem.h" // _PyMem_IsPtrFreed() |
| #include "pycore_code.h" // write_location_entry_start() |
| |
| |
/* Initial sizes, in bytes, of the three bytes objects allocated by
   assemble_init().  Each of them is doubled on demand while assembling. */
#define DEFAULT_CODE_SIZE 128     /* a_bytecode */
#define DEFAULT_LNOTAB_SIZE 16    /* a_except_table */
#define DEFAULT_CNOTAB_SIZE 32    /* a_linetable; write_location_info_entry()
                                     asserts this exceeds one maximal entry */

/* Status codes returned by the int-returning helpers in this file. */
#undef SUCCESS
#undef ERROR
#define SUCCESS 0
#define ERROR (-1)

/* Evaluate X once and return ERROR from the enclosing function if it
   yielded -1.  The do { } while (0) wrapper makes the expansion a single
   statement, so the macro can be used as the body of an unbraced if/else.
   Call sites must terminate it with a semicolon. */
#define RETURN_IF_ERROR(X)  \
    do {                    \
        if ((X) == -1) {    \
            return ERROR;   \
        }                   \
    } while (0)
| |
| typedef _PyCompilerSrcLocation location; |
| typedef _PyCfgInstruction cfg_instr; |
| typedef _PyCfgBasicblock basicblock; |
| |
| static inline bool |
| same_location(location a, location b) |
| { |
| return a.lineno == b.lineno && |
| a.end_lineno == b.end_lineno && |
| a.col_offset == b.col_offset && |
| a.end_col_offset == b.end_col_offset; |
| } |
| |
| struct assembler { |
| PyObject *a_bytecode; /* bytes containing bytecode */ |
| int a_offset; /* offset into bytecode */ |
| PyObject *a_except_table; /* bytes containing exception table */ |
| int a_except_table_off; /* offset into exception table */ |
| /* Location Info */ |
| int a_lineno; /* lineno of last emitted instruction */ |
| PyObject* a_linetable; /* bytes containing location info */ |
| int a_location_off; /* offset of last written location info frame */ |
| }; |
| |
| static int |
| assemble_init(struct assembler *a, int firstlineno) |
| { |
| memset(a, 0, sizeof(struct assembler)); |
| a->a_lineno = firstlineno; |
| a->a_linetable = NULL; |
| a->a_location_off = 0; |
| a->a_except_table = NULL; |
| a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); |
| if (a->a_bytecode == NULL) { |
| goto error; |
| } |
| a->a_linetable = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE); |
| if (a->a_linetable == NULL) { |
| goto error; |
| } |
| a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); |
| if (a->a_except_table == NULL) { |
| goto error; |
| } |
| return SUCCESS; |
| error: |
| Py_XDECREF(a->a_bytecode); |
| Py_XDECREF(a->a_linetable); |
| Py_XDECREF(a->a_except_table); |
| return ERROR; |
| } |
| |
| static void |
| assemble_free(struct assembler *a) |
| { |
| Py_XDECREF(a->a_bytecode); |
| Py_XDECREF(a->a_linetable); |
| Py_XDECREF(a->a_except_table); |
| } |
| |
| static inline void |
| write_except_byte(struct assembler *a, int byte) { |
| unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table); |
| p[a->a_except_table_off++] = byte; |
| } |
| |
/* Set on every byte of an encoded value except the last one. */
#define CONTINUATION_BIT 64

/* Append `value` to the exception table as a sequence of 6-bit groups,
   starting at the highest group needed to represent it and ending with the
   lowest.  Every byte but the last carries CONTINUATION_BIT.  `msb` (0 or
   128) is OR-ed into the first byte written only. */
static void
assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
{
    assert ((msb | 128) == 128);
    assert(value >= 0 && value < (1 << 30));
    /* Top group: the assert above already limits value >> 24 to six bits,
       so this one is written without a mask. */
    if (value >= 1 << 24) {
        write_except_byte(a, (value >> 24) | CONTINUATION_BIT | msb);
        msb = 0;
    }
    /* Remaining continuation groups: bits 18-23, 12-17 and 6-11. */
    for (int shift = 18; shift >= 6; shift -= 6) {
        if (value >= 1 << shift) {
            int group = (value >> shift) & 0x3f;
            write_except_byte(a, group | CONTINUATION_BIT | msb);
            msb = 0;
        }
    }
    /* The lowest six bits are always written and end the value. */
    write_except_byte(a, (value&0x3f) | msb);
}
| |
| /* See Objects/exception_handling_notes.txt for details of layout */ |
| #define MAX_SIZE_OF_ENTRY 20 |
| |
| static int |
| assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler) |
| { |
| Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table); |
| if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) { |
| RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2)); |
| } |
| int size = end-start; |
| assert(end > start); |
| int target = handler->b_offset; |
| int depth = handler->b_startdepth - 1; |
| if (handler->b_preserve_lasti) { |
| depth -= 1; |
| } |
| assert(depth >= 0); |
| int depth_lasti = (depth<<1) | handler->b_preserve_lasti; |
| assemble_emit_exception_table_item(a, start, (1<<7)); |
| assemble_emit_exception_table_item(a, size, 0); |
| assemble_emit_exception_table_item(a, target, 0); |
| assemble_emit_exception_table_item(a, depth_lasti, 0); |
| return SUCCESS; |
| } |
| |
| static int |
| assemble_exception_table(struct assembler *a, basicblock *entryblock) |
| { |
| basicblock *b; |
| int ioffset = 0; |
| basicblock *handler = NULL; |
| int start = -1; |
| for (b = entryblock; b != NULL; b = b->b_next) { |
| ioffset = b->b_offset; |
| for (int i = 0; i < b->b_iused; i++) { |
| cfg_instr *instr = &b->b_instr[i]; |
| if (instr->i_except != handler) { |
| if (handler != NULL) { |
| RETURN_IF_ERROR( |
| assemble_emit_exception_table_entry(a, start, ioffset, handler)); |
| } |
| start = ioffset; |
| handler = instr->i_except; |
| } |
| ioffset += _PyCfg_InstrSize(instr); |
| } |
| } |
| if (handler != NULL) { |
| RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, handler)); |
| } |
| return SUCCESS; |
| } |
| |
| |
| /* Code location emitting code. See locations.md for a description of the format. */ |
| |
| #define MSB 0x80 |
| |
| static void |
| write_location_byte(struct assembler* a, int val) |
| { |
| PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255; |
| a->a_location_off++; |
| } |
| |
| |
| static uint8_t * |
| location_pointer(struct assembler* a) |
| { |
| return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) + |
| a->a_location_off; |
| } |
| |
| static void |
| write_location_first_byte(struct assembler* a, int code, int length) |
| { |
| a->a_location_off += write_location_entry_start( |
| location_pointer(a), code, length); |
| } |
| |
| static void |
| write_location_varint(struct assembler* a, unsigned int val) |
| { |
| uint8_t *ptr = location_pointer(a); |
| a->a_location_off += write_varint(ptr, val); |
| } |
| |
| |
| static void |
| write_location_signed_varint(struct assembler* a, int val) |
| { |
| uint8_t *ptr = location_pointer(a); |
| a->a_location_off += write_signed_varint(ptr, val); |
| } |
| |
| static void |
| write_location_info_short_form(struct assembler* a, int length, int column, int end_column) |
| { |
| assert(length > 0 && length <= 8); |
| int column_low_bits = column & 7; |
| int column_group = column >> 3; |
| assert(column < 80); |
| assert(end_column >= column); |
| assert(end_column - column < 16); |
| write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length); |
| write_location_byte(a, (column_low_bits << 4) | (end_column - column)); |
| } |
| |
| static void |
| write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column) |
| { |
| assert(length > 0 && length <= 8); |
| assert(line_delta >= 0 && line_delta < 3); |
| assert(column < 128); |
| assert(end_column < 128); |
| write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length); |
| write_location_byte(a, column); |
| write_location_byte(a, end_column); |
| } |
| |
| static void |
| write_location_info_long_form(struct assembler* a, location loc, int length) |
| { |
| assert(length > 0 && length <= 8); |
| write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length); |
| write_location_signed_varint(a, loc.lineno - a->a_lineno); |
| assert(loc.end_lineno >= loc.lineno); |
| write_location_varint(a, loc.end_lineno - loc.lineno); |
| write_location_varint(a, loc.col_offset + 1); |
| write_location_varint(a, loc.end_col_offset + 1); |
| } |
| |
| static void |
| write_location_info_none(struct assembler* a, int length) |
| { |
| write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length); |
| } |
| |
| static void |
| write_location_info_no_column(struct assembler* a, int length, int line_delta) |
| { |
| write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length); |
| write_location_signed_varint(a, line_delta); |
| } |
| |
| #define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */ |
| |
| |
| static int |
| write_location_info_entry(struct assembler* a, location loc, int isize) |
| { |
| Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable); |
| if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) { |
| assert(len > THEORETICAL_MAX_ENTRY_SIZE); |
| RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, len*2)); |
| } |
| if (loc.lineno < 0) { |
| write_location_info_none(a, isize); |
| return SUCCESS; |
| } |
| int line_delta = loc.lineno - a->a_lineno; |
| int column = loc.col_offset; |
| int end_column = loc.end_col_offset; |
| assert(column >= -1); |
| assert(end_column >= -1); |
| if (column < 0 || end_column < 0) { |
| if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) { |
| write_location_info_no_column(a, isize, line_delta); |
| a->a_lineno = loc.lineno; |
| return SUCCESS; |
| } |
| } |
| else if (loc.end_lineno == loc.lineno) { |
| if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) { |
| write_location_info_short_form(a, isize, column, end_column); |
| return SUCCESS; |
| } |
| if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) { |
| write_location_info_oneline_form(a, isize, line_delta, column, end_column); |
| a->a_lineno = loc.lineno; |
| return SUCCESS; |
| } |
| } |
| write_location_info_long_form(a, loc, isize); |
| a->a_lineno = loc.lineno; |
| return SUCCESS; |
| } |
| |
| static int |
| assemble_emit_location(struct assembler* a, location loc, int isize) |
| { |
| if (isize == 0) { |
| return SUCCESS; |
| } |
| while (isize > 8) { |
| RETURN_IF_ERROR(write_location_info_entry(a, loc, 8)); |
| isize -= 8; |
| } |
| return write_location_info_entry(a, loc, isize); |
| } |
| |
| static int |
| assemble_location_info(struct assembler *a, basicblock *entryblock, int firstlineno) |
| { |
| a->a_lineno = firstlineno; |
| location loc = NO_LOCATION; |
| int size = 0; |
| for (basicblock *b = entryblock; b != NULL; b = b->b_next) { |
| for (int j = 0; j < b->b_iused; j++) { |
| if (!same_location(loc, b->b_instr[j].i_loc)) { |
| RETURN_IF_ERROR(assemble_emit_location(a, loc, size)); |
| loc = b->b_instr[j].i_loc; |
| size = 0; |
| } |
| size += _PyCfg_InstrSize(&b->b_instr[j]); |
| } |
| } |
| RETURN_IF_ERROR(assemble_emit_location(a, loc, size)); |
| return SUCCESS; |
| } |
| |
| static void |
| write_instr(_Py_CODEUNIT *codestr, cfg_instr *instruction, int ilen) |
| { |
| int opcode = instruction->i_opcode; |
| assert(!IS_PSEUDO_OPCODE(opcode)); |
| int oparg = instruction->i_oparg; |
| assert(HAS_ARG(opcode) || oparg == 0); |
| int caches = _PyOpcode_Caches[opcode]; |
| switch (ilen - caches) { |
| case 4: |
| codestr->op.code = EXTENDED_ARG; |
| codestr->op.arg = (oparg >> 24) & 0xFF; |
| codestr++; |
| /* fall through */ |
| case 3: |
| codestr->op.code = EXTENDED_ARG; |
| codestr->op.arg = (oparg >> 16) & 0xFF; |
| codestr++; |
| /* fall through */ |
| case 2: |
| codestr->op.code = EXTENDED_ARG; |
| codestr->op.arg = (oparg >> 8) & 0xFF; |
| codestr++; |
| /* fall through */ |
| case 1: |
| codestr->op.code = opcode; |
| codestr->op.arg = oparg & 0xFF; |
| codestr++; |
| break; |
| default: |
| Py_UNREACHABLE(); |
| } |
| while (caches--) { |
| codestr->op.code = CACHE; |
| codestr->op.arg = 0; |
| codestr++; |
| } |
| } |
| |
| /* assemble_emit_instr() |
| Extend the bytecode with a new instruction. |
| Update lnotab if necessary. |
| */ |
| |
| static int |
| assemble_emit_instr(struct assembler *a, cfg_instr *i) |
| { |
| Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); |
| _Py_CODEUNIT *code; |
| |
| int size = _PyCfg_InstrSize(i); |
| if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) { |
| if (len > PY_SSIZE_T_MAX / 2) { |
| return ERROR; |
| } |
| RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, len * 2)); |
| } |
| code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; |
| a->a_offset += size; |
| write_instr(code, i, size); |
| return SUCCESS; |
| } |
| |
| static int |
| assemble_emit(struct assembler *a, basicblock *entryblock, int first_lineno, |
| PyObject *const_cache) |
| { |
| RETURN_IF_ERROR(assemble_init(a, first_lineno)); |
| |
| for (basicblock *b = entryblock; b != NULL; b = b->b_next) { |
| for (int j = 0; j < b->b_iused; j++) { |
| RETURN_IF_ERROR(assemble_emit_instr(a, &b->b_instr[j])); |
| } |
| } |
| |
| RETURN_IF_ERROR(assemble_location_info(a, entryblock, a->a_lineno)); |
| |
| RETURN_IF_ERROR(assemble_exception_table(a, entryblock)); |
| |
| RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, a->a_except_table_off)); |
| RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_except_table)); |
| |
| RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, a->a_location_off)); |
| RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_linetable)); |
| |
| RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, a->a_offset * sizeof(_Py_CODEUNIT))); |
| RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_bytecode)); |
| return SUCCESS; |
| } |
| |
| static PyObject * |
| dict_keys_inorder(PyObject *dict, Py_ssize_t offset) |
| { |
| PyObject *tuple, *k, *v; |
| Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict); |
| |
| tuple = PyTuple_New(size); |
| if (tuple == NULL) |
| return NULL; |
| while (PyDict_Next(dict, &pos, &k, &v)) { |
| i = PyLong_AS_LONG(v); |
| assert((i - offset) < size); |
| assert((i - offset) >= 0); |
| PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k)); |
| } |
| return tuple; |
| } |
| |
| // This is in codeobject.c. |
| extern void _Py_set_localsplus_info(int, PyObject *, unsigned char, |
| PyObject *, PyObject *); |
| |
| static void |
| compute_localsplus_info(_PyCompile_CodeUnitMetadata *umd, int nlocalsplus, |
| PyObject *names, PyObject *kinds) |
| { |
| PyObject *k, *v; |
| Py_ssize_t pos = 0; |
| while (PyDict_Next(umd->u_varnames, &pos, &k, &v)) { |
| int offset = (int)PyLong_AS_LONG(v); |
| assert(offset >= 0); |
| assert(offset < nlocalsplus); |
| // For now we do not distinguish arg kinds. |
| _PyLocals_Kind kind = CO_FAST_LOCAL; |
| if (PyDict_GetItem(umd->u_cellvars, k) != NULL) { |
| kind |= CO_FAST_CELL; |
| } |
| _Py_set_localsplus_info(offset, k, kind, names, kinds); |
| } |
| int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames); |
| |
| // This counter mirrors the fix done in fix_cell_offsets(). |
| int numdropped = 0; |
| pos = 0; |
| while (PyDict_Next(umd->u_cellvars, &pos, &k, &v)) { |
| if (PyDict_GetItem(umd->u_varnames, k) != NULL) { |
| // Skip cells that are already covered by locals. |
| numdropped += 1; |
| continue; |
| } |
| int offset = (int)PyLong_AS_LONG(v); |
| assert(offset >= 0); |
| offset += nlocals - numdropped; |
| assert(offset < nlocalsplus); |
| _Py_set_localsplus_info(offset, k, CO_FAST_CELL, names, kinds); |
| } |
| |
| pos = 0; |
| while (PyDict_Next(umd->u_freevars, &pos, &k, &v)) { |
| int offset = (int)PyLong_AS_LONG(v); |
| assert(offset >= 0); |
| offset += nlocals - numdropped; |
| assert(offset < nlocalsplus); |
| _Py_set_localsplus_info(offset, k, CO_FAST_FREE, names, kinds); |
| } |
| } |
| |
| static PyCodeObject * |
| makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_cache, |
| PyObject *constslist, int maxdepth, int nlocalsplus, int code_flags, |
| PyObject *filename) |
| { |
| PyCodeObject *co = NULL; |
| PyObject *names = NULL; |
| PyObject *consts = NULL; |
| PyObject *localsplusnames = NULL; |
| PyObject *localspluskinds = NULL; |
| names = dict_keys_inorder(umd->u_names, 0); |
| if (!names) { |
| goto error; |
| } |
| if (_PyCompile_ConstCacheMergeOne(const_cache, &names) < 0) { |
| goto error; |
| } |
| |
| consts = PyList_AsTuple(constslist); /* PyCode_New requires a tuple */ |
| if (consts == NULL) { |
| goto error; |
| } |
| if (_PyCompile_ConstCacheMergeOne(const_cache, &consts) < 0) { |
| goto error; |
| } |
| |
| assert(umd->u_posonlyargcount < INT_MAX); |
| assert(umd->u_argcount < INT_MAX); |
| assert(umd->u_kwonlyargcount < INT_MAX); |
| int posonlyargcount = (int)umd->u_posonlyargcount; |
| int posorkwargcount = (int)umd->u_argcount; |
| assert(INT_MAX - posonlyargcount - posorkwargcount > 0); |
| int kwonlyargcount = (int)umd->u_kwonlyargcount; |
| |
| localsplusnames = PyTuple_New(nlocalsplus); |
| if (localsplusnames == NULL) { |
| goto error; |
| } |
| localspluskinds = PyBytes_FromStringAndSize(NULL, nlocalsplus); |
| if (localspluskinds == NULL) { |
| goto error; |
| } |
| compute_localsplus_info(umd, nlocalsplus, localsplusnames, localspluskinds); |
| |
| struct _PyCodeConstructor con = { |
| .filename = filename, |
| .name = umd->u_name, |
| .qualname = umd->u_qualname ? umd->u_qualname : umd->u_name, |
| .flags = code_flags, |
| |
| .code = a->a_bytecode, |
| .firstlineno = umd->u_firstlineno, |
| .linetable = a->a_linetable, |
| |
| .consts = consts, |
| .names = names, |
| |
| .localsplusnames = localsplusnames, |
| .localspluskinds = localspluskinds, |
| |
| .argcount = posonlyargcount + posorkwargcount, |
| .posonlyargcount = posonlyargcount, |
| .kwonlyargcount = kwonlyargcount, |
| |
| .stacksize = maxdepth, |
| |
| .exceptiontable = a->a_except_table, |
| }; |
| |
| if (_PyCode_Validate(&con) < 0) { |
| goto error; |
| } |
| |
| if (_PyCompile_ConstCacheMergeOne(const_cache, &localsplusnames) < 0) { |
| goto error; |
| } |
| con.localsplusnames = localsplusnames; |
| |
| co = _PyCode_New(&con); |
| if (co == NULL) { |
| goto error; |
| } |
| |
| error: |
| Py_XDECREF(names); |
| Py_XDECREF(consts); |
| Py_XDECREF(localsplusnames); |
| Py_XDECREF(localspluskinds); |
| return co; |
| } |
| |
| |
| PyCodeObject * |
| _PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache, |
| PyObject *consts, int maxdepth, basicblock *entryblock, |
| int nlocalsplus, int code_flags, PyObject *filename) |
| { |
| PyCodeObject *co = NULL; |
| |
| struct assembler a; |
| int res = assemble_emit(&a, entryblock, umd->u_firstlineno, const_cache); |
| if (res == SUCCESS) { |
| co = makecode(umd, &a, const_cache, consts, maxdepth, nlocalsplus, |
| code_flags, filename); |
| } |
| assemble_free(&a); |
| return co; |
| } |