| /* |
| * Copyright 2011 Christoph Bumiller |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF |
| * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include "nv50/codegen/nv50_ir.h" |
| #include "nv50/codegen/nv50_ir_target.h" |
| |
| namespace nv50_ir { |
| |
| const uint8_t Target::operationSrcNr[OP_LAST + 1] = |
| { |
| 0, 0, // NOP, PHI |
| 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT |
| 1, 1, 2, // MOV, LOAD, STORE |
| 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD |
| 1, 1, 1, // ABS, NEG, NOT |
| 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR |
| 2, 2, 1, // MAX, MIN, SAT |
| 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT |
| 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT |
| 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 |
| 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW |
| 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, |
| 0, 0, 0, // PRERET,CONT,BREAK |
| 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR |
| 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP |
| 1, 1, // EMIT, RESTART |
| 1, 1, 1, // TEX, TXB, TXL, |
| 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA |
| 1, 2, // SULD, SUST |
| 1, 1, // DFDX, DFDY |
| 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP |
| 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR |
| 0 |
| }; |
| |
| const OpClass Target::operationClass[OP_LAST + 1] = |
| { |
| // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT |
| OPCLASS_OTHER, |
| OPCLASS_PSEUDO, |
| OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, |
| // MOV; LOAD; STORE |
| OPCLASS_MOVE, |
| OPCLASS_LOAD, |
| OPCLASS_STORE, |
| // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD |
| OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, |
| OPCLASS_ARITH, OPCLASS_ARITH, |
| OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, |
| // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR |
| OPCLASS_CONVERT, OPCLASS_CONVERT, |
| OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, |
| OPCLASS_SHIFT, OPCLASS_SHIFT, |
| // MAX, MIN |
| OPCLASS_COMPARE, OPCLASS_COMPARE, |
| // SAT, CEIL, FLOOR, TRUNC; CVT |
| OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, |
| OPCLASS_CONVERT, |
| // SET(AND,OR,XOR); SELP, SLCT |
| OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, |
| OPCLASS_COMPARE, OPCLASS_COMPARE, |
| // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW |
| OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, |
| OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, |
| OPCLASS_SFU, OPCLASS_SFU, |
| // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN |
| OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, |
| OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, |
| OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, |
| // DISCARD, EXIT |
| OPCLASS_FLOW, OPCLASS_FLOW, |
| // MEMBAR |
| OPCLASS_OTHER, |
| // VFETCH, PFETCH, EXPORT |
| OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE, |
| // LINTERP, PINTERP |
| OPCLASS_SFU, OPCLASS_SFU, |
| // EMIT, RESTART |
| OPCLASS_OTHER, OPCLASS_OTHER, |
| // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA |
| OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, |
| OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, |
| // SULD, SUST |
| OPCLASS_SURFACE, OPCLASS_SURFACE, |
| // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP |
| OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, |
| OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, |
| // POPCNT, INSBF, EXTBF |
| OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, |
| // TEXBAR |
| OPCLASS_OTHER, |
| OPCLASS_PSEUDO // LAST |
| }; |
| |
| |
| extern Target *getTargetNVC0(unsigned int chipset); |
| extern Target *getTargetNV50(unsigned int chipset); |
| |
| Target *Target::create(unsigned int chipset) |
| { |
| switch (chipset & 0xf0) { |
| case 0xc0: |
| case 0xd0: |
| case 0xe0: |
| return getTargetNVC0(chipset); |
| case 0x50: |
| case 0x80: |
| case 0x90: |
| case 0xa0: |
| return getTargetNV50(chipset); |
| default: |
| ERROR("unsupported target: NV%x\n", chipset); |
| return 0; |
| } |
| } |
| |
| void Target::destroy(Target *targ) |
| { |
| delete targ; |
| } |
| |
| CodeEmitter::CodeEmitter(const Target *target) : targ(target) |
| { |
| } |
| |
| void |
| CodeEmitter::setCodeLocation(void *ptr, uint32_t size) |
| { |
| code = reinterpret_cast<uint32_t *>(ptr); |
| codeSize = 0; |
| codeSizeLimit = size; |
| } |
| |
| void |
| CodeEmitter::printBinary() const |
| { |
| uint32_t *bin = code - codeSize / 4; |
| INFO("program binary (%u bytes)", codeSize); |
| for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { |
| if ((pos % 8) == 0) |
| INFO("\n"); |
| INFO("%08x ", bin[pos]); |
| } |
| INFO("\n"); |
| } |
| |
// Number of 56-byte units needed to hold `size` bytes (division rounded up).
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t unitBytes = 56;

   return (size + (unitBytes - 1)) / unitBytes;
}
| |
| void |
| CodeEmitter::prepareEmission(Program *prog) |
| { |
| for (ArrayList::Iterator fi = prog->allFuncs.iterator(); |
| !fi.end(); fi.next()) { |
| Function *func = reinterpret_cast<Function *>(fi.get()); |
| func->binPos = prog->binSize; |
| prepareEmission(func); |
| |
| // adjust sizes & positions for schedulding info: |
| if (prog->getTarget()->hasSWSched) { |
| BasicBlock *bb = NULL; |
| for (int i = 0; i < func->bbCount; ++i) { |
| bb = func->bbArray[i]; |
| const uint32_t oldPos = bb->binPos; |
| const uint32_t oldEnd = bb->binPos + bb->binSize; |
| uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8; |
| uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8; |
| bb->binPos = adjPos; |
| bb->binSize = adjEnd - adjPos; |
| } |
| if (bb) |
| func->binSize = bb->binPos + bb->binSize; |
| } |
| |
| prog->binSize += func->binSize; |
| } |
| } |
| |
| void |
| CodeEmitter::prepareEmission(Function *func) |
| { |
| func->bbCount = 0; |
| func->bbArray = new BasicBlock * [func->cfg.getSize()]; |
| |
| BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; |
| |
| for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) |
| prepareEmission(BasicBlock::get(*it)); |
| } |
| |
| void |
| CodeEmitter::prepareEmission(BasicBlock *bb) |
| { |
| Instruction *i, *next; |
| Function *func = bb->getFunction(); |
| int j; |
| unsigned int nShort; |
| |
| for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); |
| |
| for (; j >= 0; --j) { |
| BasicBlock *in = func->bbArray[j]; |
| Instruction *exit = in->getExit(); |
| |
| if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { |
| in->binSize -= 8; |
| func->binSize -= 8; |
| |
| for (++j; j < func->bbCount; ++j) |
| func->bbArray[j]->binPos -= 8; |
| |
| in->remove(exit); |
| } |
| bb->binPos = in->binPos + in->binSize; |
| if (in->binSize) // no more no-op branches to bb |
| break; |
| } |
| func->bbArray[func->bbCount++] = bb; |
| |
| if (!bb->getExit()) |
| return; |
| |
| // determine encoding size, try to group short instructions |
| nShort = 0; |
| for (i = bb->getEntry(); i; i = next) { |
| next = i->next; |
| |
| i->encSize = getMinEncodingSize(i); |
| if (next && i->encSize < 8) |
| ++nShort; |
| else |
| if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { |
| if (i->isCommutationLegal(i->next)) { |
| bb->permuteAdjacent(i, next); |
| next->encSize = 4; |
| next = i; |
| i = i->prev; |
| ++nShort; |
| } else |
| if (i->isCommutationLegal(i->prev) && next->next) { |
| bb->permuteAdjacent(i->prev, i); |
| next->encSize = 4; |
| next = next->next; |
| bb->binSize += 4; |
| ++nShort; |
| } else { |
| i->encSize = 8; |
| i->prev->encSize = 8; |
| bb->binSize += 4; |
| nShort = 0; |
| } |
| } else { |
| i->encSize = 8; |
| if (nShort & 1) { |
| i->prev->encSize = 8; |
| bb->binSize += 4; |
| } |
| nShort = 0; |
| } |
| bb->binSize += i->encSize; |
| } |
| |
| if (bb->getExit()->encSize == 4) { |
| assert(nShort); |
| bb->getExit()->encSize = 8; |
| bb->binSize += 4; |
| |
| if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { |
| bb->binSize += 8; |
| bb->getExit()->prev->encSize = 8; |
| } |
| } |
| assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); |
| |
| func->binSize += bb->binSize; |
| } |
| |
| void |
| Program::emitSymbolTable(struct nv50_ir_prog_info *info) |
| { |
| unsigned int n = 0, nMax = allFuncs.getSize(); |
| |
| info->bin.syms = |
| (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); |
| |
| for (ArrayList::Iterator fi = allFuncs.iterator(); |
| !fi.end(); |
| fi.next(), ++n) { |
| Function *f = (Function *)fi.get(); |
| assert(n < nMax); |
| |
| info->bin.syms[n].label = f->getLabel(); |
| info->bin.syms[n].offset = f->binPos; |
| } |
| |
| info->bin.numSyms = n; |
| } |
| |
| bool |
| Program::emitBinary(struct nv50_ir_prog_info *info) |
| { |
| CodeEmitter *emit = target->getCodeEmitter(progType); |
| |
| emit->prepareEmission(this); |
| |
| if (dbgFlags & NV50_IR_DEBUG_BASIC) |
| this->print(); |
| |
| if (!binSize) { |
| code = NULL; |
| return false; |
| } |
| code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); |
| if (!code) |
| return false; |
| emit->setCodeLocation(code, binSize); |
| |
| for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { |
| Function *fn = reinterpret_cast<Function *>(fi.get()); |
| |
| assert(emit->getCodeSize() == fn->binPos); |
| |
| for (int b = 0; b < fn->bbCount; ++b) |
| for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) |
| emit->emitInstruction(i); |
| } |
| info->bin.relocData = emit->getRelocInfo(); |
| |
| emitSymbolTable(info); |
| |
| // the nvc0 driver will print the binary iself together with the header |
| if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) |
| emit->printBinary(); |
| |
| delete emit; |
| return true; |
| } |
| |
| #define RELOC_ALLOC_INCREMENT 8 |
| |
| bool |
| CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, |
| int s) |
| { |
| unsigned int n = relocInfo ? relocInfo->count : 0; |
| |
| if (!(n % RELOC_ALLOC_INCREMENT)) { |
| size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); |
| relocInfo = reinterpret_cast<RelocInfo *>( |
| REALLOC(relocInfo, n ? size : 0, |
| size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); |
| if (!relocInfo) |
| return false; |
| if (n == 0) |
| memset(relocInfo, 0, sizeof(RelocInfo)); |
| } |
| ++relocInfo->count; |
| |
| relocInfo->entry[n].data = data; |
| relocInfo->entry[n].mask = m; |
| relocInfo->entry[n].offset = codeSize + w * 4; |
| relocInfo->entry[n].bitPos = s; |
| relocInfo->entry[n].type = ty; |
| |
| return true; |
| } |
| |
| void |
| RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const |
| { |
| uint32_t value = 0; |
| |
| switch (type) { |
| case TYPE_CODE: value = info->codePos; break; |
| case TYPE_BUILTIN: value = info->libPos; break; |
| case TYPE_DATA: value = info->dataPos; break; |
| default: |
| assert(0); |
| break; |
| } |
| value += data; |
| value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); |
| |
| binary[offset / 4] &= ~mask; |
| binary[offset / 4] |= value & mask; |
| } |
| |
| } // namespace nv50_ir |
| |
| |
| #include "nv50/codegen/nv50_ir_driver.h" |
| |
| extern "C" { |
| |
| void |
| nv50_ir_relocate_code(void *relocData, uint32_t *code, |
| uint32_t codePos, |
| uint32_t libPos, |
| uint32_t dataPos) |
| { |
| nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); |
| |
| info->codePos = codePos; |
| info->libPos = libPos; |
| info->dataPos = dataPos; |
| |
| for (unsigned int i = 0; i < info->count; ++i) |
| info->entry[i].apply(code, info); |
| } |
| |
| void |
| nv50_ir_get_target_library(uint32_t chipset, |
| const uint32_t **code, uint32_t *size) |
| { |
| nv50_ir::Target *targ = nv50_ir::Target::create(chipset); |
| targ->getBuiltinCode(code, size); |
| nv50_ir::Target::destroy(targ); |
| } |
| |
| } |