/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdarg.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include "redump.h"
#include "disasm.h"
#include "script.h"
#include "rnnutil.h"
#include "buffers.h"
#include "cffdec.h"
/* ************************************************************************* */
/* originally based on kernel recovery dump code: */
static const struct cffdec_options *options;
static bool needs_wfi = false;
static bool summary = false;
static bool in_summary = false;
static int vertices;
static inline unsigned regcnt(void)
{
if (options->gpu_id >= 500)
return 0xffff;
else
return 0x7fff;
}
static int is_64b(void)
{
return options->gpu_id >= 500;
}
static int draws[3];
static struct {
uint64_t base;
uint32_t size; /* in dwords */
/* Generally cmdstream consists of multiple IB calls to different
* buffers, which are themselves often re-used for each tile. The
* triggered flag serves two purposes to help make it more clear
* what part of the cmdstream is before vs after the GPU hang:
*
* 1) if in IB2 we are past the point within the IB2 buffer where
* the GPU hung, but IB1 is not past the point within its
* buffer where the GPU had hung, then we know the GPU hang
* happens on a future use of that IB2 buffer.
*
* 2) if in an IB1 or IB2 buffer that is not the one where the GPU
* hung, but we've already passed the trigger point at the same
* IB level, we know that we are past the point where the GPU
* had hung.
*
* So this is a one-way switch, false->true. And a higher #'d
* IB level isn't considered triggered unless the lower #'d IB
* level is.
*/
bool triggered;
} ibs[4];
static int ib;
static int draw_count;
static int current_draw_count;
/* query mode.. to handle symbolic register name queries, we need to
* defer parsing the query string until after gpu_id is known and the
* rnn db is loaded:
*/
static int *queryvals;
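/* Output filtering: decide whether output at the given verbosity level
* should be suppressed, based on the draw filter and whether we are in
* summary, query, or script mode:
*/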
static bool
quiet(int lvl)
{
if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count))
return true;
if ((lvl >= 3) && (summary || options->querystrs || options->script))
return true;
if ((lvl >= 2) && (options->querystrs || options->script))
return true;
return false;
}
void
printl(int lvl, const char *fmt, ...)
{
va_list args;
if (quiet(lvl))
return;
va_start(args, fmt);
vprintf(fmt, args);
va_end(args);
}
static const char *levels[] = {
"\t",
"\t\t",
"\t\t\t",
"\t\t\t\t",
"\t\t\t\t\t",
"\t\t\t\t\t\t",
"\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t\t",
"x",
"x",
"x",
"x",
"x",
"x",
};
enum state_src_t {
STATE_SRC_DIRECT,
STATE_SRC_INDIRECT,
STATE_SRC_BINDLESS,
};
/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
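/* Decide whether a gpu address should be highlighted as the estimated
* crash location, by comparing it against the unconsumed remainder of the
* hung IB as recorded in options->ibs[]. This is also where the one-way
* 'triggered' flag described above gets flipped:
*/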
static bool
highlight_gpuaddr(uint64_t gpuaddr)
{
if (!options->color)
return false;
if (!options->ibs[ib].base)
return false;
if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered)
return false;
if (ibs[ib].triggered)
return true;
if (options->ibs[ib].base != ibs[ib].base)
return false;
uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
ibs[ib].triggered |= triggered;
if (triggered)
printf("ESTIMATED CRASH LOCATION!\n");
return triggered;
}
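/* Hexdump 'sizedwords' dwords, eight per row, each row prefixed with the
* gpu address and byte offset. Consecutive all-zero rows (other than the
* first row) are collapsed into a single '*' line, hexdump(1) style:
*/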
static void
dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
{
int i, j;
int lastzero = 1;
if (quiet(2))
return;
for (i = 0; i < sizedwords; i += 8) {
int zero = 1;
/* always show first row: */
if (i == 0)
zero = 0;
for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
if (dwords[i+j])
zero = 0;
if (zero && !lastzero)
printf("*\n");
lastzero = zero;
if (zero)
continue;
uint64_t addr = gpuaddr(&dwords[i]);
bool highlight = highlight_gpuaddr(addr);
if (highlight)
printf("\x1b[0;1;31m");
if (is_64b()) {
printf("%016lx:%s", addr, levels[level]);
} else {
printf("%08x:%s", (uint32_t)addr, levels[level]);
}
if (highlight)
printf("\x1b[0m");
printf("%04x:", i * 4);
for (j = 0; (j < 8) && (i+j < sizedwords); j++) {
printf(" %08x", dwords[i+j]);
}
printf("\n");
}
}
static void
dump_float(float *dwords, uint32_t sizedwords, int level)
{
int i;
for (i = 0; i < sizedwords; i++) {
if ((i % 8) == 0) {
if (is_64b()) {
printf("%016lx:%s", gpuaddr(dwords), levels[level]);
} else {
printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
}
} else {
printf(" ");
}
printf("%8f", *(dwords++));
if ((i % 8) == 7)
printf("\n");
}
if (i % 8)
printf("\n");
}
/* I believe the surface format is low bits:
#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
comments in sys2gmem_tex_const indicate that address is [31:12], but
looks like at least some of the bits above the format have different meaning..
*/
static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
uint32_t *flags, uint32_t mask)
{
assert(!is_64b()); /* this is only used on a2xx */
*gpuaddr = dword & ~mask;
*flags = dword & mask;
}
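/* Shadow copy of register state: the most recent value written to each
* register, bitmaps of which registers were written at all and which were
* (re)written since the last draw/blit, plus the value each register had
* at the previous summary (used for the '!' change markers):
*/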
static uint32_t type0_reg_vals[0xffff + 1];
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
static bool reg_rewritten(uint32_t regbase)
{
return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8)));
}
bool reg_written(uint32_t regbase)
{
return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8)));
}
static void clear_rewritten(void)
{
memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
}
static void clear_written(void)
{
memset(type0_reg_written, 0, sizeof(type0_reg_written));
clear_rewritten();
}
uint32_t reg_lastval(uint32_t regbase)
{
return lastvals[regbase];
}
static void
clear_lastvals(void)
{
memset(lastvals, 0, sizeof(lastvals));
}
uint32_t
reg_val(uint32_t regbase)
{
return type0_reg_vals[regbase];
}
void
reg_set(uint32_t regbase, uint32_t val)
{
assert(regbase < regcnt());
type0_reg_vals[regbase] = val;
type0_reg_written[regbase/8] |= (1 << (regbase % 8));
type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8));
}
static void
reg_dump_scratch(const char *name, uint32_t dword, int level)
{
unsigned r;
if (quiet(3))
return;
r = regbase("CP_SCRATCH[0].REG");
// if not, try old a2xx/a3xx version:
if (!r)
r = regbase("CP_SCRATCH_REG0");
if (!r)
return;
printf("%s:%u,%u,%u,%u\n", levels[level],
reg_val(r + 4), reg_val(r + 5),
reg_val(r + 6), reg_val(r + 7));
}
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
void *buf;
if (quiet(quietlvl))
return;
buf = hostptr(gpuaddr);
if (buf) {
dump_hex(buf, sizedwords, level+1);
}
}
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
dump_gpuaddr_size(gpuaddr, level, 64, 3);
}
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
dump_gpuaddr(dword, level);
}
uint32_t gpuaddr_lo;
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
gpuaddr_lo = dword;
}
static void
reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
{
dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
}
static void
dump_shader(const char *ext, void *buf, int bufsz)
{
if (options->dump_shaders) {
static int n = 0;
char filename[16]; /* room for "%04d", '.', the extension, and NUL */
int fd;
snprintf(filename, sizeof(filename), "%04d.%s", n++, ext);
fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
if (fd >= 0) {
write(fd, buf, bufsz);
close(fd);
}
}
}
static void
disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
{
void *buf;
gpuaddr &= 0xfffffffffffffff0;
if (quiet(3))
return;
buf = hostptr(gpuaddr);
if (buf) {
uint32_t sizedwords = hostlen(gpuaddr) / 4;
const char *ext;
dump_hex(buf, min(64, sizedwords), level+1);
disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
/* this is a bit of an ugly way, but oh well.. */
if (strstr(name, "SP_VS_OBJ")) {
ext = "vo3";
} else if (strstr(name, "SP_FS_OBJ")) {
ext = "fo3";
} else if (strstr(name, "SP_GS_OBJ")) {
ext = "go3";
} else if (strstr(name, "SP_CS_OBJ")) {
ext = "co3";
} else {
ext = NULL;
}
if (ext)
dump_shader(ext, buf, sizedwords * 4);
}
}
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
disasm_gpuaddr(name, dword, level);
}
static void
reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
{
disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
}
/* Find the value of the TEX_COUNT register that corresponds to the named
* TEX_SAMP/TEX_CONST reg.
*
* Note, this kinda assumes an equal # of samplers and textures, but not
* really sure if there is a much better option. I suppose on a6xx we
* could instead decode the bitfields in SP_xS_CONFIG
*/
static int
get_tex_count(const char *name)
{
char count_reg[strlen(name) + 5];
char *p;
p = strstr(name, "CONST");
if (!p)
p = strstr(name, "SAMP");
if (!p)
return 0;
int n = p - name;
strncpy(count_reg, name, n);
strcpy(count_reg + n, "COUNT");
return reg_val(regbase(count_reg));
}
static void
reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
{
if (!in_summary)
return;
int num_unit = get_tex_count(name);
uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
void *buf = hostptr(gpuaddr);
if (!buf)
return;
dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level+1);
}
static void
reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
{
if (!in_summary)
return;
int num_unit = get_tex_count(name);
uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
void *buf = hostptr(gpuaddr);
if (!buf)
return;
dump_tex_const(buf, num_unit, level+1);
}
/*
* Registers with special handling (rnndec_decode() handles rest):
*/
#define REG(x, fxn) { #x, fxn }
static struct {
const char *regname;
void (*fxn)(const char *name, uint32_t dword, int level);
uint32_t regbase;
} reg_a2xx[] = {
REG(CP_SCRATCH_REG0, reg_dump_scratch),
REG(CP_SCRATCH_REG1, reg_dump_scratch),
REG(CP_SCRATCH_REG2, reg_dump_scratch),
REG(CP_SCRATCH_REG3, reg_dump_scratch),
REG(CP_SCRATCH_REG4, reg_dump_scratch),
REG(CP_SCRATCH_REG5, reg_dump_scratch),
REG(CP_SCRATCH_REG6, reg_dump_scratch),
REG(CP_SCRATCH_REG7, reg_dump_scratch),
{NULL},
}, reg_a3xx[] = {
REG(CP_SCRATCH_REG0, reg_dump_scratch),
REG(CP_SCRATCH_REG1, reg_dump_scratch),
REG(CP_SCRATCH_REG2, reg_dump_scratch),
REG(CP_SCRATCH_REG3, reg_dump_scratch),
REG(CP_SCRATCH_REG4, reg_dump_scratch),
REG(CP_SCRATCH_REG5, reg_dump_scratch),
REG(CP_SCRATCH_REG6, reg_dump_scratch),
REG(CP_SCRATCH_REG7, reg_dump_scratch),
REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
{NULL},
}, reg_a4xx[] = {
REG(CP_SCRATCH[0].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
{NULL},
}, reg_a5xx[] = {
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
// REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
// REG(RB_2D_DST_LO, reg_gpuaddr_lo),
// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
{NULL},
}, reg_a6xx[] = {
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
{NULL},
}, *type0_reg;
static struct rnn *rnn;
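/* Load the rnn register database for the given generation, resolve any
* symbolic query strings (options->querystrs) to register offsets, and
* look up the regbase for each entry in the special-handling tables above:
*/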
static void
init_rnn(const char *gpuname)
{
rnn = rnn_new(!options->color);
rnn_load(rnn, gpuname);
if (options->querystrs) {
int i;
queryvals = calloc(options->nquery, sizeof(queryvals[0]));
for (i = 0; i < options->nquery; i++) {
int val = strtol(options->querystrs[i], NULL, 0);
if (val == 0)
val = regbase(options->querystrs[i]);
queryvals[i] = val;
printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
}
}
for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
if (!type0_reg[idx].regbase) {
printf("invalid register name: %s\n", type0_reg[idx].regname);
exit(1);
}
}
}
void
reset_regs(void)
{
clear_written();
clear_lastvals();
memset(&ibs, 0, sizeof(ibs));
}
void
cffdec_init(const struct cffdec_options *_options)
{
options = _options;
summary = options->summary;
/* in case we're decoding multiple files: */
free(queryvals);
reset_regs();
draw_count = 0;
/* TODO we need an API to free/cleanup any previous rnn */
switch (options->gpu_id) {
case 200 ... 299:
type0_reg = reg_a2xx;
init_rnn("a2xx");
break;
case 300 ... 399:
type0_reg = reg_a3xx;
init_rnn("a3xx");
break;
case 400 ... 499:
type0_reg = reg_a4xx;
init_rnn("a4xx");
break;
case 500 ... 599:
type0_reg = reg_a5xx;
init_rnn("a5xx");
break;
case 600 ... 699:
type0_reg = reg_a6xx;
init_rnn("a6xx");
break;
default:
errx(-1, "unsupported gpu");
}
}
const char *
pktname(unsigned opc)
{
return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
}
const char *
regname(uint32_t regbase, int color)
{
return rnn_regname(rnn, regbase, color);
}
uint32_t
regbase(const char *name)
{
return rnn_regbase(rnn, name);
}
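/* Check whether the decoded name of a register ends with the given suffix,
* used below to pair up the _LO/_HI halves of split 64b address registers:
*/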
static int
endswith(uint32_t regbase, const char *suffix)
{
const char *name = regname(regbase, 0);
const char *s = strstr(name, suffix);
if (!s)
return 0;
return (s - strlen(name) + strlen(suffix)) == name;
}
void
dump_register_val(uint32_t regbase, uint32_t dword, int level)
{
struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
if (info && info->typeinfo) {
uint64_t gpuaddr = 0;
char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
printf("%s%s: %s", levels[level], info->name, decoded);
/* Try and figure out if we are looking at a gpuaddr.. this
* might be useful for other gen's too, but at least a5xx has
* the _HI/_LO suffix we can look for. Maybe a better approach
* would be some special annotation in the xml..
*/
if (options->gpu_id >= 500) {
if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
} else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
}
}
if (gpuaddr && hostptr(gpuaddr)) {
printf("\t\tbase=%lx, offset=%lu, size=%u",
gpubaseaddr(gpuaddr),
gpuaddr - gpubaseaddr(gpuaddr),
hostlen(gpubaseaddr(gpuaddr)));
}
printf("\n");
free(decoded);
} else if (info) {
printf("%s%s: %08x\n", levels[level], info->name, dword);
} else {
printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
}
if (info) {
free(info->name);
free(info);
}
}
static void
dump_register(uint32_t regbase, uint32_t dword, int level)
{
if (!quiet(3)) {
dump_register_val(regbase, dword, level);
}
for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
if (type0_reg[idx].regbase == regbase) {
type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
break;
}
}
}
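/* Banked (context) registers, here assumed to be the 0x2000-0x23ff range,
* can be written without a WFI; non-banked register writes that happen
* while a WFI is still outstanding get flagged in dump_registers() below:
*/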
static bool
is_banked_reg(uint32_t regbase)
{
return (0x2000 <= regbase) && (regbase < 0x2400);
}
static void
dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
{
while (sizedwords--) {
int last_summary = summary;
/* access to non-banked registers needs a WFI:
* TODO banked register range for a2xx??
*/
if (needs_wfi && !is_banked_reg(regbase))
printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
reg_set(regbase, *dwords);
dump_register(regbase, *dwords, level);
regbase++;
dwords++;
summary = last_summary;
}
}
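/* Decode a block of dwords against a named rnn domain (e.g. a TEX_CONST
* or TEX_SAMP layout), printing each field via rnndec; fields wider than
* 32 bits consume the following dword as the high half:
*/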
static void
dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
const char *name)
{
struct rnndomain *dom;
int i;
dom = rnn_finddomain(rnn->db, name);
if (!dom)
return;
if (script_packet)
script_packet(dwords, sizedwords, rnn, dom);
if (quiet(2))
return;
for (i = 0; i < sizedwords; i++) {
struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
char *decoded;
if (!(info && info->typeinfo))
break;
uint64_t value = dwords[i];
if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
value |= (uint64_t) dwords[i + 1] << 32;
i++; /* skip the next dword since we're printing it now */
}
decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
/* Unlike the register printing path, we don't print the name
* of the register, so if it doesn't contain other named
* things (i.e. it isn't a bitset) then print the register
* name as if it's a bitset with a single entry. This avoids
* having to create a dummy register with a single entry to
* get a name in the decoding.
*/
if (info->typeinfo->type == RNN_TTYPE_BITSET ||
info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
printf("%s%s\n", levels[level], decoded);
} else {
printf("%s{ %s%s%s = %s }\n", levels[level],
rnn->vc->colors->rname, info->name,
rnn->vc->colors->reset, decoded);
}
free(decoded);
free(info->name);
free(info);
}
}
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
static const char *render_mode;
static enum {
MODE_BINNING = 0x1,
MODE_GMEM = 0x2,
MODE_BYPASS = 0x4,
MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
static void
print_mode(int level)
{
if ((options->gpu_id >= 500) && !quiet(2)) {
printf("%smode: %s\n", levels[level], render_mode);
printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
}
}
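/* Decide whether to skip query output for the current draw: QUERY_ALL
* never skips, QUERY_WRITTEN skips unless at least one queried register
* was rewritten since the previous draw, and QUERY_DELTA skips unless at
* least one queried register changed value:
*/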
static bool
skip_query(void)
{
switch (options->query_mode) {
case QUERY_ALL:
/* never skip: */
return false;
case QUERY_WRITTEN:
for (int i = 0; i < options->nquery; i++) {
uint32_t regbase = queryvals[i];
if (!reg_written(regbase)) {
continue;
}
if (reg_rewritten(regbase)) {
return false;
}
}
return true;
case QUERY_DELTA:
for (int i = 0; i < options->nquery; i++) {
uint32_t regbase = queryvals[i];
if (!reg_written(regbase)) {
continue;
}
uint32_t lastval = reg_val(regbase);
if (lastval != lastvals[regbase]) {
return false;
}
}
return true;
}
return true;
}
static void
__do_query(const char *primtype, uint32_t num_indices)
{
int n = 0;
if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
bin_x1 = scissor_tl & 0xffff;
bin_y1 = scissor_tl >> 16;
bin_x2 = scissor_br & 0xffff;
bin_y2 = scissor_br >> 16;
}
for (int i = 0; i < options->nquery; i++) {
uint32_t regbase = queryvals[i];
if (reg_written(regbase)) {
uint32_t lastval = reg_val(regbase);
printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
if (options->gpu_id >= 500)
printf("%s:", render_mode);
printf("\t%08x", lastval);
if (lastval != lastvals[regbase]) {
printf("!");
} else {
printf(" ");
}
if (reg_rewritten(regbase)) {
printf("+");
} else {
printf(" ");
}
dump_register_val(regbase, lastval, 0);
n++;
}
}
if (n > 1)
printf("\n");
}
static void
do_query_compare(const char *primtype, uint32_t num_indices)
{
unsigned saved_enable_mask = enable_mask;
const char *saved_render_mode = render_mode;
/* in 'query-compare' mode, we want to see if the register is written
* or changed in any mode:
*
* (NOTE: this could cause false-positive for 'query-delta' if the reg
* is written with different values in binning vs sysmem/gmem mode, as
* we don't track previous values per-mode, but I think we can live with
* that)
*/
enable_mask = MODE_ALL;
clear_rewritten();
load_all_groups(0);
if (!skip_query()) {
/* dump binning pass values: */
enable_mask = MODE_BINNING;
render_mode = "BINNING";
clear_rewritten();
load_all_groups(0);
__do_query(primtype, num_indices);
/* dump draw pass values: */
enable_mask = MODE_GMEM | MODE_BYPASS;
render_mode = "DRAW";
clear_rewritten();
load_all_groups(0);
__do_query(primtype, num_indices);
printf("\n");
}
enable_mask = saved_enable_mask;
render_mode = saved_render_mode;
disable_all_groups();
}
/* well, actually query and script..
* NOTE: call this before dump_register_summary()
*/
static void
do_query(const char *primtype, uint32_t num_indices)
{
if (script_draw)
script_draw(primtype, num_indices);
if (options->query_compare) {
do_query_compare(primtype, num_indices);
return;
}
if (skip_query())
return;
__do_query(primtype, num_indices);
}
static void
cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t start = dwords[1] >> 16;
uint32_t size = dwords[1] & 0xffff;
const char *type = NULL, *ext = NULL;
gl_shader_stage disasm_type;
switch (dwords[0]) {
case 0:
type = "vertex";
ext = "vo";
disasm_type = MESA_SHADER_VERTEX;
break;
case 1:
type = "fragment";
ext = "fo";
disasm_type = MESA_SHADER_FRAGMENT;
break;
default:
type = "<unknown>";
disasm_type = 0;
break;
}
printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type);
/* dump raw shader: */
if (ext)
dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
}
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t reg = dwords[0] & 0xffff;
int i;
for (i = 1; i < sizedwords; i++) {
dump_register(reg, dwords[i], level+1);
reg_set(reg, dwords[i]);
reg++;
}
}
enum state_t {
TEX_SAMP = 1,
TEX_CONST,
TEX_MIPADDR, /* a3xx only */
SHADER_PROG,
SHADER_CONST,
// image/ssbo state:
SSBO_0,
SSBO_1,
SSBO_2,
UBO,
// unknown things, just hexdump these:
UNKNOWN_DWORDS,
UNKNOWN_2DWORDS,
UNKNOWN_4DWORDS,
};
enum adreno_state_block {
SB_VERT_TEX = 0,
SB_VERT_MIPADDR = 1,
SB_FRAG_TEX = 2,
SB_FRAG_MIPADDR = 3,
SB_VERT_SHADER = 4,
SB_GEOM_SHADER = 5,
SB_FRAG_SHADER = 6,
SB_COMPUTE_SHADER = 7,
};
/* TODO there is probably a clever way to let rnndec parse things so
* we don't have to care about packet format differences across gens
*/
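/* CP_LOAD_STATE header decoding. For a3xx the state block id is in
* dword0[21:19] and the state type in dword1[1:0]; the a4xx/a6xx layouts
* are handled by the shared helpers further below:
*/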
static void
a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
enum state_src_t *src)
{
unsigned state_block_id = (dwords[0] >> 19) & 0x7;
unsigned state_type = dwords[1] & 0x3;
static const struct {
gl_shader_stage stage;
enum state_t state;
} lookup[0xf][0x4] = {
[SB_VERT_TEX][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
[SB_VERT_TEX][1] = { MESA_SHADER_VERTEX, TEX_CONST },
[SB_FRAG_TEX][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
[SB_FRAG_TEX][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
[SB_VERT_SHADER][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
[SB_VERT_SHADER][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
[SB_FRAG_SHADER][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
[SB_FRAG_SHADER][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
};
*stage = lookup[state_block_id][state_type].stage;
*state = lookup[state_block_id][state_type].state;
unsigned state_src = (dwords[0] >> 16) & 0x7;
if (state_src == 0 /* SS_DIRECT */)
*src = STATE_SRC_DIRECT;
else
*src = STATE_SRC_INDIRECT;
}
static enum state_src_t
_get_state_src(unsigned dword0)
{
switch ((dword0 >> 16) & 0x3) {
case 0: /* SS4_DIRECT / SS6_DIRECT */
return STATE_SRC_DIRECT;
case 2: /* SS4_INDIRECT / SS6_INDIRECT */
return STATE_SRC_INDIRECT;
case 1: /* SS6_BINDLESS */
return STATE_SRC_BINDLESS;
default:
return STATE_SRC_DIRECT;
}
}
static void
_get_state_type(unsigned state_block_id, unsigned state_type,
gl_shader_stage *stage, enum state_t *state)
{
static const struct {
gl_shader_stage stage;
enum state_t state;
} lookup[0x10][0x4] = {
// SB4_VS_TEX:
[0x0][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
[0x0][1] = { MESA_SHADER_VERTEX, TEX_CONST },
[0x0][2] = { MESA_SHADER_VERTEX, UBO },
// SB4_HS_TEX:
[0x1][0] = { MESA_SHADER_TESS_CTRL, TEX_SAMP },
[0x1][1] = { MESA_SHADER_TESS_CTRL, TEX_CONST },
[0x1][2] = { MESA_SHADER_TESS_CTRL, UBO },
// SB4_DS_TEX:
[0x2][0] = { MESA_SHADER_TESS_EVAL, TEX_SAMP },
[0x2][1] = { MESA_SHADER_TESS_EVAL, TEX_CONST },
[0x2][2] = { MESA_SHADER_TESS_EVAL, UBO },
// SB4_GS_TEX:
[0x3][0] = { MESA_SHADER_GEOMETRY, TEX_SAMP },
[0x3][1] = { MESA_SHADER_GEOMETRY, TEX_CONST },
[0x3][2] = { MESA_SHADER_GEOMETRY, UBO },
// SB4_FS_TEX:
[0x4][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
[0x4][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
[0x4][2] = { MESA_SHADER_FRAGMENT, UBO },
// SB4_CS_TEX:
[0x5][0] = { MESA_SHADER_COMPUTE, TEX_SAMP },
[0x5][1] = { MESA_SHADER_COMPUTE, TEX_CONST },
[0x5][2] = { MESA_SHADER_COMPUTE, UBO },
// SB4_VS_SHADER:
[0x8][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
[0x8][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
[0x8][2] = { MESA_SHADER_VERTEX, UBO },
// SB4_HS_SHADER
[0x9][0] = { MESA_SHADER_TESS_CTRL, SHADER_PROG },
[0x9][1] = { MESA_SHADER_TESS_CTRL, SHADER_CONST },
[0x9][2] = { MESA_SHADER_TESS_CTRL, UBO },
// SB4_DS_SHADER
[0xa][0] = { MESA_SHADER_TESS_EVAL, SHADER_PROG },
[0xa][1] = { MESA_SHADER_TESS_EVAL, SHADER_CONST },
[0xa][2] = { MESA_SHADER_TESS_EVAL, UBO },
// SB4_GS_SHADER
[0xb][0] = { MESA_SHADER_GEOMETRY, SHADER_PROG },
[0xb][1] = { MESA_SHADER_GEOMETRY, SHADER_CONST },
[0xb][2] = { MESA_SHADER_GEOMETRY, UBO },
// SB4_FS_SHADER:
[0xc][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
[0xc][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
[0xc][2] = { MESA_SHADER_FRAGMENT, UBO },
// SB4_CS_SHADER:
[0xd][0] = { MESA_SHADER_COMPUTE, SHADER_PROG },
[0xd][1] = { MESA_SHADER_COMPUTE, SHADER_CONST },
[0xd][2] = { MESA_SHADER_COMPUTE, UBO },
[0xd][3] = { MESA_SHADER_COMPUTE, SSBO_0 }, /* a6xx location */
// SB4_SSBO (shared across all stages)
[0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */
[0xe][1] = { 0, SSBO_1 },
[0xe][2] = { 0, SSBO_2 },
// SB4_CS_SSBO
[0xf][0] = { MESA_SHADER_COMPUTE, SSBO_0 },
[0xf][1] = { MESA_SHADER_COMPUTE, SSBO_1 },
[0xf][2] = { MESA_SHADER_COMPUTE, SSBO_2 },
// unknown things
/* This looks like combined UBO state for 3d stages (a5xx and
* earlier?). I think a6xx has UBO state per shader stage:
*/
[0x6][2] = { 0, UBO },
[0x7][1] = { 0, UNKNOWN_2DWORDS },
};
*stage = lookup[state_block_id][state_type].stage;
*state = lookup[state_block_id][state_type].state;
}
static void
a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
enum state_src_t *src)
{
unsigned state_block_id = (dwords[0] >> 18) & 0xf;
unsigned state_type = dwords[1] & 0x3;
_get_state_type(state_block_id, state_type, stage, state);
*src = _get_state_src(dwords[0]);
}
static void
a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
enum state_src_t *src)
{
unsigned state_block_id = (dwords[0] >> 18) & 0xf;
unsigned state_type = (dwords[0] >> 14) & 0x3;
_get_state_type(state_block_id, state_type, stage, state);
*src = _get_state_src(dwords[0]);
}
static void
dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
{
for (int i = 0; i < num_unit; i++) {
/* work-around to reduce noise for opencl blob which always
* writes the max # regardless of # of textures used
*/
if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
break;
if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP");
dump_hex(texsamp, 2, level+1);
texsamp += 2;
} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP");
dump_hex(texsamp, 2, level+1);
texsamp += 2;
} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP");
dump_hex(texsamp, 4, level+1);
texsamp += 4;
} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP");
dump_hex(texsamp, 4, level+1);
texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
}
}
}
static void
dump_tex_const(uint32_t *texconst, int num_unit, int level)
{
for (int i = 0; i < num_unit; i++) {
/* work-around to reduce noise for opencl blob which always
* writes the max # regardless of # of textures used
*/
if ((num_unit == 16) &&
(texconst[0] == 0) && (texconst[1] == 0) &&
(texconst[2] == 0) && (texconst[3] == 0))
break;
if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST");
dump_hex(texconst, 4, level+1);
texconst += 4;
} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST");
if (options->dump_textures) {
uint32_t addr = texconst[4] & ~0x1f;
dump_gpuaddr(addr, level-2);
}
dump_hex(texconst, 8, level+1);
texconst += 8;
} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST");
if (options->dump_textures) {
uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
}
dump_hex(texconst, 12, level+1);
texconst += 12;
} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST");
if (options->dump_textures) {
uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
}
dump_hex(texconst, 16, level+1);
texconst += 16;
}
}
}
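/* CP_LOAD_STATE (and the a4xx/a6xx variants): load a block of state
* (shader program, constants, texture samplers/constants, SSBO/UBO
* descriptors), either inline from the packet or from an indirect or
* bindless source address:
*/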
static void
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
gl_shader_stage stage;
enum state_t state;
enum state_src_t src;
uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
uint64_t ext_src_addr;
void *contents;
int i;
if (quiet(2) && !options->script)
return;
if (options->gpu_id >= 600)
a6xx_get_state_type(dwords, &stage, &state, &src);
else if (options->gpu_id >= 400)
a4xx_get_state_type(dwords, &stage, &state, &src);
else
a3xx_get_state_type(dwords, &stage, &state, &src);
switch (src) {
case STATE_SRC_DIRECT: ext_src_addr = 0; break;
case STATE_SRC_INDIRECT:
if (is_64b()) {
ext_src_addr = dwords[1] & 0xfffffffc;
ext_src_addr |= ((uint64_t)dwords[2]) << 32;
} else {
ext_src_addr = dwords[1] & 0xfffffffc;
}
break;
case STATE_SRC_BINDLESS: {
const unsigned base_reg =
stage == MESA_SHADER_COMPUTE ?
regbase("HLSQ_CS_BINDLESS_BASE[0]") :
regbase("HLSQ_BINDLESS_BASE[0]");
if (is_64b()) {
const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
ext_src_addr = reg_val(reg) & 0xfffffffc;
ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
} else {
const unsigned reg = base_reg + (dwords[1] >> 28);
ext_src_addr = reg_val(reg) & 0xfffffffc;
}
ext_src_addr += 4 * (dwords[1] & 0xffffff);
break;
}
}
if (ext_src_addr)
contents = hostptr(ext_src_addr);
else
contents = is_64b() ? dwords + 3 : dwords + 2;
if (!contents)
return;
switch (state) {
case SHADER_PROG: {
const char *ext = NULL;
if (quiet(2))
return;
if (options->gpu_id >= 400)
num_unit *= 16;
else if (options->gpu_id >= 300)
num_unit *= 4;
/* shaders:
*
* note: num_unit seems to be # of instruction groups, where
* an instruction group has 4 64bit instructions.
*/
if (stage == MESA_SHADER_VERTEX) {
ext = "vo3";
} else if (stage == MESA_SHADER_GEOMETRY) {
ext = "go3";
} else if (stage == MESA_SHADER_COMPUTE) {
ext = "co3";
} else if (stage == MESA_SHADER_FRAGMENT){
ext = "fo3";
}
if (contents)
disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
/* dump raw shader: */
if (ext)
dump_shader(ext, contents, num_unit * 2 * 4);
break;
}
case SHADER_CONST: {
if (quiet(2))
return;
/* uniforms/consts:
*
* note: num_unit seems to be # of pairs of dwords??
*/
if (options->gpu_id >= 400)
num_unit *= 2;
dump_float(contents, num_unit*2, level+1);
dump_hex(contents, num_unit*2, level+1);
break;
}
case TEX_MIPADDR: {
uint32_t *addrs = contents;
if (quiet(2))
return;
/* mipmap consts block just appears to be array of num_unit gpu addr's: */
for (i = 0; i < num_unit; i++) {
void *ptr = hostptr(addrs[i]);
printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
if (options->dump_textures) {
printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
dump_hex(ptr, hostlen(addrs[i])/4, level+1);
}
}
break;
}
case TEX_SAMP: {
dump_tex_samp(contents, src, num_unit, level);
break;
}
case TEX_CONST: {
dump_tex_const(contents, num_unit, level);
break;
}
case SSBO_0: {
uint32_t *ssboconst = (uint32_t *)contents;
for (i = 0; i < num_unit; i++) {
int sz = 4;
if (400 <= options->gpu_id && options->gpu_id < 500) {
dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
} else if (500 <= options->gpu_id && options->gpu_id < 600) {
dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
} else if (600 <= options->gpu_id && options->gpu_id < 700) {
sz = 16;
dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
}
dump_hex(ssboconst, sz, level+1);
ssboconst += sz;
}
break;
}
case SSBO_1: {
uint32_t *ssboconst = (uint32_t *)contents;
for (i = 0; i < num_unit; i++) {
if (400 <= options->gpu_id && options->gpu_id < 500)
dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
else if (500 <= options->gpu_id && options->gpu_id < 600)
dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
dump_hex(ssboconst, 2, level+1);
ssboconst += 2;
}
break;
}
case SSBO_2: {
uint32_t *ssboconst = (uint32_t *)contents;
for (i = 0; i < num_unit; i++) {
/* TODO a4xx and a5xx might be same: */
if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
dump_hex(ssboconst, 2, level+1);
}
if (options->dump_textures) {
uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
}
ssboconst += 2;
}
break;
}
case UBO: {
uint32_t *uboconst = (uint32_t *)contents;
for (i = 0; i < num_unit; i++) {
// TODO probably similar on a4xx..
if (500 <= options->gpu_id && options->gpu_id < 600)
dump_domain(uboconst, 2, level+2, "A5XX_UBO");
else if (600 <= options->gpu_id && options->gpu_id < 700)
dump_domain(uboconst, 2, level+2, "A6XX_UBO");
dump_hex(uboconst, 2, level+1);
uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
}
break;
}
case UNKNOWN_DWORDS: {
if (quiet(2))
return;
dump_hex(contents, num_unit, level+1);
break;
}
case UNKNOWN_2DWORDS: {
if (quiet(2))
return;
dump_hex(contents, num_unit * 2, level+1);
break;
}
case UNKNOWN_4DWORDS: {
if (quiet(2))
return;
dump_hex(contents, num_unit * 4, level+1);
break;
}
default:
if (quiet(2))
return;
/* hmm.. */
dump_hex(contents, num_unit, level+1);
break;
}
}
static void
cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
{
bin_x1 = dwords[1] & 0xffff;
bin_y1 = dwords[1] >> 16;
bin_x2 = dwords[2] & 0xffff;
bin_y2 = dwords[2] >> 16;
}
static void
dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
{
uint32_t w, h, p;
uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
static const char *filter[] = {
"point", "bilinear", "bicubic",
};
static const char *clamp[] = {
"wrap", "mirror", "clamp-last-texel",
};
static const char swiznames[] = "xyzw01??";
/* see sys2gmem_tex_const[] in adreno_a2xx.c */
/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
* RFMode=ZeroClamp-1, Dim=1:2d, pitch
*/
p = (dwords[0] >> 22) << 5;
clamp_x = (dwords[0] >> 10) & 0x3;
clamp_y = (dwords[0] >> 13) & 0x3;
clamp_z = (dwords[0] >> 16) & 0x3;
/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
* NearestClamp=1:OGL Mode
*/
parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
/* Width, Height, EndianSwap=0:None */
w = (dwords[2] & 0x1fff) + 1;
h = ((dwords[2] >> 13) & 0x1fff) + 1;
/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
* Mip=2:BaseMap
*/
mag = (dwords[3] >> 19) & 0x3;
min = (dwords[3] >> 21) & 0x3;
swiz = (dwords[3] >> 1) & 0xfff;
/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
* Dim3d=0
*/
// XXX
/* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
* Dim=1:2d, MipPacking=0
*/
parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
printf("%sset texture const %04x\n", levels[level], val);
printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]);
printf("%sswizzle: %c%c%c%c\n", levels[level+1],
swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
levels[level+1], gpuaddr, flags, w, h, p,
rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
mip_gpuaddr, mip_flags);
}
static void
dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
{
int i;
printf("%sset shader const %04x\n", levels[level], val);
for (i = 0; i < sizedwords; ) {
uint32_t gpuaddr, flags;
parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
void *addr = hostptr(gpuaddr);
if (addr) {
const char * fmt =
rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
uint32_t size = dwords[i++];
printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1],
gpuaddr, size, fmt);
// TODO maybe dump these as bytes instead of dwords?
size = (size + 3) / 4; // for now convert to dwords
dump_hex(addr, min(size, 64), level + 1);
if (size > min(size, 64))
printf("%s\t\t...\n", levels[level+1]);
dump_float(addr, min(size, 64), level + 1);
if (size > min(size, 64))
printf("%s\t\t...\n", levels[level+1]);
}
}
}
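/* a2xx CP_SET_CONSTANT: bits 19:16 of the header dword select the constant
* type (ALU float constants, fetch/texture constants, bool/loop constants,
* or register writes offset by 0x2000):
*/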
static void
cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t val = dwords[0] & 0xffff;
switch((dwords[0] >> 16) & 0xf) {
case 0x0:
dump_float((float *)(dwords+1), sizedwords-1, level+1);
break;
case 0x1:
/* need to figure out how const space is partitioned between
* attributes, textures, etc..
*/
if (val < 0x78) {
dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
} else {
dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
}
break;
case 0x2:
printf("%sset bool const %04x\n", levels[level], val);
break;
case 0x3:
printf("%sset loop const %04x\n", levels[level], val);
break;
case 0x4:
val += 0x2000;
if (dwords[0] & 0x80000000) {
uint32_t srcreg = dwords[1];
uint32_t dstval = dwords[2];
/* TODO: not sure what happens w/ payload != 2.. */
assert(sizedwords == 3);
assert(srcreg < ARRAY_SIZE(type0_reg_vals));
/* note: rnn_regname uses a static buf so we can't do
* two regname() calls for one printf..
*/
printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
dstval += type0_reg_vals[srcreg];
dump_registers(val, &dstval, 1, level+1);
} else {
dump_registers(val, dwords+1, sizedwords-1, level+1);
}
break;
}
}
static void dump_register_summary(int level);
static void
cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
printl(2, "%sevent %s\n", levels[level], name);
if (name && (options->gpu_id > 500)) {
char eventname[64];
snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
if (!strcmp(name, "BLIT")) {
do_query(eventname, 0);
print_mode(level);
dump_register_summary(level);
}
}
}
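/* Dump the registers touched since the previous draw/blit (or every
* written register when options->allregs is set), prefixing each with '!'
* if its value changed and '+' if it was rewritten since the last draw:
*/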
static void
dump_register_summary(int level)
{
uint32_t i;
bool saved_summary = summary;
summary = false;
in_summary = true;
/* dump current state of registers: */
printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
for (i = 0; i < regcnt(); i++) {
uint32_t regbase = i;
uint32_t lastval = reg_val(regbase);
/* skip registers that haven't been updated since last draw/blit: */
if (!(options->allregs || reg_rewritten(regbase)))
continue;
if (!reg_written(regbase))
continue;
if (lastval != lastvals[regbase]) {
printl(2, "!");
lastvals[regbase] = lastval;
} else {
printl(2, " ");
}
if (reg_rewritten(regbase)) {
printl(2, "+");
} else {
printl(2, " ");
}
printl(2, "\t%08x", lastval);
if (!quiet(2)) {
dump_register(regbase, lastval, level);
}
}
clear_rewritten();
in_summary = false;
draw_count++;
summary = saved_summary;
}
static uint32_t
draw_indx_common(uint32_t *dwords, int level)
{
uint32_t prim_type = dwords[1] & 0x1f;
uint32_t source_select = (dwords[1] >> 6) & 0x3;
uint32_t num_indices = dwords[2];
const char *primtype;
primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
do_query(primtype, num_indices);
printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype,
prim_type);
printl(2, "%ssource_select: %s (%d)\n", levels[level],
rnn_enumname(rnn, "pc_di_src_sel", source_select),
source_select);
printl(2, "%snum_indices: %d\n", levels[level], num_indices);
vertices += num_indices;
draws[ib]++;
return num_indices;
}
enum pc_di_index_size {
INDEX_SIZE_IGN = 0,
INDEX_SIZE_16_BIT = 0,
INDEX_SIZE_32_BIT = 1,
INDEX_SIZE_8_BIT = 2,
INDEX_SIZE_INVALID = 0,
};
static void
cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t num_indices = draw_indx_common(dwords, level);
assert(!is_64b());
/* if we have an index buffer, dump that: */
if (sizedwords == 5) {
void *ptr = hostptr(dwords[3]);
printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
if (ptr) {
enum pc_di_index_size size =
((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
if (!quiet(2)) {
int i;
printf("%sidxs: ", levels[level]);
if (size == INDEX_SIZE_8_BIT) {
uint8_t *idx = ptr;
for (i = 0; i < dwords[4]; i++)
printf(" %u", idx[i]);
} else if (size == INDEX_SIZE_16_BIT) {
uint16_t *idx = ptr;
for (i = 0; i < dwords[4]/2; i++)
printf(" %u", idx[i]);
} else if (size == INDEX_SIZE_32_BIT) {
uint32_t *idx = ptr;
for (i = 0; i < dwords[4]/4; i++)
printf(" %u", idx[i]);
}
printf("\n");
dump_hex(ptr, dwords[4]/4, level+1);
}
}
}
/* don't bother dumping registers for the dummy draw_indx's.. */
if (num_indices > 0)
dump_register_summary(level);
needs_wfi = true;
}
static void
cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t num_indices = draw_indx_common(dwords, level);
enum pc_di_index_size size =
((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
void *ptr = &dwords[3];
int sz = 0;
assert(!is_64b());
/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
if (!quiet(2)) {
int i;
printf("%sidxs: ", levels[level]);
if (size == INDEX_SIZE_8_BIT) {
uint8_t *idx = ptr;
for (i = 0; i < num_indices; i++)
printf(" %u", idx[i]);
sz = num_indices;
} else if (size == INDEX_SIZE_16_BIT) {
uint16_t *idx = ptr;
for (i = 0; i < num_indices; i++)
printf(" %u", idx[i]);
sz = num_indices * 2;
} else if (size == INDEX_SIZE_32_BIT) {
uint32_t *idx = ptr;
for (i = 0; i < num_indices; i++)
printf(" %u", idx[i]);
sz = num_indices * 4;
}
printf("\n");
dump_hex(ptr, sz / 4, level+1);
}
/* don't bother dumping registers for the dummy draw_indx's.. */
if (num_indices > 0)
dump_register_summary(level);
}
static void
cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t num_indices = dwords[2];
uint32_t prim_type = dwords[0] & 0x1f;
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
print_mode(level);
/* don't bother dumping registers for the dummy draw_indx's.. */
if (num_indices > 0)
dump_register_summary(level);
}
static void
cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t prim_type = dwords[0] & 0x1f;
uint64_t addr;
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
print_mode(level);
if (is_64b())
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
else
addr = dwords[1];
dump_gpuaddr_size(addr, level, 0x10, 2);
if (is_64b())
addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
else
addr = dwords[3];
dump_gpuaddr_size(addr, level, 0x10, 2);
dump_register_summary(level);
}
static void
cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t prim_type = dwords[0] & 0x1f;
uint64_t addr;
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
print_mode(level);
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
dump_gpuaddr_size(addr, level, 0x10, 2);
dump_register_summary(level);
}
static void
cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t prim_type = dwords[0] & 0x1f;
uint32_t count = dwords[2];
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
print_mode(level);
struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
if (count_dword) {
uint64_t count_addr = ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
uint32_t *buf = hostptr(count_addr);
/* Don't print more draws than this if we don't know the indirect
* count. It's possible the user will give ~0 or some other large
* value, expecting the GPU to fill in the draw count, and we don't
* want to print a gazillion draws in that case:
*/
const uint32_t max_draw_count = 0x100;
/* Assume the indirect count is garbage if it's larger than this
* (quite large) value or 0. Hopefully this catches most cases.
*/
const uint32_t max_indirect_draw_count = 0x10000;
if (buf) {
printf("%sindirect count: %u\n", levels[level], *buf);
if (*buf == 0 || *buf > max_indirect_draw_count) {
/* garbage value */
count = min(count, max_draw_count);
} else {
/* not garbage */
count = min(count, *buf);
}
} else {
count = min(count, max_draw_count);
}
}
if (addr_dword && stride_dword) {
uint64_t addr = ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
uint32_t stride = dwords[stride_dword];
for (unsigned i = 0; i < count; i++, addr += stride) {
printf("%sdraw %d:\n", levels[level], i);
dump_gpuaddr_size(addr, level, 0x10, 2);
}
}
dump_register_summary(level);
}
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
do_query("COMPUTE", 1);
dump_register_summary(level);
}
static void
cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
{
const char *buf = (void *)dwords;
int i;
if (quiet(3))
return;
// blob doesn't use CP_NOP for string_marker but it does
// use it for things that end up looking like, but aren't
// ascii chars:
if (!options->decode_markers)
return;
for (i = 0; i < 4 * sizedwords; i++) {
if (buf[i] == '\0')
break;
if (isascii(buf[i]))
printf("%c", buf[i]);
}
printf("\n");
}
static void
cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
{
/* traverse indirect buffers */
uint64_t ibaddr;
uint32_t ibsize;
uint32_t *ptr = NULL;
if (is_64b()) {
/* a5xx+.. high 32b of gpu addr, then size: */
ibaddr = dwords[0];
ibaddr |= ((uint64_t)dwords[1]) << 32;
ibsize = dwords[2];
} else {
ibaddr = dwords[0];
ibsize = dwords[1];
}
if (!quiet(3)) {
if (is_64b()) {
printf("%sibaddr:%016lx\n", levels[level], ibaddr);
} else {
printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
}
printf("%sibsize:%08x\n", levels[level], ibsize);
}
if (options->once && has_dumped(ibaddr, enable_mask))
return;
/* 'query-compare' mode implies 'once' mode, although we only need to
* process the cmdstream once for *any* enable_mask mode, since we are
* comparing binning vs draw reg values at the same time, ie. it is
* not useful to process the same draw in both binning and draw pass.
*/
if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
return;
/* map gpuaddr back to hostptr: */
ptr = hostptr(ibaddr);
if (ptr) {
/* If the GPU hung within the target IB, the trigger point will be
* just after the current CP_INDIRECT_BUFFER, because the IB is
* executed but never returns. Account for this by checking if
* the IB returned:
*/
highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
ib++;
ibs[ib].base = ibaddr;
ibs[ib].size = ibsize;
dump_commands(ptr, ibsize, level);
ib--;
} else {
fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
}
}
static void
cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
{
needs_wfi = false;
}
static void
cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
if (quiet(2))
return;
if (is_64b()) {
uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr);
dump_hex(&dwords[2], sizedwords-2, level+1);
if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
dump_commands(&dwords[2], sizedwords-2, level+1);
} else {
uint32_t gpuaddr = dwords[0];
printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
dump_float((float *)&dwords[1], sizedwords-1, level+1);
}
}
static void
cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t val = dwords[0] & 0xffff;
uint32_t and = dwords[1];
uint32_t or = dwords[2];
printl(3, "%srmw (%s & 0x%08x) | 0x%08x\n", levels[level], regname(val, 1), and, or);
if (needs_wfi)
printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x\n", regname(val, 1), and, or);
reg_set(val, (reg_val(val) & and) | or);
}
static void
cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t val = dwords[0] & 0xffff;
printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
if (quiet(2))
return;
uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr);
void *ptr = hostptr(gpuaddr);
if (ptr) {
uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
dump_hex(ptr, cnt, level + 1);
}
}
struct draw_state {
uint16_t enable_mask;
uint16_t flags;
uint32_t count;
uint64_t addr;
};
struct draw_state state[32];
#define FLAG_DIRTY 0x1
#define FLAG_DISABLE 0x2
#define FLAG_DISABLE_ALL_GROUPS 0x4
#define FLAG_LOAD_IMMED 0x8
static int draw_mode;
static void
disable_group(unsigned group_id)
{
struct draw_state *ds = &state[group_id];
memset(ds, 0, sizeof(*ds));
}
static void
disable_all_groups(void)
{
for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
disable_group(i);
}
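/* Replay a single deferred CP_SET_DRAW_STATE group at draw time; on a6xx
* the group is skipped unless its enable_mask overlaps the current render
* mode (binning/gmem/bypass):
*/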
static void
load_group(unsigned group_id, int level)
{
struct draw_state *ds = &state[group_id];
if (!ds->count)
return;
printl(2, "%sgroup_id: %u\n", levels[level], group_id);
printl(2, "%scount: %d\n", levels[level], ds->count);
printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
printl(2, "%sflags: %x\n", levels[level], ds->flags);
if (options->gpu_id >= 600) {
printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
if (!(ds->enable_mask & enable_mask)) {
printl(2, "%s\tskipped!\n\n", levels[level]);
return;
}
}
void *ptr = hostptr(ds->addr);
if (ptr) {
if (!quiet(2))
dump_hex(ptr, ds->count, level+1);
ib++;
dump_commands(ptr, ds->count, level+1);
ib--;
}
}
static void
load_all_groups(int level)
{
/* sanity check: nothing called from load_group() should end up back in
* load_all_groups(), and if it does, bad things happen:
*/
static bool loading_groups = false;
if (loading_groups) {
printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
return;
}
loading_groups = true;
for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
load_group(i, level);
loading_groups = false;
/* in 'query-compare' mode, defer disabling all groups until we have a
* chance to process the query:
*/
if (!options->query_compare)
disable_all_groups();
}
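/* CP_SET_DRAW_STATE: the payload is a list of entries, each a header
* dword (count/flags/enable_mask/group_id) followed by the group's gpu
* address (64b on a5xx+).  Depending on flags, a group is disabled,
* loaded immediately, or just recorded for the next draw.
*/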
static void
cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t i;
for (i = 0; i < sizedwords; ) {
struct draw_state *ds;
uint32_t count = dwords[i] & 0xffff;
uint32_t group_id = (dwords[i] >> 24) & 0x1f;
uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
uint32_t flags = (dwords[i] >> 16) & 0xf;
uint64_t addr;
if (is_64b()) {
addr = dwords[i + 1];
addr |= ((uint64_t)dwords[i + 2]) << 32;
i += 3;
} else {
addr = dwords[i + 1];
i += 2;
}
if (flags & FLAG_DISABLE_ALL_GROUPS) {
disable_all_groups();
continue;
}
if (flags & FLAG_DISABLE) {
disable_group(group_id);
continue;
}
assert(group_id < ARRAY_SIZE(state));
disable_group(group_id);
ds = &state[group_id];
ds->enable_mask = enable_mask;
ds->flags = flags;
ds->count = count;
ds->addr = addr;
if (flags & FLAG_LOAD_IMMED) {
load_group(group_id, level);
disable_group(group_id);
}
}
}
static void
cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
{
draw_mode = dwords[0];
}
/* execute compute shader */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
do_query("compute", 0);
dump_register_summary(level);
}
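/* indirect compute dispatch: the dispatch parameters are read from a
* gpu buffer, of which we dump the first few dwords:
*/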
static void
cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint64_t addr;
if (is_64b()) {
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
} else {
addr = dwords[1];
}
printl(3, "%saddr: %016llx\n", levels[level], addr);
dump_gpuaddr_size(addr, level, 0x10, 2);
do_query("compute", 0);
dump_register_summary(level);
}
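/* CP_SET_MARKER (a6xx): the low bits select the current render mode
* (binning / gmem / sysmem bypass); track it so per-mode filtering of
* draw-state groups and 'once' dumping works:
*/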
static void
cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
{
render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
if (!strcmp(render_mode, "RM6_BINNING")) {
enable_mask = MODE_BINNING;
} else if (!strcmp(render_mode, "RM6_GMEM")) {
enable_mask = MODE_GMEM;
} else if (!strcmp(render_mode, "RM6_BYPASS")) {
enable_mask = MODE_BYPASS;
}
}
static void
cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint64_t addr;
uint32_t *ptr, len;
assert(is_64b());
/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
* not sure if this can come in different sizes.
*
* First ptr doesn't seem to be cmdstream, second one does.
*
* Comment from downstream kernel:
*
* SRM -- set render mode (ex binning, direct render etc)
* SRM is set by UMD usually at start of IB to tell CP the type of
* preemption.
* KMD needs to set SRM to NULL to indicate CP that rendering is
* done by IB.
* ------------------------------------------------------------------
*
* Seems to always be one of these two:
* 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
* 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
*
*/
assert(options->gpu_id >= 500);
render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
if (sizedwords == 1)
return;
addr = dwords[1];
addr |= ((uint64_t)dwords[2]) << 32;
mode = dwords[3];
dump_gpuaddr(addr, level+1);
if (sizedwords == 5)
return;
assert(sizedwords == 8);
len = dwords[5];
addr = dwords[6];
addr |= ((uint64_t)dwords[7]) << 32;
printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
printl(3, "%slen: 0x%x\n", levels[level], len);
ptr = hostptr(addr);
if (ptr) {
if (!quiet(2)) {
ib++;
dump_commands(ptr, len, level+1);
ib--;
dump_hex(ptr, len, level+1);
}
}
}
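/* CP_COMPUTE_CHECKPOINT (a5xx): the packet ends with a 64b gpu address
* and length of a buffer that appears to contain cmdstream, so decode it:
*/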
static void
cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint64_t addr;
uint32_t *ptr, len;
assert(is_64b());
assert(options->gpu_id >= 500);
assert(sizedwords == 8);
addr = dwords[5];
addr |= ((uint64_t)dwords[6]) << 32;
len = dwords[7];
printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
printl(3, "%slen: 0x%x\n", levels[level], len);
ptr = hostptr(addr);
if (ptr) {
if (!quiet(2)) {
ib++;
dump_commands(ptr, len, level+1);
ib--;
dump_hex(ptr, len, level+1);
}
}
}
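/* CP_BLIT: count it like a draw for query and register-summary purposes: */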
static void
cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
{
do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
print_mode(level);
dump_register_summary(level);
}
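/* CP_CONTEXT_REG_BUNCH: payload is a flat list of (regoffset, value)
* pairs:
*/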
static void
cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
{
int i;
/* NOTE: this packet seems to write the same register multiple times; it is
* not clear whether the different writes are triggered by the FLUSH_SO_n
* events (if that is indeed what they are).
*/
bool saved_summary = summary;
summary = false;
for (i = 0; i < sizedwords; i += 2) {
dump_register(dwords[i+0], dwords[i+1], level+1);
reg_set(dwords[i+0], dwords[i+1]);
}
summary = saved_summary;
}
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint32_t reg = dwords[1] & 0xffff;
dump_register(reg, dwords[2], level+1);
reg_set(reg, dwords[2]);
}
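/* CP_SET_CTXSWITCH_IB: appears to point at an IB used for preemption
* context save/restore; decode it as nested cmdstream:
*/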
static void
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
{
uint64_t addr;
uint32_t size = dwords[2] & 0xffff;
void *ptr;
addr = dwords[0] | ((uint64_t)dwords[1] << 32);
printf("addr=%lx\n", addr);
ptr = hostptr(addr);
if (ptr) {
dump_commands(ptr, size, level+1);
}
}
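/* CP_SKIP_IB2_ENABLE_GLOBAL / _LOCAL: these appear to control whether
* the CP executes IB2s; just track the current values:
*/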
static void
cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
{
skip_ib2_enable_global = dwords[0];
}
static void
cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
{
skip_ib2_enable_local = dwords[0];
}
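/* Table of CP packet decode handlers, keyed by packet name as known to
* the rnn database.  Handlers for draw/dispatch packets are flagged with
* .load_all_groups so that pending CP_SET_DRAW_STATE groups get decoded
* before the draw itself.
*/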
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
static const struct type3_op {
const char *name;
void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
struct {
bool load_all_groups;
} options;
} type3_op[] = {
CP(NOP, cp_nop),
CP(INDIRECT_BUFFER, cp_indirect),
CP(INDIRECT_BUFFER_PFD, cp_indirect),
CP(WAIT_FOR_IDLE, cp_wfi),
CP(REG_RMW, cp_rmw),
CP(REG_TO_MEM, cp_reg_mem),
CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
CP(MEM_WRITE, cp_mem_write),
CP(EVENT_WRITE, cp_event_write),
CP(RUN_OPENCL, cp_run_cl),
CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
CP(SET_CONSTANT, cp_set_const),
CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
CP(WIDE_REG_WRITE, cp_wide_reg_write),
/* for a3xx */
CP(LOAD_STATE, cp_load_state),
CP(SET_BIN, cp_set_bin),
/* for a4xx */
CP(LOAD_STATE4, cp_load_state),
CP(SET_DRAW_STATE, cp_set_draw_state),
CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),
/* for a5xx */
CP(SET_RENDER_MODE, cp_set_render_mode),
CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
CP(BLIT, cp_blit),
CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups=true}),
CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
/* for a6xx */
CP(LOAD_STATE6_GEOM, cp_load_state),
CP(LOAD_STATE6_FRAG, cp_load_state),
CP(LOAD_STATE6, cp_load_state),
CP(SET_MODE, cp_set_mode),
CP(SET_MARKER, cp_set_marker),
CP(REG_WRITE, cp_reg_write),
CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
};
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
}
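/* look up the decode handler for an opcode, falling back to a no-op for
* unknown opcodes and for packets without a dedicated handler:
*/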
static const struct type3_op *
get_type3_op(unsigned opc)
{
static const struct type3_op dummy_op = {
.fxn = noop_fxn,
};
const char *name = pktname(opc);
if (!name)
return &dummy_op;
for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
if (!strcmp(name, type3_op[i].name))
return &type3_op[i];
return &dummy_op;
}
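/* Decode a buffer of cmdstream: walk the packet headers (type0/type4
* register writes, type2 nops, type3/type7 opcodes) and dispatch each
* opcode packet to its handler from the table above.  'level' controls
* the output indentation for nested IBs.
*/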
void
dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
{
int dwords_left = sizedwords;
uint32_t count = 0; /* dword count including packet header */
uint32_t val;
// assert(dwords);
if (!dwords) {
printf("NULL cmd buffer!\n");
return;
}
draws[ib] = 0;
while (dwords_left > 0) {
current_draw_count = draw_count;
/* hack: in some versions this looks like a -1 underflow when a
* pkt0 tries to write zero registers
*/
// if ((dwords[0] >> 16) == 0xffff)
// goto skip;
if (pkt_is_type0(dwords[0])) {
printl(3, "t0");
count = type0_pkt_size(dwords[0]) + 1;
val = type0_pkt_offset(dwords[0]);
assert(val < regcnt());
printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
(dwords[0] & 0x8000) ? " (same register)" : "", val);
dump_registers(val, dwords+1, count-1, level+2);
if (!quiet(3))
dump_hex(dwords, count, level+1);
} else if (pkt_is_type4(dwords[0])) {
/* basically the same(ish) as type0 prior to a5xx */
printl(3, "t4");
count = type4_pkt_size(dwords[0]) + 1;
val = type4_pkt_offset(dwords[0]);
assert(val < regcnt());
printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
dump_registers(val, dwords+1, count-1, level+2);
if (!quiet(3))
dump_hex(dwords, count, level+1);
#if 0
} else if (pkt_is_type1(dwords[0])) {
printl(3, "t1");
count = 3;
val = dwords[0] & 0xfff;
printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
dump_registers(val, dwords+1, 1, level+2);
val = (dwords[0] >> 12) & 0xfff;
printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
dump_registers(val, dwords+2, 1, level+2);
if (!quiet(3))
dump_hex(dwords, count, level+1);
} else if (pkt_is_type2(dwords[0])) {
printl(3, "t2");
printf("%sNOP\n", levels[level+1]);
count = 1;
if (!quiet(3))
dump_hex(dwords, count, level+1);
#endif
} else if (pkt_is_type3(dwords[0])) {
count = type3_pkt_size(dwords[0]) + 1;
val = cp_type3_opcode(dwords[0]);
const struct type3_op *op = get_type3_op(val);
if (op->options.load_all_groups)
load_all_groups(level+1);
printl(3, "t3");
const char *name = pktname(val);
if (!quiet(2)) {
printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
}
if (name)
dump_domain(dwords+1, count-1, level+2, name);
op->fxn(dwords+1, count-1, level+1);
if (!quiet(2))
dump_hex(dwords, count, level+1);
} else if (pkt_is_type7(dwords[0])) {
count = type7_pkt_size(dwords[0]) + 1;
val = cp_type7_opcode(dwords[0]);
const struct type3_op *op = get_type3_op(val);
if (op->options.load_all_groups)
load_all_groups(level+1);
printl(3, "t7");
const char *name = pktname(val);
if (!quiet(2)) {
printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
val, count);
}
if (name) {
/* special hack for two packets that decode the same way
* on a6xx:
*/
if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
!strcmp(name, "CP_LOAD_STATE6_GEOM"))
name = "CP_LOAD_STATE6";
dump_domain(dwords+1, count-1, level+2, name);
}
op->fxn(dwords+1, count-1, level+1);
if (!quiet(2))
dump_hex(dwords, count, level+1);
} else if (pkt_is_type2(dwords[0])) {
printl(3, "t2");
printl(3, "%snop\n", levels[level+1]);
} else {
/* for 5xx+ we can do a passable job of looking for start of next valid packet: */
if (options->gpu_id >= 500) {
while (dwords_left > 0) {
if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
break;
printf("bad type! %08x\n", dwords[0]);
dwords++;
dwords_left--;
}
} else {
printf("bad type! %08x\n", dwords[0]);
return;
}
/* the recovery loop above already positioned us at the next valid
* looking packet, so skip the normal 'count' based advance:
*/
continue;
}
dwords += count;
dwords_left -= count;
}
if (dwords_left < 0)
printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
}