/*
 * Copyright (c) 2012 Rob Clark
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "freedreno_pm4.h"

#include "buffers.h"
#include "cffdec.h"
#include "disasm.h"
#include "redump.h"
#include "rnnutil.h"
#include "script.h"

/* ************************************************************************* */

/* originally based on kernel recovery dump code: */

static const struct cffdec_options *options;

static bool needs_wfi = false;
static bool summary = false;
static bool in_summary = false;
static int vertices;

static inline unsigned regcnt(void)
{
   if (options->gpu_id >= 500)
      return 0xffff;
   else
      return 0x7fff;
}

static int is_64b(void)
{
   return options->gpu_id >= 500;
}

static int draws[4];
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally the cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile. The
    * triggered flag serves two purposes to help make it more clear
    * what part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true. And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered;
} ibs[4];
static int ib;

static int draw_count;
static int current_draw_count;

/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query string until after the gpu_id is known and
 * the rnn db is loaded:
 */
static int *queryvals;

static bool quiet(int lvl)
{
   if ((options->draw_filter != -1) &&
       (options->draw_filter != current_draw_count))
      return true;
   if ((lvl >= 3) && (summary || options->querystrs || options->script))
      return true;
   if ((lvl >= 2) && (options->querystrs || options->script))
      return true;
   return false;
}

void printl(int lvl, const char *fmt, ...)
{ va_list args; if (quiet(lvl)) return; va_start(args, fmt); vprintf(fmt, args); va_end(args); } static const char *levels[] = { "\t", "\t\t", "\t\t\t", "\t\t\t\t", "\t\t\t\t\t", "\t\t\t\t\t\t", "\t\t\t\t\t\t\t", "\t\t\t\t\t\t\t\t", "\t\t\t\t\t\t\t\t\t", "x", "x", "x", "x", "x", "x", }; enum state_src_t { STATE_SRC_DIRECT, STATE_SRC_INDIRECT, STATE_SRC_BINDLESS, }; /* SDS (CP_SET_DRAW_STATE) helpers: */ static void load_all_groups(int level); static void disable_all_groups(void); static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level); static void dump_tex_const(uint32_t *texsamp, int num_unit, int level); static bool highlight_gpuaddr(uint64_t gpuaddr) { if (!options->ibs[ib].base) return false; if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered) return false; if (ibs[ib].triggered) return options->color; if (options->ibs[ib].base != ibs[ib].base) return false; uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem); uint64_t end = ibs[ib].base + 4 * ibs[ib].size; bool triggered = (start <= gpuaddr) && (gpuaddr <= end); ibs[ib].triggered |= triggered; if (triggered) printf("ESTIMATED CRASH LOCATION!\n"); return triggered & options->color; } static void dump_hex(uint32_t *dwords, uint32_t sizedwords, int level) { int i, j; int lastzero = 1; if (quiet(2)) return; for (i = 0; i < sizedwords; i += 8) { int zero = 1; /* always show first row: */ if (i == 0) zero = 0; for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++) if (dwords[i + j]) zero = 0; if (zero && !lastzero) printf("*\n"); lastzero = zero; if (zero) continue; uint64_t addr = gpuaddr(&dwords[i]); bool highlight = highlight_gpuaddr(addr); if (highlight) printf("\x1b[0;1;31m"); if (is_64b()) { printf("%016" PRIx64 ":%s", addr, levels[level]); } else { printf("%08x:%s", (uint32_t)addr, levels[level]); } if (highlight) printf("\x1b[0m"); printf("%04x:", i * 4); for (j = 0; (j < 8) && (i + j < sizedwords); j++) { printf(" %08x", dwords[i + j]); } printf("\n"); } } static void dump_float(float *dwords, uint32_t sizedwords, int level) { int i; for (i = 0; i < sizedwords; i++) { if ((i % 8) == 0) { if (is_64b()) { printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]); } else { printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]); } } else { printf(" "); } printf("%8f", *(dwords++)); if ((i % 8) == 7) printf("\n"); } if (i % 8) printf("\n"); } /* I believe the surface format is low bits: #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL comments in sys2gmem_tex_const indicate that address is [31:12], but looks like at least some of the bits above the format have different meaning.. 
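*
* (illustrative note: parse_dword_addr() below just splits a dword into
* address and flag bits with a caller-provided mask, e.g. with mask=0xfff
* a value of 0x12345abc would decode as gpuaddr=0x12345000, flags=0xabc)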
*/ static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags, uint32_t mask) { assert(!is_64b()); /* this is only used on a2xx */ *gpuaddr = dword & ~mask; *flags = dword & mask; } static uint32_t type0_reg_vals[0xffff + 1]; static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) / 8]; /* written since last draw */ static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8]; static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)]; static bool reg_rewritten(uint32_t regbase) { return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8))); } bool reg_written(uint32_t regbase) { return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8))); } static void clear_rewritten(void) { memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten)); } static void clear_written(void) { memset(type0_reg_written, 0, sizeof(type0_reg_written)); clear_rewritten(); } uint32_t reg_lastval(uint32_t regbase) { return lastvals[regbase]; } static void clear_lastvals(void) { memset(lastvals, 0, sizeof(lastvals)); } uint32_t reg_val(uint32_t regbase) { return type0_reg_vals[regbase]; } void reg_set(uint32_t regbase, uint32_t val) { assert(regbase < regcnt()); type0_reg_vals[regbase] = val; type0_reg_written[regbase / 8] |= (1 << (regbase % 8)); type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8)); } static void reg_dump_scratch(const char *name, uint32_t dword, int level) { unsigned r; if (quiet(3)) return; r = regbase("CP_SCRATCH[0].REG"); // if not, try old a2xx/a3xx version: if (!r) r = regbase("CP_SCRATCH_REG0"); if (!r) return; printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5), reg_val(r + 6), reg_val(r + 7)); } static void dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl) { void *buf; if (quiet(quietlvl)) return; buf = hostptr(gpuaddr); if (buf) { dump_hex(buf, sizedwords, level + 1); } } static void dump_gpuaddr(uint64_t gpuaddr, int level) { dump_gpuaddr_size(gpuaddr, level, 64, 3); } static void reg_dump_gpuaddr(const char *name, uint32_t dword, int level) { dump_gpuaddr(dword, level); } uint32_t gpuaddr_lo; static void reg_gpuaddr_lo(const char *name, uint32_t dword, int level) { gpuaddr_lo = dword; } static void reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level) { dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level); } static void reg_dump_gpuaddr64(const char *name, uint64_t qword, int level) { dump_gpuaddr(qword, level); } static void dump_shader(const char *ext, void *buf, int bufsz) { if (options->dump_shaders) { static int n = 0; char filename[16]; int fd; sprintf(filename, "%04d.%s", n++, ext); fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644); if (fd != -1) { write(fd, buf, bufsz); close(fd); } } } static void disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level) { void *buf; gpuaddr &= 0xfffffffffffffff0; if (quiet(3)) return; buf = hostptr(gpuaddr); if (buf) { uint32_t sizedwords = hostlen(gpuaddr) / 4; const char *ext; dump_hex(buf, min(64, sizedwords), level + 1); try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id); /* this is a bit ugly way, but oh well.. 
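* (the substring matching below just picks the dump-file extension from
* the register name, e.g. an SP_FS_OBJ_* register is assumed to point at
* a fragment shader and gets written out by dump_shader() as a numbered
* .fo3 file)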
*/ if (strstr(name, "SP_VS_OBJ")) { ext = "vo3"; } else if (strstr(name, "SP_FS_OBJ")) { ext = "fo3"; } else if (strstr(name, "SP_GS_OBJ")) { ext = "go3"; } else if (strstr(name, "SP_CS_OBJ")) { ext = "co3"; } else { ext = NULL; } if (ext) dump_shader(ext, buf, sizedwords * 4); } } static void reg_disasm_gpuaddr(const char *name, uint32_t dword, int level) { disasm_gpuaddr(name, dword, level); } static void reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level) { disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level); } static void reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level) { disasm_gpuaddr(name, qword, level); } /* Find the value of the TEX_COUNT register that corresponds to the named * TEX_SAMP/TEX_CONST reg. * * Note, this kinda assumes an equal # of samplers and textures, but not * really sure if there is a much better option. I suppose on a6xx we * could instead decode the bitfields in SP_xS_CONFIG */ static int get_tex_count(const char *name) { char count_reg[strlen(name) + 5]; char *p; p = strstr(name, "CONST"); if (!p) p = strstr(name, "SAMP"); if (!p) return 0; int n = p - name; strncpy(count_reg, name, n); strcpy(count_reg + n, "COUNT"); return reg_val(regbase(count_reg)); } static void reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level) { if (!in_summary) return; int num_unit = get_tex_count(name); uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32); void *buf = hostptr(gpuaddr); if (!buf) return; dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1); } static void reg_dump_tex_const_hi(const char *name, uint32_t dword, int level) { if (!in_summary) return; int num_unit = get_tex_count(name); uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32); void *buf = hostptr(gpuaddr); if (!buf) return; dump_tex_const(buf, num_unit, level + 1); } /* * Registers with special handling (rnndec_decode() handles rest): */ #define REG(x, fxn) { #x, fxn } #define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true } static struct { const char *regname; void (*fxn)(const char *name, uint32_t dword, int level); void (*fxn64)(const char *name, uint64_t qword, int level); uint32_t regbase; bool is_reg64; } reg_a2xx[] = { REG(CP_SCRATCH_REG0, reg_dump_scratch), REG(CP_SCRATCH_REG1, reg_dump_scratch), REG(CP_SCRATCH_REG2, reg_dump_scratch), REG(CP_SCRATCH_REG3, reg_dump_scratch), REG(CP_SCRATCH_REG4, reg_dump_scratch), REG(CP_SCRATCH_REG5, reg_dump_scratch), REG(CP_SCRATCH_REG6, reg_dump_scratch), REG(CP_SCRATCH_REG7, reg_dump_scratch), {NULL}, }, reg_a3xx[] = { REG(CP_SCRATCH_REG0, reg_dump_scratch), REG(CP_SCRATCH_REG1, reg_dump_scratch), REG(CP_SCRATCH_REG2, reg_dump_scratch), REG(CP_SCRATCH_REG3, reg_dump_scratch), REG(CP_SCRATCH_REG4, reg_dump_scratch), REG(CP_SCRATCH_REG5, reg_dump_scratch), REG(CP_SCRATCH_REG6, reg_dump_scratch), REG(CP_SCRATCH_REG7, reg_dump_scratch), REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr), REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr), REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr), REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr), REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr), REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), {NULL}, }, reg_a4xx[] = { REG(CP_SCRATCH[0].REG, reg_dump_scratch), REG(CP_SCRATCH[0x1].REG, reg_dump_scratch), REG(CP_SCRATCH[0x2].REG, reg_dump_scratch), REG(CP_SCRATCH[0x3].REG, reg_dump_scratch), REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), 
REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr), REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr), REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr), REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr), REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr), REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr), REG(SP_VS_OBJ_START, reg_disasm_gpuaddr), REG(SP_FS_OBJ_START, reg_disasm_gpuaddr), REG(SP_GS_OBJ_START, reg_disasm_gpuaddr), REG(SP_HS_OBJ_START, reg_disasm_gpuaddr), REG(SP_DS_OBJ_START, reg_disasm_gpuaddr), REG(SP_CS_OBJ_START, reg_disasm_gpuaddr), REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), {NULL}, }, reg_a5xx[] = { REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo), REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi), REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo), REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi), REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo), REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi), REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo), REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi), REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo), REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi), REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo), REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi), REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo), REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi), REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo), REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi), REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo), REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi), REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo), REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi), REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo), REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi), REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo), REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi), REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo), REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi), REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo), REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi), REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo), REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi), REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo), REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi), REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo), REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi), REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo), REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi), REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo), REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, 
reg_dump_gpuaddr_hi), // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo), // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi), // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo), // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi), // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo), // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi), // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo), // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi), // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo), // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi), // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo), // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi), // REG(RB_2D_SRC_LO, reg_gpuaddr_lo), // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi), // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo), // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi), // REG(RB_2D_DST_LO, reg_gpuaddr_lo), // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi), // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo), // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi), {NULL}, }, reg_a6xx[] = { REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64), REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64), REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64), REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64), REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64), REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64), REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64), REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64), REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64), REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64), REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64), REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64), REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64), REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64), REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64), REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64), REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64), REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64), {NULL}, }, *type0_reg; static struct rnn *rnn; static void init_rnn(const char *gpuname) { rnn = rnn_new(!options->color); rnn_load(rnn, gpuname); if (options->querystrs) { int i; queryvals = calloc(options->nquery, sizeof(queryvals[0])); for (i = 0; i < options->nquery; i++) { int val = strtol(options->querystrs[i], NULL, 0); if (val == 0) val = regbase(options->querystrs[i]); queryvals[i] = val; printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]); } } for (unsigned idx = 0; type0_reg[idx].regname; idx++) { type0_reg[idx].regbase = regbase(type0_reg[idx].regname); if (!type0_reg[idx].regbase) { printf("invalid register name: %s\n", type0_reg[idx].regname); exit(1); } } } void reset_regs(void) { clear_written(); clear_lastvals(); memset(&ibs, 0, sizeof(ibs)); } void cffdec_init(const struct cffdec_options *_options) { options = _options; summary = options->summary; /* in case we're decoding multiple files: */ free(queryvals); reset_regs(); draw_count = 0; /* TODO we need an API to free/cleanup any previous rnn */ switch (options->gpu_id) { case 200 ... 299: type0_reg = reg_a2xx; init_rnn("a2xx"); break; case 300 ... 399: type0_reg = reg_a3xx; init_rnn("a3xx"); break; case 400 ... 499: type0_reg = reg_a4xx; init_rnn("a4xx"); break; case 500 ... 599: type0_reg = reg_a5xx; init_rnn("a5xx"); break; case 600 ... 
699: type0_reg = reg_a6xx; init_rnn("a6xx"); break; default: errx(-1, "unsupported gpu"); } } const char * pktname(unsigned opc) { return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc); } const char * regname(uint32_t regbase, int color) { return rnn_regname(rnn, regbase, color); } uint32_t regbase(const char *name) { return rnn_regbase(rnn, name); } static int endswith(uint32_t regbase, const char *suffix) { const char *name = regname(regbase, 0); const char *s = strstr(name, suffix); if (!s) return 0; return (s - strlen(name) + strlen(suffix)) == name; } void dump_register_val(uint32_t regbase, uint32_t dword, int level) { struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase); if (info && info->typeinfo) { uint64_t gpuaddr = 0; char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword); printf("%s%s: %s", levels[level], info->name, decoded); /* Try and figure out if we are looking at a gpuaddr.. this * might be useful for other gen's too, but at least a5xx has * the _HI/_LO suffix we can look for. Maybe a better approach * would be some special annotation in the xml.. * for a6xx use "address" and "waddress" types */ if (options->gpu_id >= 600) { if (!strcmp(info->typeinfo->name, "address") || !strcmp(info->typeinfo->name, "waddress")) { gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword; } } else if (options->gpu_id >= 500) { if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) { gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1); } else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) { gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword; } } if (gpuaddr && hostptr(gpuaddr)) { printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u", gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr), hostlen(gpubaseaddr(gpuaddr))); } printf("\n"); free(decoded); } else if (info) { printf("%s%s: %08x\n", levels[level], info->name, dword); } else { printf("%s<%04x>: %08x\n", levels[level], regbase, dword); } if (info) { free(info->name); free(info); } } static void dump_register(uint32_t regbase, uint32_t dword, int level) { if (!quiet(3)) { dump_register_val(regbase, dword, level); } for (unsigned idx = 0; type0_reg[idx].regname; idx++) { if (type0_reg[idx].regbase == regbase) { if (type0_reg[idx].is_reg64) { uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword; type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level); } else { type0_reg[idx].fxn(type0_reg[idx].regname, dword, level); } break; } } } static bool is_banked_reg(uint32_t regbase) { return (0x2000 <= regbase) && (regbase < 0x2400); } static void dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level) { while (sizedwords--) { int last_summary = summary; /* access to non-banked registers needs a WFI: * TODO banked register range for a2xx?? 
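* (as implemented, is_banked_reg() above only treats the 0x2000-0x23ff
* context-state window as banked, so with a WFI still pending any write
* outside that range gets the NEEDS WFI warning below; whether that
* range is right for a2xx is an open question)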
*/ if (needs_wfi && !is_banked_reg(regbase)) printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase); reg_set(regbase, *dwords); dump_register(regbase, *dwords, level); regbase++; dwords++; summary = last_summary; } } static void dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name) { struct rnndomain *dom; int i; dom = rnn_finddomain(rnn->db, name); if (!dom) return; if (script_packet) script_packet(dwords, sizedwords, rnn, dom); if (quiet(2)) return; for (i = 0; i < sizedwords; i++) { struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0); char *decoded; if (!(info && info->typeinfo)) break; uint64_t value = dwords[i]; if (info->typeinfo->high >= 32 && i < sizedwords - 1) { value |= (uint64_t)dwords[i + 1] << 32; i++; /* skip the next dword since we're printing it now */ } decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value); /* Unlike the register printing path, we don't print the name * of the register, so if it doesn't contain other named * things (i.e. it isn't a bitset) then print the register * name as if it's a bitset with a single entry. This avoids * having to create a dummy register with a single entry to * get a name in the decoding. */ if (info->typeinfo->type == RNN_TTYPE_BITSET || info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) { printf("%s%s\n", levels[level], decoded); } else { printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname, info->name, rnn->vc->colors->reset, decoded); } free(decoded); free(info->name); free(info); } } static uint32_t bin_x1, bin_x2, bin_y1, bin_y2; static unsigned mode; static const char *render_mode; static enum { MODE_BINNING = 0x1, MODE_GMEM = 0x2, MODE_BYPASS = 0x4, MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS, } enable_mask = MODE_ALL; static bool skip_ib2_enable_global; static bool skip_ib2_enable_local; static void print_mode(int level) { if ((options->gpu_id >= 500) && !quiet(2)) { printf("%smode: %s\n", levels[level], render_mode); printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local); } } static bool skip_query(void) { switch (options->query_mode) { case QUERY_ALL: /* never skip: */ return false; case QUERY_WRITTEN: for (int i = 0; i < options->nquery; i++) { uint32_t regbase = queryvals[i]; if (!reg_written(regbase)) { continue; } if (reg_rewritten(regbase)) { return false; } } return true; case QUERY_DELTA: for (int i = 0; i < options->nquery; i++) { uint32_t regbase = queryvals[i]; if (!reg_written(regbase)) { continue; } uint32_t lastval = reg_val(regbase); if (lastval != lastvals[regbase]) { return false; } } return true; } return true; } static void __do_query(const char *primtype, uint32_t num_indices) { int n = 0; if ((500 <= options->gpu_id) && (options->gpu_id < 700)) { uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL")); uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR")); bin_x1 = scissor_tl & 0xffff; bin_y1 = scissor_tl >> 16; bin_x2 = scissor_br & 0xffff; bin_y2 = scissor_br >> 16; } for (int i = 0; i < options->nquery; i++) { uint32_t regbase = queryvals[i]; if (reg_written(regbase)) { uint32_t lastval = reg_val(regbase); printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1, bin_y1, bin_x2, bin_y2, num_indices); if (options->gpu_id >= 500) printf("%s:", render_mode); printf("\t%08x", lastval); if (lastval != lastvals[regbase]) { printf("!"); } else { printf(" "); } if (reg_rewritten(regbase)) { printf("+"); } else { printf(" "); } dump_register_val(regbase, lastval, 
0); n++; } } if (n > 1) printf("\n"); } static void do_query_compare(const char *primtype, uint32_t num_indices) { unsigned saved_enable_mask = enable_mask; const char *saved_render_mode = render_mode; /* in 'query-compare' mode, we want to see if the register is writtten * or changed in any mode: * * (NOTE: this could cause false-positive for 'query-delta' if the reg * is written with different values in binning vs sysmem/gmem mode, as * we don't track previous values per-mode, but I think we can live with * that) */ enable_mask = MODE_ALL; clear_rewritten(); load_all_groups(0); if (!skip_query()) { /* dump binning pass values: */ enable_mask = MODE_BINNING; render_mode = "BINNING"; clear_rewritten(); load_all_groups(0); __do_query(primtype, num_indices); /* dump draw pass values: */ enable_mask = MODE_GMEM | MODE_BYPASS; render_mode = "DRAW"; clear_rewritten(); load_all_groups(0); __do_query(primtype, num_indices); printf("\n"); } enable_mask = saved_enable_mask; render_mode = saved_render_mode; disable_all_groups(); } /* well, actually query and script.. * NOTE: call this before dump_register_summary() */ static void do_query(const char *primtype, uint32_t num_indices) { if (script_draw) script_draw(primtype, num_indices); if (options->query_compare) { do_query_compare(primtype, num_indices); return; } if (skip_query()) return; __do_query(primtype, num_indices); } static void cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t start = dwords[1] >> 16; uint32_t size = dwords[1] & 0xffff; const char *type = NULL, *ext = NULL; gl_shader_stage disasm_type; switch (dwords[0]) { case 0: type = "vertex"; ext = "vo"; disasm_type = MESA_SHADER_VERTEX; break; case 1: type = "fragment"; ext = "fo"; disasm_type = MESA_SHADER_FRAGMENT; break; default: type = ""; disasm_type = 0; break; } printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size); disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type); /* dump raw shader: */ if (ext) dump_shader(ext, dwords + 2, (sizedwords - 2) * 4); } static void cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t reg = dwords[0] & 0xffff; int i; for (i = 1; i < sizedwords; i++) { dump_register(reg, dwords[i], level + 1); reg_set(reg, dwords[i]); reg++; } } enum state_t { TEX_SAMP = 1, TEX_CONST, TEX_MIPADDR, /* a3xx only */ SHADER_PROG, SHADER_CONST, // image/ssbo state: SSBO_0, SSBO_1, SSBO_2, UBO, // unknown things, just to hexdumps: UNKNOWN_DWORDS, UNKNOWN_2DWORDS, UNKNOWN_4DWORDS, }; enum adreno_state_block { SB_VERT_TEX = 0, SB_VERT_MIPADDR = 1, SB_FRAG_TEX = 2, SB_FRAG_MIPADDR = 3, SB_VERT_SHADER = 4, SB_GEOM_SHADER = 5, SB_FRAG_SHADER = 6, SB_COMPUTE_SHADER = 7, }; /* TODO there is probably a clever way to let rnndec parse things so * we don't have to care about packet format differences across gens */ static void a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state, enum state_src_t *src) { unsigned state_block_id = (dwords[0] >> 19) & 0x7; unsigned state_type = dwords[1] & 0x3; static const struct { gl_shader_stage stage; enum state_t state; } lookup[0xf][0x3] = { [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP}, [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST}, [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP}, [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST}, [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG}, [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST}, [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG}, 
[SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST}, }; *stage = lookup[state_block_id][state_type].stage; *state = lookup[state_block_id][state_type].state; unsigned state_src = (dwords[0] >> 16) & 0x7; if (state_src == 0 /* SS_DIRECT */) *src = STATE_SRC_DIRECT; else *src = STATE_SRC_INDIRECT; } static enum state_src_t _get_state_src(unsigned dword0) { switch ((dword0 >> 16) & 0x3) { case 0: /* SS4_DIRECT / SS6_DIRECT */ return STATE_SRC_DIRECT; case 2: /* SS4_INDIRECT / SS6_INDIRECT */ return STATE_SRC_INDIRECT; case 1: /* SS6_BINDLESS */ return STATE_SRC_BINDLESS; default: return STATE_SRC_DIRECT; } } static void _get_state_type(unsigned state_block_id, unsigned state_type, gl_shader_stage *stage, enum state_t *state) { static const struct { gl_shader_stage stage; enum state_t state; } lookup[0x10][0x4] = { // SB4_VS_TEX: [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP}, [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST}, [0x0][2] = {MESA_SHADER_VERTEX, UBO}, // SB4_HS_TEX: [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP}, [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST}, [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO}, // SB4_DS_TEX: [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP}, [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST}, [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO}, // SB4_GS_TEX: [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP}, [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST}, [0x3][2] = {MESA_SHADER_GEOMETRY, UBO}, // SB4_FS_TEX: [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP}, [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST}, [0x4][2] = {MESA_SHADER_FRAGMENT, UBO}, // SB4_CS_TEX: [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP}, [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST}, [0x5][2] = {MESA_SHADER_COMPUTE, UBO}, // SB4_VS_SHADER: [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG}, [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST}, [0x8][2] = {MESA_SHADER_VERTEX, UBO}, // SB4_HS_SHADER [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG}, [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST}, [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO}, // SB4_DS_SHADER [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG}, [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST}, [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO}, // SB4_GS_SHADER [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG}, [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST}, [0xb][2] = {MESA_SHADER_GEOMETRY, UBO}, // SB4_FS_SHADER: [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG}, [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST}, [0xc][2] = {MESA_SHADER_FRAGMENT, UBO}, // SB4_CS_SHADER: [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG}, [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST}, [0xd][2] = {MESA_SHADER_COMPUTE, UBO}, [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */ // SB4_SSBO (shared across all stages) [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */ [0xe][1] = {0, SSBO_1}, [0xe][2] = {0, SSBO_2}, // SB4_CS_SSBO [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0}, [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1}, [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2}, // unknown things /* This looks like combined UBO state for 3d stages (a5xx and * before?? 
I think a6xx has UBO state per shader stage: */ [0x6][2] = {0, UBO}, [0x7][1] = {0, UNKNOWN_2DWORDS}, }; *stage = lookup[state_block_id][state_type].stage; *state = lookup[state_block_id][state_type].state; } static void a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state, enum state_src_t *src) { unsigned state_block_id = (dwords[0] >> 18) & 0xf; unsigned state_type = dwords[1] & 0x3; _get_state_type(state_block_id, state_type, stage, state); *src = _get_state_src(dwords[0]); } static void a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state, enum state_src_t *src) { unsigned state_block_id = (dwords[0] >> 18) & 0xf; unsigned state_type = (dwords[0] >> 14) & 0x3; _get_state_type(state_block_id, state_type, stage, state); *src = _get_state_src(dwords[0]); } static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level) { for (int i = 0; i < num_unit; i++) { /* work-around to reduce noise for opencl blob which always * writes the max # regardless of # of textures used */ if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0)) break; if ((300 <= options->gpu_id) && (options->gpu_id < 400)) { dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP"); dump_hex(texsamp, 2, level + 1); texsamp += 2; } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) { dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP"); dump_hex(texsamp, 2, level + 1); texsamp += 2; } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) { dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP"); dump_hex(texsamp, 4, level + 1); texsamp += 4; } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) { dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP"); dump_hex(texsamp, 4, level + 1); texsamp += src == STATE_SRC_BINDLESS ? 
16 : 4; } } } static void dump_tex_const(uint32_t *texconst, int num_unit, int level) { for (int i = 0; i < num_unit; i++) { /* work-around to reduce noise for opencl blob which always * writes the max # regardless of # of textures used */ if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) && (texconst[2] == 0) && (texconst[3] == 0)) break; if ((300 <= options->gpu_id) && (options->gpu_id < 400)) { dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST"); dump_hex(texconst, 4, level + 1); texconst += 4; } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) { dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST"); if (options->dump_textures) { uint32_t addr = texconst[4] & ~0x1f; dump_gpuaddr(addr, level - 2); } dump_hex(texconst, 8, level + 1); texconst += 8; } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) { dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST"); if (options->dump_textures) { uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4]; dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3); } dump_hex(texconst, 12, level + 1); texconst += 12; } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) { dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST"); if (options->dump_textures) { uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4]; dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3); } dump_hex(texconst, 16, level + 1); texconst += 16; } } } static void cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level) { gl_shader_stage stage; enum state_t state; enum state_src_t src; uint32_t num_unit = (dwords[0] >> 22) & 0x1ff; uint64_t ext_src_addr; void *contents; int i; if (quiet(2) && !options->script) return; if (options->gpu_id >= 600) a6xx_get_state_type(dwords, &stage, &state, &src); else if (options->gpu_id >= 400) a4xx_get_state_type(dwords, &stage, &state, &src); else a3xx_get_state_type(dwords, &stage, &state, &src); switch (src) { case STATE_SRC_DIRECT: ext_src_addr = 0; break; case STATE_SRC_INDIRECT: if (is_64b()) { ext_src_addr = dwords[1] & 0xfffffffc; ext_src_addr |= ((uint64_t)dwords[2]) << 32; } else { ext_src_addr = dwords[1] & 0xfffffffc; } break; case STATE_SRC_BINDLESS: { const unsigned base_reg = stage == MESA_SHADER_COMPUTE ? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR") : regbase("HLSQ_BINDLESS_BASE[0].ADDR"); if (is_64b()) { const unsigned reg = base_reg + (dwords[1] >> 28) * 2; ext_src_addr = reg_val(reg) & 0xfffffffc; ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32; } else { const unsigned reg = base_reg + (dwords[1] >> 28); ext_src_addr = reg_val(reg) & 0xfffffffc; } ext_src_addr += 4 * (dwords[1] & 0xffffff); break; } } if (ext_src_addr) contents = hostptr(ext_src_addr); else contents = is_64b() ? dwords + 3 : dwords + 2; if (!contents) return; switch (state) { case SHADER_PROG: { const char *ext = NULL; if (quiet(2)) return; if (options->gpu_id >= 400) num_unit *= 16; else if (options->gpu_id >= 300) num_unit *= 4; /* shaders: * * note: num_unit seems to be # of instruction groups, where * an instruction group has 4 64bit instructions. 
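* a worked example, assuming that interpretation: a raw num_unit of 2 on
* a3xx gets scaled to 8 above (2 groups x 4 instructions), and the
* disassembler below is then handed num_unit * 2 = 16 dwords, i.e. 8
* 64-bit instructions.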
*/ if (stage == MESA_SHADER_VERTEX) { ext = "vo3"; } else if (stage == MESA_SHADER_GEOMETRY) { ext = "go3"; } else if (stage == MESA_SHADER_COMPUTE) { ext = "co3"; } else if (stage == MESA_SHADER_FRAGMENT) { ext = "fo3"; } if (contents) try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout, options->gpu_id); /* dump raw shader: */ if (ext) dump_shader(ext, contents, num_unit * 2 * 4); break; } case SHADER_CONST: { if (quiet(2)) return; /* uniforms/consts: * * note: num_unit seems to be # of pairs of dwords?? */ if (options->gpu_id >= 400) num_unit *= 2; dump_float(contents, num_unit * 2, level + 1); dump_hex(contents, num_unit * 2, level + 1); break; } case TEX_MIPADDR: { uint32_t *addrs = contents; if (quiet(2)) return; /* mipmap consts block just appears to be array of num_unit gpu addr's: */ for (i = 0; i < num_unit; i++) { void *ptr = hostptr(addrs[i]); printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]); if (options->dump_textures) { printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i])); dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1); } } break; } case TEX_SAMP: { dump_tex_samp(contents, src, num_unit, level); break; } case TEX_CONST: { dump_tex_const(contents, num_unit, level); break; } case SSBO_0: { uint32_t *ssboconst = (uint32_t *)contents; for (i = 0; i < num_unit; i++) { int sz = 4; if (400 <= options->gpu_id && options->gpu_id < 500) { dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0"); } else if (500 <= options->gpu_id && options->gpu_id < 600) { dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0"); } else if (600 <= options->gpu_id && options->gpu_id < 700) { sz = 16; dump_domain(ssboconst, 16, level + 2, "A6XX_IBO"); } dump_hex(ssboconst, sz, level + 1); ssboconst += sz; } break; } case SSBO_1: { uint32_t *ssboconst = (uint32_t *)contents; for (i = 0; i < num_unit; i++) { if (400 <= options->gpu_id && options->gpu_id < 500) dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1"); else if (500 <= options->gpu_id && options->gpu_id < 600) dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1"); dump_hex(ssboconst, 2, level + 1); ssboconst += 2; } break; } case SSBO_2: { uint32_t *ssboconst = (uint32_t *)contents; for (i = 0; i < num_unit; i++) { /* TODO a4xx and a5xx might be same: */ if ((500 <= options->gpu_id) && (options->gpu_id < 600)) { dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2"); dump_hex(ssboconst, 2, level + 1); } if (options->dump_textures) { uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0]; dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3); } ssboconst += 2; } break; } case UBO: { uint32_t *uboconst = (uint32_t *)contents; for (i = 0; i < num_unit; i++) { // TODO probably similar on a4xx.. if (500 <= options->gpu_id && options->gpu_id < 600) dump_domain(uboconst, 2, level + 2, "A5XX_UBO"); else if (600 <= options->gpu_id && options->gpu_id < 700) dump_domain(uboconst, 2, level + 2, "A6XX_UBO"); dump_hex(uboconst, 2, level + 1); uboconst += src == STATE_SRC_BINDLESS ? 16 : 2; } break; } case UNKNOWN_DWORDS: { if (quiet(2)) return; dump_hex(contents, num_unit, level + 1); break; } case UNKNOWN_2DWORDS: { if (quiet(2)) return; dump_hex(contents, num_unit * 2, level + 1); break; } case UNKNOWN_4DWORDS: { if (quiet(2)) return; dump_hex(contents, num_unit * 4, level + 1); break; } default: if (quiet(2)) return; /* hmm.. 
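* unknown state type: fall back to a plain hexdump of num_unit dwords
* below (a guess at a useful size, since the real payload layout isn't
* known here)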
*/ dump_hex(contents, num_unit, level + 1); break; } } static void cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level) { bin_x1 = dwords[1] & 0xffff; bin_y1 = dwords[1] >> 16; bin_x2 = dwords[2] & 0xffff; bin_y2 = dwords[2] >> 16; } static void dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level) { uint32_t w, h, p; uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags; uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z; static const char *filter[] = { "point", "bilinear", "bicubic", }; static const char *clamp[] = { "wrap", "mirror", "clamp-last-texel", }; static const char swiznames[] = "xyzw01??"; /* see sys2gmem_tex_const[] in adreno_a2xxx.c */ /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat, * RFMode=ZeroClamp-1, Dim=1:2d, pitch */ p = (dwords[0] >> 22) << 5; clamp_x = (dwords[0] >> 10) & 0x3; clamp_y = (dwords[0] >> 13) & 0x3; clamp_z = (dwords[0] >> 16) & 0x3; /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, * NearestClamp=1:OGL Mode */ parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff); /* Width, Height, EndianSwap=0:None */ w = (dwords[2] & 0x1fff) + 1; h = ((dwords[2] >> 13) & 0x1fff) + 1; /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, * Mip=2:BaseMap */ mag = (dwords[3] >> 19) & 0x3; min = (dwords[3] >> 21) & 0x3; swiz = (dwords[3] >> 1) & 0xfff; /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, * Dim3d=0 */ // XXX /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0, * Dim=1:2d, MipPacking=0 */ parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff); printf("%sset texture const %04x\n", levels[level], val); printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]); printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min], filter[mag]); printf("%sswizzle: %c%c%c%c\n", levels[level + 1], swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7], swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]); printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n", levels[level + 1], gpuaddr, flags, w, h, p, rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf)); printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr, mip_flags); } static void dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level) { int i; printf("%sset shader const %04x\n", levels[level], val); for (i = 0; i < sizedwords;) { uint32_t gpuaddr, flags; parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf); void *addr = hostptr(gpuaddr); if (addr) { const char *fmt = rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf); uint32_t size = dwords[i++]; printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr, size, fmt); // TODO maybe dump these as bytes instead of dwords? size = (size + 3) / 4; // for now convert to dwords dump_hex(addr, min(size, 64), level + 1); if (size > min(size, 64)) printf("%s\t\t...\n", levels[level + 1]); dump_float(addr, min(size, 64), level + 1); if (size > min(size, 64)) printf("%s\t\t...\n", levels[level + 1]); } } } static void cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t val = dwords[0] & 0xffff; switch ((dwords[0] >> 16) & 0xf) { case 0x0: dump_float((float *)(dwords + 1), sizedwords - 1, level + 1); break; case 0x1: /* need to figure out how const space is partitioned between * attributes, textures, etc.. 
*/ if (val < 0x78) { dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level); } else { dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level); } break; case 0x2: printf("%sset bool const %04x\n", levels[level], val); break; case 0x3: printf("%sset loop const %04x\n", levels[level], val); break; case 0x4: val += 0x2000; if (dwords[0] & 0x80000000) { uint32_t srcreg = dwords[1]; uint32_t dstval = dwords[2]; /* TODO: not sure what happens w/ payload != 2.. */ assert(sizedwords == 3); assert(srcreg < ARRAY_SIZE(type0_reg_vals)); /* note: rnn_regname uses a static buf so we can't do * two regname() calls for one printf.. */ printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval); printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]); dstval += type0_reg_vals[srcreg]; dump_registers(val, &dstval, 1, level + 1); } else { dump_registers(val, dwords + 1, sizedwords - 1, level + 1); } break; } } static void dump_register_summary(int level); static void cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level) { const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]); printl(2, "%sevent %s\n", levels[level], name); if (name && (options->gpu_id > 500)) { char eventname[64]; snprintf(eventname, sizeof(eventname), "EVENT:%s", name); if (!strcmp(name, "BLIT")) { do_query(eventname, 0); print_mode(level); dump_register_summary(level); } } } static void dump_register_summary(int level) { uint32_t i; bool saved_summary = summary; summary = false; in_summary = true; /* dump current state of registers: */ printl(2, "%sdraw[%i] register values\n", levels[level], draw_count); for (i = 0; i < regcnt(); i++) { uint32_t regbase = i; uint32_t lastval = reg_val(regbase); /* skip registers that haven't been updated since last draw/blit: */ if (!(options->allregs || reg_rewritten(regbase))) continue; if (!reg_written(regbase)) continue; if (lastval != lastvals[regbase]) { printl(2, "!"); lastvals[regbase] = lastval; } else { printl(2, " "); } if (reg_rewritten(regbase)) { printl(2, "+"); } else { printl(2, " "); } printl(2, "\t%08x", lastval); if (!quiet(2)) { dump_register(regbase, lastval, level); } } clear_rewritten(); in_summary = false; draw_count++; summary = saved_summary; } static uint32_t draw_indx_common(uint32_t *dwords, int level) { uint32_t prim_type = dwords[1] & 0x1f; uint32_t source_select = (dwords[1] >> 6) & 0x3; uint32_t num_indices = dwords[2]; const char *primtype; primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type); do_query(primtype, num_indices); printl(2, "%sdraw: %d\n", levels[level], draws[ib]); printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type); printl(2, "%ssource_select: %s (%d)\n", levels[level], rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select); printl(2, "%snum_indices: %d\n", levels[level], num_indices); vertices += num_indices; draws[ib]++; return num_indices; } enum pc_di_index_size { INDEX_SIZE_IGN = 0, INDEX_SIZE_16_BIT = 0, INDEX_SIZE_32_BIT = 1, INDEX_SIZE_8_BIT = 2, INDEX_SIZE_INVALID = 0, }; static void cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t num_indices = draw_indx_common(dwords, level); assert(!is_64b()); /* if we have an index buffer, dump that: */ if (sizedwords == 5) { void *ptr = hostptr(dwords[3]); printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]); printl(2, "%sidx_size: %d\n", levels[level], dwords[4]); if (ptr) { enum pc_di_index_size size = ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2); if (!quiet(2)) { int i; printf("%sidxs: ", 
levels[level]); if (size == INDEX_SIZE_8_BIT) { uint8_t *idx = ptr; for (i = 0; i < dwords[4]; i++) printf(" %u", idx[i]); } else if (size == INDEX_SIZE_16_BIT) { uint16_t *idx = ptr; for (i = 0; i < dwords[4] / 2; i++) printf(" %u", idx[i]); } else if (size == INDEX_SIZE_32_BIT) { uint32_t *idx = ptr; for (i = 0; i < dwords[4] / 4; i++) printf(" %u", idx[i]); } printf("\n"); dump_hex(ptr, dwords[4] / 4, level + 1); } } } /* don't bother dumping registers for the dummy draw_indx's.. */ if (num_indices > 0) dump_register_summary(level); needs_wfi = true; } static void cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t num_indices = draw_indx_common(dwords, level); enum pc_di_index_size size = ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2); void *ptr = &dwords[3]; int sz = 0; assert(!is_64b()); /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */ if (!quiet(2)) { int i; printf("%sidxs: ", levels[level]); if (size == INDEX_SIZE_8_BIT) { uint8_t *idx = ptr; for (i = 0; i < num_indices; i++) printf(" %u", idx[i]); sz = num_indices; } else if (size == INDEX_SIZE_16_BIT) { uint16_t *idx = ptr; for (i = 0; i < num_indices; i++) printf(" %u", idx[i]); sz = num_indices * 2; } else if (size == INDEX_SIZE_32_BIT) { uint32_t *idx = ptr; for (i = 0; i < num_indices; i++) printf(" %u", idx[i]); sz = num_indices * 4; } printf("\n"); dump_hex(ptr, sz / 4, level + 1); } /* don't bother dumping registers for the dummy draw_indx's.. */ if (num_indices > 0) dump_register_summary(level); } static void cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t num_indices = dwords[2]; uint32_t prim_type = dwords[0] & 0x1f; do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices); print_mode(level); /* don't bother dumping registers for the dummy draw_indx's.. */ if (num_indices > 0) dump_register_summary(level); } static void cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t prim_type = dwords[0] & 0x1f; uint64_t addr; do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); print_mode(level); if (is_64b()) addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; else addr = dwords[1]; dump_gpuaddr_size(addr, level, 0x10, 2); if (is_64b()) addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4]; else addr = dwords[3]; dump_gpuaddr_size(addr, level, 0x10, 2); dump_register_summary(level); } static void cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t prim_type = dwords[0] & 0x1f; uint64_t addr; do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); print_mode(level); addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; dump_gpuaddr_size(addr, level, 0x10, 2); dump_register_summary(level); } static void cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t prim_type = dwords[0] & 0x1f; uint32_t count = dwords[2]; do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); print_mode(level); struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI"); uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT"); uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT"); uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE"); if (count_dword) { uint64_t count_addr = ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword]; uint32_t *buf = hostptr(count_addr); /* Don't print more draws than this if we don't know the indirect * count. 
It's possible the user will give ~0 or some other large * value, expecting the GPU to fill in the draw count, and we don't * want to print a gazillion draws in that case: */ const uint32_t max_draw_count = 0x100; /* Assume the indirect count is garbage if it's larger than this * (quite large) value or 0. Hopefully this catches most cases. */ const uint32_t max_indirect_draw_count = 0x10000; if (buf) { printf("%sindirect count: %u\n", levels[level], *buf); if (*buf == 0 || *buf > max_indirect_draw_count) { /* garbage value */ count = min(count, max_draw_count); } else { /* not garbage */ count = min(count, *buf); } } else { count = min(count, max_draw_count); } } if (addr_dword && stride_dword) { uint64_t addr = ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword]; uint32_t stride = dwords[stride_dword]; for (unsigned i = 0; i < count; i++, addr += stride) { printf("%sdraw %d:\n", levels[level], i); dump_gpuaddr_size(addr, level, 0x10, 2); } } dump_register_summary(level); } static void cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level) { do_query("COMPUTE", 1); dump_register_summary(level); } static void cp_nop(uint32_t *dwords, uint32_t sizedwords, int level) { const char *buf = (void *)dwords; int i; if (quiet(3)) return; // blob doesn't use CP_NOP for string_marker but it does // use it for things that end up looking like, but aren't // ascii chars: if (!options->decode_markers) return; for (i = 0; i < 4 * sizedwords; i++) { if (buf[i] == '\0') break; if (isascii(buf[i])) printf("%c", buf[i]); } printf("\n"); } static void cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level) { /* traverse indirect buffers */ uint64_t ibaddr; uint32_t ibsize; uint32_t *ptr = NULL; if (is_64b()) { /* a5xx+.. high 32b of gpu addr, then size: */ ibaddr = dwords[0]; ibaddr |= ((uint64_t)dwords[1]) << 32; ibsize = dwords[2]; } else { ibaddr = dwords[0]; ibsize = dwords[1]; } if (!quiet(3)) { if (is_64b()) { printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr); } else { printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr); } printf("%sibsize:%08x\n", levels[level], ibsize); } if (options->once && has_dumped(ibaddr, enable_mask)) return; /* 'query-compare' mode implies 'once' mode, although we need only to * process the cmdstream for *any* enable_mask mode, since we are * comparing binning vs draw reg values at the same time, ie. it is * not useful to process the same draw in both binning and draw pass. */ if (options->query_compare && has_dumped(ibaddr, MODE_ALL)) return; /* map gpuaddr back to hostptr: */ ptr = hostptr(ibaddr); if (ptr) { /* If the GPU hung within the target IB, the trigger point will be * just after the current CP_INDIRECT_BUFFER. Because the IB is * executed but never returns. Account for this by checking if * the IB returned: */ highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2])); ib++; ibs[ib].base = ibaddr; ibs[ib].size = ibsize; dump_commands(ptr, ibsize, level); ib--; } else { fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize); } } static void cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level) { uint64_t ibaddr; uint32_t ibsize; uint32_t loopcount; uint32_t *ptr = NULL; loopcount = dwords[0]; ibaddr = dwords[1]; ibaddr |= ((uint64_t)dwords[2]) << 32; ibsize = dwords[3]; /* map gpuaddr back to hostptr: */ ptr = hostptr(ibaddr); if (ptr) { /* If the GPU hung within the target IB, the trigger point will be * just after the current CP_START_BIN. Because the IB is * executed but never returns. 
Account for this by checking if * the IB returned: */ highlight_gpuaddr(gpuaddr(&dwords[5])); /* TODO: we should duplicate the body of the loop after each bin, so * that draws get the correct state. We should also figure out if there * are any registers that can tell us what bin we're in when we hang so * that crashdec points to the right place. */ ib++; for (uint32_t i = 0; i < loopcount; i++) { ibs[ib].base = ibaddr; ibs[ib].size = ibsize; printf("%sbin %u\n", levels[level], i); dump_commands(ptr, ibsize, level); ibaddr += ibsize; ptr += ibsize; } ib--; } else { fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize); } } static void cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level) { needs_wfi = false; } static void cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level) { if (quiet(2)) return; if (is_64b()) { uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32); printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr); dump_hex(&dwords[2], sizedwords - 2, level + 1); if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2])) dump_commands(&dwords[2], sizedwords - 2, level + 1); } else { uint32_t gpuaddr = dwords[0]; printf("%sgpuaddr:%08x\n", levels[level], gpuaddr); dump_float((float *)&dwords[1], sizedwords - 1, level + 1); } } static void cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t val = dwords[0] & 0xffff; uint32_t and = dwords[1]; uint32_t or = dwords[2]; printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or); if (needs_wfi) printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or); reg_set(val, (reg_val(val) & and) | or); } static void cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level) { uint32_t val = dwords[0] & 0xffff; printl(3, "%sbase register: %s\n", levels[level], regname(val, 1)); if (quiet(2)) return; uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32); printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr); void *ptr = hostptr(gpuaddr); if (ptr) { uint32_t cnt = (dwords[0] >> 19) & 0x3ff; dump_hex(ptr, cnt, level + 1); } } struct draw_state { uint16_t enable_mask; uint16_t flags; uint32_t count; uint64_t addr; }; struct draw_state state[32]; #define FLAG_DIRTY 0x1 #define FLAG_DISABLE 0x2 #define FLAG_DISABLE_ALL_GROUPS 0x4 #define FLAG_LOAD_IMMED 0x8 static int draw_mode; static void disable_group(unsigned group_id) { struct draw_state *ds = &state[group_id]; memset(ds, 0, sizeof(*ds)); } static void disable_all_groups(void) { for (unsigned i = 0; i < ARRAY_SIZE(state); i++) disable_group(i); } static void load_group(unsigned group_id, int level) { struct draw_state *ds = &state[group_id]; if (!ds->count) return; printl(2, "%sgroup_id: %u\n", levels[level], group_id); printl(2, "%scount: %d\n", levels[level], ds->count); printl(2, "%saddr: %016llx\n", levels[level], ds->addr); printl(2, "%sflags: %x\n", levels[level], ds->flags); if (options->gpu_id >= 600) { printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask); if (!(ds->enable_mask & enable_mask)) { printl(2, "%s\tskipped!\n\n", levels[level]); return; } } void *ptr = hostptr(ds->addr); if (ptr) { if (!quiet(2)) dump_hex(ptr, ds->count, level + 1); ib++; dump_commands(ptr, ds->count, level + 1); ib--; } } static void load_all_groups(int level) { /* sanity check, we should never recursively hit recursion here, and if * we do bad things happen: */ static bool loading_groups = false; if (loading_groups) { printf("ERROR: nothing in draw state should trigger 
static void
load_all_groups(int level)
{
   /* sanity check, we should never recurse back into here, and if we do,
    * bad things happen:
    */
   static bool loading_groups = false;
   if (loading_groups) {
      printf("ERROR: nothing in draw state should trigger recursively loading "
             "groups!\n");
      return;
   }
   loading_groups = true;

   for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
      load_group(i, level);
   loading_groups = false;

   /* in 'query-compare' mode, defer disabling all groups until we have a
    * chance to process the query:
    */
   if (!options->query_compare)
      disable_all_groups();
}

static void
cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint32_t i;

   for (i = 0; i < sizedwords;) {
      struct draw_state *ds;
      uint32_t count = dwords[i] & 0xffff;
      uint32_t group_id = (dwords[i] >> 24) & 0x1f;
      uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
      uint32_t flags = (dwords[i] >> 16) & 0xf;
      uint64_t addr;

      if (is_64b()) {
         addr = dwords[i + 1];
         addr |= ((uint64_t)dwords[i + 2]) << 32;
         i += 3;
      } else {
         addr = dwords[i + 1];
         i += 2;
      }

      if (flags & FLAG_DISABLE_ALL_GROUPS) {
         disable_all_groups();
         continue;
      }

      if (flags & FLAG_DISABLE) {
         disable_group(group_id);
         continue;
      }

      assert(group_id < ARRAY_SIZE(state));
      disable_group(group_id);

      ds = &state[group_id];
      ds->enable_mask = enable_mask;
      ds->flags = flags;
      ds->count = count;
      ds->addr = addr;

      if (flags & FLAG_LOAD_IMMED) {
         load_group(group_id, level);
         disable_group(group_id);
      }
   }
}

static void
cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
{
   draw_mode = dwords[0];
}

/* execute compute shader */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   do_query("compute", 0);
   dump_register_summary(level);
}

static void
cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint64_t addr;

   if (is_64b()) {
      addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
   } else {
      addr = dwords[1];
   }

   printl(3, "%saddr: %016llx\n", levels[level], addr);
   dump_gpuaddr_size(addr, level, 0x10, 2);

   do_query("compute", 0);
   dump_register_summary(level);
}

static void
cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
{
   render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf);

   if (!strcmp(render_mode, "RM6_BINNING")) {
      enable_mask = MODE_BINNING;
   } else if (!strcmp(render_mode, "RM6_GMEM")) {
      enable_mask = MODE_GMEM;
   } else if (!strcmp(render_mode, "RM6_BYPASS")) {
      enable_mask = MODE_BYPASS;
   }
}
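/* Note that the enable_mask selected by CP_SET_MARKER above is what
 * load_group() compares against each group's enable_mask on a6xx, so the
 * marker effectively decides which stashed draw-state groups apply to the
 * current (binning/GMEM/bypass) pass.
 */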
static void
cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint64_t addr;
   uint32_t *ptr, len;

   assert(is_64b());

   /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
    * not sure if this can come in different sizes.
    *
    * First ptr doesn't seem to be cmdstream, second one does.
    *
    * Comment from downstream kernel:
    *
    * SRM -- set render mode (ex binning, direct render etc)
    * SRM is set by UMD usually at start of IB to tell CP the type of
    * preemption.
    * KMD needs to set SRM to NULL to indicate CP that rendering is
    * done by IB.
    * ------------------------------------------------------------------
    *
    * Seems to always be one of these two:
    * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
    * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
    */

   assert(options->gpu_id >= 500);

   render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);

   if (sizedwords == 1)
      return;

   addr = dwords[1];
   addr |= ((uint64_t)dwords[2]) << 32;

   mode = dwords[3];

   dump_gpuaddr(addr, level + 1);

   if (sizedwords == 5)
      return;

   assert(sizedwords == 8);

   len = dwords[5];
   addr = dwords[6];
   addr |= ((uint64_t)dwords[7]) << 32;

   printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
   printl(3, "%slen: 0x%x\n", levels[level], len);

   ptr = hostptr(addr);

   if (ptr) {
      if (!quiet(2)) {
         ib++;
         dump_commands(ptr, len, level + 1);
         ib--;
         dump_hex(ptr, len, level + 1);
      }
   }
}

static void
cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint64_t addr;
   uint32_t *ptr, len;

   assert(is_64b());
   assert(options->gpu_id >= 500);
   assert(sizedwords == 8);

   addr = dwords[5];
   addr |= ((uint64_t)dwords[6]) << 32;
   len = dwords[7];

   printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
   printl(3, "%slen: 0x%x\n", levels[level], len);

   ptr = hostptr(addr);

   if (ptr) {
      if (!quiet(2)) {
         ib++;
         dump_commands(ptr, len, level + 1);
         ib--;
         dump_hex(ptr, len, level + 1);
      }
   }
}

static void
cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
{
   do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
   print_mode(level);
   dump_register_summary(level);
}

static void
cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
{
   int i;

   /* NOTE: seems to write same reg multiple times.. not sure if different
    * parts of these are triggered by the FLUSH_SO_n events?? (if that is
    * what they actually are?)
    */
   bool saved_summary = summary;
   summary = false;

   for (i = 0; i < sizedwords; i += 2) {
      dump_register(dwords[i + 0], dwords[i + 1], level + 1);
      reg_set(dwords[i + 0], dwords[i + 1]);
   }

   summary = saved_summary;
}

static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint32_t reg = dwords[1] & 0xffff;

   dump_register(reg, dwords[2], level + 1);
   reg_set(reg, dwords[2]);
}

static void
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint64_t addr;
   uint32_t size = dwords[2] & 0xffff;
   void *ptr;

   addr = dwords[0] | ((uint64_t)dwords[1] << 32);

   if (!quiet(3)) {
      printf("%saddr=%" PRIx64 "\n", levels[level], addr);
   }

   ptr = hostptr(addr);
   if (ptr) {
      dump_commands(ptr, size, level + 1);
   }
}

static void
cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
{
   skip_ib2_enable_global = dwords[0];
}

static void
cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
{
   skip_ib2_enable_local = dwords[0];
}
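/* Table of CP_* opcode handlers.  For reference, CP(NOP, cp_nop) expands to
 * { "CP_NOP", cp_nop }, and entries like
 * CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}) additionally set
 * the per-opcode options (currently just whether pending draw-state groups
 * should be loaded before the handler runs).
 */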
{ "CP_" #x, fxn, ##__VA_ARGS__ } static const struct type3_op { const char *name; void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level); struct { bool load_all_groups; } options; } type3_op[] = { CP(NOP, cp_nop), CP(INDIRECT_BUFFER, cp_indirect), CP(INDIRECT_BUFFER_PFD, cp_indirect), CP(WAIT_FOR_IDLE, cp_wfi), CP(REG_RMW, cp_rmw), CP(REG_TO_MEM, cp_reg_mem), CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */ CP(MEM_WRITE, cp_mem_write), CP(EVENT_WRITE, cp_event_write), CP(RUN_OPENCL, cp_run_cl), CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}), CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}), CP(SET_CONSTANT, cp_set_const), CP(IM_LOAD_IMMEDIATE, cp_im_loadi), CP(WIDE_REG_WRITE, cp_wide_reg_write), /* for a3xx */ CP(LOAD_STATE, cp_load_state), CP(SET_BIN, cp_set_bin), /* for a4xx */ CP(LOAD_STATE4, cp_load_state), CP(SET_DRAW_STATE, cp_set_draw_state), CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}), CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}), CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}), /* for a5xx */ CP(SET_RENDER_MODE, cp_set_render_mode), CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint), CP(BLIT, cp_blit), CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch), CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}), CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}), CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}), CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global), CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local), /* for a6xx */ CP(LOAD_STATE6_GEOM, cp_load_state), CP(LOAD_STATE6_FRAG, cp_load_state), CP(LOAD_STATE6, cp_load_state), CP(SET_MODE, cp_set_mode), CP(SET_MARKER, cp_set_marker), CP(REG_WRITE, cp_reg_write), CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib), CP(START_BIN, cp_start_bin), }; static void noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level) { } static const struct type3_op * get_type3_op(unsigned opc) { static const struct type3_op dummy_op = { .fxn = noop_fxn, }; const char *name = pktname(opc); if (!name) return &dummy_op; for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++) if (!strcmp(name, type3_op[i].name)) return &type3_op[i]; return &dummy_op; } void dump_commands(uint32_t *dwords, uint32_t sizedwords, int level) { int dwords_left = sizedwords; uint32_t count = 0; /* dword count including packet header */ uint32_t val; // assert(dwords); if (!dwords) { printf("NULL cmd buffer!\n"); return; } assert(ib < ARRAY_SIZE(draws)); draws[ib] = 0; while (dwords_left > 0) { current_draw_count = draw_count; /* hack, this looks like a -1 underflow, in some versions * when it tries to write zero registers via pkt0 */ // if ((dwords[0] >> 16) == 0xffff) // goto skip; if (pkt_is_type0(dwords[0])) { printl(3, "t0"); count = type0_pkt_size(dwords[0]) + 1; val = type0_pkt_offset(dwords[0]); assert(val < regcnt()); printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1), (dwords[0] & 0x8000) ? 
" (same register)" : "", val); dump_registers(val, dwords + 1, count - 1, level + 2); if (!quiet(3)) dump_hex(dwords, count, level + 1); } else if (pkt_is_type4(dwords[0])) { /* basically the same(ish) as type0 prior to a5xx */ printl(3, "t4"); count = type4_pkt_size(dwords[0]) + 1; val = type4_pkt_offset(dwords[0]); assert(val < regcnt()); printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1), val); dump_registers(val, dwords + 1, count - 1, level + 2); if (!quiet(3)) dump_hex(dwords, count, level + 1); #if 0 } else if (pkt_is_type1(dwords[0])) { printl(3, "t1"); count = 3; val = dwords[0] & 0xfff; printl(3, "%swrite %s\n", levels[level+1], regname(val, 1)); dump_registers(val, dwords+1, 1, level+2); val = (dwords[0] >> 12) & 0xfff; printl(3, "%swrite %s\n", levels[level+1], regname(val, 1)); dump_registers(val, dwords+2, 1, level+2); if (!quiet(3)) dump_hex(dwords, count, level+1); } else if (pkt_is_type2(dwords[0])) { printl(3, "t2"); printf("%sNOP\n", levels[level+1]); count = 1; if (!quiet(3)) dump_hex(dwords, count, level+1); #endif } else if (pkt_is_type3(dwords[0])) { count = type3_pkt_size(dwords[0]) + 1; val = cp_type3_opcode(dwords[0]); const struct type3_op *op = get_type3_op(val); if (op->options.load_all_groups) load_all_groups(level + 1); printl(3, "t3"); const char *name = pktname(val); if (!quiet(2)) { printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level], rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val, count, (dwords[0] & 0x1) ? " (predicated)" : ""); } if (name) dump_domain(dwords + 1, count - 1, level + 2, name); op->fxn(dwords + 1, count - 1, level + 1); if (!quiet(2)) dump_hex(dwords, count, level + 1); } else if (pkt_is_type7(dwords[0])) { count = type7_pkt_size(dwords[0]) + 1; val = cp_type7_opcode(dwords[0]); const struct type3_op *op = get_type3_op(val); if (op->options.load_all_groups) load_all_groups(level + 1); printl(3, "t7"); const char *name = pktname(val); if (!quiet(2)) { printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level], rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val, count); } if (name) { /* special hack for two packets that decode the same way * on a6xx: */ if (!strcmp(name, "CP_LOAD_STATE6_FRAG") || !strcmp(name, "CP_LOAD_STATE6_GEOM")) name = "CP_LOAD_STATE6"; dump_domain(dwords + 1, count - 1, level + 2, name); } op->fxn(dwords + 1, count - 1, level + 1); if (!quiet(2)) dump_hex(dwords, count, level + 1); } else if (pkt_is_type2(dwords[0])) { printl(3, "t2"); printl(3, "%snop\n", levels[level + 1]); } else { /* for 5xx+ we can do a passable job of looking for start of next valid * packet: */ if (options->gpu_id >= 500) { while (dwords_left > 0) { if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0])) break; printf("bad type! %08x\n", dwords[0]); dwords++; dwords_left--; } } else { printf("bad type! %08x\n", dwords[0]); return; } } dwords += count; dwords_left -= count; } if (dwords_left < 0) printf("**** this ain't right!! dwords_left=%d\n", dwords_left); }