/* * Copyright (c) 2017 Rob Clark * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include "util/os_file.h" #include "freedreno_pm4.h" #include "afuc.h" #include "util.h" #include "emu.h" static int gpuver; /* non-verbose mode should output something suitable to feed back into * assembler.. verbose mode has additional output useful for debugging * (like unexpected bits that are set) */ static bool verbose = false; /* emulator mode: */ static bool emulator = false; static void print_gpu_reg(uint32_t regbase) { if (regbase < 0x100) return; char *name = afuc_gpu_reg_name(regbase); if (name) { printf("\t; %s", name); free(name); } } #define printerr(fmt, ...) afuc_printc(AFUC_ERR, fmt, ##__VA_ARGS__) #define printlbl(fmt, ...) afuc_printc(AFUC_LBL, fmt, ##__VA_ARGS__) void print_src(unsigned reg) { if (reg == REG_REM) printf("$rem"); /* remainding dwords in packet */ else if (reg == REG_MEMDATA) printf("$memdata"); else if (reg == REG_REGDATA) printf("$regdata"); else if (reg == REG_DATA) printf("$data"); else printf("$%02x", reg); } void print_dst(unsigned reg) { if (reg == REG_REM) printf("$rem"); /* remainding dwords in packet */ else if (reg == REG_ADDR) printf("$addr"); else if (reg == REG_USRADDR) printf("$usraddr"); else if (reg == REG_DATA) printf("$data"); else printf("$%02x", reg); } static void print_alu_name(afuc_opc opc, uint32_t instr) { if (opc == OPC_ADD) { printf("add "); } else if (opc == OPC_ADDHI) { printf("addhi "); } else if (opc == OPC_SUB) { printf("sub "); } else if (opc == OPC_SUBHI) { printf("subhi "); } else if (opc == OPC_AND) { printf("and "); } else if (opc == OPC_OR) { printf("or "); } else if (opc == OPC_XOR) { printf("xor "); } else if (opc == OPC_NOT) { printf("not "); } else if (opc == OPC_SHL) { printf("shl "); } else if (opc == OPC_USHR) { printf("ushr "); } else if (opc == OPC_ISHR) { printf("ishr "); } else if (opc == OPC_ROT) { printf("rot "); } else if (opc == OPC_MUL8) { printf("mul8 "); } else if (opc == OPC_MIN) { printf("min "); } else if (opc == OPC_MAX) { printf("max "); } else if (opc == OPC_CMP) { printf("cmp "); } else if (opc == OPC_MSB) { printf("msb "); } else { printerr("[%08x]", instr); printf(" ; alu%02x ", opc); } } static const char * getpm4(uint32_t id) { return afuc_pm_id_name(id); } static struct { uint32_t offset; uint32_t num_jump_labels; uint32_t jump_labels[256]; } jump_labels[1024]; int num_jump_labels; static void add_jump_table_entry(uint32_t n, uint32_t offset) { int i; if (n > 128) /* can't possibly be a PM4 PKT3.. */ return; for (i = 0; i < num_jump_labels; i++) if (jump_labels[i].offset == offset) goto add_label; num_jump_labels = i + 1; jump_labels[i].offset = offset; jump_labels[i].num_jump_labels = 0; add_label: jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n; assert(jump_labels[i].num_jump_labels < 256); } static int get_jump_table_entry(uint32_t offset) { int i; for (i = 0; i < num_jump_labels; i++) if (jump_labels[i].offset == offset) return i; return -1; } static uint32_t label_offsets[0x512]; static int num_label_offsets; static int label_idx(uint32_t offset, bool create) { int i; for (i = 0; i < num_label_offsets; i++) if (offset == label_offsets[i]) return i; if (!create) return -1; label_offsets[i] = offset; num_label_offsets = i + 1; return i; } static const char * label_name(uint32_t offset, bool allow_jt) { static char name[12]; int lidx; if (allow_jt) { lidx = get_jump_table_entry(offset); if (lidx >= 0) { int j; for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) { uint32_t jump_label = jump_labels[lidx].jump_labels[j]; const char *str = getpm4(jump_label); if (str) return str; } // if we don't find anything w/ known name, maybe we should // return UNKN%d to at least make it clear that this is some // sort of jump-table entry? } } lidx = label_idx(offset, false); if (lidx < 0) return NULL; sprintf(name, "l%03d", lidx); return name; } static uint32_t fxn_offsets[0x512]; static int num_fxn_offsets; static int fxn_idx(uint32_t offset, bool create) { int i; for (i = 0; i < num_fxn_offsets; i++) if (offset == fxn_offsets[i]) return i; if (!create) return -1; fxn_offsets[i] = offset; num_fxn_offsets = i + 1; return i; } static const char * fxn_name(uint32_t offset) { static char name[14]; int fidx = fxn_idx(offset, false); if (fidx < 0) return NULL; sprintf(name, "fxn%02d", fidx); return name; } void print_control_reg(uint32_t id) { char *name = afuc_control_reg_name(id); if (name) { printf("@%s", name); free(name); } else { printf("0x%03x", id); } } void print_pipe_reg(uint32_t id) { char *name = afuc_pipe_reg_name(id); if (name) { printf("|%s", name); free(name); } else { printf("0x%03x", id); } } static void disasm_instr(uint32_t *instrs, unsigned pc) { int jump_label_idx; afuc_instr *instr = (void *)&instrs[pc]; const char *fname, *lname; afuc_opc opc; bool rep; afuc_get_opc(instr, &opc, &rep); lname = label_name(pc, false); fname = fxn_name(pc); jump_label_idx = get_jump_table_entry(pc); if (jump_label_idx >= 0) { int j; printf("\n"); for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) { uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j]; const char *name = getpm4(jump_label); if (name) { printlbl("%s", name); } else { printlbl("UNKN%d", jump_label); } printf(":\n"); } } if (fname) { printlbl("%s", fname); printf(":\n"); } if (lname) { printlbl(" %s", lname); printf(":"); } else { printf(" "); } if (verbose) { printf("\t%04x: %08x ", pc, instrs[pc]); } else { printf(" "); } switch (opc) { case OPC_NOP: { /* a6xx changed the default immediate, and apparently 0 * is illegal now. */ const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0; if (instrs[pc] != nop) { printerr("[%08x]", instrs[pc]); printf(" ; "); } if (rep) printf("(rep)"); printf("nop"); print_gpu_reg(instrs[pc]); break; } case OPC_ADD: case OPC_ADDHI: case OPC_SUB: case OPC_SUBHI: case OPC_AND: case OPC_OR: case OPC_XOR: case OPC_NOT: case OPC_SHL: case OPC_USHR: case OPC_ISHR: case OPC_ROT: case OPC_MUL8: case OPC_MIN: case OPC_MAX: case OPC_CMP: { bool src1 = true; if (opc == OPC_NOT) src1 = false; if (rep) printf("(rep)"); print_alu_name(opc, instrs[pc]); print_dst(instr->alui.dst); printf(", "); if (src1) { print_src(instr->alui.src); printf(", "); } printf("0x%04x", instr->alui.uimm); print_gpu_reg(instr->alui.uimm); /* print out unexpected bits: */ if (verbose) { if (instr->alui.src && !src1) printerr(" (src=%02x)", instr->alui.src); } break; } case OPC_MOVI: { if (rep) printf("(rep)"); printf("mov "); print_dst(instr->movi.dst); printf(", 0x%04x", instr->movi.uimm); if (instr->movi.shift) printf(" << %u", instr->movi.shift); if ((instr->movi.dst == REG_ADDR) && (instr->movi.shift >= 16)) { uint32_t val = (uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift; val &= ~0x40000; /* b18 seems to be a flag */ if ((val & 0x00ffffff) == 0) { printf("\t; "); print_pipe_reg(val >> 24); break; } } /* using mov w/ << 16 is popular way to construct a pkt7 * header to send (for ex, from PFP to ME), so check that * case first */ if ((instr->movi.shift == 16) && ((instr->movi.uimm & 0xff00) == 0x7000)) { unsigned opc, p; opc = instr->movi.uimm & 0x7f; p = pm4_odd_parity_bit(opc); /* So, you'd think that checking the parity bit would be * a good way to rule out false positives, but seems like * ME doesn't really care.. at least it would filter out * things that look like actual legit packets between * PFP and ME.. */ if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) { const char *name = getpm4(opc); printf("\t; "); if (name) printlbl("%s", name); else printlbl("UNKN%u", opc); break; } } print_gpu_reg((uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift); break; } case OPC_ALU: { bool src1 = true; if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB) src1 = false; if (instr->alu.pad) printf("[%08x] ; ", instrs[pc]); if (rep) printf("(rep)"); if (instr->alu.xmov) printf("(xmov%d)", instr->alu.xmov); /* special case mnemonics: * reading $00 seems to always yield zero, and so: * or $dst, $00, $src -> mov $dst, $src * Maybe add one for negate too, ie. * sub $dst, $00, $src ??? */ if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) { printf("mov "); src1 = false; } else { print_alu_name(instr->alu.alu, instrs[pc]); } print_dst(instr->alu.dst); if (src1) { printf(", "); print_src(instr->alu.src1); } printf(", "); print_src(instr->alu.src2); /* print out unexpected bits: */ if (verbose) { if (instr->alu.pad) printerr(" (pad=%01x)", instr->alu.pad); if (instr->alu.src1 && !src1) printerr(" (src1=%02x)", instr->alu.src1); } /* xmov is a modifier that makes the processor execute up to 3 * extra mov's after the current instruction. Given an ALU * instruction: * * (xmovN) alu $dst, $src1, $src2 * * In all of the uses in the firmware blob, $dst and $src2 are one * of the "special" registers $data, $addr, $addr2. I've observed * that if $dst isn't "special" then it's replaced with $00 * instead of $data, but I haven't checked what happens if $src2 * isn't "special". Anyway, in the usual case, the HW produces a * count M = min(N, $rem) and then does the following: * * M = 1: * mov $data, $src2 * * M = 2: * mov $data, $src2 * mov $data, $src2 * * M = 3: * mov $data, $src2 * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH) * mov $data, $src2 * * It seems to be frequently used in combination with (rep) to * provide a kind of hardware-based loop unrolling, and there's * even a special case in the ISA to be able to do this with * CP_CONTEXT_REG_BUNCH. However (rep) isn't required. * * This dumps the expected extra instructions, assuming that $rem * isn't too small. */ if (verbose && instr->alu.xmov) { for (int i = 0; i < instr->alu.xmov; i++) { printf("\n ; mov "); if (instr->alu.dst < 0x1d) printf("$00"); else if (instr->alu.xmov == 3 && i == 1) print_dst(instr->alu.dst); else printf("$data"); printf(", "); print_src(instr->alu.src2); } } break; } case OPC_CWRITE6: case OPC_CREAD6: case OPC_STORE6: case OPC_LOAD6: { if (rep) printf("(rep)"); bool is_control_reg = true; bool is_store = true; if (gpuver >= 6) { switch (opc) { case OPC_CWRITE6: printf("cwrite "); break; case OPC_CREAD6: is_store = false; printf("cread "); break; case OPC_STORE6: is_control_reg = false; printf("store "); break; case OPC_LOAD6: is_control_reg = false; is_store = false; printf("load "); break; default: assert(!"unreachable"); } } else { switch (opc) { case OPC_CWRITE5: printf("cwrite "); break; case OPC_CREAD5: is_store = false; printf("cread "); break; default: fprintf(stderr, "A6xx control opcode on A5xx?\n"); exit(1); } } if (is_store) print_src(instr->control.src1); else print_dst(instr->control.src1); printf(", ["); print_src(instr->control.src2); printf(" + "); if (is_control_reg && instr->control.flags != 0x4) print_control_reg(instr->control.uimm); else printf("0x%03x", instr->control.uimm); printf("], 0x%x", instr->control.flags); break; } case OPC_BRNEI: case OPC_BREQI: case OPC_BRNEB: case OPC_BREQB: { unsigned off = pc + instr->br.ioff; assert(!rep); /* Since $00 reads back zero, it can be used as src for * unconditional branches. (This only really makes sense * for the BREQB.. or possible BRNEI if imm==0.) * * If bit=0 then branch is taken if *all* bits are zero. * Otherwise it is taken if bit (bit-1) is clear. * * Note the instruction after a jump/branch is executed * regardless of whether branch is taken, so use nop or * take that into account in code. */ if (instr->br.src || (opc != OPC_BRNEB)) { bool immed = false; if (opc == OPC_BRNEI) { printf("brne "); immed = true; } else if (opc == OPC_BREQI) { printf("breq "); immed = true; } else if (opc == OPC_BRNEB) { printf("brne "); } else if (opc == OPC_BREQB) { printf("breq "); } print_src(instr->br.src); if (immed) { printf(", 0x%x,", instr->br.bit_or_imm); } else { printf(", b%u,", instr->br.bit_or_imm); } } else { printf("jump"); if (verbose && instr->br.bit_or_imm) { printerr(" (src=%03x, bit=%03x) ", instr->br.src, instr->br.bit_or_imm); } } printf(" #"); printlbl("%s", label_name(off, true)); if (verbose) printf(" (#%d, %04x)", instr->br.ioff, off); break; } case OPC_CALL: assert(!rep); printf("call #"); printlbl("%s", fxn_name(instr->call.uoff)); if (verbose) { printf(" (%04x)", instr->call.uoff); if (instr->br.bit_or_imm || instr->br.src) { printerr(" (src=%03x, bit=%03x) ", instr->br.src, instr->br.bit_or_imm); } } break; case OPC_RET: assert(!rep); if (instr->ret.pad) printf("[%08x] ; ", instrs[pc]); if (instr->ret.interrupt) printf("iret"); else printf("ret"); break; case OPC_WIN: assert(!rep); if (instr->waitin.pad) printf("[%08x] ; ", instrs[pc]); printf("waitin"); if (verbose && instr->waitin.pad) printerr(" (pad=%x)", instr->waitin.pad); break; case OPC_PREEMPTLEAVE6: if (gpuver < 6) { printf("[%08x] ; op38", instrs[pc]); } else { printf("preemptleave #"); printlbl("%s", label_name(instr->call.uoff, true)); } break; case OPC_SETSECURE: /* Note: This seems to implicitly read the secure/not-secure state * to set from the low bit of $02, and implicitly jumps to pc + 3 * (i.e. skipping the next two instructions) if it succeeds. We * print these implicit parameters to make reading the disassembly * easier. */ if (instr->pad) printf("[%08x] ; ", instrs[pc]); printf("setsecure $02, #"); printlbl("%s", label_name(pc + 3, true)); break; default: printerr("[%08x]", instrs[pc]); printf(" ; op%02x ", opc); print_dst(instr->alui.dst); printf(", "); print_src(instr->alui.src); print_gpu_reg(instrs[pc] & 0xffff); break; } printf("\n"); } static void setup_packet_table(uint32_t *jmptbl, uint32_t sizedwords) { num_jump_labels = 0; for (unsigned i = 0; i < sizedwords; i++) { unsigned offset = jmptbl[i]; unsigned n = i; // + CP_NOP; add_jump_table_entry(n, offset); } } static void setup_labels(uint32_t *instrs, uint32_t sizedwords) { afuc_opc opc; bool rep; num_label_offsets = 0; for (unsigned i = 0; i < sizedwords; i++) { afuc_instr *instr = (void *)&instrs[i]; afuc_get_opc(instr, &opc, &rep); switch (opc) { case OPC_BRNEI: case OPC_BREQI: case OPC_BRNEB: case OPC_BREQB: label_idx(i + instr->br.ioff, true); break; case OPC_PREEMPTLEAVE6: if (gpuver >= 6) label_idx(instr->call.uoff, true); break; case OPC_CALL: fxn_idx(instr->call.uoff, true); break; case OPC_SETSECURE: /* this implicitly jumps to pc + 3 if successful */ label_idx(i + 3, true); break; default: break; } } } static void disasm(struct emu *emu) { uint32_t sizedwords = emu->sizedwords; uint32_t lpac_offset = 0; EMU_GPU_REG(CP_SQE_INSTR_BASE); EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE); emu_init(emu); #ifdef BOOTSTRAP_DEBUG while (true) { disasm_instr(emu->instrs, emu->gpr_regs.pc); emu_step(emu); } #endif emu_run_bootstrap(emu); /* Figure out if we have LPAC SQE appended: */ if (emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE)) { lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) - emu_get_reg64(emu, &CP_SQE_INSTR_BASE); lpac_offset /= 4; sizedwords = lpac_offset; } setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); setup_labels(emu->instrs, emu->sizedwords); /* TODO add option to emulate LPAC SQE instead: */ if (emulator) { /* Start from clean slate: */ emu_fini(emu); emu_init(emu); while (true) { disasm_instr(emu->instrs, emu->gpr_regs.pc); emu_step(emu); } } /* print instructions: */ for (int i = 0; i < sizedwords; i++) { disasm_instr(emu->instrs, i); } if (!lpac_offset) return; printf(";\n"); printf("; LPAC microcode:\n"); printf(";\n"); emu_fini(emu); emu->lpac = true; emu->instrs += lpac_offset; emu->sizedwords -= lpac_offset; emu_init(emu); emu_run_bootstrap(emu); setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); setup_labels(emu->instrs, emu->sizedwords); /* print instructions: */ for (int i = 0; i < emu->sizedwords; i++) { disasm_instr(emu->instrs, i); } } static void disasm_legacy(uint32_t *buf, int sizedwords) { uint32_t *instrs = buf; const int jmptbl_start = instrs[1] & 0xffff; uint32_t *jmptbl = &buf[jmptbl_start]; int i; /* parse jumptable: */ setup_packet_table(jmptbl, 0x80); /* do a pre-pass to find instructions that are potential branch targets, * and add labels for them: */ setup_labels(instrs, jmptbl_start); /* print instructions: */ for (i = 0; i < jmptbl_start; i++) { disasm_instr(instrs, i); } /* print jumptable: */ if (verbose) { printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"); printf("; JUMP TABLE\n"); for (i = 0; i < 0x7f; i++) { int n = i; // + CP_NOP; uint32_t offset = jmptbl[i]; const char *name = getpm4(n); printf("%3d %02x: ", n, n); printf("%04x", offset); if (name) { printf(" ; %s", name); } else { printf(" ; UNKN%d", n); } printf("\n"); } } } static void usage(void) { fprintf(stderr, "Usage:\n" "\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n" "\t\t-g - specify GPU version (5, etc)\n" "\t\t-c - use colors\n" "\t\t-v - verbose output\n" "\t\t-e - emulator mode\n"); exit(2); } int main(int argc, char **argv) { uint32_t *buf; char *file; bool colors = false; uint32_t gpu_id = 0; size_t sz; int c, ret; bool unit_test = false; /* Argument parsing: */ while ((c = getopt(argc, argv, "g:vceu")) != -1) { switch (c) { case 'g': gpu_id = atoi(optarg); break; case 'v': verbose = true; break; case 'c': colors = true; break; case 'e': emulator = true; verbose = true; break; case 'u': unit_test = true; break; default: usage(); } } if (optind >= argc) { fprintf(stderr, "no file specified!\n"); usage(); } file = argv[optind]; /* if gpu version not specified, infer from filename: */ if (!gpu_id) { char *str = strstr(file, "a5"); if (!str) str = strstr(file, "a6"); if (str) gpu_id = atoi(str + 1); } if (gpu_id < 500) { printf("invalid gpu_id: %d\n", gpu_id); return -1; } gpuver = gpu_id / 100; /* a6xx is *mostly* a superset of a5xx, but some opcodes shuffle * around, and behavior of special regs is a bit different. Right * now we only bother to support the a6xx variant. */ if (emulator && (gpuver != 6)) { fprintf(stderr, "Emulator only supported on a6xx!\n"); return 1; } ret = afuc_util_init(gpuver, colors); if (ret < 0) { usage(); } printf("; a%dxx microcode\n", gpuver); buf = (uint32_t *)os_read_file(file, &sz); if (!unit_test) printf("; Disassembling microcode: %s\n", file); printf("; Version: %08x\n\n", buf[1]); if (gpuver < 6) { disasm_legacy(&buf[1], sz / 4 - 1); } else { struct emu emu = { .instrs = &buf[1], .sizedwords = sz / 4 - 1, .gpu_id = gpu_id, }; disasm(&emu); } return 0; }