/* * Copyright (c) 2013 Rob Clark * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef INSTR_A3XX_H_ #define INSTR_A3XX_H_ #define PACKED __attribute__((__packed__)) #include #include #include #include /* clang-format off */ void ir3_assert_handler(const char *expr, const char *file, int line, const char *func) __attribute__((weak)) __attribute__((__noreturn__)); /* clang-format on */ /* A wrapper for assert() that allows overriding handling of a failed * assert. This is needed for tools like crashdec which can want to * attempt to disassemble memory that might not actually be valid * instructions. */ #define ir3_assert(expr) \ do { \ if (!(expr)) { \ if (ir3_assert_handler) { \ ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \ } \ assert(expr); \ } \ } while (0) /* size of largest OPC field of all the instruction categories: */ #define NOPC_BITS 6 #define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) /* clang-format off */ typedef enum { /* category 0: */ OPC_NOP = _OPC(0, 0), OPC_B = _OPC(0, 1), OPC_JUMP = _OPC(0, 2), OPC_CALL = _OPC(0, 3), OPC_RET = _OPC(0, 4), OPC_KILL = _OPC(0, 5), OPC_END = _OPC(0, 6), OPC_EMIT = _OPC(0, 7), OPC_CUT = _OPC(0, 8), OPC_CHMASK = _OPC(0, 9), OPC_CHSH = _OPC(0, 10), OPC_FLOW_REV = _OPC(0, 11), OPC_BKT = _OPC(0, 16), OPC_STKS = _OPC(0, 17), OPC_STKR = _OPC(0, 18), OPC_XSET = _OPC(0, 19), OPC_XCLR = _OPC(0, 20), OPC_GETONE = _OPC(0, 21), OPC_DBG = _OPC(0, 22), OPC_SHPS = _OPC(0, 23), /* shader prologue start */ OPC_SHPE = _OPC(0, 24), /* shader prologue end */ OPC_PREDT = _OPC(0, 29), /* predicated true */ OPC_PREDF = _OPC(0, 30), /* predicated false */ OPC_PREDE = _OPC(0, 31), /* predicated end */ /* Logical opcodes for different branch instruction variations: */ OPC_BR = _OPC(0, 40), OPC_BRAO = _OPC(0, 41), OPC_BRAA = _OPC(0, 42), OPC_BRAC = _OPC(0, 43), OPC_BANY = _OPC(0, 44), OPC_BALL = _OPC(0, 45), OPC_BRAX = _OPC(0, 46), /* Logical opcode to distinguish kill and demote */ OPC_DEMOTE = _OPC(0, 47), /* category 1: */ OPC_MOV = _OPC(1, 0), OPC_MOVP = _OPC(1, 1), /* swz, gat, sct */ OPC_MOVMSK = _OPC(1, 3), /* Virtual opcodes for instructions differentiated via a "sub-opcode" that * replaces the repeat field: */ OPC_SWZ = _OPC(1, 4), OPC_GAT = _OPC(1, 5), OPC_SCT = _OPC(1, 6), /* Logical opcodes for different variants of mov: */ OPC_MOV_IMMED = _OPC(1, 40), OPC_MOV_CONST = _OPC(1, 41), OPC_MOV_GPR = _OPC(1, 42), OPC_MOV_RELGPR = _OPC(1, 43), OPC_MOV_RELCONST = _OPC(1, 44), /* Macros that expand to an if statement + move */ OPC_BALLOT_MACRO = _OPC(1, 50), OPC_ANY_MACRO = _OPC(1, 51), OPC_ALL_MACRO = _OPC(1, 52), OPC_ELECT_MACRO = _OPC(1, 53), OPC_READ_COND_MACRO = _OPC(1, 54), OPC_READ_FIRST_MACRO = _OPC(1, 55), OPC_SWZ_SHARED_MACRO = _OPC(1, 56), /* category 2: */ OPC_ADD_F = _OPC(2, 0), OPC_MIN_F = _OPC(2, 1), OPC_MAX_F = _OPC(2, 2), OPC_MUL_F = _OPC(2, 3), OPC_SIGN_F = _OPC(2, 4), OPC_CMPS_F = _OPC(2, 5), OPC_ABSNEG_F = _OPC(2, 6), OPC_CMPV_F = _OPC(2, 7), /* 8 - invalid */ OPC_FLOOR_F = _OPC(2, 9), OPC_CEIL_F = _OPC(2, 10), OPC_RNDNE_F = _OPC(2, 11), OPC_RNDAZ_F = _OPC(2, 12), OPC_TRUNC_F = _OPC(2, 13), /* 14-15 - invalid */ OPC_ADD_U = _OPC(2, 16), OPC_ADD_S = _OPC(2, 17), OPC_SUB_U = _OPC(2, 18), OPC_SUB_S = _OPC(2, 19), OPC_CMPS_U = _OPC(2, 20), OPC_CMPS_S = _OPC(2, 21), OPC_MIN_U = _OPC(2, 22), OPC_MIN_S = _OPC(2, 23), OPC_MAX_U = _OPC(2, 24), OPC_MAX_S = _OPC(2, 25), OPC_ABSNEG_S = _OPC(2, 26), /* 27 - invalid */ OPC_AND_B = _OPC(2, 28), OPC_OR_B = _OPC(2, 29), OPC_NOT_B = _OPC(2, 30), OPC_XOR_B = _OPC(2, 31), /* 32 - invalid */ OPC_CMPV_U = _OPC(2, 33), OPC_CMPV_S = _OPC(2, 34), /* 35-47 - invalid */ OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */ OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */ OPC_MULL_U = _OPC(2, 50), OPC_BFREV_B = _OPC(2, 51), OPC_CLZ_S = _OPC(2, 52), OPC_CLZ_B = _OPC(2, 53), OPC_SHL_B = _OPC(2, 54), OPC_SHR_B = _OPC(2, 55), OPC_ASHR_B = _OPC(2, 56), OPC_BARY_F = _OPC(2, 57), OPC_MGEN_B = _OPC(2, 58), OPC_GETBIT_B = _OPC(2, 59), OPC_SETRM = _OPC(2, 60), OPC_CBITS_B = _OPC(2, 61), OPC_SHB = _OPC(2, 62), OPC_MSAD = _OPC(2, 63), /* category 3: */ OPC_MAD_U16 = _OPC(3, 0), OPC_MADSH_U16 = _OPC(3, 1), OPC_MAD_S16 = _OPC(3, 2), OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */ OPC_MAD_U24 = _OPC(3, 4), OPC_MAD_S24 = _OPC(3, 5), OPC_MAD_F16 = _OPC(3, 6), OPC_MAD_F32 = _OPC(3, 7), OPC_SEL_B16 = _OPC(3, 8), OPC_SEL_B32 = _OPC(3, 9), OPC_SEL_S16 = _OPC(3, 10), OPC_SEL_S32 = _OPC(3, 11), OPC_SEL_F16 = _OPC(3, 12), OPC_SEL_F32 = _OPC(3, 13), OPC_SAD_S16 = _OPC(3, 14), OPC_SAD_S32 = _OPC(3, 15), OPC_SHLG_B16 = _OPC(3, 16), /* category 4: */ OPC_RCP = _OPC(4, 0), OPC_RSQ = _OPC(4, 1), OPC_LOG2 = _OPC(4, 2), OPC_EXP2 = _OPC(4, 3), OPC_SIN = _OPC(4, 4), OPC_COS = _OPC(4, 5), OPC_SQRT = _OPC(4, 6), /* NOTE that these are 8+opc from their highp equivs, so it's possible * that the high order bit in the opc field has been repurposed for * half-precision use? But note that other ops (rcp/lsin/cos/sqrt) * still use the same opc as highp */ OPC_HRSQ = _OPC(4, 9), OPC_HLOG2 = _OPC(4, 10), OPC_HEXP2 = _OPC(4, 11), /* category 5: */ OPC_ISAM = _OPC(5, 0), OPC_ISAML = _OPC(5, 1), OPC_ISAMM = _OPC(5, 2), OPC_SAM = _OPC(5, 3), OPC_SAMB = _OPC(5, 4), OPC_SAML = _OPC(5, 5), OPC_SAMGQ = _OPC(5, 6), OPC_GETLOD = _OPC(5, 7), OPC_CONV = _OPC(5, 8), OPC_CONVM = _OPC(5, 9), OPC_GETSIZE = _OPC(5, 10), OPC_GETBUF = _OPC(5, 11), OPC_GETPOS = _OPC(5, 12), OPC_GETINFO = _OPC(5, 13), OPC_DSX = _OPC(5, 14), OPC_DSY = _OPC(5, 15), OPC_GATHER4R = _OPC(5, 16), OPC_GATHER4G = _OPC(5, 17), OPC_GATHER4B = _OPC(5, 18), OPC_GATHER4A = _OPC(5, 19), OPC_SAMGP0 = _OPC(5, 20), OPC_SAMGP1 = _OPC(5, 21), OPC_SAMGP2 = _OPC(5, 22), OPC_SAMGP3 = _OPC(5, 23), OPC_DSXPP_1 = _OPC(5, 24), OPC_DSYPP_1 = _OPC(5, 25), OPC_RGETPOS = _OPC(5, 26), OPC_RGETINFO = _OPC(5, 27), /* cat5 meta instructions, placed above the cat5 opc field's size */ OPC_DSXPP_MACRO = _OPC(5, 32), OPC_DSYPP_MACRO = _OPC(5, 33), /* category 6: */ OPC_LDG = _OPC(6, 0), /* load-global */ OPC_LDL = _OPC(6, 1), OPC_LDP = _OPC(6, 2), OPC_STG = _OPC(6, 3), /* store-global */ OPC_STL = _OPC(6, 4), OPC_STP = _OPC(6, 5), OPC_LDIB = _OPC(6, 6), OPC_G2L = _OPC(6, 7), OPC_L2G = _OPC(6, 8), OPC_PREFETCH = _OPC(6, 9), OPC_LDLW = _OPC(6, 10), OPC_STLW = _OPC(6, 11), OPC_RESFMT = _OPC(6, 14), OPC_RESINFO = _OPC(6, 15), OPC_ATOMIC_ADD = _OPC(6, 16), OPC_ATOMIC_SUB = _OPC(6, 17), OPC_ATOMIC_XCHG = _OPC(6, 18), OPC_ATOMIC_INC = _OPC(6, 19), OPC_ATOMIC_DEC = _OPC(6, 20), OPC_ATOMIC_CMPXCHG = _OPC(6, 21), OPC_ATOMIC_MIN = _OPC(6, 22), OPC_ATOMIC_MAX = _OPC(6, 23), OPC_ATOMIC_AND = _OPC(6, 24), OPC_ATOMIC_OR = _OPC(6, 25), OPC_ATOMIC_XOR = _OPC(6, 26), OPC_LDGB = _OPC(6, 27), OPC_STGB = _OPC(6, 28), OPC_STIB = _OPC(6, 29), OPC_LDC = _OPC(6, 30), OPC_LDLV = _OPC(6, 31), OPC_PIPR = _OPC(6, 32), /* ??? */ OPC_PIPC = _OPC(6, 33), /* ??? */ OPC_EMIT2 = _OPC(6, 34), /* ??? */ OPC_ENDLS = _OPC(6, 35), /* ??? */ OPC_GETSPID = _OPC(6, 36), /* SP ID */ OPC_GETWID = _OPC(6, 37), /* wavefront ID */ /* Logical opcodes for things that differ in a6xx+ */ OPC_STC = _OPC(6, 40), OPC_RESINFO_B = _OPC(6, 41), OPC_LDIB_B = _OPC(6, 42), OPC_STIB_B = _OPC(6, 43), /* Logical opcodes for different atomic instruction variations: */ OPC_ATOMIC_B_ADD = _OPC(6, 44), OPC_ATOMIC_B_SUB = _OPC(6, 45), OPC_ATOMIC_B_XCHG = _OPC(6, 46), OPC_ATOMIC_B_INC = _OPC(6, 47), OPC_ATOMIC_B_DEC = _OPC(6, 48), OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49), OPC_ATOMIC_B_MIN = _OPC(6, 50), OPC_ATOMIC_B_MAX = _OPC(6, 51), OPC_ATOMIC_B_AND = _OPC(6, 52), OPC_ATOMIC_B_OR = _OPC(6, 53), OPC_ATOMIC_B_XOR = _OPC(6, 54), OPC_LDG_A = _OPC(6, 55), OPC_STG_A = _OPC(6, 56), OPC_SPILL_MACRO = _OPC(6, 57), OPC_RELOAD_MACRO = _OPC(6, 58), /* category 7: */ OPC_BAR = _OPC(7, 0), OPC_FENCE = _OPC(7, 1), /* meta instructions (category -1): */ /* placeholder instr to mark shader inputs: */ OPC_META_INPUT = _OPC(-1, 0), /* The "collect" and "split" instructions are used for keeping * track of instructions that write to multiple dst registers * (split) like texture sample instructions, or read multiple * consecutive scalar registers (collect) (bary.f, texture samp) * * A "split" extracts a scalar component from a vecN, and a * "collect" gathers multiple scalar components into a vecN */ OPC_META_SPLIT = _OPC(-1, 2), OPC_META_COLLECT = _OPC(-1, 3), /* placeholder for texture fetches that run before FS invocation * starts: */ OPC_META_TEX_PREFETCH = _OPC(-1, 4), /* Parallel copies have multiple destinations, and copy each destination * to its corresponding source. This happens "in parallel," meaning that * it happens as-if every source is read first and then every destination * is stored. These are produced in RA when register shuffling is * required, and then lowered away immediately afterwards. */ OPC_META_PARALLEL_COPY = _OPC(-1, 5), OPC_META_PHI = _OPC(-1, 6), } opc_t; /* clang-format on */ #define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) #define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) const char *disasm_a3xx_instr_name(opc_t opc); typedef enum { TYPE_F16 = 0, TYPE_F32 = 1, TYPE_U16 = 2, TYPE_U32 = 3, TYPE_S16 = 4, TYPE_S32 = 5, TYPE_U8 = 6, TYPE_S8 = 7, // XXX I assume? } type_t; static inline uint32_t type_size(type_t type) { switch (type) { case TYPE_F32: case TYPE_U32: case TYPE_S32: return 32; case TYPE_F16: case TYPE_U16: case TYPE_S16: return 16; case TYPE_U8: case TYPE_S8: return 8; default: ir3_assert(0); /* invalid type */ return 0; } } static inline int type_float(type_t type) { return (type == TYPE_F32) || (type == TYPE_F16); } static inline int type_uint(type_t type) { return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); } static inline int type_sint(type_t type) { return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); } typedef enum { ROUND_ZERO = 0, ROUND_EVEN = 1, ROUND_POS_INF = 2, ROUND_NEG_INF = 3, } round_t; /* comp: * 0 - x * 1 - y * 2 - z * 3 - w */ static inline uint32_t regid(int num, int comp) { return (num << 2) | (comp & 0x3); } #define INVALID_REG regid(63, 0) #define VALIDREG(r) ((r) != INVALID_REG) #define CONDREG(r, val) COND(VALIDREG(r), (val)) /* special registers: */ #define REG_A0 61 /* address register */ #define REG_P0 62 /* predicate register */ typedef enum { BRANCH_PLAIN = 0, /* br */ BRANCH_OR = 1, /* brao */ BRANCH_AND = 2, /* braa */ BRANCH_CONST = 3, /* brac */ BRANCH_ANY = 4, /* bany */ BRANCH_ALL = 5, /* ball */ BRANCH_X = 6, /* brax ??? */ } brtype_t; /* With is_bindless_s2en = 1, this determines whether bindless is enabled and * if so, how to get the (base, index) pair for both sampler and texture. * There is a single base embedded in the instruction, which is always used * for the texture. */ typedef enum { /* Use traditional GL binding model, get texture and sampler index * from src3 which is not presumed to be uniform. This is * backwards-compatible with earlier generations, where this field was * always 0 and nonuniform-indexed sampling always worked. */ CAT5_NONUNIFORM = 0, /* The sampler base comes from the low 3 bits of a1.x, and the sampler * and texture index come from src3 which is presumed to be uniform. */ CAT5_BINDLESS_A1_UNIFORM = 1, /* The texture and sampler share the same base, and the sampler and * texture index come from src3 which is *not* presumed to be uniform. */ CAT5_BINDLESS_NONUNIFORM = 2, /* The sampler base comes from the low 3 bits of a1.x, and the sampler * and texture index come from src3 which is *not* presumed to be * uniform. */ CAT5_BINDLESS_A1_NONUNIFORM = 3, /* Use traditional GL binding model, get texture and sampler index * from src3 which is presumed to be uniform. */ CAT5_UNIFORM = 4, /* The texture and sampler share the same base, and the sampler and * texture index come from src3 which is presumed to be uniform. */ CAT5_BINDLESS_UNIFORM = 5, /* The texture and sampler share the same base, get sampler index from low * 4 bits of src3 and texture index from high 4 bits. */ CAT5_BINDLESS_IMM = 6, /* The sampler base comes from the low 3 bits of a1.x, and the texture * index comes from the next 8 bits of a1.x. The sampler index is an * immediate in src3. */ CAT5_BINDLESS_A1_IMM = 7, } cat5_desc_mode_t; /* Similar to cat5_desc_mode_t, describes how the descriptor is loaded. */ typedef enum { /* Use old GL binding model with an immediate index. */ CAT6_IMM = 0, CAT6_UNIFORM = 1, CAT6_NONUNIFORM = 2, /* Use the bindless model, with an immediate index. */ CAT6_BINDLESS_IMM = 4, /* Use the bindless model, with a uniform register index. */ CAT6_BINDLESS_UNIFORM = 5, /* Use the bindless model, with a register index that isn't guaranteed * to be uniform. This presumably checks if the indices are equal and * splits up the load/store, because it works the way you would * expect. */ CAT6_BINDLESS_NONUNIFORM = 6, } cat6_desc_mode_t; static inline bool is_sat_compatible(opc_t opc) { /* On a6xx saturation doesn't work on cat4 */ if (opc_cat(opc) != 2 && opc_cat(opc) != 3) return false; switch (opc) { /* On a3xx and a6xx saturation doesn't work on bary.f */ case OPC_BARY_F: /* On a6xx saturation doesn't work on sel.* */ case OPC_SEL_B16: case OPC_SEL_B32: case OPC_SEL_S16: case OPC_SEL_S32: case OPC_SEL_F16: case OPC_SEL_F32: return false; default: return true; } } static inline bool is_mad(opc_t opc) { switch (opc) { case OPC_MAD_U16: case OPC_MAD_S16: case OPC_MAD_U24: case OPC_MAD_S24: case OPC_MAD_F16: case OPC_MAD_F32: return true; default: return false; } } static inline bool is_madsh(opc_t opc) { switch (opc) { case OPC_MADSH_U16: case OPC_MADSH_M16: return true; default: return false; } } static inline bool is_atomic(opc_t opc) { switch (opc) { case OPC_ATOMIC_ADD: case OPC_ATOMIC_SUB: case OPC_ATOMIC_XCHG: case OPC_ATOMIC_INC: case OPC_ATOMIC_DEC: case OPC_ATOMIC_CMPXCHG: case OPC_ATOMIC_MIN: case OPC_ATOMIC_MAX: case OPC_ATOMIC_AND: case OPC_ATOMIC_OR: case OPC_ATOMIC_XOR: return true; default: return false; } } static inline bool is_ssbo(opc_t opc) { switch (opc) { case OPC_RESFMT: case OPC_RESINFO: case OPC_LDGB: case OPC_STGB: case OPC_STIB: return true; default: return false; } } static inline bool is_isam(opc_t opc) { switch (opc) { case OPC_ISAM: case OPC_ISAML: case OPC_ISAMM: return true; default: return false; } } static inline bool is_cat2_float(opc_t opc) { switch (opc) { case OPC_ADD_F: case OPC_MIN_F: case OPC_MAX_F: case OPC_MUL_F: case OPC_SIGN_F: case OPC_CMPS_F: case OPC_ABSNEG_F: case OPC_CMPV_F: case OPC_FLOOR_F: case OPC_CEIL_F: case OPC_RNDNE_F: case OPC_RNDAZ_F: case OPC_TRUNC_F: return true; default: return false; } } static inline bool is_cat3_float(opc_t opc) { switch (opc) { case OPC_MAD_F16: case OPC_MAD_F32: case OPC_SEL_F16: case OPC_SEL_F32: return true; default: return false; } } #endif /* INSTR_A3XX_H_ */