#include "brw.h"

/*
 * GRF numbers written by brw_wm_xy() and read back by the LINE/MAC paths
 * that are taken when p->gen < 060.
 */
#define X16 8
#define Y16 10

/*
 * Emit the four ADDs that populate the X16 and Y16 registers.
 *
 * Sub-registers 4 and 5 of g1 (retyped through __retype_uw) are each added
 * to a packed immediate vector, with the sums landing in g30 and g28 as
 * unsigned words.  Those temporaries are then passed through vec8() and
 * have g1.0 / g1.1 added in negated form to produce X16 and Y16.
 * Presumably this yields per-pixel coordinates relative to an origin held
 * in g1 -- confirm against the thread payload layout.
 *
 * The callers in this file only invoke it when p->gen < 060.
 *
 * @p:  compile context the instructions are appended to
 * @dw: dispatch width; 16 selects the 16-wide temporaries, anything else
 *      the 8-wide ones
 */
static void brw_wm_xy(struct brw_compile *p, int dw)
{
	struct brw_reg g1 = brw_vec1_grf(1, 0);
	struct brw_reg g1_uw = __retype_uw(g1);
	struct brw_reg tmp_x, tmp_y;

	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	if (dw != 16) {
		tmp_x = brw_uw8_grf(30, 0);
		tmp_y = brw_uw8_grf(28, 0);
	} else {
		tmp_x = brw_uw16_grf(30, 0);
		tmp_y = brw_uw16_grf(28, 0);
	}

	brw_ADD(p, tmp_x,
		__stride(__suboffset(g1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
	brw_ADD(p, tmp_y,
		__stride(__suboffset(g1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));

	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(tmp_x), brw_negate(g1));
	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(tmp_y),
		brw_negate(__suboffset(g1, 1)));
}

/*
 * Shared prologue of the texture-coordinate emitters.
 *
 * Switches compression on for dw == 16 and off otherwise, then returns the
 * GRF number the emitters read their scalar operands from.  The base
 * register is 3 when p->gen < 060; from 060 on it is 6 for dw == 16 and 4
 * otherwise.  Each channel advances the base by two registers.
 */
static int brw_wm_uv_setup(struct brw_compile *p, int dw, int channel)
{
	int base = 3;

	if (dw == 16) {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		if (p->gen >= 060)
			base = 6;
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		if (p->gen >= 060)
			base = 4;
	}

	return base + 2 * channel;
}

/*
 * Emit the two coordinate values for an affine sample straight into the
 * message registers.
 *
 * The first value goes to m(msg + 1) and the second to m(msg + 1 + dw/8).
 * With p->gen >= 060 each value is a single PLN against g2; on older parts
 * each value is a LINE into the null register followed by a MAC, reading
 * the X16/Y16 registers.
 *
 * @channel: selects which pair of operand registers is used
 * @msg:     message register immediately before the first one written
 */
static void brw_wm_affine_st(struct brw_compile *p, int dw,
			     int channel, int msg)
{
	const int uv = brw_wm_uv_setup(p, dw, channel);
	const int mrf0 = msg + 1;
	const int mrf1 = mrf0 + dw / 8;

	if (p->gen < 060) {
		struct brw_reg coef = brw_vec1_grf(uv, 0);

		brw_LINE(p, brw_null_reg(),
			 __suboffset(coef, 0), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_message_reg(mrf0),
			__suboffset(coef, 1), brw_vec8_grf(Y16, 0));

		brw_LINE(p, brw_null_reg(),
			 __suboffset(coef, 4), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_message_reg(mrf1),
			__suboffset(coef, 5), brw_vec8_grf(Y16, 0));
		return;
	}

	brw_PLN(p, brw_message_reg(mrf0),
		brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0));
	brw_PLN(p, brw_message_reg(mrf1),
		brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0));
}

/* Map a dispatch width onto the matching sampler SIMD mode constant. */
static inline unsigned simd(int dw)
{
	if (dw == 16)
		return BRW_SAMPLER_SIMD_MODE_SIMD16;

	return BRW_SAMPLER_SIMD_MODE_SIMD8;
}

/*
 * Build the UW-typed GRF region, starting at register @result, that is
 * handed to brw_SAMPLE() as its destination.  Vertical stride and width
 * follow the dispatch width.
 */
static inline struct brw_reg sample_result(int dw, int result)
{
	unsigned vstride, width;

	if (dw == 16) {
		vstride = BRW_VERTICAL_STRIDE_16;
		width = BRW_WIDTH_16;
	} else {
		vstride = BRW_VERTICAL_STRIDE_8;
		width = BRW_WIDTH_8;
	}

	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
		       BRW_REGISTER_TYPE_UW,
		       vstride, width, BRW_HORIZONTAL_STRIDE_1,
		       BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
}

/*
 * Emit a sample with WRITEMASK_XYZW whose destination starts at GRF
 * @result.
 *
 * With p->gen >= 060 no header is flagged and the source is the message
 * register after @msg (that incremented number is also what brw_SAMPLE()
 * receives).  On older parts a header is flagged and the source is g0.
 *
 * Returns @result unchanged.
 */
static int brw_wm_sample(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	const int len = (dw == 16) ? 4 : 2;
	struct brw_reg payload;
	bool has_header;

	if (p->gen < 060) {
		has_header = true;
		payload = brw_vec8_grf(0, 0);
	} else {
		has_header = false;
		msg += 1;
		payload = brw_message_reg(msg);
	}

	brw_SAMPLE(p, sample_result(dw, result), msg, payload,
		   channel + 1, channel, WRITEMASK_XYZW, 0,
		   2 * len, len + has_header, has_header, simd(dw));

	return result;
}

/*
 * Emit a sample restricted to WRITEMASK_W.
 *
 * Returns the register the caller should read: @result + 3 for dw == 8,
 * otherwise @result itself.
 */
static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	const bool simd8 = (dw == 8);
	struct brw_reg payload;
	int mlen, rlen;

	/* SIMD8 sample return is not masked, hence the longer response. */
	mlen = simd8 ? 3 : 5;
	rlen = simd8 ? 4 : 2;

	payload = p->gen >= 060 ? brw_message_reg(msg) : brw_vec8_grf(0, 0);

	brw_SAMPLE(p, sample_result(dw, result), msg, payload,
		   channel + 1, channel, WRITEMASK_W, 0,
		   rlen, mlen, true, simd(dw));

	return simd8 ? result + 3 : result;
}

/* Affine coordinates followed by a full sample; returns the result GRF. */
static int brw_wm_affine(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	brw_wm_affine_st(p, dw, channel, msg);
	return brw_wm_sample(p, dw, channel, msg, result);
}

/* Affine coordinates followed by a W-only sample; returns the result GRF. */
static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	brw_wm_affine_st(p, dw, channel, msg);
	return brw_wm_sample__alpha(p, dw, channel, msg, result);
}

/*
 * Build the UW-typed null-register region used as the destination of the
 * framebuffer write; stride and width follow the dispatch width.
 */
static inline struct brw_reg null_result(int dw)
{
	unsigned vstride, width;

	if (dw == 16) {
		vstride = BRW_VERTICAL_STRIDE_16;
		width = BRW_WIDTH_16;
	} else {
		vstride = BRW_VERTICAL_STRIDE_8;
		width = BRW_WIDTH_8;
	}

	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
		       BRW_REGISTER_TYPE_UW,
		       vstride, width, BRW_HORIZONTAL_STRIDE_1,
		       BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
}

/*
 * Emit the SEND that performs the render-target write.
 *
 * The message is 8 registers long for dw == 16 and 4 otherwise.  When
 * p->gen < 060, g1 is first copied to m1 with masking disabled, the length
 * grows by two, a header is flagged and the SEND source is g0; from 060 on
 * the source is m2 with no header.
 */
static void brw_fb_write(struct brw_compile *p, int dw)
{
	struct brw_instruction *send;
	struct brw_reg payload;
	unsigned control, type, length;
	bool has_header;

	if (dw != 16) {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
		length = 4;
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
		length = 8;
	}

	if (p->gen < 060) {
		/* The instruction state is saved and restored around the copy. */
		brw_push_insn_state(p);
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
		brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
		brw_pop_insn_state(p);

		length += 2;
	}

	/* The execution mask is ignored for render target writes. */
	send = brw_next_insn(p, BRW_OPCODE_SEND);
	send->header.predicate_control = 0;
	send->header.compression_control = BRW_COMPRESSION_NONE;

	if (p->gen < 060) {
		send->header.destreg__conditionalmod = 0;
		type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
		payload = __retype_uw(brw_vec8_grf(0, 0));
		has_header = true;
	} else {
		type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
		payload = brw_message_reg(2);
		has_header = false;
	}

	brw_set_dest(p, send, null_result(dw));
	brw_set_src0(p, send, payload);
	brw_set_dp_write_message(p, send, 0,
				 control, type, length,
				 has_header, true, 0, true, false);
}

/*
 * Copy the four values starting at GRF @src into the message registers
 * from m2 onwards, then emit the framebuffer write.
 *
 * Three layouts are produced:
 *  - p->gen >= 060: one MOV per value (uncompressed for dw == 8,
 *    compressed otherwise);
 *  - p->gen >= 045 with dw == 16: one compressed MOV per value targeting
 *    2 + n + BRW_MRF_COMPR4;
 *  - everything else: an uncompressed MOV per value plus, for dw == 16, a
 *    2NDHALF MOV into the register four further on.
 */
static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
	int ch;

	if (dw == 8 && p->gen >= 060) {
		/* XXX pixel execution mask? */
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		for (ch = 0; ch < 4; ch++)
			brw_MOV(p, brw_message_reg(2 + ch),
				brw_vec8_grf(src + ch, 0));
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

		for (ch = 0; ch < 4; ch++) {
			const int in = src + 2 * ch;

			if (p->gen >= 060) {
				brw_MOV(p, brw_message_reg(2 + 2 * ch),
					brw_vec8_grf(in, 0));
				continue;
			}

			if (p->gen >= 045 && dw == 16) {
				brw_MOV(p,
					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
					brw_vec8_grf(in, 0));
				continue;
			}

			/*
			 * NOTE(review): with dw == 8 this path still advances
			 * the source by two registers per value, unlike the
			 * gen >= 060 SIMD8 case above -- confirm it is only
			 * reached with dw == 16 or that the stride is intended.
			 */
			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_MOV(p, brw_message_reg(2 + ch),
				brw_vec8_grf(in, 0));

			if (dw == 16) {
				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				brw_MOV(p, brw_message_reg(2 + ch + 4),
					brw_vec8_grf(in + 1, 0));
			}
		}
	}

	brw_fb_write(p, dw);
}

/*
 * Same register layout as brw_wm_write(), but every value is multiplied by
 * the register at GRF @mask on its way into the message.  The only place
 * a different mask register is read is the 2NDHALF instruction, which uses
 * @mask + 1.
 */
static void brw_wm_write__mask(struct brw_compile *p, int dw,
			       int src, int mask)
{
	int ch;

	if (dw == 8 && p->gen >= 060) {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		for (ch = 0; ch < 4; ch++)
			brw_MUL(p, brw_message_reg(2 + ch),
				brw_vec8_grf(src + ch, 0),
				brw_vec8_grf(mask, 0));
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

		for (ch = 0; ch < 4; ch++) {
			const int in = src + 2 * ch;

			if (p->gen >= 060) {
				brw_MUL(p, brw_message_reg(2 + 2 * ch),
					brw_vec8_grf(in, 0),
					brw_vec8_grf(mask, 0));
				continue;
			}

			if (p->gen >= 045 && dw == 16) {
				brw_MUL(p,
					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
					brw_vec8_grf(in, 0),
					brw_vec8_grf(mask, 0));
				continue;
			}

			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_MUL(p, brw_message_reg(2 + ch),
				brw_vec8_grf(in, 0),
				brw_vec8_grf(mask, 0));

			if (dw == 16) {
				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				brw_MUL(p, brw_message_reg(2 + ch + 4),
					brw_vec8_grf(in + 1, 0),
					brw_vec8_grf(mask + 1, 0));
			}
		}
	}

	brw_fb_write(p, dw);
}

/*
 * Same register layout as brw_wm_write(), but every value is multiplied by
 * the single scalar at sub-register 3 of GRF @mask.
 */
static void brw_wm_write__opacity(struct brw_compile *p, int dw,
				  int src, int mask)
{
	int ch;

	if (dw == 8 && p->gen >= 060) {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		for (ch = 0; ch < 4; ch++)
			brw_MUL(p, brw_message_reg(2 + ch),
				brw_vec8_grf(src + ch, 0),
				brw_vec1_grf(mask, 3));
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

		for (ch = 0; ch < 4; ch++) {
			const int in = src + 2 * ch;

			if (p->gen >= 060) {
				brw_MUL(p, brw_message_reg(2 + 2 * ch),
					brw_vec8_grf(in, 0),
					brw_vec1_grf(mask, 3));
				continue;
			}

			if (p->gen >= 045 && dw == 16) {
				brw_MUL(p,
					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
					brw_vec8_grf(in, 0),
					brw_vec1_grf(mask, 3));
				continue;
			}

			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_MUL(p, brw_message_reg(2 + ch),
				brw_vec8_grf(in, 0),
				brw_vec1_grf(mask, 3));

			if (dw == 16) {
				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				brw_MUL(p, brw_message_reg(2 + ch + 4),
					brw_vec8_grf(in + 1, 0),
					brw_vec1_grf(mask, 3));
			}
		}
	}

	brw_fb_write(p, dw);
}

/*
 * Same register layout as brw_wm_write(), but each value is multiplied by
 * the mask register at the same offset from @mask as the source register
 * is from @src.
 */
static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
				  int src, int mask)
{
	int ch;

	if (dw == 8 && p->gen >= 060) {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		for (ch = 0; ch < 4; ch++)
			brw_MUL(p, brw_message_reg(2 + ch),
				brw_vec8_grf(src + ch, 0),
				brw_vec8_grf(mask + ch, 0));
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

		for (ch = 0; ch < 4; ch++) {
			const int in = src + 2 * ch;
			const int factor = mask + 2 * ch;

			if (p->gen >= 060) {
				brw_MUL(p, brw_message_reg(2 + 2 * ch),
					brw_vec8_grf(in, 0),
					brw_vec8_grf(factor, 0));
				continue;
			}

			if (p->gen >= 045 && dw == 16) {
				brw_MUL(p,
					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
					brw_vec8_grf(in, 0),
					brw_vec8_grf(factor, 0));
				continue;
			}

			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_MUL(p, brw_message_reg(2 + ch),
				brw_vec8_grf(in, 0),
				brw_vec8_grf(factor, 0));

			if (dw == 16) {
				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				brw_MUL(p, brw_message_reg(2 + ch + 4),
					brw_vec8_grf(in + 1, 0),
					brw_vec8_grf(factor + 1, 0));
			}
		}
	}

	brw_fb_write(p, dw);
}

/*
 * Kernel: one affine sample (channel 0, result at g12) written out
 * unmodified.  Always returns true.
 */
bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
	int color;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	brw_wm_write(p, dispatch, color);

	return true;
}

/*
 * Kernel: full affine sample of channel 0 multiplied by a W-only affine
 * sample of channel 1.  Always returns true.
 */
bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
	int color, alpha;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	alpha = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
	brw_wm_write__mask(p, dispatch, color, alpha);

	return true;
}

/*
 * Kernel: full affine samples of channels 0 and 1 multiplied register by
 * register.  Always returns true.
 */
bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
	int color, factor;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	factor = brw_wm_affine(p, dispatch, 1, 6, 20);
	brw_wm_write__mask_ca(p, dispatch, color, factor);

	return true;
}

/*
 * Kernel: W-only affine sample of channel 0 used as the multiplier for a
 * full affine sample of channel 1 (note the swapped argument order in the
 * write call).  Always returns true.
 */
bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha, mask_color;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	src_alpha = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
	mask_color = brw_wm_affine(p, dispatch, 1, 6, 16);
	brw_wm_write__mask(p, dispatch, mask_color, src_alpha);

	return true;
}

/* Projective variants */

/*
 * Emit brw_math_invert() in place over g30, and over g31 as well when
 * dw == 16.  The 16-wide case drops to uncompressed mode for the two
 * inverts and re-enables compression afterwards.
 */
static void brw_wm_invert_g30(struct brw_compile *p, int dw)
{
	if (dw != 16) {
		brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
		return;
	}

	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
	brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}

/*
 * Emit the two coordinate values for a projective sample.
 *
 * A third term is evaluated from the register after the uv pair into g30
 * and inverted (1/z); each coordinate is then evaluated into a temporary
 * GRF and multiplied by g30 into m(msg + 1) and m(msg + 1 + dw/8).
 * With p->gen >= 060 the evaluation uses PLN against g2 and temporaries
 * g26/g28; older parts use LINE + MAC against X16/Y16 and reuse g28.
 */
static void brw_wm_projective_st(struct brw_compile *p, int dw,
				 int channel, int msg)
{
	const int uv = brw_wm_uv_setup(p, dw, channel);
	const int mrf0 = msg + 1;
	const int mrf1 = mrf0 + dw / 8;

	if (p->gen >= 060) {
		/* First compute 1/z */
		brw_PLN(p, brw_vec8_grf(30, 0),
			brw_vec1_grf(uv + 1, 0), brw_vec8_grf(2, 0));
		brw_wm_invert_g30(p, dw);

		brw_PLN(p, brw_vec8_grf(26, 0),
			brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0));
		brw_PLN(p, brw_vec8_grf(28, 0),
			brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0));

		brw_MUL(p, brw_message_reg(mrf0),
			brw_vec8_grf(26, 0), brw_vec8_grf(30, 0));
		brw_MUL(p, brw_message_reg(mrf1),
			brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
	} else {
		struct brw_reg coef = brw_vec1_grf(uv, 0);

		/* First compute 1/z */
		brw_LINE(p, brw_null_reg(),
			 brw_vec1_grf(uv + 1, 0), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_vec8_grf(30, 0),
			brw_vec1_grf(uv + 1, 1), brw_vec8_grf(Y16, 0));
		brw_wm_invert_g30(p, dw);

		/* Now compute the output s,t values */
		brw_LINE(p, brw_null_reg(),
			 __suboffset(coef, 0), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_vec8_grf(28, 0),
			__suboffset(coef, 1), brw_vec8_grf(Y16, 0));
		brw_MUL(p, brw_message_reg(mrf0),
			brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));

		brw_LINE(p, brw_null_reg(),
			 __suboffset(coef, 4), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_vec8_grf(28, 0),
			__suboffset(coef, 5), brw_vec8_grf(Y16, 0));
		brw_MUL(p, brw_message_reg(mrf1),
			brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
	}
}

/* Projective coordinates followed by a full sample; returns the result GRF. */
static int brw_wm_projective(struct brw_compile *p, int dw,
			     int channel, int msg, int result)
{
	brw_wm_projective_st(p, dw, channel, msg);
	return brw_wm_sample(p, dw, channel, msg, result);
}

/* Projective coordinates followed by a W-only sample; returns the result GRF. */
static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
				    int channel, int msg, int result)
{
	brw_wm_projective_st(p, dw, channel, msg);
	return brw_wm_sample__alpha(p, dw, channel, msg, result);
}

/*
 * Kernel: one projective sample (channel 0, result at g12) written out
 * unmodified.  Always returns true.
 */
bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
	int color;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	brw_wm_write(p, dispatch, color);

	return true;
}

/*
 * Kernel: full projective sample of channel 0 multiplied by a W-only
 * projective sample of channel 1.  Always returns true.
 */
bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
	int color, alpha;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	alpha = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
	brw_wm_write__mask(p, dispatch, color, alpha);

	return true;
}

/*
 * Kernel: full projective samples of channels 0 and 1 multiplied register
 * by register.  Always returns true.
 */
bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
	int color, factor;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	factor = brw_wm_projective(p, dispatch, 1, 6, 20);
	brw_wm_write__mask_ca(p, dispatch, color, factor);

	return true;
}

/*
 * Kernel: W-only projective sample of channel 0 used as the multiplier for
 * a full projective sample of channel 1 (note the swapped argument order
 * in the write call).  Always returns true.
 */
bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha, mask_color;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	src_alpha = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
	mask_color = brw_wm_projective(p, dispatch, 1, 6, 16);
	brw_wm_write__mask(p, dispatch, mask_color, src_alpha);

	return true;
}

/*
 * Kernel: affine sample of channel 0 scaled by a scalar read from
 * sub-register 3 of a fixed GRF: g5 when p->gen < 060, otherwise g8 for a
 * 16-wide dispatch and g6 for any other width.  Always returns true.
 */
bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
	int color, opacity;

	if (p->gen >= 060) {
		opacity = 6;
		if (dispatch == 16)
			opacity = 8;
	} else {
		brw_wm_xy(p, dispatch);
		opacity = 5;
	}

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	brw_wm_write__opacity(p, dispatch, color, opacity);

	return true;
}

/*
 * Kernel: projective sample of channel 0 scaled by a scalar read from
 * sub-register 3 of a fixed GRF: g5 when p->gen < 060, otherwise g8 for a
 * 16-wide dispatch and g6 for any other width.  Always returns true.
 */
bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
	int color, opacity;

	if (p->gen >= 060) {
		opacity = 6;
		if (dispatch == 16)
			opacity = 8;
	} else {
		brw_wm_xy(p, dispatch);
		opacity = 5;
	}

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	brw_wm_write__opacity(p, dispatch, color, opacity);

	return true;
}