/* * ffb_asm.s: Fast Creator raster op inner loops. * * Copyright (C) 1999 David S. Miller (davem@redhat.com) * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * JAKUB JELINEK OR DAVID MILLER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/

	/* Dialect: SPARC V9 instructions (ldx/stx/sllx/brz, %icc/%xcc),
	 * GNU-as style syntax, run through the C preprocessor.
	 *
	 * All routines below are leaf routines: they return with retl and
	 * never touch the register window, so only %o and %g registers
	 * are used as scratch.
	 *
	 * NOTE(review): %g7 is used as a scratch register in several
	 * loops.  If the target ABI reserves %g7 (e.g. for a thread
	 * pointer) this needs to be revisited -- confirm against the
	 * ABI actually in use.
	 *
	 * NOTE(review): pointer comparisons use %icc, i.e. only the low
	 * 32 bits take part in the compare.  Confirm this is acceptable
	 * if the file is ever built for a 64-bit address space.
	 */

	/* Declare %g2/%g3 as scratch registers.  These directives were
	 * previously emitted twice (once under a V9 preprocessor guard and
	 * once unconditionally); a single unconditional declaration gives
	 * the same result in every configuration.
	 */
	.register	%g2, #scratch
	.register	%g3, #scratch

	/* Hardware register offsets */
#define FFB_BY			0x0060
#define FFB_DY			0x0068
#define FFB_BH			0x0070
#define FFB_PPC			0x0200
#define FFB_DRAWOP		0x0300
#define FFB_UCSR		0x0900

#define FFB_DRAWOP_VSCROLL	0x0b

	/* Offset of the FIFO cache halfword inside ffbpriv */
#define FIFO_CACHE		0x00

	/* Field offsets of the 16-bit members of the box, point,
	 * rectangle and segment records read by the loops below.
	 */
#define BOX_X1			0x00
#define BOX_Y1			0x02
#define BOX_X2			0x04
#define BOX_Y2			0x06

#define POINT_X			0x00
#define POINT_Y			0x02

#define RECT_X			0x00
#define RECT_Y			0x02
#define RECT_W			0x04
#define RECT_H			0x06

#define SEG_X1			0x00
#define SEG_Y1			0x02
#define SEG_X2			0x04
#define SEG_Y2			0x06

	.text
	.align		32
	.globl		FFB_STIPPLE_LOAD

	/* FFB_STIPPLE_LOAD: copy 128 bytes from the stipple bits to the
	 * pattern area using 64-bit loads and stores.
	 *
	 * In:       %o0 = &ffbregs->pattern[0]
	 *           %o1 = &stipple->bits[0]
	 * Clobbers: %g1-%g5, %o0-%o4, condition codes
	 *
	 * The 64-byte loop body runs exactly twice.  The initial
	 * "cmp %g0, 0" sets the Z flag so the first "be" is taken; the
	 * "cmp %g0, 1" in the (non-annulled) delay slot clears Z, so the
	 * second "be" falls through.
	 */
FFB_STIPPLE_LOAD:
	cmp		%g0, 0
1:	ldx		[%o1 + 0x00], %g1
	ldx		[%o1 + 0x08], %g2
	ldx		[%o1 + 0x10], %g3
	ldx		[%o1 + 0x18], %g4
	add		%o0, 0x40, %o0
	ldx		[%o1 + 0x20], %g5
	ldx		[%o1 + 0x28], %o2
	ldx		[%o1 + 0x30], %o3
	ldx		[%o1 + 0x38], %o4
	stx		%g1, [%o0 - 0x40]
	stx		%g2, [%o0 - 0x38]
	stx		%g3, [%o0 - 0x30]
	stx		%g4, [%o0 - 0x28]
	add		%o1, 0x40, %o1
	stx		%g5, [%o0 - 0x20]
	stx		%o2, [%o0 - 0x18]
	stx		%o3, [%o0 - 0x10]
	stx		%o4, [%o0 - 0x08]
	be,pt		%icc, 1b
	 cmp		%g0, 1
	retl
	 nop

	/* FIFO_WAIT(ffbregs, goal, this_label, done_label)
	 *
	 * Out-of-line slow path shared by all loops below.  It defines
	 * this_label, then repeatedly:
	 *   - loads the word at ffbregs + FFB_UCSR,
	 *   - keeps its low 12 bits,
	 *   - subtracts (4 + goal),
	 * until the result is non-negative, and then branches to
	 * done_label with that result in %g1.  The loops store %g1 back
	 * to the FIFO cache halfword on exit.
	 *
	 * Clobbers: %g1, condition codes.
	 */
#define FIFO_WAIT(ffbregs, goal, this_label, done_label)	\
this_label:							\
	lduw		[ffbregs + FFB_UCSR], %g1;		\
	and		%g1, 0xfff, %g1;			\
	subcc		%g1, (4 + goal), %g1;			\
	bge,pt		%icc, done_label;			\
	 nop;							\
	ba,a,pt		%xcc, this_label

	.align		32
	.globl		FFB_PPT_BOX_LOOP

	/* FFB_PPT_BOX_LOOP
	 *
	 * This is only used (currently) by vscroll, so we put
	 * the creator hwbug workaround in here (writing
	 * the drawop each iteration).
	 *
	 * In:       %o0 = ffbpriv, %o1 = ffbregs,
	 *           %o2 = pbox, %o3 = pbox_last (inclusive), %o4 = ppt
	 * Clobbers: %g1-%g5, %g7, %o2, %o4, %o5, condition codes
	 *
	 * For every box (8 bytes) and its matching point (4 bytes):
	 *   FFB_DRAWOP <- FFB_DRAWOP_VSCROLL            (32-bit store)
	 *   FFB_BY     <- (ppt->y << 32) | ppt->x       (64-bit store)
	 *   FFB_DY     <- (box.y1 << 32) | box.x1       (64-bit store)
	 *   FFB_BH     <- ((y2 - y1) << 32) | (x2 - x1) (64-bit store)
	 * The FIFO cache in %g1 is charged 7 per box; when it would go
	 * negative the loop detours through FFB_PPT_BOX_WAIT.
	 */
FFB_PPT_BOX_LOOP:
	lduh		[%o0 + FIFO_CACHE], %g1		/* Load		Group	*/
	sethi		%hi(FFB_DRAWOP), %g2		/* IEU0			*/
	cmp		%o2, %o3			/* IEU1			*/
	bgu,pn		%icc, 2f			/* CTI			*/
	 or		%g2,%lo(FFB_DRAWOP),%g2		/* IEU0		Group	*/
1:	lduh		[%o4 + POINT_X], %g3		/* Load		Group	*/
	lduh		[%o4 + POINT_Y], %g4		/* Load		Group	*/
	lduh		[%o2 + BOX_Y2], %g5		/* Load		Group	*/
	lduh		[%o2 + BOX_Y1], %o5		/* Load		Group	*/
	sllx		%g4, 32, %g4			/* IEU0			*/
	or		%g3, %g4, %g3			/* IEU0		Group	*/
	sub		%g5, %o5, %g5			/* IEU1			*/
	sllx		%o5, 32, %g4			/* IEU0		Group	*/
	sllx		%g5, 32, %g5			/* IEU0		Group	*/
	lduh		[%o2 + BOX_X2], %o5		/* Load			*/
	add		%o4, 0x4, %o4			/* IEU1			*/
	lduh		[%o2 + BOX_X1], %g7		/* Load		Group	*/
	add		%o2, 0x8, %o2			/* IEU0			*/
	sub		%o5, %g7, %o5			/* IEU0		Group	*/
	or		%g4, %g7, %g4			/* IEU1			*/
	or		%g5, %o5, %g5			/* IEU0		Group	*/
	subcc		%g1, 7, %g1			/* IEU1			*/
	bl,pn		%icc, FFB_PPT_BOX_WAIT		/* CTI			*/
	/* Label 9 is both the delay slot of the branch above and the
	 * resume point after FIFO_WAIT; re-running the compare is harmless.
	 */
9:	 cmp		%o2, %o3			/* IEU1		Group	*/
	/* This works around BUG ID 1189858 -DaveM */
	mov		FFB_DRAWOP_VSCROLL, %o5		/* IEU0			*/
	stw		%o5, [%o1 + %g2]		/* STORE		*/
	stx		%g3, [%o1 + FFB_BY]		/* STORE	Group	*/
	stx		%g4, [%o1 + FFB_DY]		/* STORE	Group	*/
	bleu,pt		%icc, 1b			/* CTI			*/
	 stx		%g5, [%o1 + FFB_BH]		/* STORE	Group	*/
2:	retl						/* CTI		Group	*/
	 sth		%g1, [%o0 + FIFO_CACHE]		/* STORE		*/

	FIFO_WAIT(%o1, 7, FFB_PPT_BOX_WAIT, 9b)

	.align		32
	.globl		FFB_BOX_LOOP

	/* FFB_BOX_LOOP
	 *
	 * In:       %o0 = ffbpriv, %o1 = ffbregs,
	 *           %o2 = pbox, %o3 = pbox_last (inclusive)
	 * Clobbers: %g1, %g3-%g5, %g7, %o2, %o4, %o5, condition codes
	 *
	 * Each box is read as two 32-bit words, W1 at BOX_X1 (covering
	 * x1,y1) and W2 at BOX_X2 (covering x2,y2).  With
	 *   P1 = (W1 << 32) | (W1 >> 16)
	 *   P2 = (W2 << 32) | (W2 >> 16)
	 * the loop stores P1 to FFB_BY and P2 - P1 to FFB_BH (64-bit
	 * stores).  The FIFO cache in %g1 is charged 4 per box.
	 */
FFB_BOX_LOOP:
	lduh		[%o0 + FIFO_CACHE], %g1		/* Load		Group	*/
	cmp		%o2, %o3			/* IEU0			*/
	bgu,pn		%icc, 2f			/* CTI			*/
	 nop						/* IEU0		Group	*/
1:	lduw		[%o2 + BOX_X1], %g4		/* Load		Group	*/
	lduw		[%o2 + BOX_X2], %g3		/* Load		Group	*/
	sllx		%g4, 32, %o5			/* IEU0			*/
	srl		%g4, 16, %g7			/* IEU0		Group	*/
	add		%o2, 8, %o2			/* IEU1			*/
	sllx		%g3, 32, %g5			/* IEU0		Group	*/
	subcc		%g1, 4, %g1			/* IEU1			*/
	srl		%g3, 16, %o4			/* IEU0		Group	*/
	or		%o5, %g7, %o5			/* IEU1			*/
	or		%g5, %o4, %g5			/* IEU0		Group	*/
	bl,pn		%icc, FFB_BOX_WAIT		/* CTI			*/
	 sub		%g5, %o5, %g5			/* IEU0		Group	*/
9:	cmp		%o2, %o3			/* IEU1			*/
	stx		%o5, [%o1 + FFB_BY]		/* Store		*/
	bleu,pt		%icc, 1b			/* CTI		Group	*/
	 stx		%g5, [%o1 + FFB_BH]		/* Store		*/
2:	retl						/* CTI		Group	*/
	 sth		%g1, [%o0 + FIFO_CACHE]		/* Store		*/

	FIFO_WAIT(%o1, 4, FFB_BOX_WAIT, 9b)

	.align		32
	.globl		FFB_RECT_LOOP

	/* FFB_RECT_LOOP
	 *
	 * In:       %o0 = ffbpriv, %o1 = ffbregs,
	 *           %o2 = prect, %o3 = prect_last (inclusive),
	 *           %o4 = xOrg, %o5 = yOrg
	 * Clobbers: %g1-%g5, %g7, %o2, %o4, %o5, condition codes
	 *
	 * The origin is first packed as ORG = (yOrg << 32) | xOrg.
	 * For every rectangle (8 bytes):
	 *   FFB_BY <- ((y << 32) | x) + ORG   (64-bit store)
	 *   FFB_BH <- (h << 32) | w           (64-bit store)
	 * The FIFO cache in %g1 is charged 4 per rectangle.
	 */
FFB_RECT_LOOP:
	lduh		[%o0 + FIFO_CACHE], %g1		/* Load		Group	*/
	sllx		%o5, 32, %o5			/* IEU0			*/
	cmp		%o2, %o3			/* IEU1			*/
	bgu,pn		%icc, 2f			/* CTI			*/
	 or		%o4, %o5, %o4			/* IEU0		Group	*/
1:	lduh		[%o2 + RECT_X], %g4		/* Load		Group	*/
	lduh		[%o2 + RECT_Y], %o5		/* Load		Group	*/
	lduh		[%o2 + RECT_W], %g3		/* Load		Group	*/
	lduh		[%o2 + RECT_H], %g2		/* Load		Group	*/
	sllx		%o5, 32, %g7			/* IEU0			*/
	add		%o2, 8, %o2			/* IEU1			*/
	/* Plain "or": the condition codes are not consumed before the
	 * subcc below overwrites them, so there is no reason to set them.
	 */
	or		%g7, %g4, %g7			/* IEU1		Group	*/
	sllx		%g2, 32, %g5			/* IEU0			*/
	or		%g5, %g3, %g5			/* IEU0		Group	*/
	add		%g7, %o4, %g7			/* IEU1			*/
	subcc		%g1, 4, %g1			/* IEU1		Group	*/
	bl,pn		%icc, FFB_RECT_WAIT		/* CTI			*/
	/* Delay slot of the branch above and resume point after the wait. */
9:	 cmp		%o2, %o3			/* IEU1		Group	*/
	stx		%g7, [%o1 + FFB_BY]		/* Store		*/
	bleu,pt		%icc, 1b			/* CTI			*/
	 stx		%g5, [%o1 + FFB_BH]		/* Store	Group	*/
2:	retl						/* CTI		Group	*/
	 sth		%g1, [%o0 + FIFO_CACHE]		/* Store		*/

	FIFO_WAIT(%o1, 4, FFB_RECT_WAIT, 9b)

	.align		32
	.globl		FFB_PPT_WIDTH_LOOP

	/* FFB_PPT_WIDTH_LOOP
	 *
	 * In:       %o0 = ffbpriv, %o1 = ffbregs,
	 *           %o2 = ppt, %o3 = ppt_last (inclusive),
	 *           %o4 = pwidth (array of 32-bit widths)
	 * Clobbers: %g1, %g2, %g4, %g5, %g7, %o2, %o4, %o5,
	 *           condition codes
	 *
	 * Each point is read as one 32-bit word W at POINT_X (covering
	 * x,y).  With P = (W << 32) | (W >> 16), every span whose width
	 * is non-zero issues:
	 *   FFB_PPC <- 0            (32-bit store)
	 *   FFB_BY  <- P            (64-bit store)
	 *   FFB_BH  <- P + width    (64-bit store)
	 * and charges 5 to the FIFO cache in %g1.
	 *
	 * Zero-width spans are skipped without touching the hardware or
	 * the FIFO cache.  The skip path re-checks ppt against ppt_last
	 * (label 3 below) so that trailing zero-width spans cannot make
	 * the loop read past the end of the ppt / pwidth arrays.
	 */
FFB_PPT_WIDTH_LOOP:
	lduh		[%o0 + FIFO_CACHE], %g1		/* Load		Group	*/
	cmp		%o2, %o3			/* IEU1			*/
	bgu,pn		%icc, 2f			/* CTI			*/
	 nop
1:	lduw		[%o2 + POINT_X], %g4		/* Load		Group	*/
	lduw		[%o4], %g2			/* Load		Group	*/
	sllx		%g4, 32, %g7
	srl		%g4, 16, %g5			/* IEU0		Group	*/
	add		%o2, 4, %o2			/* IEU1			*/
	brz,pn		%g2, 3f				/* CTI+IEU1	Group	*/
	 add		%o4, 4, %o4			/* IEU0			*/
	or		%g5, %g7, %g7			/* IEU0		Group	*/
	subcc		%g1, 5, %g1			/* IEU1			*/
	bl,pn		%icc, FFB_PPT_WIDTH_WAIT	/* CTI			*/
	 add		%g7, %g2, %o5			/* IEU0		Group	*/
9:	stw		%g0, [%o1 + FFB_PPC]		/* Store	Group	*/
	cmp		%o2, %o3			/* IEU1			*/
	stx		%g7, [%o1 + FFB_BY]		/* Store	Group	*/
	bleu,pt		%icc, 1b			/* CTI			*/
	 stx		%o5, [%o1 + FFB_BH]		/* Store	Group	*/
2:	retl						/* CTI		Group	*/
	 sth		%g1, [%o0 + FIFO_CACHE]		/* Store		*/

	/* Zero-width span: both pointers are already advanced.  Continue
	 * only while ppt <= ppt_last, otherwise leave exactly like the
	 * normal exit at label 2.
	 */
3:	cmp		%o2, %o3
	bleu,pt		%icc, 1b
	 nop
	retl
	 sth		%g1, [%o0 + FIFO_CACHE]

	FIFO_WAIT(%o1, 5, FFB_PPT_WIDTH_WAIT, 9b)

	.align		32
	.globl		FFB_LINE_LOOP1

	/* FFB_LINE_LOOP1
	 *
	 * In:       %o0 = ffbpriv, %o1 = ffbregs,
	 *           %o2 = ppt, %o3 = ppt_last (inclusive),
	 *           %o4 = xOrg, %o5 = yOrg
	 * Out:      %o0 = ppt after the last point consumed
	 * Clobbers: %g1, %g3, %g4, %g7, %o2, %o4, %o5, condition codes
	 *
	 * We return ppt_last + 1.  (If ppt > ppt_last on entry nothing
	 * is drawn and ppt is returned unchanged.)
	 *
	 * The origin is packed as ORG = (yOrg << 32) | (xOrg & 0x7ff).
	 * Each point is read as one 32-bit word W at POINT_X; with
	 * P = (W << 32) | (W >> 16) the loop issues:
	 *   FFB_PPC <- 0          (32-bit store)
	 *   FFB_BH  <- P + ORG    (64-bit store)
	 * and charges 3 to the FIFO cache in %g1.
	 */
FFB_LINE_LOOP1:
	lduh		[%o0 + FIFO_CACHE], %g1
	sllx		%o5, 32, %o5
	and		%o4, 0x7ff, %o4
	cmp		%o2, %o3
	bgu,pn		%icc, 2f
	 or		%o4, %o5, %o4
1:	lduw		[%o2 + POINT_X], %g4
	sllx		%g4, 32, %g7
	srl		%g4, 16, %g3
	add		%o2, 4, %o2
	or		%g7, %g3, %g7
	subcc		%g1, 3, %g1
	bl,pn		%icc, FFB_LINE1_WAIT
	 add		%g7, %o4, %g7
9:	cmp		%o2, %o3
	stw		%g0, [%o1 + FFB_PPC]
	bleu,pt		%icc, 1b
	 stx		%g7, [%o1 + FFB_BH]
2:	sth		%g1, [%o0 + FIFO_CACHE]
	retl
	 mov		%o2, %o0

	FIFO_WAIT(%o1, 3, FFB_LINE1_WAIT, 9b)

	.align		32
	.globl		FFB_LINE_LOOP2

	/* FFB_LINE_LOOP2
	 *
	 * In:       %o0 = ffbpriv, %o1 = ffbregs,
	 *           %o2 = ppt, %o3 = ppt_last (inclusive),
	 *           %o4 = &xOrg, %o5 = &yOrg (32-bit words, updated)
	 * Out:      %o0 = ppt after the last point consumed
	 * Clobbers: %g1-%g5, %g7, %o2, condition codes
	 *
	 * We return ppt_last + 1.
	 * The only difference between the previous routine
	 * is that here we accumulate the origin values: the running
	 * value ACC starts as (*yOrg << 32) | (*xOrg & 0x7ff), each
	 * point adds P = (W << 32) | (W >> 16) to it, and ACC is what
	 * gets stored to FFB_BH.  On exit the low 32 bits of ACC are
	 * written back to *xOrg and the high 32 bits to *yOrg.
	 */
FFB_LINE_LOOP2:
	lduw		[%o5], %g5
	lduw		[%o4], %g2
	lduh		[%o0 + FIFO_CACHE], %g1
	sllx		%g5, 32, %g5
	and		%g2, 0x7ff, %g2
	cmp		%o2, %o3
	bgu,pn		%icc, 2f
	 or		%g5, %g2, %g5
1:	lduw		[%o2 + POINT_X], %g4
	sllx		%g4, 32, %g7
	srl		%g4, 16, %g3
	add		%o2, 4, %o2
	or		%g7, %g3, %g7
	subcc		%g1, 3, %g1
	bl,pn		%icc, FFB_LINE2_WAIT
	 add		%g7, %g5, %g5
9:	cmp		%o2, %o3
	stw		%g0, [%o1 + FFB_PPC]
	bleu,pt		%icc, 1b
	 stx		%g5, [%o1 + FFB_BH]
2:	sth		%g1, [%o0 + FIFO_CACHE]
	srlx		%g5, 32, %g4
	stw		%g5, [%o4]
	stw		%g4, [%o5]
	retl
	 mov		%o2, %o0

	FIFO_WAIT(%o1, 3, FFB_LINE2_WAIT, 9b)