-- Parse cmdstream dump and analyse blits and batches

--local posix = require "posix"

-- C-style printf: format the arguments with string.format() and write the
-- result to the default output file.  Returns whatever io.write() returns.
function printf(fmt, ...)
	local text = string.format(fmt, ...)
	return io.write(text)
end

-- Debug trace hook.  Currently a no-op that returns nothing; uncomment the
-- printf() call below to get verbose tracing of the analysis.
function dbg(fmt, ...)
	--printf(fmt, ...)
end

-- Banner, printed once when this script is executed:
printf("Analyzing Data...\n")

-- Register-state accessor used by the callbacks below (for example
-- r.RB_MRT[n].BASE or r.RB_BLIT_BASE_GMEM).
-- NOTE(review): `rnn` is not defined in this file; presumably it is provided
-- by the host tool that loads this script, and "a630" selects the register
-- database for that GPU -- confirm.
local r = rnn.init("a630")

-- Each submit, all draws will target the same N MRTs.  Both tables are
-- keyed by the render target's base address:
local mrts = {}     -- render targets of the current batch
local allmrts = {}  -- includes historical render targets

-- Record a render target, both for the current batch (mrts) and in the
-- history (allmrts).  An existing entry with the same base is replaced.
--   fmt, w, h, samples: format, dimensions and sample count as reported
--   base:               base address, used as the table key
--   flag:               address of the associated flag buffer
--   gmem:               GMEM base of the target (handle_blit() passes -1)
function push_mrt(fmt, w, h, samples, base, flag, gmem)
	dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)

	local target = {
		format = fmt,
		w = w,
		h = h,
		samples = samples,
		base = base,
		flag = flag,
		gmem = gmem,
	}

	mrts[base] = target
	allmrts[base] = target
end

-- And each draw will read from M sources/textures, keyed by base address:
local sources = {}

-- Record a source (texture) read by the current batch.  An existing entry
-- with the same base is replaced.
--   fmt, w, h, samples: format, dimensions and sample count as reported
--   base:               base address, used as the table key
--   flag:               address of the associated flag buffer
function push_source(fmt, w, h, samples, base, flag)
	dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)

	sources[base] = {
		format = fmt,
		w = w,
		h = h,
		samples = samples,
		base = base,
		flag = flag,
	}
end

-- Bin dimensions and bin count, latched by draw() for RM6_GMEM draws and
-- reported by finish().  These are not cleared by reset():
local binw
local binh
local nbins
-- Number of blits / draws counted in the current batch:
local blits = 0
local draws = 0
-- Mode string of the current batch ("BLIT", or the marker mode seen by
-- draw()); nil until the first draw/blit after a reset():
local drawmode
-- Sets of buffers that were cleared / restored / resolved in the current
-- batch.  Keys are GMEM base addresses, except that handle_blit() keys
-- `cleared` by the blit destination address:
local cleared
local restored
local resolved
-- Set to true by start_submit() and to false once a draw or BLIT event is
-- seen; used by finish() to report "NULL BATCH!":
local nullbatch
-- Sticky per-batch flags, set by draw() when any draw enables the
-- corresponding depth/stencil state:
local depthtest
local depthwrite
local stenciltest
local stencilwrite

-- Reset all per-batch state so that a new batch can be accumulated.
-- Leaves allmrts (the render target history), the bin size/count and
-- nullbatch untouched.
function reset()
	dbg("reset\n")
	mrts = {}
	sources = {}
	draws = 0
	blits = 0
	cleared = {}
	restored = {}
	resolved = {}
	depthtest = false
	depthwrite = false
	stenciltest = false
	stencilwrite = false
	-- Use the `nil` literal: Lua is case sensitive, so `Nil` would be a
	-- lookup of an (undefined) global variable rather than the nil value.
	drawmode = nil
end

-- Called at the start of a submit: clear the per-batch state and assume
-- the submit is a null batch until a draw or BLIT event proves otherwise.
function start_submit()
	dbg("start_submit\n")
	reset()
	nullbatch = true
end

-- Print a summary of the batch (or blit) accumulated since the last
-- reset(), then reset the per-batch state.
--
-- If no draws and no blits were counted, only "NULL BATCH!" is printed
-- (and only when nullbatch is still set), and the state is left as-is.
function finish()
	dbg("finish\n")

	printf("\n")

	-- TODO we get false-positives for 'NULL BATCH!' because we don't have
	-- a really good way to differentiate between submits and cmds.  Ie.
	-- with growable cmdstream, and a large # of tiles, IB1 can get split
	-- across multiple buffers.  Since we ignore GMEM draws for window-
	-- offset != 0,0, the later cmds will appear as null batches
	if draws == 0 and blits == 0 then
		if nullbatch then
			printf("NULL BATCH!\n")
		end
		return
	end

	if draws > 0 then
		printf("Batch:\n")
		printf("-------\n")
		printf("  # of draws: %u\n", draws)
		printf("  mode: %s\n", drawmode)
		if drawmode == "RM6_GMEM" then
			printf("  bin size: %ux%u (%u bins)\n", binw, binh, nbins)
		end
		if depthtest or depthwrite then
			printf("  ")
			if depthtest then
				printf("DEPTHTEST ")
			end
			if depthwrite then
				printf("DEPTHWRITE")
			end
			printf("\n")
		end
		if stenciltest or stencilwrite then
			printf("  ")
			if stenciltest then
				printf("STENCILTEST ")
			end
			if stencilwrite then
				printf("STENCILWRITE")
			end
			printf("\n")
		end
	else
		printf("Blit:\n")
		printf("-----\n")
	end

	for base,mrt in pairs(mrts) do
		printf("  MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
		if drawmode == "RM6_GMEM" then
			-- in GMEM mode the clear/restore/resolve sets are keyed by
			-- the GMEM base of the buffer
			if cleared[mrt.gmem] then
				printf("\tCLEARED")
			end
			if restored[mrt.gmem] then
				printf("\tRESTORED")
			end
			if resolved[mrt.gmem] then
				printf("\tRESOLVED")
			end
		else
			-- otherwise clears are keyed by the target's base address
			if cleared[mrt.base] then
				printf("\tCLEARED")
			end
		end
		printf("\n")
	end

	-- Kept local so that calling finish() does not (re)define a global.
	local function print_source(source)
		printf("  SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
	end

	for base,source in pairs(sources) do
		-- For small batches (fewer than 10 draws, which includes blits)
		-- show every source.  For larger batches only show sources that
		-- have been previously rendered to (matched by base or by flag
		-- buffer address), other textures are less interesting.  Possibly
		-- this should be an option somehow
		-- NOTE(review): 0 is truthy in Lua, so a flag address of 0 still
		-- results in an allmrts[0] lookup -- confirm that is intended.
		if draws < 10
				or allmrts[base]
				or (source.flag and allmrts[source.flag]) then
			print_source(source)
		end
	end
	reset()
end

-- Called at the end of a submit: report whatever batch is still pending.
function end_submit()
	dbg("end_submit\n")
	finish()
end

-- Track the current mode, i.e. the MARKER field of the most recent
-- CP_SET_MARKER packet (empty string until the first one is seen):
local mode = ""

-- Packet callback for CP_SET_MARKER: remember the new marker mode.
function CP_SET_MARKER(pkt, size)
	local marker = pkt[0].MARKER
	mode = marker
	dbg("mode: %s\n", marker)
end

-- Packet callback for CP_EVENT_WRITE.  Only BLIT events are handled:
--   * in RM6_RESOLVE mode the blit is recorded as a resolve of the buffer
--     at RB_BLIT_BASE_GMEM
--   * in RM6_GMEM mode the blit is recorded as a clear or a restore of
--     that buffer, and the buffer is registered as a render target
--   * in any other mode a complaint is printed
function CP_EVENT_WRITE(pkt, size)
	local event = tostring(pkt[0].EVENT)
	if event ~= "BLIT" then
		return
	end
	nullbatch = false

	local curmode = tostring(mode)
	if curmode == "RM6_RESOLVE" then
		resolved[r.RB_BLIT_BASE_GMEM] = 1
		return
	elseif curmode ~= "RM6_GMEM" then
		printf("I am confused!!!\n")
		return
	end

	-- RM6_GMEM: either clear or restore; a zero clear mask means restore
	local gmem = r.RB_BLIT_BASE_GMEM
	if r.RB_BLIT_INFO.CLEAR_MASK ~= 0 then
		cleared[gmem] = 1
	else
		restored[gmem] = 1
	end

	-- push_mrt() because we could have GMEM
	-- passes with only a clear and no draws.
	--
	-- Try to match up the GMEM addr with the MRT/DEPTH state,
	-- to avoid relying on RB_BLIT_DST also getting written:
	local sysmem, flag = 0, 0
	for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
		if r.RB_MRT[n].BASE_GMEM == gmem then
			sysmem = r.RB_MRT[n].BASE
			flag = r.RB_MRT_FLAG_BUFFER[n].ADDR
			break
		end
	end
	-- no color buffer matched, maybe it is the depth buffer:
	if sysmem == 0 and gmem == r.RB_DEPTH_BUFFER_BASE_GMEM then
		sysmem = r.RB_DEPTH_BUFFER_BASE
		flag = r.RB_DEPTH_FLAG_BUFFER_BASE
	end
	--NOTE this can get confused by previous blits:
	--if sysmem == 0 then
	--	-- fallback:
	--	sysmem = r.RB_BLIT_DST
	--	flag = r.RB_BLIT_FLAG_DST
	--end

	-- ignore the flag buffer address unless the blit dst uses flags:
	if not r.RB_BLIT_DST_INFO.FLAGS then
		flag = 0
	end

	-- TODO maybe just emit RB_BLIT_DST/HI for clears.. otherwise
	-- we get confused by stale values in registers.. not sure
	-- if this is a problem w/ blob
	push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT,
		r.RB_BLIT_SCISSOR_BR.X + 1,
		r.RB_BLIT_SCISSOR_BR.Y + 1,
		r.RB_BLIT_DST_INFO.SAMPLES,
		sysmem,
		flag,
		gmem)
end

-- Callback for a decoded A6XX_TEX_CONST texture descriptor: register the
-- texture as a source of the current batch.  The 64-bit base and flag
-- buffer addresses are assembled from their LO/HI halves.
function A6XX_TEX_CONST(pkt, size)
	local fmt = pkt[0].FMT
	local width = pkt[1].WIDTH
	local height = pkt[1].HEIGHT
	local samples = pkt[0].SAMPLES
	local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
	local flag = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)

	push_source(fmt, width, height, samples, base, flag)
end

-- Handle a 2D blit (called from draw() for "BLIT_OP_SCALE").  A blit is
-- reported as its own one-entry "batch": any pending draw batch is
-- flushed first, then the blit destination (and source, unless it is a
-- solid-color fill) is recorded and printed.
function handle_blit()
	-- blob sometimes uses CP_BLIT for resolves, so filter those out:
	-- TODO it would be nice to not hard-code GMEM addr:
	-- TODO I guess the src can be an offset from GMEM addr..
	local is_resolve = (r.SP_PS_2D_SRC == 0x100000) and not r.RB_2D_BLIT_CNTL.SOLID_COLOR
	if is_resolve then
		resolved[0] = 1
		return
	end

	-- flush the draws accumulated so far before starting over:
	if draws > 0 then
		finish()
	end
	reset()
	drawmode = "BLIT"

	-- This kinda assumes that we are doing full img blits, which is maybe
	-- Not completely legit.  We could perhaps instead just track pitch and
	-- size/pitch??  Or maybe the size doesn't matter much
	local dst_format = r.RB_2D_DST_INFO.COLOR_FORMAT
	local dst_w = r.GRAS_2D_DST_BR.X + 1
	local dst_h = r.GRAS_2D_DST_BR.Y + 1
	local dst_base = r.RB_2D_DST
	push_mrt(dst_format, dst_w, dst_h, "MSAA_ONE", dst_base, r.RB_2D_DST_FLAGS, -1)

	if not r.RB_2D_BLIT_CNTL.SOLID_COLOR then
		-- a real copy: record where it reads from
		push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
			r.GRAS_2D_SRC_BR_X.X + 1,
			r.GRAS_2D_SRC_BR_Y.Y + 1,
			"MSAA_ONE",
			r.SP_PS_2D_SRC,
			r.SP_PS_2D_SRC_FLAGS)
	else
		-- solid-color fill, i.e. a clear of the destination
		dbg("CLEAR=%x\n", dst_base)
		cleared[dst_base] = 1
	end

	blits = blits + 1
	finish()
end

-- Returns true if going from curmode to newmode stays within the same
-- render pass, false otherwise.  The only such transitions are
-- RM6_BINNING -> RM6_GMEM and RM6_GMEM -> RM6_RESOLVE.
function valid_transition(curmode, newmode)
	local binning_to_gmem = (curmode == "RM6_BINNING" and newmode == "RM6_GMEM")
	local gmem_to_resolve = (curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE")
	return binning_to_gmem or gmem_to_resolve
end

-- Per-draw callback.  Accumulates the draw into the current batch:
-- records the enabled color targets and the depth buffer as MRTs, latches
-- depth/stencil usage and (for GMEM) the bin layout, and bumps the draw
-- count.  A change of mode that is not a valid_transition() ends the
-- current batch (it is printed via finish()) and starts a new one.
--   primtype: primitive type name; "BLIT_OP_SCALE" is routed to
--             handle_blit() and "EVENT:BLIT" is ignored here
--   nindx:    unused
function draw(primtype, nindx)
	dbg("draw: %s (%s)\n", primtype, mode)
	nullbatch = false
	if primtype == "BLIT_OP_SCALE" then
		handle_blit()
		return
	elseif primtype == "EVENT:BLIT" then
		-- not a real draw, regardless of the current mode
		-- (presumably accounted for by CP_EVENT_WRITE() -- confirm)
		return
	end

	local m = tostring(mode)

	-- detect changes in drawmode which indicate a different
	-- pass..  BINNING->GMEM means same pass, but other
	-- transitions mean different pass:
	if drawmode and m ~= drawmode then
		dbg("%s -> %s transition\n", drawmode, m)
		if not valid_transition(drawmode, m) then
			dbg("invalid transition, new render pass!\n")
			finish()
			reset()
		end
	end

	if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
		if m == "RM6_BINNING" then
			-- binning pass draws are not counted, just remember the mode
			drawmode = m
			return
		end
		-- (a check for "EVENT:BLIT" in RM6_RESOLVE mode used to live
		-- here; it could never trigger since that primtype has already
		-- returned above)
		printf("unknown MODE %s for primtype %s\n", m, primtype)
		return
	end

	-- Only count the first tile for GMEM mode to avoid counting
	-- each draw for each tile
	if m == "RM6_GMEM" then
		if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
			return
		end
	end

	drawmode = m

	-- per-RT component masks, indexed by MRT number; a target with a
	-- zero mask is not recorded
	local render_components = {
		[0] = r.RB_RENDER_COMPONENTS.RT0,
		[1] = r.RB_RENDER_COMPONENTS.RT1,
		[2] = r.RB_RENDER_COMPONENTS.RT2,
		[3] = r.RB_RENDER_COMPONENTS.RT3,
		[4] = r.RB_RENDER_COMPONENTS.RT4,
		[5] = r.RB_RENDER_COMPONENTS.RT5,
		[6] = r.RB_RENDER_COMPONENTS.RT6,
		[7] = r.RB_RENDER_COMPONENTS.RT7,
	}
	for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do
		if render_components[n] ~= 0 then
			push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
				r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
				r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
				r.RB_MSAA_CNTL.SAMPLES,
				r.RB_MRT[n].BASE,
				r.RB_MRT_FLAG_BUFFER[n].ADDR,
				r.RB_MRT[n].BASE_GMEM)
		end
	end

	-- the depth buffer is recorded like any other render target, but
	-- only if one is bound (non-zero base):
	local depthbase = r.RB_DEPTH_BUFFER_BASE

	if depthbase ~= 0 then
		push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
			r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
			r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
			r.RB_MSAA_CNTL.SAMPLES,
			depthbase,
			r.RB_DEPTH_FLAG_BUFFER_BASE,
			r.RB_DEPTH_BUFFER_BASE_GMEM)
	end

	-- the depth/stencil flags are sticky for the batch: once any draw
	-- sets them they stay set until reset()
	if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
		depthwrite = true
	end

	if r.RB_DEPTH_CNTL.Z_TEST_ENABLE then
		depthtest = true
	end

	-- clearly 0 != false.. :-/
	if r.RB_STENCILWRMASK.WRMASK ~= 0 then
		stencilwrite = true
	end

	if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
		stenciltest = true
	end

	-- TODO should also check for stencil buffer for z32+s8 case

	if m == "RM6_GMEM" then
		binw = r.VSC_BIN_SIZE.WIDTH
		binh = r.VSC_BIN_SIZE.HEIGHT
		nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY
	end

	draws = draws + 1
end