/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* Returns the number of attachment references described by a subpass:
 * its input attachments, its color attachments, one resolve attachment per
 * color attachment (only when pResolveAttachments is provided) and one more
 * if a depth/stencil attachment reference is present.
 *
 * v3dv_CreateRenderPass uses the sum over all subpasses to size the shared
 * subpass attachment array. Note that the depth/stencil reference is counted
 * here even though v3dv_CreateRenderPass stores it inline in the subpass, so
 * the array may end up with one unused slot per such subpass.
 */
static uint32_t
num_subpass_attachments(const VkSubpassDescription *desc)
{
   return desc->inputAttachmentCount +
          desc->colorAttachmentCount +
          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
          (desc->pDepthStencilAttachment != NULL);
}

/* Stores in att->use_tlb_resolve whether the attachment's format supports
 * TLB resolves, as reported by the format_supports_tlb_resolve hook.
 */
static void
set_use_tlb_resolve(struct v3dv_device *device,
                    struct v3dv_render_pass_attachment *att)
{
   const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
   att->use_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
}

/* Widens the [first_subpass, last_subpass] range of an attachment so that it
 * includes subpass_idx.
 */
static void
attachment_extend_subpass_range(struct v3dv_render_pass_attachment *att,
                                uint32_t subpass_idx)
{
   if (subpass_idx < att->first_subpass)
      att->first_subpass = subpass_idx;
   if (subpass_idx > att->last_subpass)
      att->last_subpass = subpass_idx;
}

/* Computes, for every attachment in the render pass, the first and last
 * subpass that reference it (as color, depth/stencil, input or resolve
 * attachment).
 *
 * For color attachments it additionally tracks the range for each view
 * selected by the subpass view mask, and it flags the color attachment for
 * TLB resolve when the subpass provides a resolve attachment for it.
 *
 * Attachments that are never referenced keep the initial values
 * first_subpass = subpass_count - 1 and last_subpass = 0.
 */
static void
pass_find_subpass_range_for_attachments(struct v3dv_device *device,
                                        struct v3dv_render_pass *pass)
{
   /* Start with an "inverted" range so the first reference found narrows
    * it from both ends.
    */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      pass->attachments[i].first_subpass = pass->subpass_count - 1;
      pass->attachments[i].last_subpass = 0;
      if (pass->multiview_enabled) {
         for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
            pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
            pass->attachments[i].views[j].last_subpass = 0;
         }
      }
   }

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const struct v3dv_subpass *subpass = &pass->subpasses[i];

      for (uint32_t j = 0; j < subpass->color_count; j++) {
         uint32_t attachment_idx = subpass->color_attachments[j].attachment;
         if (attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         assert(attachment_idx < pass->attachment_count);
         struct v3dv_render_pass_attachment *att =
            &pass->attachments[attachment_idx];

         attachment_extend_subpass_range(att, i);

         /* Per-view ranges: one entry for each bit set in the view mask */
         uint32_t view_mask = subpass->view_mask;
         while (view_mask) {
            uint32_t view_index = u_bit_scan(&view_mask);
            assert(view_index < MAX_MULTIVIEW_VIEW_COUNT);
            if (i < att->views[view_index].first_subpass)
               att->views[view_index].first_subpass = i;
            if (i > att->views[view_index].last_subpass)
               att->views[view_index].last_subpass = i;
         }

         if (subpass->resolve_attachments &&
             subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
            set_use_tlb_resolve(device, att);
         }
      }

      uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
         assert(ds_attachment_idx < pass->attachment_count);
         attachment_extend_subpass_range(&pass->attachments[ds_attachment_idx], i);
      }

      for (uint32_t j = 0; j < subpass->input_count; j++) {
         uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
         if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         assert(input_attachment_idx < pass->attachment_count);
         attachment_extend_subpass_range(&pass->attachments[input_attachment_idx], i);
      }

      if (subpass->resolve_attachments) {
         for (uint32_t j = 0; j < subpass->color_count; j++) {
            uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
            if (attachment_idx == VK_ATTACHMENT_UNUSED)
               continue;

            assert(attachment_idx < pass->attachment_count);
            attachment_extend_subpass_range(&pass->attachments[attachment_idx], i);
         }
      }
   }
}

/* Creates a render pass object from pCreateInfo.
 *
 * The render pass, its subpass array and its attachment array live in a
 * single zero-initialized allocation; the per-subpass attachment references
 * (input, color and resolve) live in a second allocation,
 * pass->subpass_attachments, which is NULL when no subpass references any
 * attachment.
 *
 * Multiview is considered enabled when a VkRenderPassMultiviewCreateInfo with
 * a non-zero subpassCount is found in the pNext chain.
 *
 * Returns VK_SUCCESS and writes the new handle to pRenderPass, or
 * VK_ERROR_OUT_OF_HOST_MEMORY if either allocation fails (nothing is leaked
 * in that case).
 */
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass(VkDevice _device,
                      const VkRenderPassCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkRenderPass *pRenderPass)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_render_pass *pass;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);

   const VkRenderPassMultiviewCreateInfo *multiview_info =
      vk_find_struct_const(pCreateInfo->pNext, RENDER_PASS_MULTIVIEW_CREATE_INFO);
   bool multiview_enabled = multiview_info && multiview_info->subpassCount > 0;

   /* We read one view mask per subpass below, so the multiview subpass count
    * must match the render pass subpass count.
    */
   assert(!multiview_enabled ||
          multiview_info->subpassCount == pCreateInfo->subpassCount);

   /* Layout of the main allocation: pass | subpasses[] | attachments[] */
   size_t size = sizeof(*pass);
   size_t subpasses_offset = size;
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   size_t attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);

   if (pass == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Byte offsets are applied through uint8_t * rather than relying on
    * arithmetic on void *, which is a compiler extension.
    */
   pass->multiview_enabled = multiview_enabled;
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = (void *) ((uint8_t *) pass + attachments_offset);
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = (void *) ((uint8_t *) pass + subpasses_offset);

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
      pass->attachments[i].desc = pCreateInfo->pAttachments[i];

   uint32_t subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count += num_subpass_attachments(desc);
   }

   if (subpass_attachment_count) {
      const size_t subpass_attachment_bytes =
         subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
      pass->subpass_attachments =
         vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      pass->subpass_attachments = NULL;
   }

   /* 'p' walks the shared attachment array, handing out consecutive slices
    * to each subpass for its input, color and resolve references.
    */
   struct v3dv_subpass_attachment *p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      struct v3dv_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      if (multiview_enabled)
         subpass->view_mask = multiview_info->pViewMasks[i];

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pInputAttachments[j].attachment,
               .layout = desc->pInputAttachments[j].layout,
            };
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pColorAttachments[j].attachment,
               .layout = desc->pColorAttachments[j].layout,
            };
         }
      }

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pResolveAttachments[j].attachment,
               .layout = desc->pResolveAttachments[j].layout,
            };
         }
      }

      if (desc->pDepthStencilAttachment) {
         subpass->ds_attachment = (struct v3dv_subpass_attachment) {
            .attachment = desc->pDepthStencilAttachment->attachment,
            .layout = desc->pDepthStencilAttachment->layout,
         };

         /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
          * the clear might get lost. If a subpass has this then we can't emit
          * the clear using the TLB and we have to do it as a draw call.
          *
          * FIXME: separate stencil.
          */
         if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
            assert(subpass->ds_attachment.attachment < pass->attachment_count);
            struct v3dv_render_pass_attachment *att =
               &pass->attachments[subpass->ds_attachment.attachment];
            if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
               if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
                   att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
                  subpass->do_depth_clear_with_draw = true;
               } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
                          att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                  subpass->do_stencil_clear_with_draw = true;
               }
            }
         }
      } else {
         subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
      }
   }

   pass_find_subpass_range_for_attachments(device, pass);

   /* FIXME: handle subpass dependencies */

   *pRenderPass = v3dv_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

/* Destroys a render pass created with v3dv_CreateRenderPass, releasing both
 * the subpass attachment array and the render pass object itself.
 * A null handle is ignored.
 */
VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyRenderPass(VkDevice _device,
                       VkRenderPass _pass,
                       const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
   vk_object_free(&device->vk, pAllocator, pass);
}

/* Computes the render area granularity (tile size) for a single subpass and
 * writes it to 'granularity'.
 */
static void
subpass_get_granularity(struct v3dv_device *device,
                        struct v3dv_render_pass *pass,
                        uint32_t subpass_idx,
                        VkExtent2D *granularity)
{
   /* Flat list of (width, height) pairs: entry 'idx' lives at
    * tile_sizes[idx * 2] and tile_sizes[idx * 2 + 1].
    */
   static const uint8_t tile_sizes[] = {
      64, 64,
      64, 32,
      32, 32,
      32, 16,
      16, 16,
      16,  8,
       8,  8
   };

   /* Our tile size depends on the number of color attachments and the maximum
    * bpp across them.
    */
   assert(subpass_idx < pass->subpass_count);
   struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
   const uint32_t color_attachment_count = subpass->color_count;

   uint32_t max_internal_bpp = 0;
   for (uint32_t i = 0; i < color_attachment_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;
      const VkAttachmentDescription *desc =
         &pass->attachments[attachment_idx].desc;
      const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
      uint32_t internal_type, internal_bpp;
      v3dv_X(device, get_internal_type_bpp_for_output_format)
         (format->rt_type, &internal_type, &internal_bpp);

      max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
   }

   uint32_t idx = 0;
   if (color_attachment_count > 2)
      idx += 2;
   else if (color_attachment_count > 1)
      idx += 1;

   idx += max_internal_bpp;

   /* 'idx' selects a pair, so the highest element we read is idx * 2 + 1;
    * that is what must be within the bounds of the flat table.
    */
   assert(idx * 2 + 1 < ARRAY_SIZE(tile_sizes));
   *granularity = (VkExtent2D) {
      .width = tile_sizes[idx * 2],
      .height = tile_sizes[idx * 2 + 1]
   };
}

/* Reports the render area granularity for a render pass: starting from
 * 64x64, takes the minimum width and the minimum height of the per-subpass
 * granularities across all subpasses.
 */
VKAPI_ATTR void VKAPI_CALL
v3dv_GetRenderAreaGranularity(VkDevice _device,
                              VkRenderPass renderPass,
                              VkExtent2D *pGranularity)
{
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   *pGranularity = (VkExtent2D) {
      .width = 64,
      .height = 64,
   };

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      VkExtent2D sg;
      subpass_get_granularity(device, pass, i, &sg);
      pGranularity->width = MIN2(pGranularity->width, sg.width);
      pGranularity->height = MIN2(pGranularity->height, sg.height);
   }
}

/* Checks whether the render area rectangle covers a region that is aligned to
 * tile boundaries. This means that we are writing to all pixels covered by
 * all tiles in that area (except for pixels on edge tiles that are outside
 * the framebuffer dimensions).
 *
 * When our framebuffer is aligned to tile boundaries we know we are writing
 * valid data to all pixels in each tile and we can apply certain
 * optimizations, like avoiding tile loads, since we know that none of the
 * original pixel values in each tile for that area need to be preserved.
 * We also use this to decide if we can use TLB clears, as these clear whole
 * tiles so we can't use them if the render area is not aligned.
 *
 * Note that when an image is created it will possibly include padding blocks
 * depending on its tiling layout. When the framebuffer dimensions are not
 * aligned to tile boundaries then edge tiles are only partially covered by the
 * framebuffer pixels, but tile stores still seem to store full tiles
 * writing to the padded sections. This is important when the framebuffer
 * is aliasing a smaller section of a larger image, as in that case the edge
 * tiles of the framebuffer would overwrite valid pixels in the larger image.
 * In that case, we can't flag the area as being aligned.
 */
bool
v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                  const VkRect2D *area,
                                  struct v3dv_framebuffer *fb,
                                  struct v3dv_render_pass *pass,
                                  uint32_t subpass_idx)
{
   assert(subpass_idx < pass->subpass_count);

   VkExtent2D granularity;
   subpass_get_granularity(device, pass, subpass_idx, &granularity);

   /* The offset must be tile-aligned; the extent must either be a multiple of
    * the tile size or reach the framebuffer edge on a framebuffer that has
    * edge padding.
    */
   return area->offset.x % granularity.width == 0 &&
          area->offset.y % granularity.height == 0 &&
         (area->extent.width % granularity.width == 0 ||
          (fb->has_edge_padding &&
           area->offset.x + area->extent.width >= fb->width)) &&
         (area->extent.height % granularity.height == 0 ||
          (fb->has_edge_padding &&
           area->offset.y + area->extent.height >= fb->height));
}