/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

#include <errno.h>
#include <sys/mman.h>

#include "drm-uapi/v3d_drm.h"
#include "util/u_memory.h"

/* Default max size of the bo cache, in MB.
 *
 * FIXME: we got this value when testing some apps using the rpi4 with 4GB,
 * but it should depend on the total amount of RAM. But for that we would need
 * to test on real hw with different amount of RAM. Using this value for now.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 512

/* We decided against using a V3D_DEBUG flag for this, as it would mean adding
 * a run-time check to most of the calls. Flip this constant to get the BO
 * statistics dumped to stderr.
 */
static const bool dump_stats = false;

/* Prints the device-wide BO counters and the state of the BO cache to stderr.
 *
 * Only called when dump_stats is enabled.
 */
static void
bo_dump_stats(struct v3dv_device *device)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   fprintf(stderr, " BOs allocated: %d\n", device->bo_count);
   fprintf(stderr, " BOs size: %dkb\n", device->bo_size / 1024);
   fprintf(stderr, " BOs cached: %d\n", cache->cache_count);
   fprintf(stderr, " BOs cached size: %dkb\n", cache->cache_size / 1024);

   if (!list_is_empty(&cache->time_list)) {
      struct v3dv_bo *first = list_first_entry(&cache->time_list,
                                               struct v3dv_bo,
                                               time_list);
      struct v3dv_bo *last = list_last_entry(&cache->time_list,
                                             struct v3dv_bo,
                                             time_list);

      fprintf(stderr, " oldest cache time: %ld\n", (long)first->free_time);
      fprintf(stderr, " newest cache time: %ld\n", (long)last->free_time);

      struct timespec now;
      clock_gettime(CLOCK_MONOTONIC, &now);
      /* time_t is not guaranteed to be a long: cast it so the argument
       * matches the %ld conversion, like the two lines above do.
       */
      fprintf(stderr, " now: %ld\n", (long)now.tv_sec);
   }

   if (cache->size_list_size) {
      uint32_t empty_size_list = 0;
      for (uint32_t i = 0; i < cache->size_list_size; i++) {
         if (list_is_empty(&cache->size_list[i]))
            empty_size_list++;
      }
      fprintf(stderr, " Empty size_list lists: %d\n", empty_size_list);
   }
}

/* Unlinks a BO from both cache lists (by time and by size) and updates the
 * cache counters. It doesn't take the cache lock itself.
 */
static void
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
{
   list_del(&bo->time_list);
   list_del(&bo->size_list);

   cache->cache_count--;
   cache->cache_size -= bo->size;
}

/* Tries to get a BO of exactly the given size from the cache.
 *
 * size_list is indexed by the size in 4096-byte pages minus one. Returns NULL
 * if there is no bucket for that size, the bucket is empty, or the first BO
 * in the bucket is not idle yet. On success the BO is removed from the cache
 * and gets the new name.
 */
static struct v3dv_bo *
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   uint32_t page_index = size / 4096 - 1;

   if (cache->size_list_size <= page_index)
      return NULL;

   struct v3dv_bo *bo = NULL;

   mtx_lock(&cache->lock);
   if (!list_is_empty(&cache->size_list[page_index])) {
      bo = list_first_entry(&cache->size_list[page_index],
                            struct v3dv_bo, size_list);

      /* Check that the BO has gone idle.  If not, then we want to
       * allocate something new instead, since we assume that the
       * user will proceed to CPU map it and fill it with stuff.
       */
      if (!v3dv_bo_wait(device, bo, 0)) {
         mtx_unlock(&cache->lock);
         return NULL;
      }

      bo_remove_from_cache(cache, bo);

      bo->name = name;
   }
   mtx_unlock(&cache->lock);
   return bo;
}

/* Really frees a BO: unmaps it if needed, closes its GEM handle, updates the
 * device counters and releases the host memory of the struct.
 *
 * A NULL bo is accepted and reported as success. Returns true if the
 * GEM_CLOSE ioctl succeeded; the struct is freed in either case.
 */
static bool
bo_free(struct v3dv_device *device,
        struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   if (bo->map)
      v3dv_bo_unmap(device, bo);

   struct drm_gem_close c;
   memset(&c, 0, sizeof(c));
   c.handle = bo->handle;
   int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
   if (ret != 0)
      fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));

   device->bo_count--;
   device->bo_size -= bo->size;

   if (dump_stats) {
      fprintf(stderr, "Freed %s%s%dkb:\n",
              bo->name ? bo->name : "",
              bo->name ? " " : "",
              bo->size / 1024);
      bo_dump_stats(device);
   }

   vk_free(&device->vk.alloc, bo);

   return ret == 0;
}

/* Frees every BO held by the cache.
 *
 * with_lock tells whether this function has to take the cache lock; pass
 * false when the caller is already holding it.
 */
static void
bo_cache_free_all(struct v3dv_device *device,
                  bool with_lock)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   if (with_lock)
      mtx_lock(&cache->lock);
   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
                            time_list) {
      bo_remove_from_cache(cache, bo);
      bo_free(device, bo);
   }
   if (with_lock)
      mtx_unlock(&cache->lock);
}

/* Fills in the fields of an already allocated v3dv_bo struct.
 *
 * The BO starts unmapped (map == NULL, map_size == 0) and with dumb_handle
 * set to -1.
 */
void
v3dv_bo_init(struct v3dv_bo *bo,
             uint32_t handle,
             uint32_t size,
             uint32_t offset,
             const char *name,
             bool private)
{
   bo->handle = handle;
   bo->handle_bit = 1ull << (handle % 64);
   bo->size = size;
   bo->offset = offset;
   bo->map = NULL;
   bo->map_size = 0;
   bo->name = name;
   bo->private = private;
   bo->dumb_handle = -1;
   list_inithead(&bo->list_link);
}

/* Allocates a BO of at least the given size (rounded up to full pages).
 *
 * Private BOs are taken from the cache when a suitable idle one is
 * available. Otherwise a new BO is created with DRM_IOCTL_V3D_CREATE_BO; if
 * that fails while the cache is holding BOs, the cache is emptied and the
 * creation is retried once.
 *
 * Returns NULL on failure (host or device memory).
 */
struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device *device,
              uint32_t size,
              const char *name,
              bool private)
{
   struct v3dv_bo *bo;

   const uint32_t page_align = 4096; /* Always allocate full pages */
   size = align(size, page_align);

   if (private) {
      bo = bo_from_cache(device, size, name);
      if (bo) {
         if (dump_stats) {
            fprintf(stderr, "Allocated %s %dkb from cache:\n",
                    name, size / 1024);
            bo_dump_stats(device);
         }
         return bo;
      }
   }

   bo = vk_alloc(&device->vk.alloc, sizeof(struct v3dv_bo), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!bo) {
      fprintf(stderr, "Failed to allocate host memory for BO\n");
      return NULL;
   }

   /* This has to live before the label: if it were initialized after it, the
    * goto below would reset it and the "retry only once" guard would never
    * trigger.
    */
   bool cleared_and_retried = false;

 retry:
   ;

   struct drm_v3d_create_bo create = {
      .size = size
   };

   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_CREATE_BO, &create);
   if (ret != 0) {
      if (!list_is_empty(&device->bo_cache.time_list) &&
          !cleared_and_retried) {
         cleared_and_retried = true;
         bo_cache_free_all(device, true);
         goto retry;
      }

      vk_free(&device->vk.alloc, bo);
      fprintf(stderr, "Failed to allocate device memory for BO\n");
      return NULL;
   }

   assert(create.offset % page_align == 0);
   assert((create.offset & 0xffffffff) == create.offset);

   v3dv_bo_init(bo, create.handle, size, create.offset, name, private);

   device->bo_count++;
   device->bo_size += bo->size;
   if (dump_stats) {
      fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
      bo_dump_stats(device);
   }

   return bo;
}

/* CPU-maps the first size bytes of the BO without waiting for it to be idle.
 *
 * If the BO is already mapped this returns true right away and keeps the
 * existing mapping, whatever its size is. Returns false if the
 * DRM_IOCTL_V3D_MMAP_BO ioctl or the mmap call fails; in that case the BO
 * stays unmapped.
 */
bool
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
                           struct v3dv_bo *bo,
                           uint32_t size)
{
   assert(bo != NULL && size <= bo->size);

   if (bo->map)
      return true;

   struct drm_v3d_mmap_bo map;
   memset(&map, 0, sizeof(map));
   map.handle = bo->handle;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_MMAP_BO, &map);
   if (ret != 0) {
      fprintf(stderr, "map ioctl failure\n");
      return false;
   }

   bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                  device->pdevice->render_fd, map.offset);
   if (bo->map == MAP_FAILED) {
      fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
              bo->handle, (long long)map.offset, (uint32_t)bo->size);
      /* MAP_FAILED is not NULL: don't leave it in bo->map, or the BO would
       * look mapped to later map calls and to bo_free, that would try to
       * unmap it.
       */
      bo->map = NULL;
      return false;
   }
   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));

   bo->map_size = size;

   return true;
}

/* Waits on the BO through DRM_IOCTL_V3D_WAIT_BO with the given timeout (in
 * nanoseconds). Returns true if the ioctl succeeded.
 */
bool
v3dv_bo_wait(struct v3dv_device *device,
             struct v3dv_bo *bo,
             uint64_t timeout_ns)
{
   struct drm_v3d_wait_bo wait = {
      .handle = bo->handle,
      .timeout_ns = timeout_ns,
   };
   return v3dv_ioctl(device->pdevice->render_fd,
                     DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
}

/* CPU-maps the first size bytes of the BO and then waits, without a timeout,
 * on the BO. Returns false if either the mapping or the wait fails.
 */
bool
v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
{
   assert(bo && size <= bo->size);

   bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
   if (!ok)
      return false;

   ok = v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE);
   if (!ok) {
      fprintf(stderr, "memory wait for map failed\n");
      return false;
   }

   return true;
}

/* Removes the CPU mapping of the BO. The BO must be currently mapped. */
void
v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
{
   assert(bo && bo->map && bo->map_size > 0);

   munmap(bo->map, bo->map_size);
   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
   bo->map = NULL;
   bo->map_size = 0;
}

/* Grows the size_list array of the cache so it has size entries.
 *
 * Existing buckets are re-linked to the new list heads and the new entries
 * are initialized as empty lists. Returns false, leaving the cache untouched,
 * if the host allocation fails.
 */
static bool
reallocate_size_list(struct v3dv_bo_cache *cache,
                     struct v3dv_device *device,
                     uint32_t size)
{
   struct list_head *new_list =
      vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!new_list) {
      fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
      return false;
   }
   struct list_head *old_list = cache->size_list;

   /* Move old list contents over (since the array has moved, and
    * therefore the pointers to the list heads have to change).
    */
   for (uint32_t i = 0; i < cache->size_list_size; i++) {
      struct list_head *old_head = &cache->size_list[i];
      if (list_is_empty(old_head)) {
         list_inithead(&new_list[i]);
      } else {
         new_list[i].next = old_head->next;
         new_list[i].prev = old_head->prev;
         new_list[i].next->prev = &new_list[i];
         new_list[i].prev->next = &new_list[i];
      }
   }
   for (uint32_t i = cache->size_list_size; i < size; i++)
      list_inithead(&new_list[i]);

   cache->size_list = new_list;
   cache->size_list_size = size;
   vk_free(&device->vk.alloc, old_list);

   return true;
}

/* Initializes the BO counters of the device and its BO cache.
 *
 * The max size of the cache, in MB, can be overridden with the
 * V3DV_MAX_BO_CACHE_SIZE environment variable.
 */
void
v3dv_bo_cache_init(struct v3dv_device *device)
{
   device->bo_size = 0;
   device->bo_count = 0;
   list_inithead(&device->bo_cache.time_list);
   /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
    * reallocations
    */
   device->bo_cache.size_list_size = 0;
   /* The first reallocate_size_list call frees the old array, so it needs
    * to start as NULL.
    */
   device->bo_cache.size_list = NULL;

   /* NOTE(review): the value is taken as is from atoll, so a negative or
    * really big one could overflow when converted to bytes below. Confirm
    * against the type of max_cache_size, that is not visible here.
    */
   const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
   if (max_cache_size_str == NULL)
      device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
   else
      device->bo_cache.max_cache_size = atoll(max_cache_size_str);

   if (dump_stats) {
      fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n",
              device->bo_cache.max_cache_size);
   }

   /* From MB to bytes */
   device->bo_cache.max_cache_size *= 1024 * 1024;
   device->bo_cache.cache_count = 0;
   device->bo_cache.cache_size = 0;
}

/* Frees all the BOs held by the cache and the size_list array. */
void
v3dv_bo_cache_destroy(struct v3dv_device *device)
{
   bo_cache_free_all(device, true);
   vk_free(&device->vk.alloc, device->bo_cache.size_list);

   /* bo_dump_stats walks size_list, so don't leave a pointer to the memory
    * we have just freed behind.
    */
   device->bo_cache.size_list = NULL;
   device->bo_cache.size_list_size = 0;

   if (dump_stats) {
      fprintf(stderr, "BO stats after screen destroy:\n");
      bo_dump_stats(device);
   }
}

/* Frees the BOs that have been in the cache for too long.
 *
 * time is the current time in seconds. BOs are appended to time_list as
 * they are cached, so we can stop at the first one that is not stale. The
 * caller is expected to hold the cache lock.
 */
static void
free_stale_bos(struct v3dv_device *device,
               time_t time)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   bool freed_any = false;

   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
                            time_list) {
      /* If it's more than two seconds old, free it. */
      if (time - bo->free_time > 2) {
         if (dump_stats && !freed_any) {
            fprintf(stderr, "Freeing stale BOs:\n");
            bo_dump_stats(device);
            freed_any = true;
         }

         bo_remove_from_cache(cache, bo);
         bo_free(device, bo);
      } else {
         break;
      }
   }

   if (dump_stats && freed_any) {
      fprintf(stderr, "Freed stale BOs:\n");
      bo_dump_stats(device);
   }
}

/* Releases a BO.
 *
 * Private BOs go to the cache if they fit in it (after trying to make room by
 * freeing stale BOs); any other BO is freed right away. A NULL bo is accepted
 * and reported as success.
 *
 * Returns true if the BO was cached, or the result of bo_free if it had to
 * be freed.
 */
bool
v3dv_bo_free(struct v3dv_device *device,
             struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   struct timespec now;
   struct v3dv_bo_cache *cache = &device->bo_cache;
   uint32_t page_index = bo->size / 4096 - 1;

   /* If the BO doesn't fit in the cache, try to make some room first. */
   if (bo->private &&
       bo->size > cache->max_cache_size - cache->cache_size) {
      clock_gettime(CLOCK_MONOTONIC, &now);
      mtx_lock(&cache->lock);
      free_stale_bos(device, now.tv_sec);
      mtx_unlock(&cache->lock);
   }

   if (!bo->private ||
       bo->size > cache->max_cache_size - cache->cache_size) {
      return bo_free(device, bo);
   }

   clock_gettime(CLOCK_MONOTONIC, &now);
   mtx_lock(&cache->lock);

   if (cache->size_list_size <= page_index) {
      if (!reallocate_size_list(cache, device, page_index + 1)) {
         bool outcome = bo_free(device, bo);
         /* If the reallocation failed, it usually means that we are out of
          * memory, so we also free all the bo cache. We need to call it to
          * not use the cache lock, as we are already under it.
          */
         bo_cache_free_all(device, false);
         mtx_unlock(&cache->lock);
         return outcome;
      }
   }

   bo->free_time = now.tv_sec;
   list_addtail(&bo->size_list, &cache->size_list[page_index]);
   list_addtail(&bo->time_list, &cache->time_list);

   cache->cache_count++;
   cache->cache_size += bo->size;

   if (dump_stats) {
      fprintf(stderr, "Freed %s %dkb to cache:\n",
              bo->name, bo->size / 1024);
      bo_dump_stats(device);
   }
   bo->name = NULL;

   free_stale_bos(device, now.tv_sec);

   mtx_unlock(&cache->lock);

   return true;
}