// SPDX-License-Identifier: MIT /* * Copyright © 2019 Intel Corporation */ #include #include #include #include "../i915_selftest.h" #include "mock_drm.h" #include "mock_gem_device.h" #include "mock_region.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_migrate.h" #include "i915_memcpy.h" #include "i915_ttm_buddy_manager.h" #include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" static void close_objects(struct intel_memory_region *mem, struct list_head *objects) { struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj, *on; list_for_each_entry_safe(obj, on, objects, st_link) { i915_gem_object_lock(obj, NULL); if (i915_gem_object_has_pinned_pages(obj)) i915_gem_object_unpin_pages(obj); /* No polluting the memory region between tests */ __i915_gem_object_put_pages(obj); i915_gem_object_unlock(obj); list_del(&obj->st_link); i915_gem_object_put(obj); } cond_resched(); i915_gem_drain_freed_objects(i915); } static int igt_mock_fill(void *arg) { struct intel_memory_region *mem = arg; resource_size_t total = resource_size(&mem->region); resource_size_t page_size; resource_size_t rem; unsigned long max_pages; unsigned long page_num; DRM_LIST_HEAD(objects); int err = 0; page_size = PAGE_SIZE; max_pages = div64_u64(total, page_size); rem = total; for_each_prime_number_from(page_num, 1, max_pages) { resource_size_t size = page_num * page_size; struct drm_i915_gem_object *obj; obj = i915_gem_object_create_region(mem, size, 0, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); break; } err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); break; } list_add(&obj->st_link, &objects); rem -= size; } if (err == -ENOMEM) err = 0; if (err == -ENXIO) { if (page_num * page_size <= rem) { pr_err("%s failed, space still left in region\n", __func__); err = -EINVAL; } else { err = 0; } } close_objects(mem, &objects); return err; } static struct drm_i915_gem_object * igt_object_create(struct intel_memory_region *mem, struct list_head *objects, u64 size, unsigned int flags) { struct drm_i915_gem_object *obj; int err; obj = i915_gem_object_create_region(mem, size, 0, flags); if (IS_ERR(obj)) return obj; err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto put; list_add(&obj->st_link, objects); return obj; put: i915_gem_object_put(obj); return ERR_PTR(err); } static void igt_object_release(struct drm_i915_gem_object *obj) { i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); i915_gem_object_unlock(obj); list_del(&obj->st_link); i915_gem_object_put(obj); } static bool is_contiguous(struct drm_i915_gem_object *obj) { struct scatterlist *sg; dma_addr_t addr = -1; for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { if (addr != -1 && sg_dma_address(sg) != addr) return false; addr = sg_dma_address(sg) + sg_dma_len(sg); } return true; } static int igt_mock_reserve(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; resource_size_t avail = resource_size(&mem->region); struct drm_i915_gem_object *obj; const u32 chunk_size = SZ_32M; u32 i, offset, count, *order; u64 allocated, cur_avail; I915_RND_STATE(prng); LIST_HEAD(objects); int err = 0; count = avail / chunk_size; order = i915_random_order(count, &prng); if (!order) return 0; mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); if (IS_ERR(mem)) { pr_err("failed to create memory region\n"); err = PTR_ERR(mem); goto out_free_order; } /* Reserve a bunch of ranges within the region */ for (i = 0; i < count; ++i) { u64 start = order[i] * chunk_size; u64 size = i915_prandom_u32_max_state(chunk_size, &prng); /* Allow for some really big holes */ if (!size) continue; size = round_up(size, PAGE_SIZE); offset = igt_random_offset(&prng, 0, chunk_size, size, PAGE_SIZE); err = intel_memory_region_reserve(mem, start + offset, size); if (err) { pr_err("%s failed to reserve range", __func__); goto out_close; } /* XXX: maybe sanity check the block range here? */ avail -= size; } /* Try to see if we can allocate from the remaining space */ allocated = 0; cur_avail = avail; do { u32 size = i915_prandom_u32_max_state(cur_avail, &prng); size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { if (PTR_ERR(obj) == -ENXIO) break; err = PTR_ERR(obj); goto out_close; } cur_avail -= size; allocated += size; } while (1); if (allocated != avail) { pr_err("%s mismatch between allocation and free space", __func__); err = -EINVAL; } out_close: close_objects(mem, &objects); intel_memory_region_destroy(mem); out_free_order: kfree(order); return err; } static int igt_mock_contiguous(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_gem_object *obj; unsigned long n_objects; DRM_LIST_HEAD(objects); DRM_LIST_HEAD(holes); I915_RND_STATE(prng); resource_size_t total; resource_size_t min; u64 target; int err = 0; total = resource_size(&mem->region); /* Min size */ obj = igt_object_create(mem, &objects, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); if (!is_contiguous(obj)) { pr_err("%s min object spans disjoint sg entries\n", __func__); err = -EINVAL; goto err_close_objects; } igt_object_release(obj); /* Max size */ obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); if (!is_contiguous(obj)) { pr_err("%s max object spans disjoint sg entries\n", __func__); err = -EINVAL; goto err_close_objects; } igt_object_release(obj); /* Internal fragmentation should not bleed into the object size */ target = i915_prandom_u64_state(&prng); div64_u64_rem(target, total, &target); target = round_up(target, PAGE_SIZE); target = max_t(u64, PAGE_SIZE, target); obj = igt_object_create(mem, &objects, target, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); if (obj->base.size != target) { pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__, obj->base.size, target); err = -EINVAL; goto err_close_objects; } if (!is_contiguous(obj)) { pr_err("%s object spans disjoint sg entries\n", __func__); err = -EINVAL; goto err_close_objects; } igt_object_release(obj); /* * Try to fragment the address space, such that half of it is free, but * the max contiguous block size is SZ_64K. */ target = SZ_64K; n_objects = div64_u64(total, target); while (n_objects--) { struct list_head *list; if (n_objects % 2) list = &holes; else list = &objects; obj = igt_object_create(mem, list, target, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_close_objects; } } close_objects(mem, &holes); min = target; target = total >> 1; /* Make sure we can still allocate all the fragmented space */ obj = igt_object_create(mem, &objects, target, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_close_objects; } igt_object_release(obj); /* * Even though we have enough free space, we don't have a big enough * contiguous block. Make sure that holds true. */ do { bool should_fail = target > min; obj = igt_object_create(mem, &objects, target, I915_BO_ALLOC_CONTIGUOUS); if (should_fail != IS_ERR(obj)) { pr_err("%s target allocation(%llx) mismatch\n", __func__, target); err = -EINVAL; goto err_close_objects; } target >>= 1; } while (target >= PAGE_SIZE); err_close_objects: list_splice_tail(&holes, &objects); close_objects(mem, &objects); return err; } static int igt_mock_splintered_region(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; struct drm_buddy *mm; unsigned int expected_order; LIST_HEAD(objects); u64 size; int err = 0; /* * Sanity check we can still allocate everything even if the * mm.max_order != mm.size. i.e our starting address space size is not a * power-of-two. */ size = (SZ_4G - 1) & LINUX_PAGE_MASK; mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_close; } res = to_ttm_buddy_resource(obj->mm.res); mm = res->mm; if (mm->size != size) { pr_err("%s size mismatch(%llu != %llu)\n", __func__, mm->size, size); err = -EINVAL; goto out_put; } expected_order = get_order(rounddown_pow_of_two(size)); if (mm->max_order != expected_order) { pr_err("%s order mismatch(%u != %u)\n", __func__, mm->max_order, expected_order); err = -EINVAL; goto out_put; } close_objects(mem, &objects); /* * While we should be able allocate everything without any flag * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are * actually limited to the largest power-of-two for the region size i.e * max_order, due to the inner workings of the buddy allocator. So make * sure that does indeed hold true. */ obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); if (!IS_ERR(obj)) { pr_err("%s too large contiguous allocation was not rejected\n", __func__); err = -EINVAL; goto out_close; } obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) { pr_err("%s largest possible contiguous allocation failed\n", __func__); err = PTR_ERR(obj); goto out_close; } out_close: close_objects(mem, &objects); out_put: intel_memory_region_destroy(mem); return err; } #ifndef SZ_8G #define SZ_8G BIT_ULL(33) #endif static int igt_mock_max_segment(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; struct drm_buddy_block *block; struct drm_buddy *mm; struct list_head *blocks; struct scatterlist *sg; I915_RND_STATE(prng); LIST_HEAD(objects); unsigned int max_segment; unsigned int ps; u64 size; int err = 0; /* * While we may create very large contiguous blocks, we may need * to break those down for consumption elsewhere. In particular, * dma-mapping with scatterlist elements have an implicit limit of * UINT_MAX on each element. */ size = SZ_8G; ps = PAGE_SIZE; if (i915_prandom_u64_state(&prng) & 1) ps = SZ_64K; /* For something like DG2 */ max_segment = round_down(UINT_MAX, ps); mem = mock_region_create(i915, 0, size, ps, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_put; } res = to_ttm_buddy_resource(obj->mm.res); blocks = &res->blocks; mm = res->mm; size = 0; list_for_each_entry(block, blocks, link) { if (drm_buddy_block_size(mm, block) > size) size = drm_buddy_block_size(mm, block); } if (size < max_segment) { pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", __func__, max_segment, size); err = -EINVAL; goto out_close; } for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { dma_addr_t daddr = sg_dma_address(sg); if (sg->length > max_segment) { pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", __func__, sg->length, max_segment); err = -EINVAL; goto out_close; } if (!IS_ALIGNED(daddr, ps)) { pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", __func__, &daddr, ps); err = -EINVAL; goto out_close; } } out_close: close_objects(mem, &objects); out_put: intel_memory_region_destroy(mem); return err; } static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj) { struct intel_memory_region *mr = obj->mm.region; struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(obj->mm.res); struct drm_buddy *mm = bman_res->mm; struct drm_buddy_block *block; u64 total; total = 0; list_for_each_entry(block, &bman_res->blocks, link) { u64 start = drm_buddy_block_offset(block); u64 end = start + drm_buddy_block_size(mm, block); if (start < resource_size(&mr->io)) total += min_t(u64, end, resource_size(&mr->io)) - start; } return total; } static int igt_mock_io_size(void *arg) { struct intel_memory_region *mr = arg; struct drm_i915_private *i915 = mr->i915; struct drm_i915_gem_object *obj; u64 mappable_theft_total; u64 io_size; u64 total; u64 ps; u64 rem; u64 size; I915_RND_STATE(prng); LIST_HEAD(objects); int err = 0; ps = SZ_4K; if (i915_prandom_u64_state(&prng) & 1) ps = SZ_64K; /* For something like DG2 */ div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total); total = round_down(total, ps); total = max_t(u64, total, SZ_1G); div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size); io_size = round_down(io_size, ps); io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */ pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n", __func__, ps, io_size, total); mr = mock_region_create(i915, 0, total, ps, 0, io_size); if (IS_ERR(mr)) { err = PTR_ERR(mr); goto out_err; } mappable_theft_total = 0; rem = total - io_size; do { div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size); size = round_down(size, ps); size = max(size, ps); obj = igt_object_create(mr, &objects, size, I915_BO_ALLOC_GPU_ONLY); if (IS_ERR(obj)) { pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n", __func__, rem, size); err = PTR_ERR(obj); goto out_close; } mappable_theft_total += igt_object_mappable_total(obj); rem -= size; } while (rem); pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n", __func__, (u64)mappable_theft_total >> 20, (u64)io_size >> 20, (u64)total >> 20); /* * Even if we allocate all of the non-mappable portion, we should still * be able to dip into the mappable portion. */ obj = igt_object_create(mr, &objects, io_size, I915_BO_ALLOC_GPU_ONLY); if (IS_ERR(obj)) { pr_err("%s allocation unexpectedly failed\n", __func__); err = PTR_ERR(obj); goto out_close; } close_objects(mr, &objects); rem = io_size; do { div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size); size = round_down(size, ps); size = max(size, ps); obj = igt_object_create(mr, &objects, size, 0); if (IS_ERR(obj)) { pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n", __func__, rem, size); err = PTR_ERR(obj); goto out_close; } if (igt_object_mappable_total(obj) != size) { pr_err("%s allocation is not mappable(size=%llx)\n", __func__, size); err = -EINVAL; goto out_close; } rem -= size; } while (rem); /* * We assume CPU access is required by default, which should result in a * failure here, even though the non-mappable portion is free. */ obj = igt_object_create(mr, &objects, ps, 0); if (!IS_ERR(obj)) { pr_err("%s allocation unexpectedly succeeded\n", __func__); err = -EINVAL; goto out_close; } out_close: close_objects(mr, &objects); intel_memory_region_destroy(mr); out_err: if (err == -ENOMEM) err = 0; return err; } static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, u32 value) { return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32), vma->size >> PAGE_SHIFT, value); } static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) { unsigned long n = obj->base.size >> PAGE_SHIFT; u32 *ptr; int err; err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); if (err) return err; ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(ptr)) return PTR_ERR(ptr); ptr += dword; while (n--) { if (*ptr != val) { pr_err("base[%u]=%08x, val=%08x\n", dword, *ptr, val); err = -EINVAL; break; } ptr += PAGE_SIZE / sizeof(*ptr); } i915_gem_object_unpin_map(obj); return err; } static int igt_gpu_write(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { struct i915_gem_engines *engines; struct i915_gem_engines_iter it; struct i915_address_space *vm; struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int count; struct i915_vma *vma; int *order; int i, n; int err = 0; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); n = 0; count = 0; for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { count++; if (!intel_engine_can_store_dword(ce->engine)) continue; vm = ce->vm; n++; } i915_gem_context_unlock_engines(ctx); if (!n) return 0; order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_free; } err = i915_vma_pin(vma, 0, 0, PIN_USER); if (err) goto out_free; i = 0; engines = i915_gem_context_lock_engines(ctx); do { u32 rng = prandom_u32_state(&prng); u32 dword = offset_in_page(rng) / 4; ce = engines->engines[order[i] % engines->num_engines]; i = (i + 1) % (count * count); if (!ce || !intel_engine_can_store_dword(ce->engine)) continue; err = igt_gpu_write_dw(ce, vma, dword, rng); if (err) break; i915_gem_object_lock(obj, NULL); err = igt_cpu_check(obj, dword, rng); i915_gem_object_unlock(obj); if (err) break; } while (!__igt_timeout(end_time, NULL)); i915_gem_context_unlock_engines(ctx); out_free: kfree(order); if (err == -ENOMEM) err = 0; return err; } static int igt_lmem_create(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; int err = 0; obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); if (IS_ERR(obj)) return PTR_ERR(obj); err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put; i915_gem_object_unpin_pages(obj); out_put: i915_gem_object_put(obj); return err; } static int igt_lmem_create_with_ps(void *arg) { struct drm_i915_private *i915 = arg; int err = 0; u32 ps; for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) { struct drm_i915_gem_object *obj; dma_addr_t daddr; obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err == -ENXIO || err == -E2BIG) { pr_info("%s not enough lmem for ps(%u) err=%d\n", __func__, ps, err); err = 0; } break; } if (obj->base.size != ps) { pr_err("%s size(%zu) != ps(%u)\n", __func__, obj->base.size, ps); err = -EINVAL; goto out_put; } i915_gem_object_lock(obj, NULL); err = i915_gem_object_pin_pages(obj); if (err) { if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) { pr_info("%s not enough lmem for ps(%u) err=%d\n", __func__, ps, err); err = 0; } goto out_put; } daddr = i915_gem_object_get_dma_address(obj, 0); if (!IS_ALIGNED(daddr, ps)) { pr_err("%s daddr(%pa) not aligned with ps(%u)\n", __func__, &daddr, ps); err = -EINVAL; goto out_unpin; } out_unpin: i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); out_put: i915_gem_object_unlock(obj); i915_gem_object_put(obj); if (err) break; } return err; } static int igt_lmem_create_cleared_cpu(void *arg) { struct drm_i915_private *i915 = arg; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); u32 size, i; int err; i915_gem_drain_freed_objects(i915); size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng)); size = round_up(size, PAGE_SIZE); i = 0; do { struct drm_i915_gem_object *obj; unsigned int flags; u32 dword, val; void *vaddr; /* * Alternate between cleared and uncleared allocations, while * also dirtying the pages each time to check that the pages are * always cleared if requested, since we should get some overlap * of the underlying pages, if not all, since we are the only * user. */ flags = I915_BO_ALLOC_CPU_CLEAR; if (i & 1) flags = 0; obj = i915_gem_object_create_lmem(i915, size, flags); if (IS_ERR(obj)) return PTR_ERR(obj); i915_gem_object_lock(obj, NULL); err = i915_gem_object_pin_pages(obj); if (err) goto out_put; dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32), &prng); if (flags & I915_BO_ALLOC_CPU_CLEAR) { err = igt_cpu_check(obj, dword, 0); if (err) { pr_err("%s failed with size=%u, flags=%u\n", __func__, size, flags); goto out_unpin; } } vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto out_unpin; } val = prandom_u32_state(&prng); memset32(vaddr, val, obj->base.size / sizeof(u32)); i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); out_unpin: i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); out_put: i915_gem_object_unlock(obj); i915_gem_object_put(obj); if (err) break; ++i; } while (!__igt_timeout(end_time, NULL)); pr_info("%s completed (%u) iterations\n", __func__, i); return err; } static int igt_lmem_write_gpu(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct file *file; I915_RND_STATE(prng); u32 sz; int err; file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; } sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); obj = i915_gem_object_create_lmem(i915, sz, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_file; } err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put; err = igt_gpu_write(ctx, obj); if (err) pr_err("igt_gpu_write failed(%d)\n", err); i915_gem_object_unpin_pages(obj); out_put: i915_gem_object_put(obj); out_file: fput(file); return err; } static struct intel_engine_cs * random_engine_class(struct drm_i915_private *i915, unsigned int class, struct rnd_state *prng) { struct intel_engine_cs *engine; unsigned int count; count = 0; for (engine = intel_engine_lookup_user(i915, class, 0); engine && engine->uabi_class == class; engine = rb_entry_safe(rb_next(&engine->uabi_node), typeof(*engine), uabi_node)) count++; count = i915_prandom_u32_max_state(count, prng); return intel_engine_lookup_user(i915, class, count); } static int igt_lmem_write_cpu(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); u32 bytes[] = { 0, /* rng placeholder */ sizeof(u32), sizeof(u64), 64, /* cl */ PAGE_SIZE, PAGE_SIZE - sizeof(u32), PAGE_SIZE - sizeof(u64), PAGE_SIZE - 64, }; struct intel_engine_cs *engine; struct i915_request *rq; u32 *vaddr; u32 sz; u32 i; int *order; int count; int err; engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng); if (!engine) return 0; pr_info("%s: using %s\n", __func__, engine->name); sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); sz = max_t(u32, 2 * PAGE_SIZE, sz); obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto out_put; } i915_gem_object_lock(obj, NULL); err = dma_resv_reserve_fences(obj->base.resv, 1); if (err) { i915_gem_object_unlock(obj); goto out_put; } /* Put the pages into a known state -- from the gpu for added fun */ intel_engine_pm_get(engine); err = intel_context_migrate_clear(engine->gt->migrate.context, NULL, obj->mm.pages->sgl, i915_gem_get_pat_index(i915, I915_CACHE_NONE), true, 0xdeadbeaf, &rq); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence, DMA_RESV_USAGE_WRITE); i915_request_put(rq); } intel_engine_pm_put(engine); if (!err) err = i915_gem_object_set_to_wc_domain(obj, true); i915_gem_object_unlock(obj); if (err) goto out_unpin; count = ARRAY_SIZE(bytes); order = i915_random_order(count * count, &prng); if (!order) { err = -ENOMEM; goto out_unpin; } /* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */ bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32)); GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32))); i = 0; do { u32 offset; u32 align; u32 dword; u32 size; u32 val; size = bytes[order[i] % count]; i = (i + 1) % (count * count); align = bytes[order[i] % count]; i = (i + 1) % (count * count); align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align)); offset = igt_random_offset(&prng, 0, obj->base.size, size, align); val = prandom_u32_state(&prng); memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf, size / sizeof(u32)); /* * Sample random dw -- don't waste precious time reading every * single dw. */ dword = igt_random_offset(&prng, offset, offset + size, sizeof(u32), sizeof(u32)); dword /= sizeof(u32); if (vaddr[dword] != (val ^ 0xdeadbeaf)) { pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n", __func__, dword, vaddr[dword], val ^ 0xdeadbeaf, size, align, offset); err = -EINVAL; break; } } while (!__igt_timeout(end_time, NULL)); out_unpin: i915_gem_object_unpin_map(obj); out_put: i915_gem_object_put(obj); return err; } static const char *repr_type(u32 type) { switch (type) { case I915_MAP_WB: return "WB"; case I915_MAP_WC: return "WC"; } return ""; } static struct drm_i915_gem_object * create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, void **out_addr) { struct drm_i915_gem_object *obj; void *addr; obj = i915_gem_object_create_region(mr, size, 0, 0); if (IS_ERR(obj)) { if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */ return ERR_PTR(-ENODEV); return obj; } addr = i915_gem_object_pin_map_unlocked(obj, type); if (IS_ERR(addr)) { i915_gem_object_put(obj); if (PTR_ERR(addr) == -ENXIO) return ERR_PTR(-ENODEV); return addr; } *out_addr = addr; return obj; } static int wrap_ktime_compare(const void *A, const void *B) { const ktime_t *a = A, *b = B; return ktime_compare(*a, *b); } static void igt_memcpy_long(void *dst, const void *src, size_t size) { unsigned long *tmp = dst; const unsigned long *s = src; size = size / sizeof(unsigned long); while (size--) *tmp++ = *s++; } static inline void igt_memcpy(void *dst, const void *src, size_t size) { memcpy(dst, src, size); } static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size) { i915_memcpy_from_wc(dst, src, size); } static int _perf_memcpy(struct intel_memory_region *src_mr, struct intel_memory_region *dst_mr, u64 size, u32 src_type, u32 dst_type) { struct drm_i915_private *i915 = src_mr->i915; const struct { const char *name; void (*copy)(void *dst, const void *src, size_t size); bool skip; } tests[] = { { "memcpy", igt_memcpy, }, { "memcpy_long", igt_memcpy_long, }, { "memcpy_from_wc", igt_memcpy_from_wc, !i915_has_memcpy_from_wc(), }, }; struct drm_i915_gem_object *src, *dst; void *src_addr, *dst_addr; int ret = 0; int i; src = create_region_for_mapping(src_mr, size, src_type, &src_addr); if (IS_ERR(src)) { ret = PTR_ERR(src); goto out; } dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr); if (IS_ERR(dst)) { ret = PTR_ERR(dst); goto out_unpin_src; } for (i = 0; i < ARRAY_SIZE(tests); ++i) { ktime_t t[5]; int pass; if (tests[i].skip) continue; for (pass = 0; pass < ARRAY_SIZE(t); pass++) { ktime_t t0, t1; t0 = ktime_get(); tests[i].copy(dst_addr, src_addr, size); t1 = ktime_get(); t[pass] = ktime_sub(t1, t0); } sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); if (t[0] <= 0) { /* ignore the impossible to protect our sanity */ pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n", __func__, src_mr->name, repr_type(src_type), dst_mr->name, repr_type(dst_type), tests[i].name, size >> 10, t[0], t[4]); continue; } pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n", __func__, src_mr->name, repr_type(src_type), dst_mr->name, repr_type(dst_type), tests[i].name, size >> 10, div64_u64(mul_u32_u32(4 * size, 1000 * 1000 * 1000), t[1] + 2 * t[2] + t[3]) >> 20); cond_resched(); } i915_gem_object_unpin_map(dst); i915_gem_object_put(dst); out_unpin_src: i915_gem_object_unpin_map(src); i915_gem_object_put(src); i915_gem_drain_freed_objects(i915); out: if (ret == -ENODEV) ret = 0; return ret; } static int perf_memcpy(void *arg) { struct drm_i915_private *i915 = arg; static const u32 types[] = { I915_MAP_WB, I915_MAP_WC, }; static const u32 sizes[] = { SZ_4K, SZ_64K, SZ_4M, }; struct intel_memory_region *src_mr, *dst_mr; int src_id, dst_id; int i, j, k; int ret; for_each_memory_region(src_mr, i915, src_id) { for_each_memory_region(dst_mr, i915, dst_id) { for (i = 0; i < ARRAY_SIZE(sizes); ++i) { for (j = 0; j < ARRAY_SIZE(types); ++j) { for (k = 0; k < ARRAY_SIZE(types); ++k) { ret = _perf_memcpy(src_mr, dst_mr, sizes[i], types[j], types[k]); if (ret) return ret; } } } } } return 0; } int intel_memory_region_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_reserve), SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), SUBTEST(igt_mock_max_segment), SUBTEST(igt_mock_io_size), }; struct intel_memory_region *mem; struct drm_i915_private *i915; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); if (IS_ERR(mem)) { pr_err("failed to create memory region\n"); err = PTR_ERR(mem); goto out_unref; } err = i915_subtests(tests, mem); intel_memory_region_destroy(mem); out_unref: mock_destroy_device(i915); return err; } int intel_memory_region_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), SUBTEST(igt_lmem_create_with_ps), SUBTEST(igt_lmem_create_cleared_cpu), SUBTEST(igt_lmem_write_cpu), SUBTEST(igt_lmem_write_gpu), }; if (!HAS_LMEM(i915)) { pr_info("device lacks LMEM support, skipping\n"); return 0; } if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); } int intel_memory_region_perf_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(perf_memcpy), }; if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); }