// SPDX-License-Identifier: MIT /* * Copyright © 2021 Intel Corporation */ #include #include #include "i915_sw_fence.h" #include "i915_vma_resource.h" #include "i915_drv.h" #include "intel_memory_region.h" #include "gt/intel_gtt.h" static struct pool slab_vma_resources; /** * DOC: * We use a per-vm interval tree to keep track of vma_resources * scheduled for unbind but not yet unbound. The tree is protected by * the vm mutex, and nodes are removed just after the unbind fence signals. * The removal takes the vm mutex from a kernel thread which we need to * keep in mind so that we don't grab the mutex and try to wait for all * pending unbinds to complete, because that will temporarily block many * of the workqueue threads, and people will get angry. * * We should consider using a single ordered fence per VM instead but that * requires ordering the unbinds and might introduce unnecessary waiting * for unrelated unbinds. Amount of code will probably be roughly the same * due to the simplicity of using the interval tree interface. * * Another drawback of this interval tree is that the complexity of insertion * and removal of fences increases as O(ln(pending_unbinds)) instead of * O(1) for a single fence without interval tree. 
*/ #define VMA_RES_START(_node) ((_node)->start - (_node)->guard) #define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1) #ifdef __linux__ INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, u64, __subtree_last, VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); #else static struct i915_vma_resource * vma_res_itree_iter_first(struct rb_root_cached *root, uint64_t start, uint64_t last) { struct i915_vma_resource *node; struct rb_node *rb; for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) { node = rb_entry(rb, typeof(*node), rb); if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last) return node; } return NULL; } static struct i915_vma_resource * vma_res_itree_iter_next(struct i915_vma_resource *node, uint64_t start, uint64_t last) { struct rb_node *rb = &node->rb; for (rb = rb_next(rb); rb; rb = rb_next(rb)) { node = rb_entry(rb, typeof(*node), rb); if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last) return node; } return NULL; } static void vma_res_itree_remove(struct i915_vma_resource *node, struct rb_root_cached *root) { rb_erase_cached(&node->rb, root); } static void vma_res_itree_insert(struct i915_vma_resource *node, struct rb_root_cached *root) { struct rb_node **iter = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct i915_vma_resource *iter_node; while (*iter) { parent = *iter; iter_node = rb_entry(*iter, struct i915_vma_resource, rb); if (node->start < iter_node->start) iter = &(*iter)->rb_left; else iter = &(*iter)->rb_right; } rb_link_node(&node->rb, parent, iter); rb_insert_color_cached(&node->rb, root, false); } #endif /* Callbacks for the unbind dma-fence. */ /** * i915_vma_resource_alloc - Allocate a vma resource * * Return: A pointer to a cleared struct i915_vma_resource or * a -ENOMEM error pointer if allocation fails. 
*/ struct i915_vma_resource *i915_vma_resource_alloc(void) { #ifdef __linux__ struct i915_vma_resource *vma_res = kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); #else struct i915_vma_resource *vma_res = pool_get(&slab_vma_resources, PR_WAITOK | PR_ZERO); #endif return vma_res ? vma_res : ERR_PTR(-ENOMEM); } /** * i915_vma_resource_free - Free a vma resource * @vma_res: The vma resource to free. */ void i915_vma_resource_free(struct i915_vma_resource *vma_res) { #ifdef __linux__ if (vma_res) kmem_cache_free(slab_vma_resources, vma_res); #else if (vma_res) pool_put(&slab_vma_resources, vma_res); #endif } static const char *get_driver_name(struct dma_fence *fence) { return "vma unbind fence"; } static const char *get_timeline_name(struct dma_fence *fence) { return "unbound"; } static void unbind_fence_free_rcu(struct rcu_head *head) { struct i915_vma_resource *vma_res = container_of(head, typeof(*vma_res), unbind_fence.rcu); i915_vma_resource_free(vma_res); } static void unbind_fence_release(struct dma_fence *fence) { struct i915_vma_resource *vma_res = container_of(fence, typeof(*vma_res), unbind_fence); i915_sw_fence_fini(&vma_res->chain); call_rcu(&fence->rcu, unbind_fence_free_rcu); } static struct dma_fence_ops unbind_fence_ops = { .get_driver_name = get_driver_name, .get_timeline_name = get_timeline_name, .release = unbind_fence_release, }; static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) { struct i915_address_space *vm; if (!refcount_dec_and_test(&vma_res->hold_count)) return; dma_fence_signal(&vma_res->unbind_fence); vm = vma_res->vm; if (vma_res->wakeref) intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); vma_res->vm = NULL; if (!RB_EMPTY_NODE(&vma_res->rb)) { mutex_lock(&vm->mutex); vma_res_itree_remove(vma_res, &vm->pending_unbind); mutex_unlock(&vm->mutex); } if (vma_res->bi.pages_rsgt) i915_refct_sgt_put(vma_res->bi.pages_rsgt); } /** * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind * 
fence. * @vma_res: The vma resource. * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold. * * The function may leave a dma_fence critical section. */ void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, bool lockdep_cookie) { dma_fence_end_signalling(lockdep_cookie); if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { unsigned long irq_flags; /* Inefficient open-coded might_lock_irqsave() */ spin_lock_irqsave(&vma_res->lock, irq_flags); spin_unlock_irqrestore(&vma_res->lock, irq_flags); } __i915_vma_resource_unhold(vma_res); } /** * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence. * @vma_res: The vma resource. * @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should * be given as an argument to the pairing i915_vma_resource_unhold. * * If returning true, the function enters a dma_fence signalling critical * section if not in one already. * * Return: true if holding successful, false if not. */ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, bool *lockdep_cookie) { bool held = refcount_inc_not_zero(&vma_res->hold_count); if (held) *lockdep_cookie = dma_fence_begin_signalling(); return held; } static void i915_vma_resource_unbind_work(struct work_struct *work) { struct i915_vma_resource *vma_res = container_of(work, typeof(*vma_res), work); struct i915_address_space *vm = vma_res->vm; bool lockdep_cookie; lockdep_cookie = dma_fence_begin_signalling(); if (likely(!vma_res->skip_pte_rewrite)) vma_res->ops->unbind_vma(vm, vma_res); dma_fence_end_signalling(lockdep_cookie); __i915_vma_resource_unhold(vma_res); i915_vma_resource_put(vma_res); } static int i915_vma_resource_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_vma_resource *vma_res = container_of(fence, typeof(*vma_res), chain); struct dma_fence *unbind_fence = &vma_res->unbind_fence; switch (state) { case FENCE_COMPLETE: dma_fence_get(unbind_fence); if (vma_res->immediate_unbind) { 
i915_vma_resource_unbind_work(&vma_res->work); } else { INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); queue_work(system_unbound_wq, &vma_res->work); } break; case FENCE_FREE: i915_vma_resource_put(vma_res); break; } return NOTIFY_DONE; } /** * i915_vma_resource_unbind - Unbind a vma resource * @vma_res: The vma resource to unbind. * @tlb: pointer to vma->obj->mm.tlb associated with the resource * to be stored at vma_res->tlb. When not-NULL, it will be used * to do TLB cache invalidation before freeing a VMA resource. * Used only for async unbind. * * At this point this function does little more than publish a fence that * signals immediately unless signaling is held back. * * Return: A refcounted pointer to a dma-fence that signals when unbinding is * complete. */ struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, u32 *tlb) { struct i915_address_space *vm = vma_res->vm; vma_res->tlb = tlb; /* Reference for the sw fence */ i915_vma_resource_get(vma_res); /* Caller must already have a wakeref in this case. */ if (vma_res->needs_wakeref) vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); if (atomic_read(&vma_res->chain.pending) <= 1) { RB_CLEAR_NODE(&vma_res->rb); vma_res->immediate_unbind = 1; } else { vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); } i915_sw_fence_commit(&vma_res->chain); return &vma_res->unbind_fence; } /** * __i915_vma_resource_init - Initialize a vma resource. * @vma_res: The vma resource to initialize * * Initializes the private members of a vma resource. 
*/ void __i915_vma_resource_init(struct i915_vma_resource *vma_res) { mtx_init(&vma_res->lock, IPL_TTY); dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, &vma_res->lock, 0, 0); refcount_set(&vma_res->hold_count, 1); i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); } static void i915_vma_resource_color_adjust_range(struct i915_address_space *vm, u64 *start, u64 *end) { if (i915_vm_has_cache_coloring(vm)) { if (*start) *start -= I915_GTT_PAGE_SIZE; *end += I915_GTT_PAGE_SIZE; } } /** * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a * certain vm range. * @vm: The vm to look at. * @offset: The range start. * @size: The range size. * @intr: Whether to wait interrubtible. * * The function needs to be called with the vm lock held. * * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true */ int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, u64 offset, u64 size, bool intr) { struct i915_vma_resource *node; u64 last = offset + size - 1; lockdep_assert_held(&vm->mutex); might_sleep(); i915_vma_resource_color_adjust_range(vm, &offset, &last); node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); while (node) { int ret = dma_fence_wait(&node->unbind_fence, intr); if (ret) return ret; node = vma_res_itree_iter_next(node, offset, last); } return 0; } /** * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, * releasing the vm lock while waiting. * @vm: The vm to look at. * * The function may not be called with the vm lock held. * Typically this is called at vm destruction to finish any pending * unbind operations. The vm mutex is released while waiting to avoid * stalling kernel workqueues trying to grab the mutex. 
*/
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;

	for (;;) {
		struct dma_fence *fence = NULL;

		/* Only the lookup and the fence reference need the mutex. */
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		/* Tree is empty: nothing left to wait for. */
		if (!node)
			break;

		/* No fence reference obtained: look the tree up again. */
		if (!fence)
			continue;

		/*
		 * The wait makes sure the node eventually removes
		 * itself from the tree.
		 */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptible or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now the vm mutex is required when calling this
 * function which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
* * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true */ int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, struct i915_sw_fence *sw_fence, u64 offset, u64 size, bool intr, gfp_t gfp) { struct i915_vma_resource *node; u64 last = offset + size - 1; lockdep_assert_held(&vm->mutex); might_alloc(gfp); might_sleep(); i915_vma_resource_color_adjust_range(vm, &offset, &last); node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); while (node) { int ret; ret = i915_sw_fence_await_dma_fence(sw_fence, &node->unbind_fence, 0, gfp); if (ret < 0) { ret = dma_fence_wait(&node->unbind_fence, intr); if (ret) return ret; } node = vma_res_itree_iter_next(node, offset, last); } return 0; } void i915_vma_resource_module_exit(void) { #ifdef __linux__ kmem_cache_destroy(slab_vma_resources); #else pool_destroy(&slab_vma_resources); #endif } int __init i915_vma_resource_module_init(void) { #ifdef __linux__ slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); if (!slab_vma_resources) return -ENOMEM; #else pool_init(&slab_vma_resources, sizeof(struct i915_vma_resource), 0, IPL_NONE, 0, "svmar", NULL); #endif return 0; }