// SPDX-License-Identifier: MIT /* * Copyright © 2023 Intel Corporation */ #include "i915_drv.h" #include "i915_perf_oa_regs.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_pm.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_tlb.h" /* * HW architecture suggest typical invalidation time at 40us, * with pessimistic cases up to 100us and a recommendation to * cap at 1ms. We go a bit higher just in case. */ #define TLB_INVAL_TIMEOUT_US 100 #define TLB_INVAL_TIMEOUT_MS 4 /* * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets * but are now considered MCR registers. Since they exist within a GAM range, * the primary instance of the register rolls up the status from each unit. */ static int wait_for_invalidate(struct intel_engine_cs *engine) { if (engine->tlb_inv.mcr) return intel_gt_mcr_wait_for_reg(engine->gt, engine->tlb_inv.reg.mcr_reg, engine->tlb_inv.done, 0, TLB_INVAL_TIMEOUT_US, TLB_INVAL_TIMEOUT_MS); else return __intel_wait_for_register_fw(engine->gt->uncore, engine->tlb_inv.reg.reg, engine->tlb_inv.done, 0, TLB_INVAL_TIMEOUT_US, TLB_INVAL_TIMEOUT_MS, NULL); } static void mmio_invalidate_full(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; intel_engine_mask_t awake, tmp; enum intel_engine_id id; unsigned long flags; if (GRAPHICS_VER(i915) < 8) return; intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); intel_gt_mcr_lock(gt, &flags); spin_lock(&uncore->lock); /* serialise invalidate with GT reset */ awake = 0; for_each_engine(engine, gt, id) { if (!intel_engine_pm_is_awake(engine)) continue; if (engine->tlb_inv.mcr) intel_gt_mcr_multicast_write_fw(gt, engine->tlb_inv.reg.mcr_reg, engine->tlb_inv.request); else intel_uncore_write_fw(uncore, engine->tlb_inv.reg.reg, engine->tlb_inv.request); awake |= engine->mask; } GT_TRACE(gt, "invalidated engines %08x\n", awake); /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ if (awake && (IS_TIGERLAKE(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915))) intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); spin_unlock(&uncore->lock); intel_gt_mcr_unlock(gt, flags); for_each_engine_masked(engine, gt, awake, tmp) { if (wait_for_invalidate(engine)) gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n", engine->name, TLB_INVAL_TIMEOUT_MS); } /* * Use delayed put since a) we mostly expect a flurry of TLB * invalidations so it is good to avoid paying the forcewake cost and * b) it works around a bug in Icelake which cannot cope with too rapid * transitions. */ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); } static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) { u32 cur = intel_gt_tlb_seqno(gt); /* Only skip if a *full* TLB invalidate barrier has passed */ return (s32)(cur - ALIGN(seqno, 2)) > 0; } void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno) { intel_wakeref_t wakeref; if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) return; if (intel_gt_is_wedged(gt)) return; if (tlb_seqno_passed(gt, seqno)) return; with_intel_gt_pm_if_awake(gt, wakeref) { mutex_lock(>->tlb.invalidate_lock); if (tlb_seqno_passed(gt, seqno)) goto unlock; mmio_invalidate_full(gt); #ifdef notyet write_seqcount_invalidate(>->tlb.seqno); #else barrier(); gt->tlb.seqno.seq.sequence += 2; #endif unlock: mutex_unlock(>->tlb.invalidate_lock); } } void intel_gt_init_tlb(struct intel_gt *gt) { rw_init(>->tlb.invalidate_lock, "gttlb"); seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); } void intel_gt_fini_tlb(struct intel_gt *gt) { mutex_destroy(>->tlb.invalidate_lock); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_tlb.c" #endif