/*	$OpenBSD: pmap.c,v 1.185 2024/09/06 10:54:08 jsg Exp $	*/

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001, 2002, 2007 Dale Rahn.
 * All rights reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 */

/*
 * powerpc lazy icache management.
 * The icache does not snoop dcache accesses.  The icache also will not
 * load modified data from the dcache, only the unmodified data in RAM.
 * Before the icache is loaded, the dcache must be synced to RAM to prevent
 * the icache from loading stale data.
 * The pg->pg_flags PG_PMAP_EXE bit is used to track whether the dcache is
 * clean and the icache may have valid data in it.
 * If the PG_PMAP_EXE bit is set (and the page is not currently RWX),
 * the icache will only have valid code in it.  If the bit is clear,
 * memory may not match the dcache contents or the icache may contain
 * data from a previous page.
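 *
 * The dcache flush / icache invalidation itself is done by syncicache().
 * On classic PowerPC that amounts to roughly the following per cache
 * line (an illustrative sketch, not the kernel's actual implementation):
 *
 *	dcbst	0,rX		# push the dirty dcache line to memory
 *	sync			# wait for the store to reach memory
 *	icbi	0,rX		# invalidate the stale icache line
 *
 * followed by a final sync; isync once every line of the page has been
 * processed.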
 *
 * pmap enter
 *	!E NONE -> R		no action
 *	!E NONE|R -> RW		no action
 *	!E NONE|R -> RX		flush dcache, inval icache (that page only), set E
 *	!E NONE|R -> RWX	flush dcache, inval icache (that page only), set E
 *	!E NONE|RW -> RWX	flush dcache, inval icache (that page only), set E
 *	E  NONE -> R		no action
 *	E  NONE|R -> RW		clear PG_PMAP_EXE bit
 *	E  NONE|R -> RX		no action
 *	E  NONE|R -> RWX	no action
 *	E  NONE|RW -> RWX	- invalid source state
 *
 * pmap_protect
 *	E  RW -> R	- invalid source state
 *	!E RW -> R	- no action
 *	*  RX -> R	- no action
 *	*  RWX -> R	- sync dcache, inval icache
 *	*  RWX -> RW	- clear PG_PMAP_EXE
 *	*  RWX -> RX	- sync dcache, inval icache
 *	*  * -> NONE	- no action
 *
 * pmap_page_protect (called with arg PROT_NONE if page is to be reused)
 *	*  RW -> R	- as pmap_protect
 *	*  RX -> R	- as pmap_protect
 *	*  RWX -> R	- as pmap_protect
 *	*  RWX -> RW	- as pmap_protect
 *	*  RWX -> RX	- as pmap_protect
 *	*  * -> NONE	- clear PG_PMAP_EXE
 *
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

struct bat battable[16];

struct dumpmem dumpmem[VM_PHYSSEG_MAX];
u_int ndumpmem;

struct pmap kernel_pmap_;
static struct mem_region *pmap_mem, *pmap_avail;
struct mem_region pmap_allocated[10];
int pmap_cnt_avail;
int pmap_cnt_allocated;

struct pte_64  *pmap_ptable64;
struct pte_32  *pmap_ptable32;
int	pmap_ptab_cnt;
u_int	pmap_ptab_mask;

#define HTABSIZE_32	(pmap_ptab_cnt * 64)
#define HTABMEMSZ_64	(pmap_ptab_cnt * 8 * sizeof(struct pte_64))
#define HTABSIZE_64	(ffs(pmap_ptab_cnt) - 12)

static u_int usedsr[NPMAPS / sizeof(u_int) / 8];

struct pte_desc {
	/* Linked list of phys -> virt entries */
	LIST_ENTRY(pte_desc) pted_pv_list;
	union {
		struct pte_32 pted_pte32;
		struct pte_64 pted_pte64;
	} p;
	pmap_t pted_pmap;
	vaddr_t pted_va;
};

void pmap_attr_save(paddr_t pa, u_int32_t bits);
void pmap_pted_ro(struct pte_desc *, vm_prot_t);
void pmap_pted_ro64(struct pte_desc *, vm_prot_t);
void pmap_pted_ro32(struct pte_desc *, vm_prot_t);

/*
 * Some functions are called in real mode and cannot be profiled.
 */
#define __noprof __attribute__((__no_instrument_function__))

/* VP routines */
int pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags);
struct pte_desc *pmap_vp_remove(pmap_t pm, vaddr_t va);
void pmap_vp_destroy(pmap_t pm);
struct pte_desc *pmap_vp_lookup(pmap_t pm, vaddr_t va) __noprof;

/* PV routines */
void pmap_enter_pv(struct pte_desc *pted, struct vm_page *);
void pmap_remove_pv(struct pte_desc *pted);

/* pte hash table routines */
static inline void *pmap_ptedinhash(struct pte_desc *);
void pte_insert32(struct pte_desc *) __noprof;
void pte_insert64(struct pte_desc *) __noprof;
void pmap_fill_pte64(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t,
    int) __noprof;
void pmap_fill_pte32(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t,
    int) __noprof;

void pmap_syncicache_user_virt(pmap_t pm, vaddr_t va);

void pmap_remove_pted(pmap_t, struct pte_desc *);

/* setup/initialization functions */
void pmap_avail_setup(void);
void pmap_avail_fixup(void);
void pmap_remove_avail(paddr_t base, paddr_t end);
void *pmap_steal_avail(size_t size, int align);

/* asm interface */
int pte_spill_r(u_int32_t, u_int32_t, u_int32_t, int) __noprof;
int pte_spill_v(pmap_t, u_int32_t, u_int32_t, int) __noprof;

u_int32_t pmap_setusr(pmap_t pm, vaddr_t va);
void pmap_popusr(u_int32_t oldsr);

/* pte invalidation */
void pte_del(void *, vaddr_t);
void pte_zap(void *, struct pte_desc *);

/*
 * XXX - panic on pool get failures?
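 *
 * Three pools back the pmap layer: pmap_pmap_pool for struct pmap,
 * pmap_vp_pool for the intermediate pmapvp tables and pmap_pted_pool
 * for pte_desc entries; all three are set up in pmap_init().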
*/ struct pool pmap_pmap_pool; struct pool pmap_vp_pool; struct pool pmap_pted_pool; int pmap_initialized = 0; int physmem; int physmaxaddr; #ifdef MULTIPROCESSOR struct __ppc_lock pmap_hash_lock = PPC_LOCK_INITIALIZER; #define PMAP_HASH_LOCK(s) \ do { \ s = ppc_intr_disable(); \ __ppc_lock(&pmap_hash_lock); \ } while (0) #define PMAP_HASH_UNLOCK(s) \ do { \ __ppc_unlock(&pmap_hash_lock); \ ppc_intr_enable(s); \ } while (0) #define PMAP_VP_LOCK_INIT(pm) mtx_init(&pm->pm_mtx, IPL_VM) #define PMAP_VP_LOCK(pm) \ do { \ if (pm != pmap_kernel()) \ mtx_enter(&pm->pm_mtx); \ } while (0) #define PMAP_VP_UNLOCK(pm) \ do { \ if (pm != pmap_kernel()) \ mtx_leave(&pm->pm_mtx); \ } while (0) #define PMAP_VP_ASSERT_LOCKED(pm) \ do { \ if (pm != pmap_kernel()) \ MUTEX_ASSERT_LOCKED(&pm->pm_mtx); \ } while (0) #else /* ! MULTIPROCESSOR */ #define PMAP_HASH_LOCK(s) (void)s #define PMAP_HASH_UNLOCK(s) /* nothing */ #define PMAP_VP_LOCK_INIT(pm) /* nothing */ #define PMAP_VP_LOCK(pm) /* nothing */ #define PMAP_VP_UNLOCK(pm) /* nothing */ #define PMAP_VP_ASSERT_LOCKED(pm) /* nothing */ #endif /* MULTIPROCESSOR */ /* virtual to physical helpers */ static inline int VP_SR(vaddr_t va) { return (va >>VP_SR_POS) & VP_SR_MASK; } static inline int VP_IDX1(vaddr_t va) { return (va >> VP_IDX1_POS) & VP_IDX1_MASK; } static inline int VP_IDX2(vaddr_t va) { return (va >> VP_IDX2_POS) & VP_IDX2_MASK; } #if VP_IDX1_SIZE != VP_IDX2_SIZE #error pmap allocation code expects IDX1 and IDX2 size to be same #endif struct pmapvp { void *vp[VP_IDX1_SIZE]; }; /* * VP routines, virtual to physical translation information. * These data structures are based off of the pmap, per process. */ /* * This is used for pmap_kernel() mappings, they are not to be removed * from the vp table because they were statically initialized at the * initial pmap initialization. This is so that memory allocation * is not necessary in the pmap_kernel() mappings. * Otherwise bad race conditions can appear. */ struct pte_desc * pmap_vp_lookup(pmap_t pm, vaddr_t va) { struct pmapvp *vp1; struct pmapvp *vp2; struct pte_desc *pted; PMAP_VP_ASSERT_LOCKED(pm); vp1 = pm->pm_vp[VP_SR(va)]; if (vp1 == NULL) { return NULL; } vp2 = vp1->vp[VP_IDX1(va)]; if (vp2 == NULL) { return NULL; } pted = vp2->vp[VP_IDX2(va)]; return pted; } /* * Remove, and return, pted at specified address, NULL if not present */ struct pte_desc * pmap_vp_remove(pmap_t pm, vaddr_t va) { struct pmapvp *vp1; struct pmapvp *vp2; struct pte_desc *pted; PMAP_VP_ASSERT_LOCKED(pm); vp1 = pm->pm_vp[VP_SR(va)]; if (vp1 == NULL) { return NULL; } vp2 = vp1->vp[VP_IDX1(va)]; if (vp2 == NULL) { return NULL; } pted = vp2->vp[VP_IDX2(va)]; vp2->vp[VP_IDX2(va)] = NULL; return pted; } /* * Create a V -> P mapping for the given pmap and virtual address * with reference to the pte descriptor that is used to map the page. * This code should track allocations of vp table allocations * so they can be freed efficiently. 
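 *
 * The VP tree is a two-level radix table indexed by the virtual
 * address; pmap_vp_lookup(), pmap_vp_remove() and this function all
 * walk it the same way, roughly:
 *
 *	vp1  = pm->pm_vp[VP_SR(va)];	(per-segment table)
 *	vp2  = vp1->vp[VP_IDX1(va)];	(first index level)
 *	pted = vp2->vp[VP_IDX2(va)];	(pte descriptor)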
*/ int pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags) { struct pmapvp *vp1; struct pmapvp *vp2; PMAP_VP_ASSERT_LOCKED(pm); vp1 = pm->pm_vp[VP_SR(va)]; if (vp1 == NULL) { vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO); if (vp1 == NULL) { if ((flags & PMAP_CANFAIL) == 0) panic("pmap_vp_enter: failed to allocate vp1"); return ENOMEM; } pm->pm_vp[VP_SR(va)] = vp1; } vp2 = vp1->vp[VP_IDX1(va)]; if (vp2 == NULL) { vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO); if (vp2 == NULL) { if ((flags & PMAP_CANFAIL) == 0) panic("pmap_vp_enter: failed to allocate vp2"); return ENOMEM; } vp1->vp[VP_IDX1(va)] = vp2; } vp2->vp[VP_IDX2(va)] = pted; return 0; } static inline void tlbie(vaddr_t va) { asm volatile ("tlbie %0" :: "r"(va & ~PAGE_MASK)); } static inline void tlbsync(void) { asm volatile ("tlbsync"); } static inline void eieio(void) { asm volatile ("eieio"); } static inline void sync(void) { asm volatile ("sync"); } static inline void tlbia(void) { vaddr_t va; sync(); for (va = 0; va < 0x00040000; va += 0x00001000) tlbie(va); eieio(); tlbsync(); sync(); } static inline int ptesr(sr_t *sr, vaddr_t va) { return sr[(u_int)va >> ADDR_SR_SHIFT]; } static inline int pteidx(sr_t sr, vaddr_t va) { int hash; hash = (sr & SR_VSID) ^ (((u_int)va & ADDR_PIDX) >> ADDR_PIDX_SHIFT); return hash & pmap_ptab_mask; } #define PTED_VA_PTEGIDX_M 0x07 #define PTED_VA_HID_M 0x08 #define PTED_VA_MANAGED_M 0x10 #define PTED_VA_WIRED_M 0x20 #define PTED_VA_EXEC_M 0x40 static inline u_int32_t PTED_HID(struct pte_desc *pted) { return (pted->pted_va & PTED_VA_HID_M); } static inline u_int32_t PTED_PTEGIDX(struct pte_desc *pted) { return (pted->pted_va & PTED_VA_PTEGIDX_M); } static inline u_int32_t PTED_MANAGED(struct pte_desc *pted) { return (pted->pted_va & PTED_VA_MANAGED_M); } static inline u_int32_t PTED_VALID(struct pte_desc *pted) { if (ppc_proc_is_64b) return (pted->p.pted_pte64.pte_hi & PTE_VALID_64); else return (pted->p.pted_pte32.pte_hi & PTE_VALID_32); } /* * PV entries - * manipulate the physical to virtual translations for the entire system. * * QUESTION: should all mapped memory be stored in PV tables? Or * is it alright to only store "ram" memory. Currently device mappings * are not stored. * It makes sense to pre-allocate mappings for all of "ram" memory, since * it is likely that it will be mapped at some point, but would it also * make sense to use a tree/table like is use for pmap to store device * mappings? * Further notes: It seems that the PV table is only used for pmap_protect * and other paging related operations. Given this, it is not necessary * to store any pmap_kernel() entries in PV tables and does not make * sense to store device mappings in PV either. * * Note: unlike other powerpc pmap designs, the array is only an array * of pointers. Since the same structure is used for holding information * in the VP table, the PV table, and for kernel mappings, the wired entries. * Allocate one data structure to hold all of the info, instead of replicating * it multiple times. * * One issue of making this a single data structure is that two pointers are * wasted for every page which does not map ram (device mappings), this * should be a low percentage of mapped pages in the system, so should not * have too noticeable unnecessary ram consumption. 
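 *
 * The per-page list head lives in pg->mdpage.pv_list and is protected
 * by pg->mdpage.pv_mtx; pmap_enter_pv() links a pte_desc onto it and
 * marks it PTED_VA_MANAGED_M, pmap_remove_pv() undoes both.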
*/ void pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg) { if (__predict_false(!pmap_initialized)) { return; } mtx_enter(&pg->mdpage.pv_mtx); LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list); pted->pted_va |= PTED_VA_MANAGED_M; mtx_leave(&pg->mdpage.pv_mtx); } void pmap_remove_pv(struct pte_desc *pted) { struct vm_page *pg; if (ppc_proc_is_64b) pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64); else pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32); mtx_enter(&pg->mdpage.pv_mtx); pted->pted_va &= ~PTED_VA_MANAGED_M; LIST_REMOVE(pted, pted_pv_list); mtx_leave(&pg->mdpage.pv_mtx); } /* PTE_CHG_32 == PTE_CHG_64 */ /* PTE_REF_32 == PTE_REF_64 */ static __inline u_int pmap_pte2flags(u_int32_t pte) { return (((pte & PTE_REF_32) ? PG_PMAP_REF : 0) | ((pte & PTE_CHG_32) ? PG_PMAP_MOD : 0)); } static __inline u_int pmap_flags2pte(u_int32_t flags) { return (((flags & PG_PMAP_REF) ? PTE_REF_32 : 0) | ((flags & PG_PMAP_MOD) ? PTE_CHG_32 : 0)); } void pmap_attr_save(paddr_t pa, u_int32_t bits) { struct vm_page *pg; pg = PHYS_TO_VM_PAGE(pa); if (pg == NULL) return; atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(bits)); } int pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) { struct pte_desc *pted; struct vm_page *pg; boolean_t nocache = (pa & PMAP_NOCACHE) != 0; boolean_t wt = (pa & PMAP_WT) != 0; int need_sync = 0; int cache, error = 0; KASSERT(!(wt && nocache)); pa &= PMAP_PA_MASK; PMAP_VP_LOCK(pm); pted = pmap_vp_lookup(pm, va); if (pted && PTED_VALID(pted)) { pmap_remove_pted(pm, pted); /* we lost our pted if it was user */ if (pm != pmap_kernel()) pted = pmap_vp_lookup(pm, va); } pm->pm_stats.resident_count++; /* Do not have pted for this, get one and put it in VP */ if (pted == NULL) { pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO); if (pted == NULL) { if ((flags & PMAP_CANFAIL) == 0) { error = ENOMEM; goto out; } panic("pmap_enter: failed to allocate pted"); } error = pmap_vp_enter(pm, va, pted, flags); if (error) { pool_put(&pmap_pted_pool, pted); goto out; } } pg = PHYS_TO_VM_PAGE(pa); if (pg != NULL && (pg->pg_flags & PG_PMAP_UC)) nocache = TRUE; if (wt) cache = PMAP_CACHE_WT; else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache) cache = PMAP_CACHE_WB; else cache = PMAP_CACHE_CI; /* Calculate PTE */ if (ppc_proc_is_64b) pmap_fill_pte64(pm, va, pa, pted, prot, cache); else pmap_fill_pte32(pm, va, pa, pted, prot, cache); if (pg != NULL) { pmap_enter_pv(pted, pg); /* only managed mem */ } /* * Insert into HTAB * We were told to map the page, probably called from vm_fault, * so map the page! */ if (ppc_proc_is_64b) pte_insert64(pted); else pte_insert32(pted); if (prot & PROT_EXEC) { u_int sn = VP_SR(va); pm->pm_exec[sn]++; if (pm->pm_sr[sn] & SR_NOEXEC) pm->pm_sr[sn] &= ~SR_NOEXEC; if (pg != NULL) { need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0); if (prot & PROT_WRITE) atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); else atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE); } else need_sync = 1; } else { /* * Should we be paranoid about writeable non-exec * mappings ? if so, clear the exec tag */ if ((prot & PROT_WRITE) && (pg != NULL)) atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); } /* only instruction sync executable pages */ if (need_sync) pmap_syncicache_user_virt(pm, va); out: PMAP_VP_UNLOCK(pm); return (error); } /* * Remove the given range of mapping entries. 
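 *
 * The range is walked one page at a time; every valid pte_desc found
 * via pmap_vp_lookup() is torn down with pmap_remove_pted().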
*/ void pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva) { struct pte_desc *pted; vaddr_t va; PMAP_VP_LOCK(pm); for (va = sva; va < eva; va += PAGE_SIZE) { pted = pmap_vp_lookup(pm, va); if (pted && PTED_VALID(pted)) pmap_remove_pted(pm, pted); } PMAP_VP_UNLOCK(pm); } /* * remove a single mapping, notice that this code is O(1) */ void pmap_remove_pted(pmap_t pm, struct pte_desc *pted) { void *pte; int s; KASSERT(pm == pted->pted_pmap); PMAP_VP_ASSERT_LOCKED(pm); pm->pm_stats.resident_count--; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) pte_zap(pte, pted); PMAP_HASH_UNLOCK(s); if (pted->pted_va & PTED_VA_EXEC_M) { u_int sn = VP_SR(pted->pted_va); pted->pted_va &= ~PTED_VA_EXEC_M; pm->pm_exec[sn]--; if (pm->pm_exec[sn] == 0) pm->pm_sr[sn] |= SR_NOEXEC; } if (ppc_proc_is_64b) pted->p.pted_pte64.pte_hi &= ~PTE_VALID_64; else pted->p.pted_pte32.pte_hi &= ~PTE_VALID_32; if (PTED_MANAGED(pted)) pmap_remove_pv(pted); if (pm != pmap_kernel()) { (void)pmap_vp_remove(pm, pted->pted_va); pool_put(&pmap_pted_pool, pted); } } /* * Enter a kernel mapping for the given page. * kernel mappings have a larger set of prerequisites than normal mappings. * * 1. no memory should be allocated to create a kernel mapping. * 2. a vp mapping should already exist, even if invalid. (see 1) * 3. all vp tree mappings should already exist (see 1) * */ void pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) { struct pte_desc *pted; struct vm_page *pg; boolean_t nocache = (pa & PMAP_NOCACHE) != 0; boolean_t wt = (pa & PMAP_WT) != 0; pmap_t pm; int cache; KASSERT(!(wt && nocache)); pa &= PMAP_PA_MASK; pm = pmap_kernel(); pted = pmap_vp_lookup(pm, va); if (pted && PTED_VALID(pted)) pmap_remove_pted(pm, pted); /* pted is reused */ pm->pm_stats.resident_count++; if (prot & PROT_WRITE) { pg = PHYS_TO_VM_PAGE(pa); if (pg != NULL) atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); } /* Do not have pted for this, get one and put it in VP */ if (pted == NULL) { panic("pted not preallocated in pmap_kernel() va %lx pa %lx", va, pa); } pg = PHYS_TO_VM_PAGE(pa); if (wt) cache = PMAP_CACHE_WT; else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache) cache = PMAP_CACHE_WB; else cache = PMAP_CACHE_CI; /* Calculate PTE */ if (ppc_proc_is_64b) pmap_fill_pte64(pm, va, pa, pted, prot, cache); else pmap_fill_pte32(pm, va, pa, pted, prot, cache); /* * Insert into HTAB * We were told to map the page, probably called from vm_fault, * so map the page! */ if (ppc_proc_is_64b) pte_insert64(pted); else pte_insert32(pted); pted->pted_va |= PTED_VA_WIRED_M; if (prot & PROT_EXEC) { u_int sn = VP_SR(va); pm->pm_exec[sn]++; if (pm->pm_sr[sn] & SR_NOEXEC) pm->pm_sr[sn] &= ~SR_NOEXEC; } } /* * remove kernel (pmap_kernel()) mappings */ void pmap_kremove(vaddr_t va, vsize_t len) { struct pte_desc *pted; for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE) { pted = pmap_vp_lookup(pmap_kernel(), va); if (pted && PTED_VALID(pted)) pmap_remove_pted(pmap_kernel(), pted); } } static inline void * pmap_ptedinhash(struct pte_desc *pted) { vaddr_t va = pted->pted_va & ~PAGE_MASK; pmap_t pm = pted->pted_pmap; int sr, idx; sr = ptesr(pm->pm_sr, va); idx = pteidx(sr, va); if (ppc_proc_is_64b) { struct pte_64 *pte = pmap_ptable64; pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8; pte += PTED_PTEGIDX(pted); /* * We now have the pointer to where it will be, if it is * currently mapped. If the mapping was thrown away in * exchange for another page mapping, then this page is * not currently in the HASH. 
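		 *
		 * The comparison below reconstructs the pte_hi we would
		 * have installed (including PTE_HID_64 if the secondary
		 * hash was used) and only claims the slot if it matches
		 * exactly what is currently there.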
*/ if ((pted->p.pted_pte64.pte_hi | (PTED_HID(pted) ? PTE_HID_64 : 0)) == pte->pte_hi) return (pte); } else { struct pte_32 *pte = pmap_ptable32; pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8; pte += PTED_PTEGIDX(pted); /* * We now have the pointer to where it will be, if it is * currently mapped. If the mapping was thrown away in * exchange for another page mapping, then this page is * not currently in the HASH. */ if ((pted->p.pted_pte32.pte_hi | (PTED_HID(pted) ? PTE_HID_32 : 0)) == pte->pte_hi) return (pte); } return (NULL); } /* * Delete a Page Table Entry, section 7.6.3.3. * * Note: pte must be locked. */ void pte_del(void *pte, vaddr_t va) { if (ppc_proc_is_64b) ((struct pte_64 *)pte)->pte_hi &= ~PTE_VALID_64; else ((struct pte_32 *)pte)->pte_hi &= ~PTE_VALID_32; sync(); /* Ensure update completed. */ tlbie(va); /* Invalidate old translation. */ eieio(); /* Order tlbie before tlbsync. */ tlbsync(); /* Ensure tlbie completed on all processors. */ sync(); /* Ensure tlbsync and update completed. */ } void pte_zap(void *pte, struct pte_desc *pted) { pte_del(pte, pted->pted_va); if (!PTED_MANAGED(pted)) return; if (ppc_proc_is_64b) { pmap_attr_save(pted->p.pted_pte64.pte_lo & PTE_RPGN_64, ((struct pte_64 *)pte)->pte_lo & (PTE_REF_64|PTE_CHG_64)); } else { pmap_attr_save(pted->p.pted_pte32.pte_lo & PTE_RPGN_32, ((struct pte_32 *)pte)->pte_lo & (PTE_REF_32|PTE_CHG_32)); } } /* * What about execution control? Even at only a segment granularity. */ void pmap_fill_pte64(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted, vm_prot_t prot, int cache) { sr_t sr; struct pte_64 *pte64; sr = ptesr(pm->pm_sr, va); pte64 = &pted->p.pted_pte64; pte64->pte_hi = (((u_int64_t)sr & SR_VSID) << PTE_VSID_SHIFT_64) | ((va >> ADDR_API_SHIFT_64) & PTE_API_64) | PTE_VALID_64; pte64->pte_lo = (pa & PTE_RPGN_64); if (cache == PMAP_CACHE_WB) pte64->pte_lo |= PTE_M_64; else if (cache == PMAP_CACHE_WT) pte64->pte_lo |= (PTE_W_64 | PTE_M_64); else pte64->pte_lo |= (PTE_M_64 | PTE_I_64 | PTE_G_64); if ((prot & (PROT_READ | PROT_WRITE)) == 0) pte64->pte_lo |= PTE_AC_64; if (prot & PROT_WRITE) pte64->pte_lo |= PTE_RW_64; else pte64->pte_lo |= PTE_RO_64; pted->pted_va = va & ~PAGE_MASK; if (prot & PROT_EXEC) pted->pted_va |= PTED_VA_EXEC_M; else pte64->pte_lo |= PTE_N_64; pted->pted_pmap = pm; } /* * What about execution control? Even at only a segment granularity. */ void pmap_fill_pte32(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted, vm_prot_t prot, int cache) { sr_t sr; struct pte_32 *pte32; sr = ptesr(pm->pm_sr, va); pte32 = &pted->p.pted_pte32; pte32->pte_hi = ((sr & SR_VSID) << PTE_VSID_SHIFT_32) | ((va >> ADDR_API_SHIFT_32) & PTE_API_32) | PTE_VALID_32; pte32->pte_lo = (pa & PTE_RPGN_32); if (cache == PMAP_CACHE_WB) pte32->pte_lo |= PTE_M_32; else if (cache == PMAP_CACHE_WT) pte32->pte_lo |= (PTE_W_32 | PTE_M_32); else pte32->pte_lo |= (PTE_M_32 | PTE_I_32 | PTE_G_32); if (prot & PROT_WRITE) pte32->pte_lo |= PTE_RW_32; else pte32->pte_lo |= PTE_RO_32; pted->pted_va = va & ~PAGE_MASK; /* XXX Per-page execution control. 
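	 * The 32-bit PTE format has no no-execute bit, so PROT_EXEC is
	 * only recorded in the pte_desc (PTED_VA_EXEC_M below) and
	 * enforced at segment granularity via SR_NOEXEC.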
*/ if (prot & PROT_EXEC) pted->pted_va |= PTED_VA_EXEC_M; pted->pted_pmap = pm; } int pmap_test_attrs(struct vm_page *pg, u_int flagbit) { u_int bits; struct pte_desc *pted; u_int ptebit = pmap_flags2pte(flagbit); int s; /* PTE_CHG_32 == PTE_CHG_64 */ /* PTE_REF_32 == PTE_REF_64 */ bits = pg->pg_flags & flagbit; if (bits == flagbit) return bits; mtx_enter(&pg->mdpage.pv_mtx); LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) { void *pte; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) { if (ppc_proc_is_64b) { struct pte_64 *ptp64 = pte; bits |= pmap_pte2flags(ptp64->pte_lo & ptebit); } else { struct pte_32 *ptp32 = pte; bits |= pmap_pte2flags(ptp32->pte_lo & ptebit); } } PMAP_HASH_UNLOCK(s); if (bits == flagbit) break; } mtx_leave(&pg->mdpage.pv_mtx); atomic_setbits_int(&pg->pg_flags, bits); return bits; } int pmap_clear_attrs(struct vm_page *pg, u_int flagbit) { u_int bits; struct pte_desc *pted; u_int ptebit = pmap_flags2pte(flagbit); int s; /* PTE_CHG_32 == PTE_CHG_64 */ /* PTE_REF_32 == PTE_REF_64 */ bits = pg->pg_flags & flagbit; mtx_enter(&pg->mdpage.pv_mtx); LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) { void *pte; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) { if (ppc_proc_is_64b) { struct pte_64 *ptp64 = pte; bits |= pmap_pte2flags(ptp64->pte_lo & ptebit); pte_del(ptp64, pted->pted_va); ptp64->pte_lo &= ~ptebit; eieio(); ptp64->pte_hi |= PTE_VALID_64; sync(); } else { struct pte_32 *ptp32 = pte; bits |= pmap_pte2flags(ptp32->pte_lo & ptebit); pte_del(ptp32, pted->pted_va); ptp32->pte_lo &= ~ptebit; eieio(); ptp32->pte_hi |= PTE_VALID_32; sync(); } } PMAP_HASH_UNLOCK(s); } mtx_leave(&pg->mdpage.pv_mtx); /* * this is done a second time, because while walking the list * a bit could have been promoted via pmap_attr_save() */ bits |= pg->pg_flags & flagbit; atomic_clearbits_int(&pg->pg_flags, flagbit); return bits; } /* * Fill the given physical page with zeros. */ void pmap_zero_page(struct vm_page *pg) { vaddr_t va = pmap_map_direct(pg); int i; /* * Loop over & zero cache lines. This code assumes that 64-bit * CPUs have 128-byte cache lines. We explicitly use ``dcbzl'' * here because we do not clear the DCBZ_SIZE bit of the HID5 * register in order to be compatible with code using ``dcbz'' * and assuming that cache line size is 32. */ if (ppc_proc_is_64b) { for (i = 0; i < PAGE_SIZE; i += 128) asm volatile ("dcbzl 0,%0" :: "r"(va + i)); return; } for (i = 0; i < PAGE_SIZE; i += CACHELINESIZE) asm volatile ("dcbz 0,%0" :: "r"(va + i)); } /* * Copy a page. */ void pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) { vaddr_t srcva = pmap_map_direct(srcpg); vaddr_t dstva = pmap_map_direct(dstpg); memcpy((void *)dstva, (void *)srcva, PAGE_SIZE); } int pmap_id_avail = 0; pmap_t pmap_create(void) { u_int bits; int first, i, k, try, tblidx, tbloff; int seg; pmap_t pm; pm = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO); pmap_reference(pm); PMAP_VP_LOCK_INIT(pm); /* * Allocate segment registers for this pmap. * Try not to reuse pmap ids, to spread the hash table usage. 
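	 *
	 * A pmap id is a free bit in the usedsr[] bitmap, claimed with
	 * atomic_cas_uint() so no lock is needed.  Each id provides 16
	 * consecutive VSIDs (id << 4 ... (id << 4) + 15), one per
	 * segment register, all initially marked SR_NOEXEC.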
*/ first = pmap_id_avail; again: for (i = 0; i < NPMAPS; i++) { try = first + i; try = try % NPMAPS; /* truncate back into bounds */ tblidx = try / (8 * sizeof usedsr[0]); tbloff = try % (8 * sizeof usedsr[0]); bits = usedsr[tblidx]; if ((bits & (1U << tbloff)) == 0) { if (atomic_cas_uint(&usedsr[tblidx], bits, bits | (1U << tbloff)) != bits) { first = try; goto again; } pmap_id_avail = try + 1; seg = try << 4; for (k = 0; k < 16; k++) pm->pm_sr[k] = (seg + k) | SR_NOEXEC; return (pm); } } panic("out of pmap slots"); } /* * Add a reference to a given pmap. */ void pmap_reference(pmap_t pm) { atomic_inc_int(&pm->pm_refs); } /* * Retire the given pmap from service. * Should only be called if the map contains no valid mappings. */ void pmap_destroy(pmap_t pm) { int refs; refs = atomic_dec_int_nv(&pm->pm_refs); if (refs == -1) panic("re-entering pmap_destroy"); if (refs > 0) return; /* * reference count is zero, free pmap resources and free pmap. */ pmap_release(pm); pool_put(&pmap_pmap_pool, pm); } /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. */ void pmap_release(pmap_t pm) { int i, tblidx, tbloff; pmap_vp_destroy(pm); i = (pm->pm_sr[0] & SR_VSID) >> 4; tblidx = i / (8 * sizeof usedsr[0]); tbloff = i % (8 * sizeof usedsr[0]); /* powerpc can do atomic cas, clearbits on same word. */ atomic_clearbits_int(&usedsr[tblidx], 1U << tbloff); } void pmap_vp_destroy(pmap_t pm) { int i, j; struct pmapvp *vp1; struct pmapvp *vp2; for (i = 0; i < VP_SR_SIZE; i++) { vp1 = pm->pm_vp[i]; if (vp1 == NULL) continue; for (j = 0; j < VP_IDX1_SIZE; j++) { vp2 = vp1->vp[j]; if (vp2 == NULL) continue; pool_put(&pmap_vp_pool, vp2); } pm->pm_vp[i] = NULL; pool_put(&pmap_vp_pool, vp1); } } void pmap_avail_setup(void) { struct mem_region *mp; ppc_mem_regions(&pmap_mem, &pmap_avail); for (mp = pmap_mem; mp->size !=0; mp++, ndumpmem++) { physmem += atop(mp->size); dumpmem[ndumpmem].start = atop(mp->start); dumpmem[ndumpmem].end = atop(mp->start + mp->size); } for (mp = pmap_avail; mp->size !=0 ; mp++) { if (physmaxaddr < mp->start + mp->size) physmaxaddr = mp->start + mp->size; } for (mp = pmap_avail; mp->size !=0; mp++) pmap_cnt_avail += 1; } void pmap_avail_fixup(void) { struct mem_region *mp; u_int32_t align; u_int32_t end; mp = pmap_avail; while(mp->size !=0) { align = round_page(mp->start); if (mp->start != align) { pmap_remove_avail(mp->start, align); mp = pmap_avail; continue; } end = mp->start+mp->size; align = trunc_page(end); if (end != align) { pmap_remove_avail(align, end); mp = pmap_avail; continue; } mp++; } } /* remove a given region from avail memory */ void pmap_remove_avail(paddr_t base, paddr_t end) { struct mem_region *mp; int i; int mpend; /* remove given region from available */ for (mp = pmap_avail; mp->size; mp++) { /* * Check if this region holds all of the region */ mpend = mp->start + mp->size; if (base > mpend) { continue; } if (base <= mp->start) { if (end <= mp->start) break; /* region not present -??? 
*/ if (end >= mpend) { /* covers whole region */ /* shorten */ for (i = mp - pmap_avail; i < pmap_cnt_avail; i++) { pmap_avail[i] = pmap_avail[i+1]; } pmap_cnt_avail--; pmap_avail[pmap_cnt_avail].size = 0; } else { mp->start = end; mp->size = mpend - end; } } else { /* start after the beginning */ if (end >= mpend) { /* just truncate */ mp->size = base - mp->start; } else { /* split */ for (i = pmap_cnt_avail; i > (mp - pmap_avail); i--) { pmap_avail[i] = pmap_avail[i - 1]; } pmap_cnt_avail++; mp->size = base - mp->start; mp++; mp->start = end; mp->size = mpend - end; } } } for (mp = pmap_allocated; mp->size != 0; mp++) { if (base < mp->start) { if (end == mp->start) { mp->start = base; mp->size += end - base; break; } /* lengthen */ for (i = pmap_cnt_allocated; i > (mp - pmap_allocated); i--) { pmap_allocated[i] = pmap_allocated[i - 1]; } pmap_cnt_allocated++; mp->start = base; mp->size = end - base; return; } if (base == (mp->start + mp->size)) { mp->size += end - base; return; } } if (mp->size == 0) { mp->start = base; mp->size = end - base; pmap_cnt_allocated++; } } void * pmap_steal_avail(size_t size, int align) { struct mem_region *mp; int start; int remsize; for (mp = pmap_avail; mp->size; mp++) { if (mp->size > size) { start = (mp->start + (align -1)) & ~(align -1); remsize = mp->size - (start - mp->start); if (remsize >= 0) { pmap_remove_avail(start, start+size); return (void *)start; } } } panic ("unable to allocate region with size %zx align %x", size, align); } /* * Similar to pmap_steal_avail, but operating on vm_physmem since * uvm_page_physload() has been called. */ vaddr_t pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end) { int segno; u_int npg; vaddr_t va; paddr_t pa; struct vm_physseg *seg; size = round_page(size); npg = atop(size); for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) { if (seg->avail_end - seg->avail_start < npg) continue; /* * We can only steal at an ``unused'' segment boundary, * i.e. either at the start or at the end. */ if (seg->avail_start == seg->start || seg->avail_end == seg->end) break; } if (segno == vm_nphysseg) va = 0; else { if (seg->avail_start == seg->start) { pa = ptoa(seg->avail_start); seg->avail_start += npg; seg->start += npg; } else { pa = ptoa(seg->avail_end) - size; seg->avail_end -= npg; seg->end -= npg; } /* * If all the segment has been consumed now, remove it. * Note that the crash dump code still knows about it * and will dump it correctly. */ if (seg->start == seg->end) { if (vm_nphysseg-- == 1) panic("pmap_steal_memory: out of memory"); while (segno < vm_nphysseg) { seg[0] = seg[1]; /* struct copy */ seg++; segno++; } } va = (vaddr_t)pa; /* 1:1 mapping */ bzero((void *)va, size); } if (start != NULL) *start = VM_MIN_KERNEL_ADDRESS; if (end != NULL) *end = VM_MAX_KERNEL_ADDRESS; return (va); } void *msgbuf_addr; /* * Initialize pmap setup. * ALL of the code which deals with avail needs rewritten as an actual * memory allocation. */ void pmap_bootstrap(u_int kernelstart, u_int kernelend) { struct mem_region *mp; int i, k; struct pmapvp *vp1; struct pmapvp *vp2; extern vaddr_t ppc_kvm_stolen; /* * set the page size (default value is 4K which is ok) */ uvm_setpagesize(); /* * Get memory. */ pmap_avail_setup(); /* * Page align all regions. * Non-page memory isn't very interesting to us. * Also, sort the entries for ascending addresses. 
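	 *
	 * From here pmap_bootstrap() carves the kernel image and the
	 * msgbuf out of the available regions, sizes and allocates the
	 * hash table, pre-allocates the pmap_kernel() VP tree and its
	 * pte_descs, and finally hands the remaining physical memory
	 * below 2GB to UVM via uvm_page_physload().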
*/ kernelstart = trunc_page(kernelstart); kernelend = round_page(kernelend); pmap_remove_avail(kernelstart, kernelend); msgbuf_addr = pmap_steal_avail(MSGBUFSIZE,4); #ifdef DEBUG for (mp = pmap_avail; mp->size; mp++) { bzero((void *)mp->start, mp->size); } #endif #define HTABENTS_32 1024 #define HTABENTS_64 2048 if (ppc_proc_is_64b) { pmap_ptab_cnt = HTABENTS_64; while (pmap_ptab_cnt * 2 < physmem) pmap_ptab_cnt <<= 1; } else { pmap_ptab_cnt = HTABENTS_32; while (HTABSIZE_32 < (ptoa(physmem) >> 7)) pmap_ptab_cnt <<= 1; } /* * allocate suitably aligned memory for HTAB */ if (ppc_proc_is_64b) { pmap_ptable64 = pmap_steal_avail(HTABMEMSZ_64, HTABMEMSZ_64); bzero((void *)pmap_ptable64, HTABMEMSZ_64); pmap_ptab_mask = pmap_ptab_cnt - 1; } else { pmap_ptable32 = pmap_steal_avail(HTABSIZE_32, HTABSIZE_32); bzero((void *)pmap_ptable32, HTABSIZE_32); pmap_ptab_mask = pmap_ptab_cnt - 1; } /* allocate v->p mappings for pmap_kernel() */ for (i = 0; i < VP_SR_SIZE; i++) { pmap_kernel()->pm_vp[i] = NULL; } vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4); bzero (vp1, sizeof(struct pmapvp)); pmap_kernel()->pm_vp[PPC_KERNEL_SR] = vp1; for (i = 0; i < VP_IDX1_SIZE; i++) { vp2 = vp1->vp[i] = pmap_steal_avail(sizeof (struct pmapvp), 4); bzero (vp2, sizeof(struct pmapvp)); for (k = 0; k < VP_IDX2_SIZE; k++) { struct pte_desc *pted; pted = pmap_steal_avail(sizeof (struct pte_desc), 4); bzero (pted, sizeof (struct pte_desc)); vp2->vp[k] = pted; } } /* * Initialize kernel pmap and hardware. */ #if NPMAPS >= PPC_KERNEL_SEGMENT / 16 usedsr[PPC_KERNEL_SEGMENT / 16 / (sizeof usedsr[0] * 8)] |= 1 << ((PPC_KERNEL_SEGMENT / 16) % (sizeof usedsr[0] * 8)); #endif for (i = 0; i < 16; i++) pmap_kernel()->pm_sr[i] = (PPC_KERNEL_SEG0 + i) | SR_NOEXEC; if (ppc_nobat) { vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4); bzero (vp1, sizeof(struct pmapvp)); pmap_kernel()->pm_vp[0] = vp1; for (i = 0; i < VP_IDX1_SIZE; i++) { vp2 = vp1->vp[i] = pmap_steal_avail(sizeof (struct pmapvp), 4); bzero (vp2, sizeof(struct pmapvp)); for (k = 0; k < VP_IDX2_SIZE; k++) { struct pte_desc *pted; pted = pmap_steal_avail(sizeof (struct pte_desc), 4); bzero (pted, sizeof (struct pte_desc)); vp2->vp[k] = pted; } } /* first segment contains executable pages */ pmap_kernel()->pm_exec[0]++; pmap_kernel()->pm_sr[0] &= ~SR_NOEXEC; } else { /* * Setup fixed BAT registers. * * Note that we still run in real mode, and the BAT * registers were cleared in cpu_bootstrap(). */ battable[0].batl = BATL(0x00000000, BAT_M); if (physmem > atop(0x08000000)) battable[0].batu = BATU(0x00000000, BAT_BL_256M); else battable[0].batu = BATU(0x00000000, BAT_BL_128M); /* Map physical memory with BATs. 
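		 * Each battable[] slot below covers a further 256MB
		 * block, mapped coherent (BAT_M), so BATs can reach up
		 * to 2GB of physical memory.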
*/ if (physmem > atop(0x10000000)) { battable[0x1].batl = BATL(0x10000000, BAT_M); battable[0x1].batu = BATU(0x10000000, BAT_BL_256M); } if (physmem > atop(0x20000000)) { battable[0x2].batl = BATL(0x20000000, BAT_M); battable[0x2].batu = BATU(0x20000000, BAT_BL_256M); } if (physmem > atop(0x30000000)) { battable[0x3].batl = BATL(0x30000000, BAT_M); battable[0x3].batu = BATU(0x30000000, BAT_BL_256M); } if (physmem > atop(0x40000000)) { battable[0x4].batl = BATL(0x40000000, BAT_M); battable[0x4].batu = BATU(0x40000000, BAT_BL_256M); } if (physmem > atop(0x50000000)) { battable[0x5].batl = BATL(0x50000000, BAT_M); battable[0x5].batu = BATU(0x50000000, BAT_BL_256M); } if (physmem > atop(0x60000000)) { battable[0x6].batl = BATL(0x60000000, BAT_M); battable[0x6].batu = BATU(0x60000000, BAT_BL_256M); } if (physmem > atop(0x70000000)) { battable[0x7].batl = BATL(0x70000000, BAT_M); battable[0x7].batu = BATU(0x70000000, BAT_BL_256M); } } ppc_kvm_stolen += reserve_dumppages( (caddr_t)(VM_MIN_KERNEL_ADDRESS + ppc_kvm_stolen)); pmap_avail_fixup(); for (mp = pmap_avail; mp->size; mp++) { if (mp->start > 0x80000000) continue; if (mp->start + mp->size > 0x80000000) mp->size = 0x80000000 - mp->start; uvm_page_physload(atop(mp->start), atop(mp->start+mp->size), atop(mp->start), atop(mp->start+mp->size), 0); } } void pmap_enable_mmu(void) { uint32_t scratch, sdr1; int i; /* * For the PowerPC 970, ACCR = 3 inhibits loads and stores to * pages with PTE_AC_64. This is for execute-only mappings. */ if (ppc_proc_is_64b) asm volatile ("mtspr 29, %0" :: "r" (3)); if (!ppc_nobat) { extern caddr_t etext; /* DBAT0 used for initial segment */ ppc_mtdbat0l(battable[0].batl); ppc_mtdbat0u(battable[0].batu); /* IBAT0 only covering the kernel .text */ ppc_mtibat0l(battable[0].batl); if (round_page((vaddr_t)&etext) < 8*1024*1024) ppc_mtibat0u(BATU(0x00000000, BAT_BL_8M)); else ppc_mtibat0u(BATU(0x00000000, BAT_BL_16M)); } for (i = 0; i < 16; i++) ppc_mtsrin(PPC_KERNEL_SEG0 + i, i << ADDR_SR_SHIFT); if (ppc_proc_is_64b) sdr1 = (uint32_t)pmap_ptable64 | HTABSIZE_64; else sdr1 = (uint32_t)pmap_ptable32 | (pmap_ptab_mask >> 10); asm volatile ("sync; mtsdr1 %0; isync" :: "r"(sdr1)); tlbia(); asm volatile ("eieio; mfmsr %0; ori %0,%0,%1; mtmsr %0; sync; isync" : "=r"(scratch) : "K"(PSL_IR|PSL_DR|PSL_ME|PSL_RI)); } /* * activate a pmap entry * All PTE entries exist in the same hash table. * Segment registers are filled on exit to user mode. */ void pmap_activate(struct proc *p) { struct pcb *pcb = &p->p_addr->u_pcb; /* Set the current pmap. */ pcb->pcb_pm = p->p_vmspace->vm_map.pmap; pmap_extract(pmap_kernel(), (vaddr_t)pcb->pcb_pm, (paddr_t *)&pcb->pcb_pmreal); curcpu()->ci_curpm = pcb->pcb_pmreal; } /* * deactivate a pmap entry * NOOP on powerpc */ void pmap_deactivate(struct proc *p) { } /* * pmap_extract: extract a PA for the given VA */ boolean_t pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa) { struct pte_desc *pted; if (pm == pmap_kernel() && va < physmaxaddr) { *pa = va; return TRUE; } PMAP_VP_LOCK(pm); pted = pmap_vp_lookup(pm, va); if (pted == NULL || !PTED_VALID(pted)) { PMAP_VP_UNLOCK(pm); return FALSE; } if (ppc_proc_is_64b) *pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) | (va & ~PTE_RPGN_64); else *pa = (pted->p.pted_pte32.pte_lo & PTE_RPGN_32) | (va & ~PTE_RPGN_32); PMAP_VP_UNLOCK(pm); return TRUE; } #ifdef ALTIVEC /* * Read an instruction from a given virtual memory address. * Execute-only protection is bypassed. 
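 *
 * On 64-bit (970) CPUs execute-only pages carry PTE_AC_64 and, with
 * the ACCR setting made in pmap_enable_mmu(), loads through the user
 * mapping would fault; the instruction is therefore read through the
 * 1:1 physical mapping instead of with copyin32().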
*/ int pmap_copyinsn(pmap_t pm, vaddr_t va, uint32_t *insn) { struct pte_desc *pted; paddr_t pa; /* Assume pm != pmap_kernel(). */ if (ppc_proc_is_64b) { /* inline pmap_extract */ PMAP_VP_LOCK(pm); pted = pmap_vp_lookup(pm, va); if (pted == NULL || !PTED_VALID(pted)) { PMAP_VP_UNLOCK(pm); return EFAULT; } pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) | (va & ~PTE_RPGN_64); PMAP_VP_UNLOCK(pm); if (pa > physmaxaddr - sizeof(*insn)) return EFAULT; *insn = *(uint32_t *)pa; return 0; } else return copyin32((void *)va, insn); } #endif u_int32_t pmap_setusr(pmap_t pm, vaddr_t va) { u_int32_t sr; u_int32_t oldsr; sr = ptesr(pm->pm_sr, va); /* user address range lock?? */ asm volatile ("mfsr %0,%1" : "=r" (oldsr): "n"(PPC_USER_SR)); asm volatile ("isync; mtsr %0,%1; isync" :: "n"(PPC_USER_SR), "r"(sr)); return oldsr; } void pmap_popusr(u_int32_t sr) { asm volatile ("isync; mtsr %0,%1; isync" :: "n"(PPC_USER_SR), "r"(sr)); } int _copyin(const void *udaddr, void *kaddr, size_t len) { void *p; size_t l; u_int32_t oldsr; faultbuf env; void *oldh = curpcb->pcb_onfault; while (len > 0) { p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK); l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; if (l > len) l = len; oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr); if (setfault(&env)) { pmap_popusr(oldsr); curpcb->pcb_onfault = oldh; return EFAULT; } bcopy(p, kaddr, l); pmap_popusr(oldsr); udaddr += l; kaddr += l; len -= l; } curpcb->pcb_onfault = oldh; return 0; } int copyout(const void *kaddr, void *udaddr, size_t len) { void *p; size_t l; u_int32_t oldsr; faultbuf env; void *oldh = curpcb->pcb_onfault; while (len > 0) { p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK); l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; if (l > len) l = len; oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr); if (setfault(&env)) { pmap_popusr(oldsr); curpcb->pcb_onfault = oldh; return EFAULT; } bcopy(kaddr, p, l); pmap_popusr(oldsr); udaddr += l; kaddr += l; len -= l; } curpcb->pcb_onfault = oldh; return 0; } int copyin32(const uint32_t *udaddr, uint32_t *kaddr) { volatile uint32_t *p; u_int32_t oldsr; faultbuf env; void *oldh = curpcb->pcb_onfault; if ((u_int)udaddr & 0x3) return EFAULT; p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK); oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr); if (setfault(&env)) { pmap_popusr(oldsr); curpcb->pcb_onfault = oldh; return EFAULT; } *kaddr = *p; pmap_popusr(oldsr); curpcb->pcb_onfault = oldh; return 0; } int _copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done) { const u_char *uaddr = udaddr; u_char *kp = kaddr; u_char *up; u_char c; void *p; size_t l; u_int32_t oldsr; int cnt = 0; faultbuf env; void *oldh = curpcb->pcb_onfault; while (len > 0) { p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK); l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; up = p; if (l > len) l = len; len -= l; oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr); if (setfault(&env)) { if (done != NULL) *done = cnt; curpcb->pcb_onfault = oldh; pmap_popusr(oldsr); return EFAULT; } while (l > 0) { c = *up; *kp = c; if (c == 0) { if (done != NULL) *done = cnt + 1; curpcb->pcb_onfault = oldh; pmap_popusr(oldsr); return 0; } up++; kp++; l--; cnt++; uaddr++; } pmap_popusr(oldsr); } curpcb->pcb_onfault = oldh; if (done != NULL) *done = cnt; return ENAMETOOLONG; } int copyoutstr(const void *kaddr, void *udaddr, size_t len, size_t *done) { u_char *uaddr = (void *)udaddr; const u_char *kp = kaddr; u_char *up; u_char c; void *p; size_t l; u_int32_t oldsr; int cnt = 0; faultbuf env; void 
*oldh = curpcb->pcb_onfault; while (len > 0) { p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK); l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; up = p; if (l > len) l = len; len -= l; oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr); if (setfault(&env)) { if (done != NULL) *done = cnt; curpcb->pcb_onfault = oldh; pmap_popusr(oldsr); return EFAULT; } while (l > 0) { c = *kp; *up = c; if (c == 0) { if (done != NULL) *done = cnt + 1; curpcb->pcb_onfault = oldh; pmap_popusr(oldsr); return 0; } up++; kp++; l--; cnt++; uaddr++; } pmap_popusr(oldsr); } curpcb->pcb_onfault = oldh; if (done != NULL) *done = cnt; return ENAMETOOLONG; } /* * sync instruction cache for user virtual address. * The address WAS JUST MAPPED, so we have a VALID USERSPACE mapping */ void pmap_syncicache_user_virt(pmap_t pm, vaddr_t va) { vaddr_t start; int oldsr; if (pm != pmap_kernel()) { start = ((u_int)PPC_USER_ADDR + ((u_int)va & ~PPC_SEGMENT_MASK)); /* will only ever be page size, will not cross segments */ /* USER SEGMENT LOCK - MPXXX */ oldsr = pmap_setusr(pm, va); } else { start = va; /* flush mapped page */ } syncicache((void *)start, PAGE_SIZE); if (pm != pmap_kernel()) { pmap_popusr(oldsr); /* USER SEGMENT UNLOCK -MPXXX */ } } void pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot) { if (ppc_proc_is_64b) pmap_pted_ro64(pted, prot); else pmap_pted_ro32(pted, prot); } void pmap_pted_ro64(struct pte_desc *pted, vm_prot_t prot) { pmap_t pm = pted->pted_pmap; vaddr_t va = pted->pted_va & ~PAGE_MASK; struct vm_page *pg; void *pte; int s; pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64); if (pg->pg_flags & PG_PMAP_EXE) { if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) { atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); } else { pmap_syncicache_user_virt(pm, va); } } pted->p.pted_pte64.pte_lo &= ~PTE_PP_64; pted->p.pted_pte64.pte_lo |= PTE_RO_64; if ((prot & PROT_EXEC) == 0) pted->p.pted_pte64.pte_lo |= PTE_N_64; if ((prot & (PROT_READ | PROT_WRITE)) == 0) pted->p.pted_pte64.pte_lo |= PTE_AC_64; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) { struct pte_64 *ptp64 = pte; pte_del(ptp64, va); if (PTED_MANAGED(pted)) { /* XXX */ pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64, ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64)); } /* Add a Page Table Entry, section 7.6.3.1. */ ptp64->pte_lo = pted->p.pted_pte64.pte_lo; eieio(); /* Order 1st PTE update before 2nd. */ ptp64->pte_hi |= PTE_VALID_64; sync(); /* Ensure updates completed. */ } PMAP_HASH_UNLOCK(s); } void pmap_pted_ro32(struct pte_desc *pted, vm_prot_t prot) { pmap_t pm = pted->pted_pmap; vaddr_t va = pted->pted_va & ~PAGE_MASK; struct vm_page *pg; void *pte; int s; pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32); if (pg->pg_flags & PG_PMAP_EXE) { if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) { atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); } else { pmap_syncicache_user_virt(pm, va); } } pted->p.pted_pte32.pte_lo &= ~PTE_PP_32; pted->p.pted_pte32.pte_lo |= PTE_RO_32; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) { struct pte_32 *ptp32 = pte; pte_del(ptp32, va); if (PTED_MANAGED(pted)) { /* XXX */ pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32, ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32)); } /* Add a Page Table Entry, section 7.6.3.1. */ ptp32->pte_lo &= ~(PTE_CHG_32|PTE_PP_32); ptp32->pte_lo |= PTE_RO_32; eieio(); /* Order 1st PTE update before 2nd. */ ptp32->pte_hi |= PTE_VALID_32; sync(); /* Ensure updates completed. */ } PMAP_HASH_UNLOCK(s); } /* * Lower the protection on the specified physical page. 
* * There are only two cases, either the protection is going to 0, * or it is going to read-only. */ void pmap_page_protect(struct vm_page *pg, vm_prot_t prot) { struct pte_desc *pted; void *pte; pmap_t pm; int s; if (prot == PROT_NONE) { mtx_enter(&pg->mdpage.pv_mtx); while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) { pmap_reference(pted->pted_pmap); pm = pted->pted_pmap; mtx_leave(&pg->mdpage.pv_mtx); PMAP_VP_LOCK(pm); /* * We dropped the pvlist lock before grabbing * the pmap lock to avoid lock ordering * problems. This means we have to check the * pvlist again since somebody else might have * modified it. All we care about is that the * pvlist entry matches the pmap we just * locked. If it doesn't, unlock the pmap and * try again. */ mtx_enter(&pg->mdpage.pv_mtx); if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL || pted->pted_pmap != pm) { mtx_leave(&pg->mdpage.pv_mtx); PMAP_VP_UNLOCK(pm); pmap_destroy(pm); mtx_enter(&pg->mdpage.pv_mtx); continue; } PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) pte_zap(pte, pted); PMAP_HASH_UNLOCK(s); pted->pted_va &= ~PTED_VA_MANAGED_M; LIST_REMOVE(pted, pted_pv_list); mtx_leave(&pg->mdpage.pv_mtx); pmap_remove_pted(pm, pted); PMAP_VP_UNLOCK(pm); pmap_destroy(pm); mtx_enter(&pg->mdpage.pv_mtx); } mtx_leave(&pg->mdpage.pv_mtx); /* page is being reclaimed, sync icache next use */ atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); return; } mtx_enter(&pg->mdpage.pv_mtx); LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) pmap_pted_ro(pted, prot); mtx_leave(&pg->mdpage.pv_mtx); } void pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) { if (prot & (PROT_READ | PROT_EXEC)) { struct pte_desc *pted; PMAP_VP_LOCK(pm); while (sva < eva) { pted = pmap_vp_lookup(pm, sva); if (pted && PTED_VALID(pted)) pmap_pted_ro(pted, prot); sva += PAGE_SIZE; } PMAP_VP_UNLOCK(pm); return; } pmap_remove(pm, sva, eva); } /* * Restrict given range to physical memory */ void pmap_real_memory(paddr_t *start, vsize_t *size) { struct mem_region *mp; for (mp = pmap_mem; mp->size; mp++) { if (((*start + *size) > mp->start) && (*start < (mp->start + mp->size))) { if (*start < mp->start) { *size -= mp->start - *start; *start = mp->start; } if ((*start + *size) > (mp->start + mp->size)) *size = mp->start + mp->size - *start; return; } } *size = 0; } void pmap_init() { pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0, "pmap", NULL); pool_setlowat(&pmap_pmap_pool, 2); pool_init(&pmap_vp_pool, sizeof(struct pmapvp), 0, IPL_VM, 0, "vp", &pool_allocator_single); pool_setlowat(&pmap_vp_pool, 10); pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0, "pted", NULL); pool_setlowat(&pmap_pted_pool, 20); pmap_initialized = 1; } void pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len) { paddr_t pa; vsize_t clen; while (len > 0) { /* add one to always round up to the next page */ clen = round_page(va + 1) - va; if (clen > len) clen = len; if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) { syncicache((void *)pa, clen); } len -= clen; va += clen; } } /* * There are two routines, pte_spill_r and pte_spill_v * the _r version only handles kernel faults which are not user * accesses. The _v version handles all user faults and kernel copyin/copyout * "user" accesses. 
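 *
 * pte_spill_r() must not fault itself (lookups are done physical):
 * user-mode faults and faults on the PPC_USER_SR copyin window are
 * punted back to the full exception handler, faults on the 1:1 region
 * below physmaxaddr are satisfied with a throw-away pte_desc built on
 * the stack, and anything else falls through to pte_spill_v().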
*/ int pte_spill_r(u_int32_t va, u_int32_t msr, u_int32_t dsisr, int exec_fault) { pmap_t pm; struct pte_desc *pted; struct pte_desc pted_store; /* lookup is done physical to prevent faults */ /* * This function only handles kernel faults, not supervisor copyins. */ if (msr & PSL_PR) return 0; /* if copyin, throw to full exception handler */ if (VP_SR(va) == PPC_USER_SR) return 0; pm = pmap_kernel(); /* 0 - physmaxaddr mapped 1-1 */ if (va < physmaxaddr) { u_int32_t aligned_va; vm_prot_t prot = PROT_READ | PROT_WRITE; extern caddr_t kernel_text; extern caddr_t etext; pted = &pted_store; if (va >= trunc_page((vaddr_t)&kernel_text) && va < round_page((vaddr_t)&etext)) { prot |= PROT_EXEC; } aligned_va = trunc_page(va); if (ppc_proc_is_64b) { pmap_fill_pte64(pm, aligned_va, aligned_va, pted, prot, PMAP_CACHE_WB); pte_insert64(pted); } else { pmap_fill_pte32(pm, aligned_va, aligned_va, pted, prot, PMAP_CACHE_WB); pte_insert32(pted); } return 1; } return pte_spill_v(pm, va, dsisr, exec_fault); } int pte_spill_v(pmap_t pm, u_int32_t va, u_int32_t dsisr, int exec_fault) { struct pte_desc *pted; int inserted = 0; /* * DSISR_DABR is set if the PowerPC 970 attempted to read or * write an execute-only page. */ if (dsisr & DSISR_DABR) return 0; /* * If the current mapping is RO and the access was a write * we return 0 */ PMAP_VP_LOCK(pm); pted = pmap_vp_lookup(pm, va); if (pted == NULL || !PTED_VALID(pted)) goto out; /* Attempted to write a read-only page. */ if (dsisr & DSISR_STORE) { if (ppc_proc_is_64b) { if ((pted->p.pted_pte64.pte_lo & PTE_PP_64) == PTE_RO_64) goto out; } else { if ((pted->p.pted_pte32.pte_lo & PTE_PP_32) == PTE_RO_32) goto out; } } /* Attempted to execute non-executable page. */ if ((exec_fault != 0) && ((pted->pted_va & PTED_VA_EXEC_M) == 0)) goto out; inserted = 1; if (ppc_proc_is_64b) pte_insert64(pted); else pte_insert32(pted); out: PMAP_VP_UNLOCK(pm); return (inserted); } /* * should pte_insert code avoid wired mappings? * is the stack safe? * is the pted safe? (physical) * -ugh */ void pte_insert64(struct pte_desc *pted) { struct pte_64 *ptp64; int off, secondary; int sr, idx, i; void *pte; int s; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) pte_zap(pte, pted); pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M); sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va); idx = pteidx(sr, pted->pted_va); /* * instead of starting at the beginning of each pteg, * the code should pick a random location with in the primary * then search all of the entries, then if not yet found, * do the same for the secondary. * this would reduce the frontloading of the pteg. */ /* first just try fill of primary hash */ ptp64 = pmap_ptable64 + (idx) * 8; for (i = 0; i < 8; i++) { if (ptp64[i].pte_hi & PTE_VALID_64) continue; pted->pted_va |= i; /* Add a Page Table Entry, section 7.6.3.1. */ ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64; ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo; eieio(); /* Order 1st PTE update before 2nd. */ ptp64[i].pte_hi |= PTE_VALID_64; sync(); /* Ensure updates completed. */ goto out; } /* try fill of secondary hash */ ptp64 = pmap_ptable64 + (idx ^ pmap_ptab_mask) * 8; for (i = 0; i < 8; i++) { if (ptp64[i].pte_hi & PTE_VALID_64) continue; pted->pted_va |= (i | PTED_VA_HID_M); /* Add a Page Table Entry, section 7.6.3.1. */ ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64; ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo; eieio(); /* Order 1st PTE update before 2nd. 
*/ ptp64[i].pte_hi |= (PTE_HID_64|PTE_VALID_64); sync(); /* Ensure updates completed. */ goto out; } /* need decent replacement algorithm */ off = ppc_mftb(); secondary = off & 8; pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M); idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)); ptp64 = pmap_ptable64 + (idx * 8); ptp64 += PTED_PTEGIDX(pted); /* increment by index into pteg */ if (ptp64->pte_hi & PTE_VALID_64) { vaddr_t va; /* Bits 9-19 */ idx = (idx ^ ((ptp64->pte_hi & PTE_HID_64) ? pmap_ptab_mask : 0)); va = (ptp64->pte_hi >> PTE_VSID_SHIFT_64) ^ idx; va <<= ADDR_PIDX_SHIFT; /* Bits 4-8 */ va |= (ptp64->pte_hi & PTE_API_64) << ADDR_API_SHIFT_32; /* Bits 0-3 */ va |= (ptp64->pte_hi >> PTE_VSID_SHIFT_64) << ADDR_SR_SHIFT; pte_del(ptp64, va); pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64, ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64)); } /* Add a Page Table Entry, section 7.6.3.1. */ ptp64->pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64; if (secondary) ptp64->pte_hi |= PTE_HID_64; ptp64->pte_lo = pted->p.pted_pte64.pte_lo; eieio(); /* Order 1st PTE update before 2nd. */ ptp64->pte_hi |= PTE_VALID_64; sync(); /* Ensure updates completed. */ out: PMAP_HASH_UNLOCK(s); } void pte_insert32(struct pte_desc *pted) { struct pte_32 *ptp32; int off, secondary; int sr, idx, i; void *pte; int s; PMAP_HASH_LOCK(s); if ((pte = pmap_ptedinhash(pted)) != NULL) pte_zap(pte, pted); pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M); sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va); idx = pteidx(sr, pted->pted_va); /* * instead of starting at the beginning of each pteg, * the code should pick a random location with in the primary * then search all of the entries, then if not yet found, * do the same for the secondary. * this would reduce the frontloading of the pteg. */ /* first just try fill of primary hash */ ptp32 = pmap_ptable32 + (idx) * 8; for (i = 0; i < 8; i++) { if (ptp32[i].pte_hi & PTE_VALID_32) continue; pted->pted_va |= i; /* Add a Page Table Entry, section 7.6.3.1. */ ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32; ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo; eieio(); /* Order 1st PTE update before 2nd. */ ptp32[i].pte_hi |= PTE_VALID_32; sync(); /* Ensure updates completed. */ goto out; } /* try fill of secondary hash */ ptp32 = pmap_ptable32 + (idx ^ pmap_ptab_mask) * 8; for (i = 0; i < 8; i++) { if (ptp32[i].pte_hi & PTE_VALID_32) continue; pted->pted_va |= (i | PTED_VA_HID_M); /* Add a Page Table Entry, section 7.6.3.1. */ ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32; ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo; eieio(); /* Order 1st PTE update before 2nd. */ ptp32[i].pte_hi |= (PTE_HID_32|PTE_VALID_32); sync(); /* Ensure updates completed. */ goto out; } /* need decent replacement algorithm */ off = ppc_mftb(); secondary = off & 8; pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M); idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)); ptp32 = pmap_ptable32 + (idx * 8); ptp32 += PTED_PTEGIDX(pted); /* increment by index into pteg */ if (ptp32->pte_hi & PTE_VALID_32) { vaddr_t va; va = ((ptp32->pte_hi & PTE_API_32) << ADDR_API_SHIFT_32) | ((((ptp32->pte_hi >> PTE_VSID_SHIFT_32) & SR_VSID) ^(idx ^ ((ptp32->pte_hi & PTE_HID_32) ? 0x3ff : 0))) & 0x3ff) << PAGE_SHIFT; pte_del(ptp32, va); pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32, ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32)); } /* Add a Page Table Entry, section 7.6.3.1. 
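	 *
	 * As above, the entry is written with the valid bit clear,
	 * eieio() orders the pte_lo store before the pte_hi store that
	 * sets PTE_VALID_32, and sync() ensures the update has
	 * completed.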
	 */
	ptp32->pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
	if (secondary)
		ptp32->pte_hi |= PTE_HID_32;
	ptp32->pte_lo = pted->p.pted_pte32.pte_lo;
	eieio();	/* Order 1st PTE update before 2nd. */
	ptp32->pte_hi |= PTE_VALID_32;
	sync();		/* Ensure updates completed. */

out:
	PMAP_HASH_UNLOCK(s);
}