Blame - marvell/linux/arch/powerpc/mm/book3s64/hash_tlb.c - T108

blob: 4a70d8dd39cd66eb0e536f9216ad0c67e516c9fa [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-or-later
				2	/*
				3	* This file contains the routines for flushing entries from the
				4	* TLB and MMU hash table.
				5	*
				6	* Derived from arch/ppc64/mm/init.c:
				7	* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
				8	*
				9	* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
				10	* and Cort Dougan (PReP) (cort@cs.nmt.edu)
				11	* Copyright (C) 1996 Paul Mackerras
				12	*
				13	* Derived from "arch/i386/mm/init.c"
				14	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				15	*
				16	* Dave Engebretsen <engebret@us.ibm.com>
				17	* Rework for PPC64 port.
				18	*/
				19
				20	#include <linux/kernel.h>
				21	#include <linux/mm.h>
				22	#include <linux/percpu.h>
				23	#include <linux/hardirq.h>
				24	#include <asm/pgalloc.h>
				25	#include <asm/tlbflush.h>
				26	#include <asm/tlb.h>
				27	#include <asm/bug.h>
				28	#include <asm/pte-walk.h>
				29
				30
				31	#include <trace/events/thp.h>
				32
				33	DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
				34
				35	/*
				36	* A linux PTE was changed and the corresponding hash table entry
				37	* neesd to be flushed. This function will either perform the flush
				38	* immediately or will batch it up if the current CPU has an active
				39	* batch on it.
				40	*/
				41	void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
				42	pte_t *ptep, unsigned long pte, int huge)
				43	{
				44	unsigned long vpn;
				45	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
				46	unsigned long vsid;
				47	unsigned int psize;
				48	int ssize;
				49	real_pte_t rpte;
				50	int i, offset;
				51
				52	i = batch->index;
				53
				54	/*
				55	* Get page size (maybe move back to caller).
				56	*
				57	* NOTE: when using special 64K mappings in 4K environment like
				58	* for SPEs, we obtain the page size from the slice, which thus
				59	* must still exist (and thus the VMA not reused) at the time
				60	* of this call
				61	*/
				62	if (huge) {
				63	#ifdef CONFIG_HUGETLB_PAGE
				64	psize = get_slice_psize(mm, addr);
				65	/* Mask the address for the correct page size */
				66	addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
				67	if (unlikely(psize == MMU_PAGE_16G))
				68	offset = PTRS_PER_PUD;
				69	else
				70	offset = PTRS_PER_PMD;
				71	#else
				72	BUG();
				73	psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
				74	#endif
				75	} else {
				76	psize = pte_pagesize_index(mm, addr, pte);
				77	/*
				78	* Mask the address for the standard page size. If we
				79	* have a 64k page kernel, but the hardware does not
				80	* support 64k pages, this might be different from the
				81	* hardware page size encoded in the slice table.
				82	*/
				83	addr &= PAGE_MASK;
				84	offset = PTRS_PER_PTE;
				85	}
				86
				87
				88	/* Build full vaddr */
				89	if (!is_kernel_addr(addr)) {
				90	ssize = user_segment_size(addr);
				91	vsid = get_user_vsid(&mm->context, addr, ssize);
				92	} else {
				93	vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
				94	ssize = mmu_kernel_ssize;
				95	}
				96	WARN_ON(vsid == 0);
				97	vpn = hpt_vpn(addr, vsid, ssize);
				98	rpte = __real_pte(__pte(pte), ptep, offset);
				99
				100	/*
				101	* Check if we have an active batch on this CPU. If not, just
				102	* flush now and return.
				103	*/
				104	if (!batch->active) {
				105	flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
				106	put_cpu_var(ppc64_tlb_batch);
				107	return;
				108	}
				109
				110	/*
				111	* This can happen when we are in the middle of a TLB batch and
				112	* we encounter memory pressure (eg copy_page_range when it tries
				113	* to allocate a new pte). If we have to reclaim memory and end
				114	* up scanning and resetting referenced bits then our batch context
				115	* will change mid stream.
				116	*
				117	* We also need to ensure only one page size is present in a given
				118	* batch
				119	*/
				120	if (i != 0 && (mm != batch->mm \|\| batch->psize != psize \|\|
				121	batch->ssize != ssize)) {
				122	__flush_tlb_pending(batch);
				123	i = 0;
				124	}
				125	if (i == 0) {
				126	batch->mm = mm;
				127	batch->psize = psize;
				128	batch->ssize = ssize;
				129	}
				130	batch->pte[i] = rpte;
				131	batch->vpn[i] = vpn;
				132	batch->index = ++i;
				133	if (i >= PPC64_TLB_BATCH_NR)
				134	__flush_tlb_pending(batch);
				135	put_cpu_var(ppc64_tlb_batch);
				136	}
				137
				138	/*
				139	* This function is called when terminating an mmu batch or when a batch
				140	* is full. It will perform the flush of all the entries currently stored
				141	* in a batch.
				142	*
				143	* Must be called from within some kind of spinlock/non-preempt region...
				144	*/
				145	void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
				146	{
				147	int i, local;
				148
				149	i = batch->index;
				150	local = mm_is_thread_local(batch->mm);
				151	if (i == 1)
				152	flush_hash_page(batch->vpn[0], batch->pte[0],
				153	batch->psize, batch->ssize, local);
				154	else
				155	flush_hash_range(i, local);
				156	batch->index = 0;
				157	}
				158
				159	void hash__tlb_flush(struct mmu_gather *tlb)
				160	{
				161	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
				162
				163	/*
				164	* If there's a TLB batch pending, then we must flush it because the
				165	* pages are going to be freed and we really don't want to have a CPU
				166	* access a freed page because it has a stale TLB
				167	*/
				168	if (tlbbatch->index)
				169	__flush_tlb_pending(tlbbatch);
				170
				171	put_cpu_var(ppc64_tlb_batch);
				172	}
				173
				174	/**
				175	* __flush_hash_table_range - Flush all HPTEs for a given address range
				176	* from the hash table (and the TLB). But keeps
				177	* the linux PTEs intact.
				178	*
				179	* @mm : mm_struct of the target address space (generally init_mm)
				180	* @start : starting address
				181	* @end : ending address (not included in the flush)
				182	*
				183	* This function is mostly to be used by some IO hotplug code in order
				184	* to remove all hash entries from a given address range used to map IO
				185	* space on a removed PCI-PCI bidge without tearing down the full mapping
				186	* since 64K pages may overlap with other bridges when using 64K pages
				187	* with 4K HW pages on IO space.
				188	*
				189	* Because of that usage pattern, it is implemented for small size rather
				190	* than speed.
				191	*/
				192	void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
				193	unsigned long end)
				194	{
				195	bool is_thp;
				196	int hugepage_shift;
				197	unsigned long flags;
				198
				199	start = _ALIGN_DOWN(start, PAGE_SIZE);
				200	end = _ALIGN_UP(end, PAGE_SIZE);
				201
				202	BUG_ON(!mm->pgd);
				203
				204	/*
				205	* Note: Normally, we should only ever use a batch within a
				206	* PTE locked section. This violates the rule, but will work
				207	* since we don't actually modify the PTEs, we just flush the
				208	* hash while leaving the PTEs intact (including their reference
				209	* to being hashed). This is not the most performance oriented
				210	* way to do things but is fine for our needs here.
				211	*/
				212	local_irq_save(flags);
				213	arch_enter_lazy_mmu_mode();
				214	for (; start < end; start += PAGE_SIZE) {
				215	pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp,
				216	&hugepage_shift);
				217	unsigned long pte;
				218
				219	if (ptep == NULL)
				220	continue;
				221	pte = pte_val(*ptep);
				222	if (is_thp)
				223	trace_hugepage_invalidate(start, pte);
				224	if (!(pte & H_PAGE_HASHPTE))
				225	continue;
				226	if (unlikely(is_thp))
				227	hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
				228	else
				229	hpte_need_flush(mm, start, ptep, pte, hugepage_shift);
				230	}
				231	arch_leave_lazy_mmu_mode();
				232	local_irq_restore(flags);
				233	}
				234
				235	void flush_tlb_pmd_range(struct mm_struct mm, pmd_t pmd, unsigned long addr)
				236	{
				237	pte_t *pte;
				238	pte_t *start_pte;
				239	unsigned long flags;
				240
				241	addr = _ALIGN_DOWN(addr, PMD_SIZE);
				242	/*
				243	* Note: Normally, we should only ever use a batch within a
				244	* PTE locked section. This violates the rule, but will work
				245	* since we don't actually modify the PTEs, we just flush the
				246	* hash while leaving the PTEs intact (including their reference
				247	* to being hashed). This is not the most performance oriented
				248	* way to do things but is fine for our needs here.
				249	*/
				250	local_irq_save(flags);
				251	arch_enter_lazy_mmu_mode();
				252	start_pte = pte_offset_map(pmd, addr);
				253	for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
				254	unsigned long pteval = pte_val(*pte);
				255	if (pteval & H_PAGE_HASHPTE)
				256	hpte_need_flush(mm, addr, pte, pteval, 0);
				257	addr += PAGE_SIZE;
				258	}
				259	arch_leave_lazy_mmu_mode();
				260	local_irq_restore(flags);
				261	}