Blame - marvell/linux/mm/page_io.c - T108

blob: b15586de598eeac6033e36dc7332f8d7a2aa06d4 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* linux/mm/page_io.c
				4	*
				5	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				6	*
				7	* Swap reorganised 29.12.95,
				8	* Asynchronous swapping added 30.12.95. Stephen Tweedie
				9	* Removed race in async swapping. 14.4.1996. Bruno Haible
				10	* Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
				11	* Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
				12	*/
				13
				14	#include <linux/mm.h>
				15	#include <linux/kernel_stat.h>
				16	#include <linux/gfp.h>
				17	#include <linux/pagemap.h>
				18	#include <linux/swap.h>
				19	#include <linux/bio.h>
				20	#include <linux/swapops.h>
				21	#include <linux/buffer_head.h>
				22	#include <linux/writeback.h>
				23	#include <linux/frontswap.h>
				24	#include <linux/blkdev.h>
				25	#include <linux/psi.h>
				26	#include <linux/uio.h>
				27	#include <linux/sched/task.h>
				28	#include <asm/pgtable.h>
				29
				30	static struct bio *get_swap_bio(gfp_t gfp_flags,
				31	struct page *page, bio_end_io_t end_io)
				32	{
				33	struct bio *bio;
				34
				35	bio = bio_alloc(gfp_flags, 1);
				36	if (bio) {
				37	struct block_device *bdev;
				38
				39	bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
				40	bio_set_dev(bio, bdev);
				41	bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
				42	bio->bi_end_io = end_io;
				43
				44	bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
				45	}
				46	return bio;
				47	}
				48
				49	void end_swap_bio_write(struct bio *bio)
				50	{
				51	struct page *page = bio_first_page_all(bio);
				52
				53	if (bio->bi_status) {
				54	SetPageError(page);
				55	/*
				56	* We failed to write the page out to swap-space.
				57	* Re-dirty the page in order to avoid it being reclaimed.
				58	* Also print a dire warning that things will go BAD (tm)
				59	* very quickly.
				60	*
				61	* Also clear PG_reclaim to avoid rotate_reclaimable_page()
				62	*/
				63	set_page_dirty(page);
				64	pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
				65	MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				66	(unsigned long long)bio->bi_iter.bi_sector);
				67	ClearPageReclaim(page);
				68	}
				69	end_page_writeback(page);
				70	bio_put(bio);
				71	}
				72
				73	static void end_swap_bio_read(struct bio *bio)
				74	{
				75	struct page *page = bio_first_page_all(bio);
				76	struct task_struct *waiter = bio->bi_private;
				77
				78	if (bio->bi_status) {
				79	SetPageError(page);
				80	ClearPageUptodate(page);
				81	pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
				82	MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				83	(unsigned long long)bio->bi_iter.bi_sector);
				84	goto out;
				85	}
				86
				87	SetPageUptodate(page);
				88	out:
				89	unlock_page(page);
				90	WRITE_ONCE(bio->bi_private, NULL);
				91	bio_put(bio);
				92	if (waiter) {
				93	blk_wake_io_task(waiter);
				94	put_task_struct(waiter);
				95	}
				96	}
				97
				98	int generic_swapfile_activate(struct swap_info_struct *sis,
				99	struct file *swap_file,
				100	sector_t *span)
				101	{
				102	struct address_space *mapping = swap_file->f_mapping;
				103	struct inode *inode = mapping->host;
				104	unsigned blocks_per_page;
				105	unsigned long page_no;
				106	unsigned blkbits;
				107	sector_t probe_block;
				108	sector_t last_block;
				109	sector_t lowest_block = -1;
				110	sector_t highest_block = 0;
				111	int nr_extents = 0;
				112	int ret;
				113
				114	blkbits = inode->i_blkbits;
				115	blocks_per_page = PAGE_SIZE >> blkbits;
				116
				117	/*
				118	* Map all the blocks into the extent tree. This code doesn't try
				119	* to be very smart.
				120	*/
				121	probe_block = 0;
				122	page_no = 0;
				123	last_block = i_size_read(inode) >> blkbits;
				124	while ((probe_block + blocks_per_page) <= last_block &&
				125	page_no < sis->max) {
				126	unsigned block_in_page;
				127	sector_t first_block;
				128
				129	cond_resched();
				130
				131	first_block = probe_block;
				132	ret = bmap(inode, &first_block);
				133	if (ret \|\| !first_block)
				134	goto bad_bmap;
				135
				136	/*
				137	* It must be PAGE_SIZE aligned on-disk
				138	*/
				139	if (first_block & (blocks_per_page - 1)) {
				140	probe_block++;
				141	goto reprobe;
				142	}
				143
				144	for (block_in_page = 1; block_in_page < blocks_per_page;
				145	block_in_page++) {
				146	sector_t block;
				147
				148	block = probe_block + block_in_page;
				149	ret = bmap(inode, &block);
				150	if (ret \|\| !block)
				151	goto bad_bmap;
				152
				153	if (block != first_block + block_in_page) {
				154	/* Discontiguity */
				155	probe_block++;
				156	goto reprobe;
				157	}
				158	}
				159
				160	first_block >>= (PAGE_SHIFT - blkbits);
				161	if (page_no) { /* exclude the header page */
				162	if (first_block < lowest_block)
				163	lowest_block = first_block;
				164	if (first_block > highest_block)
				165	highest_block = first_block;
				166	}
				167
				168	/*
				169	* We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
				170	*/
				171	ret = add_swap_extent(sis, page_no, 1, first_block);
				172	if (ret < 0)
				173	goto out;
				174	nr_extents += ret;
				175	page_no++;
				176	probe_block += blocks_per_page;
				177	reprobe:
				178	continue;
				179	}
				180	ret = nr_extents;
				181	*span = 1 + highest_block - lowest_block;
				182	if (page_no == 0)
				183	page_no = 1; /* force Empty message */
				184	sis->max = page_no;
				185	sis->pages = page_no - 1;
				186	sis->highest_bit = page_no - 1;
				187	out:
				188	return ret;
				189	bad_bmap:
				190	pr_err("swapon: swapfile has holes\n");
				191	ret = -EINVAL;
				192	goto out;
				193	}
				194
				195	/*
				196	* We may have stale swap cache pages in memory: notice
				197	* them here and get rid of the unnecessary final write.
				198	*/
				199	int swap_writepage(struct page page, struct writeback_control wbc)
				200	{
				201	int ret = 0;
				202
				203	if (try_to_free_swap(page)) {
				204	unlock_page(page);
				205	goto out;
				206	}
				207	if (frontswap_store(page) == 0) {
				208	set_page_writeback(page);
				209	unlock_page(page);
				210	end_page_writeback(page);
				211	goto out;
				212	}
				213	ret = __swap_writepage(page, wbc, end_swap_bio_write);
				214	out:
				215	return ret;
				216	}
				217
				218	static inline void count_swpout_vm_event(struct page *page)
				219	{
				220	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
				221	if (unlikely(PageTransHuge(page)))
				222	count_vm_event(THP_SWPOUT);
				223	#endif
				224	count_vm_events(PSWPOUT, hpage_nr_pages(page));
				225	}
				226
				227	int __swap_writepage(struct page page, struct writeback_control wbc,
				228	bio_end_io_t end_write_func)
				229	{
				230	struct bio *bio;
				231	int ret;
				232	struct swap_info_struct *sis = page_swap_info(page);
				233
				234	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
				235	if (sis->flags & SWP_FS) {
				236	struct kiocb kiocb;
				237	struct file *swap_file = sis->swap_file;
				238	struct address_space *mapping = swap_file->f_mapping;
				239	struct bio_vec bv = {
				240	.bv_page = page,
				241	.bv_len = PAGE_SIZE,
				242	.bv_offset = 0
				243	};
				244	struct iov_iter from;
				245
				246	iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
				247	init_sync_kiocb(&kiocb, swap_file);
				248	kiocb.ki_pos = page_file_offset(page);
				249
				250	set_page_writeback(page);
				251	unlock_page(page);
				252	ret = mapping->a_ops->direct_IO(&kiocb, &from);
				253	if (ret == PAGE_SIZE) {
				254	count_vm_event(PSWPOUT);
				255	ret = 0;
				256	} else {
				257	/*
				258	* In the case of swap-over-nfs, this can be a
				259	* temporary failure if the system has limited
				260	* memory for allocating transmit buffers.
				261	* Mark the page dirty and avoid
				262	* rotate_reclaimable_page but rate-limit the
				263	* messages but do not flag PageError like
				264	* the normal direct-to-bio case as it could
				265	* be temporary.
				266	*/
				267	set_page_dirty(page);
				268	ClearPageReclaim(page);
				269	pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
				270	page_file_offset(page));
				271	}
				272	end_page_writeback(page);
				273	return ret;
				274	}
				275
				276	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
				277	if (!ret) {
				278	count_swpout_vm_event(page);
				279	return 0;
				280	}
				281
				282	ret = 0;
				283	bio = get_swap_bio(GFP_NOIO, page, end_write_func);
				284	if (bio == NULL) {
				285	set_page_dirty(page);
				286	unlock_page(page);
				287	ret = -ENOMEM;
				288	goto out;
				289	}
				290	bio->bi_opf = REQ_OP_WRITE \| REQ_SWAP \| wbc_to_write_flags(wbc);
				291	bio_associate_blkg_from_page(bio, page);
				292	count_swpout_vm_event(page);
				293	set_page_writeback(page);
				294	unlock_page(page);
				295	submit_bio(bio);
				296	out:
				297	return ret;
				298	}
				299
				300	int swap_readpage(struct page *page, bool synchronous)
				301	{
				302	struct bio *bio;
				303	int ret = 0;
				304	struct swap_info_struct *sis = page_swap_info(page);
				305	blk_qc_t qc;
				306	struct gendisk *disk;
				307	unsigned long pflags;
				308
				309	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
				310	VM_BUG_ON_PAGE(!PageLocked(page), page);
				311	VM_BUG_ON_PAGE(PageUptodate(page), page);
				312
				313	/*
				314	* Count submission time as memory stall. When the device is congested,
				315	* or the submitting cgroup IO-throttled, submission can be a
				316	* significant part of overall IO time.
				317	*/
				318	psi_memstall_enter(&pflags);
				319
				320	if (frontswap_load(page) == 0) {
				321	SetPageUptodate(page);
				322	unlock_page(page);
				323	goto out;
				324	}
				325
				326	if (sis->flags & SWP_FS) {
				327	struct file *swap_file = sis->swap_file;
				328	struct address_space *mapping = swap_file->f_mapping;
				329
				330	ret = mapping->a_ops->readpage(swap_file, page);
				331	if (!ret)
				332	count_vm_event(PSWPIN);
				333	goto out;
				334	}
				335
				336	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
				337	if (!ret) {
				338	count_vm_event(PSWPIN);
				339	goto out;
				340	}
				341
				342	ret = 0;
				343	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
				344	if (bio == NULL) {
				345	unlock_page(page);
				346	ret = -ENOMEM;
				347	goto out;
				348	}
				349	disk = bio->bi_disk;
				350	/*
				351	* Keep this task valid during swap readpage because the oom killer may
				352	* attempt to access it in the page fault retry time check.
				353	*/
				354	bio_set_op_attrs(bio, REQ_OP_READ, 0);
				355	if (synchronous) {
				356	bio->bi_opf \|= REQ_HIPRI;
				357	get_task_struct(current);
				358	bio->bi_private = current;
				359	}
				360	count_vm_event(PSWPIN);
				361	bio_get(bio);
				362	qc = submit_bio(bio);
				363	while (synchronous) {
				364	set_current_state(TASK_UNINTERRUPTIBLE);
				365	if (!READ_ONCE(bio->bi_private))
				366	break;
				367
				368	if (!blk_poll(disk->queue, qc, true))
				369	io_schedule();
				370	}
				371	__set_current_state(TASK_RUNNING);
				372	bio_put(bio);
				373
				374	out:
				375	psi_memstall_leave(&pflags);
				376	return ret;
				377	}
				378
				379	int swap_set_page_dirty(struct page *page)
				380	{
				381	struct swap_info_struct *sis = page_swap_info(page);
				382
				383	if (sis->flags & SWP_FS) {
				384	struct address_space *mapping = sis->swap_file->f_mapping;
				385
				386	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
				387	return mapping->a_ops->set_page_dirty(page);
				388	} else {
				389	return __set_page_dirty_no_writeback(page);
				390	}
				391	}