Blame - marvell/linux/fs/f2fs/data.c - T108

blob: a27d6fac14dbdcc9e149269ad9f83859dced2368 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* fs/f2fs/data.c
				4	*
				5	* Copyright (c) 2012 Samsung Electronics Co., Ltd.
				6	* http://www.samsung.com/
				7	*/
				8	#include <linux/fs.h>
				9	#include <linux/f2fs_fs.h>
				10	#include <linux/buffer_head.h>
				11	#include <linux/mpage.h>
				12	#include <linux/writeback.h>
				13	#include <linux/backing-dev.h>
				14	#include <linux/pagevec.h>
				15	#include <linux/blkdev.h>
				16	#include <linux/bio.h>
				17	#include <linux/blk-crypto.h>
				18	#include <linux/swap.h>
				19	#include <linux/prefetch.h>
				20	#include <linux/uio.h>
				21	#include <linux/cleancache.h>
				22	#include <linux/sched/signal.h>
				23
				24	#include "f2fs.h"
				25	#include "node.h"
				26	#include "segment.h"
				27	#include <trace/events/f2fs.h>
				28	#include <trace/events/android_fs.h>
				29
				30	#define NUM_PREALLOC_POST_READ_CTXS 128
				31
				32	static struct kmem_cache *bio_post_read_ctx_cache;
				33	static struct kmem_cache *bio_entry_slab;
				34	static mempool_t *bio_post_read_ctx_pool;
				35	static struct bio_set f2fs_bioset;
				36
				37	#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
				38
				39	int __init f2fs_init_bioset(void)
				40	{
				41	if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
				42	0, BIOSET_NEED_BVECS))
				43	return -ENOMEM;
				44	return 0;
				45	}
				46
				47	void f2fs_destroy_bioset(void)
				48	{
				49	bioset_exit(&f2fs_bioset);
				50	}
				51
				52	static bool __is_cp_guaranteed(struct page *page)
				53	{
				54	struct address_space *mapping = page->mapping;
				55	struct inode *inode;
				56	struct f2fs_sb_info *sbi;
				57
				58	if (!mapping)
				59	return false;
				60
				61	inode = mapping->host;
				62	sbi = F2FS_I_SB(inode);
				63
				64	if (inode->i_ino == F2FS_META_INO(sbi) \|\|
				65	inode->i_ino == F2FS_NODE_INO(sbi) \|\|
				66	S_ISDIR(inode->i_mode))
				67	return true;
				68
				69	if (f2fs_is_compressed_page(page))
				70	return false;
				71	if ((S_ISREG(inode->i_mode) &&
				72	(f2fs_is_atomic_file(inode) \|\| IS_NOQUOTA(inode))) \|\|
				73	page_private_gcing(page))
				74	return true;
				75	return false;
				76	}
				77
				78	static enum count_type __read_io_type(struct page *page)
				79	{
				80	struct address_space *mapping = page_file_mapping(page);
				81
				82	if (mapping) {
				83	struct inode *inode = mapping->host;
				84	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				85
				86	if (inode->i_ino == F2FS_META_INO(sbi))
				87	return F2FS_RD_META;
				88
				89	if (inode->i_ino == F2FS_NODE_INO(sbi))
				90	return F2FS_RD_NODE;
				91	}
				92	return F2FS_RD_DATA;
				93	}
				94
				95	/* postprocessing steps for read bios */
				96	enum bio_post_read_step {
				97	#ifdef CONFIG_FS_ENCRYPTION
				98	STEP_DECRYPT = 1 << 0,
				99	#else
				100	STEP_DECRYPT = 0, /* compile out the decryption-related code */
				101	#endif
				102	#ifdef CONFIG_F2FS_FS_COMPRESSION
				103	STEP_DECOMPRESS = 1 << 1,
				104	#else
				105	STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
				106	#endif
				107	#ifdef CONFIG_FS_VERITY
				108	STEP_VERITY = 1 << 2,
				109	#else
				110	STEP_VERITY = 0, /* compile out the verity-related code */
				111	#endif
				112	};
				113
				114	struct bio_post_read_ctx {
				115	struct bio *bio;
				116	struct f2fs_sb_info *sbi;
				117	struct work_struct work;
				118	unsigned int enabled_steps;
				119	};
				120
				121	static void f2fs_finish_read_bio(struct bio *bio)
				122	{
				123	struct bio_vec *bv;
				124	struct bvec_iter_all iter_all;
				125
				126	/*
				127	* Update and unlock the bio's pagecache pages, and put the
				128	* decompression context for any compressed pages.
				129	*/
				130	bio_for_each_segment_all(bv, bio, iter_all) {
				131	struct page *page = bv->bv_page;
				132
				133	if (f2fs_is_compressed_page(page)) {
				134	if (bio->bi_status)
				135	f2fs_end_read_compressed_page(page, true, 0);
				136	f2fs_put_page_dic(page);
				137	continue;
				138	}
				139
				140	/* PG_error was set if decryption or verity failed. */
				141	if (bio->bi_status \|\| PageError(page)) {
				142	ClearPageUptodate(page);
				143	/* will re-read again later */
				144	ClearPageError(page);
				145	} else {
				146	SetPageUptodate(page);
				147	}
				148	dec_page_count(F2FS_P_SB(page), __read_io_type(page));
				149	unlock_page(page);
				150	}
				151
				152	if (bio->bi_private)
				153	mempool_free(bio->bi_private, bio_post_read_ctx_pool);
				154	bio_put(bio);
				155	}
				156
				157	static void f2fs_verify_bio(struct work_struct *work)
				158	{
				159	struct bio_post_read_ctx *ctx =
				160	container_of(work, struct bio_post_read_ctx, work);
				161	struct bio *bio = ctx->bio;
				162	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
				163
				164	/*
				165	* fsverity_verify_bio() may call readpages() again, and while verity
				166	* will be disabled for this, decryption and/or decompression may still
				167	* be needed, resulting in another bio_post_read_ctx being allocated.
				168	* So to prevent deadlocks we need to release the current ctx to the
				169	* mempool first. This assumes that verity is the last post-read step.
				170	*/
				171	mempool_free(ctx, bio_post_read_ctx_pool);
				172	bio->bi_private = NULL;
				173
				174	/*
				175	* Verify the bio's pages with fs-verity. Exclude compressed pages,
				176	* as those were handled separately by f2fs_end_read_compressed_page().
				177	*/
				178	if (may_have_compressed_pages) {
				179	struct bio_vec *bv;
				180	struct bvec_iter_all iter_all;
				181
				182	bio_for_each_segment_all(bv, bio, iter_all) {
				183	struct page *page = bv->bv_page;
				184
				185	if (!f2fs_is_compressed_page(page) &&
				186	!PageError(page) && !fsverity_verify_page(page))
				187	SetPageError(page);
				188	}
				189	} else {
				190	fsverity_verify_bio(bio);
				191	}
				192
				193	f2fs_finish_read_bio(bio);
				194	}
				195
				196	/*
				197	* If the bio's data needs to be verified with fs-verity, then enqueue the
				198	* verity work for the bio. Otherwise finish the bio now.
				199	*
				200	* Note that to avoid deadlocks, the verity work can't be done on the
				201	* decryption/decompression workqueue. This is because verifying the data pages
				202	* can involve reading verity metadata pages from the file, and these verity
				203	* metadata pages may be encrypted and/or compressed.
				204	*/
				205	static void f2fs_verify_and_finish_bio(struct bio *bio)
				206	{
				207	struct bio_post_read_ctx *ctx = bio->bi_private;
				208
				209	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
				210	INIT_WORK(&ctx->work, f2fs_verify_bio);
				211	fsverity_enqueue_verify_work(&ctx->work);
				212	} else {
				213	f2fs_finish_read_bio(bio);
				214	}
				215	}
				216
				217	/*
				218	* Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
				219	* remaining page was read by @ctx->bio.
				220	*
				221	* Note that a bio may span clusters (even a mix of compressed and uncompressed
				222	* clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
				223	* that the bio includes at least one compressed page. The actual decompression
				224	* is done on a per-cluster basis, not a per-bio basis.
				225	*/
				226	static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
				227	{
				228	struct bio_vec *bv;
				229	struct bvec_iter_all iter_all;
				230	bool all_compressed = true;
				231	block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector);
				232
				233	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
				234	struct page *page = bv->bv_page;
				235
				236	/* PG_error was set if decryption failed. */
				237	if (f2fs_is_compressed_page(page))
				238	f2fs_end_read_compressed_page(page, PageError(page),
				239	blkaddr);
				240	else
				241	all_compressed = false;
				242
				243	blkaddr++;
				244	}
				245
				246	/*
				247	* Optimization: if all the bio's pages are compressed, then scheduling
				248	* the per-bio verity work is unnecessary, as verity will be fully
				249	* handled at the compression cluster level.
				250	*/
				251	if (all_compressed)
				252	ctx->enabled_steps &= ~STEP_VERITY;
				253	}
				254
				255	static void f2fs_post_read_work(struct work_struct *work)
				256	{
				257	struct bio_post_read_ctx *ctx =
				258	container_of(work, struct bio_post_read_ctx, work);
				259
				260	if (ctx->enabled_steps & STEP_DECRYPT)
				261	fscrypt_decrypt_bio(ctx->bio);
				262
				263	if (ctx->enabled_steps & STEP_DECOMPRESS)
				264	f2fs_handle_step_decompress(ctx);
				265
				266	f2fs_verify_and_finish_bio(ctx->bio);
				267	}
				268
				269	static void f2fs_read_end_io(struct bio *bio)
				270	{
				271	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
				272	struct bio_post_read_ctx *ctx = bio->bi_private;
				273
				274	if (time_to_inject(sbi, FAULT_READ_IO)) {
				275	f2fs_show_injection_info(sbi, FAULT_READ_IO);
				276	bio->bi_status = BLK_STS_IOERR;
				277	}
				278
				279	if (bio->bi_status) {
				280	f2fs_finish_read_bio(bio);
				281	return;
				282	}
				283
				284	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT \| STEP_DECOMPRESS))) {
				285	INIT_WORK(&ctx->work, f2fs_post_read_work);
				286	queue_work(ctx->sbi->post_read_wq, &ctx->work);
				287	} else {
				288	f2fs_verify_and_finish_bio(bio);
				289	}
				290	}
				291
				292	static void f2fs_write_end_io(struct bio *bio)
				293	{
				294	struct f2fs_sb_info *sbi = bio->bi_private;
				295	struct bio_vec *bvec;
				296	struct bvec_iter_all iter_all;
				297
				298	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
				299	f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
				300	bio->bi_status = BLK_STS_IOERR;
				301	}
				302
				303	bio_for_each_segment_all(bvec, bio, iter_all) {
				304	struct page *page = bvec->bv_page;
				305	enum count_type type = WB_DATA_TYPE(page);
				306
				307	if (page_private_dummy(page)) {
				308	clear_page_private_dummy(page);
				309	unlock_page(page);
				310	mempool_free(page, sbi->write_io_dummy);
				311
				312	if (unlikely(bio->bi_status))
				313	f2fs_stop_checkpoint(sbi, true);
				314	continue;
				315	}
				316
				317	fscrypt_finalize_bounce_page(&page);
				318
				319	#ifdef CONFIG_F2FS_FS_COMPRESSION
				320	if (f2fs_is_compressed_page(page)) {
				321	f2fs_compress_write_end_io(bio, page);
				322	continue;
				323	}
				324	#endif
				325
				326	if (unlikely(bio->bi_status)) {
				327	mapping_set_error(page->mapping, -EIO);
				328	if (type == F2FS_WB_CP_DATA)
				329	f2fs_stop_checkpoint(sbi, true);
				330	}
				331
				332	f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
				333	page->index != nid_of_node(page));
				334
				335	dec_page_count(sbi, type);
				336	if (f2fs_in_warm_node_list(sbi, page))
				337	f2fs_del_fsync_node_entry(sbi, page);
				338	clear_page_private_gcing(page);
				339	end_page_writeback(page);
				340	}
				341	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				342	wq_has_sleeper(&sbi->cp_wait))
				343	wake_up(&sbi->cp_wait);
				344
				345	bio_put(bio);
				346	}
				347
				348	struct block_device f2fs_target_device(struct f2fs_sb_info sbi,
				349	block_t blk_addr, struct bio *bio)
				350	{
				351	struct block_device *bdev = sbi->sb->s_bdev;
				352	int i;
				353
				354	if (f2fs_is_multi_device(sbi)) {
				355	for (i = 0; i < sbi->s_ndevs; i++) {
				356	if (FDEV(i).start_blk <= blk_addr &&
				357	FDEV(i).end_blk >= blk_addr) {
				358	blk_addr -= FDEV(i).start_blk;
				359	bdev = FDEV(i).bdev;
				360	break;
				361	}
				362	}
				363	}
				364	if (bio) {
				365	bio_set_dev(bio, bdev);
				366	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
				367	}
				368	return bdev;
				369	}
				370
				371	int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
				372	{
				373	int i;
				374
				375	if (!f2fs_is_multi_device(sbi))
				376	return 0;
				377
				378	for (i = 0; i < sbi->s_ndevs; i++)
				379	if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
				380	return i;
				381	return 0;
				382	}
				383
				384	/*
				385	* Return true, if pre_bio's bdev is same as its target device.
				386	*/
				387	static bool __same_bdev(struct f2fs_sb_info *sbi,
				388	block_t blk_addr, struct bio *bio)
				389	{
				390	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
				391	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
				392	}
				393
				394	static struct bio __bio_alloc(struct f2fs_io_info fio, int npages)
				395	{
				396	struct f2fs_sb_info *sbi = fio->sbi;
				397	struct bio *bio;
				398
				399	bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset);
				400
				401	f2fs_target_device(sbi, fio->new_blkaddr, bio);
				402	if (is_read_io(fio->op)) {
				403	bio->bi_end_io = f2fs_read_end_io;
				404	bio->bi_private = NULL;
				405	} else {
				406	bio->bi_end_io = f2fs_write_end_io;
				407	bio->bi_private = sbi;
				408	bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
				409	fio->type, fio->temp);
				410	}
				411	if (fio->io_wbc)
				412	wbc_init_bio(fio->io_wbc, bio);
				413
				414	return bio;
				415	}
				416
				417	static void f2fs_set_bio_crypt_ctx(struct bio bio, const struct inode inode,
				418	pgoff_t first_idx,
				419	const struct f2fs_io_info *fio,
				420	gfp_t gfp_mask)
				421	{
				422	/*
				423	* The f2fs garbage collector sets ->encrypted_page when it wants to
				424	* read/write raw data without encryption.
				425	*/
				426	if (!fio \|\| !fio->encrypted_page)
				427	fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
				428	else if (fscrypt_inode_should_skip_dm_default_key(inode))
				429	bio_set_skip_dm_default_key(bio);
				430	}
				431
				432	static bool f2fs_crypt_mergeable_bio(struct bio bio, const struct inode inode,
				433	pgoff_t next_idx,
				434	const struct f2fs_io_info *fio)
				435	{
				436	/*
				437	* The f2fs garbage collector sets ->encrypted_page when it wants to
				438	* read/write raw data without encryption.
				439	*/
				440	if (fio && fio->encrypted_page)
				441	return !bio_has_crypt_ctx(bio) &&
				442	(bio_should_skip_dm_default_key(bio) ==
				443	fscrypt_inode_should_skip_dm_default_key(inode));
				444
				445	return fscrypt_mergeable_bio(bio, inode, next_idx);
				446	}
				447
				448	static inline void __submit_bio(struct f2fs_sb_info *sbi,
				449	struct bio *bio, enum page_type type)
				450	{
				451	if (!is_read_io(bio_op(bio))) {
				452	unsigned int start;
				453
				454	if (type != DATA && type != NODE)
				455	goto submit_io;
				456
				457	if (f2fs_lfs_mode(sbi) && current->plug)
				458	blk_finish_plug(current->plug);
				459
				460	if (!F2FS_IO_ALIGNED(sbi))
				461	goto submit_io;
				462
				463	start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
				464	start %= F2FS_IO_SIZE(sbi);
				465
				466	if (start == 0)
				467	goto submit_io;
				468
				469	/* fill dummy pages */
				470	for (; start < F2FS_IO_SIZE(sbi); start++) {
				471	struct page *page =
				472	mempool_alloc(sbi->write_io_dummy,
				473	GFP_NOIO \| __GFP_NOFAIL);
				474	f2fs_bug_on(sbi, !page);
				475
				476	lock_page(page);
				477
				478	zero_user_segment(page, 0, PAGE_SIZE);
				479	set_page_private_dummy(page);
				480
				481	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				482	f2fs_bug_on(sbi, 1);
				483	}
				484	/*
				485	* In the NODE case, we lose next block address chain. So, we
				486	* need to do checkpoint in f2fs_sync_file.
				487	*/
				488	if (type == NODE)
				489	set_sbi_flag(sbi, SBI_NEED_CP);
				490	}
				491	submit_io:
				492	if (is_read_io(bio_op(bio)))
				493	trace_f2fs_submit_read_bio(sbi->sb, type, bio);
				494	else
				495	trace_f2fs_submit_write_bio(sbi->sb, type, bio);
				496	submit_bio(bio);
				497	}
				498
				499	void f2fs_submit_bio(struct f2fs_sb_info *sbi,
				500	struct bio *bio, enum page_type type)
				501	{
				502	__submit_bio(sbi, bio, type);
				503	}
				504
				505	static void __attach_io_flag(struct f2fs_io_info *fio)
				506	{
				507	struct f2fs_sb_info *sbi = fio->sbi;
				508	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
				509	unsigned int io_flag, fua_flag, meta_flag;
				510
				511	if (fio->type == DATA)
				512	io_flag = sbi->data_io_flag;
				513	else if (fio->type == NODE)
				514	io_flag = sbi->node_io_flag;
				515	else
				516	return;
				517
				518	fua_flag = io_flag & temp_mask;
				519	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
				520
				521	/*
				522	* data/node io flag bits per temp:
				523	* REQ_META \| REQ_FUA \|
				524	* 5 \| 4 \| 3 \| 2 \| 1 \| 0 \|
				525	* Cold \| Warm \| Hot \| Cold \| Warm \| Hot \|
				526	*/
				527	if ((1 << fio->temp) & meta_flag)
				528	fio->op_flags \|= REQ_META;
				529	if ((1 << fio->temp) & fua_flag)
				530	fio->op_flags \|= REQ_FUA;
				531	}
				532
				533	static void __submit_merged_bio(struct f2fs_bio_info *io)
				534	{
				535	struct f2fs_io_info *fio = &io->fio;
				536
				537	if (!io->bio)
				538	return;
				539
				540	__attach_io_flag(fio);
				541	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
				542
				543	if (is_read_io(fio->op))
				544	trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
				545	else
				546	trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
				547
				548	__submit_bio(io->sbi, io->bio, fio->type);
				549	io->bio = NULL;
				550	}
				551
				552	static bool __has_merged_page(struct bio bio, struct inode inode,
				553	struct page *page, nid_t ino)
				554	{
				555	struct bio_vec *bvec;
				556	struct bvec_iter_all iter_all;
				557
				558	if (!bio)
				559	return false;
				560
				561	if (!inode && !page && !ino)
				562	return true;
				563
				564	bio_for_each_segment_all(bvec, bio, iter_all) {
				565	struct page *target = bvec->bv_page;
				566
				567	if (fscrypt_is_bounce_page(target)) {
				568	target = fscrypt_pagecache_page(target);
				569	if (IS_ERR(target))
				570	continue;
				571	}
				572	if (f2fs_is_compressed_page(target)) {
				573	target = f2fs_compress_control_page(target);
				574	if (IS_ERR(target))
				575	continue;
				576	}
				577
				578	if (inode && inode == target->mapping->host)
				579	return true;
				580	if (page && page == target)
				581	return true;
				582	if (ino && ino == ino_of_node(target))
				583	return true;
				584	}
				585
				586	return false;
				587	}
				588
				589	static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				590	enum page_type type, enum temp_type temp)
				591	{
				592	enum page_type btype = PAGE_TYPE_OF_BIO(type);
				593	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
				594
				595	down_write(&io->io_rwsem);
				596
				597	/* change META to META_FLUSH in the checkpoint procedure */
				598	if (type >= META_FLUSH) {
				599	io->fio.type = META_FLUSH;
				600	io->fio.op = REQ_OP_WRITE;
				601	io->fio.op_flags = REQ_META \| REQ_PRIO \| REQ_SYNC;
				602	if (!test_opt(sbi, NOBARRIER))
				603	io->fio.op_flags \|= REQ_PREFLUSH \| REQ_FUA;
				604	}
				605	__submit_merged_bio(io);
				606	up_write(&io->io_rwsem);
				607	}
				608
				609	static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				610	struct inode inode, struct page page,
				611	nid_t ino, enum page_type type, bool force)
				612	{
				613	enum temp_type temp;
				614	bool ret = true;
				615
				616	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
				617	if (!force) {
				618	enum page_type btype = PAGE_TYPE_OF_BIO(type);
				619	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
				620
				621	down_read(&io->io_rwsem);
				622	ret = __has_merged_page(io->bio, inode, page, ino);
				623	up_read(&io->io_rwsem);
				624	}
				625	if (ret)
				626	__f2fs_submit_merged_write(sbi, type, temp);
				627
				628	/* TODO: use HOT temp only for meta pages now. */
				629	if (type >= META)
				630	break;
				631	}
				632	}
				633
				634	void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
				635	{
				636	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
				637	}
				638
				639	void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				640	struct inode inode, struct page page,
				641	nid_t ino, enum page_type type)
				642	{
				643	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
				644	}
				645
				646	void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
				647	{
				648	f2fs_submit_merged_write(sbi, DATA);
				649	f2fs_submit_merged_write(sbi, NODE);
				650	f2fs_submit_merged_write(sbi, META);
				651	}
				652
				653	/*
				654	* Fill the locked page with data located in the block address.
				655	* A caller needs to unlock the page on failure.
				656	*/
				657	int f2fs_submit_page_bio(struct f2fs_io_info *fio)
				658	{
				659	struct bio *bio;
				660	struct page *page = fio->encrypted_page ?
				661	fio->encrypted_page : fio->page;
				662
				663	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
				664	fio->is_por ? META_POR : (__is_meta_io(fio) ?
				665	META_GENERIC : DATA_GENERIC_ENHANCE)))
				666	return -EFSCORRUPTED;
				667
				668	trace_f2fs_submit_page_bio(page, fio);
				669
				670	/* Allocate a new bio */
				671	bio = __bio_alloc(fio, 1);
				672
				673	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				674	fio->page->index, fio, GFP_NOIO);
				675
				676	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
				677	bio_put(bio);
				678	return -EFAULT;
				679	}
				680
				681	if (fio->io_wbc && !is_read_io(fio->op))
				682	wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
				683
				684	__attach_io_flag(fio);
				685	bio_set_op_attrs(bio, fio->op, fio->op_flags);
				686
				687	inc_page_count(fio->sbi, is_read_io(fio->op) ?
				688	__read_io_type(page): WB_DATA_TYPE(fio->page));
				689
				690	__submit_bio(fio->sbi, bio, fio->type);
				691	return 0;
				692	}
				693
				694	static bool page_is_mergeable(struct f2fs_sb_info sbi, struct bio bio,
				695	block_t last_blkaddr, block_t cur_blkaddr)
				696	{
				697	if (unlikely(sbi->max_io_bytes &&
				698	bio->bi_iter.bi_size >= sbi->max_io_bytes))
				699	return false;
				700	if (last_blkaddr + 1 != cur_blkaddr)
				701	return false;
				702	return __same_bdev(sbi, cur_blkaddr, bio);
				703	}
				704
				705	static bool io_type_is_mergeable(struct f2fs_bio_info *io,
				706	struct f2fs_io_info *fio)
				707	{
				708	if (io->fio.op != fio->op)
				709	return false;
				710	return io->fio.op_flags == fio->op_flags;
				711	}
				712
				713	static bool io_is_mergeable(struct f2fs_sb_info sbi, struct bio bio,
				714	struct f2fs_bio_info *io,
				715	struct f2fs_io_info *fio,
				716	block_t last_blkaddr,
				717	block_t cur_blkaddr)
				718	{
				719	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA \|\| fio->type == NODE)) {
				720	unsigned int filled_blocks =
				721	F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
				722	unsigned int io_size = F2FS_IO_SIZE(sbi);
				723	unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
				724
				725	/* IOs in bio is aligned and left space of vectors is not enough */
				726	if (!(filled_blocks % io_size) && left_vecs < io_size)
				727	return false;
				728	}
				729	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
				730	return false;
				731	return io_type_is_mergeable(io, fio);
				732	}
				733
				734	static void add_bio_entry(struct f2fs_sb_info sbi, struct bio bio,
				735	struct page *page, enum temp_type temp)
				736	{
				737	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
				738	struct bio_entry *be;
				739
				740	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
				741	be->bio = bio;
				742	bio_get(bio);
				743
				744	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
				745	f2fs_bug_on(sbi, 1);
				746
				747	down_write(&io->bio_list_lock);
				748	list_add_tail(&be->list, &io->bio_list);
				749	up_write(&io->bio_list_lock);
				750	}
				751
				752	static void del_bio_entry(struct bio_entry *be)
				753	{
				754	list_del(&be->list);
				755	kmem_cache_free(bio_entry_slab, be);
				756	}
				757
				758	static int add_ipu_page(struct f2fs_io_info fio, struct bio *bio,
				759	struct page *page)
				760	{
				761	struct f2fs_sb_info *sbi = fio->sbi;
				762	enum temp_type temp;
				763	bool found = false;
				764	int ret = -EAGAIN;
				765
				766	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
				767	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
				768	struct list_head *head = &io->bio_list;
				769	struct bio_entry *be;
				770
				771	down_write(&io->bio_list_lock);
				772	list_for_each_entry(be, head, list) {
				773	if (be->bio != *bio)
				774	continue;
				775
				776	found = true;
				777
				778	f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
				779	*fio->last_block,
				780	fio->new_blkaddr));
				781	if (f2fs_crypt_mergeable_bio(*bio,
				782	fio->page->mapping->host,
				783	fio->page->index, fio) &&
				784	bio_add_page(*bio, page, PAGE_SIZE, 0) ==
				785	PAGE_SIZE) {
				786	ret = 0;
				787	break;
				788	}
				789
				790	/* page can't be merged into bio; submit the bio */
				791	del_bio_entry(be);
				792	__submit_bio(sbi, *bio, DATA);
				793	break;
				794	}
				795	up_write(&io->bio_list_lock);
				796	}
				797
				798	if (ret) {
				799	bio_put(*bio);
				800	*bio = NULL;
				801	}
				802
				803	return ret;
				804	}
				805
				806	void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
				807	struct bio *bio, struct page page)
				808	{
				809	enum temp_type temp;
				810	bool found = false;
				811	struct bio target = bio ? bio : NULL;
				812
				813	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
				814	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
				815	struct list_head *head = &io->bio_list;
				816	struct bio_entry *be;
				817
				818	if (list_empty(head))
				819	continue;
				820
				821	down_read(&io->bio_list_lock);
				822	list_for_each_entry(be, head, list) {
				823	if (target)
				824	found = (target == be->bio);
				825	else
				826	found = __has_merged_page(be->bio, NULL,
				827	page, 0);
				828	if (found)
				829	break;
				830	}
				831	up_read(&io->bio_list_lock);
				832
				833	if (!found)
				834	continue;
				835
				836	found = false;
				837
				838	down_write(&io->bio_list_lock);
				839	list_for_each_entry(be, head, list) {
				840	if (target)
				841	found = (target == be->bio);
				842	else
				843	found = __has_merged_page(be->bio, NULL,
				844	page, 0);
				845	if (found) {
				846	target = be->bio;
				847	del_bio_entry(be);
				848	break;
				849	}
				850	}
				851	up_write(&io->bio_list_lock);
				852	}
				853
				854	if (found)
				855	__submit_bio(sbi, target, DATA);
				856	if (bio && *bio) {
				857	bio_put(*bio);
				858	*bio = NULL;
				859	}
				860	}
				861
				862	int f2fs_merge_page_bio(struct f2fs_io_info *fio)
				863	{
				864	struct bio bio = fio->bio;
				865	struct page *page = fio->encrypted_page ?
				866	fio->encrypted_page : fio->page;
				867
				868	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
				869	__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
				870	return -EFSCORRUPTED;
				871
				872	trace_f2fs_submit_page_bio(page, fio);
				873
				874	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
				875	fio->new_blkaddr))
				876	f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
				877	alloc_new:
				878	if (!bio) {
				879	bio = __bio_alloc(fio, BIO_MAX_PAGES);
				880	__attach_io_flag(fio);
				881	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				882	fio->page->index, fio, GFP_NOIO);
				883	bio_set_op_attrs(bio, fio->op, fio->op_flags);
				884
				885	add_bio_entry(fio->sbi, bio, page, fio->temp);
				886	} else {
				887	if (add_ipu_page(fio, &bio, page))
				888	goto alloc_new;
				889	}
				890
				891	if (fio->io_wbc)
				892	wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
				893
				894	inc_page_count(fio->sbi, WB_DATA_TYPE(page));
				895
				896	*fio->last_block = fio->new_blkaddr;
				897	*fio->bio = bio;
				898
				899	return 0;
				900	}
				901
				902	void f2fs_submit_page_write(struct f2fs_io_info *fio)
				903	{
				904	struct f2fs_sb_info *sbi = fio->sbi;
				905	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
				906	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
				907	struct page *bio_page;
				908
				909	f2fs_bug_on(sbi, is_read_io(fio->op));
				910
				911	down_write(&io->io_rwsem);
				912	next:
				913	if (fio->in_list) {
				914	spin_lock(&io->io_lock);
				915	if (list_empty(&io->io_list)) {
				916	spin_unlock(&io->io_lock);
				917	goto out;
				918	}
				919	fio = list_first_entry(&io->io_list,
				920	struct f2fs_io_info, list);
				921	list_del(&fio->list);
				922	spin_unlock(&io->io_lock);
				923	}
				924
				925	verify_fio_blkaddr(fio);
				926
				927	if (fio->encrypted_page)
				928	bio_page = fio->encrypted_page;
				929	else if (fio->compressed_page)
				930	bio_page = fio->compressed_page;
				931	else
				932	bio_page = fio->page;
				933
				934	/* set submitted = true as a return value */
				935	fio->submitted = true;
				936
				937	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
				938
				939	if (io->bio &&
				940	(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
				941	fio->new_blkaddr) \|\|
				942	!f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				943	bio_page->index, fio)))
				944	__submit_merged_bio(io);
				945	alloc_new:
				946	if (io->bio == NULL) {
				947	if (F2FS_IO_ALIGNED(sbi) &&
				948	(fio->type == DATA \|\| fio->type == NODE) &&
				949	fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
				950	dec_page_count(sbi, WB_DATA_TYPE(bio_page));
				951	fio->retry = true;
				952	goto skip;
				953	}
				954	io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
				955	f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				956	bio_page->index, fio, GFP_NOIO);
				957	io->fio = *fio;
				958	}
				959
				960	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
				961	__submit_merged_bio(io);
				962	goto alloc_new;
				963	}
				964
				965	if (fio->io_wbc)
				966	wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
				967
				968	io->last_block_in_bio = fio->new_blkaddr;
				969
				970	trace_f2fs_submit_page_write(fio->page, fio);
				971	skip:
				972	if (fio->in_list)
				973	goto next;
				974	out:
				975	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) \|\|
				976	!f2fs_is_checkpoint_ready(sbi))
				977	__submit_merged_bio(io);
				978	up_write(&io->io_rwsem);
				979	}
				980
				981	static struct bio f2fs_grab_read_bio(struct inode inode, block_t blkaddr,
				982	unsigned nr_pages, unsigned op_flag,
				983	pgoff_t first_idx, bool for_write)
				984	{
				985	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				986	struct bio *bio;
				987	struct bio_post_read_ctx *ctx;
				988	unsigned int post_read_steps = 0;
				989
				990	bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
				991	min_t(int, nr_pages, BIO_MAX_PAGES),
				992	&f2fs_bioset);
				993	if (!bio)
				994	return ERR_PTR(-ENOMEM);
				995
				996	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
				997
				998	f2fs_target_device(sbi, blkaddr, bio);
				999	bio->bi_end_io = f2fs_read_end_io;
				1000	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
				1001
				1002	if (fscrypt_inode_uses_fs_layer_crypto(inode))
				1003	post_read_steps \|= STEP_DECRYPT;
				1004
				1005	if (f2fs_need_verity(inode, first_idx))
				1006	post_read_steps \|= STEP_VERITY;
				1007
				1008	/*
				1009	* STEP_DECOMPRESS is handled specially, since a compressed file might
				1010	* contain both compressed and uncompressed clusters. We'll allocate a
				1011	* bio_post_read_ctx if the file is compressed, but the caller is
				1012	* responsible for enabling STEP_DECOMPRESS if it's actually needed.
				1013	*/
				1014
				1015	if (post_read_steps \|\| f2fs_compressed_file(inode)) {
				1016	/* Due to the mempool, this never fails. */
				1017	ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
				1018	ctx->bio = bio;
				1019	ctx->sbi = sbi;
				1020	ctx->enabled_steps = post_read_steps;
				1021	bio->bi_private = ctx;
				1022	}
				1023
				1024	return bio;
				1025	}
				1026
				1027	/* This can handle encryption stuffs */
				1028	static int f2fs_submit_page_read(struct inode inode, struct page page,
				1029	block_t blkaddr, int op_flags, bool for_write)
				1030	{
				1031	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				1032	struct bio *bio;
				1033
				1034	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
				1035	page->index, for_write);
				1036	if (IS_ERR(bio))
				1037	return PTR_ERR(bio);
				1038
				1039	/* wait for GCed page writeback via META_MAPPING */
				1040	f2fs_wait_on_block_writeback(inode, blkaddr);
				1041
				1042	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
				1043	bio_put(bio);
				1044	return -EFAULT;
				1045	}
				1046	ClearPageError(page);
				1047	inc_page_count(sbi, F2FS_RD_DATA);
				1048	f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
				1049	__submit_bio(sbi, bio, DATA);
				1050	return 0;
				1051	}
				1052
				1053	static void __set_data_blkaddr(struct dnode_of_data *dn)
				1054	{
				1055	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
				1056	__le32 *addr_array;
				1057	int base = 0;
				1058
				1059	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
				1060	base = get_extra_isize(dn->inode);
				1061
				1062	/* Get physical address of data block */
				1063	addr_array = blkaddr_in_node(rn);
				1064	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
				1065	}
				1066
				1067	/*
				1068	* Lock ordering for the change of data block address:
				1069	* ->data_page
				1070	* ->node_page
				1071	* update block addresses in the node page
				1072	*/
				1073	void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
				1074	{
				1075	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
				1076	__set_data_blkaddr(dn);
				1077	if (set_page_dirty(dn->node_page))
				1078	dn->node_changed = true;
				1079	}
				1080
				1081	void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
				1082	{
				1083	dn->data_blkaddr = blkaddr;
				1084	f2fs_set_data_blkaddr(dn);
				1085	f2fs_update_extent_cache(dn);
				1086	}
				1087
				1088	/* dn->ofs_in_node will be returned with up-to-date last block pointer */
				1089	int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
				1090	{
				1091	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
				1092	int err;
				1093
				1094	if (!count)
				1095	return 0;
				1096
				1097	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
				1098	return -EPERM;
				1099	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
				1100	return err;
				1101
				1102	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
				1103	dn->ofs_in_node, count);
				1104
				1105	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
				1106
				1107	for (; count > 0; dn->ofs_in_node++) {
				1108	block_t blkaddr = f2fs_data_blkaddr(dn);
				1109
				1110	if (blkaddr == NULL_ADDR) {
				1111	dn->data_blkaddr = NEW_ADDR;
				1112	__set_data_blkaddr(dn);
				1113	count--;
				1114	}
				1115	}
				1116
				1117	if (set_page_dirty(dn->node_page))
				1118	dn->node_changed = true;
				1119	return 0;
				1120	}
				1121
				1122	/* Should keep dn->ofs_in_node unchanged */
				1123	int f2fs_reserve_new_block(struct dnode_of_data *dn)
				1124	{
				1125	unsigned int ofs_in_node = dn->ofs_in_node;
				1126	int ret;
				1127
				1128	ret = f2fs_reserve_new_blocks(dn, 1);
				1129	dn->ofs_in_node = ofs_in_node;
				1130	return ret;
				1131	}
				1132
				1133	int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
				1134	{
				1135	bool need_put = dn->inode_page ? false : true;
				1136	int err;
				1137
				1138	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
				1139	if (err)
				1140	return err;
				1141
				1142	if (dn->data_blkaddr == NULL_ADDR)
				1143	err = f2fs_reserve_new_block(dn);
				1144	if (err \|\| need_put)
				1145	f2fs_put_dnode(dn);
				1146	return err;
				1147	}
				1148
				1149	int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
				1150	{
				1151	struct extent_info ei = {0, 0, 0};
				1152	struct inode *inode = dn->inode;
				1153
				1154	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
				1155	dn->data_blkaddr = ei.blk + index - ei.fofs;
				1156	return 0;
				1157	}
				1158
				1159	return f2fs_reserve_block(dn, index);
				1160	}
				1161
				1162	struct page f2fs_get_read_data_page(struct inode inode, pgoff_t index,
				1163	int op_flags, bool for_write)
				1164	{
				1165	struct address_space *mapping = inode->i_mapping;
				1166	struct dnode_of_data dn;
				1167	struct page *page;
				1168	struct extent_info ei = {0,0,0};
				1169	int err;
				1170
				1171	page = f2fs_grab_cache_page(mapping, index, for_write);
				1172	if (!page)
				1173	return ERR_PTR(-ENOMEM);
				1174
				1175	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
				1176	dn.data_blkaddr = ei.blk + index - ei.fofs;
				1177	if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
				1178	DATA_GENERIC_ENHANCE_READ)) {
				1179	err = -EFSCORRUPTED;
				1180	goto put_err;
				1181	}
				1182	goto got_it;
				1183	}
				1184
				1185	set_new_dnode(&dn, inode, NULL, NULL, 0);
				1186	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
				1187	if (err)
				1188	goto put_err;
				1189	f2fs_put_dnode(&dn);
				1190
				1191	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
				1192	err = -ENOENT;
				1193	goto put_err;
				1194	}
				1195	if (dn.data_blkaddr != NEW_ADDR &&
				1196	!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
				1197	dn.data_blkaddr,
				1198	DATA_GENERIC_ENHANCE)) {
				1199	err = -EFSCORRUPTED;
				1200	goto put_err;
				1201	}
				1202	got_it:
				1203	if (PageUptodate(page)) {
				1204	unlock_page(page);
				1205	return page;
				1206	}
				1207
				1208	/*
				1209	* A new dentry page is allocated but not able to be written, since its
				1210	* new inode page couldn't be allocated due to -ENOSPC.
				1211	* In such the case, its blkaddr can be remained as NEW_ADDR.
				1212	* see, f2fs_add_link -> f2fs_get_new_data_page ->
				1213	* f2fs_init_inode_metadata.
				1214	*/
				1215	if (dn.data_blkaddr == NEW_ADDR) {
				1216	zero_user_segment(page, 0, PAGE_SIZE);
				1217	if (!PageUptodate(page))
				1218	SetPageUptodate(page);
				1219	unlock_page(page);
				1220	return page;
				1221	}
				1222
				1223	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
				1224	op_flags, for_write);
				1225	if (err)
				1226	goto put_err;
				1227	return page;
				1228
				1229	put_err:
				1230	f2fs_put_page(page, 1);
				1231	return ERR_PTR(err);
				1232	}
				1233
				1234	struct page f2fs_find_data_page(struct inode inode, pgoff_t index)
				1235	{
				1236	struct address_space *mapping = inode->i_mapping;
				1237	struct page *page;
				1238
				1239	page = find_get_page(mapping, index);
				1240	if (page && PageUptodate(page))
				1241	return page;
				1242	f2fs_put_page(page, 0);
				1243
				1244	page = f2fs_get_read_data_page(inode, index, 0, false);
				1245	if (IS_ERR(page))
				1246	return page;
				1247
				1248	if (PageUptodate(page))
				1249	return page;
				1250
				1251	wait_on_page_locked(page);
				1252	if (unlikely(!PageUptodate(page))) {
				1253	f2fs_put_page(page, 0);
				1254	return ERR_PTR(-EIO);
				1255	}
				1256	return page;
				1257	}
				1258
				1259	/*
				1260	* If it tries to access a hole, return an error.
				1261	* Because, the callers, functions in dir.c and GC, should be able to know
				1262	* whether this page exists or not.
				1263	*/
				1264	struct page f2fs_get_lock_data_page(struct inode inode, pgoff_t index,
				1265	bool for_write)
				1266	{
				1267	struct address_space *mapping = inode->i_mapping;
				1268	struct page *page;
				1269	repeat:
				1270	page = f2fs_get_read_data_page(inode, index, 0, for_write);
				1271	if (IS_ERR(page))
				1272	return page;
				1273
				1274	/* wait for read completion */
				1275	lock_page(page);
				1276	if (unlikely(page->mapping != mapping)) {
				1277	f2fs_put_page(page, 1);
				1278	goto repeat;
				1279	}
				1280	if (unlikely(!PageUptodate(page))) {
				1281	f2fs_put_page(page, 1);
				1282	return ERR_PTR(-EIO);
				1283	}
				1284	return page;
				1285	}
				1286
				1287	/*
				1288	* Caller ensures that this data page is never allocated.
				1289	* A new zero-filled data page is allocated in the page cache.
				1290	*
				1291	* Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
				1292	* f2fs_unlock_op().
				1293	* Note that, ipage is set only by make_empty_dir, and if any error occur,
				1294	* ipage should be released by this function.
				1295	*/
				1296	struct page f2fs_get_new_data_page(struct inode inode,
				1297	struct page *ipage, pgoff_t index, bool new_i_size)
				1298	{
				1299	struct address_space *mapping = inode->i_mapping;
				1300	struct page *page;
				1301	struct dnode_of_data dn;
				1302	int err;
				1303
				1304	page = f2fs_grab_cache_page(mapping, index, true);
				1305	if (!page) {
				1306	/*
				1307	* before exiting, we should make sure ipage will be released
				1308	* if any error occur.
				1309	*/
				1310	f2fs_put_page(ipage, 1);
				1311	return ERR_PTR(-ENOMEM);
				1312	}
				1313
				1314	set_new_dnode(&dn, inode, ipage, NULL, 0);
				1315	err = f2fs_reserve_block(&dn, index);
				1316	if (err) {
				1317	f2fs_put_page(page, 1);
				1318	return ERR_PTR(err);
				1319	}
				1320	if (!ipage)
				1321	f2fs_put_dnode(&dn);
				1322
				1323	if (PageUptodate(page))
				1324	goto got_it;
				1325
				1326	if (dn.data_blkaddr == NEW_ADDR) {
				1327	zero_user_segment(page, 0, PAGE_SIZE);
				1328	if (!PageUptodate(page))
				1329	SetPageUptodate(page);
				1330	} else {
				1331	f2fs_put_page(page, 1);
				1332
				1333	/* if ipage exists, blkaddr should be NEW_ADDR */
				1334	f2fs_bug_on(F2FS_I_SB(inode), ipage);
				1335	page = f2fs_get_lock_data_page(inode, index, true);
				1336	if (IS_ERR(page))
				1337	return page;
				1338	}
				1339	got_it:
				1340	if (new_i_size && i_size_read(inode) <
				1341	((loff_t)(index + 1) << PAGE_SHIFT))
				1342	f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
				1343	return page;
				1344	}
				1345
				1346	static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
				1347	{
				1348	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
				1349	struct f2fs_summary sum;
				1350	struct node_info ni;
				1351	block_t old_blkaddr;
				1352	blkcnt_t count = 1;
				1353	int err;
				1354
				1355	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
				1356	return -EPERM;
				1357
				1358	err = f2fs_get_node_info(sbi, dn->nid, &ni);
				1359	if (err)
				1360	return err;
				1361
				1362	dn->data_blkaddr = f2fs_data_blkaddr(dn);
				1363	if (dn->data_blkaddr != NULL_ADDR)
				1364	goto alloc;
				1365
				1366	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
				1367	return err;
				1368
				1369	alloc:
				1370	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
				1371	old_blkaddr = dn->data_blkaddr;
				1372	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
				1373	&sum, seg_type, NULL);
				1374	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
				1375	invalidate_mapping_pages(META_MAPPING(sbi),
				1376	old_blkaddr, old_blkaddr);
				1377	f2fs_invalidate_compress_page(sbi, old_blkaddr);
				1378	}
				1379	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
				1380
				1381	/*
				1382	* i_size will be updated by direct_IO. Otherwise, we'll get stale
				1383	* data from unwritten block via dio_read.
				1384	*/
				1385	return 0;
				1386	}
				1387
				1388	int f2fs_preallocate_blocks(struct kiocb iocb, struct iov_iter from)
				1389	{
				1390	struct inode *inode = file_inode(iocb->ki_filp);
				1391	struct f2fs_map_blocks map;
				1392	int flag;
				1393	int err = 0;
				1394	bool direct_io = iocb->ki_flags & IOCB_DIRECT;
				1395
				1396	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
				1397	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
				1398	if (map.m_len > map.m_lblk)
				1399	map.m_len -= map.m_lblk;
				1400	else
				1401	map.m_len = 0;
				1402
				1403	map.m_next_pgofs = NULL;
				1404	map.m_next_extent = NULL;
				1405	map.m_seg_type = NO_CHECK_TYPE;
				1406	map.m_may_create = true;
				1407
				1408	if (direct_io) {
				1409	map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
				1410	flag = f2fs_force_buffered_io(inode, iocb, from) ?
				1411	F2FS_GET_BLOCK_PRE_AIO :
				1412	F2FS_GET_BLOCK_PRE_DIO;
				1413	goto map_blocks;
				1414	}
				1415	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
				1416	err = f2fs_convert_inline_inode(inode);
				1417	if (err)
				1418	return err;
				1419	}
				1420	if (f2fs_has_inline_data(inode))
				1421	return err;
				1422
				1423	flag = F2FS_GET_BLOCK_PRE_AIO;
				1424
				1425	map_blocks:
				1426	err = f2fs_map_blocks(inode, &map, 1, flag);
				1427	if (map.m_len > 0 && err == -ENOSPC) {
				1428	if (!direct_io)
				1429	set_inode_flag(inode, FI_NO_PREALLOC);
				1430	err = 0;
				1431	}
				1432	return err;
				1433	}
				1434
				1435	void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
				1436	{
				1437	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				1438	if (lock)
				1439	down_read(&sbi->node_change);
				1440	else
				1441	up_read(&sbi->node_change);
				1442	} else {
				1443	if (lock)
				1444	f2fs_lock_op(sbi);
				1445	else
				1446	f2fs_unlock_op(sbi);
				1447	}
				1448	}
				1449
				1450	/*
				1451	* f2fs_map_blocks() tries to find or build mapping relationship which
				1452	* maps continuous logical blocks to physical blocks, and return such
				1453	* info via f2fs_map_blocks structure.
				1454	*/
				1455	int f2fs_map_blocks(struct inode inode, struct f2fs_map_blocks map,
				1456	int create, int flag)
				1457	{
				1458	unsigned int maxblocks = map->m_len;
				1459	struct dnode_of_data dn;
				1460	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				1461	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
				1462	pgoff_t pgofs, end_offset, end;
				1463	int err = 0, ofs = 1;
				1464	unsigned int ofs_in_node, last_ofs_in_node;
				1465	blkcnt_t prealloc;
				1466	struct extent_info ei = {0,0,0};
				1467	block_t blkaddr;
				1468	unsigned int start_pgofs;
				1469
				1470	if (!maxblocks)
				1471	return 0;
				1472
				1473	map->m_len = 0;
				1474	map->m_flags = 0;
				1475
				1476	/* it only supports block size == page size */
				1477	pgofs = (pgoff_t)map->m_lblk;
				1478	end = pgofs + maxblocks;
				1479
				1480	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
				1481	if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
				1482	map->m_may_create)
				1483	goto next_dnode;
				1484
				1485	map->m_pblk = ei.blk + pgofs - ei.fofs;
				1486	map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
				1487	map->m_flags = F2FS_MAP_MAPPED;
				1488	if (map->m_next_extent)
				1489	*map->m_next_extent = pgofs + map->m_len;
				1490
				1491	/* for hardware encryption, but to avoid potential issue in future */
				1492	if (flag == F2FS_GET_BLOCK_DIO)
				1493	f2fs_wait_on_block_writeback_range(inode,
				1494	map->m_pblk, map->m_len);
				1495	goto out;
				1496	}
				1497
				1498	next_dnode:
				1499	if (map->m_may_create)
				1500	f2fs_do_map_lock(sbi, flag, true);
				1501
				1502	/* When reading holes, we need its node page */
				1503	set_new_dnode(&dn, inode, NULL, NULL, 0);
				1504	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
				1505	if (err) {
				1506	if (flag == F2FS_GET_BLOCK_BMAP)
				1507	map->m_pblk = 0;
				1508
				1509	if (err == -ENOENT) {
				1510	/*
				1511	* There is one exceptional case that read_node_page()
				1512	* may return -ENOENT due to filesystem has been
				1513	* shutdown or cp_error, so force to convert error
				1514	* number to EIO for such case.
				1515	*/
				1516	if (map->m_may_create &&
				1517	(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) \|\|
				1518	f2fs_cp_error(sbi))) {
				1519	err = -EIO;
				1520	goto unlock_out;
				1521	}
				1522
				1523	err = 0;
				1524	if (map->m_next_pgofs)
				1525	*map->m_next_pgofs =
				1526	f2fs_get_next_page_offset(&dn, pgofs);
				1527	if (map->m_next_extent)
				1528	*map->m_next_extent =
				1529	f2fs_get_next_page_offset(&dn, pgofs);
				1530	}
				1531	goto unlock_out;
				1532	}
				1533
				1534	start_pgofs = pgofs;
				1535	prealloc = 0;
				1536	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
				1537	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
				1538
				1539	next_block:
				1540	blkaddr = f2fs_data_blkaddr(&dn);
				1541
				1542	if (__is_valid_data_blkaddr(blkaddr) &&
				1543	!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
				1544	err = -EFSCORRUPTED;
				1545	goto sync_out;
				1546	}
				1547
				1548	if (__is_valid_data_blkaddr(blkaddr)) {
				1549	/* use out-place-update for driect IO under LFS mode */
				1550	if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
				1551	map->m_may_create) {
				1552	err = __allocate_data_block(&dn, map->m_seg_type);
				1553	if (err)
				1554	goto sync_out;
				1555	blkaddr = dn.data_blkaddr;
				1556	set_inode_flag(inode, FI_APPEND_WRITE);
				1557	}
				1558	} else {
				1559	if (create) {
				1560	if (unlikely(f2fs_cp_error(sbi))) {
				1561	err = -EIO;
				1562	goto sync_out;
				1563	}
				1564	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				1565	if (blkaddr == NULL_ADDR) {
				1566	prealloc++;
				1567	last_ofs_in_node = dn.ofs_in_node;
				1568	}
				1569	} else {
				1570	WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
				1571	flag != F2FS_GET_BLOCK_DIO);
				1572	err = __allocate_data_block(&dn,
				1573	map->m_seg_type);
				1574	if (!err)
				1575	set_inode_flag(inode, FI_APPEND_WRITE);
				1576	}
				1577	if (err)
				1578	goto sync_out;
				1579	map->m_flags \|= F2FS_MAP_NEW;
				1580	blkaddr = dn.data_blkaddr;
				1581	} else {
				1582	if (flag == F2FS_GET_BLOCK_BMAP) {
				1583	map->m_pblk = 0;
				1584	goto sync_out;
				1585	}
				1586	if (flag == F2FS_GET_BLOCK_PRECACHE)
				1587	goto sync_out;
				1588	if (flag == F2FS_GET_BLOCK_FIEMAP &&
				1589	blkaddr == NULL_ADDR) {
				1590	if (map->m_next_pgofs)
				1591	*map->m_next_pgofs = pgofs + 1;
				1592	goto sync_out;
				1593	}
				1594	if (flag != F2FS_GET_BLOCK_FIEMAP) {
				1595	/* for defragment case */
				1596	if (map->m_next_pgofs)
				1597	*map->m_next_pgofs = pgofs + 1;
				1598	goto sync_out;
				1599	}
				1600	}
				1601	}
				1602
				1603	if (flag == F2FS_GET_BLOCK_PRE_AIO)
				1604	goto skip;
				1605
				1606	if (map->m_len == 0) {
				1607	/* preallocated unwritten block should be mapped for fiemap. */
				1608	if (blkaddr == NEW_ADDR)
				1609	map->m_flags \|= F2FS_MAP_UNWRITTEN;
				1610	map->m_flags \|= F2FS_MAP_MAPPED;
				1611
				1612	map->m_pblk = blkaddr;
				1613	map->m_len = 1;
				1614	} else if ((map->m_pblk != NEW_ADDR &&
				1615	blkaddr == (map->m_pblk + ofs)) \|\|
				1616	(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) \|\|
				1617	flag == F2FS_GET_BLOCK_PRE_DIO) {
				1618	ofs++;
				1619	map->m_len++;
				1620	} else {
				1621	goto sync_out;
				1622	}
				1623
				1624	skip:
				1625	dn.ofs_in_node++;
				1626	pgofs++;
				1627
				1628	/* preallocate blocks in batch for one dnode page */
				1629	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
				1630	(pgofs == end \|\| dn.ofs_in_node == end_offset)) {
				1631
				1632	dn.ofs_in_node = ofs_in_node;
				1633	err = f2fs_reserve_new_blocks(&dn, prealloc);
				1634	if (err)
				1635	goto sync_out;
				1636
				1637	map->m_len += dn.ofs_in_node - ofs_in_node;
				1638	if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
				1639	err = -ENOSPC;
				1640	goto sync_out;
				1641	}
				1642	dn.ofs_in_node = end_offset;
				1643	}
				1644
				1645	if (pgofs >= end)
				1646	goto sync_out;
				1647	else if (dn.ofs_in_node < end_offset)
				1648	goto next_block;
				1649
				1650	if (flag == F2FS_GET_BLOCK_PRECACHE) {
				1651	if (map->m_flags & F2FS_MAP_MAPPED) {
				1652	unsigned int ofs = start_pgofs - map->m_lblk;
				1653
				1654	f2fs_update_extent_cache_range(&dn,
				1655	start_pgofs, map->m_pblk + ofs,
				1656	map->m_len - ofs);
				1657	}
				1658	}
				1659
				1660	f2fs_put_dnode(&dn);
				1661
				1662	if (map->m_may_create) {
				1663	f2fs_do_map_lock(sbi, flag, false);
				1664	f2fs_balance_fs(sbi, dn.node_changed);
				1665	}
				1666	goto next_dnode;
				1667
				1668	sync_out:
				1669
				1670	/* for hardware encryption, but to avoid potential issue in future */
				1671	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
				1672	f2fs_wait_on_block_writeback_range(inode,
				1673	map->m_pblk, map->m_len);
				1674
				1675	if (flag == F2FS_GET_BLOCK_PRECACHE) {
				1676	if (map->m_flags & F2FS_MAP_MAPPED) {
				1677	unsigned int ofs = start_pgofs - map->m_lblk;
				1678
				1679	f2fs_update_extent_cache_range(&dn,
				1680	start_pgofs, map->m_pblk + ofs,
				1681	map->m_len - ofs);
				1682	}
				1683	if (map->m_next_extent)
				1684	*map->m_next_extent = pgofs + 1;
				1685	}
				1686	f2fs_put_dnode(&dn);
				1687	unlock_out:
				1688	if (map->m_may_create) {
				1689	f2fs_do_map_lock(sbi, flag, false);
				1690	f2fs_balance_fs(sbi, dn.node_changed);
				1691	}
				1692	out:
				1693	trace_f2fs_map_blocks(inode, map, err);
				1694	return err;
				1695	}
				1696
				1697	bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
				1698	{
				1699	struct f2fs_map_blocks map;
				1700	block_t last_lblk;
				1701	int err;
				1702
				1703	if (pos + len > i_size_read(inode))
				1704	return false;
				1705
				1706	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
				1707	map.m_next_pgofs = NULL;
				1708	map.m_next_extent = NULL;
				1709	map.m_seg_type = NO_CHECK_TYPE;
				1710	map.m_may_create = false;
				1711	last_lblk = F2FS_BLK_ALIGN(pos + len);
				1712
				1713	while (map.m_lblk < last_lblk) {
				1714	map.m_len = last_lblk - map.m_lblk;
				1715	err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
				1716	if (err \|\| map.m_len == 0)
				1717	return false;
				1718	map.m_lblk += map.m_len;
				1719	}
				1720	return true;
				1721	}
				1722
				1723	static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
				1724	{
				1725	return (bytes >> inode->i_blkbits);
				1726	}
				1727
				1728	static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
				1729	{
				1730	return (blks << inode->i_blkbits);
				1731	}
				1732
				1733	static int __get_data_block(struct inode *inode, sector_t iblock,
				1734	struct buffer_head *bh, int create, int flag,
				1735	pgoff_t *next_pgofs, int seg_type, bool may_write)
				1736	{
				1737	struct f2fs_map_blocks map;
				1738	int err;
				1739
				1740	map.m_lblk = iblock;
				1741	map.m_len = bytes_to_blks(inode, bh->b_size);
				1742	map.m_next_pgofs = next_pgofs;
				1743	map.m_next_extent = NULL;
				1744	map.m_seg_type = seg_type;
				1745	map.m_may_create = may_write;
				1746
				1747	err = f2fs_map_blocks(inode, &map, create, flag);
				1748	if (!err) {
				1749	map_bh(bh, inode->i_sb, map.m_pblk);
				1750	bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) \| map.m_flags;
				1751	bh->b_size = blks_to_bytes(inode, map.m_len);
				1752	}
				1753	return err;
				1754	}
				1755
				1756	static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
				1757	struct buffer_head *bh_result, int create)
				1758	{
				1759	return __get_data_block(inode, iblock, bh_result, create,
				1760	F2FS_GET_BLOCK_DIO, NULL,
				1761	f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				1762	true);
				1763	}
				1764
				1765	static int get_data_block_dio(struct inode *inode, sector_t iblock,
				1766	struct buffer_head *bh_result, int create)
				1767	{
				1768	return __get_data_block(inode, iblock, bh_result, create,
				1769	F2FS_GET_BLOCK_DIO, NULL,
				1770	f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				1771	false);
				1772	}
				1773
				1774	static int f2fs_xattr_fiemap(struct inode *inode,
				1775	struct fiemap_extent_info *fieinfo)
				1776	{
				1777	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				1778	struct page *page;
				1779	struct node_info ni;
				1780	__u64 phys = 0, len;
				1781	__u32 flags;
				1782	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
				1783	int err = 0;
				1784
				1785	if (f2fs_has_inline_xattr(inode)) {
				1786	int offset;
				1787
				1788	page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
				1789	inode->i_ino, false);
				1790	if (!page)
				1791	return -ENOMEM;
				1792
				1793	err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
				1794	if (err) {
				1795	f2fs_put_page(page, 1);
				1796	return err;
				1797	}
				1798
				1799	phys = blks_to_bytes(inode, ni.blk_addr);
				1800	offset = offsetof(struct f2fs_inode, i_addr) +
				1801	sizeof(__le32) * (DEF_ADDRS_PER_INODE -
				1802	get_inline_xattr_addrs(inode));
				1803
				1804	phys += offset;
				1805	len = inline_xattr_size(inode);
				1806
				1807	f2fs_put_page(page, 1);
				1808
				1809	flags = FIEMAP_EXTENT_DATA_INLINE \| FIEMAP_EXTENT_NOT_ALIGNED;
				1810
				1811	if (!xnid)
				1812	flags \|= FIEMAP_EXTENT_LAST;
				1813
				1814	err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
				1815	trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
				1816	if (err \|\| err == 1)
				1817	return err;
				1818	}
				1819
				1820	if (xnid) {
				1821	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
				1822	if (!page)
				1823	return -ENOMEM;
				1824
				1825	err = f2fs_get_node_info(sbi, xnid, &ni);
				1826	if (err) {
				1827	f2fs_put_page(page, 1);
				1828	return err;
				1829	}
				1830
				1831	phys = blks_to_bytes(inode, ni.blk_addr);
				1832	len = inode->i_sb->s_blocksize;
				1833
				1834	f2fs_put_page(page, 1);
				1835
				1836	flags = FIEMAP_EXTENT_LAST;
				1837	}
				1838
				1839	if (phys) {
				1840	err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
				1841	trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
				1842	}
				1843
				1844	return (err < 0 ? err : 0);
				1845	}
				1846
				1847	static loff_t max_inode_blocks(struct inode *inode)
				1848	{
				1849	loff_t result = ADDRS_PER_INODE(inode);
				1850	loff_t leaf_count = ADDRS_PER_BLOCK(inode);
				1851
				1852	/* two direct node blocks */
				1853	result += (leaf_count * 2);
				1854
				1855	/* two indirect node blocks */
				1856	leaf_count *= NIDS_PER_BLOCK;
				1857	result += (leaf_count * 2);
				1858
				1859	/* one double indirect node block */
				1860	leaf_count *= NIDS_PER_BLOCK;
				1861	result += leaf_count;
				1862
				1863	return result;
				1864	}
				1865
				1866	int f2fs_fiemap(struct inode inode, struct fiemap_extent_info fieinfo,
				1867	u64 start, u64 len)
				1868	{
				1869	struct f2fs_map_blocks map;
				1870	sector_t start_blk, last_blk;
				1871	pgoff_t next_pgofs;
				1872	u64 logical = 0, phys = 0, size = 0;
				1873	u32 flags = 0;
				1874	int ret = 0;
				1875	bool compr_cluster = false;
				1876	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
				1877	loff_t maxbytes;
				1878
				1879	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
				1880	ret = f2fs_precache_extents(inode);
				1881	if (ret)
				1882	return ret;
				1883	}
				1884
				1885	ret = fiemap_prep(inode, fieinfo, start, &len,
				1886	FIEMAP_FLAG_SYNC \| FIEMAP_FLAG_XATTR);
				1887	if (ret)
				1888	return ret;
				1889
				1890	inode_lock(inode);
				1891
				1892	maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
				1893	if (start > maxbytes) {
				1894	ret = -EFBIG;
				1895	goto out;
				1896	}
				1897
				1898	if (len > maxbytes \|\| (maxbytes - len) < start)
				1899	len = maxbytes - start;
				1900
				1901	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
				1902	ret = f2fs_xattr_fiemap(inode, fieinfo);
				1903	goto out;
				1904	}
				1905
				1906	if (f2fs_has_inline_data(inode) \|\| f2fs_has_inline_dentry(inode)) {
				1907	ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
				1908	if (ret != -EAGAIN)
				1909	goto out;
				1910	}
				1911
				1912	if (bytes_to_blks(inode, len) == 0)
				1913	len = blks_to_bytes(inode, 1);
				1914
				1915	start_blk = bytes_to_blks(inode, start);
				1916	last_blk = bytes_to_blks(inode, start + len - 1);
				1917
				1918	next:
				1919	memset(&map, 0, sizeof(map));
				1920	map.m_lblk = start_blk;
				1921	map.m_len = bytes_to_blks(inode, len);
				1922	map.m_next_pgofs = &next_pgofs;
				1923	map.m_seg_type = NO_CHECK_TYPE;
				1924
				1925	if (compr_cluster)
				1926	map.m_len = cluster_size - 1;
				1927
				1928	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
				1929	if (ret)
				1930	goto out;
				1931
				1932	/* HOLE */
				1933	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
				1934	start_blk = next_pgofs;
				1935
				1936	if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
				1937	max_inode_blocks(inode)))
				1938	goto prep_next;
				1939
				1940	flags \|= FIEMAP_EXTENT_LAST;
				1941	}
				1942
				1943	if (size) {
				1944	flags \|= FIEMAP_EXTENT_MERGED;
				1945	if (IS_ENCRYPTED(inode))
				1946	flags \|= FIEMAP_EXTENT_DATA_ENCRYPTED;
				1947
				1948	ret = fiemap_fill_next_extent(fieinfo, logical,
				1949	phys, size, flags);
				1950	trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
				1951	if (ret)
				1952	goto out;
				1953	size = 0;
				1954	}
				1955
				1956	if (start_blk > last_blk)
				1957	goto out;
				1958
				1959	if (compr_cluster) {
				1960	compr_cluster = false;
				1961
				1962
				1963	logical = blks_to_bytes(inode, start_blk - 1);
				1964	phys = blks_to_bytes(inode, map.m_pblk);
				1965	size = blks_to_bytes(inode, cluster_size);
				1966
				1967	flags \|= FIEMAP_EXTENT_ENCODED;
				1968
				1969	start_blk += cluster_size - 1;
				1970
				1971	if (start_blk > last_blk)
				1972	goto out;
				1973
				1974	goto prep_next;
				1975	}
				1976
				1977	if (map.m_pblk == COMPRESS_ADDR) {
				1978	compr_cluster = true;
				1979	start_blk++;
				1980	goto prep_next;
				1981	}
				1982
				1983	logical = blks_to_bytes(inode, start_blk);
				1984	phys = blks_to_bytes(inode, map.m_pblk);
				1985	size = blks_to_bytes(inode, map.m_len);
				1986	flags = 0;
				1987	if (map.m_flags & F2FS_MAP_UNWRITTEN)
				1988	flags = FIEMAP_EXTENT_UNWRITTEN;
				1989
				1990	start_blk += bytes_to_blks(inode, size);
				1991
				1992	prep_next:
				1993	cond_resched();
				1994	if (fatal_signal_pending(current))
				1995	ret = -EINTR;
				1996	else
				1997	goto next;
				1998	out:
				1999	if (ret == 1)
				2000	ret = 0;
				2001
				2002	inode_unlock(inode);
				2003	return ret;
				2004	}
				2005
				2006	static inline loff_t f2fs_readpage_limit(struct inode *inode)
				2007	{
				2008	if (IS_ENABLED(CONFIG_FS_VERITY) &&
				2009	(IS_VERITY(inode) \|\| f2fs_verity_in_progress(inode)))
				2010	return inode->i_sb->s_maxbytes;
				2011
				2012	return i_size_read(inode);
				2013	}
				2014
				2015	static int f2fs_read_single_page(struct inode inode, struct page page,
				2016	unsigned nr_pages,
				2017	struct f2fs_map_blocks *map,
				2018	struct bio **bio_ret,
				2019	sector_t *last_block_in_bio,
				2020	bool is_readahead)
				2021	{
				2022	struct bio bio = bio_ret;
				2023	const unsigned blocksize = blks_to_bytes(inode, 1);
				2024	sector_t block_in_file;
				2025	sector_t last_block;
				2026	sector_t last_block_in_file;
				2027	sector_t block_nr;
				2028	int ret = 0;
				2029
				2030	block_in_file = (sector_t)page_index(page);
				2031	last_block = block_in_file + nr_pages;
				2032	last_block_in_file = bytes_to_blks(inode,
				2033	f2fs_readpage_limit(inode) + blocksize - 1);
				2034	if (last_block > last_block_in_file)
				2035	last_block = last_block_in_file;
				2036
				2037	/* just zeroing out page which is beyond EOF */
				2038	if (block_in_file >= last_block)
				2039	goto zero_out;
				2040	/*
				2041	* Map blocks using the previous result first.
				2042	*/
				2043	if ((map->m_flags & F2FS_MAP_MAPPED) &&
				2044	block_in_file > map->m_lblk &&
				2045	block_in_file < (map->m_lblk + map->m_len))
				2046	goto got_it;
				2047
				2048	/*
				2049	* Then do more f2fs_map_blocks() calls until we are
				2050	* done with this page.
				2051	*/
				2052	map->m_lblk = block_in_file;
				2053	map->m_len = last_block - block_in_file;
				2054
				2055	ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
				2056	if (ret)
				2057	goto out;
				2058	got_it:
				2059	if ((map->m_flags & F2FS_MAP_MAPPED)) {
				2060	block_nr = map->m_pblk + block_in_file - map->m_lblk;
				2061	SetPageMappedToDisk(page);
				2062
				2063	if (!PageUptodate(page) && (!PageSwapCache(page) &&
				2064	!cleancache_get_page(page))) {
				2065	SetPageUptodate(page);
				2066	goto confused;
				2067	}
				2068
				2069	if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
				2070	DATA_GENERIC_ENHANCE_READ)) {
				2071	ret = -EFSCORRUPTED;
				2072	goto out;
				2073	}
				2074	} else {
				2075	zero_out:
				2076	zero_user_segment(page, 0, PAGE_SIZE);
				2077	if (f2fs_need_verity(inode, page->index) &&
				2078	!fsverity_verify_page(page)) {
				2079	ret = -EIO;
				2080	goto out;
				2081	}
				2082	if (!PageUptodate(page))
				2083	SetPageUptodate(page);
				2084	unlock_page(page);
				2085	goto out;
				2086	}
				2087
				2088	/*
				2089	* This page will go to BIO. Do we need to send this
				2090	* BIO off first?
				2091	*/
				2092	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
				2093	*last_block_in_bio, block_nr) \|\|
				2094	!f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
				2095	submit_and_realloc:
				2096	__submit_bio(F2FS_I_SB(inode), bio, DATA);
				2097	bio = NULL;
				2098	}
				2099	if (bio == NULL) {
				2100	bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				2101	is_readahead ? REQ_RAHEAD : 0, page->index,
				2102	false);
				2103	if (IS_ERR(bio)) {
				2104	ret = PTR_ERR(bio);
				2105	bio = NULL;
				2106	goto out;
				2107	}
				2108	}
				2109
				2110	/*
				2111	* If the page is under writeback, we need to wait for
				2112	* its completion to see the correct decrypted data.
				2113	*/
				2114	f2fs_wait_on_block_writeback(inode, block_nr);
				2115
				2116	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
				2117	goto submit_and_realloc;
				2118
				2119	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
				2120	f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
				2121	ClearPageError(page);
				2122	*last_block_in_bio = block_nr;
				2123	goto out;
				2124	confused:
				2125	if (bio) {
				2126	__submit_bio(F2FS_I_SB(inode), bio, DATA);
				2127	bio = NULL;
				2128	}
				2129	unlock_page(page);
				2130	out:
				2131	*bio_ret = bio;
				2132	return ret;
				2133	}
				2134
				2135	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2136	int f2fs_read_multi_pages(struct compress_ctx cc, struct bio *bio_ret,
				2137	unsigned nr_pages, sector_t *last_block_in_bio,
				2138	bool is_readahead, bool for_write)
				2139	{
				2140	struct dnode_of_data dn;
				2141	struct inode *inode = cc->inode;
				2142	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				2143	struct bio bio = bio_ret;
				2144	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
				2145	sector_t last_block_in_file;
				2146	const unsigned blocksize = blks_to_bytes(inode, 1);
				2147	struct decompress_io_ctx *dic = NULL;
				2148	int i;
				2149	int ret = 0;
				2150
				2151	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
				2152
				2153	last_block_in_file = bytes_to_blks(inode,
				2154	f2fs_readpage_limit(inode) + blocksize - 1);
				2155
				2156	/* get rid of pages beyond EOF */
				2157	for (i = 0; i < cc->cluster_size; i++) {
				2158	struct page *page = cc->rpages[i];
				2159
				2160	if (!page)
				2161	continue;
				2162	if ((sector_t)page->index >= last_block_in_file) {
				2163	zero_user_segment(page, 0, PAGE_SIZE);
				2164	if (!PageUptodate(page))
				2165	SetPageUptodate(page);
				2166	} else if (!PageUptodate(page)) {
				2167	continue;
				2168	}
				2169	unlock_page(page);
				2170	cc->rpages[i] = NULL;
				2171	cc->nr_rpages--;
				2172	}
				2173
				2174	/* we are done since all pages are beyond EOF */
				2175	if (f2fs_cluster_is_empty(cc))
				2176	goto out;
				2177
				2178	set_new_dnode(&dn, inode, NULL, NULL, 0);
				2179	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
				2180	if (ret)
				2181	goto out;
				2182
				2183	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
				2184
				2185	for (i = 1; i < cc->cluster_size; i++) {
				2186	block_t blkaddr;
				2187
				2188	blkaddr = data_blkaddr(dn.inode, dn.node_page,
				2189	dn.ofs_in_node + i);
				2190
				2191	if (!__is_valid_data_blkaddr(blkaddr))
				2192	break;
				2193
				2194	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
				2195	ret = -EFAULT;
				2196	goto out_put_dnode;
				2197	}
				2198	cc->nr_cpages++;
				2199	}
				2200
				2201	/* nothing to decompress */
				2202	if (cc->nr_cpages == 0) {
				2203	ret = 0;
				2204	goto out_put_dnode;
				2205	}
				2206
				2207	dic = f2fs_alloc_dic(cc);
				2208	if (IS_ERR(dic)) {
				2209	ret = PTR_ERR(dic);
				2210	goto out_put_dnode;
				2211	}
				2212
				2213	for (i = 0; i < cc->nr_cpages; i++) {
				2214	struct page *page = dic->cpages[i];
				2215	block_t blkaddr;
				2216	struct bio_post_read_ctx *ctx;
				2217
				2218	blkaddr = data_blkaddr(dn.inode, dn.node_page,
				2219	dn.ofs_in_node + i + 1);
				2220
				2221	f2fs_wait_on_block_writeback(inode, blkaddr);
				2222
				2223	if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
				2224	if (atomic_dec_and_test(&dic->remaining_pages))
				2225	f2fs_decompress_cluster(dic);
				2226	continue;
				2227	}
				2228
				2229	if (bio && (!page_is_mergeable(sbi, bio,
				2230	*last_block_in_bio, blkaddr) \|\|
				2231	!f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
				2232	submit_and_realloc:
				2233	__submit_bio(sbi, bio, DATA);
				2234	bio = NULL;
				2235	}
				2236
				2237	if (!bio) {
				2238	bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
				2239	is_readahead ? REQ_RAHEAD : 0,
				2240	page->index, for_write);
				2241	if (IS_ERR(bio)) {
				2242	ret = PTR_ERR(bio);
				2243	f2fs_decompress_end_io(dic, ret);
				2244	f2fs_put_dnode(&dn);
				2245	*bio_ret = NULL;
				2246	return ret;
				2247	}
				2248	}
				2249
				2250	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
				2251	goto submit_and_realloc;
				2252
				2253	ctx = bio->bi_private;
				2254	ctx->enabled_steps \|= STEP_DECOMPRESS;
				2255	refcount_inc(&dic->refcnt);
				2256
				2257	inc_page_count(sbi, F2FS_RD_DATA);
				2258	f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
				2259	f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
				2260	ClearPageError(page);
				2261	*last_block_in_bio = blkaddr;
				2262	}
				2263
				2264	f2fs_put_dnode(&dn);
				2265
				2266	*bio_ret = bio;
				2267	return 0;
				2268
				2269	out_put_dnode:
				2270	f2fs_put_dnode(&dn);
				2271	out:
				2272	for (i = 0; i < cc->cluster_size; i++) {
				2273	if (cc->rpages[i]) {
				2274	ClearPageUptodate(cc->rpages[i]);
				2275	ClearPageError(cc->rpages[i]);
				2276	unlock_page(cc->rpages[i]);
				2277	}
				2278	}
				2279	*bio_ret = bio;
				2280	return ret;
				2281	}
				2282	#endif
				2283
				2284	/*
				2285	* This function was originally taken from fs/mpage.c, and customized for f2fs.
				2286	* Major change was from block_size == page_size in f2fs by default.
				2287	*
				2288	* Note that the aops->readpages() function is ONLY used for read-ahead. If
				2289	* this function ever deviates from doing just read-ahead, it should either
				2290	* use ->readpage() or do the necessary surgery to decouple ->readpages()
				2291	* from read-ahead.
				2292	*/
				2293	int f2fs_mpage_readpages(struct address_space *mapping,
				2294	struct list_head pages, struct page page,
				2295	unsigned nr_pages, bool is_readahead)
				2296	{
				2297	struct bio *bio = NULL;
				2298	sector_t last_block_in_bio = 0;
				2299	struct inode *inode = mapping->host;
				2300	struct f2fs_map_blocks map;
				2301	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2302	struct compress_ctx cc = {
				2303	.inode = inode,
				2304	.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
				2305	.cluster_size = F2FS_I(inode)->i_cluster_size,
				2306	.cluster_idx = NULL_CLUSTER,
				2307	.rpages = NULL,
				2308	.cpages = NULL,
				2309	.nr_rpages = 0,
				2310	.nr_cpages = 0,
				2311	};
				2312	#endif
				2313	unsigned max_nr_pages = nr_pages;
				2314	int ret = 0;
				2315	bool drop_ra = false;
				2316
				2317	/* this is real from f2fs_merkle_tree_readahead() in old kernel only. */
				2318	if (!nr_pages)
				2319	return 0;
				2320
				2321	map.m_pblk = 0;
				2322	map.m_lblk = 0;
				2323	map.m_len = 0;
				2324	map.m_flags = 0;
				2325	map.m_next_pgofs = NULL;
				2326	map.m_next_extent = NULL;
				2327	map.m_seg_type = NO_CHECK_TYPE;
				2328	map.m_may_create = false;
				2329
				2330	/*
				2331	* Two readahead threads for same address range can cause race condition
				2332	* which fragments sequential read IOs. So let's avoid each other.
				2333	*/
				2334	if (pages && is_readahead) {
				2335	page = list_last_entry(pages, struct page, lru);
				2336	if (READ_ONCE(F2FS_I(inode)->ra_offset) == page_index(page))
				2337	drop_ra = true;
				2338	else
				2339	WRITE_ONCE(F2FS_I(inode)->ra_offset, page_index(page));
				2340	}
				2341
				2342	for (; nr_pages; nr_pages--) {
				2343	if (pages) {
				2344	page = list_last_entry(pages, struct page, lru);
				2345
				2346	prefetchw(&page->flags);
				2347	list_del(&page->lru);
				2348	if (drop_ra \|\| add_to_page_cache_lru(page, mapping,
				2349	page_index(page),
				2350	readahead_gfp_mask(mapping)))
				2351	goto next_page;
				2352	}
				2353
				2354	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2355	if (f2fs_compressed_file(inode)) {
				2356	/* there are remained comressed pages, submit them */
				2357	if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
				2358	ret = f2fs_read_multi_pages(&cc, &bio,
				2359	max_nr_pages,
				2360	&last_block_in_bio,
				2361	is_readahead, false);
				2362	f2fs_destroy_compress_ctx(&cc, false);
				2363	if (ret)
				2364	goto set_error_page;
				2365	}
				2366	ret = f2fs_is_compressed_cluster(inode, page->index);
				2367	if (ret < 0)
				2368	goto set_error_page;
				2369	else if (!ret)
				2370	goto read_single_page;
				2371
				2372	ret = f2fs_init_compress_ctx(&cc);
				2373	if (ret)
				2374	goto set_error_page;
				2375
				2376	f2fs_compress_ctx_add_page(&cc, page);
				2377
				2378	goto next_page;
				2379	}
				2380	read_single_page:
				2381	#endif
				2382
				2383	ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
				2384	&bio, &last_block_in_bio, is_readahead);
				2385	if (ret) {
				2386	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2387	set_error_page:
				2388	#endif
				2389	SetPageError(page);
				2390	zero_user_segment(page, 0, PAGE_SIZE);
				2391	unlock_page(page);
				2392	}
				2393	next_page:
				2394	if (pages)
				2395	put_page(page);
				2396
				2397	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2398	if (f2fs_compressed_file(inode)) {
				2399	/* last page */
				2400	if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
				2401	ret = f2fs_read_multi_pages(&cc, &bio,
				2402	max_nr_pages,
				2403	&last_block_in_bio,
				2404	is_readahead, false);
				2405	f2fs_destroy_compress_ctx(&cc, false);
				2406	}
				2407	}
				2408	#endif
				2409	}
				2410	BUG_ON(pages && !list_empty(pages));
				2411	if (bio)
				2412	__submit_bio(F2FS_I_SB(inode), bio, DATA);
				2413
				2414	if (pages && is_readahead && !drop_ra)
				2415	WRITE_ONCE(F2FS_I(inode)->ra_offset, -1);
				2416	return pages ? 0 : ret;
				2417	}
				2418
				2419	static int f2fs_read_data_page(struct file file, struct page page)
				2420	{
				2421	struct inode *inode = page_file_mapping(page)->host;
				2422	int ret = -EAGAIN;
				2423
				2424	trace_f2fs_readpage(page, DATA);
				2425
				2426	if (!f2fs_is_compress_backend_ready(inode)) {
				2427	unlock_page(page);
				2428	return -EOPNOTSUPP;
				2429	}
				2430
				2431	/* If the file has inline data, try to read it directly */
				2432	if (f2fs_has_inline_data(inode))
				2433	ret = f2fs_read_inline_data(inode, page);
				2434	if (ret == -EAGAIN)
				2435	ret = f2fs_mpage_readpages(page_file_mapping(page),
				2436	NULL, page, 1, false);
				2437	return ret;
				2438	}
				2439
				2440	static int f2fs_read_data_pages(struct file *file,
				2441	struct address_space *mapping,
				2442	struct list_head *pages, unsigned nr_pages)
				2443	{
				2444	struct inode *inode = mapping->host;
				2445	struct page *page = list_last_entry(pages, struct page, lru);
				2446
				2447	trace_f2fs_readpages(inode, page, nr_pages);
				2448
				2449	if (!f2fs_is_compress_backend_ready(inode))
				2450	return 0;
				2451
				2452	/* If the file has inline data, skip readpages */
				2453	if (f2fs_has_inline_data(inode))
				2454	return 0;
				2455
				2456	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
				2457	}
				2458
				2459	int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
				2460	{
				2461	struct inode *inode = fio->page->mapping->host;
				2462	struct page mpage, page;
				2463	gfp_t gfp_flags = GFP_NOFS;
				2464
				2465	if (!f2fs_encrypted_file(inode))
				2466	return 0;
				2467
				2468	page = fio->compressed_page ? fio->compressed_page : fio->page;
				2469
				2470	/* wait for GCed page writeback via META_MAPPING */
				2471	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
				2472
				2473	if (fscrypt_inode_uses_inline_crypto(inode))
				2474	return 0;
				2475
				2476	retry_encrypt:
				2477	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
				2478	PAGE_SIZE, 0, gfp_flags);
				2479	if (IS_ERR(fio->encrypted_page)) {
				2480	/* flush pending IOs and wait for a while in the ENOMEM case */
				2481	if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
				2482	f2fs_flush_merged_writes(fio->sbi);
				2483	congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
				2484	gfp_flags \|= __GFP_NOFAIL;
				2485	goto retry_encrypt;
				2486	}
				2487	return PTR_ERR(fio->encrypted_page);
				2488	}
				2489
				2490	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
				2491	if (mpage) {
				2492	if (PageUptodate(mpage))
				2493	memcpy(page_address(mpage),
				2494	page_address(fio->encrypted_page), PAGE_SIZE);
				2495	f2fs_put_page(mpage, 1);
				2496	}
				2497	return 0;
				2498	}
				2499
				2500	static inline bool check_inplace_update_policy(struct inode *inode,
				2501	struct f2fs_io_info *fio)
				2502	{
				2503	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				2504	unsigned int policy = SM_I(sbi)->ipu_policy;
				2505
				2506	if (policy & (0x1 << F2FS_IPU_FORCE))
				2507	return true;
				2508	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
				2509	return true;
				2510	if (policy & (0x1 << F2FS_IPU_UTIL) &&
				2511	utilization(sbi) > SM_I(sbi)->min_ipu_util)
				2512	return true;
				2513	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
				2514	utilization(sbi) > SM_I(sbi)->min_ipu_util)
				2515	return true;
				2516
				2517	/*
				2518	* IPU for rewrite async pages
				2519	*/
				2520	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
				2521	fio && fio->op == REQ_OP_WRITE &&
				2522	!(fio->op_flags & REQ_SYNC) &&
				2523	!IS_ENCRYPTED(inode))
				2524	return true;
				2525
				2526	/* this is only set during fdatasync */
				2527	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
				2528	is_inode_flag_set(inode, FI_NEED_IPU))
				2529	return true;
				2530
				2531	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
				2532	!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
				2533	return true;
				2534
				2535	return false;
				2536	}
				2537
				2538	bool f2fs_should_update_inplace(struct inode inode, struct f2fs_io_info fio)
				2539	{
				2540	/* swap file is migrating in aligned write mode */
				2541	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
				2542	return false;
				2543
				2544	if (f2fs_is_pinned_file(inode))
				2545	return true;
				2546
				2547	/* if this is cold file, we should overwrite to avoid fragmentation */
				2548	if (file_is_cold(inode))
				2549	return true;
				2550
				2551	return check_inplace_update_policy(inode, fio);
				2552	}
				2553
				2554	bool f2fs_should_update_outplace(struct inode inode, struct f2fs_io_info fio)
				2555	{
				2556	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				2557
				2558	if (f2fs_lfs_mode(sbi))
				2559	return true;
				2560	if (S_ISDIR(inode->i_mode))
				2561	return true;
				2562	if (IS_NOQUOTA(inode))
				2563	return true;
				2564	if (f2fs_is_atomic_file(inode))
				2565	return true;
				2566	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
				2567	return true;
				2568
				2569	/* swap file is migrating in aligned write mode */
				2570	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
				2571	return true;
				2572
				2573	if (fio) {
				2574	if (page_private_gcing(fio->page))
				2575	return true;
				2576	if (page_private_dummy(fio->page))
				2577	return true;
				2578	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
				2579	f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
				2580	return true;
				2581	}
				2582	return false;
				2583	}
				2584
				2585	static inline bool need_inplace_update(struct f2fs_io_info *fio)
				2586	{
				2587	struct inode *inode = fio->page->mapping->host;
				2588
				2589	if (f2fs_should_update_outplace(inode, fio))
				2590	return false;
				2591
				2592	return f2fs_should_update_inplace(inode, fio);
				2593	}
				2594
				2595	int f2fs_do_write_data_page(struct f2fs_io_info *fio)
				2596	{
				2597	struct page *page = fio->page;
				2598	struct inode *inode = page->mapping->host;
				2599	struct dnode_of_data dn;
				2600	struct extent_info ei = {0,0,0};
				2601	struct node_info ni;
				2602	bool ipu_force = false;
				2603	int err = 0;
				2604
				2605	set_new_dnode(&dn, inode, NULL, NULL, 0);
				2606	if (need_inplace_update(fio) &&
				2607	f2fs_lookup_extent_cache(inode, page->index, &ei)) {
				2608	fio->old_blkaddr = ei.blk + page->index - ei.fofs;
				2609
				2610	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
				2611	DATA_GENERIC_ENHANCE))
				2612	return -EFSCORRUPTED;
				2613
				2614	ipu_force = true;
				2615	fio->need_lock = LOCK_DONE;
				2616	goto got_it;
				2617	}
				2618
				2619	/* Deadlock due to between page->lock and f2fs_lock_op */
				2620	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
				2621	return -EAGAIN;
				2622
				2623	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
				2624	if (err)
				2625	goto out;
				2626
				2627	fio->old_blkaddr = dn.data_blkaddr;
				2628
				2629	/* This page is already truncated */
				2630	if (fio->old_blkaddr == NULL_ADDR) {
				2631	ClearPageUptodate(page);
				2632	clear_page_private_gcing(page);
				2633	goto out_writepage;
				2634	}
				2635	got_it:
				2636	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
				2637	!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
				2638	DATA_GENERIC_ENHANCE)) {
				2639	err = -EFSCORRUPTED;
				2640	goto out_writepage;
				2641	}
				2642	/*
				2643	* If current allocation needs SSR,
				2644	* it had better in-place writes for updated data.
				2645	*/
				2646	if (ipu_force \|\|
				2647	(__is_valid_data_blkaddr(fio->old_blkaddr) &&
				2648	need_inplace_update(fio))) {
				2649	err = f2fs_encrypt_one_page(fio);
				2650	if (err)
				2651	goto out_writepage;
				2652
				2653	set_page_writeback(page);
				2654	ClearPageError(page);
				2655	f2fs_put_dnode(&dn);
				2656	if (fio->need_lock == LOCK_REQ)
				2657	f2fs_unlock_op(fio->sbi);
				2658	err = f2fs_inplace_write_data(fio);
				2659	if (err) {
				2660	if (fscrypt_inode_uses_fs_layer_crypto(inode))
				2661	fscrypt_finalize_bounce_page(&fio->encrypted_page);
				2662	if (PageWriteback(page))
				2663	end_page_writeback(page);
				2664	} else {
				2665	set_inode_flag(inode, FI_UPDATE_WRITE);
				2666	}
				2667	trace_f2fs_do_write_data_page(fio->page, IPU);
				2668	return err;
				2669	}
				2670
				2671	if (fio->need_lock == LOCK_RETRY) {
				2672	if (!f2fs_trylock_op(fio->sbi)) {
				2673	err = -EAGAIN;
				2674	goto out_writepage;
				2675	}
				2676	fio->need_lock = LOCK_REQ;
				2677	}
				2678
				2679	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
				2680	if (err)
				2681	goto out_writepage;
				2682
				2683	fio->version = ni.version;
				2684
				2685	err = f2fs_encrypt_one_page(fio);
				2686	if (err)
				2687	goto out_writepage;
				2688
				2689	set_page_writeback(page);
				2690	ClearPageError(page);
				2691
				2692	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
				2693	f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
				2694
				2695	/* LFS mode write path */
				2696	f2fs_outplace_write_data(&dn, fio);
				2697	trace_f2fs_do_write_data_page(page, OPU);
				2698	set_inode_flag(inode, FI_APPEND_WRITE);
				2699	if (page->index == 0)
				2700	set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
				2701	out_writepage:
				2702	f2fs_put_dnode(&dn);
				2703	out:
				2704	if (fio->need_lock == LOCK_REQ)
				2705	f2fs_unlock_op(fio->sbi);
				2706	return err;
				2707	}
				2708
				2709	int f2fs_write_single_data_page(struct page page, int submitted,
				2710	struct bio **bio,
				2711	sector_t *last_block,
				2712	struct writeback_control *wbc,
				2713	enum iostat_type io_type,
				2714	int compr_blocks,
				2715	bool allow_balance)
				2716	{
				2717	struct inode *inode = page->mapping->host;
				2718	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				2719	loff_t i_size = i_size_read(inode);
				2720	const pgoff_t end_index = ((unsigned long long)i_size)
				2721	>> PAGE_SHIFT;
				2722	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
				2723	unsigned offset = 0;
				2724	bool need_balance_fs = false;
				2725	int err = 0;
				2726	struct f2fs_io_info fio = {
				2727	.sbi = sbi,
				2728	.ino = inode->i_ino,
				2729	.type = DATA,
				2730	.op = REQ_OP_WRITE,
				2731	.op_flags = wbc_to_write_flags(wbc),
				2732	.old_blkaddr = NULL_ADDR,
				2733	.page = page,
				2734	.encrypted_page = NULL,
				2735	.submitted = false,
				2736	.compr_blocks = compr_blocks,
				2737	.need_lock = LOCK_RETRY,
				2738	.io_type = io_type,
				2739	.io_wbc = wbc,
				2740	.bio = bio,
				2741	.last_block = last_block,
				2742	};
				2743
				2744	trace_f2fs_writepage(page, DATA);
				2745
				2746	/* we should bypass data pages to proceed the kworkder jobs */
				2747	if (unlikely(f2fs_cp_error(sbi))) {
				2748	mapping_set_error(page->mapping, -EIO);
				2749	/*
				2750	* don't drop any dirty dentry pages for keeping lastest
				2751	* directory structure.
				2752	*/
				2753	if (S_ISDIR(inode->i_mode) &&
				2754	!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
				2755	goto redirty_out;
				2756	goto out;
				2757	}
				2758
				2759	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
				2760	goto redirty_out;
				2761
				2762	if (page->index < end_index \|\|
				2763	f2fs_verity_in_progress(inode) \|\|
				2764	compr_blocks)
				2765	goto write;
				2766
				2767	/*
				2768	* If the offset is out-of-range of file size,
				2769	* this page does not have to be written to disk.
				2770	*/
				2771	offset = i_size & (PAGE_SIZE - 1);
				2772	if ((page->index >= end_index + 1) \|\| !offset)
				2773	goto out;
				2774
				2775	zero_user_segment(page, offset, PAGE_SIZE);
				2776	write:
				2777	if (f2fs_is_drop_cache(inode))
				2778	goto out;
				2779	/* we should not write 0'th page having journal header */
				2780	if (f2fs_is_volatile_file(inode) && (!page->index \|\|
				2781	(!wbc->for_reclaim &&
				2782	f2fs_available_free_memory(sbi, BASE_CHECK))))
				2783	goto redirty_out;
				2784
				2785	/* Dentry/quota blocks are controlled by checkpoint */
				2786	if (S_ISDIR(inode->i_mode) \|\| IS_NOQUOTA(inode)) {
				2787	/*
				2788	* We need to wait for node_write to avoid block allocation during
				2789	* checkpoint. This can only happen to quota writes which can cause
				2790	* the below discard race condition.
				2791	*/
				2792	if (IS_NOQUOTA(inode))
				2793	down_read(&sbi->node_write);
				2794
				2795	fio.need_lock = LOCK_DONE;
				2796	err = f2fs_do_write_data_page(&fio);
				2797
				2798	if (IS_NOQUOTA(inode))
				2799	up_read(&sbi->node_write);
				2800
				2801	goto done;
				2802	}
				2803
				2804	if (!wbc->for_reclaim)
				2805	need_balance_fs = true;
				2806	else if (has_not_enough_free_secs(sbi, 0, 0))
				2807	goto redirty_out;
				2808	else
				2809	set_inode_flag(inode, FI_HOT_DATA);
				2810
				2811	err = -EAGAIN;
				2812	if (f2fs_has_inline_data(inode)) {
				2813	err = f2fs_write_inline_data(inode, page);
				2814	if (!err)
				2815	goto out;
				2816	}
				2817
				2818	if (err == -EAGAIN) {
				2819	err = f2fs_do_write_data_page(&fio);
				2820	if (err == -EAGAIN) {
				2821	fio.need_lock = LOCK_REQ;
				2822	err = f2fs_do_write_data_page(&fio);
				2823	}
				2824	}
				2825
				2826	if (err) {
				2827	file_set_keep_isize(inode);
				2828	} else {
				2829	spin_lock(&F2FS_I(inode)->i_size_lock);
				2830	if (F2FS_I(inode)->last_disk_size < psize)
				2831	F2FS_I(inode)->last_disk_size = psize;
				2832	spin_unlock(&F2FS_I(inode)->i_size_lock);
				2833	}
				2834
				2835	done:
				2836	if (err && err != -ENOENT)
				2837	goto redirty_out;
				2838
				2839	out:
				2840	inode_dec_dirty_pages(inode);
				2841	if (err) {
				2842	ClearPageUptodate(page);
				2843	clear_page_private_gcing(page);
				2844	}
				2845
				2846	if (wbc->for_reclaim) {
				2847	f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
				2848	clear_inode_flag(inode, FI_HOT_DATA);
				2849	f2fs_remove_dirty_inode(inode);
				2850	submitted = NULL;
				2851	}
				2852	unlock_page(page);
				2853	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
				2854	!F2FS_I(inode)->cp_task && allow_balance)
				2855	f2fs_balance_fs(sbi, need_balance_fs);
				2856
				2857	if (unlikely(f2fs_cp_error(sbi))) {
				2858	f2fs_submit_merged_write(sbi, DATA);
				2859	f2fs_submit_merged_ipu_write(sbi, bio, NULL);
				2860	submitted = NULL;
				2861	}
				2862
				2863	if (submitted)
				2864	*submitted = fio.submitted ? 1 : 0;
				2865
				2866	return 0;
				2867
				2868	redirty_out:
				2869	redirty_page_for_writepage(wbc, page);
				2870	/*
				2871	* pageout() in MM traslates EAGAIN, so calls handle_write_error()
				2872	* -> mapping_set_error() -> set_bit(AS_EIO, ...).
				2873	* file_write_and_wait_range() will see EIO error, which is critical
				2874	* to return value of fsync() followed by atomic_write failure to user.
				2875	*/
				2876	if (!err \|\| wbc->for_reclaim)
				2877	return AOP_WRITEPAGE_ACTIVATE;
				2878	unlock_page(page);
				2879	return err;
				2880	}
				2881
				2882	static int f2fs_write_data_page(struct page *page,
				2883	struct writeback_control *wbc)
				2884	{
				2885	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2886	struct inode *inode = page->mapping->host;
				2887
				2888	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
				2889	goto out;
				2890
				2891	if (f2fs_compressed_file(inode)) {
				2892	if (f2fs_is_compressed_cluster(inode, page->index)) {
				2893	redirty_page_for_writepage(wbc, page);
				2894	return AOP_WRITEPAGE_ACTIVATE;
				2895	}
				2896	}
				2897	out:
				2898	#endif
				2899
				2900	return f2fs_write_single_data_page(page, NULL, NULL, NULL,
				2901	wbc, FS_DATA_IO, 0, true);
				2902	}
				2903
				2904	/*
				2905	* This function was copied from write_cache_pages from mm/page-writeback.c.
				2906	* The major change is making write step of cold data page separately from
				2907	* warm/hot data page.
				2908	*/
				2909	static int f2fs_write_cache_pages(struct address_space *mapping,
				2910	struct writeback_control *wbc,
				2911	enum iostat_type io_type)
				2912	{
				2913	int ret = 0;
				2914	int done = 0, retry = 0;
				2915	struct pagevec pvec;
				2916	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
				2917	struct bio *bio = NULL;
				2918	sector_t last_block;
				2919	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2920	struct inode *inode = mapping->host;
				2921	struct compress_ctx cc = {
				2922	.inode = inode,
				2923	.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
				2924	.cluster_size = F2FS_I(inode)->i_cluster_size,
				2925	.cluster_idx = NULL_CLUSTER,
				2926	.rpages = NULL,
				2927	.nr_rpages = 0,
				2928	.cpages = NULL,
				2929	.rbuf = NULL,
				2930	.cbuf = NULL,
				2931	.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
				2932	.private = NULL,
				2933	};
				2934	#endif
				2935	int nr_pages;
				2936	pgoff_t uninitialized_var(writeback_index);
				2937	pgoff_t index;
				2938	pgoff_t end; /* Inclusive */
				2939	pgoff_t done_index;
				2940	int range_whole = 0;
				2941	xa_mark_t tag;
				2942	int nwritten = 0;
				2943	int submitted = 0;
				2944	int i;
				2945
				2946	pagevec_init(&pvec);
				2947
				2948	if (get_dirty_pages(mapping->host) <=
				2949	SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
				2950	set_inode_flag(mapping->host, FI_HOT_DATA);
				2951	else
				2952	clear_inode_flag(mapping->host, FI_HOT_DATA);
				2953
				2954	if (wbc->range_cyclic) {
				2955	writeback_index = mapping->writeback_index; /* prev offset */
				2956	index = writeback_index;
				2957	end = -1;
				2958	} else {
				2959	index = wbc->range_start >> PAGE_SHIFT;
				2960	end = wbc->range_end >> PAGE_SHIFT;
				2961	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
				2962	range_whole = 1;
				2963	}
				2964	if (wbc->sync_mode == WB_SYNC_ALL \|\| wbc->tagged_writepages)
				2965	tag = PAGECACHE_TAG_TOWRITE;
				2966	else
				2967	tag = PAGECACHE_TAG_DIRTY;
				2968	retry:
				2969	retry = 0;
				2970	if (wbc->sync_mode == WB_SYNC_ALL \|\| wbc->tagged_writepages)
				2971	tag_pages_for_writeback(mapping, index, end);
				2972	done_index = index;
				2973	while (!done && !retry && (index <= end)) {
				2974	nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				2975	tag);
				2976	if (nr_pages == 0)
				2977	break;
				2978
				2979	for (i = 0; i < nr_pages; i++) {
				2980	struct page *page = pvec.pages[i];
				2981	bool need_readd;
				2982	readd:
				2983	need_readd = false;
				2984	#ifdef CONFIG_F2FS_FS_COMPRESSION
				2985	if (f2fs_compressed_file(inode)) {
				2986	ret = f2fs_init_compress_ctx(&cc);
				2987	if (ret) {
				2988	done = 1;
				2989	break;
				2990	}
				2991
				2992	if (!f2fs_cluster_can_merge_page(&cc,
				2993	page->index)) {
				2994	ret = f2fs_write_multi_pages(&cc,
				2995	&submitted, wbc, io_type);
				2996	if (!ret)
				2997	need_readd = true;
				2998	goto result;
				2999	}
				3000
				3001	if (unlikely(f2fs_cp_error(sbi)))
				3002	goto lock_page;
				3003
				3004	if (f2fs_cluster_is_empty(&cc)) {
				3005	void *fsdata = NULL;
				3006	struct page *pagep;
				3007	int ret2;
				3008
				3009	ret2 = f2fs_prepare_compress_overwrite(
				3010	inode, &pagep,
				3011	page->index, &fsdata);
				3012	if (ret2 < 0) {
				3013	ret = ret2;
				3014	done = 1;
				3015	break;
				3016	} else if (ret2 &&
				3017	!f2fs_compress_write_end(inode,
				3018	fsdata, page->index,
				3019	1)) {
				3020	retry = 1;
				3021	break;
				3022	}
				3023	} else {
				3024	goto lock_page;
				3025	}
				3026	}
				3027	#endif
				3028	/* give a priority to WB_SYNC threads */
				3029	if (atomic_read(&sbi->wb_sync_req[DATA]) &&
				3030	wbc->sync_mode == WB_SYNC_NONE) {
				3031	done = 1;
				3032	break;
				3033	}
				3034	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3035	lock_page:
				3036	#endif
				3037	done_index = page->index;
				3038	retry_write:
				3039	lock_page(page);
				3040
				3041	if (unlikely(page->mapping != mapping)) {
				3042	continue_unlock:
				3043	unlock_page(page);
				3044	continue;
				3045	}
				3046
				3047	if (!PageDirty(page)) {
				3048	/* someone wrote it for us */
				3049	goto continue_unlock;
				3050	}
				3051
				3052	if (PageWriteback(page)) {
				3053	if (wbc->sync_mode != WB_SYNC_NONE)
				3054	f2fs_wait_on_page_writeback(page,
				3055	DATA, true, true);
				3056	else
				3057	goto continue_unlock;
				3058	}
				3059
				3060	if (!clear_page_dirty_for_io(page))
				3061	goto continue_unlock;
				3062
				3063	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3064	if (f2fs_compressed_file(inode)) {
				3065	get_page(page);
				3066	f2fs_compress_ctx_add_page(&cc, page);
				3067	continue;
				3068	}
				3069	#endif
				3070	ret = f2fs_write_single_data_page(page, &submitted,
				3071	&bio, &last_block, wbc, io_type,
				3072	0, true);
				3073	if (ret == AOP_WRITEPAGE_ACTIVATE)
				3074	unlock_page(page);
				3075	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3076	result:
				3077	#endif
				3078	nwritten += submitted;
				3079	wbc->nr_to_write -= submitted;
				3080
				3081	if (unlikely(ret)) {
				3082	/*
				3083	* keep nr_to_write, since vfs uses this to
				3084	* get # of written pages.
				3085	*/
				3086	if (ret == AOP_WRITEPAGE_ACTIVATE) {
				3087	ret = 0;
				3088	goto next;
				3089	} else if (ret == -EAGAIN) {
				3090	ret = 0;
				3091	if (wbc->sync_mode == WB_SYNC_ALL) {
				3092	cond_resched();
				3093	congestion_wait(BLK_RW_ASYNC,
				3094	DEFAULT_IO_TIMEOUT);
				3095	goto retry_write;
				3096	}
				3097	goto next;
				3098	}
				3099	done_index = page->index + 1;
				3100	done = 1;
				3101	break;
				3102	}
				3103
				3104	if (wbc->nr_to_write <= 0 &&
				3105	wbc->sync_mode == WB_SYNC_NONE) {
				3106	done = 1;
				3107	break;
				3108	}
				3109	next:
				3110	if (need_readd)
				3111	goto readd;
				3112	}
				3113	pagevec_release(&pvec);
				3114	cond_resched();
				3115	}
				3116	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3117	/* flush remained pages in compress cluster */
				3118	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
				3119	ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
				3120	nwritten += submitted;
				3121	wbc->nr_to_write -= submitted;
				3122	if (ret) {
				3123	done = 1;
				3124	retry = 0;
				3125	}
				3126	}
				3127	if (f2fs_compressed_file(inode))
				3128	f2fs_destroy_compress_ctx(&cc, false);
				3129	#endif
				3130	if (retry) {
				3131	index = 0;
				3132	end = -1;
				3133	goto retry;
				3134	}
				3135	if (wbc->range_cyclic && !done)
				3136	done_index = 0;
				3137	if (wbc->range_cyclic \|\| (range_whole && wbc->nr_to_write > 0))
				3138	mapping->writeback_index = done_index;
				3139
				3140	if (nwritten)
				3141	f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
				3142	NULL, 0, DATA);
				3143	/* submit cached bio of IPU write */
				3144	if (bio)
				3145	f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
				3146
				3147	return ret;
				3148	}
				3149
				3150	static inline bool __should_serialize_io(struct inode *inode,
				3151	struct writeback_control *wbc)
				3152	{
					/*
					* Tell the caller whether data writeback of @inode should be serialized.
					* Returns false when a checkpoint task is recorded on the inode, for
					* non-regular files and for quota files. Otherwise returns true when the
					* inode needs compression, when @wbc is not WB_SYNC_ALL, or when the
					* number of dirty pages has reached min_seq_blocks; false in any other case.
					*/
				3153	/* to avoid deadlock in path of data flush */
				3154	if (F2FS_I(inode)->cp_task)
				3155	return false;
				3156
				3157	if (!S_ISREG(inode->i_mode))
				3158	return false;
				3159	if (IS_NOQUOTA(inode))
				3160	return false;
				3161
				3162	if (f2fs_need_compress_data(inode))
				3163	return true;
				3164	if (wbc->sync_mode != WB_SYNC_ALL)
				3165	return true;
					/* WB_SYNC_ALL from here on: serialize only once enough pages are dirty */
				3166	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
				3167	return true;
				3168	return false;
				3169	}
				3170
				3171	static int __f2fs_write_data_pages(struct address_space *mapping,
				3172	struct writeback_control *wbc,
				3173	enum iostat_type io_type)
				3174	{
					/*
					* Write back the dirty data pages of @mapping via f2fs_write_cache_pages().
					* @wbc:     writeback control; pages_skipped is bumped on the skip paths.
					* @io_type: iostat category passed through to f2fs_write_cache_pages().
					* Returns 0 when writeback is skipped or not needed, otherwise the value
					* returned by f2fs_write_cache_pages().
					*/
				3175	struct inode *inode = mapping->host;
				3176	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				3177	struct blk_plug plug;
				3178	int ret;
				3179	bool locked = false;
				3180
				3181	/* deal with chardevs and other special file */
				3182	if (!mapping->a_ops->writepage)
				3183	return 0;
				3184
				3185	/* skip writing if there is no dirty page in this inode */
				3186	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
				3187	return 0;
				3188
				3189	/* during POR, we don't need to trigger writepage at all. */
				3190	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
				3191	goto skip_write;
				3192
					/*
					* Background writeback of a directory or quota file is skipped while it
					* has fewer dirty pages than nr_pages_to_skip() and
					* f2fs_available_free_memory(DIRTY_DENTS) reports true.
					*/
				3193	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
				3194	wbc->sync_mode == WB_SYNC_NONE &&
				3195	get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
				3196	f2fs_available_free_memory(sbi, DIRTY_DENTS))
				3197	goto skip_write;
				3198
				3199	/* skip writing during file defragment */
				3200	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
				3201	goto skip_write;
				3202
				3203	trace_f2fs_writepages(mapping->host, wbc, DATA);
				3204
				3205	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
				3206	if (wbc->sync_mode == WB_SYNC_ALL)
				3207	atomic_inc(&sbi->wb_sync_req[DATA]);
				3208	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
				3209	/* to avoid potential deadlock */
				3210	if (current->plug)
				3211	blk_finish_plug(current->plug);
				3212	goto skip_write;
				3213	}
				3214
					/* hold sbi->writepages across the page walk when serialization is wanted */
				3215	if (__should_serialize_io(inode, wbc)) {
				3216	mutex_lock(&sbi->writepages);
				3217	locked = true;
				3218	}
				3219
				3220	blk_start_plug(&plug);
				3221	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
				3222	blk_finish_plug(&plug);
				3223
				3224	if (locked)
				3225	mutex_unlock(&sbi->writepages);
				3226
					/* balances the atomic_inc() taken above for WB_SYNC_ALL */
				3227	if (wbc->sync_mode == WB_SYNC_ALL)
				3228	atomic_dec(&sbi->wb_sync_req[DATA]);
				3229	/*
				3230	* if some pages were truncated, we cannot guarantee its mapping->host
				3231	* to detect pending bios.
				3232	*/
				3233
				3234	f2fs_remove_dirty_inode(inode);
				3235	return ret;
				3236
				3237	skip_write:
					/* report every dirty page of the inode as skipped to the caller */
				3238	wbc->pages_skipped += get_dirty_pages(inode);
				3239	trace_f2fs_writepages(mapping->host, wbc, DATA);
				3240	return 0;
				3241	}
				3242
				3243	static int f2fs_write_data_pages(struct address_space *mapping,
				3244	struct writeback_control *wbc)
				3245	{
					/*
					* Thin wrapper around __f2fs_write_data_pages() that selects the iostat
					* type: FS_CP_DATA_IO when the calling task is the one recorded in the
					* inode's cp_task, FS_DATA_IO otherwise.
					*/
				3246	struct inode *inode = mapping->host;
				3247
				3248	return __f2fs_write_data_pages(mapping, wbc,
				3249	F2FS_I(inode)->cp_task == current ?
				3250	FS_CP_DATA_IO : FS_DATA_IO);
				3251	}
				3252
				3253	static void f2fs_write_failed(struct address_space *mapping, loff_t to)
				3254	{
					/*
					* Clean up after a failed write that would have ended at offset @to.
					* When @to lies beyond the current i_size, the page cache and the blocks
					* past i_size are truncated. Nothing is done for quota files or while
					* fs-verity enabling is in progress.
					*/
				3255	struct inode *inode = mapping->host;
				3256	loff_t i_size = i_size_read(inode);
				3257
				3258	if (IS_NOQUOTA(inode))
				3259	return;
				3260
				3261	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
				3262	if (to > i_size && !f2fs_verity_in_progress(inode)) {
					/* both semaphores are held for write around the truncation */
				3263	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				3264	down_write(&F2FS_I(inode)->i_mmap_sem);
				3265
				3266	truncate_pagecache(inode, i_size);
				3267	f2fs_truncate_blocks(inode, i_size, true);
				3268
				3269	up_write(&F2FS_I(inode)->i_mmap_sem);
				3270	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				3271	}
				3272	}
				3273
				3274	static int prepare_write_begin(struct f2fs_sb_info *sbi,
				3275	struct page *page, loff_t pos, unsigned len,
				3276	block_t *blk_addr, bool *node_changed)
				3277	{
					/*
					* Resolve the block address backing @page for a write of @len bytes at @pos.
					* @blk_addr:     out, receives dn.data_blkaddr on success.
					* @node_changed: out, receives dn.node_changed on success.
					* Returns 0 on success or a negative errno. On the early-return path
					* below neither output is written, so callers must pre-initialize them.
					*/
				3278	struct inode *inode = page->mapping->host;
				3279	pgoff_t index = page->index;
				3280	struct dnode_of_data dn;
				3281	struct page *ipage;
				3282	bool locked = false;
				3283	struct extent_info ei = {0,0,0};
				3284	int err = 0;
				3285	int flag;
				3286
				3287	/*
				3288	* we already allocated all the blocks, so we don't need to get
				3289	* the block addresses when there is no need to fill the page.
				3290	*/
				3291	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
				3292	!is_inode_flag_set(inode, FI_NO_PREALLOC) &&
				3293	!f2fs_verity_in_progress(inode))
				3294	return 0;
				3295
				3296	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
				3297	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
				3298	flag = F2FS_GET_BLOCK_DEFAULT;
				3299	else
				3300	flag = F2FS_GET_BLOCK_PRE_AIO;
				3301
					/* take the map lock up front for inline inodes and writes at/after EOF */
				3302	if (f2fs_has_inline_data(inode) ||
				3303	(pos & PAGE_MASK) >= i_size_read(inode)) {
				3304	f2fs_do_map_lock(sbi, flag, true);
				3305	locked = true;
				3306	}
				3307
				3308	restart:
				3309	/* check inline_data */
				3310	ipage = f2fs_get_node_page(sbi, inode->i_ino);
				3311	if (IS_ERR(ipage)) {
				3312	err = PTR_ERR(ipage);
				3313	goto unlock_out;
				3314	}
				3315
				3316	set_new_dnode(&dn, inode, ipage, ipage, 0);
				3317
				3318	if (f2fs_has_inline_data(inode)) {
				3319	if (pos + len <= MAX_INLINE_DATA(inode)) {
					/* the write still fits inline: fill @page from the inode page */
				3320	f2fs_do_read_inline_data(page, ipage);
				3321	set_inode_flag(inode, FI_DATA_EXIST);
				3322	if (inode->i_nlink)
				3323	set_page_private_inline(ipage);
				3324	} else {
				3325	err = f2fs_convert_inline_page(&dn, page);
				3326	if (err)
				3327	goto out;
				3328	if (dn.data_blkaddr == NULL_ADDR)
				3329	err = f2fs_get_block(&dn, index);
				3330	}
				3331	} else if (locked) {
				3332	err = f2fs_get_block(&dn, index);
				3333	} else {
					/* unlocked lookup: try the extent cache first, then the dnode */
				3334	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
				3335	dn.data_blkaddr = ei.blk + index - ei.fofs;
				3336	} else {
				3337	/* hole case */
				3338	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
				3339	if (err || dn.data_blkaddr == NULL_ADDR) {
					/* drop the dnode, take the map lock and retry on the locked path */
				3340	f2fs_put_dnode(&dn);
				3341	f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
				3342	true);
					/* the unlock below uses @flag, so it must match the lock just taken */
				3343	WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				3344	locked = true;
				3345	goto restart;
				3346	}
				3347	}
				3348	}
				3349
				3350	/* convert_inline_page can make node_changed */
				3351	*blk_addr = dn.data_blkaddr;
				3352	*node_changed = dn.node_changed;
				3353	out:
				3354	f2fs_put_dnode(&dn);
				3355	unlock_out:
				3356	if (locked)
				3357	f2fs_do_map_lock(sbi, flag, false);
				3358	return err;
				3359	}
				3360
				3361	static int f2fs_write_begin(struct file *file, struct address_space *mapping,
				3362	loff_t pos, unsigned len, unsigned flags,
				3363	struct page **pagep, void **fsdata)
				3364	{
					/*
					* Prepare the page cache page covering @pos for a buffered write of @len bytes.
					* @pagep:  out, receives the locked page obtained here (or is filled by
					*          f2fs_prepare_compress_overwrite() for compressed files).
					* @fsdata: out, only written on the compressed-file path.
					* Returns 0 on success. On failure the page (if any) is released,
					* f2fs_write_failed() is called for pos + len, and a negative errno is returned.
					*/
				3365	struct inode *inode = mapping->host;
				3366	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				3367	struct page *page = NULL;
				3368	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
				3369	bool need_balance = false, drop_atomic = false;
				3370	block_t blkaddr = NULL_ADDR;
				3371	int err = 0;
				3372
				3373	/*
				3374	* Should avoid quota operations which can make deadlock:
				3375	* kswapd -> f2fs_evict_inode -> dquot_drop ->
				3376	* f2fs_dquot_commit -> f2fs_write_begin ->
				3377	* d_obtain_alias -> __d_alloc -> kmem_cache_alloc(GFP_KERNEL)
				3378	*/
				3379	if (trace_android_fs_datawrite_start_enabled() && !IS_NOQUOTA(inode)) {
				3380	char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
				3381
				3382	path = android_fstrace_get_pathname(pathbuf,
				3383	MAX_TRACE_PATHBUF_LEN,
				3384	inode);
				3385	trace_android_fs_datawrite_start(inode, pos, len,
				3386	current->pid, path,
				3387	current->comm);
				3388	}
				3389	trace_f2fs_write_begin(inode, pos, len, flags);
				3390
				3391	if (!f2fs_is_checkpoint_ready(sbi)) {
				3392	err = -ENOSPC;
				3393	goto fail;
				3394	}
				3395
					/* fail with -ENOMEM and drop in-memory atomic pages on the way out */
				3396	if ((f2fs_is_atomic_file(inode) &&
				3397	!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
				3398	is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
				3399	err = -ENOMEM;
				3400	drop_atomic = true;
				3401	goto fail;
				3402	}
				3403
				3404	/*
				3405	* We should check this at this moment to avoid deadlock on inode page
				3406	* and #0 page. The locking rule for inline_data conversion should be:
				3407	* lock_page(page #0) -> lock_page(inode_page)
				3408	*/
				3409	if (index != 0) {
				3410	err = f2fs_convert_inline_inode(inode);
				3411	if (err)
				3412	goto fail;
				3413	}
				3414
				3415	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3416	if (f2fs_compressed_file(inode)) {
				3417	int ret;
				3418
				3419	*fsdata = NULL;
				3420
				3421	ret = f2fs_prepare_compress_overwrite(inode, pagep,
				3422	index, fsdata);
				3423	if (ret < 0) {
				3424	err = ret;
				3425	goto fail;
				3426	} else if (ret) {
					/* a positive return finishes write_begin here with @pagep/@fsdata as set by the helper */
				3427	return 0;
				3428	}
				3429	}
				3430	#endif
				3431
				3432	repeat:
				3433	/*
				3434	* Do not use grab_cache_page_write_begin() to avoid deadlock due to
				3435	* wait_for_stable_page. Will wait that below with our IO control.
				3436	*/
				3437	page = f2fs_pagecache_get_page(mapping, index,
				3438	FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
				3439	if (!page) {
				3440	err = -ENOMEM;
				3441	goto fail;
				3442	}
				3443
				3444	/* TODO: cluster can be compressed due to race with .writepage */
				3445
				3446	*pagep = page;
				3447
				3448	err = prepare_write_begin(sbi, page, pos, len,
				3449	&blkaddr, &need_balance);
				3450	if (err)
				3451	goto fail;
				3452
				3453	if (need_balance && !IS_NOQUOTA(inode) &&
				3454	has_not_enough_free_secs(sbi, 0, 0)) {
					/* the page lock is dropped around f2fs_balance_fs(), so revalidate the mapping afterwards */
				3455	unlock_page(page);
				3456	f2fs_balance_fs(sbi, true);
				3457	lock_page(page);
				3458	if (page->mapping != mapping) {
				3459	/* The page got truncated from under us */
				3460	f2fs_put_page(page, 1);
				3461	goto repeat;
				3462	}
				3463	}
				3464
				3465	f2fs_wait_on_page_writeback(page, DATA, false, true);
				3466
					/* a full-page write or an already up-to-date page needs no read */
				3467	if (len == PAGE_SIZE || PageUptodate(page))
				3468	return 0;
				3469
					/* page-aligned write reaching EOF: zero the tail instead of reading */
				3470	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
				3471	!f2fs_verity_in_progress(inode)) {
				3472	zero_user_segment(page, len, PAGE_SIZE);
				3473	return 0;
				3474	}
				3475
				3476	if (blkaddr == NEW_ADDR) {
				3477	zero_user_segment(page, 0, PAGE_SIZE);
				3478	SetPageUptodate(page);
				3479	} else {
				3480	if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				3481	DATA_GENERIC_ENHANCE_READ)) {
				3482	err = -EFSCORRUPTED;
				3483	goto fail;
				3484	}
				3485	err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
				3486	if (err)
				3487	goto fail;
				3488
					/* re-take the page lock after the read and recheck mapping and uptodate state */
				3489	lock_page(page);
				3490	if (unlikely(page->mapping != mapping)) {
				3491	f2fs_put_page(page, 1);
				3492	goto repeat;
				3493	}
				3494	if (unlikely(!PageUptodate(page))) {
				3495	err = -EIO;
				3496	goto fail;
				3497	}
				3498	}
				3499	return 0;
				3500
				3501	fail:
					/* @page may still be NULL here when failing before the page lookup */
				3502	f2fs_put_page(page, 1);
				3503	f2fs_write_failed(mapping, pos + len);
				3504	if (drop_atomic)
				3505	f2fs_drop_inmem_pages_all(sbi, false);
				3506	return err;
				3507	}
				3508
				3509	static int f2fs_write_end(struct file *file,
				3510	struct address_space *mapping,
				3511	loff_t pos, unsigned len, unsigned copied,
				3512	struct page *page, void *fsdata)
				3513	{
					/*
					* Finish a buffered write of @copied bytes (out of @len requested) at @pos
					* into @page. Marks the page dirty, extends i_size when the write went past
					* it (unless fs-verity enabling is in progress), releases the page and
					* returns the number of bytes accepted, which may be forced to 0.
					*/
				3514	struct inode *inode = page->mapping->host;
				3515
				3516	trace_android_fs_datawrite_end(inode, pos, len);
				3517	trace_f2fs_write_end(inode, pos, len, copied);
				3518
				3519	/*
				3520	* This should be come from len == PAGE_SIZE, and we expect copied
				3521	* should be PAGE_SIZE. Otherwise, we treat it with zero copied and
				3522	* let generic_perform_write() try to copy data again through copied=0.
				3523	*/
				3524	if (!PageUptodate(page)) {
				3525	if (unlikely(copied != len))
				3526	copied = 0;
				3527	else
				3528	SetPageUptodate(page);
				3529	}
				3530
				3531	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3532	/* overwrite compressed file */
				3533	if (f2fs_compressed_file(inode) && fsdata) {
					/* this path returns without the set_page_dirty()/f2fs_put_page() done below */
				3534	f2fs_compress_write_end(inode, fsdata, page->index, copied);
				3535	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
				3536
				3537	if (pos + copied > i_size_read(inode) &&
				3538	!f2fs_verity_in_progress(inode))
				3539	f2fs_i_size_write(inode, pos + copied);
				3540	return copied;
				3541	}
				3542	#endif
				3543
					/* nothing was copied: skip dirtying and the i_size update */
				3544	if (!copied)
				3545	goto unlock_out;
				3546
				3547	set_page_dirty(page);
				3548
				3549	if (pos + copied > i_size_read(inode) &&
				3550	!f2fs_verity_in_progress(inode))
				3551	f2fs_i_size_write(inode, pos + copied);
				3552	unlock_out:
				3553	f2fs_put_page(page, 1);
				3554	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
				3555	return copied;
				3556	}
				3557
				3558	static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
				3559	loff_t offset)
				3560	{
					/*
					* Check whether a direct I/O request at @offset described by @iter is
					* suitably aligned.
					* Returns 0 when offset and iterator are aligned to the inode block size;
					* 1 for a read starting at or beyond i_size, or when the request is only
					* aligned to the block device's logical block size; -EINVAL when it is
					* not aligned to either (including when there is no block device).
					*/
				3561	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
				3562	unsigned blkbits = i_blkbits;
				3563	unsigned blocksize_mask = (1 << blkbits) - 1;
				3564	unsigned long align = offset | iov_iter_alignment(iter);
				3565	struct block_device *bdev = inode->i_sb->s_bdev;
				3566
				3567	if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
				3568	return 1;
				3569
				3570	if (align & blocksize_mask) {
					/* retry the check against the device's logical block size */
				3571	if (bdev)
				3572	blkbits = blksize_bits(bdev_logical_block_size(bdev));
				3573	blocksize_mask = (1 << blkbits) - 1;
				3574	if (align & blocksize_mask)
				3575	return -EINVAL;
				3576	return 1;
				3577	}
				3578	return 0;
				3579	}
				3580
				3581	static void f2fs_dio_end_io(struct bio *bio)
				3582	{
					/*
					* Completion handler installed by f2fs_dio_submit_bio(): drops the
					* in-flight DIO page count, restores the bio's original bi_private and
					* bi_end_io, frees the tracking structure and completes the bio again so
					* the original handler runs.
					*/
				3583	struct f2fs_private_dio *dio = bio->bi_private;
				3584
				3585	dec_page_count(F2FS_I_SB(dio->inode),
				3586	dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
				3587
				3588	bio->bi_private = dio->orig_private;
				3589	bio->bi_end_io = dio->orig_end_io;
				3590
				3591	kfree(dio);
				3592
				3593	bio_endio(bio);
				3594	}
				3595
				3596	static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
				3597	loff_t file_offset)
				3598	{
					/*
					* Submit a direct I/O @bio for @inode with f2fs accounting attached.
					* The bio's original completion handler and private data are saved in a
					* struct f2fs_private_dio and replaced by f2fs_dio_end_io(), and the
					* F2FS_DIO_WRITE / F2FS_DIO_READ page count is incremented.
					* If the tracking structure cannot be allocated, the bio is completed
					* immediately with BLK_STS_IOERR. @file_offset is not used here.
					*/
				3599	struct f2fs_private_dio *dio;
				3600	bool write = (bio_op(bio) == REQ_OP_WRITE);
				3601
				3602	dio = f2fs_kzalloc(F2FS_I_SB(inode),
				3603	sizeof(struct f2fs_private_dio), GFP_NOFS);
				3604	if (!dio)
				3605	goto out;
				3606
				3607	dio->inode = inode;
				3608	dio->orig_end_io = bio->bi_end_io;
				3609	dio->orig_private = bio->bi_private;
				3610	dio->write = write;
				3611
				3612	bio->bi_end_io = f2fs_dio_end_io;
				3613	bio->bi_private = dio;
				3614
				3615	inc_page_count(F2FS_I_SB(inode),
				3616	write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
				3617
				3618	submit_bio(bio);
				3619	return;
				3620	out:
				3621	bio->bi_status = BLK_STS_IOERR;
				3622	bio_endio(bio);
				3623	}
				3624
				3625	static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
				3626	{
					/*
					* Perform direct I/O for @iocb using the data described by @iter.
					* Returns the result of __blockdev_direct_IO(), -EAGAIN when IOCB_NOWAIT
					* is set and a GC semaphore cannot be taken immediately, a negative errno
					* from check_direct_IO(), or 0 without doing any I/O when
					* check_direct_IO() returns a positive value or f2fs_force_buffered_io()
					* is true (presumably so the caller falls back to buffered I/O; confirm
					* against the caller).
					*/
				3627	struct address_space *mapping = iocb->ki_filp->f_mapping;
				3628	struct inode *inode = mapping->host;
				3629	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				3630	struct f2fs_inode_info *fi = F2FS_I(inode);
				3631	size_t count = iov_iter_count(iter);
				3632	loff_t offset = iocb->ki_pos;
				3633	int rw = iov_iter_rw(iter);
				3634	int err;
				3635	enum rw_hint hint = iocb->ki_hint;
				3636	int whint_mode = F2FS_OPTION(sbi).whint_mode;
				3637	bool do_opu;
				3638
				3639	err = check_direct_IO(inode, iter, offset);
				3640	if (err)
				3641	return err < 0 ? err : 0;
				3642
				3643	if (f2fs_force_buffered_io(inode, iocb, iter))
				3644	return 0;
				3645
				3646	do_opu = allow_outplace_dio(inode, iocb, iter);
				3647
				3648	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
				3649
				3650	if (trace_android_fs_dataread_start_enabled() &&
				3651	(rw == READ)) {
				3652	char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
				3653
				3654	path = android_fstrace_get_pathname(pathbuf,
				3655	MAX_TRACE_PATHBUF_LEN,
				3656	inode);
				3657	trace_android_fs_dataread_start(inode, offset,
				3658	count, current->pid, path,
				3659	current->comm);
				3660	}
				3661	if (trace_android_fs_datawrite_start_enabled() &&
				3662	(rw == WRITE)) {
				3663	char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
				3664
				3665	path = android_fstrace_get_pathname(pathbuf,
				3666	MAX_TRACE_PATHBUF_LEN,
				3667	inode);
				3668	trace_android_fs_datawrite_start(inode, offset, count,
				3669	current->pid, path,
				3670	current->comm);
				3671	}
				3672
					/* the original hint is saved in @hint and restored after the I/O */
				3673	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
				3674	iocb->ki_hint = WRITE_LIFE_NOT_SET;
				3675
					/*
					* Take i_gc_rwsem[rw] for read, plus i_gc_rwsem[READ] when out-of-place
					* DIO is allowed. With IOCB_NOWAIT only trylocks are used and a failed
					* trylock ends the request with -EAGAIN.
					*/
				3676	if (iocb->ki_flags & IOCB_NOWAIT) {
				3677	if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
				3678	iocb->ki_hint = hint;
				3679	err = -EAGAIN;
				3680	goto out;
				3681	}
				3682	if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
				3683	up_read(&fi->i_gc_rwsem[rw]);
				3684	iocb->ki_hint = hint;
				3685	err = -EAGAIN;
				3686	goto out;
				3687	}
				3688	} else {
				3689	down_read(&fi->i_gc_rwsem[rw]);
				3690	if (do_opu)
				3691	down_read(&fi->i_gc_rwsem[READ]);
				3692	}
				3693
				3694	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
				3695	iter, rw == WRITE ? get_data_block_dio_write :
				3696	get_data_block_dio, NULL, f2fs_dio_submit_bio,
				3697	rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
				3698	DIO_SKIP_HOLES);
				3699
				3700	if (do_opu)
				3701	up_read(&fi->i_gc_rwsem[READ]);
				3702
				3703	up_read(&fi->i_gc_rwsem[rw]);
				3704
				3705	if (rw == WRITE) {
				3706	if (whint_mode == WHINT_MODE_OFF)
				3707	iocb->ki_hint = hint;
				3708	if (err > 0) {
				3709	f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
				3710	err);
				3711	if (!do_opu)
				3712	set_inode_flag(inode, FI_UPDATE_WRITE);
				3713	} else if (err == -EIOCBQUEUED) {
					/* queued request: account the bytes already consumed from @iter */
				3714	f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
				3715	count - iov_iter_count(iter));
				3716	} else if (err < 0) {
				3717	f2fs_write_failed(mapping, offset + count);
				3718	}
				3719	} else {
				3720	if (err > 0)
				3721	f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
				3722	else if (err == -EIOCBQUEUED)
				3723	f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
				3724	count - iov_iter_count(iter));
				3725	}
				3726
				3727	out:
				3728	if (trace_android_fs_dataread_start_enabled() &&
				3729	(rw == READ))
				3730	trace_android_fs_dataread_end(inode, offset, count);
				3731	if (trace_android_fs_datawrite_start_enabled() &&
				3732	(rw == WRITE))
				3733	trace_android_fs_datawrite_end(inode, offset, count);
				3734
				3735	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
				3736
				3737	return err;
				3738	}
				3739
				3740	void f2fs_invalidate_page(struct page *page, unsigned int offset,
				3741	unsigned int length)
				3742	{
					/*
					* Invalidate @page: fix up the dirty-page accounting, clear f2fs private
					* page state and detach the page's private data. For inodes numbered at
					* or above the root inode, partial-page requests (@offset not page
					* aligned or @length != PAGE_SIZE) are ignored.
					*/
				3743	struct inode *inode = page->mapping->host;
				3744	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				3745
				3746	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
				3747	(offset % PAGE_SIZE || length != PAGE_SIZE))
				3748	return;
				3749
					/* a dirty page is leaving: drop it from the matching dirty counter */
				3750	if (PageDirty(page)) {
				3751	if (inode->i_ino == F2FS_META_INO(sbi)) {
				3752	dec_page_count(sbi, F2FS_DIRTY_META);
				3753	} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
				3754	dec_page_count(sbi, F2FS_DIRTY_NODES);
				3755	} else {
				3756	inode_dec_dirty_pages(inode);
				3757	f2fs_remove_dirty_inode(inode);
				3758	}
				3759	}
				3760
				3761	clear_page_private_gcing(page);
				3762
				3763	if (test_opt(sbi, COMPRESS_CACHE)) {
				3764	if (f2fs_compressed_file(inode))
				3765	f2fs_invalidate_compress_pages(sbi, inode->i_ino);
				3766	if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
				3767	clear_page_private_data(page);
				3768	}
				3769
					/* atomic-write pages are handed to f2fs_drop_inmem_page() instead */
				3770	if (page_private_atomic(page))
				3771	return f2fs_drop_inmem_page(inode, page);
				3772
				3773	detach_page_private(page);
				3774	set_page_private(page, 0);
				3775	}
				3776
				3777	int f2fs_release_page(struct page *page, gfp_t wait)
				3778	{
					/*
					* Try to release the private state attached to @page.
					* Returns 0 (nothing released) for dirty pages and atomic-write pages;
					* otherwise clears the f2fs private flags, detaches the private data and
					* returns 1. @wait is not used here.
					*/
				3779	/* If this is dirty page, keep PagePrivate */
				3780	if (PageDirty(page))
				3781	return 0;
				3782
				3783	/* This is atomic written page, keep Private */
				3784	if (page_private_atomic(page))
				3785	return 0;
				3786
				3787	if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
				3788	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
				3789	struct inode *inode = page->mapping->host;
				3790
				3791	if (f2fs_compressed_file(inode))
				3792	f2fs_invalidate_compress_pages(sbi, inode->i_ino);
				3793	if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
				3794	clear_page_private_data(page);
				3795	}
				3796
				3797	clear_page_private_gcing(page);
				3798
				3799	detach_page_private(page);
				3800	set_page_private(page, 0);
				3801	return 1;
				3802	}
				3803
				3804	static int f2fs_set_data_page_dirty(struct page *page)
				3805	{
					/*
					* Mark a data @page dirty. Returns 1 when this call newly dirtied or
					* registered the page and 0 when it was already dirty/registered; for
					* swap-cache pages the result of __set_page_dirty_nobuffers() is returned.
					*/
				3806	struct inode *inode = page_file_mapping(page)->host;
				3807
				3808	trace_f2fs_set_page_dirty(page, DATA);
				3809
				3810	if (!PageUptodate(page))
				3811	SetPageUptodate(page);
				3812	if (PageSwapCache(page))
				3813	return __set_page_dirty_nobuffers(page);
				3814
					/* atomic file not yet committing: track the page as an in-memory page instead */
				3815	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
				3816	if (!page_private_atomic(page)) {
				3817	f2fs_register_inmem_page(inode, page);
				3818	return 1;
				3819	}
				3820	/*
				3821	* Previously, this page has been registered, we just
				3822	* return here.
				3823	*/
				3824	return 0;
				3825	}
				3826
				3827	if (!PageDirty(page)) {
				3828	__set_page_dirty_nobuffers(page);
				3829	f2fs_update_dirty_page(inode, page);
				3830	return 1;
				3831	}
				3832	return 0;
				3833	}
				3834
				3835
				3836	static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
				3837	{
					/*
					* Map logical @block of a compressed-capable @inode to a block address.
					* Returns 0 when compression support is compiled out, when the dnode
					* lookup fails, when the cluster's first address is COMPRESS_ADDR, or
					* when the resolved address is not a valid data block address.
					*/
				3838	#ifdef CONFIG_F2FS_FS_COMPRESSION
				3839	struct dnode_of_data dn;
				3840	sector_t start_idx, blknr = 0;
				3841	int ret;
				3842
					/* look up the first block of the cluster that contains @block */
				3843	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
				3844
				3845	set_new_dnode(&dn, inode, NULL, NULL, 0);
				3846	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
				3847	if (ret)
				3848	return 0;
				3849
				3850	if (dn.data_blkaddr != COMPRESS_ADDR) {
					/* step from the cluster start to the requested block inside the dnode */
				3851	dn.ofs_in_node += block - start_idx;
				3852	blknr = f2fs_data_blkaddr(&dn);
				3853	if (!__is_valid_data_blkaddr(blknr))
				3854	blknr = 0;
				3855	}
				3856
				3857	f2fs_put_dnode(&dn);
				3858	return blknr;
				3859	#else
				3860	return 0;
				3861	#endif
				3862	}
				3863
				3864
				3865	static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
				3866	{
					/*
					* Map logical @block of the file behind @mapping to a block address.
					* Returns 0 for inline-data inodes, for @block at or beyond
					* max_file_blocks(), or when the mapping lookup fails. Dirty pages are
					* written back and waited on before the lookup.
					*/
				3867	struct inode *inode = mapping->host;
				3868	sector_t blknr = 0;
				3869
				3870	if (f2fs_has_inline_data(inode))
				3871	goto out;
				3872
				3873	/* make sure allocating whole blocks */
				3874	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
				3875	filemap_write_and_wait(mapping);
				3876
				3877	/* Block number less than F2FS MAX BLOCKS */
				3878	if (unlikely(block >= max_file_blocks(inode)))
				3879	goto out;
				3880
				3881	if (f2fs_compressed_file(inode)) {
				3882	blknr = f2fs_bmap_compress(inode, block);
				3883	} else {
				3884	struct f2fs_map_blocks map;
				3885
					/* single-block lookup in F2FS_GET_BLOCK_BMAP mode */
				3886	memset(&map, 0, sizeof(map));
				3887	map.m_lblk = block;
				3888	map.m_len = 1;
				3889	map.m_next_pgofs = NULL;
				3890	map.m_seg_type = NO_CHECK_TYPE;
				3891
				3892	if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
				3893	blknr = map.m_pblk;
				3894	}
				3895	out:
				3896	trace_f2fs_bmap(inode, block, blknr);
				3897	return blknr;
				3898	}
				3899
				3900	#ifdef CONFIG_MIGRATION
				3901	#include <linux/migrate.h>
				3902
				3903	int f2fs_migrate_page(struct address_space *mapping,
				3904	struct page *newpage, struct page *page, enum migrate_mode mode)
				3905	{
					/*
					* Migrate @page of @mapping to @newpage.
					* Returns MIGRATEPAGE_SUCCESS on success, the error from
					* migrate_page_move_mapping() on failure, and for atomic-write pages
					* -EBUSY when @mode is not MIGRATE_SYNC or -EAGAIN when inmem_lock
					* cannot be taken without blocking.
					*/
				3906	int rc, extra_count;
				3907	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
				3908	bool atomic_written = page_private_atomic(page);
				3909
				3910	BUG_ON(PageWriteback(page));
				3911
				3912	/* migrating an atomic written page is safe with the inmem_lock hold */
				3913	if (atomic_written) {
				3914	if (mode != MIGRATE_SYNC)
				3915	return -EBUSY;
				3916	if (!mutex_trylock(&fi->inmem_lock))
				3917	return -EAGAIN;
				3918	}
				3919
				3920	/* one extra reference was held for atomic_write page */
				3921	extra_count = atomic_written ? 1 : 0;
				3922	rc = migrate_page_move_mapping(mapping, newpage,
				3923	page, extra_count);
				3924	if (rc != MIGRATEPAGE_SUCCESS) {
				3925	if (atomic_written)
				3926	mutex_unlock(&fi->inmem_lock);
				3927	return rc;
				3928	}
				3929
				3930	if (atomic_written) {
				3931	struct inmem_pages *cur;
				3932
					/* repoint the in-memory page entry at @newpage and move the extra reference */
				3933	list_for_each_entry(cur, &fi->inmem_pages, list)
				3934	if (cur->page == page) {
				3935	cur->page = newpage;
				3936	break;
				3937	}
				3938	mutex_unlock(&fi->inmem_lock);
				3939	put_page(page);
				3940	get_page(newpage);
				3941	}
				3942
				3943	/* guarantee to start from no stale private field */
				3944	set_page_private(newpage, 0);
				3945	if (PagePrivate(page)) {
					/* transfer the private value, PagePrivate flag and its page reference */
				3946	set_page_private(newpage, page_private(page));
				3947	SetPagePrivate(newpage);
				3948	get_page(newpage);
				3949
				3950	set_page_private(page, 0);
				3951	ClearPagePrivate(page);
				3952	put_page(page);
				3953	}
				3954
				3955	if (mode != MIGRATE_SYNC_NO_COPY)
				3956	migrate_page_copy(newpage, page);
				3957	else
				3958	migrate_page_states(newpage, page);
				3959
				3960	return MIGRATEPAGE_SUCCESS;
				3961	}
				3962	#endif
				3963
				3964	#ifdef CONFIG_SWAP
				3965	static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
				3966	unsigned int blkcnt)
				3967	{
					/*
					* Rewrite the blocks of @inode section by section, covering
					* blkcnt / BLKS_PER_SEC sections starting at the section that contains
					* @start_blk. For each section a new CURSEG_COLD_DATA_PINNED section is
					* allocated, every page of the section is read and dirtied, and the
					* mapping is then written out with filemap_fdatawrite().
					* Returns 0 on success or the first error encountered.
					*/
				3968	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				3969	unsigned int blkofs;
				3970	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
				3971	unsigned int secidx = start_blk / blk_per_sec;
				3972	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
				3973	int ret = 0;
				3974
				3975	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				3976	down_write(&F2FS_I(inode)->i_mmap_sem);
				3977
				3978	set_inode_flag(inode, FI_ALIGNED_WRITE);
				3979
				3980	for (; secidx < end_sec; secidx++) {
				3981	down_write(&sbi->pin_sem);
				3982
				3983	f2fs_lock_op(sbi);
				3984	f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
				3985	f2fs_unlock_op(sbi);
				3986
					/* FI_DO_DEFRAG stays set only while the section's pages are being dirtied */
				3987	set_inode_flag(inode, FI_DO_DEFRAG);
				3988
				3989	for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
				3990	struct page *page;
				3991	unsigned int blkidx = secidx * blk_per_sec + blkofs;
				3992
				3993	page = f2fs_get_lock_data_page(inode, blkidx, true);
				3994	if (IS_ERR(page)) {
					/* release pin_sem here; the flags and outer locks are cleaned up at done */
				3995	up_write(&sbi->pin_sem);
				3996	ret = PTR_ERR(page);
				3997	goto done;
				3998	}
				3999
				4000	set_page_dirty(page);
				4001	f2fs_put_page(page, 1);
				4002	}
				4003
				4004	clear_inode_flag(inode, FI_DO_DEFRAG);
				4005
				4006	ret = filemap_fdatawrite(inode->i_mapping);
				4007
				4008	up_write(&sbi->pin_sem);
				4009
				4010	if (ret)
				4011	break;
				4012	}
				4013
				4014	done:
				4015	clear_inode_flag(inode, FI_DO_DEFRAG);
				4016	clear_inode_flag(inode, FI_ALIGNED_WRITE);
				4017
				4018	up_write(&F2FS_I(inode)->i_mmap_sem);
				4019	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				4020
				4021	return ret;
				4022	}
				4023
				4024	static int check_swap_activate(struct swap_info_struct *sis,
				4025	struct file swap_file, sector_t span)
				4026	{
				4027	struct address_space *mapping = swap_file->f_mapping;
				4028	struct inode *inode = mapping->host;
				4029	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
				4030	sector_t cur_lblock;
				4031	sector_t last_lblock;
				4032	sector_t pblock;
				4033	sector_t lowest_pblock = -1;
				4034	sector_t highest_pblock = 0;
				4035	int nr_extents = 0;
				4036	unsigned long nr_pblocks;
				4037	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
				4038	unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
				4039	unsigned int not_aligned = 0;
				4040	int ret = 0;
				4041
				4042	/*
				4043	* Map all the blocks into the extent list. This code doesn't try
				4044	* to be very smart.
				4045	*/
				4046	cur_lblock = 0;
				4047	last_lblock = bytes_to_blks(inode, i_size_read(inode));
				4048
				4049	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
				4050	struct f2fs_map_blocks map;
				4051	retry:
				4052	cond_resched();
				4053
				4054	memset(&map, 0, sizeof(map));
				4055	map.m_lblk = cur_lblock;
				4056	map.m_len = last_lblock - cur_lblock;
				4057	map.m_next_pgofs = NULL;
				4058	map.m_next_extent = NULL;
				4059	map.m_seg_type = NO_CHECK_TYPE;
				4060	map.m_may_create = false;
				4061
				4062	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
				4063	if (ret)
				4064	goto out;
				4065
				4066	/* hole */
				4067	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
				4068	f2fs_err(sbi, "Swapfile has holes");
				4069	ret = -EINVAL;
				4070	goto out;
				4071	}
				4072
				4073	pblock = map.m_pblk;
				4074	nr_pblocks = map.m_len;
				4075
				4076	if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask \|\|
				4077	nr_pblocks & sec_blks_mask) {
				4078	not_aligned++;
				4079
				4080	nr_pblocks = roundup(nr_pblocks, blks_per_sec);
				4081	if (cur_lblock + nr_pblocks > sis->max)
				4082	nr_pblocks -= blks_per_sec;
				4083
				4084	if (!nr_pblocks) {
				4085	/* this extent is last one */
				4086	nr_pblocks = map.m_len;
				4087	f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
				4088	goto next;
				4089	}
				4090
				4091	ret = f2fs_migrate_blocks(inode, cur_lblock,
				4092	nr_pblocks);
				4093	if (ret)
				4094	goto out;
				4095	goto retry;
				4096	}
				4097	next:
				4098	if (cur_lblock + nr_pblocks >= sis->max)
				4099	nr_pblocks = sis->max - cur_lblock;
				4100
				4101	if (cur_lblock) { /* exclude the header page */
				4102	if (pblock < lowest_pblock)
				4103	lowest_pblock = pblock;
				4104	if (pblock + nr_pblocks - 1 > highest_pblock)
				4105	highest_pblock = pblock + nr_pblocks - 1;
				4106	}
				4107
				4108	/*
				4109	* We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
				4110	*/
				4111	ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
				4112	if (ret < 0)
				4113	goto out;
				4114	nr_extents += ret;
				4115	cur_lblock += nr_pblocks;
				4116	}
				4117	ret = nr_extents;
				4118	*span = 1 + highest_pblock - lowest_pblock;
				4119	if (cur_lblock == 0)
				4120	cur_lblock = 1; /* force Empty message */
				4121	sis->max = cur_lblock;
				4122	sis->pages = cur_lblock - 1;
				4123	sis->highest_bit = cur_lblock - 1;
				4124	out:
				4125	if (not_aligned)
				4126	f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
				4127	not_aligned, blks_per_sec * F2FS_BLKSIZE);
				4128	return ret;
				4129	}
				4130
				4131	static int f2fs_swap_activate(struct swap_info_struct sis, struct file file,
				4132	sector_t *span)
				4133	{
				4134	struct inode *inode = file_inode(file);
				4135	int ret;
				4136
				4137	if (!S_ISREG(inode->i_mode))
				4138	return -EINVAL;
				4139
				4140	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
				4141	return -EROFS;
				4142
				4143	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
				4144	f2fs_err(F2FS_I_SB(inode),
				4145	"Swapfile not supported in LFS mode");
				4146	return -EINVAL;
				4147	}
				4148
				4149	ret = f2fs_convert_inline_inode(inode);
				4150	if (ret)
				4151	return ret;
				4152
				4153	if (!f2fs_disable_compressed_file(inode))
				4154	return -EINVAL;
				4155
				4156	f2fs_precache_extents(inode);
				4157
				4158	ret = check_swap_activate(sis, file, span);
				4159	if (ret < 0)
				4160	return ret;
				4161
				4162	set_inode_flag(inode, FI_PIN_FILE);
				4163	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
				4164	return ret;
				4165	}
				4166
				4167	static void f2fs_swap_deactivate(struct file *file)
				4168	{
				4169	struct inode *inode = file_inode(file);
				4170
				4171	clear_inode_flag(inode, FI_PIN_FILE);
				4172	}
				4173	#else
				4174	static int f2fs_swap_activate(struct swap_info_struct sis, struct file file,
				4175	sector_t *span)
				4176	{
				4177	return -EOPNOTSUPP;
				4178	}
				4179
				4180	static void f2fs_swap_deactivate(struct file *file)
				4181	{
				4182	}
				4183	#endif
				4184
				4185	const struct address_space_operations f2fs_dblock_aops = {
				4186	.readpage = f2fs_read_data_page,
				4187	.readpages = f2fs_read_data_pages,
				4188	.writepage = f2fs_write_data_page,
				4189	.writepages = f2fs_write_data_pages,
				4190	.write_begin = f2fs_write_begin,
				4191	.write_end = f2fs_write_end,
				4192	.set_page_dirty = f2fs_set_data_page_dirty,
				4193	.invalidatepage = f2fs_invalidate_page,
				4194	.releasepage = f2fs_release_page,
				4195	.direct_IO = f2fs_direct_IO,
				4196	.bmap = f2fs_bmap,
				4197	.swap_activate = f2fs_swap_activate,
				4198	.swap_deactivate = f2fs_swap_deactivate,
				4199	#ifdef CONFIG_MIGRATION
				4200	.migratepage = f2fs_migrate_page,
				4201	#endif
				4202	};
				4203
				4204	void f2fs_clear_page_cache_dirty_tag(struct page *page)
				4205	{
				4206	struct address_space *mapping = page_mapping(page);
				4207	unsigned long flags;
				4208
				4209	xa_lock_irqsave(&mapping->i_pages, flags);
				4210	__xa_clear_mark(&mapping->i_pages, page_index(page),
				4211	PAGECACHE_TAG_DIRTY);
				4212	xa_unlock_irqrestore(&mapping->i_pages, flags);
				4213	}
				4214
				4215	int __init f2fs_init_post_read_processing(void)
				4216	{
				4217	bio_post_read_ctx_cache =
				4218	kmem_cache_create("f2fs_bio_post_read_ctx",
				4219	sizeof(struct bio_post_read_ctx), 0, 0, NULL);
				4220	if (!bio_post_read_ctx_cache)
				4221	goto fail;
				4222	bio_post_read_ctx_pool =
				4223	mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
				4224	bio_post_read_ctx_cache);
				4225	if (!bio_post_read_ctx_pool)
				4226	goto fail_free_cache;
				4227	return 0;
				4228
				4229	fail_free_cache:
				4230	kmem_cache_destroy(bio_post_read_ctx_cache);
				4231	fail:
				4232	return -ENOMEM;
				4233	}
				4234
				4235	void f2fs_destroy_post_read_processing(void)
				4236	{
				4237	mempool_destroy(bio_post_read_ctx_pool);
				4238	kmem_cache_destroy(bio_post_read_ctx_cache);
				4239	}
				4240
				4241	int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
				4242	{
				4243	if (!f2fs_sb_has_encrypt(sbi) &&
				4244	!f2fs_sb_has_verity(sbi) &&
				4245	!f2fs_sb_has_compression(sbi))
				4246	return 0;
				4247
				4248	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
				4249	WQ_UNBOUND \| WQ_HIGHPRI,
				4250	num_online_cpus());
				4251	if (!sbi->post_read_wq)
				4252	return -ENOMEM;
				4253	return 0;
				4254	}
				4255
				4256	void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
				4257	{
				4258	if (sbi->post_read_wq)
				4259	destroy_workqueue(sbi->post_read_wq);
				4260	}
				4261
				4262	int __init f2fs_init_bio_entry_cache(void)
				4263	{
				4264	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
				4265	sizeof(struct bio_entry));
				4266	if (!bio_entry_slab)
				4267	return -ENOMEM;
				4268	return 0;
				4269	}
				4270
				4271	void f2fs_destroy_bio_entry_cache(void)
				4272	{
				4273	kmem_cache_destroy(bio_entry_slab);
				4274	}