blob: a27d6fac14dbdcc9e149269ad9f83859dced2368 [file] [log] [blame]
b.liue9582032025-04-17 19:18:16 +08001// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/f2fs/data.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8#include <linux/fs.h>
9#include <linux/f2fs_fs.h>
10#include <linux/buffer_head.h>
11#include <linux/mpage.h>
12#include <linux/writeback.h>
13#include <linux/backing-dev.h>
14#include <linux/pagevec.h>
15#include <linux/blkdev.h>
16#include <linux/bio.h>
17#include <linux/blk-crypto.h>
18#include <linux/swap.h>
19#include <linux/prefetch.h>
20#include <linux/uio.h>
21#include <linux/cleancache.h>
22#include <linux/sched/signal.h>
23
24#include "f2fs.h"
25#include "node.h"
26#include "segment.h"
27#include <trace/events/f2fs.h>
28#include <trace/events/android_fs.h>
29
/*
 * Number of post-read contexts to preallocate; presumably the reserve size
 * of bio_post_read_ctx_pool (the pool's creation is not visible here).
 */
#define NUM_PREALLOC_POST_READ_CTXS 128

/* Slab cache presumably backing struct bio_post_read_ctx (going by name). */
static struct kmem_cache *bio_post_read_ctx_cache;
/* Slab cache that struct bio_entry objects are allocated from and freed to. */
static struct kmem_cache *bio_entry_slab;
/* Mempool supplying the bio_post_read_ctx attached to read bios. */
static mempool_t *bio_post_read_ctx_pool;
/* bio_set used by __bio_alloc() and f2fs_grab_read_bio() to allocate bios. */
static struct bio_set f2fs_bioset;

/* Pool size handed to bioset_init() for f2fs_bioset. */
#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
38
39int __init f2fs_init_bioset(void)
40{
41 if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
42 0, BIOSET_NEED_BVECS))
43 return -ENOMEM;
44 return 0;
45}
46
/* Tear down f2fs_bioset; counterpart of f2fs_init_bioset(). */
void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}
51
52static bool __is_cp_guaranteed(struct page *page)
53{
54 struct address_space *mapping = page->mapping;
55 struct inode *inode;
56 struct f2fs_sb_info *sbi;
57
58 if (!mapping)
59 return false;
60
61 inode = mapping->host;
62 sbi = F2FS_I_SB(inode);
63
64 if (inode->i_ino == F2FS_META_INO(sbi) ||
65 inode->i_ino == F2FS_NODE_INO(sbi) ||
66 S_ISDIR(inode->i_mode))
67 return true;
68
69 if (f2fs_is_compressed_page(page))
70 return false;
71 if ((S_ISREG(inode->i_mode) &&
72 (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
73 page_private_gcing(page))
74 return true;
75 return false;
76}
77
78static enum count_type __read_io_type(struct page *page)
79{
80 struct address_space *mapping = page_file_mapping(page);
81
82 if (mapping) {
83 struct inode *inode = mapping->host;
84 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
85
86 if (inode->i_ino == F2FS_META_INO(sbi))
87 return F2FS_RD_META;
88
89 if (inode->i_ino == F2FS_NODE_INO(sbi))
90 return F2FS_RD_NODE;
91 }
92 return F2FS_RD_DATA;
93}
94
/*
 * Postprocessing steps for read bios.
 *
 * Each step is one bit of bio_post_read_ctx.enabled_steps. When the
 * corresponding feature is not configured the constant is 0, so every
 * "enabled_steps & STEP_x" test is constant-false for that step.
 */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
	STEP_DECRYPT = 1 << 0,
#else
	STEP_DECRYPT = 0, /* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
	STEP_DECOMPRESS = 1 << 1,
#else
	STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
	STEP_VERITY = 1 << 2,
#else
	STEP_VERITY = 0, /* compile out the verity-related code */
#endif
};
113
/*
 * Per-bio state for post-read processing. It is allocated from
 * bio_post_read_ctx_pool and stored in the read bio's ->bi_private.
 */
struct bio_post_read_ctx {
	struct bio *bio;		/* the read bio this context belongs to */
	struct f2fs_sb_info *sbi;	/* owner; supplies post_read_wq */
	struct work_struct work;	/* runs the post-read or verity work */
	unsigned int enabled_steps;	/* mask of enum bio_post_read_step bits */
};
120
121static void f2fs_finish_read_bio(struct bio *bio)
122{
123 struct bio_vec *bv;
124 struct bvec_iter_all iter_all;
125
126 /*
127 * Update and unlock the bio's pagecache pages, and put the
128 * decompression context for any compressed pages.
129 */
130 bio_for_each_segment_all(bv, bio, iter_all) {
131 struct page *page = bv->bv_page;
132
133 if (f2fs_is_compressed_page(page)) {
134 if (bio->bi_status)
135 f2fs_end_read_compressed_page(page, true, 0);
136 f2fs_put_page_dic(page);
137 continue;
138 }
139
140 /* PG_error was set if decryption or verity failed. */
141 if (bio->bi_status || PageError(page)) {
142 ClearPageUptodate(page);
143 /* will re-read again later */
144 ClearPageError(page);
145 } else {
146 SetPageUptodate(page);
147 }
148 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
149 unlock_page(page);
150 }
151
152 if (bio->bi_private)
153 mempool_free(bio->bi_private, bio_post_read_ctx_pool);
154 bio_put(bio);
155}
156
157static void f2fs_verify_bio(struct work_struct *work)
158{
159 struct bio_post_read_ctx *ctx =
160 container_of(work, struct bio_post_read_ctx, work);
161 struct bio *bio = ctx->bio;
162 bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
163
164 /*
165 * fsverity_verify_bio() may call readpages() again, and while verity
166 * will be disabled for this, decryption and/or decompression may still
167 * be needed, resulting in another bio_post_read_ctx being allocated.
168 * So to prevent deadlocks we need to release the current ctx to the
169 * mempool first. This assumes that verity is the last post-read step.
170 */
171 mempool_free(ctx, bio_post_read_ctx_pool);
172 bio->bi_private = NULL;
173
174 /*
175 * Verify the bio's pages with fs-verity. Exclude compressed pages,
176 * as those were handled separately by f2fs_end_read_compressed_page().
177 */
178 if (may_have_compressed_pages) {
179 struct bio_vec *bv;
180 struct bvec_iter_all iter_all;
181
182 bio_for_each_segment_all(bv, bio, iter_all) {
183 struct page *page = bv->bv_page;
184
185 if (!f2fs_is_compressed_page(page) &&
186 !PageError(page) && !fsverity_verify_page(page))
187 SetPageError(page);
188 }
189 } else {
190 fsverity_verify_bio(bio);
191 }
192
193 f2fs_finish_read_bio(bio);
194}
195
196/*
197 * If the bio's data needs to be verified with fs-verity, then enqueue the
198 * verity work for the bio. Otherwise finish the bio now.
199 *
200 * Note that to avoid deadlocks, the verity work can't be done on the
201 * decryption/decompression workqueue. This is because verifying the data pages
202 * can involve reading verity metadata pages from the file, and these verity
203 * metadata pages may be encrypted and/or compressed.
204 */
205static void f2fs_verify_and_finish_bio(struct bio *bio)
206{
207 struct bio_post_read_ctx *ctx = bio->bi_private;
208
209 if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
210 INIT_WORK(&ctx->work, f2fs_verify_bio);
211 fsverity_enqueue_verify_work(&ctx->work);
212 } else {
213 f2fs_finish_read_bio(bio);
214 }
215}
216
217/*
218 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
219 * remaining page was read by @ctx->bio.
220 *
221 * Note that a bio may span clusters (even a mix of compressed and uncompressed
222 * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
223 * that the bio includes at least one compressed page. The actual decompression
224 * is done on a per-cluster basis, not a per-bio basis.
225 */
226static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
227{
228 struct bio_vec *bv;
229 struct bvec_iter_all iter_all;
230 bool all_compressed = true;
231 block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector);
232
233 bio_for_each_segment_all(bv, ctx->bio, iter_all) {
234 struct page *page = bv->bv_page;
235
236 /* PG_error was set if decryption failed. */
237 if (f2fs_is_compressed_page(page))
238 f2fs_end_read_compressed_page(page, PageError(page),
239 blkaddr);
240 else
241 all_compressed = false;
242
243 blkaddr++;
244 }
245
246 /*
247 * Optimization: if all the bio's pages are compressed, then scheduling
248 * the per-bio verity work is unnecessary, as verity will be fully
249 * handled at the compression cluster level.
250 */
251 if (all_compressed)
252 ctx->enabled_steps &= ~STEP_VERITY;
253}
254
255static void f2fs_post_read_work(struct work_struct *work)
256{
257 struct bio_post_read_ctx *ctx =
258 container_of(work, struct bio_post_read_ctx, work);
259
260 if (ctx->enabled_steps & STEP_DECRYPT)
261 fscrypt_decrypt_bio(ctx->bio);
262
263 if (ctx->enabled_steps & STEP_DECOMPRESS)
264 f2fs_handle_step_decompress(ctx);
265
266 f2fs_verify_and_finish_bio(ctx->bio);
267}
268
269static void f2fs_read_end_io(struct bio *bio)
270{
271 struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
272 struct bio_post_read_ctx *ctx = bio->bi_private;
273
274 if (time_to_inject(sbi, FAULT_READ_IO)) {
275 f2fs_show_injection_info(sbi, FAULT_READ_IO);
276 bio->bi_status = BLK_STS_IOERR;
277 }
278
279 if (bio->bi_status) {
280 f2fs_finish_read_bio(bio);
281 return;
282 }
283
284 if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
285 INIT_WORK(&ctx->work, f2fs_post_read_work);
286 queue_work(ctx->sbi->post_read_wq, &ctx->work);
287 } else {
288 f2fs_verify_and_finish_bio(bio);
289 }
290}
291
292static void f2fs_write_end_io(struct bio *bio)
293{
294 struct f2fs_sb_info *sbi = bio->bi_private;
295 struct bio_vec *bvec;
296 struct bvec_iter_all iter_all;
297
298 if (time_to_inject(sbi, FAULT_WRITE_IO)) {
299 f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
300 bio->bi_status = BLK_STS_IOERR;
301 }
302
303 bio_for_each_segment_all(bvec, bio, iter_all) {
304 struct page *page = bvec->bv_page;
305 enum count_type type = WB_DATA_TYPE(page);
306
307 if (page_private_dummy(page)) {
308 clear_page_private_dummy(page);
309 unlock_page(page);
310 mempool_free(page, sbi->write_io_dummy);
311
312 if (unlikely(bio->bi_status))
313 f2fs_stop_checkpoint(sbi, true);
314 continue;
315 }
316
317 fscrypt_finalize_bounce_page(&page);
318
319#ifdef CONFIG_F2FS_FS_COMPRESSION
320 if (f2fs_is_compressed_page(page)) {
321 f2fs_compress_write_end_io(bio, page);
322 continue;
323 }
324#endif
325
326 if (unlikely(bio->bi_status)) {
327 mapping_set_error(page->mapping, -EIO);
328 if (type == F2FS_WB_CP_DATA)
329 f2fs_stop_checkpoint(sbi, true);
330 }
331
332 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
333 page->index != nid_of_node(page));
334
335 dec_page_count(sbi, type);
336 if (f2fs_in_warm_node_list(sbi, page))
337 f2fs_del_fsync_node_entry(sbi, page);
338 clear_page_private_gcing(page);
339 end_page_writeback(page);
340 }
341 if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
342 wq_has_sleeper(&sbi->cp_wait))
343 wake_up(&sbi->cp_wait);
344
345 bio_put(bio);
346}
347
348struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
349 block_t blk_addr, struct bio *bio)
350{
351 struct block_device *bdev = sbi->sb->s_bdev;
352 int i;
353
354 if (f2fs_is_multi_device(sbi)) {
355 for (i = 0; i < sbi->s_ndevs; i++) {
356 if (FDEV(i).start_blk <= blk_addr &&
357 FDEV(i).end_blk >= blk_addr) {
358 blk_addr -= FDEV(i).start_blk;
359 bdev = FDEV(i).bdev;
360 break;
361 }
362 }
363 }
364 if (bio) {
365 bio_set_dev(bio, bdev);
366 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
367 }
368 return bdev;
369}
370
371int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
372{
373 int i;
374
375 if (!f2fs_is_multi_device(sbi))
376 return 0;
377
378 for (i = 0; i < sbi->s_ndevs; i++)
379 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
380 return i;
381 return 0;
382}
383
384/*
385 * Return true, if pre_bio's bdev is same as its target device.
386 */
387static bool __same_bdev(struct f2fs_sb_info *sbi,
388 block_t blk_addr, struct bio *bio)
389{
390 struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
391 return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
392}
393
394static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
395{
396 struct f2fs_sb_info *sbi = fio->sbi;
397 struct bio *bio;
398
399 bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset);
400
401 f2fs_target_device(sbi, fio->new_blkaddr, bio);
402 if (is_read_io(fio->op)) {
403 bio->bi_end_io = f2fs_read_end_io;
404 bio->bi_private = NULL;
405 } else {
406 bio->bi_end_io = f2fs_write_end_io;
407 bio->bi_private = sbi;
408 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
409 fio->type, fio->temp);
410 }
411 if (fio->io_wbc)
412 wbc_init_bio(fio->io_wbc, bio);
413
414 return bio;
415}
416
417static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
418 pgoff_t first_idx,
419 const struct f2fs_io_info *fio,
420 gfp_t gfp_mask)
421{
422 /*
423 * The f2fs garbage collector sets ->encrypted_page when it wants to
424 * read/write raw data without encryption.
425 */
426 if (!fio || !fio->encrypted_page)
427 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
428 else if (fscrypt_inode_should_skip_dm_default_key(inode))
429 bio_set_skip_dm_default_key(bio);
430}
431
432static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
433 pgoff_t next_idx,
434 const struct f2fs_io_info *fio)
435{
436 /*
437 * The f2fs garbage collector sets ->encrypted_page when it wants to
438 * read/write raw data without encryption.
439 */
440 if (fio && fio->encrypted_page)
441 return !bio_has_crypt_ctx(bio) &&
442 (bio_should_skip_dm_default_key(bio) ==
443 fscrypt_inode_should_skip_dm_default_key(inode));
444
445 return fscrypt_mergeable_bio(bio, inode, next_idx);
446}
447
448static inline void __submit_bio(struct f2fs_sb_info *sbi,
449 struct bio *bio, enum page_type type)
450{
451 if (!is_read_io(bio_op(bio))) {
452 unsigned int start;
453
454 if (type != DATA && type != NODE)
455 goto submit_io;
456
457 if (f2fs_lfs_mode(sbi) && current->plug)
458 blk_finish_plug(current->plug);
459
460 if (!F2FS_IO_ALIGNED(sbi))
461 goto submit_io;
462
463 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
464 start %= F2FS_IO_SIZE(sbi);
465
466 if (start == 0)
467 goto submit_io;
468
469 /* fill dummy pages */
470 for (; start < F2FS_IO_SIZE(sbi); start++) {
471 struct page *page =
472 mempool_alloc(sbi->write_io_dummy,
473 GFP_NOIO | __GFP_NOFAIL);
474 f2fs_bug_on(sbi, !page);
475
476 lock_page(page);
477
478 zero_user_segment(page, 0, PAGE_SIZE);
479 set_page_private_dummy(page);
480
481 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
482 f2fs_bug_on(sbi, 1);
483 }
484 /*
485 * In the NODE case, we lose next block address chain. So, we
486 * need to do checkpoint in f2fs_sync_file.
487 */
488 if (type == NODE)
489 set_sbi_flag(sbi, SBI_NEED_CP);
490 }
491submit_io:
492 if (is_read_io(bio_op(bio)))
493 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
494 else
495 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
496 submit_bio(bio);
497}
498
/* Non-static entry point for __submit_bio(); forwards its arguments as is. */
void f2fs_submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	__submit_bio(sbi, bio, type);
}
504
505static void __attach_io_flag(struct f2fs_io_info *fio)
506{
507 struct f2fs_sb_info *sbi = fio->sbi;
508 unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
509 unsigned int io_flag, fua_flag, meta_flag;
510
511 if (fio->type == DATA)
512 io_flag = sbi->data_io_flag;
513 else if (fio->type == NODE)
514 io_flag = sbi->node_io_flag;
515 else
516 return;
517
518 fua_flag = io_flag & temp_mask;
519 meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
520
521 /*
522 * data/node io flag bits per temp:
523 * REQ_META | REQ_FUA |
524 * 5 | 4 | 3 | 2 | 1 | 0 |
525 * Cold | Warm | Hot | Cold | Warm | Hot |
526 */
527 if ((1 << fio->temp) & meta_flag)
528 fio->op_flags |= REQ_META;
529 if ((1 << fio->temp) & fua_flag)
530 fio->op_flags |= REQ_FUA;
531}
532
533static void __submit_merged_bio(struct f2fs_bio_info *io)
534{
535 struct f2fs_io_info *fio = &io->fio;
536
537 if (!io->bio)
538 return;
539
540 __attach_io_flag(fio);
541 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
542
543 if (is_read_io(fio->op))
544 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
545 else
546 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
547
548 __submit_bio(io->sbi, io->bio, fio->type);
549 io->bio = NULL;
550}
551
552static bool __has_merged_page(struct bio *bio, struct inode *inode,
553 struct page *page, nid_t ino)
554{
555 struct bio_vec *bvec;
556 struct bvec_iter_all iter_all;
557
558 if (!bio)
559 return false;
560
561 if (!inode && !page && !ino)
562 return true;
563
564 bio_for_each_segment_all(bvec, bio, iter_all) {
565 struct page *target = bvec->bv_page;
566
567 if (fscrypt_is_bounce_page(target)) {
568 target = fscrypt_pagecache_page(target);
569 if (IS_ERR(target))
570 continue;
571 }
572 if (f2fs_is_compressed_page(target)) {
573 target = f2fs_compress_control_page(target);
574 if (IS_ERR(target))
575 continue;
576 }
577
578 if (inode && inode == target->mapping->host)
579 return true;
580 if (page && page == target)
581 return true;
582 if (ino && ino == ino_of_node(target))
583 return true;
584 }
585
586 return false;
587}
588
589static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
590 enum page_type type, enum temp_type temp)
591{
592 enum page_type btype = PAGE_TYPE_OF_BIO(type);
593 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
594
595 down_write(&io->io_rwsem);
596
597 /* change META to META_FLUSH in the checkpoint procedure */
598 if (type >= META_FLUSH) {
599 io->fio.type = META_FLUSH;
600 io->fio.op = REQ_OP_WRITE;
601 io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
602 if (!test_opt(sbi, NOBARRIER))
603 io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
604 }
605 __submit_merged_bio(io);
606 up_write(&io->io_rwsem);
607}
608
609static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
610 struct inode *inode, struct page *page,
611 nid_t ino, enum page_type type, bool force)
612{
613 enum temp_type temp;
614 bool ret = true;
615
616 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
617 if (!force) {
618 enum page_type btype = PAGE_TYPE_OF_BIO(type);
619 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
620
621 down_read(&io->io_rwsem);
622 ret = __has_merged_page(io->bio, inode, page, ino);
623 up_read(&io->io_rwsem);
624 }
625 if (ret)
626 __f2fs_submit_merged_write(sbi, type, temp);
627
628 /* TODO: use HOT temp only for meta pages now. */
629 if (type >= META)
630 break;
631 }
632}
633
/* Unconditionally submit the pending write bios of @type (force mode). */
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
638
/*
 * Submit the pending write bios of @type that contain a page matching
 * @inode, @page or @ino (non-force mode; see __has_merged_page()).
 */
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
645
646void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
647{
648 f2fs_submit_merged_write(sbi, DATA);
649 f2fs_submit_merged_write(sbi, NODE);
650 f2fs_submit_merged_write(sbi, META);
651}
652
653/*
654 * Fill the locked page with data located in the block address.
655 * A caller needs to unlock the page on failure.
656 */
657int f2fs_submit_page_bio(struct f2fs_io_info *fio)
658{
659 struct bio *bio;
660 struct page *page = fio->encrypted_page ?
661 fio->encrypted_page : fio->page;
662
663 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
664 fio->is_por ? META_POR : (__is_meta_io(fio) ?
665 META_GENERIC : DATA_GENERIC_ENHANCE)))
666 return -EFSCORRUPTED;
667
668 trace_f2fs_submit_page_bio(page, fio);
669
670 /* Allocate a new bio */
671 bio = __bio_alloc(fio, 1);
672
673 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
674 fio->page->index, fio, GFP_NOIO);
675
676 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
677 bio_put(bio);
678 return -EFAULT;
679 }
680
681 if (fio->io_wbc && !is_read_io(fio->op))
682 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
683
684 __attach_io_flag(fio);
685 bio_set_op_attrs(bio, fio->op, fio->op_flags);
686
687 inc_page_count(fio->sbi, is_read_io(fio->op) ?
688 __read_io_type(page): WB_DATA_TYPE(fio->page));
689
690 __submit_bio(fio->sbi, bio, fio->type);
691 return 0;
692}
693
694static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
695 block_t last_blkaddr, block_t cur_blkaddr)
696{
697 if (unlikely(sbi->max_io_bytes &&
698 bio->bi_iter.bi_size >= sbi->max_io_bytes))
699 return false;
700 if (last_blkaddr + 1 != cur_blkaddr)
701 return false;
702 return __same_bdev(sbi, cur_blkaddr, bio);
703}
704
705static bool io_type_is_mergeable(struct f2fs_bio_info *io,
706 struct f2fs_io_info *fio)
707{
708 if (io->fio.op != fio->op)
709 return false;
710 return io->fio.op_flags == fio->op_flags;
711}
712
713static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
714 struct f2fs_bio_info *io,
715 struct f2fs_io_info *fio,
716 block_t last_blkaddr,
717 block_t cur_blkaddr)
718{
719 if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
720 unsigned int filled_blocks =
721 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
722 unsigned int io_size = F2FS_IO_SIZE(sbi);
723 unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
724
725 /* IOs in bio is aligned and left space of vectors is not enough */
726 if (!(filled_blocks % io_size) && left_vecs < io_size)
727 return false;
728 }
729 if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
730 return false;
731 return io_type_is_mergeable(io, fio);
732}
733
734static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
735 struct page *page, enum temp_type temp)
736{
737 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
738 struct bio_entry *be;
739
740 be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
741 be->bio = bio;
742 bio_get(bio);
743
744 if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
745 f2fs_bug_on(sbi, 1);
746
747 down_write(&io->bio_list_lock);
748 list_add_tail(&be->list, &io->bio_list);
749 up_write(&io->bio_list_lock);
750}
751
/*
 * Unlink @be from its list and free it back to bio_entry_slab. No lock is
 * taken here; the callers in this file hold the list's bio_list_lock for
 * writing. The bio reference taken in add_bio_entry() is not dropped here.
 */
static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
757
758static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
759 struct page *page)
760{
761 struct f2fs_sb_info *sbi = fio->sbi;
762 enum temp_type temp;
763 bool found = false;
764 int ret = -EAGAIN;
765
766 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
767 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
768 struct list_head *head = &io->bio_list;
769 struct bio_entry *be;
770
771 down_write(&io->bio_list_lock);
772 list_for_each_entry(be, head, list) {
773 if (be->bio != *bio)
774 continue;
775
776 found = true;
777
778 f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
779 *fio->last_block,
780 fio->new_blkaddr));
781 if (f2fs_crypt_mergeable_bio(*bio,
782 fio->page->mapping->host,
783 fio->page->index, fio) &&
784 bio_add_page(*bio, page, PAGE_SIZE, 0) ==
785 PAGE_SIZE) {
786 ret = 0;
787 break;
788 }
789
790 /* page can't be merged into bio; submit the bio */
791 del_bio_entry(be);
792 __submit_bio(sbi, *bio, DATA);
793 break;
794 }
795 up_write(&io->bio_list_lock);
796 }
797
798 if (ret) {
799 bio_put(*bio);
800 *bio = NULL;
801 }
802
803 return ret;
804}
805
806void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
807 struct bio **bio, struct page *page)
808{
809 enum temp_type temp;
810 bool found = false;
811 struct bio *target = bio ? *bio : NULL;
812
813 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
814 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
815 struct list_head *head = &io->bio_list;
816 struct bio_entry *be;
817
818 if (list_empty(head))
819 continue;
820
821 down_read(&io->bio_list_lock);
822 list_for_each_entry(be, head, list) {
823 if (target)
824 found = (target == be->bio);
825 else
826 found = __has_merged_page(be->bio, NULL,
827 page, 0);
828 if (found)
829 break;
830 }
831 up_read(&io->bio_list_lock);
832
833 if (!found)
834 continue;
835
836 found = false;
837
838 down_write(&io->bio_list_lock);
839 list_for_each_entry(be, head, list) {
840 if (target)
841 found = (target == be->bio);
842 else
843 found = __has_merged_page(be->bio, NULL,
844 page, 0);
845 if (found) {
846 target = be->bio;
847 del_bio_entry(be);
848 break;
849 }
850 }
851 up_write(&io->bio_list_lock);
852 }
853
854 if (found)
855 __submit_bio(sbi, target, DATA);
856 if (bio && *bio) {
857 bio_put(*bio);
858 *bio = NULL;
859 }
860}
861
862int f2fs_merge_page_bio(struct f2fs_io_info *fio)
863{
864 struct bio *bio = *fio->bio;
865 struct page *page = fio->encrypted_page ?
866 fio->encrypted_page : fio->page;
867
868 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
869 __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
870 return -EFSCORRUPTED;
871
872 trace_f2fs_submit_page_bio(page, fio);
873
874 if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
875 fio->new_blkaddr))
876 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
877alloc_new:
878 if (!bio) {
879 bio = __bio_alloc(fio, BIO_MAX_PAGES);
880 __attach_io_flag(fio);
881 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
882 fio->page->index, fio, GFP_NOIO);
883 bio_set_op_attrs(bio, fio->op, fio->op_flags);
884
885 add_bio_entry(fio->sbi, bio, page, fio->temp);
886 } else {
887 if (add_ipu_page(fio, &bio, page))
888 goto alloc_new;
889 }
890
891 if (fio->io_wbc)
892 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
893
894 inc_page_count(fio->sbi, WB_DATA_TYPE(page));
895
896 *fio->last_block = fio->new_blkaddr;
897 *fio->bio = bio;
898
899 return 0;
900}
901
902void f2fs_submit_page_write(struct f2fs_io_info *fio)
903{
904 struct f2fs_sb_info *sbi = fio->sbi;
905 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
906 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
907 struct page *bio_page;
908
909 f2fs_bug_on(sbi, is_read_io(fio->op));
910
911 down_write(&io->io_rwsem);
912next:
913 if (fio->in_list) {
914 spin_lock(&io->io_lock);
915 if (list_empty(&io->io_list)) {
916 spin_unlock(&io->io_lock);
917 goto out;
918 }
919 fio = list_first_entry(&io->io_list,
920 struct f2fs_io_info, list);
921 list_del(&fio->list);
922 spin_unlock(&io->io_lock);
923 }
924
925 verify_fio_blkaddr(fio);
926
927 if (fio->encrypted_page)
928 bio_page = fio->encrypted_page;
929 else if (fio->compressed_page)
930 bio_page = fio->compressed_page;
931 else
932 bio_page = fio->page;
933
934 /* set submitted = true as a return value */
935 fio->submitted = true;
936
937 inc_page_count(sbi, WB_DATA_TYPE(bio_page));
938
939 if (io->bio &&
940 (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
941 fio->new_blkaddr) ||
942 !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
943 bio_page->index, fio)))
944 __submit_merged_bio(io);
945alloc_new:
946 if (io->bio == NULL) {
947 if (F2FS_IO_ALIGNED(sbi) &&
948 (fio->type == DATA || fio->type == NODE) &&
949 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
950 dec_page_count(sbi, WB_DATA_TYPE(bio_page));
951 fio->retry = true;
952 goto skip;
953 }
954 io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
955 f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
956 bio_page->index, fio, GFP_NOIO);
957 io->fio = *fio;
958 }
959
960 if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
961 __submit_merged_bio(io);
962 goto alloc_new;
963 }
964
965 if (fio->io_wbc)
966 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
967
968 io->last_block_in_bio = fio->new_blkaddr;
969
970 trace_f2fs_submit_page_write(fio->page, fio);
971skip:
972 if (fio->in_list)
973 goto next;
974out:
975 if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
976 !f2fs_is_checkpoint_ready(sbi))
977 __submit_merged_bio(io);
978 up_write(&io->io_rwsem);
979}
980
981static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
982 unsigned nr_pages, unsigned op_flag,
983 pgoff_t first_idx, bool for_write)
984{
985 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
986 struct bio *bio;
987 struct bio_post_read_ctx *ctx;
988 unsigned int post_read_steps = 0;
989
990 bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
991 min_t(int, nr_pages, BIO_MAX_PAGES),
992 &f2fs_bioset);
993 if (!bio)
994 return ERR_PTR(-ENOMEM);
995
996 f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
997
998 f2fs_target_device(sbi, blkaddr, bio);
999 bio->bi_end_io = f2fs_read_end_io;
1000 bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
1001
1002 if (fscrypt_inode_uses_fs_layer_crypto(inode))
1003 post_read_steps |= STEP_DECRYPT;
1004
1005 if (f2fs_need_verity(inode, first_idx))
1006 post_read_steps |= STEP_VERITY;
1007
1008 /*
1009 * STEP_DECOMPRESS is handled specially, since a compressed file might
1010 * contain both compressed and uncompressed clusters. We'll allocate a
1011 * bio_post_read_ctx if the file is compressed, but the caller is
1012 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1013 */
1014
1015 if (post_read_steps || f2fs_compressed_file(inode)) {
1016 /* Due to the mempool, this never fails. */
1017 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1018 ctx->bio = bio;
1019 ctx->sbi = sbi;
1020 ctx->enabled_steps = post_read_steps;
1021 bio->bi_private = ctx;
1022 }
1023
1024 return bio;
1025}
1026
1027/* This can handle encryption stuffs */
1028static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1029 block_t blkaddr, int op_flags, bool for_write)
1030{
1031 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1032 struct bio *bio;
1033
1034 bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1035 page->index, for_write);
1036 if (IS_ERR(bio))
1037 return PTR_ERR(bio);
1038
1039 /* wait for GCed page writeback via META_MAPPING */
1040 f2fs_wait_on_block_writeback(inode, blkaddr);
1041
1042 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1043 bio_put(bio);
1044 return -EFAULT;
1045 }
1046 ClearPageError(page);
1047 inc_page_count(sbi, F2FS_RD_DATA);
1048 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
1049 __submit_bio(sbi, bio, DATA);
1050 return 0;
1051}
1052
1053static void __set_data_blkaddr(struct dnode_of_data *dn)
1054{
1055 struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1056 __le32 *addr_array;
1057 int base = 0;
1058
1059 if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1060 base = get_extra_isize(dn->inode);
1061
1062 /* Get physical address of data block */
1063 addr_array = blkaddr_in_node(rn);
1064 addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1065}
1066
1067/*
1068 * Lock ordering for the change of data block address:
1069 * ->data_page
1070 * ->node_page
1071 * update block addresses in the node page
1072 */
1073void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1074{
1075 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1076 __set_data_blkaddr(dn);
1077 if (set_page_dirty(dn->node_page))
1078 dn->node_changed = true;
1079}
1080
/*
 * Record @blkaddr as the data block address of @dn: store it in the dnode,
 * write it to the node page via f2fs_set_data_blkaddr(), then refresh the
 * extent cache.
 */
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}
1087
1088/* dn->ofs_in_node will be returned with up-to-date last block pointer */
1089int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1090{
1091 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1092 int err;
1093
1094 if (!count)
1095 return 0;
1096
1097 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1098 return -EPERM;
1099 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1100 return err;
1101
1102 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1103 dn->ofs_in_node, count);
1104
1105 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1106
1107 for (; count > 0; dn->ofs_in_node++) {
1108 block_t blkaddr = f2fs_data_blkaddr(dn);
1109
1110 if (blkaddr == NULL_ADDR) {
1111 dn->data_blkaddr = NEW_ADDR;
1112 __set_data_blkaddr(dn);
1113 count--;
1114 }
1115 }
1116
1117 if (set_page_dirty(dn->node_page))
1118 dn->node_changed = true;
1119 return 0;
1120}
1121
1122/* Should keep dn->ofs_in_node unchanged */
1123int f2fs_reserve_new_block(struct dnode_of_data *dn)
1124{
1125 unsigned int ofs_in_node = dn->ofs_in_node;
1126 int ret;
1127
1128 ret = f2fs_reserve_new_blocks(dn, 1);
1129 dn->ofs_in_node = ofs_in_node;
1130 return ret;
1131}
1132
1133int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1134{
1135 bool need_put = dn->inode_page ? false : true;
1136 int err;
1137
1138 err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1139 if (err)
1140 return err;
1141
1142 if (dn->data_blkaddr == NULL_ADDR)
1143 err = f2fs_reserve_new_block(dn);
1144 if (err || need_put)
1145 f2fs_put_dnode(dn);
1146 return err;
1147}
1148
1149int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
1150{
1151 struct extent_info ei = {0, 0, 0};
1152 struct inode *inode = dn->inode;
1153
1154 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1155 dn->data_blkaddr = ei.blk + index - ei.fofs;
1156 return 0;
1157 }
1158
1159 return f2fs_reserve_block(dn, index);
1160}
1161
1162struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1163 int op_flags, bool for_write)
1164{
1165 struct address_space *mapping = inode->i_mapping;
1166 struct dnode_of_data dn;
1167 struct page *page;
1168 struct extent_info ei = {0,0,0};
1169 int err;
1170
1171 page = f2fs_grab_cache_page(mapping, index, for_write);
1172 if (!page)
1173 return ERR_PTR(-ENOMEM);
1174
1175 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1176 dn.data_blkaddr = ei.blk + index - ei.fofs;
1177 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1178 DATA_GENERIC_ENHANCE_READ)) {
1179 err = -EFSCORRUPTED;
1180 goto put_err;
1181 }
1182 goto got_it;
1183 }
1184
1185 set_new_dnode(&dn, inode, NULL, NULL, 0);
1186 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1187 if (err)
1188 goto put_err;
1189 f2fs_put_dnode(&dn);
1190
1191 if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1192 err = -ENOENT;
1193 goto put_err;
1194 }
1195 if (dn.data_blkaddr != NEW_ADDR &&
1196 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1197 dn.data_blkaddr,
1198 DATA_GENERIC_ENHANCE)) {
1199 err = -EFSCORRUPTED;
1200 goto put_err;
1201 }
1202got_it:
1203 if (PageUptodate(page)) {
1204 unlock_page(page);
1205 return page;
1206 }
1207
1208 /*
1209 * A new dentry page is allocated but not able to be written, since its
1210 * new inode page couldn't be allocated due to -ENOSPC.
1211 * In such the case, its blkaddr can be remained as NEW_ADDR.
1212 * see, f2fs_add_link -> f2fs_get_new_data_page ->
1213 * f2fs_init_inode_metadata.
1214 */
1215 if (dn.data_blkaddr == NEW_ADDR) {
1216 zero_user_segment(page, 0, PAGE_SIZE);
1217 if (!PageUptodate(page))
1218 SetPageUptodate(page);
1219 unlock_page(page);
1220 return page;
1221 }
1222
1223 err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
1224 op_flags, for_write);
1225 if (err)
1226 goto put_err;
1227 return page;
1228
1229put_err:
1230 f2fs_put_page(page, 1);
1231 return ERR_PTR(err);
1232}
1233
1234struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
1235{
1236 struct address_space *mapping = inode->i_mapping;
1237 struct page *page;
1238
1239 page = find_get_page(mapping, index);
1240 if (page && PageUptodate(page))
1241 return page;
1242 f2fs_put_page(page, 0);
1243
1244 page = f2fs_get_read_data_page(inode, index, 0, false);
1245 if (IS_ERR(page))
1246 return page;
1247
1248 if (PageUptodate(page))
1249 return page;
1250
1251 wait_on_page_locked(page);
1252 if (unlikely(!PageUptodate(page))) {
1253 f2fs_put_page(page, 0);
1254 return ERR_PTR(-EIO);
1255 }
1256 return page;
1257}
1258
1259/*
1260 * If it tries to access a hole, return an error.
1261 * Because, the callers, functions in dir.c and GC, should be able to know
1262 * whether this page exists or not.
1263 */
1264struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1265 bool for_write)
1266{
1267 struct address_space *mapping = inode->i_mapping;
1268 struct page *page;
1269repeat:
1270 page = f2fs_get_read_data_page(inode, index, 0, for_write);
1271 if (IS_ERR(page))
1272 return page;
1273
1274 /* wait for read completion */
1275 lock_page(page);
1276 if (unlikely(page->mapping != mapping)) {
1277 f2fs_put_page(page, 1);
1278 goto repeat;
1279 }
1280 if (unlikely(!PageUptodate(page))) {
1281 f2fs_put_page(page, 1);
1282 return ERR_PTR(-EIO);
1283 }
1284 return page;
1285}
1286
1287/*
1288 * Caller ensures that this data page is never allocated.
1289 * A new zero-filled data page is allocated in the page cache.
1290 *
1291 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1292 * f2fs_unlock_op().
1293 * Note that, ipage is set only by make_empty_dir, and if any error occur,
1294 * ipage should be released by this function.
1295 */
1296struct page *f2fs_get_new_data_page(struct inode *inode,
1297 struct page *ipage, pgoff_t index, bool new_i_size)
1298{
1299 struct address_space *mapping = inode->i_mapping;
1300 struct page *page;
1301 struct dnode_of_data dn;
1302 int err;
1303
1304 page = f2fs_grab_cache_page(mapping, index, true);
1305 if (!page) {
1306 /*
1307 * before exiting, we should make sure ipage will be released
1308 * if any error occur.
1309 */
1310 f2fs_put_page(ipage, 1);
1311 return ERR_PTR(-ENOMEM);
1312 }
1313
1314 set_new_dnode(&dn, inode, ipage, NULL, 0);
1315 err = f2fs_reserve_block(&dn, index);
1316 if (err) {
1317 f2fs_put_page(page, 1);
1318 return ERR_PTR(err);
1319 }
1320 if (!ipage)
1321 f2fs_put_dnode(&dn);
1322
1323 if (PageUptodate(page))
1324 goto got_it;
1325
1326 if (dn.data_blkaddr == NEW_ADDR) {
1327 zero_user_segment(page, 0, PAGE_SIZE);
1328 if (!PageUptodate(page))
1329 SetPageUptodate(page);
1330 } else {
1331 f2fs_put_page(page, 1);
1332
1333 /* if ipage exists, blkaddr should be NEW_ADDR */
1334 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1335 page = f2fs_get_lock_data_page(inode, index, true);
1336 if (IS_ERR(page))
1337 return page;
1338 }
1339got_it:
1340 if (new_i_size && i_size_read(inode) <
1341 ((loff_t)(index + 1) << PAGE_SHIFT))
1342 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1343 return page;
1344}
1345
1346static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1347{
1348 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1349 struct f2fs_summary sum;
1350 struct node_info ni;
1351 block_t old_blkaddr;
1352 blkcnt_t count = 1;
1353 int err;
1354
1355 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1356 return -EPERM;
1357
1358 err = f2fs_get_node_info(sbi, dn->nid, &ni);
1359 if (err)
1360 return err;
1361
1362 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1363 if (dn->data_blkaddr != NULL_ADDR)
1364 goto alloc;
1365
1366 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1367 return err;
1368
1369alloc:
1370 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1371 old_blkaddr = dn->data_blkaddr;
1372 f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1373 &sum, seg_type, NULL);
1374 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
1375 invalidate_mapping_pages(META_MAPPING(sbi),
1376 old_blkaddr, old_blkaddr);
1377 f2fs_invalidate_compress_page(sbi, old_blkaddr);
1378 }
1379 f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1380
1381 /*
1382 * i_size will be updated by direct_IO. Otherwise, we'll get stale
1383 * data from unwritten block via dio_read.
1384 */
1385 return 0;
1386}
1387
1388int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
1389{
1390 struct inode *inode = file_inode(iocb->ki_filp);
1391 struct f2fs_map_blocks map;
1392 int flag;
1393 int err = 0;
1394 bool direct_io = iocb->ki_flags & IOCB_DIRECT;
1395
1396 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
1397 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
1398 if (map.m_len > map.m_lblk)
1399 map.m_len -= map.m_lblk;
1400 else
1401 map.m_len = 0;
1402
1403 map.m_next_pgofs = NULL;
1404 map.m_next_extent = NULL;
1405 map.m_seg_type = NO_CHECK_TYPE;
1406 map.m_may_create = true;
1407
1408 if (direct_io) {
1409 map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
1410 flag = f2fs_force_buffered_io(inode, iocb, from) ?
1411 F2FS_GET_BLOCK_PRE_AIO :
1412 F2FS_GET_BLOCK_PRE_DIO;
1413 goto map_blocks;
1414 }
1415 if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
1416 err = f2fs_convert_inline_inode(inode);
1417 if (err)
1418 return err;
1419 }
1420 if (f2fs_has_inline_data(inode))
1421 return err;
1422
1423 flag = F2FS_GET_BLOCK_PRE_AIO;
1424
1425map_blocks:
1426 err = f2fs_map_blocks(inode, &map, 1, flag);
1427 if (map.m_len > 0 && err == -ENOSPC) {
1428 if (!direct_io)
1429 set_inode_flag(inode, FI_NO_PREALLOC);
1430 err = 0;
1431 }
1432 return err;
1433}
1434
1435void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
1436{
1437 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1438 if (lock)
1439 down_read(&sbi->node_change);
1440 else
1441 up_read(&sbi->node_change);
1442 } else {
1443 if (lock)
1444 f2fs_lock_op(sbi);
1445 else
1446 f2fs_unlock_op(sbi);
1447 }
1448}
1449
1450/*
1451 * f2fs_map_blocks() tries to find or build mapping relationship which
1452 * maps continuous logical blocks to physical blocks, and return such
1453 * info via f2fs_map_blocks structure.
1454 */
1455int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
1456 int create, int flag)
1457{
1458 unsigned int maxblocks = map->m_len;
1459 struct dnode_of_data dn;
1460 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1461 int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1462 pgoff_t pgofs, end_offset, end;
1463 int err = 0, ofs = 1;
1464 unsigned int ofs_in_node, last_ofs_in_node;
1465 blkcnt_t prealloc;
1466 struct extent_info ei = {0,0,0};
1467 block_t blkaddr;
1468 unsigned int start_pgofs;
1469
1470 if (!maxblocks)
1471 return 0;
1472
1473 map->m_len = 0;
1474 map->m_flags = 0;
1475
1476 /* it only supports block size == page size */
1477 pgofs = (pgoff_t)map->m_lblk;
1478 end = pgofs + maxblocks;
1479
1480 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1481 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1482 map->m_may_create)
1483 goto next_dnode;
1484
1485 map->m_pblk = ei.blk + pgofs - ei.fofs;
1486 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1487 map->m_flags = F2FS_MAP_MAPPED;
1488 if (map->m_next_extent)
1489 *map->m_next_extent = pgofs + map->m_len;
1490
1491 /* for hardware encryption, but to avoid potential issue in future */
1492 if (flag == F2FS_GET_BLOCK_DIO)
1493 f2fs_wait_on_block_writeback_range(inode,
1494 map->m_pblk, map->m_len);
1495 goto out;
1496 }
1497
1498next_dnode:
1499 if (map->m_may_create)
1500 f2fs_do_map_lock(sbi, flag, true);
1501
1502 /* When reading holes, we need its node page */
1503 set_new_dnode(&dn, inode, NULL, NULL, 0);
1504 err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1505 if (err) {
1506 if (flag == F2FS_GET_BLOCK_BMAP)
1507 map->m_pblk = 0;
1508
1509 if (err == -ENOENT) {
1510 /*
1511 * There is one exceptional case that read_node_page()
1512 * may return -ENOENT due to filesystem has been
1513 * shutdown or cp_error, so force to convert error
1514 * number to EIO for such case.
1515 */
1516 if (map->m_may_create &&
1517 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1518 f2fs_cp_error(sbi))) {
1519 err = -EIO;
1520 goto unlock_out;
1521 }
1522
1523 err = 0;
1524 if (map->m_next_pgofs)
1525 *map->m_next_pgofs =
1526 f2fs_get_next_page_offset(&dn, pgofs);
1527 if (map->m_next_extent)
1528 *map->m_next_extent =
1529 f2fs_get_next_page_offset(&dn, pgofs);
1530 }
1531 goto unlock_out;
1532 }
1533
1534 start_pgofs = pgofs;
1535 prealloc = 0;
1536 last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1537 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1538
1539next_block:
1540 blkaddr = f2fs_data_blkaddr(&dn);
1541
1542 if (__is_valid_data_blkaddr(blkaddr) &&
1543 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1544 err = -EFSCORRUPTED;
1545 goto sync_out;
1546 }
1547
1548 if (__is_valid_data_blkaddr(blkaddr)) {
1549 /* use out-place-update for driect IO under LFS mode */
1550 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1551 map->m_may_create) {
1552 err = __allocate_data_block(&dn, map->m_seg_type);
1553 if (err)
1554 goto sync_out;
1555 blkaddr = dn.data_blkaddr;
1556 set_inode_flag(inode, FI_APPEND_WRITE);
1557 }
1558 } else {
1559 if (create) {
1560 if (unlikely(f2fs_cp_error(sbi))) {
1561 err = -EIO;
1562 goto sync_out;
1563 }
1564 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1565 if (blkaddr == NULL_ADDR) {
1566 prealloc++;
1567 last_ofs_in_node = dn.ofs_in_node;
1568 }
1569 } else {
1570 WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
1571 flag != F2FS_GET_BLOCK_DIO);
1572 err = __allocate_data_block(&dn,
1573 map->m_seg_type);
1574 if (!err)
1575 set_inode_flag(inode, FI_APPEND_WRITE);
1576 }
1577 if (err)
1578 goto sync_out;
1579 map->m_flags |= F2FS_MAP_NEW;
1580 blkaddr = dn.data_blkaddr;
1581 } else {
1582 if (flag == F2FS_GET_BLOCK_BMAP) {
1583 map->m_pblk = 0;
1584 goto sync_out;
1585 }
1586 if (flag == F2FS_GET_BLOCK_PRECACHE)
1587 goto sync_out;
1588 if (flag == F2FS_GET_BLOCK_FIEMAP &&
1589 blkaddr == NULL_ADDR) {
1590 if (map->m_next_pgofs)
1591 *map->m_next_pgofs = pgofs + 1;
1592 goto sync_out;
1593 }
1594 if (flag != F2FS_GET_BLOCK_FIEMAP) {
1595 /* for defragment case */
1596 if (map->m_next_pgofs)
1597 *map->m_next_pgofs = pgofs + 1;
1598 goto sync_out;
1599 }
1600 }
1601 }
1602
1603 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1604 goto skip;
1605
1606 if (map->m_len == 0) {
1607 /* preallocated unwritten block should be mapped for fiemap. */
1608 if (blkaddr == NEW_ADDR)
1609 map->m_flags |= F2FS_MAP_UNWRITTEN;
1610 map->m_flags |= F2FS_MAP_MAPPED;
1611
1612 map->m_pblk = blkaddr;
1613 map->m_len = 1;
1614 } else if ((map->m_pblk != NEW_ADDR &&
1615 blkaddr == (map->m_pblk + ofs)) ||
1616 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1617 flag == F2FS_GET_BLOCK_PRE_DIO) {
1618 ofs++;
1619 map->m_len++;
1620 } else {
1621 goto sync_out;
1622 }
1623
1624skip:
1625 dn.ofs_in_node++;
1626 pgofs++;
1627
1628 /* preallocate blocks in batch for one dnode page */
1629 if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1630 (pgofs == end || dn.ofs_in_node == end_offset)) {
1631
1632 dn.ofs_in_node = ofs_in_node;
1633 err = f2fs_reserve_new_blocks(&dn, prealloc);
1634 if (err)
1635 goto sync_out;
1636
1637 map->m_len += dn.ofs_in_node - ofs_in_node;
1638 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1639 err = -ENOSPC;
1640 goto sync_out;
1641 }
1642 dn.ofs_in_node = end_offset;
1643 }
1644
1645 if (pgofs >= end)
1646 goto sync_out;
1647 else if (dn.ofs_in_node < end_offset)
1648 goto next_block;
1649
1650 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1651 if (map->m_flags & F2FS_MAP_MAPPED) {
1652 unsigned int ofs = start_pgofs - map->m_lblk;
1653
1654 f2fs_update_extent_cache_range(&dn,
1655 start_pgofs, map->m_pblk + ofs,
1656 map->m_len - ofs);
1657 }
1658 }
1659
1660 f2fs_put_dnode(&dn);
1661
1662 if (map->m_may_create) {
1663 f2fs_do_map_lock(sbi, flag, false);
1664 f2fs_balance_fs(sbi, dn.node_changed);
1665 }
1666 goto next_dnode;
1667
1668sync_out:
1669
1670 /* for hardware encryption, but to avoid potential issue in future */
1671 if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
1672 f2fs_wait_on_block_writeback_range(inode,
1673 map->m_pblk, map->m_len);
1674
1675 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1676 if (map->m_flags & F2FS_MAP_MAPPED) {
1677 unsigned int ofs = start_pgofs - map->m_lblk;
1678
1679 f2fs_update_extent_cache_range(&dn,
1680 start_pgofs, map->m_pblk + ofs,
1681 map->m_len - ofs);
1682 }
1683 if (map->m_next_extent)
1684 *map->m_next_extent = pgofs + 1;
1685 }
1686 f2fs_put_dnode(&dn);
1687unlock_out:
1688 if (map->m_may_create) {
1689 f2fs_do_map_lock(sbi, flag, false);
1690 f2fs_balance_fs(sbi, dn.node_changed);
1691 }
1692out:
1693 trace_f2fs_map_blocks(inode, map, err);
1694 return err;
1695}
1696
1697bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1698{
1699 struct f2fs_map_blocks map;
1700 block_t last_lblk;
1701 int err;
1702
1703 if (pos + len > i_size_read(inode))
1704 return false;
1705
1706 map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1707 map.m_next_pgofs = NULL;
1708 map.m_next_extent = NULL;
1709 map.m_seg_type = NO_CHECK_TYPE;
1710 map.m_may_create = false;
1711 last_lblk = F2FS_BLK_ALIGN(pos + len);
1712
1713 while (map.m_lblk < last_lblk) {
1714 map.m_len = last_lblk - map.m_lblk;
1715 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
1716 if (err || map.m_len == 0)
1717 return false;
1718 map.m_lblk += map.m_len;
1719 }
1720 return true;
1721}
1722
1723static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1724{
1725 return (bytes >> inode->i_blkbits);
1726}
1727
1728static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1729{
1730 return (blks << inode->i_blkbits);
1731}
1732
1733static int __get_data_block(struct inode *inode, sector_t iblock,
1734 struct buffer_head *bh, int create, int flag,
1735 pgoff_t *next_pgofs, int seg_type, bool may_write)
1736{
1737 struct f2fs_map_blocks map;
1738 int err;
1739
1740 map.m_lblk = iblock;
1741 map.m_len = bytes_to_blks(inode, bh->b_size);
1742 map.m_next_pgofs = next_pgofs;
1743 map.m_next_extent = NULL;
1744 map.m_seg_type = seg_type;
1745 map.m_may_create = may_write;
1746
1747 err = f2fs_map_blocks(inode, &map, create, flag);
1748 if (!err) {
1749 map_bh(bh, inode->i_sb, map.m_pblk);
1750 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
1751 bh->b_size = blks_to_bytes(inode, map.m_len);
1752 }
1753 return err;
1754}
1755
1756static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
1757 struct buffer_head *bh_result, int create)
1758{
1759 return __get_data_block(inode, iblock, bh_result, create,
1760 F2FS_GET_BLOCK_DIO, NULL,
1761 f2fs_rw_hint_to_seg_type(inode->i_write_hint),
1762 true);
1763}
1764
1765static int get_data_block_dio(struct inode *inode, sector_t iblock,
1766 struct buffer_head *bh_result, int create)
1767{
1768 return __get_data_block(inode, iblock, bh_result, create,
1769 F2FS_GET_BLOCK_DIO, NULL,
1770 f2fs_rw_hint_to_seg_type(inode->i_write_hint),
1771 false);
1772}
1773
1774static int f2fs_xattr_fiemap(struct inode *inode,
1775 struct fiemap_extent_info *fieinfo)
1776{
1777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1778 struct page *page;
1779 struct node_info ni;
1780 __u64 phys = 0, len;
1781 __u32 flags;
1782 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1783 int err = 0;
1784
1785 if (f2fs_has_inline_xattr(inode)) {
1786 int offset;
1787
1788 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1789 inode->i_ino, false);
1790 if (!page)
1791 return -ENOMEM;
1792
1793 err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
1794 if (err) {
1795 f2fs_put_page(page, 1);
1796 return err;
1797 }
1798
1799 phys = blks_to_bytes(inode, ni.blk_addr);
1800 offset = offsetof(struct f2fs_inode, i_addr) +
1801 sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1802 get_inline_xattr_addrs(inode));
1803
1804 phys += offset;
1805 len = inline_xattr_size(inode);
1806
1807 f2fs_put_page(page, 1);
1808
1809 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1810
1811 if (!xnid)
1812 flags |= FIEMAP_EXTENT_LAST;
1813
1814 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1815 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1816 if (err || err == 1)
1817 return err;
1818 }
1819
1820 if (xnid) {
1821 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1822 if (!page)
1823 return -ENOMEM;
1824
1825 err = f2fs_get_node_info(sbi, xnid, &ni);
1826 if (err) {
1827 f2fs_put_page(page, 1);
1828 return err;
1829 }
1830
1831 phys = blks_to_bytes(inode, ni.blk_addr);
1832 len = inode->i_sb->s_blocksize;
1833
1834 f2fs_put_page(page, 1);
1835
1836 flags = FIEMAP_EXTENT_LAST;
1837 }
1838
1839 if (phys) {
1840 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1841 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1842 }
1843
1844 return (err < 0 ? err : 0);
1845}
1846
1847static loff_t max_inode_blocks(struct inode *inode)
1848{
1849 loff_t result = ADDRS_PER_INODE(inode);
1850 loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1851
1852 /* two direct node blocks */
1853 result += (leaf_count * 2);
1854
1855 /* two indirect node blocks */
1856 leaf_count *= NIDS_PER_BLOCK;
1857 result += (leaf_count * 2);
1858
1859 /* one double indirect node block */
1860 leaf_count *= NIDS_PER_BLOCK;
1861 result += leaf_count;
1862
1863 return result;
1864}
1865
1866int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1867 u64 start, u64 len)
1868{
1869 struct f2fs_map_blocks map;
1870 sector_t start_blk, last_blk;
1871 pgoff_t next_pgofs;
1872 u64 logical = 0, phys = 0, size = 0;
1873 u32 flags = 0;
1874 int ret = 0;
1875 bool compr_cluster = false;
1876 unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1877 loff_t maxbytes;
1878
1879 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1880 ret = f2fs_precache_extents(inode);
1881 if (ret)
1882 return ret;
1883 }
1884
1885 ret = fiemap_prep(inode, fieinfo, start, &len,
1886 FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
1887 if (ret)
1888 return ret;
1889
1890 inode_lock(inode);
1891
1892 maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
1893 if (start > maxbytes) {
1894 ret = -EFBIG;
1895 goto out;
1896 }
1897
1898 if (len > maxbytes || (maxbytes - len) < start)
1899 len = maxbytes - start;
1900
1901 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1902 ret = f2fs_xattr_fiemap(inode, fieinfo);
1903 goto out;
1904 }
1905
1906 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
1907 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1908 if (ret != -EAGAIN)
1909 goto out;
1910 }
1911
1912 if (bytes_to_blks(inode, len) == 0)
1913 len = blks_to_bytes(inode, 1);
1914
1915 start_blk = bytes_to_blks(inode, start);
1916 last_blk = bytes_to_blks(inode, start + len - 1);
1917
1918next:
1919 memset(&map, 0, sizeof(map));
1920 map.m_lblk = start_blk;
1921 map.m_len = bytes_to_blks(inode, len);
1922 map.m_next_pgofs = &next_pgofs;
1923 map.m_seg_type = NO_CHECK_TYPE;
1924
1925 if (compr_cluster)
1926 map.m_len = cluster_size - 1;
1927
1928 ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
1929 if (ret)
1930 goto out;
1931
1932 /* HOLE */
1933 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
1934 start_blk = next_pgofs;
1935
1936 if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
1937 max_inode_blocks(inode)))
1938 goto prep_next;
1939
1940 flags |= FIEMAP_EXTENT_LAST;
1941 }
1942
1943 if (size) {
1944 flags |= FIEMAP_EXTENT_MERGED;
1945 if (IS_ENCRYPTED(inode))
1946 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1947
1948 ret = fiemap_fill_next_extent(fieinfo, logical,
1949 phys, size, flags);
1950 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
1951 if (ret)
1952 goto out;
1953 size = 0;
1954 }
1955
1956 if (start_blk > last_blk)
1957 goto out;
1958
1959 if (compr_cluster) {
1960 compr_cluster = false;
1961
1962
1963 logical = blks_to_bytes(inode, start_blk - 1);
1964 phys = blks_to_bytes(inode, map.m_pblk);
1965 size = blks_to_bytes(inode, cluster_size);
1966
1967 flags |= FIEMAP_EXTENT_ENCODED;
1968
1969 start_blk += cluster_size - 1;
1970
1971 if (start_blk > last_blk)
1972 goto out;
1973
1974 goto prep_next;
1975 }
1976
1977 if (map.m_pblk == COMPRESS_ADDR) {
1978 compr_cluster = true;
1979 start_blk++;
1980 goto prep_next;
1981 }
1982
1983 logical = blks_to_bytes(inode, start_blk);
1984 phys = blks_to_bytes(inode, map.m_pblk);
1985 size = blks_to_bytes(inode, map.m_len);
1986 flags = 0;
1987 if (map.m_flags & F2FS_MAP_UNWRITTEN)
1988 flags = FIEMAP_EXTENT_UNWRITTEN;
1989
1990 start_blk += bytes_to_blks(inode, size);
1991
1992prep_next:
1993 cond_resched();
1994 if (fatal_signal_pending(current))
1995 ret = -EINTR;
1996 else
1997 goto next;
1998out:
1999 if (ret == 1)
2000 ret = 0;
2001
2002 inode_unlock(inode);
2003 return ret;
2004}
2005
2006static inline loff_t f2fs_readpage_limit(struct inode *inode)
2007{
2008 if (IS_ENABLED(CONFIG_FS_VERITY) &&
2009 (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
2010 return inode->i_sb->s_maxbytes;
2011
2012 return i_size_read(inode);
2013}
2014
2015static int f2fs_read_single_page(struct inode *inode, struct page *page,
2016 unsigned nr_pages,
2017 struct f2fs_map_blocks *map,
2018 struct bio **bio_ret,
2019 sector_t *last_block_in_bio,
2020 bool is_readahead)
2021{
2022 struct bio *bio = *bio_ret;
2023 const unsigned blocksize = blks_to_bytes(inode, 1);
2024 sector_t block_in_file;
2025 sector_t last_block;
2026 sector_t last_block_in_file;
2027 sector_t block_nr;
2028 int ret = 0;
2029
2030 block_in_file = (sector_t)page_index(page);
2031 last_block = block_in_file + nr_pages;
2032 last_block_in_file = bytes_to_blks(inode,
2033 f2fs_readpage_limit(inode) + blocksize - 1);
2034 if (last_block > last_block_in_file)
2035 last_block = last_block_in_file;
2036
2037 /* just zeroing out page which is beyond EOF */
2038 if (block_in_file >= last_block)
2039 goto zero_out;
2040 /*
2041 * Map blocks using the previous result first.
2042 */
2043 if ((map->m_flags & F2FS_MAP_MAPPED) &&
2044 block_in_file > map->m_lblk &&
2045 block_in_file < (map->m_lblk + map->m_len))
2046 goto got_it;
2047
2048 /*
2049 * Then do more f2fs_map_blocks() calls until we are
2050 * done with this page.
2051 */
2052 map->m_lblk = block_in_file;
2053 map->m_len = last_block - block_in_file;
2054
2055 ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
2056 if (ret)
2057 goto out;
2058got_it:
2059 if ((map->m_flags & F2FS_MAP_MAPPED)) {
2060 block_nr = map->m_pblk + block_in_file - map->m_lblk;
2061 SetPageMappedToDisk(page);
2062
2063 if (!PageUptodate(page) && (!PageSwapCache(page) &&
2064 !cleancache_get_page(page))) {
2065 SetPageUptodate(page);
2066 goto confused;
2067 }
2068
2069 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2070 DATA_GENERIC_ENHANCE_READ)) {
2071 ret = -EFSCORRUPTED;
2072 goto out;
2073 }
2074 } else {
2075zero_out:
2076 zero_user_segment(page, 0, PAGE_SIZE);
2077 if (f2fs_need_verity(inode, page->index) &&
2078 !fsverity_verify_page(page)) {
2079 ret = -EIO;
2080 goto out;
2081 }
2082 if (!PageUptodate(page))
2083 SetPageUptodate(page);
2084 unlock_page(page);
2085 goto out;
2086 }
2087
2088 /*
2089 * This page will go to BIO. Do we need to send this
2090 * BIO off first?
2091 */
2092 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2093 *last_block_in_bio, block_nr) ||
2094 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2095submit_and_realloc:
2096 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2097 bio = NULL;
2098 }
2099 if (bio == NULL) {
2100 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2101 is_readahead ? REQ_RAHEAD : 0, page->index,
2102 false);
2103 if (IS_ERR(bio)) {
2104 ret = PTR_ERR(bio);
2105 bio = NULL;
2106 goto out;
2107 }
2108 }
2109
2110 /*
2111 * If the page is under writeback, we need to wait for
2112 * its completion to see the correct decrypted data.
2113 */
2114 f2fs_wait_on_block_writeback(inode, block_nr);
2115
2116 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2117 goto submit_and_realloc;
2118
2119 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2120 f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
2121 ClearPageError(page);
2122 *last_block_in_bio = block_nr;
2123 goto out;
2124confused:
2125 if (bio) {
2126 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2127 bio = NULL;
2128 }
2129 unlock_page(page);
2130out:
2131 *bio_ret = bio;
2132 return ret;
2133}
2134
#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * Queue reads for the compressed pages backing the cluster described by
 * @cc.
 *
 * Pages in cc->rpages that lie beyond the read limit (zero-filled here) or
 * are already up to date are unlocked and dropped from the cluster first.
 * For what remains, the compressed block addresses following the cluster's
 * COMPRESS_ADDR slot are counted, a decompress context is allocated, and
 * each compressed page is either loaded through
 * f2fs_load_compressed_page() or added to a read bio.
 *
 * @bio_ret / @last_block_in_bio are the in/out bio-building state shared
 * with the caller; @nr_pages, @is_readahead and @for_write are passed on
 * when a new bio is grabbed.
 *
 * Returns 0 on success or a negative error.  On the "out" error path the
 * remaining rpages have their Uptodate and Error flags cleared and are
 * unlocked.
 */
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
				unsigned nr_pages, sector_t *last_block_in_bio,
				bool is_readahead, bool for_write)
{
	struct inode *inode = cc->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	const unsigned blocksize = blks_to_bytes(inode, 1);
	const unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
	struct decompress_io_ctx *dic = NULL;
	struct bio *bio = *bio_ret;
	struct dnode_of_data dn;
	sector_t last_block_in_file;
	int ret = 0;
	int i;

	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);

	/* get rid of pages beyond EOF */
	for (i = 0; i < cc->cluster_size; i++) {
		struct page *page = cc->rpages[i];

		if (!page)
			continue;

		if ((sector_t)page->index >= last_block_in_file) {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
				SetPageUptodate(page);
		} else if (!PageUptodate(page)) {
			/* still has to be read: keep it in the cluster */
			continue;
		}

		unlock_page(page);
		cc->rpages[i] = NULL;
		cc->nr_rpages--;
	}

	/* we are done since all pages are beyond EOF */
	if (f2fs_cluster_is_empty(cc))
		goto out;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		goto out;

	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);

	/* count the compressed blocks that follow the COMPRESS_ADDR slot */
	for (i = 1; i < cc->cluster_size; i++) {
		block_t blkaddr = data_blkaddr(dn.inode, dn.node_page,
						dn.ofs_in_node + i);

		if (!__is_valid_data_blkaddr(blkaddr))
			break;

		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
			ret = -EFAULT;
			goto out_put_dnode;
		}
		cc->nr_cpages++;
	}

	/* nothing to decompress */
	if (cc->nr_cpages == 0) {
		ret = 0;
		goto out_put_dnode;
	}

	dic = f2fs_alloc_dic(cc);
	if (IS_ERR(dic)) {
		ret = PTR_ERR(dic);
		goto out_put_dnode;
	}

	for (i = 0; i < cc->nr_cpages; i++) {
		struct page *page = dic->cpages[i];
		struct bio_post_read_ctx *ctx;
		block_t blkaddr;

		blkaddr = data_blkaddr(dn.inode, dn.node_page,
						dn.ofs_in_node + i + 1);

		f2fs_wait_on_block_writeback(inode, blkaddr);

		/* loaded without a bio: only the completion accounting is left */
		if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
			if (atomic_dec_and_test(&dic->remaining_pages))
				f2fs_decompress_cluster(dic);
			continue;
		}

		if (bio && (!page_is_mergeable(sbi, bio,
					*last_block_in_bio, blkaddr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
			__submit_bio(sbi, bio, DATA);
			bio = NULL;
		}

		if (!bio) {
			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0,
					page->index, for_write);
			if (IS_ERR(bio)) {
				ret = PTR_ERR(bio);
				f2fs_decompress_end_io(dic, ret);
				f2fs_put_dnode(&dn);
				*bio_ret = NULL;
				return ret;
			}
		}

		/* the bio is full: send it and retry with a fresh one */
		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		ctx = bio->bi_private;
		ctx->enabled_steps |= STEP_DECOMPRESS;
		refcount_inc(&dic->refcnt);

		inc_page_count(sbi, F2FS_RD_DATA);
		f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
		f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
		ClearPageError(page);
		*last_block_in_bio = blkaddr;
	}

	f2fs_put_dnode(&dn);

	*bio_ret = bio;
	return 0;

out_put_dnode:
	f2fs_put_dnode(&dn);
out:
	for (i = 0; i < cc->cluster_size; i++) {
		struct page *rpage = cc->rpages[i];

		if (!rpage)
			continue;

		ClearPageUptodate(rpage);
		ClearPageError(rpage);
		unlock_page(rpage);
	}
	*bio_ret = bio;
	return ret;
}
#endif
2283
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 *
 * @mapping:      address space the pages belong to
 * @pages:        list of pages to read (consumed from the tail), or NULL when
 *                a single @page is supplied instead
 * @page:         page to read when @pages is NULL
 * @nr_pages:     number of loop iterations / pages to process
 * @is_readahead: forwarded to the single-page and multi-page read helpers
 *
 * Returns 0 when @pages is non-NULL (list mode never reports an error);
 * otherwise returns ret as left by the read helpers.
 */
int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* cluster context; starts empty (NULL_CLUSTER) and is filled page by page */
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.cpages = NULL,
		.nr_rpages = 0,
		.nr_cpages = 0,
	};
#endif
	/* nr_pages is decremented by the loop; keep the original count */
	unsigned max_nr_pages = nr_pages;
	int ret = 0;
	bool drop_ra = false;

	/* this is real from f2fs_merkle_tree_readahead() in old kernel only. */
	if (!nr_pages)
		return 0;

	/* start with an empty, read-only mapping request */
	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	/*
	 * Two readahead threads for same address range can cause race condition
	 * which fragments sequential read IOs. So let's avoid each other.
	 */
	if (pages && is_readahead) {
		page = list_last_entry(pages, struct page, lru);
		/* same starting index already recorded: drop this request */
		if (READ_ONCE(F2FS_I(inode)->ra_offset) == page_index(page))
			drop_ra = true;
		else
			WRITE_ONCE(F2FS_I(inode)->ra_offset, page_index(page));
	}

	for (; nr_pages; nr_pages--) {
		if (pages) {
			page = list_last_entry(pages, struct page, lru);

			prefetchw(&page->flags);
			list_del(&page->lru);
			/*
			 * Skip the page when readahead is being dropped or when
			 * it cannot be inserted into the page cache.
			 */
			if (drop_ra || add_to_page_cache_lru(page, mapping,
						  page_index(page),
						  readahead_gfp_mask(mapping)))
				goto next_page;
		}

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* there are remained compressed pages, submit them */
			if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							is_readahead, false);
				f2fs_destroy_compress_ctx(&cc, false);
				if (ret)
					goto set_error_page;
			}
			/* <0: error, 0: not a compressed cluster, >0: compressed */
			ret = f2fs_is_compressed_cluster(inode, page->index);
			if (ret < 0)
				goto set_error_page;
			else if (!ret)
				goto read_single_page;

			ret = f2fs_init_compress_ctx(&cc);
			if (ret)
				goto set_error_page;

			/* collect the page; the cluster is read once it is complete */
			f2fs_compress_ctx_add_page(&cc, page);

			goto next_page;
		}
read_single_page:
#endif

		ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
					&bio, &last_block_in_bio, is_readahead);
		if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
			/* failed page: flag the error, zero it, and release the lock */
			SetPageError(page);
			zero_user_segment(page, 0, PAGE_SIZE);
			unlock_page(page);
		}
next_page:
		if (pages)
			put_page(page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* last page */
			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							is_readahead, false);
				f2fs_destroy_compress_ctx(&cc, false);
			}
		}
#endif
	}
	BUG_ON(pages && !list_empty(pages));
	/* submit whatever is still pending in the bio */
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);

	/* reset the recorded readahead offset only if this call set it */
	if (pages && is_readahead && !drop_ra)
		WRITE_ONCE(F2FS_I(inode)->ra_offset, -1);
	return pages ? 0 : ret;
}
2418
2419static int f2fs_read_data_page(struct file *file, struct page *page)
2420{
2421 struct inode *inode = page_file_mapping(page)->host;
2422 int ret = -EAGAIN;
2423
2424 trace_f2fs_readpage(page, DATA);
2425
2426 if (!f2fs_is_compress_backend_ready(inode)) {
2427 unlock_page(page);
2428 return -EOPNOTSUPP;
2429 }
2430
2431 /* If the file has inline data, try to read it directly */
2432 if (f2fs_has_inline_data(inode))
2433 ret = f2fs_read_inline_data(inode, page);
2434 if (ret == -EAGAIN)
2435 ret = f2fs_mpage_readpages(page_file_mapping(page),
2436 NULL, page, 1, false);
2437 return ret;
2438}
2439
2440static int f2fs_read_data_pages(struct file *file,
2441 struct address_space *mapping,
2442 struct list_head *pages, unsigned nr_pages)
2443{
2444 struct inode *inode = mapping->host;
2445 struct page *page = list_last_entry(pages, struct page, lru);
2446
2447 trace_f2fs_readpages(inode, page, nr_pages);
2448
2449 if (!f2fs_is_compress_backend_ready(inode))
2450 return 0;
2451
2452 /* If the file has inline data, skip readpages */
2453 if (f2fs_has_inline_data(inode))
2454 return 0;
2455
2456 return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
2457}
2458
2459int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2460{
2461 struct inode *inode = fio->page->mapping->host;
2462 struct page *mpage, *page;
2463 gfp_t gfp_flags = GFP_NOFS;
2464
2465 if (!f2fs_encrypted_file(inode))
2466 return 0;
2467
2468 page = fio->compressed_page ? fio->compressed_page : fio->page;
2469
2470 /* wait for GCed page writeback via META_MAPPING */
2471 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2472
2473 if (fscrypt_inode_uses_inline_crypto(inode))
2474 return 0;
2475
2476retry_encrypt:
2477 fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2478 PAGE_SIZE, 0, gfp_flags);
2479 if (IS_ERR(fio->encrypted_page)) {
2480 /* flush pending IOs and wait for a while in the ENOMEM case */
2481 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2482 f2fs_flush_merged_writes(fio->sbi);
2483 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
2484 gfp_flags |= __GFP_NOFAIL;
2485 goto retry_encrypt;
2486 }
2487 return PTR_ERR(fio->encrypted_page);
2488 }
2489
2490 mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2491 if (mpage) {
2492 if (PageUptodate(mpage))
2493 memcpy(page_address(mpage),
2494 page_address(fio->encrypted_page), PAGE_SIZE);
2495 f2fs_put_page(mpage, 1);
2496 }
2497 return 0;
2498}
2499
2500static inline bool check_inplace_update_policy(struct inode *inode,
2501 struct f2fs_io_info *fio)
2502{
2503 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2504 unsigned int policy = SM_I(sbi)->ipu_policy;
2505
2506 if (policy & (0x1 << F2FS_IPU_FORCE))
2507 return true;
2508 if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
2509 return true;
2510 if (policy & (0x1 << F2FS_IPU_UTIL) &&
2511 utilization(sbi) > SM_I(sbi)->min_ipu_util)
2512 return true;
2513 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
2514 utilization(sbi) > SM_I(sbi)->min_ipu_util)
2515 return true;
2516
2517 /*
2518 * IPU for rewrite async pages
2519 */
2520 if (policy & (0x1 << F2FS_IPU_ASYNC) &&
2521 fio && fio->op == REQ_OP_WRITE &&
2522 !(fio->op_flags & REQ_SYNC) &&
2523 !IS_ENCRYPTED(inode))
2524 return true;
2525
2526 /* this is only set during fdatasync */
2527 if (policy & (0x1 << F2FS_IPU_FSYNC) &&
2528 is_inode_flag_set(inode, FI_NEED_IPU))
2529 return true;
2530
2531 if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2532 !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2533 return true;
2534
2535 return false;
2536}
2537
2538bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2539{
2540 /* swap file is migrating in aligned write mode */
2541 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2542 return false;
2543
2544 if (f2fs_is_pinned_file(inode))
2545 return true;
2546
2547 /* if this is cold file, we should overwrite to avoid fragmentation */
2548 if (file_is_cold(inode))
2549 return true;
2550
2551 return check_inplace_update_policy(inode, fio);
2552}
2553
2554bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2555{
2556 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2557
2558 if (f2fs_lfs_mode(sbi))
2559 return true;
2560 if (S_ISDIR(inode->i_mode))
2561 return true;
2562 if (IS_NOQUOTA(inode))
2563 return true;
2564 if (f2fs_is_atomic_file(inode))
2565 return true;
2566 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2567 return true;
2568
2569 /* swap file is migrating in aligned write mode */
2570 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2571 return true;
2572
2573 if (fio) {
2574 if (page_private_gcing(fio->page))
2575 return true;
2576 if (page_private_dummy(fio->page))
2577 return true;
2578 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2579 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2580 return true;
2581 }
2582 return false;
2583}
2584
2585static inline bool need_inplace_update(struct f2fs_io_info *fio)
2586{
2587 struct inode *inode = fio->page->mapping->host;
2588
2589 if (f2fs_should_update_outplace(inode, fio))
2590 return false;
2591
2592 return f2fs_should_update_inplace(inode, fio);
2593}
2594
/*
 * Write the data page described by @fio, either in place (IPU) or out of
 * place (OPU).
 *
 * fio->need_lock controls f2fs_lock_op handling: LOCK_REQ takes the lock with
 * a trylock up front, LOCK_RETRY takes it only before the out-of-place path,
 * LOCK_DONE means no locking is done here.
 *
 * Returns 0 on success, -EAGAIN when f2fs_trylock_op() fails, -EFSCORRUPTED
 * when the old block address fails validation, or the error of a failing
 * helper. When the old block address is NULL_ADDR the page is treated as
 * truncated: its uptodate and gcing marks are cleared and err (0) is
 * returned.
 */
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	/*
	 * Fast path: in-place update wanted and the extent cache already
	 * knows the block address, so no dnode lookup (and no lock) is needed.
	 */
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE))
			return -EFSCORRUPTED;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* Deadlock due to between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		clear_page_private_gcing(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto out_writepage;
	}
	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
	if (ipu_force ||
		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = f2fs_encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		/* dnode and op lock are released before the in-place write */
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		if (err) {
			/* undo: release the bounce page and end writeback */
			if (fscrypt_inode_uses_fs_layer_crypto(inode))
				fscrypt_finalize_bounce_page(&fio->encrypted_page);
			if (PageWriteback(page))
				end_page_writeback(page);
		} else {
			set_inode_flag(inode, FI_UPDATE_WRITE);
		}
		trace_f2fs_do_write_data_page(fio->page, IPU);
		return err;
	}

	/* out-of-place path: LOCK_RETRY callers take the op lock only now */
	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		/* from here on the exit path must unlock */
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = f2fs_encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}
2708
/*
 * Write back one dirty data page.
 *
 * @page:          page to write
 * @submitted:     optional out-parameter; set to 1/0 from fio.submitted on the
 *                 normal exit path (not written for reclaim writeback or
 *                 after a checkpoint error)
 * @bio:           cached bio, stored in the io request
 * @last_block:    last block of the cached bio, stored in the io request
 * @wbc:           writeback control
 * @io_type:       iostat type recorded in the io request
 * @compr_blocks:  non-zero skips the i_size range check and is stored in the
 *                 io request
 * @allow_balance: permit calling f2fs_balance_fs() on exit
 *
 * Returns 0 on the normal exit path with the page unlocked (this includes
 * -ENOENT from the write helper and skipped pages). On the redirty path it
 * returns AOP_WRITEPAGE_ACTIVATE without unlocking the page when there is no
 * error or the writeback is for reclaim; otherwise it unlocks the page and
 * returns the error.
 */
int f2fs_write_single_data_page(struct page *page, int *submitted,
				struct bio **bio,
				sector_t *last_block,
				struct writeback_control *wbc,
				enum iostat_type io_type,
				int compr_blocks,
				bool allow_balance)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	/* index of the page that contains i_size */
	const pgoff_t end_index = ((unsigned long long)i_size)
							>> PAGE_SHIFT;
	/* file offset just past this page */
	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.compr_blocks = compr_blocks,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
		.bio = bio,
		.last_block = last_block,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode) &&
				!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	/* pages fully inside i_size, verity builds and compressed writes skip the range check */
	if (page->index < end_index ||
			f2fs_verity_in_progress(inode) ||
			compr_blocks)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	/* the page straddles i_size: zero the part beyond it */
	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry/quota blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
		/*
		 * We need to wait for node_write to avoid block allocation during
		 * checkpoint. This can only happen to quota writes which can cause
		 * the below discard race condition.
		 */
		if (IS_NOQUOTA(inode))
			down_read(&sbi->node_write);

		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);

		if (IS_NOQUOTA(inode))
			up_read(&sbi->node_write);

		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	/* -EAGAIN here means "not handled as inline data, do a normal write" */
	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		/* first attempt uses LOCK_RETRY; on -EAGAIN retry with LOCK_REQ */
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		/* last_disk_size only ever grows, under i_size_lock */
		spin_lock(&F2FS_I(inode)->i_size_lock);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		spin_unlock(&F2FS_I(inode)->i_size_lock);
	}

done:
	/* every error except -ENOENT redirties the page */
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		ClearPageUptodate(page);
		clear_page_private_gcing(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		/* do not report a submit count to the caller */
		submitted = NULL;
	}
	unlock_page(page);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
			!F2FS_I(inode)->cp_task && allow_balance)
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		f2fs_submit_merged_ipu_write(sbi, bio, NULL);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted ? 1 : 0;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to return value of fsync() followed by atomic_write failure to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}
2881
2882static int f2fs_write_data_page(struct page *page,
2883 struct writeback_control *wbc)
2884{
2885#ifdef CONFIG_F2FS_FS_COMPRESSION
2886 struct inode *inode = page->mapping->host;
2887
2888 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2889 goto out;
2890
2891 if (f2fs_compressed_file(inode)) {
2892 if (f2fs_is_compressed_cluster(inode, page->index)) {
2893 redirty_page_for_writepage(wbc, page);
2894 return AOP_WRITEPAGE_ACTIVATE;
2895 }
2896 }
2897out:
2898#endif
2899
2900 return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2901 wbc, FS_DATA_IO, 0, true);
2902}
2903
/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 *
 * Walks the tagged pages of @mapping in the range selected by @wbc and writes
 * each one with f2fs_write_single_data_page(); for compressed files the pages
 * are collected into a cluster context and written with
 * f2fs_write_multi_pages() instead.
 *
 * Returns 0, or the first error that stopped the walk. -EAGAIN and
 * AOP_WRITEPAGE_ACTIVATE from the single-page path are absorbed and do not
 * stop it.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0, retry = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct bio *bio = NULL;
	sector_t last_block;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = mapping->host;
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.nr_rpages = 0,
		.cpages = NULL,
		.rbuf = NULL,
		.cbuf = NULL,
		.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
		.private = NULL,
	};
#endif
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;
	int submitted = 0;
	int i;

	pagevec_init(&pvec);

	/* inodes with few dirty pages are flagged as hot data */
	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	retry = 0;
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && !retry && (index <= end)) {
		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool need_readd;
readd:
			/* re-entered for the same page after a cluster was flushed */
			need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				ret = f2fs_init_compress_ctx(&cc);
				if (ret) {
					done = 1;
					break;
				}

				/*
				 * The page cannot join the collected cluster:
				 * write the cluster out, then revisit this page.
				 */
				if (!f2fs_cluster_can_merge_page(&cc,
								page->index)) {
					ret = f2fs_write_multi_pages(&cc,
						&submitted, wbc, io_type);
					if (!ret)
						need_readd = true;
					goto result;
				}

				if (unlikely(f2fs_cp_error(sbi)))
					goto lock_page;

				if (f2fs_cluster_is_empty(&cc)) {
					void *fsdata = NULL;
					struct page *pagep;
					int ret2;

					ret2 = f2fs_prepare_compress_overwrite(
							inode, &pagep,
							page->index, &fsdata);
					if (ret2 < 0) {
						ret = ret2;
						done = 1;
						break;
					} else if (ret2 &&
						!f2fs_compress_write_end(inode,
								fsdata, page->index,
								1)) {
						/* restart the whole walk */
						retry = 1;
						break;
					}
				} else {
					goto lock_page;
				}
			}
#endif
			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
#ifdef CONFIG_F2FS_FS_COMPRESSION
lock_page:
#endif
			done_index = page->index;
retry_write:
			lock_page(page);

			/* page no longer belongs to this mapping: skip it */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* wait for in-flight writeback only for sync writeback */
			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
							DATA, true, true);
				else
					goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				/* take a reference and defer the write to the cluster */
				get_page(page);
				f2fs_compress_ctx_add_page(&cc, page);
				continue;
			}
#endif
			ret = f2fs_write_single_data_page(page, &submitted,
					&bio, &last_block, wbc, io_type,
					0, true);
			/* the callee leaves the page locked in this case */
			if (ret == AOP_WRITEPAGE_ACTIVATE)
				unlock_page(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
result:
#endif
			nwritten += submitted;
			wbc->nr_to_write -= submitted;

			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					ret = 0;
					goto next;
				} else if (ret == -EAGAIN) {
					ret = 0;
					/* sync writeback retries the page after a wait */
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
						goto retry_write;
					}
					goto next;
				}
				/* any other error ends the walk after this page */
				done_index = page->index + 1;
				done = 1;
				break;
			}

			if (wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
next:
			if (need_readd)
				goto readd;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* flush remained pages in compress cluster */
	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
		nwritten += submitted;
		wbc->nr_to_write -= submitted;
		if (ret) {
			done = 1;
			retry = 0;
		}
	}
	if (f2fs_compressed_file(inode))
		f2fs_destroy_compress_ctx(&cc, false);
#endif
	/* a retry rescans the whole file */
	if (retry) {
		index = 0;
		end = -1;
		goto retry;
	}
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
	/* submit cached bio of IPU write */
	if (bio)
		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);

	return ret;
}
3149
3150static inline bool __should_serialize_io(struct inode *inode,
3151 struct writeback_control *wbc)
3152{
3153 /* to avoid deadlock in path of data flush */
3154 if (F2FS_I(inode)->cp_task)
3155 return false;
3156
3157 if (!S_ISREG(inode->i_mode))
3158 return false;
3159 if (IS_NOQUOTA(inode))
3160 return false;
3161
3162 if (f2fs_need_compress_data(inode))
3163 return true;
3164 if (wbc->sync_mode != WB_SYNC_ALL)
3165 return true;
3166 if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3167 return true;
3168 return false;
3169}
3170
/*
 * Common ->writepages implementation for data pages.
 *
 * Returns 0 immediately when the mapping has no ->writepage or when there is
 * nothing dirty for WB_SYNC_NONE. Several conditions skip the write entirely
 * (see skip_write): the dirty page count is then added to wbc->pages_skipped
 * and 0 is returned. Otherwise returns the result of
 * f2fs_write_cache_pages().
 */
static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	/*
	 * Background writeback of directory/quota inodes is skipped while
	 * they hold few dirty pages and memory is still available.
	 */
	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
		/* to avoid potential deadlock */
		if (current->plug)
			blk_finish_plug(current->plug);
		goto skip_write;
	}

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	/* pairs with the atomic_inc() above */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee its mapping->host
	 * to detect pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}
3242
3243static int f2fs_write_data_pages(struct address_space *mapping,
3244 struct writeback_control *wbc)
3245{
3246 struct inode *inode = mapping->host;
3247
3248 return __f2fs_write_data_pages(mapping, wbc,
3249 F2FS_I(inode)->cp_task == current ?
3250 FS_CP_DATA_IO : FS_DATA_IO);
3251}
3252
3253static void f2fs_write_failed(struct address_space *mapping, loff_t to)
3254{
3255 struct inode *inode = mapping->host;
3256 loff_t i_size = i_size_read(inode);
3257
3258 if (IS_NOQUOTA(inode))
3259 return;
3260
3261 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3262 if (to > i_size && !f2fs_verity_in_progress(inode)) {
3263 down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3264 down_write(&F2FS_I(inode)->i_mmap_sem);
3265
3266 truncate_pagecache(inode, i_size);
3267 f2fs_truncate_blocks(inode, i_size, true);
3268
3269 up_write(&F2FS_I(inode)->i_mmap_sem);
3270 up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3271 }
3272}
3273
/*
 * Resolve (and, when needed, allocate) the block backing @page before a
 * buffered write.
 *
 * @sbi:          superblock info
 * @page:         locked page cache page being written
 * @pos:          file offset of the write
 * @len:          length of the write within the page
 * @blk_addr:     out: block address found for the page (set only on paths
 *                that reach the assignment below)
 * @node_changed: out: dn.node_changed after the lookup/allocation
 *
 * Returns 0 on success or a negative errno from a failing helper.
 */
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0,0,0};
	int err = 0;
	int flag;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
	    !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
	    !f2fs_verity_in_progress(inode))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
		flag = F2FS_GET_BLOCK_DEFAULT;
	else
		flag = F2FS_GET_BLOCK_PRE_AIO;

	/* take the map lock up front for inline data and writes at/after i_size */
	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_do_map_lock(sbi, flag, true);
		locked = true;
	}

restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			/* write still fits inline: load the inline data into the page */
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_page_private_inline(ipage);
		} else {
			/* write exceeds the inline area: convert to a regular block */
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				/*
				 * Lookup failed or found no block: take the map
				 * lock and restart on the locked path above.
				 */
				f2fs_put_dnode(&dn);
				f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_do_map_lock(sbi, flag, false);
	return err;
}
3360
/*
 * ->write_begin: find or create the page cache page at @pos, make sure its
 * block is resolved, and bring it up to date when the write covers only part
 * of it.
 *
 * On success returns 0 with *pagep set. On failure the page (if any) is
 * unlocked and released, f2fs_write_failed() is called for pos + len, and a
 * negative errno is returned: -ENOSPC when the checkpoint is not ready,
 * -ENOMEM when no page could be obtained or the atomic-write conditions
 * below are hit, -EFSCORRUPTED for an invalid block address, -EIO when the
 * page is not up to date after the read, or the error of a failing helper.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	/*
	 * Should avoid quota operations which can make deadlock:
	 * kswapd -> f2fs_evict_inode -> dquot_drop ->
	 *   f2fs_dquot_commit -> f2fs_write_begin ->
	 *   d_obtain_alias -> __d_alloc -> kmem_cache_alloc(GFP_KERNEL)
	 */
	if (trace_android_fs_datawrite_start_enabled() && !IS_NOQUOTA(inode)) {
		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];

		path = android_fstrace_get_pathname(pathbuf,
						    MAX_TRACE_PATHBUF_LEN,
						    inode);
		trace_android_fs_datawrite_start(inode, pos, len,
						 current->pid, path,
						 current->comm);
	}
	trace_f2fs_write_begin(inode, pos, len, flags);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	/*
	 * Atomic file without enough memory for in-memory pages, or a pending
	 * revoke request: fail the write and drop in-memory pages on exit.
	 */
	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret;

		*fsdata = NULL;

		/* <0: error, >0: handled by the compress path, 0: continue below */
		ret = f2fs_prepare_compress_overwrite(inode, pagep,
							index, fsdata);
		if (ret < 0) {
			err = ret;
			goto fail;
		} else if (ret) {
			return 0;
		}
	}
#endif

repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. Will wait that below with our IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	/* TODO: cluster can be compressed due to race with .writepage */

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		/* the page lock is dropped while balancing, so recheck the mapping after */
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	/* full-page write or already up-to-date page: nothing to read */
	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	/*
	 * Page-aligned write reaching i_size or beyond: zero the rest of the
	 * page instead of reading it.
	 */
	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		/* newly allocated block: start from a zeroed page */
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto fail;
		}
		err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
		if (err)
			goto fail;

		/* re-take the page lock after the read, then revalidate */
		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}
3508
3509static int f2fs_write_end(struct file *file,
3510 struct address_space *mapping,
3511 loff_t pos, unsigned len, unsigned copied,
3512 struct page *page, void *fsdata)
3513{
3514 struct inode *inode = page->mapping->host;
3515
3516 trace_android_fs_datawrite_end(inode, pos, len);
3517 trace_f2fs_write_end(inode, pos, len, copied);
3518
3519 /*
3520 * This should be come from len == PAGE_SIZE, and we expect copied
3521 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
3522 * let generic_perform_write() try to copy data again through copied=0.
3523 */
3524 if (!PageUptodate(page)) {
3525 if (unlikely(copied != len))
3526 copied = 0;
3527 else
3528 SetPageUptodate(page);
3529 }
3530
3531#ifdef CONFIG_F2FS_FS_COMPRESSION
3532 /* overwrite compressed file */
3533 if (f2fs_compressed_file(inode) && fsdata) {
3534 f2fs_compress_write_end(inode, fsdata, page->index, copied);
3535 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3536
3537 if (pos + copied > i_size_read(inode) &&
3538 !f2fs_verity_in_progress(inode))
3539 f2fs_i_size_write(inode, pos + copied);
3540 return copied;
3541 }
3542#endif
3543
3544 if (!copied)
3545 goto unlock_out;
3546
3547 set_page_dirty(page);
3548
3549 if (pos + copied > i_size_read(inode) &&
3550 !f2fs_verity_in_progress(inode))
3551 f2fs_i_size_write(inode, pos + copied);
3552unlock_out:
3553 f2fs_put_page(page, 1);
3554 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3555 return copied;
3556}
3557
3558static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
3559 loff_t offset)
3560{
3561 unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
3562 unsigned blkbits = i_blkbits;
3563 unsigned blocksize_mask = (1 << blkbits) - 1;
3564 unsigned long align = offset | iov_iter_alignment(iter);
3565 struct block_device *bdev = inode->i_sb->s_bdev;
3566
3567 if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
3568 return 1;
3569
3570 if (align & blocksize_mask) {
3571 if (bdev)
3572 blkbits = blksize_bits(bdev_logical_block_size(bdev));
3573 blocksize_mask = (1 << blkbits) - 1;
3574 if (align & blocksize_mask)
3575 return -EINVAL;
3576 return 1;
3577 }
3578 return 0;
3579}
3580
3581static void f2fs_dio_end_io(struct bio *bio)
3582{
3583 struct f2fs_private_dio *dio = bio->bi_private;
3584
3585 dec_page_count(F2FS_I_SB(dio->inode),
3586 dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
3587
3588 bio->bi_private = dio->orig_private;
3589 bio->bi_end_io = dio->orig_end_io;
3590
3591 kfree(dio);
3592
3593 bio_endio(bio);
3594}
3595
3596static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
3597 loff_t file_offset)
3598{
3599 struct f2fs_private_dio *dio;
3600 bool write = (bio_op(bio) == REQ_OP_WRITE);
3601
3602 dio = f2fs_kzalloc(F2FS_I_SB(inode),
3603 sizeof(struct f2fs_private_dio), GFP_NOFS);
3604 if (!dio)
3605 goto out;
3606
3607 dio->inode = inode;
3608 dio->orig_end_io = bio->bi_end_io;
3609 dio->orig_private = bio->bi_private;
3610 dio->write = write;
3611
3612 bio->bi_end_io = f2fs_dio_end_io;
3613 bio->bi_private = dio;
3614
3615 inc_page_count(F2FS_I_SB(inode),
3616 write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
3617
3618 submit_bio(bio);
3619 return;
3620out:
3621 bio->bi_status = BLK_STS_IOERR;
3622 bio_endio(bio);
3623}
3624
3625static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3626{
3627 struct address_space *mapping = iocb->ki_filp->f_mapping;
3628 struct inode *inode = mapping->host;
3629 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3630 struct f2fs_inode_info *fi = F2FS_I(inode);
3631 size_t count = iov_iter_count(iter);
3632 loff_t offset = iocb->ki_pos;
3633 int rw = iov_iter_rw(iter);
3634 int err;
3635 enum rw_hint hint = iocb->ki_hint;
3636 int whint_mode = F2FS_OPTION(sbi).whint_mode;
3637 bool do_opu;
3638
3639 err = check_direct_IO(inode, iter, offset);
3640 if (err)
3641 return err < 0 ? err : 0;
3642
3643 if (f2fs_force_buffered_io(inode, iocb, iter))
3644 return 0;
3645
3646 do_opu = allow_outplace_dio(inode, iocb, iter);
3647
3648 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
3649
3650 if (trace_android_fs_dataread_start_enabled() &&
3651 (rw == READ)) {
3652 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3653
3654 path = android_fstrace_get_pathname(pathbuf,
3655 MAX_TRACE_PATHBUF_LEN,
3656 inode);
3657 trace_android_fs_dataread_start(inode, offset,
3658 count, current->pid, path,
3659 current->comm);
3660 }
3661 if (trace_android_fs_datawrite_start_enabled() &&
3662 (rw == WRITE)) {
3663 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3664
3665 path = android_fstrace_get_pathname(pathbuf,
3666 MAX_TRACE_PATHBUF_LEN,
3667 inode);
3668 trace_android_fs_datawrite_start(inode, offset, count,
3669 current->pid, path,
3670 current->comm);
3671 }
3672
3673 if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
3674 iocb->ki_hint = WRITE_LIFE_NOT_SET;
3675
3676 if (iocb->ki_flags & IOCB_NOWAIT) {
3677 if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
3678 iocb->ki_hint = hint;
3679 err = -EAGAIN;
3680 goto out;
3681 }
3682 if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
3683 up_read(&fi->i_gc_rwsem[rw]);
3684 iocb->ki_hint = hint;
3685 err = -EAGAIN;
3686 goto out;
3687 }
3688 } else {
3689 down_read(&fi->i_gc_rwsem[rw]);
3690 if (do_opu)
3691 down_read(&fi->i_gc_rwsem[READ]);
3692 }
3693
3694 err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
3695 iter, rw == WRITE ? get_data_block_dio_write :
3696 get_data_block_dio, NULL, f2fs_dio_submit_bio,
3697 rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
3698 DIO_SKIP_HOLES);
3699
3700 if (do_opu)
3701 up_read(&fi->i_gc_rwsem[READ]);
3702
3703 up_read(&fi->i_gc_rwsem[rw]);
3704
3705 if (rw == WRITE) {
3706 if (whint_mode == WHINT_MODE_OFF)
3707 iocb->ki_hint = hint;
3708 if (err > 0) {
3709 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
3710 err);
3711 if (!do_opu)
3712 set_inode_flag(inode, FI_UPDATE_WRITE);
3713 } else if (err == -EIOCBQUEUED) {
3714 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
3715 count - iov_iter_count(iter));
3716 } else if (err < 0) {
3717 f2fs_write_failed(mapping, offset + count);
3718 }
3719 } else {
3720 if (err > 0)
3721 f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
3722 else if (err == -EIOCBQUEUED)
3723 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
3724 count - iov_iter_count(iter));
3725 }
3726
3727out:
3728 if (trace_android_fs_dataread_start_enabled() &&
3729 (rw == READ))
3730 trace_android_fs_dataread_end(inode, offset, count);
3731 if (trace_android_fs_datawrite_start_enabled() &&
3732 (rw == WRITE))
3733 trace_android_fs_datawrite_end(inode, offset, count);
3734
3735 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
3736
3737 return err;
3738}
3739
3740void f2fs_invalidate_page(struct page *page, unsigned int offset,
3741 unsigned int length)
3742{
3743 struct inode *inode = page->mapping->host;
3744 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3745
3746 if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3747 (offset % PAGE_SIZE || length != PAGE_SIZE))
3748 return;
3749
3750 if (PageDirty(page)) {
3751 if (inode->i_ino == F2FS_META_INO(sbi)) {
3752 dec_page_count(sbi, F2FS_DIRTY_META);
3753 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3754 dec_page_count(sbi, F2FS_DIRTY_NODES);
3755 } else {
3756 inode_dec_dirty_pages(inode);
3757 f2fs_remove_dirty_inode(inode);
3758 }
3759 }
3760
3761 clear_page_private_gcing(page);
3762
3763 if (test_opt(sbi, COMPRESS_CACHE)) {
3764 if (f2fs_compressed_file(inode))
3765 f2fs_invalidate_compress_pages(sbi, inode->i_ino);
3766 if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3767 clear_page_private_data(page);
3768 }
3769
3770 if (page_private_atomic(page))
3771 return f2fs_drop_inmem_page(inode, page);
3772
3773 detach_page_private(page);
3774 set_page_private(page, 0);
3775}
3776
3777int f2fs_release_page(struct page *page, gfp_t wait)
3778{
3779 /* If this is dirty page, keep PagePrivate */
3780 if (PageDirty(page))
3781 return 0;
3782
3783 /* This is atomic written page, keep Private */
3784 if (page_private_atomic(page))
3785 return 0;
3786
3787 if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
3788 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3789 struct inode *inode = page->mapping->host;
3790
3791 if (f2fs_compressed_file(inode))
3792 f2fs_invalidate_compress_pages(sbi, inode->i_ino);
3793 if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3794 clear_page_private_data(page);
3795 }
3796
3797 clear_page_private_gcing(page);
3798
3799 detach_page_private(page);
3800 set_page_private(page, 0);
3801 return 1;
3802}
3803
3804static int f2fs_set_data_page_dirty(struct page *page)
3805{
3806 struct inode *inode = page_file_mapping(page)->host;
3807
3808 trace_f2fs_set_page_dirty(page, DATA);
3809
3810 if (!PageUptodate(page))
3811 SetPageUptodate(page);
3812 if (PageSwapCache(page))
3813 return __set_page_dirty_nobuffers(page);
3814
3815 if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
3816 if (!page_private_atomic(page)) {
3817 f2fs_register_inmem_page(inode, page);
3818 return 1;
3819 }
3820 /*
3821 * Previously, this page has been registered, we just
3822 * return here.
3823 */
3824 return 0;
3825 }
3826
3827 if (!PageDirty(page)) {
3828 __set_page_dirty_nobuffers(page);
3829 f2fs_update_dirty_page(inode, page);
3830 return 1;
3831 }
3832 return 0;
3833}
3834
3835
3836static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3837{
3838#ifdef CONFIG_F2FS_FS_COMPRESSION
3839 struct dnode_of_data dn;
3840 sector_t start_idx, blknr = 0;
3841 int ret;
3842
3843 start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3844
3845 set_new_dnode(&dn, inode, NULL, NULL, 0);
3846 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3847 if (ret)
3848 return 0;
3849
3850 if (dn.data_blkaddr != COMPRESS_ADDR) {
3851 dn.ofs_in_node += block - start_idx;
3852 blknr = f2fs_data_blkaddr(&dn);
3853 if (!__is_valid_data_blkaddr(blknr))
3854 blknr = 0;
3855 }
3856
3857 f2fs_put_dnode(&dn);
3858 return blknr;
3859#else
3860 return 0;
3861#endif
3862}
3863
3864
3865static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3866{
3867 struct inode *inode = mapping->host;
3868 sector_t blknr = 0;
3869
3870 if (f2fs_has_inline_data(inode))
3871 goto out;
3872
3873 /* make sure allocating whole blocks */
3874 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3875 filemap_write_and_wait(mapping);
3876
3877 /* Block number less than F2FS MAX BLOCKS */
3878 if (unlikely(block >= max_file_blocks(inode)))
3879 goto out;
3880
3881 if (f2fs_compressed_file(inode)) {
3882 blknr = f2fs_bmap_compress(inode, block);
3883 } else {
3884 struct f2fs_map_blocks map;
3885
3886 memset(&map, 0, sizeof(map));
3887 map.m_lblk = block;
3888 map.m_len = 1;
3889 map.m_next_pgofs = NULL;
3890 map.m_seg_type = NO_CHECK_TYPE;
3891
3892 if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
3893 blknr = map.m_pblk;
3894 }
3895out:
3896 trace_f2fs_bmap(inode, block, blknr);
3897 return blknr;
3898}
3899
3900#ifdef CONFIG_MIGRATION
3901#include <linux/migrate.h>
3902
3903int f2fs_migrate_page(struct address_space *mapping,
3904 struct page *newpage, struct page *page, enum migrate_mode mode)
3905{
3906 int rc, extra_count;
3907 struct f2fs_inode_info *fi = F2FS_I(mapping->host);
3908 bool atomic_written = page_private_atomic(page);
3909
3910 BUG_ON(PageWriteback(page));
3911
3912 /* migrating an atomic written page is safe with the inmem_lock hold */
3913 if (atomic_written) {
3914 if (mode != MIGRATE_SYNC)
3915 return -EBUSY;
3916 if (!mutex_trylock(&fi->inmem_lock))
3917 return -EAGAIN;
3918 }
3919
3920 /* one extra reference was held for atomic_write page */
3921 extra_count = atomic_written ? 1 : 0;
3922 rc = migrate_page_move_mapping(mapping, newpage,
3923 page, extra_count);
3924 if (rc != MIGRATEPAGE_SUCCESS) {
3925 if (atomic_written)
3926 mutex_unlock(&fi->inmem_lock);
3927 return rc;
3928 }
3929
3930 if (atomic_written) {
3931 struct inmem_pages *cur;
3932
3933 list_for_each_entry(cur, &fi->inmem_pages, list)
3934 if (cur->page == page) {
3935 cur->page = newpage;
3936 break;
3937 }
3938 mutex_unlock(&fi->inmem_lock);
3939 put_page(page);
3940 get_page(newpage);
3941 }
3942
3943 /* guarantee to start from no stale private field */
3944 set_page_private(newpage, 0);
3945 if (PagePrivate(page)) {
3946 set_page_private(newpage, page_private(page));
3947 SetPagePrivate(newpage);
3948 get_page(newpage);
3949
3950 set_page_private(page, 0);
3951 ClearPagePrivate(page);
3952 put_page(page);
3953 }
3954
3955 if (mode != MIGRATE_SYNC_NO_COPY)
3956 migrate_page_copy(newpage, page);
3957 else
3958 migrate_page_states(newpage, page);
3959
3960 return MIGRATEPAGE_SUCCESS;
3961}
3962#endif
3963
3964#ifdef CONFIG_SWAP
3965static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3966 unsigned int blkcnt)
3967{
3968 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3969 unsigned int blkofs;
3970 unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3971 unsigned int secidx = start_blk / blk_per_sec;
3972 unsigned int end_sec = secidx + blkcnt / blk_per_sec;
3973 int ret = 0;
3974
3975 down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3976 down_write(&F2FS_I(inode)->i_mmap_sem);
3977
3978 set_inode_flag(inode, FI_ALIGNED_WRITE);
3979
3980 for (; secidx < end_sec; secidx++) {
3981 down_write(&sbi->pin_sem);
3982
3983 f2fs_lock_op(sbi);
3984 f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3985 f2fs_unlock_op(sbi);
3986
3987 set_inode_flag(inode, FI_DO_DEFRAG);
3988
3989 for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
3990 struct page *page;
3991 unsigned int blkidx = secidx * blk_per_sec + blkofs;
3992
3993 page = f2fs_get_lock_data_page(inode, blkidx, true);
3994 if (IS_ERR(page)) {
3995 up_write(&sbi->pin_sem);
3996 ret = PTR_ERR(page);
3997 goto done;
3998 }
3999
4000 set_page_dirty(page);
4001 f2fs_put_page(page, 1);
4002 }
4003
4004 clear_inode_flag(inode, FI_DO_DEFRAG);
4005
4006 ret = filemap_fdatawrite(inode->i_mapping);
4007
4008 up_write(&sbi->pin_sem);
4009
4010 if (ret)
4011 break;
4012 }
4013
4014done:
4015 clear_inode_flag(inode, FI_DO_DEFRAG);
4016 clear_inode_flag(inode, FI_ALIGNED_WRITE);
4017
4018 up_write(&F2FS_I(inode)->i_mmap_sem);
4019 up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4020
4021 return ret;
4022}
4023
4024static int check_swap_activate(struct swap_info_struct *sis,
4025 struct file *swap_file, sector_t *span)
4026{
4027 struct address_space *mapping = swap_file->f_mapping;
4028 struct inode *inode = mapping->host;
4029 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4030 sector_t cur_lblock;
4031 sector_t last_lblock;
4032 sector_t pblock;
4033 sector_t lowest_pblock = -1;
4034 sector_t highest_pblock = 0;
4035 int nr_extents = 0;
4036 unsigned long nr_pblocks;
4037 unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
4038 unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
4039 unsigned int not_aligned = 0;
4040 int ret = 0;
4041
4042 /*
4043 * Map all the blocks into the extent list. This code doesn't try
4044 * to be very smart.
4045 */
4046 cur_lblock = 0;
4047 last_lblock = bytes_to_blks(inode, i_size_read(inode));
4048
4049 while (cur_lblock < last_lblock && cur_lblock < sis->max) {
4050 struct f2fs_map_blocks map;
4051retry:
4052 cond_resched();
4053
4054 memset(&map, 0, sizeof(map));
4055 map.m_lblk = cur_lblock;
4056 map.m_len = last_lblock - cur_lblock;
4057 map.m_next_pgofs = NULL;
4058 map.m_next_extent = NULL;
4059 map.m_seg_type = NO_CHECK_TYPE;
4060 map.m_may_create = false;
4061
4062 ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
4063 if (ret)
4064 goto out;
4065
4066 /* hole */
4067 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
4068 f2fs_err(sbi, "Swapfile has holes");
4069 ret = -EINVAL;
4070 goto out;
4071 }
4072
4073 pblock = map.m_pblk;
4074 nr_pblocks = map.m_len;
4075
4076 if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
4077 nr_pblocks & sec_blks_mask) {
4078 not_aligned++;
4079
4080 nr_pblocks = roundup(nr_pblocks, blks_per_sec);
4081 if (cur_lblock + nr_pblocks > sis->max)
4082 nr_pblocks -= blks_per_sec;
4083
4084 if (!nr_pblocks) {
4085 /* this extent is last one */
4086 nr_pblocks = map.m_len;
4087 f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
4088 goto next;
4089 }
4090
4091 ret = f2fs_migrate_blocks(inode, cur_lblock,
4092 nr_pblocks);
4093 if (ret)
4094 goto out;
4095 goto retry;
4096 }
4097next:
4098 if (cur_lblock + nr_pblocks >= sis->max)
4099 nr_pblocks = sis->max - cur_lblock;
4100
4101 if (cur_lblock) { /* exclude the header page */
4102 if (pblock < lowest_pblock)
4103 lowest_pblock = pblock;
4104 if (pblock + nr_pblocks - 1 > highest_pblock)
4105 highest_pblock = pblock + nr_pblocks - 1;
4106 }
4107
4108 /*
4109 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
4110 */
4111 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
4112 if (ret < 0)
4113 goto out;
4114 nr_extents += ret;
4115 cur_lblock += nr_pblocks;
4116 }
4117 ret = nr_extents;
4118 *span = 1 + highest_pblock - lowest_pblock;
4119 if (cur_lblock == 0)
4120 cur_lblock = 1; /* force Empty message */
4121 sis->max = cur_lblock;
4122 sis->pages = cur_lblock - 1;
4123 sis->highest_bit = cur_lblock - 1;
4124out:
4125 if (not_aligned)
4126 f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
4127 not_aligned, blks_per_sec * F2FS_BLKSIZE);
4128 return ret;
4129}
4130
4131static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4132 sector_t *span)
4133{
4134 struct inode *inode = file_inode(file);
4135 int ret;
4136
4137 if (!S_ISREG(inode->i_mode))
4138 return -EINVAL;
4139
4140 if (f2fs_readonly(F2FS_I_SB(inode)->sb))
4141 return -EROFS;
4142
4143 if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
4144 f2fs_err(F2FS_I_SB(inode),
4145 "Swapfile not supported in LFS mode");
4146 return -EINVAL;
4147 }
4148
4149 ret = f2fs_convert_inline_inode(inode);
4150 if (ret)
4151 return ret;
4152
4153 if (!f2fs_disable_compressed_file(inode))
4154 return -EINVAL;
4155
4156 f2fs_precache_extents(inode);
4157
4158 ret = check_swap_activate(sis, file, span);
4159 if (ret < 0)
4160 return ret;
4161
4162 set_inode_flag(inode, FI_PIN_FILE);
4163 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
4164 return ret;
4165}
4166
4167static void f2fs_swap_deactivate(struct file *file)
4168{
4169 struct inode *inode = file_inode(file);
4170
4171 clear_inode_flag(inode, FI_PIN_FILE);
4172}
4173#else
4174static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4175 sector_t *span)
4176{
4177 return -EOPNOTSUPP;
4178}
4179
/* Stub for builds without CONFIG_SWAP: nothing to undo. */
static void f2fs_swap_deactivate(struct file *file)
{
}
4183#endif
4184
4185const struct address_space_operations f2fs_dblock_aops = {
4186 .readpage = f2fs_read_data_page,
4187 .readpages = f2fs_read_data_pages,
4188 .writepage = f2fs_write_data_page,
4189 .writepages = f2fs_write_data_pages,
4190 .write_begin = f2fs_write_begin,
4191 .write_end = f2fs_write_end,
4192 .set_page_dirty = f2fs_set_data_page_dirty,
4193 .invalidatepage = f2fs_invalidate_page,
4194 .releasepage = f2fs_release_page,
4195 .direct_IO = f2fs_direct_IO,
4196 .bmap = f2fs_bmap,
4197 .swap_activate = f2fs_swap_activate,
4198 .swap_deactivate = f2fs_swap_deactivate,
4199#ifdef CONFIG_MIGRATION
4200 .migratepage = f2fs_migrate_page,
4201#endif
4202};
4203
4204void f2fs_clear_page_cache_dirty_tag(struct page *page)
4205{
4206 struct address_space *mapping = page_mapping(page);
4207 unsigned long flags;
4208
4209 xa_lock_irqsave(&mapping->i_pages, flags);
4210 __xa_clear_mark(&mapping->i_pages, page_index(page),
4211 PAGECACHE_TAG_DIRTY);
4212 xa_unlock_irqrestore(&mapping->i_pages, flags);
4213}
4214
4215int __init f2fs_init_post_read_processing(void)
4216{
4217 bio_post_read_ctx_cache =
4218 kmem_cache_create("f2fs_bio_post_read_ctx",
4219 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4220 if (!bio_post_read_ctx_cache)
4221 goto fail;
4222 bio_post_read_ctx_pool =
4223 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4224 bio_post_read_ctx_cache);
4225 if (!bio_post_read_ctx_pool)
4226 goto fail_free_cache;
4227 return 0;
4228
4229fail_free_cache:
4230 kmem_cache_destroy(bio_post_read_ctx_cache);
4231fail:
4232 return -ENOMEM;
4233}
4234
4235void f2fs_destroy_post_read_processing(void)
4236{
4237 mempool_destroy(bio_post_read_ctx_pool);
4238 kmem_cache_destroy(bio_post_read_ctx_cache);
4239}
4240
4241int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4242{
4243 if (!f2fs_sb_has_encrypt(sbi) &&
4244 !f2fs_sb_has_verity(sbi) &&
4245 !f2fs_sb_has_compression(sbi))
4246 return 0;
4247
4248 sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4249 WQ_UNBOUND | WQ_HIGHPRI,
4250 num_online_cpus());
4251 if (!sbi->post_read_wq)
4252 return -ENOMEM;
4253 return 0;
4254}
4255
4256void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4257{
4258 if (sbi->post_read_wq)
4259 destroy_workqueue(sbi->post_read_wq);
4260}
4261
4262int __init f2fs_init_bio_entry_cache(void)
4263{
4264 bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4265 sizeof(struct bio_entry));
4266 if (!bio_entry_slab)
4267 return -ENOMEM;
4268 return 0;
4269}
4270
4271void f2fs_destroy_bio_entry_cache(void)
4272{
4273 kmem_cache_destroy(bio_entry_slab);
4274}