Blame - src/kernel/linux/v4.14/fs/ext4/super.c - T103

blob: 41b1d2f5388168a118c9a610e66c262dff97a063 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* linux/fs/ext4/super.c
				3	*
				4	* Copyright (C) 1992, 1993, 1994, 1995
				5	* Remy Card (card@masi.ibp.fr)
				6	* Laboratoire MASI - Institut Blaise Pascal
				7	* Universite Pierre et Marie Curie (Paris VI)
				8	*
				9	* from
				10	*
				11	* linux/fs/minix/inode.c
				12	*
				13	* Copyright (C) 1991, 1992 Linus Torvalds
				14	*
				15	* Big-endian to little-endian byte-swapping/bitmaps by
				16	* David S. Miller (davem@caip.rutgers.edu), 1995
				17	*/
				18
				19	#include <linux/module.h>
				20	#include <linux/string.h>
				21	#include <linux/fs.h>
				22	#include <linux/time.h>
				23	#include <linux/vmalloc.h>
				24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/blkdev.h>
				27	#include <linux/backing-dev.h>
				28	#include <linux/parser.h>
				29	#include <linux/buffer_head.h>
				30	#include <linux/exportfs.h>
				31	#include <linux/vfs.h>
				32	#include <linux/random.h>
				33	#include <linux/mount.h>
				34	#include <linux/namei.h>
				35	#include <linux/quotaops.h>
				36	#include <linux/seq_file.h>
				37	#include <linux/ctype.h>
				38	#include <linux/log2.h>
				39	#include <linux/crc16.h>
				40	#include <linux/dax.h>
				41	#include <linux/cleancache.h>
				42	#include <linux/uaccess.h>
				43
				44	#include <linux/kthread.h>
				45	#include <linux/freezer.h>
				46
				47	#include "ext4.h"
				48	#include "ext4_extents.h" /* Needed for trace points definition */
				49	#include "ext4_jbd2.h"
				50	#include "xattr.h"
				51	#include "acl.h"
				52	#include "mballoc.h"
				53	#include "fsmap.h"
				54
				55	#define CREATE_TRACE_POINTS
				56	#include <trace/events/ext4.h>
				57
				58	static struct ext4_lazy_init *ext4_li_info;
				59	static struct mutex ext4_li_mtx;
				60	static struct ratelimit_state ext4_mount_msg_ratelimit;
				61
				62	static int ext4_load_journal(struct super_block , struct ext4_super_block ,
				63	unsigned long journal_devnum);
				64	static int ext4_show_options(struct seq_file seq, struct dentry root);
				65	static int ext4_commit_super(struct super_block *sb, int sync);
				66	static int ext4_mark_recovery_complete(struct super_block *sb,
				67	struct ext4_super_block *es);
				68	static int ext4_clear_journal_err(struct super_block *sb,
				69	struct ext4_super_block *es);
				70	static int ext4_sync_fs(struct super_block *sb, int wait);
				71	static int ext4_remount(struct super_block sb, int flags, char *data);
				72	static int ext4_statfs(struct dentry dentry, struct kstatfs buf);
				73	static int ext4_unfreeze(struct super_block *sb);
				74	static int ext4_freeze(struct super_block *sb);
				75	static struct dentry ext4_mount(struct file_system_type fs_type, int flags,
				76	const char dev_name, void data);
				77	static inline int ext2_feature_set_ok(struct super_block *sb);
				78	static inline int ext3_feature_set_ok(struct super_block *sb);
				79	static int ext4_feature_set_ok(struct super_block *sb, int readonly);
				80	static void ext4_destroy_lazyinit_thread(void);
				81	static void ext4_unregister_li_request(struct super_block *sb);
				82	static void ext4_clear_request_list(void);
				83	static struct inode ext4_get_journal_inode(struct super_block sb,
				84	unsigned int journal_inum);
				85
				86	/*
				87	* Lock ordering
				88	*
				89	* Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
				90	* i_mmap_rwsem (inode->i_mmap_rwsem)!
				91	*
				92	* page fault path:
				93	* mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
				94	* page lock -> i_data_sem (rw)
				95	*
				96	* buffered write path:
				97	* sb_start_write -> i_mutex -> mmap_sem
				98	* sb_start_write -> i_mutex -> transaction start -> page lock ->
				99	* i_data_sem (rw)
				100	*
				101	* truncate:
				102	* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
				103	* i_mmap_rwsem (w) -> page lock
				104	* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
				105	* transaction start -> i_data_sem (rw)
				106	*
				107	* direct IO:
				108	* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
				109	* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
				110	* transaction start -> i_data_sem (rw)
				111	*
				112	* writepages:
				113	* transaction start -> page lock(s) -> i_data_sem (rw)
				114	*/
				115
				116	#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
				117	static struct file_system_type ext2_fs_type = {
				118	.owner = THIS_MODULE,
				119	.name = "ext2",
				120	.mount = ext4_mount,
				121	.kill_sb = kill_block_super,
				122	.fs_flags = FS_REQUIRES_DEV,
				123	};
				124	MODULE_ALIAS_FS("ext2");
				125	MODULE_ALIAS("ext2");
				126	#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
				127	#else
				128	#define IS_EXT2_SB(sb) (0)
				129	#endif
				130
				131
				132	static struct file_system_type ext3_fs_type = {
				133	.owner = THIS_MODULE,
				134	.name = "ext3",
				135	.mount = ext4_mount,
				136	.kill_sb = kill_block_super,
				137	.fs_flags = FS_REQUIRES_DEV,
				138	};
				139	MODULE_ALIAS_FS("ext3");
				140	MODULE_ALIAS("ext3");
				141	#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
				142
				143	/*
				144	* This works like sb_bread() except it uses ERR_PTR for error
				145	* returns. Currently with sb_bread it's impossible to distinguish
				146	* between ENOMEM and EIO situations (since both result in a NULL
				147	* return.
				148	*/
				149	struct buffer_head *
				150	ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
				151	{
				152	struct buffer_head *bh = sb_getblk(sb, block);
				153
				154	if (bh == NULL)
				155	return ERR_PTR(-ENOMEM);
				156	if (buffer_uptodate(bh))
				157	return bh;
				158	ll_rw_block(REQ_OP_READ, REQ_META \| op_flags, 1, &bh);
				159	wait_on_buffer(bh);
				160	if (buffer_uptodate(bh))
				161	return bh;
				162	put_bh(bh);
				163	return ERR_PTR(-EIO);
				164	}
				165
				166	static int ext4_verify_csum_type(struct super_block *sb,
				167	struct ext4_super_block *es)
				168	{
				169	if (!ext4_has_feature_metadata_csum(sb))
				170	return 1;
				171
				172	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
				173	}
				174
				175	static __le32 ext4_superblock_csum(struct super_block *sb,
				176	struct ext4_super_block *es)
				177	{
				178	struct ext4_sb_info *sbi = EXT4_SB(sb);
				179	int offset = offsetof(struct ext4_super_block, s_checksum);
				180	__u32 csum;
				181
				182	csum = ext4_chksum(sbi, ~0, (char *)es, offset);
				183
				184	return cpu_to_le32(csum);
				185	}
				186
				187	static int ext4_superblock_csum_verify(struct super_block *sb,
				188	struct ext4_super_block *es)
				189	{
				190	if (!ext4_has_metadata_csum(sb))
				191	return 1;
				192
				193	return es->s_checksum == ext4_superblock_csum(sb, es);
				194	}
				195
				196	void ext4_superblock_csum_set(struct super_block *sb)
				197	{
				198	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				199
				200	if (!ext4_has_metadata_csum(sb))
				201	return;
				202
				203	es->s_checksum = ext4_superblock_csum(sb, es);
				204	}
				205
				206	void *ext4_kvmalloc(size_t size, gfp_t flags)
				207	{
				208	void *ret;
				209
				210	ret = kmalloc(size, flags \| __GFP_NOWARN);
				211	if (!ret)
				212	ret = __vmalloc(size, flags, PAGE_KERNEL);
				213	return ret;
				214	}
				215
				216	void *ext4_kvzalloc(size_t size, gfp_t flags)
				217	{
				218	void *ret;
				219
				220	ret = kzalloc(size, flags \| __GFP_NOWARN);
				221	if (!ret)
				222	ret = __vmalloc(size, flags \| __GFP_ZERO, PAGE_KERNEL);
				223	return ret;
				224	}
				225
				226	ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
				227	struct ext4_group_desc *bg)
				228	{
				229	return le32_to_cpu(bg->bg_block_bitmap_lo) \|
				230	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				231	(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
				232	}
				233
				234	ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
				235	struct ext4_group_desc *bg)
				236	{
				237	return le32_to_cpu(bg->bg_inode_bitmap_lo) \|
				238	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				239	(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
				240	}
				241
				242	ext4_fsblk_t ext4_inode_table(struct super_block *sb,
				243	struct ext4_group_desc *bg)
				244	{
				245	return le32_to_cpu(bg->bg_inode_table_lo) \|
				246	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				247	(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
				248	}
				249
				250	__u32 ext4_free_group_clusters(struct super_block *sb,
				251	struct ext4_group_desc *bg)
				252	{
				253	return le16_to_cpu(bg->bg_free_blocks_count_lo) \|
				254	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				255	(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
				256	}
				257
				258	__u32 ext4_free_inodes_count(struct super_block *sb,
				259	struct ext4_group_desc *bg)
				260	{
				261	return le16_to_cpu(bg->bg_free_inodes_count_lo) \|
				262	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				263	(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
				264	}
				265
				266	__u32 ext4_used_dirs_count(struct super_block *sb,
				267	struct ext4_group_desc *bg)
				268	{
				269	return le16_to_cpu(bg->bg_used_dirs_count_lo) \|
				270	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				271	(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
				272	}
				273
				274	__u32 ext4_itable_unused_count(struct super_block *sb,
				275	struct ext4_group_desc *bg)
				276	{
				277	return le16_to_cpu(bg->bg_itable_unused_lo) \|
				278	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				279	(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
				280	}
				281
				282	void ext4_block_bitmap_set(struct super_block *sb,
				283	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				284	{
				285	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
				286	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				287	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
				288	}
				289
				290	void ext4_inode_bitmap_set(struct super_block *sb,
				291	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				292	{
				293	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
				294	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				295	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
				296	}
				297
				298	void ext4_inode_table_set(struct super_block *sb,
				299	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				300	{
				301	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
				302	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				303	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
				304	}
				305
				306	void ext4_free_group_clusters_set(struct super_block *sb,
				307	struct ext4_group_desc *bg, __u32 count)
				308	{
				309	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
				310	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				311	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
				312	}
				313
				314	void ext4_free_inodes_set(struct super_block *sb,
				315	struct ext4_group_desc *bg, __u32 count)
				316	{
				317	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
				318	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				319	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
				320	}
				321
				322	void ext4_used_dirs_set(struct super_block *sb,
				323	struct ext4_group_desc *bg, __u32 count)
				324	{
				325	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
				326	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				327	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
				328	}
				329
				330	void ext4_itable_unused_set(struct super_block *sb,
				331	struct ext4_group_desc *bg, __u32 count)
				332	{
				333	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
				334	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				335	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
				336	}
				337
				338
				339	static void __save_error_info(struct super_block sb, const char func,
				340	unsigned int line)
				341	{
				342	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				343
				344	EXT4_SB(sb)->s_mount_state \|= EXT4_ERROR_FS;
				345	if (bdev_read_only(sb->s_bdev))
				346	return;
				347	es->s_state \|= cpu_to_le16(EXT4_ERROR_FS);
				348	es->s_last_error_time = cpu_to_le32(get_seconds());
				349	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
				350	es->s_last_error_line = cpu_to_le32(line);
				351	if (!es->s_first_error_time) {
				352	es->s_first_error_time = es->s_last_error_time;
				353	strncpy(es->s_first_error_func, func,
				354	sizeof(es->s_first_error_func));
				355	es->s_first_error_line = cpu_to_le32(line);
				356	es->s_first_error_ino = es->s_last_error_ino;
				357	es->s_first_error_block = es->s_last_error_block;
				358	}
				359	/*
				360	* Start the daily error reporting function if it hasn't been
				361	* started already
				362	*/
				363	if (!es->s_error_count)
				364	mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 246060*HZ);
				365	le32_add_cpu(&es->s_error_count, 1);
				366	}
				367
				368	static void save_error_info(struct super_block sb, const char func,
				369	unsigned int line)
				370	{
				371	__save_error_info(sb, func, line);
				372	if (!bdev_read_only(sb->s_bdev))
				373	ext4_commit_super(sb, 1);
				374	}
				375
				376	/*
				377	* The del_gendisk() function uninitializes the disk-specific data
				378	* structures, including the bdi structure, without telling anyone
				379	* else. Once this happens, any attempt to call mark_buffer_dirty()
				380	* (for example, by ext4_commit_super), will cause a kernel OOPS.
				381	* This is a kludge to prevent these oops until we can put in a proper
				382	* hook in del_gendisk() to inform the VFS and file system layers.
				383	*/
				384	static int block_device_ejected(struct super_block *sb)
				385	{
				386	struct inode *bd_inode = sb->s_bdev->bd_inode;
				387	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
				388
				389	return bdi->dev == NULL;
				390	}
				391
				392	static void ext4_journal_commit_callback(journal_t journal, transaction_t txn)
				393	{
				394	struct super_block *sb = journal->j_private;
				395	struct ext4_sb_info *sbi = EXT4_SB(sb);
				396	int error = is_journal_aborted(journal);
				397	struct ext4_journal_cb_entry *jce;
				398
				399	BUG_ON(txn->t_state == T_FINISHED);
				400
				401	ext4_process_freed_data(sb, txn->t_tid);
				402
				403	spin_lock(&sbi->s_md_lock);
				404	while (!list_empty(&txn->t_private_list)) {
				405	jce = list_entry(txn->t_private_list.next,
				406	struct ext4_journal_cb_entry, jce_list);
				407	list_del_init(&jce->jce_list);
				408	spin_unlock(&sbi->s_md_lock);
				409	jce->jce_func(sb, jce, error);
				410	spin_lock(&sbi->s_md_lock);
				411	}
				412	spin_unlock(&sbi->s_md_lock);
				413	}
				414
				415	/* Deal with the reporting of failure conditions on a filesystem such as
				416	* inconsistencies detected or read IO failures.
				417	*
				418	* On ext2, we can store the error state of the filesystem in the
				419	* superblock. That is not possible on ext4, because we may have other
				420	* write ordering constraints on the superblock which prevent us from
				421	* writing it out straight away; and given that the journal is about to
				422	* be aborted, we can't rely on the current, or future, transactions to
				423	* write out the superblock safely.
				424	*
				425	* We'll just use the jbd2_journal_abort() error code to record an error in
				426	* the journal instead. On recovery, the journal will complain about
				427	* that error until we've noted it down and cleared it.
				428	*/
				429
				430	static void ext4_handle_error(struct super_block *sb)
				431	{
				432	if (sb_rdonly(sb))
				433	return;
				434
				435	if (!test_opt(sb, ERRORS_CONT)) {
				436	journal_t *journal = EXT4_SB(sb)->s_journal;
				437
				438	EXT4_SB(sb)->s_mount_flags \|= EXT4_MF_FS_ABORTED;
				439	if (journal)
				440	jbd2_journal_abort(journal, -EIO);
				441	}
				442	if (test_opt(sb, ERRORS_RO)) {
				443	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
				444	/*
				445	* Make sure updated value of ->s_mount_flags will be visible
				446	* before ->s_flags update
				447	*/
				448	smp_wmb();
				449	sb->s_flags \|= MS_RDONLY;
				450	}
				451	if (test_opt(sb, ERRORS_PANIC)) {
				452	if (EXT4_SB(sb)->s_journal &&
				453	!(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
				454	return;
				455	panic("EXT4-fs (device %s): panic forced after error\n",
				456	sb->s_id);
				457	}
				458	}
				459
				460	#define ext4_error_ratelimit(sb) \
				461	___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
				462	"EXT4-fs error")
				463
				464	void __ext4_error(struct super_block sb, const char function,
				465	unsigned int line, const char *fmt, ...)
				466	{
				467	struct va_format vaf;
				468	va_list args;
				469
				470	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				471	return;
				472
				473	if (ext4_error_ratelimit(sb)) {
				474	va_start(args, fmt);
				475	vaf.fmt = fmt;
				476	vaf.va = &args;
				477	printk(KERN_CRIT
				478	"EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
				479	sb->s_id, function, line, current->comm, &vaf);
				480	va_end(args);
				481	}
				482	save_error_info(sb, function, line);
				483	ext4_handle_error(sb);
				484	}
				485
				486	void __ext4_error_inode(struct inode inode, const char function,
				487	unsigned int line, ext4_fsblk_t block,
				488	const char *fmt, ...)
				489	{
				490	va_list args;
				491	struct va_format vaf;
				492	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
				493
				494	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
				495	return;
				496
				497	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
				498	es->s_last_error_block = cpu_to_le64(block);
				499	if (ext4_error_ratelimit(inode->i_sb)) {
				500	va_start(args, fmt);
				501	vaf.fmt = fmt;
				502	vaf.va = &args;
				503	if (block)
				504	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
				505	"inode #%lu: block %llu: comm %s: %pV\n",
				506	inode->i_sb->s_id, function, line, inode->i_ino,
				507	block, current->comm, &vaf);
				508	else
				509	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
				510	"inode #%lu: comm %s: %pV\n",
				511	inode->i_sb->s_id, function, line, inode->i_ino,
				512	current->comm, &vaf);
				513	va_end(args);
				514	}
				515	save_error_info(inode->i_sb, function, line);
				516	ext4_handle_error(inode->i_sb);
				517	}
				518
				519	void __ext4_error_file(struct file file, const char function,
				520	unsigned int line, ext4_fsblk_t block,
				521	const char *fmt, ...)
				522	{
				523	va_list args;
				524	struct va_format vaf;
				525	struct ext4_super_block *es;
				526	struct inode *inode = file_inode(file);
				527	char pathname[80], *path;
				528
				529	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
				530	return;
				531
				532	es = EXT4_SB(inode->i_sb)->s_es;
				533	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
				534	if (ext4_error_ratelimit(inode->i_sb)) {
				535	path = file_path(file, pathname, sizeof(pathname));
				536	if (IS_ERR(path))
				537	path = "(unknown)";
				538	va_start(args, fmt);
				539	vaf.fmt = fmt;
				540	vaf.va = &args;
				541	if (block)
				542	printk(KERN_CRIT
				543	"EXT4-fs error (device %s): %s:%d: inode #%lu: "
				544	"block %llu: comm %s: path %s: %pV\n",
				545	inode->i_sb->s_id, function, line, inode->i_ino,
				546	block, current->comm, path, &vaf);
				547	else
				548	printk(KERN_CRIT
				549	"EXT4-fs error (device %s): %s:%d: inode #%lu: "
				550	"comm %s: path %s: %pV\n",
				551	inode->i_sb->s_id, function, line, inode->i_ino,
				552	current->comm, path, &vaf);
				553	va_end(args);
				554	}
				555	save_error_info(inode->i_sb, function, line);
				556	ext4_handle_error(inode->i_sb);
				557	}
				558
				559	const char ext4_decode_error(struct super_block sb, int errno,
				560	char nbuf[16])
				561	{
				562	char *errstr = NULL;
				563
				564	switch (errno) {
				565	case -EFSCORRUPTED:
				566	errstr = "Corrupt filesystem";
				567	break;
				568	case -EFSBADCRC:
				569	errstr = "Filesystem failed CRC";
				570	break;
				571	case -EIO:
				572	errstr = "IO failure";
				573	break;
				574	case -ENOMEM:
				575	errstr = "Out of memory";
				576	break;
				577	case -EROFS:
				578	if (!sb \|\| (EXT4_SB(sb)->s_journal &&
				579	EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
				580	errstr = "Journal has aborted";
				581	else
				582	errstr = "Readonly filesystem";
				583	break;
				584	default:
				585	/* If the caller passed in an extra buffer for unknown
				586	* errors, textualise them now. Else we just return
				587	* NULL. */
				588	if (nbuf) {
				589	/* Check for truncated error codes... */
				590	if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				591	errstr = nbuf;
				592	}
				593	break;
				594	}
				595
				596	return errstr;
				597	}
				598
				599	/* __ext4_std_error decodes expected errors from journaling functions
				600	* automatically and invokes the appropriate error response. */
				601
				602	void __ext4_std_error(struct super_block sb, const char function,
				603	unsigned int line, int errno)
				604	{
				605	char nbuf[16];
				606	const char *errstr;
				607
				608	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				609	return;
				610
				611	/* Special case: if the error is EROFS, and we're not already
				612	* inside a transaction, then there's really no point in logging
				613	* an error. */
				614	if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
				615	return;
				616
				617	if (ext4_error_ratelimit(sb)) {
				618	errstr = ext4_decode_error(sb, errno, nbuf);
				619	printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
				620	sb->s_id, function, line, errstr);
				621	}
				622
				623	save_error_info(sb, function, line);
				624	ext4_handle_error(sb);
				625	}
				626
				627	/*
				628	* ext4_abort is a much stronger failure handler than ext4_error. The
				629	* abort function may be used to deal with unrecoverable failures such
				630	* as journal IO errors or ENOMEM at a critical moment in log management.
				631	*
				632	* We unconditionally force the filesystem into an ABORT\|READONLY state,
				633	* unless the error response on the fs has been set to panic in which
				634	* case we take the easy way out and panic immediately.
				635	*/
				636
				637	void __ext4_abort(struct super_block sb, const char function,
				638	unsigned int line, const char *fmt, ...)
				639	{
				640	struct va_format vaf;
				641	va_list args;
				642
				643	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				644	return;
				645
				646	save_error_info(sb, function, line);
				647	va_start(args, fmt);
				648	vaf.fmt = fmt;
				649	vaf.va = &args;
				650	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
				651	sb->s_id, function, line, &vaf);
				652	va_end(args);
				653
				654	if (sb_rdonly(sb) == 0) {
				655	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
				656	EXT4_SB(sb)->s_mount_flags \|= EXT4_MF_FS_ABORTED;
				657	/*
				658	* Make sure updated value of ->s_mount_flags will be visible
				659	* before ->s_flags update
				660	*/
				661	smp_wmb();
				662	sb->s_flags \|= MS_RDONLY;
				663	if (EXT4_SB(sb)->s_journal)
				664	jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
				665	save_error_info(sb, function, line);
				666	}
				667	if (test_opt(sb, ERRORS_PANIC)) {
				668	if (EXT4_SB(sb)->s_journal &&
				669	!(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
				670	return;
				671	panic("EXT4-fs panic from previous error\n");
				672	}
				673	}
				674
				675	void __ext4_msg(struct super_block *sb,
				676	const char prefix, const char fmt, ...)
				677	{
				678	struct va_format vaf;
				679	va_list args;
				680
				681	if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
				682	return;
				683
				684	va_start(args, fmt);
				685	vaf.fmt = fmt;
				686	vaf.va = &args;
				687	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
				688	va_end(args);
				689	}
				690
				691	#define ext4_warning_ratelimit(sb) \
				692	___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
				693	"EXT4-fs warning")
				694
				695	void __ext4_warning(struct super_block sb, const char function,
				696	unsigned int line, const char *fmt, ...)
				697	{
				698	struct va_format vaf;
				699	va_list args;
				700
				701	if (!ext4_warning_ratelimit(sb))
				702	return;
				703
				704	va_start(args, fmt);
				705	vaf.fmt = fmt;
				706	vaf.va = &args;
				707	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
				708	sb->s_id, function, line, &vaf);
				709	va_end(args);
				710	}
				711
				712	void __ext4_warning_inode(const struct inode inode, const char function,
				713	unsigned int line, const char *fmt, ...)
				714	{
				715	struct va_format vaf;
				716	va_list args;
				717
				718	if (!ext4_warning_ratelimit(inode->i_sb))
				719	return;
				720
				721	va_start(args, fmt);
				722	vaf.fmt = fmt;
				723	vaf.va = &args;
				724	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
				725	"inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
				726	function, line, inode->i_ino, current->comm, &vaf);
				727	va_end(args);
				728	}
				729
				730	void __ext4_grp_locked_error(const char *function, unsigned int line,
				731	struct super_block *sb, ext4_group_t grp,
				732	unsigned long ino, ext4_fsblk_t block,
				733	const char *fmt, ...)
				734	__releases(bitlock)
				735	__acquires(bitlock)
				736	{
				737	struct va_format vaf;
				738	va_list args;
				739	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				740
				741	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				742	return;
				743
				744	es->s_last_error_ino = cpu_to_le32(ino);
				745	es->s_last_error_block = cpu_to_le64(block);
				746	__save_error_info(sb, function, line);
				747
				748	if (ext4_error_ratelimit(sb)) {
				749	va_start(args, fmt);
				750	vaf.fmt = fmt;
				751	vaf.va = &args;
				752	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
				753	sb->s_id, function, line, grp);
				754	if (ino)
				755	printk(KERN_CONT "inode %lu: ", ino);
				756	if (block)
				757	printk(KERN_CONT "block %llu:",
				758	(unsigned long long) block);
				759	printk(KERN_CONT "%pV\n", &vaf);
				760	va_end(args);
				761	}
				762
				763	if (test_opt(sb, ERRORS_CONT)) {
				764	ext4_commit_super(sb, 0);
				765	return;
				766	}
				767
				768	ext4_unlock_group(sb, grp);
				769	ext4_commit_super(sb, 1);
				770	ext4_handle_error(sb);
				771	/*
				772	* We only get here in the ERRORS_RO case; relocking the group
				773	* may be dangerous, but nothing bad will happen since the
				774	* filesystem will have already been marked read/only and the
				775	* journal has been aborted. We return 1 as a hint to callers
				776	* who might what to use the return value from
				777	* ext4_grp_locked_error() to distinguish between the
				778	* ERRORS_CONT and ERRORS_RO case, and perhaps return more
				779	* aggressively from the ext4 function in question, with a
				780	* more appropriate error code.
				781	*/
				782	ext4_lock_group(sb, grp);
				783	return;
				784	}
				785
				786	void ext4_update_dynamic_rev(struct super_block *sb)
				787	{
				788	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				789
				790	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
				791	return;
				792
				793	ext4_warning(sb,
				794	"updating to rev %d because of new feature flag, "
				795	"running e2fsck is recommended",
				796	EXT4_DYNAMIC_REV);
				797
				798	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
				799	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
				800	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
				801	/* leave es->s_feature_compat flags alone /
				802	/* es->s_uuid will be set by e2fsck if empty */
				803
				804	/*
				805	* The rest of the superblock fields should be zero, and if not it
				806	* means they are likely already in use, so leave them alone. We
				807	* can leave it up to e2fsck to clean up any inconsistencies there.
				808	*/
				809	}
				810
				811	/*
				812	* Open the external journal device
				813	*/
				814	static struct block_device ext4_blkdev_get(dev_t dev, struct super_block sb)
				815	{
				816	struct block_device *bdev;
				817	char b[BDEVNAME_SIZE];
				818
				819	bdev = blkdev_get_by_dev(dev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL, sb);
				820	if (IS_ERR(bdev))
				821	goto fail;
				822	return bdev;
				823
				824	fail:
				825	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
				826	__bdevname(dev, b), PTR_ERR(bdev));
				827	return NULL;
				828	}
				829
				830	/*
				831	* Release the journal device
				832	*/
				833	static void ext4_blkdev_put(struct block_device *bdev)
				834	{
				835	blkdev_put(bdev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL);
				836	}
				837
				838	static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
				839	{
				840	struct block_device *bdev;
				841	bdev = sbi->journal_bdev;
				842	if (bdev) {
				843	ext4_blkdev_put(bdev);
				844	sbi->journal_bdev = NULL;
				845	}
				846	}
				847
				848	static inline struct inode orphan_list_entry(struct list_head l)
				849	{
				850	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
				851	}
				852
				853	static void dump_orphan_list(struct super_block sb, struct ext4_sb_info sbi)
				854	{
				855	struct list_head *l;
				856
				857	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
				858	le32_to_cpu(sbi->s_es->s_last_orphan));
				859
				860	printk(KERN_ERR "sb_info orphan list:\n");
				861	list_for_each(l, &sbi->s_orphan) {
				862	struct inode *inode = orphan_list_entry(l);
				863	printk(KERN_ERR " "
				864	"inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
				865	inode->i_sb->s_id, inode->i_ino, inode,
				866	inode->i_mode, inode->i_nlink,
				867	NEXT_ORPHAN(inode));
				868	}
				869	}
				870
				871	#ifdef CONFIG_QUOTA
				872	static int ext4_quota_off(struct super_block *sb, int type);
				873
				874	static inline void ext4_quota_off_umount(struct super_block *sb)
				875	{
				876	int type;
				877
				878	/* Use our quota_off function to clear inode flags etc. */
				879	for (type = 0; type < EXT4_MAXQUOTAS; type++)
				880	ext4_quota_off(sb, type);
				881	}
				882
				883	/*
				884	* This is a helper function which is used in the mount/remount
				885	* codepaths (which holds s_umount) to fetch the quota file name.
				886	*/
				887	static inline char get_qf_name(struct super_block sb,
				888	struct ext4_sb_info *sbi,
				889	int type)
				890	{
				891	return rcu_dereference_protected(sbi->s_qf_names[type],
				892	lockdep_is_held(&sb->s_umount));
				893	}
				894	#else
				895	static inline void ext4_quota_off_umount(struct super_block *sb)
				896	{
				897	}
				898	#endif
				899
				900	static void ext4_put_super(struct super_block *sb)
				901	{
				902	struct ext4_sb_info *sbi = EXT4_SB(sb);
				903	struct ext4_super_block *es = sbi->s_es;
				904	struct buffer_head **group_desc;
				905	struct flex_groups **flex_groups;
				906	int aborted = 0;
				907	int i, err;
				908
				909	ext4_unregister_li_request(sb);
				910	ext4_quota_off_umount(sb);
				911
				912	flush_workqueue(sbi->rsv_conversion_wq);
				913	destroy_workqueue(sbi->rsv_conversion_wq);
				914
				915	if (sbi->s_journal) {
				916	aborted = is_journal_aborted(sbi->s_journal);
				917	err = jbd2_journal_destroy(sbi->s_journal);
				918	sbi->s_journal = NULL;
				919	if ((err < 0) && !aborted)
				920	ext4_abort(sb, "Couldn't clean up the journal");
				921	}
				922
				923	ext4_unregister_sysfs(sb);
				924	ext4_es_unregister_shrinker(sbi);
				925	del_timer_sync(&sbi->s_err_report);
				926	ext4_release_system_zone(sb);
				927	ext4_mb_release(sb);
				928	ext4_ext_release(sb);
				929
				930	if (!sb_rdonly(sb) && !aborted) {
				931	ext4_clear_feature_journal_needs_recovery(sb);
				932	es->s_state = cpu_to_le16(sbi->s_mount_state);
				933	}
				934	if (!sb_rdonly(sb))
				935	ext4_commit_super(sb, 1);
				936
				937	rcu_read_lock();
				938	group_desc = rcu_dereference(sbi->s_group_desc);
				939	for (i = 0; i < sbi->s_gdb_count; i++)
				940	brelse(group_desc[i]);
				941	kvfree(group_desc);
				942	flex_groups = rcu_dereference(sbi->s_flex_groups);
				943	if (flex_groups) {
				944	for (i = 0; i < sbi->s_flex_groups_allocated; i++)
				945	kvfree(flex_groups[i]);
				946	kvfree(flex_groups);
				947	}
				948	rcu_read_unlock();
				949	percpu_counter_destroy(&sbi->s_freeclusters_counter);
				950	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				951	percpu_counter_destroy(&sbi->s_dirs_counter);
				952	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
				953	percpu_free_rwsem(&sbi->s_writepages_rwsem);
				954	#ifdef CONFIG_QUOTA
				955	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				956	kfree(get_qf_name(sb, sbi, i));
				957	#endif
				958
				959	/* Debugging code just in case the in-memory inode orphan list
				960	* isn't empty. The on-disk one can be non-empty if we've
				961	* detected an error and taken the fs readonly, but the
				962	* in-memory list had better be clean by this point. */
				963	if (!list_empty(&sbi->s_orphan))
				964	dump_orphan_list(sb, sbi);
				965	J_ASSERT(list_empty(&sbi->s_orphan));
				966
				967	sync_blockdev(sb->s_bdev);
				968	invalidate_bdev(sb->s_bdev);
				969	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
				970	/*
				971	* Invalidate the journal device's buffers. We don't want them
				972	* floating about in memory - the physical journal device may
				973	* hotswapped, and it breaks the `ro-after' testing code.
				974	*/
				975	sync_blockdev(sbi->journal_bdev);
				976	invalidate_bdev(sbi->journal_bdev);
				977	ext4_blkdev_remove(sbi);
				978	}
				979	if (sbi->s_ea_inode_cache) {
				980	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
				981	sbi->s_ea_inode_cache = NULL;
				982	}
				983	if (sbi->s_ea_block_cache) {
				984	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
				985	sbi->s_ea_block_cache = NULL;
				986	}
				987	if (sbi->s_mmp_tsk)
				988	kthread_stop(sbi->s_mmp_tsk);
				989	brelse(sbi->s_sbh);
				990	sb->s_fs_info = NULL;
				991	/*
				992	* Now that we are completely done shutting down the
				993	* superblock, we need to actually destroy the kobject.
				994	*/
				995	kobject_put(&sbi->s_kobj);
				996	wait_for_completion(&sbi->s_kobj_unregister);
				997	if (sbi->s_chksum_driver)
				998	crypto_free_shash(sbi->s_chksum_driver);
				999	kfree(sbi->s_blockgroup_lock);
				1000	fs_put_dax(sbi->s_daxdev);
				1001	kfree(sbi);
				1002	}
				1003
				1004	static struct kmem_cache *ext4_inode_cachep;
				1005
				1006	/*
				1007	* Called inside transaction, so use GFP_NOFS
				1008	*/
				1009	static struct inode ext4_alloc_inode(struct super_block sb)
				1010	{
				1011	struct ext4_inode_info *ei;
				1012
				1013	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
				1014	if (!ei)
				1015	return NULL;
				1016
				1017	ei->vfs_inode.i_version = 1;
				1018	spin_lock_init(&ei->i_raw_lock);
				1019	INIT_LIST_HEAD(&ei->i_prealloc_list);
				1020	spin_lock_init(&ei->i_prealloc_lock);
				1021	ext4_es_init_tree(&ei->i_es_tree);
				1022	rwlock_init(&ei->i_es_lock);
				1023	INIT_LIST_HEAD(&ei->i_es_list);
				1024	ei->i_es_all_nr = 0;
				1025	ei->i_es_shk_nr = 0;
				1026	ei->i_es_shrink_lblk = 0;
				1027	ei->i_reserved_data_blocks = 0;
				1028	ei->i_da_metadata_calc_len = 0;
				1029	ei->i_da_metadata_calc_last_lblock = 0;
				1030	spin_lock_init(&(ei->i_block_reservation_lock));
				1031	#ifdef CONFIG_QUOTA
				1032	ei->i_reserved_quota = 0;
				1033	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
				1034	#endif
				1035	ei->jinode = NULL;
				1036	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
				1037	spin_lock_init(&ei->i_completed_io_lock);
				1038	ei->i_sync_tid = 0;
				1039	ei->i_datasync_tid = 0;
				1040	atomic_set(&ei->i_unwritten, 0);
				1041	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
				1042	return &ei->vfs_inode;
				1043	}
				1044
				1045	static int ext4_drop_inode(struct inode *inode)
				1046	{
				1047	int drop = generic_drop_inode(inode);
				1048
				1049	trace_ext4_drop_inode(inode, drop);
				1050	return drop;
				1051	}
				1052
				1053	static void ext4_i_callback(struct rcu_head *head)
				1054	{
				1055	struct inode *inode = container_of(head, struct inode, i_rcu);
				1056	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
				1057	}
				1058
				1059	static void ext4_destroy_inode(struct inode *inode)
				1060	{
				1061	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
				1062	ext4_msg(inode->i_sb, KERN_ERR,
				1063	"Inode %lu (%p): orphan list check failed!",
				1064	inode->i_ino, EXT4_I(inode));
				1065	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				1066	EXT4_I(inode), sizeof(struct ext4_inode_info),
				1067	true);
				1068	dump_stack();
				1069	}
				1070	call_rcu(&inode->i_rcu, ext4_i_callback);
				1071	}
				1072
				1073	static void init_once(void *foo)
				1074	{
				1075	struct ext4_inode_info ei = (struct ext4_inode_info ) foo;
				1076
				1077	INIT_LIST_HEAD(&ei->i_orphan);
				1078	init_rwsem(&ei->xattr_sem);
				1079	init_rwsem(&ei->i_data_sem);
				1080	init_rwsem(&ei->i_mmap_sem);
				1081	inode_init_once(&ei->vfs_inode);
				1082	}
				1083
				1084	static int __init init_inodecache(void)
				1085	{
				1086	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
				1087	sizeof(struct ext4_inode_info),
				1088	0, (SLAB_RECLAIM_ACCOUNT\|
				1089	SLAB_MEM_SPREAD\|SLAB_ACCOUNT),
				1090	init_once);
				1091	if (ext4_inode_cachep == NULL)
				1092	return -ENOMEM;
				1093	return 0;
				1094	}
				1095
				1096	static void destroy_inodecache(void)
				1097	{
				1098	/*
				1099	* Make sure all delayed rcu free inodes are flushed before we
				1100	* destroy cache.
				1101	*/
				1102	rcu_barrier();
				1103	kmem_cache_destroy(ext4_inode_cachep);
				1104	}
				1105
				1106	void ext4_clear_inode(struct inode *inode)
				1107	{
				1108	invalidate_inode_buffers(inode);
				1109	clear_inode(inode);
				1110	dquot_drop(inode);
				1111	ext4_discard_preallocations(inode);
				1112	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
				1113	if (EXT4_I(inode)->jinode) {
				1114	jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
				1115	EXT4_I(inode)->jinode);
				1116	jbd2_free_inode(EXT4_I(inode)->jinode);
				1117	EXT4_I(inode)->jinode = NULL;
				1118	}
				1119	fscrypt_put_encryption_info(inode);
				1120	}
				1121
				1122	static struct inode ext4_nfs_get_inode(struct super_block sb,
				1123	u64 ino, u32 generation)
				1124	{
				1125	struct inode *inode;
				1126
				1127	/*
				1128	* Currently we don't know the generation for parent directory, so
				1129	* a generation of 0 means "accept any"
				1130	*/
				1131	inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
				1132	if (IS_ERR(inode))
				1133	return ERR_CAST(inode);
				1134	if (generation && inode->i_generation != generation) {
				1135	iput(inode);
				1136	return ERR_PTR(-ESTALE);
				1137	}
				1138
				1139	return inode;
				1140	}
				1141
				1142	static struct dentry ext4_fh_to_dentry(struct super_block sb, struct fid *fid,
				1143	int fh_len, int fh_type)
				1144	{
				1145	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				1146	ext4_nfs_get_inode);
				1147	}
				1148
				1149	static struct dentry ext4_fh_to_parent(struct super_block sb, struct fid *fid,
				1150	int fh_len, int fh_type)
				1151	{
				1152	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				1153	ext4_nfs_get_inode);
				1154	}
				1155
				1156	static int ext4_nfs_commit_metadata(struct inode *inode)
				1157	{
				1158	struct writeback_control wbc = {
				1159	.sync_mode = WB_SYNC_ALL
				1160	};
				1161
				1162	trace_ext4_nfs_commit_metadata(inode);
				1163	return ext4_write_inode(inode, &wbc);
				1164	}
				1165
				1166	/*
				1167	* Try to release metadata pages (indirect blocks, directories) which are
				1168	* mapped via the block device. Since these pages could have journal heads
				1169	* which would prevent try_to_free_buffers() from freeing them, we must use
				1170	* jbd2 layer's try_to_free_buffers() function to release them.
				1171	*/
				1172	static int bdev_try_to_free_page(struct super_block sb, struct page page,
				1173	gfp_t wait)
				1174	{
				1175	journal_t *journal = EXT4_SB(sb)->s_journal;
				1176
				1177	WARN_ON(PageChecked(page));
				1178	if (!page_has_buffers(page))
				1179	return 0;
				1180	if (journal)
				1181	return jbd2_journal_try_to_free_buffers(journal, page,
				1182	wait & ~__GFP_DIRECT_RECLAIM);
				1183	return try_to_free_buffers(page);
				1184	}
				1185
				1186	#ifdef CONFIG_EXT4_FS_ENCRYPTION
				1187	static int ext4_get_context(struct inode inode, void ctx, size_t len)
				1188	{
				1189	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
				1190	EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
				1191	}
				1192
				1193	static int ext4_set_context(struct inode inode, const void ctx, size_t len,
				1194	void *fs_data)
				1195	{
				1196	handle_t *handle = fs_data;
				1197	int res, res2, credits, retries = 0;
				1198
				1199	/*
				1200	* Encrypting the root directory is not allowed because e2fsck expects
				1201	* lost+found to exist and be unencrypted, and encrypting the root
				1202	* directory would imply encrypting the lost+found directory as well as
				1203	* the filename "lost+found" itself.
				1204	*/
				1205	if (inode->i_ino == EXT4_ROOT_INO)
				1206	return -EPERM;
				1207
				1208	res = ext4_convert_inline_data(inode);
				1209	if (res)
				1210	return res;
				1211
				1212	/*
				1213	* If a journal handle was specified, then the encryption context is
				1214	* being set on a new inode via inheritance and is part of a larger
				1215	* transaction to create the inode. Otherwise the encryption context is
				1216	* being set on an existing inode in its own transaction. Only in the
				1217	* latter case should the "retry on ENOSPC" logic be used.
				1218	*/
				1219
				1220	if (handle) {
				1221	res = ext4_xattr_set_handle(handle, inode,
				1222	EXT4_XATTR_INDEX_ENCRYPTION,
				1223	EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				1224	ctx, len, 0);
				1225	if (!res) {
				1226	ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
				1227	ext4_clear_inode_state(inode,
				1228	EXT4_STATE_MAY_INLINE_DATA);
				1229	/*
				1230	* Update inode->i_flags - S_ENCRYPTED will be enabled,
				1231	* S_DAX may be disabled
				1232	*/
				1233	ext4_set_inode_flags(inode);
				1234	}
				1235	return res;
				1236	}
				1237
				1238	res = dquot_initialize(inode);
				1239	if (res)
				1240	return res;
				1241	retry:
				1242	res = ext4_xattr_set_credits(inode, len, false /* is_create */,
				1243	&credits);
				1244	if (res)
				1245	return res;
				1246
				1247	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
				1248	if (IS_ERR(handle))
				1249	return PTR_ERR(handle);
				1250
				1251	res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
				1252	EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				1253	ctx, len, 0);
				1254	if (!res) {
				1255	ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
				1256	/*
				1257	* Update inode->i_flags - S_ENCRYPTED will be enabled,
				1258	* S_DAX may be disabled
				1259	*/
				1260	ext4_set_inode_flags(inode);
				1261	res = ext4_mark_inode_dirty(handle, inode);
				1262	if (res)
				1263	EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
				1264	}
				1265	res2 = ext4_journal_stop(handle);
				1266
				1267	if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
				1268	goto retry;
				1269	if (!res)
				1270	res = res2;
				1271	return res;
				1272	}
				1273
				1274	static bool ext4_dummy_context(struct inode *inode)
				1275	{
				1276	return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
				1277	}
				1278
				1279	static unsigned ext4_max_namelen(struct inode *inode)
				1280	{
				1281	return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
				1282	EXT4_NAME_LEN;
				1283	}
				1284
				1285	static const struct fscrypt_operations ext4_cryptops = {
				1286	.key_prefix = "ext4:",
				1287	.get_context = ext4_get_context,
				1288	.set_context = ext4_set_context,
				1289	.dummy_context = ext4_dummy_context,
				1290	.empty_dir = ext4_empty_dir,
				1291	.max_namelen = ext4_max_namelen,
				1292	};
				1293	#endif
				1294
				1295	#ifdef CONFIG_QUOTA
				1296	static const char * const quotatypes[] = INITQFNAMES;
				1297	#define QTYPE2NAME(t) (quotatypes[t])
				1298
				1299	static int ext4_write_dquot(struct dquot *dquot);
				1300	static int ext4_acquire_dquot(struct dquot *dquot);
				1301	static int ext4_release_dquot(struct dquot *dquot);
				1302	static int ext4_mark_dquot_dirty(struct dquot *dquot);
				1303	static int ext4_write_info(struct super_block *sb, int type);
				1304	static int ext4_quota_on(struct super_block *sb, int type, int format_id,
				1305	const struct path *path);
				1306	static int ext4_quota_on_mount(struct super_block *sb, int type);
				1307	static ssize_t ext4_quota_read(struct super_block sb, int type, char data,
				1308	size_t len, loff_t off);
				1309	static ssize_t ext4_quota_write(struct super_block *sb, int type,
				1310	const char *data, size_t len, loff_t off);
				1311	static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
				1312	unsigned int flags);
				1313	static int ext4_enable_quotas(struct super_block *sb);
				1314	static int ext4_get_next_id(struct super_block sb, struct kqid qid);
				1315
				1316	static struct dquot *ext4_get_dquots(struct inode inode)
				1317	{
				1318	return EXT4_I(inode)->i_dquot;
				1319	}
				1320
				1321	static const struct dquot_operations ext4_quota_operations = {
				1322	.get_reserved_space = ext4_get_reserved_space,
				1323	.write_dquot = ext4_write_dquot,
				1324	.acquire_dquot = ext4_acquire_dquot,
				1325	.release_dquot = ext4_release_dquot,
				1326	.mark_dirty = ext4_mark_dquot_dirty,
				1327	.write_info = ext4_write_info,
				1328	.alloc_dquot = dquot_alloc,
				1329	.destroy_dquot = dquot_destroy,
				1330	.get_projid = ext4_get_projid,
				1331	.get_inode_usage = ext4_get_inode_usage,
				1332	.get_next_id = ext4_get_next_id,
				1333	};
				1334
				1335	static const struct quotactl_ops ext4_qctl_operations = {
				1336	.quota_on = ext4_quota_on,
				1337	.quota_off = ext4_quota_off,
				1338	.quota_sync = dquot_quota_sync,
				1339	.get_state = dquot_get_state,
				1340	.set_info = dquot_set_dqinfo,
				1341	.get_dqblk = dquot_get_dqblk,
				1342	.set_dqblk = dquot_set_dqblk,
				1343	.get_nextdqblk = dquot_get_next_dqblk,
				1344	};
				1345	#endif
				1346
				1347	static const struct super_operations ext4_sops = {
				1348	.alloc_inode = ext4_alloc_inode,
				1349	.destroy_inode = ext4_destroy_inode,
				1350	.write_inode = ext4_write_inode,
				1351	.dirty_inode = ext4_dirty_inode,
				1352	.drop_inode = ext4_drop_inode,
				1353	.evict_inode = ext4_evict_inode,
				1354	.put_super = ext4_put_super,
				1355	.sync_fs = ext4_sync_fs,
				1356	.freeze_fs = ext4_freeze,
				1357	.unfreeze_fs = ext4_unfreeze,
				1358	.statfs = ext4_statfs,
				1359	.remount_fs = ext4_remount,
				1360	.show_options = ext4_show_options,
				1361	#ifdef CONFIG_QUOTA
				1362	.quota_read = ext4_quota_read,
				1363	.quota_write = ext4_quota_write,
				1364	.get_dquots = ext4_get_dquots,
				1365	#endif
				1366	.bdev_try_to_free_page = bdev_try_to_free_page,
				1367	};
				1368
				1369	static const struct export_operations ext4_export_ops = {
				1370	.fh_to_dentry = ext4_fh_to_dentry,
				1371	.fh_to_parent = ext4_fh_to_parent,
				1372	.get_parent = ext4_get_parent,
				1373	.commit_metadata = ext4_nfs_commit_metadata,
				1374	};
				1375
				1376	enum {
				1377	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
				1378	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
				1379	Opt_nouid32, Opt_debug, Opt_removed,
				1380	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
				1381	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
				1382	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
				1383	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
				1384	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
				1385	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
				1386	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
				1387	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
				1388	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
				1389	Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
				1390	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
				1391	Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
				1392	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
				1393	Opt_inode_readahead_blks, Opt_journal_ioprio,
				1394	Opt_dioread_nolock, Opt_dioread_lock,
				1395	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
				1396	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
				1397	};
				1398
				1399	static const match_table_t tokens = {
				1400	{Opt_bsd_df, "bsddf"},
				1401	{Opt_minix_df, "minixdf"},
				1402	{Opt_grpid, "grpid"},
				1403	{Opt_grpid, "bsdgroups"},
				1404	{Opt_nogrpid, "nogrpid"},
				1405	{Opt_nogrpid, "sysvgroups"},
				1406	{Opt_resgid, "resgid=%u"},
				1407	{Opt_resuid, "resuid=%u"},
				1408	{Opt_sb, "sb=%u"},
				1409	{Opt_err_cont, "errors=continue"},
				1410	{Opt_err_panic, "errors=panic"},
				1411	{Opt_err_ro, "errors=remount-ro"},
				1412	{Opt_nouid32, "nouid32"},
				1413	{Opt_debug, "debug"},
				1414	{Opt_removed, "oldalloc"},
				1415	{Opt_removed, "orlov"},
				1416	{Opt_user_xattr, "user_xattr"},
				1417	{Opt_nouser_xattr, "nouser_xattr"},
				1418	{Opt_acl, "acl"},
				1419	{Opt_noacl, "noacl"},
				1420	{Opt_noload, "norecovery"},
				1421	{Opt_noload, "noload"},
				1422	{Opt_removed, "nobh"},
				1423	{Opt_removed, "bh"},
				1424	{Opt_commit, "commit=%u"},
				1425	{Opt_min_batch_time, "min_batch_time=%u"},
				1426	{Opt_max_batch_time, "max_batch_time=%u"},
				1427	{Opt_journal_dev, "journal_dev=%u"},
				1428	{Opt_journal_path, "journal_path=%s"},
				1429	{Opt_journal_checksum, "journal_checksum"},
				1430	{Opt_nojournal_checksum, "nojournal_checksum"},
				1431	{Opt_journal_async_commit, "journal_async_commit"},
				1432	{Opt_abort, "abort"},
				1433	{Opt_data_journal, "data=journal"},
				1434	{Opt_data_ordered, "data=ordered"},
				1435	{Opt_data_writeback, "data=writeback"},
				1436	{Opt_data_err_abort, "data_err=abort"},
				1437	{Opt_data_err_ignore, "data_err=ignore"},
				1438	{Opt_offusrjquota, "usrjquota="},
				1439	{Opt_usrjquota, "usrjquota=%s"},
				1440	{Opt_offgrpjquota, "grpjquota="},
				1441	{Opt_grpjquota, "grpjquota=%s"},
				1442	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
				1443	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
				1444	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
				1445	{Opt_grpquota, "grpquota"},
				1446	{Opt_noquota, "noquota"},
				1447	{Opt_quota, "quota"},
				1448	{Opt_usrquota, "usrquota"},
				1449	{Opt_prjquota, "prjquota"},
				1450	{Opt_barrier, "barrier=%u"},
				1451	{Opt_barrier, "barrier"},
				1452	{Opt_nobarrier, "nobarrier"},
				1453	{Opt_i_version, "i_version"},
				1454	{Opt_dax, "dax"},
				1455	{Opt_stripe, "stripe=%u"},
				1456	{Opt_delalloc, "delalloc"},
				1457	{Opt_lazytime, "lazytime"},
				1458	{Opt_nolazytime, "nolazytime"},
				1459	{Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
				1460	{Opt_nodelalloc, "nodelalloc"},
				1461	{Opt_removed, "mblk_io_submit"},
				1462	{Opt_removed, "nomblk_io_submit"},
				1463	{Opt_block_validity, "block_validity"},
				1464	{Opt_noblock_validity, "noblock_validity"},
				1465	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
				1466	{Opt_journal_ioprio, "journal_ioprio=%u"},
				1467	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
				1468	{Opt_auto_da_alloc, "auto_da_alloc"},
				1469	{Opt_noauto_da_alloc, "noauto_da_alloc"},
				1470	{Opt_dioread_nolock, "dioread_nolock"},
				1471	{Opt_dioread_lock, "dioread_lock"},
				1472	{Opt_discard, "discard"},
				1473	{Opt_nodiscard, "nodiscard"},
				1474	{Opt_init_itable, "init_itable=%u"},
				1475	{Opt_init_itable, "init_itable"},
				1476	{Opt_noinit_itable, "noinit_itable"},
				1477	{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
				1478	{Opt_test_dummy_encryption, "test_dummy_encryption"},
				1479	{Opt_nombcache, "nombcache"},
				1480	{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
				1481	{Opt_removed, "check=none"}, /* mount option from ext2/3 */
				1482	{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
				1483	{Opt_removed, "reservation"}, /* mount option from ext2/3 */
				1484	{Opt_removed, "noreservation"}, /* mount option from ext2/3 */
				1485	{Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
				1486	{Opt_err, NULL},
				1487	};
				1488
				1489	static ext4_fsblk_t get_sb_block(void **data)
				1490	{
				1491	ext4_fsblk_t sb_block;
				1492	char options = (char ) *data;
				1493
				1494	if (!options \|\| strncmp(options, "sb=", 3) != 0)
				1495	return 1; /* Default location */
				1496
				1497	options += 3;
				1498	/* TODO: use simple_strtoll with >32bit ext4 */
				1499	sb_block = simple_strtoul(options, &options, 0);
				1500	if (options && options != ',') {
				1501	printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
				1502	(char ) data);
				1503	return 1;
				1504	}
				1505	if (*options == ',')
				1506	options++;
				1507	data = (void ) options;
				1508
				1509	return sb_block;
				1510	}
				1511
				1512	#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
				1513	static const char deprecated_msg[] =
				1514	"Mount option \"%s\" will be removed by %s\n"
				1515	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
				1516
				1517	#ifdef CONFIG_QUOTA
				1518	static int set_qf_name(struct super_block sb, int qtype, substring_t args)
				1519	{
				1520	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1521	char qname, old_qname = get_qf_name(sb, sbi, qtype);
				1522	int ret = -1;
				1523
				1524	if (sb_any_quota_loaded(sb) && !old_qname) {
				1525	ext4_msg(sb, KERN_ERR,
				1526	"Cannot change journaled "
				1527	"quota options when quota turned on");
				1528	return -1;
				1529	}
				1530	if (ext4_has_feature_quota(sb)) {
				1531	ext4_msg(sb, KERN_INFO, "Journaled quota options "
				1532	"ignored when QUOTA feature is enabled");
				1533	return 1;
				1534	}
				1535	qname = match_strdup(args);
				1536	if (!qname) {
				1537	ext4_msg(sb, KERN_ERR,
				1538	"Not enough memory for storing quotafile name");
				1539	return -1;
				1540	}
				1541	if (old_qname) {
				1542	if (strcmp(old_qname, qname) == 0)
				1543	ret = 1;
				1544	else
				1545	ext4_msg(sb, KERN_ERR,
				1546	"%s quota file already specified",
				1547	QTYPE2NAME(qtype));
				1548	goto errout;
				1549	}
				1550	if (strchr(qname, '/')) {
				1551	ext4_msg(sb, KERN_ERR,
				1552	"quotafile must be on filesystem root");
				1553	goto errout;
				1554	}
				1555	rcu_assign_pointer(sbi->s_qf_names[qtype], qname);
				1556	set_opt(sb, QUOTA);
				1557	return 1;
				1558	errout:
				1559	kfree(qname);
				1560	return ret;
				1561	}
				1562
				1563	static int clear_qf_name(struct super_block *sb, int qtype)
				1564	{
				1565
				1566	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1567	char *old_qname = get_qf_name(sb, sbi, qtype);
				1568
				1569	if (sb_any_quota_loaded(sb) && old_qname) {
				1570	ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
				1571	" when quota turned on");
				1572	return -1;
				1573	}
				1574	rcu_assign_pointer(sbi->s_qf_names[qtype], NULL);
				1575	synchronize_rcu();
				1576	kfree(old_qname);
				1577	return 1;
				1578	}
				1579	#endif
				1580
				1581	#define MOPT_SET 0x0001
				1582	#define MOPT_CLEAR 0x0002
				1583	#define MOPT_NOSUPPORT 0x0004
				1584	#define MOPT_EXPLICIT 0x0008
				1585	#define MOPT_CLEAR_ERR 0x0010
				1586	#define MOPT_GTE0 0x0020
				1587	#ifdef CONFIG_QUOTA
				1588	#define MOPT_Q 0
				1589	#define MOPT_QFMT 0x0040
				1590	#else
				1591	#define MOPT_Q MOPT_NOSUPPORT
				1592	#define MOPT_QFMT MOPT_NOSUPPORT
				1593	#endif
				1594	#define MOPT_DATAJ 0x0080
				1595	#define MOPT_NO_EXT2 0x0100
				1596	#define MOPT_NO_EXT3 0x0200
				1597	#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 \| MOPT_NO_EXT3)
				1598	#define MOPT_STRING 0x0400
				1599
				1600	static const struct mount_opts {
				1601	int token;
				1602	int mount_opt;
				1603	int flags;
				1604	} ext4_mount_opts[] = {
				1605	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
				1606	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
				1607	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
				1608	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
				1609	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
				1610	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
				1611	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
				1612	MOPT_EXT4_ONLY \| MOPT_SET},
				1613	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
				1614	MOPT_EXT4_ONLY \| MOPT_CLEAR},
				1615	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
				1616	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
				1617	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
				1618	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_EXPLICIT},
				1619	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
				1620	MOPT_EXT4_ONLY \| MOPT_CLEAR},
				1621	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
				1622	MOPT_EXT4_ONLY \| MOPT_CLEAR},
				1623	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
				1624	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_EXPLICIT},
				1625	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT \|
				1626	EXT4_MOUNT_JOURNAL_CHECKSUM),
				1627	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_EXPLICIT},
				1628	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 \| MOPT_SET},
				1629	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET \| MOPT_CLEAR_ERR},
				1630	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET \| MOPT_CLEAR_ERR},
				1631	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET \| MOPT_CLEAR_ERR},
				1632	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
				1633	MOPT_NO_EXT2},
				1634	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
				1635	MOPT_NO_EXT2},
				1636	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
				1637	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
				1638	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
				1639	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
				1640	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
				1641	{Opt_commit, 0, MOPT_GTE0},
				1642	{Opt_max_batch_time, 0, MOPT_GTE0},
				1643	{Opt_min_batch_time, 0, MOPT_GTE0},
				1644	{Opt_inode_readahead_blks, 0, MOPT_GTE0},
				1645	{Opt_init_itable, 0, MOPT_GTE0},
				1646	{Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
				1647	{Opt_stripe, 0, MOPT_GTE0},
				1648	{Opt_resuid, 0, MOPT_GTE0},
				1649	{Opt_resgid, 0, MOPT_GTE0},
				1650	{Opt_journal_dev, 0, MOPT_NO_EXT2 \| MOPT_GTE0},
				1651	{Opt_journal_path, 0, MOPT_NO_EXT2 \| MOPT_STRING},
				1652	{Opt_journal_ioprio, 0, MOPT_NO_EXT2 \| MOPT_GTE0},
				1653	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 \| MOPT_DATAJ},
				1654	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 \| MOPT_DATAJ},
				1655	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
				1656	MOPT_NO_EXT2 \| MOPT_DATAJ},
				1657	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
				1658	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
				1659	#ifdef CONFIG_EXT4_FS_POSIX_ACL
				1660	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
				1661	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
				1662	#else
				1663	{Opt_acl, 0, MOPT_NOSUPPORT},
				1664	{Opt_noacl, 0, MOPT_NOSUPPORT},
				1665	#endif
				1666	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
				1667	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
				1668	{Opt_debug_want_extra_isize, 0, MOPT_GTE0},
				1669	{Opt_quota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA, MOPT_SET \| MOPT_Q},
				1670	{Opt_usrquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA,
				1671	MOPT_SET \| MOPT_Q},
				1672	{Opt_grpquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_GRPQUOTA,
				1673	MOPT_SET \| MOPT_Q},
				1674	{Opt_prjquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_PRJQUOTA,
				1675	MOPT_SET \| MOPT_Q},
				1676	{Opt_noquota, (EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA \|
				1677	EXT4_MOUNT_GRPQUOTA \| EXT4_MOUNT_PRJQUOTA),
				1678	MOPT_CLEAR \| MOPT_Q},
				1679	{Opt_usrjquota, 0, MOPT_Q},
				1680	{Opt_grpjquota, 0, MOPT_Q},
				1681	{Opt_offusrjquota, 0, MOPT_Q},
				1682	{Opt_offgrpjquota, 0, MOPT_Q},
				1683	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
				1684	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
				1685	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
				1686	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
				1687	{Opt_test_dummy_encryption, 0, MOPT_GTE0},
				1688	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
				1689	{Opt_err, 0, 0}
				1690	};
				1691
				1692	static int handle_mount_opt(struct super_block sb, char opt, int token,
				1693	substring_t args, unsigned long journal_devnum,
				1694	unsigned int *journal_ioprio, int is_remount)
				1695	{
				1696	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1697	const struct mount_opts *m;
				1698	kuid_t uid;
				1699	kgid_t gid;
				1700	int arg = 0;
				1701
				1702	#ifdef CONFIG_QUOTA
				1703	if (token == Opt_usrjquota)
				1704	return set_qf_name(sb, USRQUOTA, &args[0]);
				1705	else if (token == Opt_grpjquota)
				1706	return set_qf_name(sb, GRPQUOTA, &args[0]);
				1707	else if (token == Opt_offusrjquota)
				1708	return clear_qf_name(sb, USRQUOTA);
				1709	else if (token == Opt_offgrpjquota)
				1710	return clear_qf_name(sb, GRPQUOTA);
				1711	#endif
				1712	switch (token) {
				1713	case Opt_noacl:
				1714	case Opt_nouser_xattr:
				1715	ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
				1716	break;
				1717	case Opt_sb:
				1718	return 1; /* handled by get_sb_block() */
				1719	case Opt_removed:
				1720	ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
				1721	return 1;
				1722	case Opt_abort:
				1723	sbi->s_mount_flags \|= EXT4_MF_FS_ABORTED;
				1724	return 1;
				1725	case Opt_i_version:
				1726	sb->s_flags \|= SB_I_VERSION;
				1727	return 1;
				1728	case Opt_lazytime:
				1729	sb->s_flags \|= MS_LAZYTIME;
				1730	return 1;
				1731	case Opt_nolazytime:
				1732	sb->s_flags &= ~MS_LAZYTIME;
				1733	return 1;
				1734	}
				1735
				1736	for (m = ext4_mount_opts; m->token != Opt_err; m++)
				1737	if (token == m->token)
				1738	break;
				1739
				1740	if (m->token == Opt_err) {
				1741	ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
				1742	"or missing value", opt);
				1743	return -1;
				1744	}
				1745
				1746	if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
				1747	ext4_msg(sb, KERN_ERR,
				1748	"Mount option \"%s\" incompatible with ext2", opt);
				1749	return -1;
				1750	}
				1751	if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
				1752	ext4_msg(sb, KERN_ERR,
				1753	"Mount option \"%s\" incompatible with ext3", opt);
				1754	return -1;
				1755	}
				1756
				1757	if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
				1758	return -1;
				1759	if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
				1760	return -1;
				1761	if (m->flags & MOPT_EXPLICIT) {
				1762	if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
				1763	set_opt2(sb, EXPLICIT_DELALLOC);
				1764	} else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
				1765	set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
				1766	} else
				1767	return -1;
				1768	}
				1769	if (m->flags & MOPT_CLEAR_ERR)
				1770	clear_opt(sb, ERRORS_MASK);
				1771	if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
				1772	ext4_msg(sb, KERN_ERR, "Cannot change quota "
				1773	"options when quota turned on");
				1774	return -1;
				1775	}
				1776
				1777	if (m->flags & MOPT_NOSUPPORT) {
				1778	ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
				1779	} else if (token == Opt_commit) {
				1780	if (arg == 0)
				1781	arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
				1782	sbi->s_commit_interval = HZ * arg;
				1783	} else if (token == Opt_debug_want_extra_isize) {
				1784	if ((arg & 1) \|\|
				1785	(arg < 4) \|\|
				1786	(arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
				1787	ext4_msg(sb, KERN_ERR,
				1788	"Invalid want_extra_isize %d", arg);
				1789	return -1;
				1790	}
				1791	sbi->s_want_extra_isize = arg;
				1792	} else if (token == Opt_max_batch_time) {
				1793	sbi->s_max_batch_time = arg;
				1794	} else if (token == Opt_min_batch_time) {
				1795	sbi->s_min_batch_time = arg;
				1796	} else if (token == Opt_inode_readahead_blks) {
				1797	if (arg && (arg > (1 << 30) \|\| !is_power_of_2(arg))) {
				1798	ext4_msg(sb, KERN_ERR,
				1799	"EXT4-fs: inode_readahead_blks must be "
				1800	"0 or a power of 2 smaller than 2^31");
				1801	return -1;
				1802	}
				1803	sbi->s_inode_readahead_blks = arg;
				1804	} else if (token == Opt_init_itable) {
				1805	set_opt(sb, INIT_INODE_TABLE);
				1806	if (!args->from)
				1807	arg = EXT4_DEF_LI_WAIT_MULT;
				1808	sbi->s_li_wait_mult = arg;
				1809	} else if (token == Opt_max_dir_size_kb) {
				1810	sbi->s_max_dir_size_kb = arg;
				1811	} else if (token == Opt_stripe) {
				1812	sbi->s_stripe = arg;
				1813	} else if (token == Opt_resuid) {
				1814	uid = make_kuid(current_user_ns(), arg);
				1815	if (!uid_valid(uid)) {
				1816	ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
				1817	return -1;
				1818	}
				1819	sbi->s_resuid = uid;
				1820	} else if (token == Opt_resgid) {
				1821	gid = make_kgid(current_user_ns(), arg);
				1822	if (!gid_valid(gid)) {
				1823	ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
				1824	return -1;
				1825	}
				1826	sbi->s_resgid = gid;
				1827	} else if (token == Opt_journal_dev) {
				1828	if (is_remount) {
				1829	ext4_msg(sb, KERN_ERR,
				1830	"Cannot specify journal on remount");
				1831	return -1;
				1832	}
				1833	*journal_devnum = arg;
				1834	} else if (token == Opt_journal_path) {
				1835	char *journal_path;
				1836	struct inode *journal_inode;
				1837	struct path path;
				1838	int error;
				1839
				1840	if (is_remount) {
				1841	ext4_msg(sb, KERN_ERR,
				1842	"Cannot specify journal on remount");
				1843	return -1;
				1844	}
				1845	journal_path = match_strdup(&args[0]);
				1846	if (!journal_path) {
				1847	ext4_msg(sb, KERN_ERR, "error: could not dup "
				1848	"journal device string");
				1849	return -1;
				1850	}
				1851
				1852	error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
				1853	if (error) {
				1854	ext4_msg(sb, KERN_ERR, "error: could not find "
				1855	"journal device path: error %d", error);
				1856	kfree(journal_path);
				1857	return -1;
				1858	}
				1859
				1860	journal_inode = d_inode(path.dentry);
				1861	if (!S_ISBLK(journal_inode->i_mode)) {
				1862	ext4_msg(sb, KERN_ERR, "error: journal path %s "
				1863	"is not a block device", journal_path);
				1864	path_put(&path);
				1865	kfree(journal_path);
				1866	return -1;
				1867	}
				1868
				1869	*journal_devnum = new_encode_dev(journal_inode->i_rdev);
				1870	path_put(&path);
				1871	kfree(journal_path);
				1872	} else if (token == Opt_journal_ioprio) {
				1873	if (arg > 7) {
				1874	ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
				1875	" (must be 0-7)");
				1876	return -1;
				1877	}
				1878	*journal_ioprio =
				1879	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
				1880	} else if (token == Opt_test_dummy_encryption) {
				1881	#ifdef CONFIG_EXT4_FS_ENCRYPTION
				1882	sbi->s_mount_flags \|= EXT4_MF_TEST_DUMMY_ENCRYPTION;
				1883	ext4_msg(sb, KERN_WARNING,
				1884	"Test dummy encryption mode enabled");
				1885	#else
				1886	ext4_msg(sb, KERN_WARNING,
				1887	"Test dummy encryption mount option ignored");
				1888	#endif
				1889	} else if (m->flags & MOPT_DATAJ) {
				1890	if (is_remount) {
				1891	if (!sbi->s_journal)
				1892	ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
				1893	else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
				1894	ext4_msg(sb, KERN_ERR,
				1895	"Cannot change data mode on remount");
				1896	return -1;
				1897	}
				1898	} else {
				1899	clear_opt(sb, DATA_FLAGS);
				1900	sbi->s_mount_opt \|= m->mount_opt;
				1901	}
				1902	#ifdef CONFIG_QUOTA
				1903	} else if (m->flags & MOPT_QFMT) {
				1904	if (sb_any_quota_loaded(sb) &&
				1905	sbi->s_jquota_fmt != m->mount_opt) {
				1906	ext4_msg(sb, KERN_ERR, "Cannot change journaled "
				1907	"quota options when quota turned on");
				1908	return -1;
				1909	}
				1910	if (ext4_has_feature_quota(sb)) {
				1911	ext4_msg(sb, KERN_INFO,
				1912	"Quota format mount options ignored "
				1913	"when QUOTA feature is enabled");
				1914	return 1;
				1915	}
				1916	sbi->s_jquota_fmt = m->mount_opt;
				1917	#endif
				1918	} else if (token == Opt_dax) {
				1919	#ifdef CONFIG_FS_DAX
				1920	ext4_msg(sb, KERN_WARNING,
				1921	"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
				1922	sbi->s_mount_opt \|= m->mount_opt;
				1923	#else
				1924	ext4_msg(sb, KERN_INFO, "dax option not supported");
				1925	return -1;
				1926	#endif
				1927	} else if (token == Opt_data_err_abort) {
				1928	sbi->s_mount_opt \|= m->mount_opt;
				1929	} else if (token == Opt_data_err_ignore) {
				1930	sbi->s_mount_opt &= ~m->mount_opt;
				1931	} else {
				1932	if (!args->from)
				1933	arg = 1;
				1934	if (m->flags & MOPT_CLEAR)
				1935	arg = !arg;
				1936	else if (unlikely(!(m->flags & MOPT_SET))) {
				1937	ext4_msg(sb, KERN_WARNING,
				1938	"buggy handling of option %s", opt);
				1939	WARN_ON(1);
				1940	return -1;
				1941	}
				1942	if (arg != 0)
				1943	sbi->s_mount_opt \|= m->mount_opt;
				1944	else
				1945	sbi->s_mount_opt &= ~m->mount_opt;
				1946	}
				1947	return 1;
				1948	}
				1949
				1950	static int parse_options(char options, struct super_block sb,
				1951	unsigned long *journal_devnum,
				1952	unsigned int *journal_ioprio,
				1953	int is_remount)
				1954	{
				1955	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1956	char p, __maybe_unused usr_qf_name, __maybe_unused *grp_qf_name;
				1957	substring_t args[MAX_OPT_ARGS];
				1958	int token;
				1959
				1960	if (!options)
				1961	return 1;
				1962
				1963	while ((p = strsep(&options, ",")) != NULL) {
				1964	if (!*p)
				1965	continue;
				1966	/*
				1967	* Initialize args struct so we know whether arg was
				1968	* found; some options take optional arguments.
				1969	*/
				1970	args[0].to = args[0].from = NULL;
				1971	token = match_token(p, tokens, args);
				1972	if (handle_mount_opt(sb, p, token, args, journal_devnum,
				1973	journal_ioprio, is_remount) < 0)
				1974	return 0;
				1975	}
				1976	#ifdef CONFIG_QUOTA
				1977	/*
				1978	* We do the test below only for project quotas. 'usrquota' and
				1979	* 'grpquota' mount options are allowed even without quota feature
				1980	* to support legacy quotas in quota files.
				1981	*/
				1982	if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
				1983	ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
				1984	"Cannot enable project quota enforcement.");
				1985	return 0;
				1986	}
				1987	usr_qf_name = get_qf_name(sb, sbi, USRQUOTA);
				1988	grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA);
				1989	if (usr_qf_name \|\| grp_qf_name) {
				1990	if (test_opt(sb, USRQUOTA) && usr_qf_name)
				1991	clear_opt(sb, USRQUOTA);
				1992
				1993	if (test_opt(sb, GRPQUOTA) && grp_qf_name)
				1994	clear_opt(sb, GRPQUOTA);
				1995
				1996	if (test_opt(sb, GRPQUOTA) \|\| test_opt(sb, USRQUOTA)) {
				1997	ext4_msg(sb, KERN_ERR, "old and new quota "
				1998	"format mixing");
				1999	return 0;
				2000	}
				2001
				2002	if (!sbi->s_jquota_fmt) {
				2003	ext4_msg(sb, KERN_ERR, "journaled quota format "
				2004	"not specified");
				2005	return 0;
				2006	}
				2007	}
				2008	#endif
				2009	if (test_opt(sb, DIOREAD_NOLOCK)) {
				2010	int blocksize =
				2011	BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
				2012
				2013	if (blocksize < PAGE_SIZE) {
				2014	ext4_msg(sb, KERN_ERR, "can't mount with "
				2015	"dioread_nolock if block size != PAGE_SIZE");
				2016	return 0;
				2017	}
				2018	}
				2019	return 1;
				2020	}
				2021
				2022	static inline void ext4_show_quota_options(struct seq_file *seq,
				2023	struct super_block *sb)
				2024	{
				2025	#if defined(CONFIG_QUOTA)
				2026	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2027	char usr_qf_name, grp_qf_name;
				2028
				2029	if (sbi->s_jquota_fmt) {
				2030	char *fmtname = "";
				2031
				2032	switch (sbi->s_jquota_fmt) {
				2033	case QFMT_VFS_OLD:
				2034	fmtname = "vfsold";
				2035	break;
				2036	case QFMT_VFS_V0:
				2037	fmtname = "vfsv0";
				2038	break;
				2039	case QFMT_VFS_V1:
				2040	fmtname = "vfsv1";
				2041	break;
				2042	}
				2043	seq_printf(seq, ",jqfmt=%s", fmtname);
				2044	}
				2045
				2046	rcu_read_lock();
				2047	usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
				2048	grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
				2049	if (usr_qf_name)
				2050	seq_show_option(seq, "usrjquota", usr_qf_name);
				2051	if (grp_qf_name)
				2052	seq_show_option(seq, "grpjquota", grp_qf_name);
				2053	rcu_read_unlock();
				2054	#endif
				2055	}
				2056
				2057	static const char *token2str(int token)
				2058	{
				2059	const struct match_token *t;
				2060
				2061	for (t = tokens; t->token != Opt_err; t++)
				2062	if (t->token == token && !strchr(t->pattern, '='))
				2063	break;
				2064	return t->pattern;
				2065	}
				2066
				2067	/*
				2068	* Show an option if
				2069	* - it's set to a non-default value OR
				2070	* - if the per-sb default is different from the global default
				2071	*/
				2072	static int _ext4_show_options(struct seq_file seq, struct super_block sb,
				2073	int nodefs)
				2074	{
				2075	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2076	struct ext4_super_block *es = sbi->s_es;
				2077	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
				2078	const struct mount_opts *m;
				2079	char sep = nodefs ? '\n' : ',';
				2080
				2081	#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
				2082	#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
				2083
				2084	if (sbi->s_sb_block != 1)
				2085	SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
				2086
				2087	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
				2088	int want_set = m->flags & MOPT_SET;
				2089	if (((m->flags & (MOPT_SET\|MOPT_CLEAR)) == 0) \|\|
				2090	(m->flags & MOPT_CLEAR_ERR))
				2091	continue;
				2092	if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
				2093	continue; /* skip if same as the default */
				2094	if ((want_set &&
				2095	(sbi->s_mount_opt & m->mount_opt) != m->mount_opt) \|\|
				2096	(!want_set && (sbi->s_mount_opt & m->mount_opt)))
				2097	continue; /* select Opt_noFoo vs Opt_Foo */
				2098	SEQ_OPTS_PRINT("%s", token2str(m->token));
				2099	}
				2100
				2101	if (nodefs \|\| !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) \|\|
				2102	le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
				2103	SEQ_OPTS_PRINT("resuid=%u",
				2104	from_kuid_munged(&init_user_ns, sbi->s_resuid));
				2105	if (nodefs \|\| !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) \|\|
				2106	le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
				2107	SEQ_OPTS_PRINT("resgid=%u",
				2108	from_kgid_munged(&init_user_ns, sbi->s_resgid));
				2109	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
				2110	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
				2111	SEQ_OPTS_PUTS("errors=remount-ro");
				2112	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
				2113	SEQ_OPTS_PUTS("errors=continue");
				2114	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
				2115	SEQ_OPTS_PUTS("errors=panic");
				2116	if (nodefs \|\| sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
				2117	SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
				2118	if (nodefs \|\| sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
				2119	SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
				2120	if (nodefs \|\| sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
				2121	SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
				2122	if (sb->s_flags & SB_I_VERSION)
				2123	SEQ_OPTS_PUTS("i_version");
				2124	if (nodefs \|\| sbi->s_stripe)
				2125	SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
				2126	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
				2127	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
				2128	SEQ_OPTS_PUTS("data=journal");
				2129	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
				2130	SEQ_OPTS_PUTS("data=ordered");
				2131	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
				2132	SEQ_OPTS_PUTS("data=writeback");
				2133	}
				2134	if (nodefs \|\|
				2135	sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
				2136	SEQ_OPTS_PRINT("inode_readahead_blks=%u",
				2137	sbi->s_inode_readahead_blks);
				2138
				2139	if (nodefs \|\| (test_opt(sb, INIT_INODE_TABLE) &&
				2140	(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
				2141	SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
				2142	if (nodefs \|\| sbi->s_max_dir_size_kb)
				2143	SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
				2144	if (test_opt(sb, DATA_ERR_ABORT))
				2145	SEQ_OPTS_PUTS("data_err=abort");
				2146	if (DUMMY_ENCRYPTION_ENABLED(sbi))
				2147	SEQ_OPTS_PUTS("test_dummy_encryption");
				2148
				2149	ext4_show_quota_options(seq, sb);
				2150	return 0;
				2151	}
				2152
				2153	static int ext4_show_options(struct seq_file seq, struct dentry root)
				2154	{
				2155	return _ext4_show_options(seq, root->d_sb, 0);
				2156	}
				2157
				2158	int ext4_seq_options_show(struct seq_file seq, void offset)
				2159	{
				2160	struct super_block *sb = seq->private;
				2161	int rc;
				2162
				2163	seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
				2164	rc = _ext4_show_options(seq, sb, 1);
				2165	seq_puts(seq, "\n");
				2166	return rc;
				2167	}
				2168
				2169	static int ext4_setup_super(struct super_block sb, struct ext4_super_block es,
				2170	int read_only)
				2171	{
				2172	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2173	int res = 0;
				2174
				2175	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
				2176	ext4_msg(sb, KERN_ERR, "revision level too high, "
				2177	"forcing read-only mode");
				2178	res = MS_RDONLY;
				2179	}
				2180	if (read_only)
				2181	goto done;
				2182	if (!(sbi->s_mount_state & EXT4_VALID_FS))
				2183	ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
				2184	"running e2fsck is recommended");
				2185	else if (sbi->s_mount_state & EXT4_ERROR_FS)
				2186	ext4_msg(sb, KERN_WARNING,
				2187	"warning: mounting fs with errors, "
				2188	"running e2fsck is recommended");
				2189	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
				2190	le16_to_cpu(es->s_mnt_count) >=
				2191	(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
				2192	ext4_msg(sb, KERN_WARNING,
				2193	"warning: maximal mount count reached, "
				2194	"running e2fsck is recommended");
				2195	else if (le32_to_cpu(es->s_checkinterval) &&
				2196	(le32_to_cpu(es->s_lastcheck) +
				2197	le32_to_cpu(es->s_checkinterval) <= get_seconds()))
				2198	ext4_msg(sb, KERN_WARNING,
				2199	"warning: checktime reached, "
				2200	"running e2fsck is recommended");
				2201	if (!sbi->s_journal)
				2202	es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
				2203	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
				2204	es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
				2205	le16_add_cpu(&es->s_mnt_count, 1);
				2206	es->s_mtime = cpu_to_le32(get_seconds());
				2207	ext4_update_dynamic_rev(sb);
				2208	if (sbi->s_journal)
				2209	ext4_set_feature_journal_needs_recovery(sb);
				2210
				2211	ext4_commit_super(sb, 1);
				2212	done:
				2213	if (test_opt(sb, DEBUG))
				2214	printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				2215	"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
				2216	sb->s_blocksize,
				2217	sbi->s_groups_count,
				2218	EXT4_BLOCKS_PER_GROUP(sb),
				2219	EXT4_INODES_PER_GROUP(sb),
				2220	sbi->s_mount_opt, sbi->s_mount_opt2);
				2221
				2222	cleancache_init_fs(sb);
				2223	return res;
				2224	}
				2225
				2226	int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
				2227	{
				2228	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2229	struct flex_groups old_groups, new_groups;
				2230	int size, i, j;
				2231
				2232	if (!sbi->s_log_groups_per_flex)
				2233	return 0;
				2234
				2235	size = ext4_flex_group(sbi, ngroup - 1) + 1;
				2236	if (size <= sbi->s_flex_groups_allocated)
				2237	return 0;
				2238
				2239	new_groups = kvzalloc(roundup_pow_of_two(size *
				2240	sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
				2241	if (!new_groups) {
				2242	ext4_msg(sb, KERN_ERR,
				2243	"not enough memory for %d flex group pointers", size);
				2244	return -ENOMEM;
				2245	}
				2246	for (i = sbi->s_flex_groups_allocated; i < size; i++) {
				2247	new_groups[i] = kvzalloc(roundup_pow_of_two(
				2248	sizeof(struct flex_groups)),
				2249	GFP_KERNEL);
				2250	if (!new_groups[i]) {
				2251	for (j = sbi->s_flex_groups_allocated; j < i; j++)
				2252	kvfree(new_groups[j]);
				2253	kvfree(new_groups);
				2254	ext4_msg(sb, KERN_ERR,
				2255	"not enough memory for %d flex groups", size);
				2256	return -ENOMEM;
				2257	}
				2258	}
				2259	rcu_read_lock();
				2260	old_groups = rcu_dereference(sbi->s_flex_groups);
				2261	if (old_groups)
				2262	memcpy(new_groups, old_groups,
				2263	(sbi->s_flex_groups_allocated *
				2264	sizeof(struct flex_groups *)));
				2265	rcu_read_unlock();
				2266	rcu_assign_pointer(sbi->s_flex_groups, new_groups);
				2267	sbi->s_flex_groups_allocated = size;
				2268	if (old_groups)
				2269	ext4_kvfree_array_rcu(old_groups);
				2270	return 0;
				2271	}
				2272
				2273	static int ext4_fill_flex_info(struct super_block *sb)
				2274	{
				2275	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2276	struct ext4_group_desc *gdp = NULL;
				2277	struct flex_groups *fg;
				2278	ext4_group_t flex_group;
				2279	int i, err;
				2280
				2281	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
				2282	if (sbi->s_log_groups_per_flex < 1 \|\| sbi->s_log_groups_per_flex > 31) {
				2283	sbi->s_log_groups_per_flex = 0;
				2284	return 1;
				2285	}
				2286
				2287	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
				2288	if (err)
				2289	goto failed;
				2290
				2291	for (i = 0; i < sbi->s_groups_count; i++) {
				2292	gdp = ext4_get_group_desc(sb, i, NULL);
				2293
				2294	flex_group = ext4_flex_group(sbi, i);
				2295	fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
				2296	atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
				2297	atomic64_add(ext4_free_group_clusters(sb, gdp),
				2298	&fg->free_clusters);
				2299	atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
				2300	}
				2301
				2302	return 1;
				2303	failed:
				2304	return 0;
				2305	}
				2306
				2307	static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
				2308	struct ext4_group_desc *gdp)
				2309	{
				2310	int offset = offsetof(struct ext4_group_desc, bg_checksum);
				2311	__u16 crc = 0;
				2312	__le32 le_group = cpu_to_le32(block_group);
				2313	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2314
				2315	if (ext4_has_metadata_csum(sbi->s_sb)) {
				2316	/* Use new metadata_csum algorithm */
				2317	__u32 csum32;
				2318	__u16 dummy_csum = 0;
				2319
				2320	csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
				2321	sizeof(le_group));
				2322	csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
				2323	csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
				2324	sizeof(dummy_csum));
				2325	offset += sizeof(dummy_csum);
				2326	if (offset < sbi->s_desc_size)
				2327	csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
				2328	sbi->s_desc_size - offset);
				2329
				2330	crc = csum32 & 0xFFFF;
				2331	goto out;
				2332	}
				2333
				2334	/* old crc16 code */
				2335	if (!ext4_has_feature_gdt_csum(sb))
				2336	return 0;
				2337
				2338	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
				2339	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
				2340	crc = crc16(crc, (__u8 *)gdp, offset);
				2341	offset += sizeof(gdp->bg_checksum); /* skip checksum */
				2342	/* for checksum of struct ext4_group_desc do the rest...*/
				2343	if (ext4_has_feature_64bit(sb) &&
				2344	offset < le16_to_cpu(sbi->s_es->s_desc_size))
				2345	crc = crc16(crc, (__u8 *)gdp + offset,
				2346	le16_to_cpu(sbi->s_es->s_desc_size) -
				2347	offset);
				2348
				2349	out:
				2350	return cpu_to_le16(crc);
				2351	}
				2352
				2353	int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
				2354	struct ext4_group_desc *gdp)
				2355	{
				2356	if (ext4_has_group_desc_csum(sb) &&
				2357	(gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
				2358	return 0;
				2359
				2360	return 1;
				2361	}
				2362
				2363	void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
				2364	struct ext4_group_desc *gdp)
				2365	{
				2366	if (!ext4_has_group_desc_csum(sb))
				2367	return;
				2368	gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
				2369	}
				2370
				2371	/* Called at mount-time, super-block is locked */
				2372	static int ext4_check_descriptors(struct super_block *sb,
				2373	ext4_fsblk_t sb_block,
				2374	ext4_group_t *first_not_zeroed)
				2375	{
				2376	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2377	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
				2378	ext4_fsblk_t last_block;
				2379	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
				2380	ext4_fsblk_t block_bitmap;
				2381	ext4_fsblk_t inode_bitmap;
				2382	ext4_fsblk_t inode_table;
				2383	int flexbg_flag = 0;
				2384	ext4_group_t i, grp = sbi->s_groups_count;
				2385
				2386	if (ext4_has_feature_flex_bg(sb))
				2387	flexbg_flag = 1;
				2388
				2389	ext4_debug("Checking group descriptors");
				2390
				2391	for (i = 0; i < sbi->s_groups_count; i++) {
				2392	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
				2393
				2394	if (i == sbi->s_groups_count - 1 \|\| flexbg_flag)
				2395	last_block = ext4_blocks_count(sbi->s_es) - 1;
				2396	else
				2397	last_block = first_block +
				2398	(EXT4_BLOCKS_PER_GROUP(sb) - 1);
				2399
				2400	if ((grp == sbi->s_groups_count) &&
				2401	!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				2402	grp = i;
				2403
				2404	block_bitmap = ext4_block_bitmap(sb, gdp);
				2405	if (block_bitmap == sb_block) {
				2406	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2407	"Block bitmap for group %u overlaps "
				2408	"superblock", i);
				2409	if (!sb_rdonly(sb))
				2410	return 0;
				2411	}
				2412	if (block_bitmap >= sb_block + 1 &&
				2413	block_bitmap <= last_bg_block) {
				2414	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2415	"Block bitmap for group %u overlaps "
				2416	"block group descriptors", i);
				2417	if (!sb_rdonly(sb))
				2418	return 0;
				2419	}
				2420	if (block_bitmap < first_block \|\| block_bitmap > last_block) {
				2421	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2422	"Block bitmap for group %u not in group "
				2423	"(block %llu)!", i, block_bitmap);
				2424	return 0;
				2425	}
				2426	inode_bitmap = ext4_inode_bitmap(sb, gdp);
				2427	if (inode_bitmap == sb_block) {
				2428	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2429	"Inode bitmap for group %u overlaps "
				2430	"superblock", i);
				2431	if (!sb_rdonly(sb))
				2432	return 0;
				2433	}
				2434	if (inode_bitmap >= sb_block + 1 &&
				2435	inode_bitmap <= last_bg_block) {
				2436	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2437	"Inode bitmap for group %u overlaps "
				2438	"block group descriptors", i);
				2439	if (!sb_rdonly(sb))
				2440	return 0;
				2441	}
				2442	if (inode_bitmap < first_block \|\| inode_bitmap > last_block) {
				2443	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2444	"Inode bitmap for group %u not in group "
				2445	"(block %llu)!", i, inode_bitmap);
				2446	return 0;
				2447	}
				2448	inode_table = ext4_inode_table(sb, gdp);
				2449	if (inode_table == sb_block) {
				2450	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2451	"Inode table for group %u overlaps "
				2452	"superblock", i);
				2453	if (!sb_rdonly(sb))
				2454	return 0;
				2455	}
				2456	if (inode_table >= sb_block + 1 &&
				2457	inode_table <= last_bg_block) {
				2458	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2459	"Inode table for group %u overlaps "
				2460	"block group descriptors", i);
				2461	if (!sb_rdonly(sb))
				2462	return 0;
				2463	}
				2464	if (inode_table < first_block \|\|
				2465	inode_table + sbi->s_itb_per_group - 1 > last_block) {
				2466	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2467	"Inode table for group %u not in group "
				2468	"(block %llu)!", i, inode_table);
				2469	return 0;
				2470	}
				2471	ext4_lock_group(sb, i);
				2472	if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
				2473	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2474	"Checksum for group %u failed (%u!=%u)",
				2475	i, le16_to_cpu(ext4_group_desc_csum(sb, i,
				2476	gdp)), le16_to_cpu(gdp->bg_checksum));
				2477	if (!sb_rdonly(sb)) {
				2478	ext4_unlock_group(sb, i);
				2479	return 0;
				2480	}
				2481	}
				2482	ext4_unlock_group(sb, i);
				2483	if (!flexbg_flag)
				2484	first_block += EXT4_BLOCKS_PER_GROUP(sb);
				2485	}
				2486	if (NULL != first_not_zeroed)
				2487	*first_not_zeroed = grp;
				2488	return 1;
				2489	}
				2490
				2491	/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
				2492	* the superblock) which were deleted from all directories, but held open by
				2493	* a process at the time of a crash. We walk the list and try to delete these
				2494	* inodes at recovery time (only with a read-write filesystem).
				2495	*
				2496	* In order to keep the orphan inode chain consistent during traversal (in
				2497	* case of crash during recovery), we link each inode into the superblock
				2498	* orphan list_head and handle it the same way as an inode deletion during
				2499	* normal operation (which journals the operations for us).
				2500	*
				2501	* We only do an iget() and an iput() on each inode, which is very safe if we
				2502	* accidentally point at an in-use or already deleted inode. The worst that
				2503	* can happen in this case is that we get a "bit already cleared" message from
				2504	* ext4_free_inode(). The only reason we would point at a wrong inode is if
				2505	* e2fsck was run on this filesystem, and it must have already done the orphan
				2506	* inode cleanup for us, so we can safely abort without any further action.
				2507	*/
				2508	static void ext4_orphan_cleanup(struct super_block *sb,
				2509	struct ext4_super_block *es)
				2510	{
				2511	unsigned int s_flags = sb->s_flags;
				2512	int ret, nr_orphans = 0, nr_truncates = 0;
				2513	#ifdef CONFIG_QUOTA
				2514	int quota_update = 0;
				2515	int i;
				2516	#endif
				2517	if (!es->s_last_orphan) {
				2518	jbd_debug(4, "no orphan inodes to clean up\n");
				2519	return;
				2520	}
				2521
				2522	if (bdev_read_only(sb->s_bdev)) {
				2523	ext4_msg(sb, KERN_ERR, "write access "
				2524	"unavailable, skipping orphan cleanup");
				2525	return;
				2526	}
				2527
				2528	/* Check if feature set would not allow a r/w mount */
				2529	if (!ext4_feature_set_ok(sb, 0)) {
				2530	ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
				2531	"unknown ROCOMPAT features");
				2532	return;
				2533	}
				2534
				2535	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
				2536	/* don't clear list on RO mount w/ errors */
				2537	if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
				2538	ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
				2539	"clearing orphan list.\n");
				2540	es->s_last_orphan = 0;
				2541	}
				2542	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				2543	return;
				2544	}
				2545
				2546	if (s_flags & MS_RDONLY) {
				2547	ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
				2548	sb->s_flags &= ~MS_RDONLY;
				2549	}
				2550	#ifdef CONFIG_QUOTA
				2551	/* Needed for iput() to work correctly and not trash data */
				2552	sb->s_flags \|= MS_ACTIVE;
				2553
				2554	/*
				2555	* Turn on quotas which were not enabled for read-only mounts if
				2556	* filesystem has quota feature, so that they are updated correctly.
				2557	*/
				2558	if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
				2559	int ret = ext4_enable_quotas(sb);
				2560
				2561	if (!ret)
				2562	quota_update = 1;
				2563	else
				2564	ext4_msg(sb, KERN_ERR,
				2565	"Cannot turn on quotas: error %d", ret);
				2566	}
				2567
				2568	/* Turn on journaled quotas used for old sytle */
				2569	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
				2570	if (EXT4_SB(sb)->s_qf_names[i]) {
				2571	int ret = ext4_quota_on_mount(sb, i);
				2572
				2573	if (!ret)
				2574	quota_update = 1;
				2575	else
				2576	ext4_msg(sb, KERN_ERR,
				2577	"Cannot turn on journaled "
				2578	"quota: type %d: error %d", i, ret);
				2579	}
				2580	}
				2581	#endif
				2582
				2583	while (es->s_last_orphan) {
				2584	struct inode *inode;
				2585
				2586	/*
				2587	* We may have encountered an error during cleanup; if
				2588	* so, skip the rest.
				2589	*/
				2590	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
				2591	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				2592	es->s_last_orphan = 0;
				2593	break;
				2594	}
				2595
				2596	inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
				2597	if (IS_ERR(inode)) {
				2598	es->s_last_orphan = 0;
				2599	break;
				2600	}
				2601
				2602	list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
				2603	dquot_initialize(inode);
				2604	if (inode->i_nlink) {
				2605	if (test_opt(sb, DEBUG))
				2606	ext4_msg(sb, KERN_DEBUG,
				2607	"%s: truncating inode %lu to %lld bytes",
				2608	__func__, inode->i_ino, inode->i_size);
				2609	jbd_debug(2, "truncating inode %lu to %lld bytes\n",
				2610	inode->i_ino, inode->i_size);
				2611	inode_lock(inode);
				2612	truncate_inode_pages(inode->i_mapping, inode->i_size);
				2613	ret = ext4_truncate(inode);
				2614	if (ret)
				2615	ext4_std_error(inode->i_sb, ret);
				2616	inode_unlock(inode);
				2617	nr_truncates++;
				2618	} else {
				2619	if (test_opt(sb, DEBUG))
				2620	ext4_msg(sb, KERN_DEBUG,
				2621	"%s: deleting unreferenced inode %lu",
				2622	__func__, inode->i_ino);
				2623	jbd_debug(2, "deleting unreferenced inode %lu\n",
				2624	inode->i_ino);
				2625	nr_orphans++;
				2626	}
				2627	iput(inode); /* The delete magic happens here! */
				2628	}
				2629
				2630	#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
				2631
				2632	if (nr_orphans)
				2633	ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
				2634	PLURAL(nr_orphans));
				2635	if (nr_truncates)
				2636	ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
				2637	PLURAL(nr_truncates));
				2638	#ifdef CONFIG_QUOTA
				2639	/* Turn off quotas if they were enabled for orphan cleanup */
				2640	if (quota_update) {
				2641	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
				2642	if (sb_dqopt(sb)->files[i])
				2643	dquot_quota_off(sb, i);
				2644	}
				2645	}
				2646	#endif
				2647	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
				2648	}
				2649
				2650	/*
				2651	* Maximal extent format file size.
				2652	* Resulting logical blkno at s_maxbytes must fit in our on-disk
				2653	* extent format containers, within a sector_t, and within i_blocks
				2654	* in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
				2655	* so that won't be a limiting factor.
				2656	*
				2657	* However there is other limiting factor. We do store extents in the form
				2658	* of starting block and length, hence the resulting length of the extent
				2659	* covering maximum file size must fit into on-disk format containers as
				2660	* well. Given that length is always by 1 unit bigger than max unit (because
				2661	* we count 0 as well) we have to lower the s_maxbytes by one fs block.
				2662	*
				2663	* Note, this does not consider any metadata overhead for vfs i_blocks.
				2664	*/
				2665	static loff_t ext4_max_size(int blkbits, int has_huge_files)
				2666	{
				2667	loff_t res;
				2668	loff_t upper_limit = MAX_LFS_FILESIZE;
				2669
				2670	/* small i_blocks in vfs inode? */
				2671	if (!has_huge_files \|\| sizeof(blkcnt_t) < sizeof(u64)) {
				2672	/*
				2673	* CONFIG_LBDAF is not enabled implies the inode
				2674	* i_block represent total blocks in 512 bytes
				2675	* 32 == size of vfs inode i_blocks * 8
				2676	*/
				2677	upper_limit = (1LL << 32) - 1;
				2678
				2679	/* total blocks in file system block size */
				2680	upper_limit >>= (blkbits - 9);
				2681	upper_limit <<= blkbits;
				2682	}
				2683
				2684	/*
				2685	* 32-bit extent-start container, ee_block. We lower the maxbytes
				2686	* by one fs block, so ee_len can cover the extent of maximum file
				2687	* size
				2688	*/
				2689	res = (1LL << 32) - 1;
				2690	res <<= blkbits;
				2691
				2692	/* Sanity check against vm- & vfs- imposed limits */
				2693	if (res > upper_limit)
				2694	res = upper_limit;
				2695
				2696	return res;
				2697	}
				2698
				2699	/*
				2700	* Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
				2701	* block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
				2702	* We need to be 1 filesystem block less than the 2^48 sector limit.
				2703	*/
				2704	static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
				2705	{
				2706	loff_t res = EXT4_NDIR_BLOCKS;
				2707	int meta_blocks;
				2708	loff_t upper_limit;
				2709	/* This is calculated to be the largest file size for a dense, block
				2710	* mapped file such that the file's total number of 512-byte sectors,
				2711	* including data and all indirect blocks, does not exceed (2^48 - 1).
				2712	*
				2713	* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
				2714	* number of 512-byte sectors of the file.
				2715	*/
				2716
				2717	if (!has_huge_files \|\| sizeof(blkcnt_t) < sizeof(u64)) {
				2718	/*
				2719	* !has_huge_files or CONFIG_LBDAF not enabled implies that
				2720	* the inode i_block field represents total file blocks in
				2721	* 2^32 512-byte sectors == size of vfs inode i_blocks * 8
				2722	*/
				2723	upper_limit = (1LL << 32) - 1;
				2724
				2725	/* total blocks in file system block size */
				2726	upper_limit >>= (bits - 9);
				2727
				2728	} else {
				2729	/*
				2730	* We use 48 bit ext4_inode i_blocks
				2731	* With EXT4_HUGE_FILE_FL set the i_blocks
				2732	* represent total number of blocks in
				2733	* file system block size
				2734	*/
				2735	upper_limit = (1LL << 48) - 1;
				2736
				2737	}
				2738
				2739	/* indirect blocks */
				2740	meta_blocks = 1;
				2741	/* double indirect blocks */
				2742	meta_blocks += 1 + (1LL << (bits-2));
				2743	/* tripple indirect blocks */
				2744	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
				2745
				2746	upper_limit -= meta_blocks;
				2747	upper_limit <<= bits;
				2748
				2749	res += 1LL << (bits-2);
				2750	res += 1LL << (2*(bits-2));
				2751	res += 1LL << (3*(bits-2));
				2752	res <<= bits;
				2753	if (res > upper_limit)
				2754	res = upper_limit;
				2755
				2756	if (res > MAX_LFS_FILESIZE)
				2757	res = MAX_LFS_FILESIZE;
				2758
				2759	return res;
				2760	}
				2761
				2762	static ext4_fsblk_t descriptor_loc(struct super_block *sb,
				2763	ext4_fsblk_t logical_sb_block, int nr)
				2764	{
				2765	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2766	ext4_group_t bg, first_meta_bg;
				2767	int has_super = 0;
				2768
				2769	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
				2770
				2771	if (!ext4_has_feature_meta_bg(sb) \|\| nr < first_meta_bg)
				2772	return logical_sb_block + nr + 1;
				2773	bg = sbi->s_desc_per_block * nr;
				2774	if (ext4_bg_has_super(sb, bg))
				2775	has_super = 1;
				2776
				2777	/*
				2778	* If we have a meta_bg fs with 1k blocks, group 0's GDT is at
				2779	* block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
				2780	* on modern mke2fs or blksize > 1k on older mke2fs) then we must
				2781	* compensate.
				2782	*/
				2783	if (sb->s_blocksize == 1024 && nr == 0 &&
				2784	le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
				2785	has_super++;
				2786
				2787	return (has_super + ext4_group_first_block_no(sb, bg));
				2788	}
				2789
				2790	/**
				2791	* ext4_get_stripe_size: Get the stripe size.
				2792	* @sbi: In memory super block info
				2793	*
				2794	* If we have specified it via mount option, then
				2795	* use the mount option value. If the value specified at mount time is
				2796	* greater than the blocks per group use the super block value.
				2797	* If the super block value is greater than blocks per group return 0.
				2798	* Allocator needs it be less than blocks per group.
				2799	*
				2800	*/
				2801	static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
				2802	{
				2803	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
				2804	unsigned long stripe_width =
				2805	le32_to_cpu(sbi->s_es->s_raid_stripe_width);
				2806	int ret;
				2807
				2808	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
				2809	ret = sbi->s_stripe;
				2810	else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
				2811	ret = stripe_width;
				2812	else if (stride && stride <= sbi->s_blocks_per_group)
				2813	ret = stride;
				2814	else
				2815	ret = 0;
				2816
				2817	/*
				2818	* If the stripe width is 1, this makes no sense and
				2819	* we set it to 0 to turn off stripe handling code.
				2820	*/
				2821	if (ret <= 1)
				2822	ret = 0;
				2823
				2824	return ret;
				2825	}
				2826
				2827	/*
				2828	* Check whether this filesystem can be mounted based on
				2829	* the features present and the RDONLY/RDWR mount requested.
				2830	* Returns 1 if this filesystem can be mounted as requested,
				2831	* 0 if it cannot be.
				2832	*/
				2833	static int ext4_feature_set_ok(struct super_block *sb, int readonly)
				2834	{
				2835	if (ext4_has_unknown_ext4_incompat_features(sb)) {
				2836	ext4_msg(sb, KERN_ERR,
				2837	"Couldn't mount because of "
				2838	"unsupported optional features (%x)",
				2839	(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
				2840	~EXT4_FEATURE_INCOMPAT_SUPP));
				2841	return 0;
				2842	}
				2843
				2844	if (readonly)
				2845	return 1;
				2846
				2847	if (ext4_has_feature_readonly(sb)) {
				2848	ext4_msg(sb, KERN_INFO, "filesystem is read-only");
				2849	sb->s_flags \|= MS_RDONLY;
				2850	return 1;
				2851	}
				2852
				2853	/* Check that feature set is OK for a read-write mount */
				2854	if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
				2855	ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
				2856	"unsupported optional features (%x)",
				2857	(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
				2858	~EXT4_FEATURE_RO_COMPAT_SUPP));
				2859	return 0;
				2860	}
				2861	/*
				2862	* Large file size enabled file system can only be mounted
				2863	* read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
				2864	*/
				2865	if (ext4_has_feature_huge_file(sb)) {
				2866	if (sizeof(blkcnt_t) < sizeof(u64)) {
				2867	ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
				2868	"cannot be mounted RDWR without "
				2869	"CONFIG_LBDAF");
				2870	return 0;
				2871	}
				2872	}
				2873	if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
				2874	ext4_msg(sb, KERN_ERR,
				2875	"Can't support bigalloc feature without "
				2876	"extents feature\n");
				2877	return 0;
				2878	}
				2879
				2880	#if !IS_ENABLED(CONFIG_QUOTA) \|\| !IS_ENABLED(CONFIG_QFMT_V2)
				2881	if (!readonly && (ext4_has_feature_quota(sb) \|\|
				2882	ext4_has_feature_project(sb))) {
				2883	ext4_msg(sb, KERN_ERR,
				2884	"The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
				2885	return 0;
				2886	}
				2887	#endif /* CONFIG_QUOTA */
				2888	return 1;
				2889	}
				2890
				2891	/*
				2892	* This function is called once a day if we have errors logged
				2893	* on the file system
				2894	*/
				2895	static void print_daily_error_info(unsigned long arg)
				2896	{
				2897	struct super_block sb = (struct super_block ) arg;
				2898	struct ext4_sb_info *sbi;
				2899	struct ext4_super_block *es;
				2900
				2901	sbi = EXT4_SB(sb);
				2902	es = sbi->s_es;
				2903
				2904	if (es->s_error_count)
				2905	/* fsck newer than v1.41.13 is needed to clean this condition. */
				2906	ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
				2907	le32_to_cpu(es->s_error_count));
				2908	if (es->s_first_error_time) {
				2909	printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
				2910	sb->s_id, le32_to_cpu(es->s_first_error_time),
				2911	(int) sizeof(es->s_first_error_func),
				2912	es->s_first_error_func,
				2913	le32_to_cpu(es->s_first_error_line));
				2914	if (es->s_first_error_ino)
				2915	printk(KERN_CONT ": inode %u",
				2916	le32_to_cpu(es->s_first_error_ino));
				2917	if (es->s_first_error_block)
				2918	printk(KERN_CONT ": block %llu", (unsigned long long)
				2919	le64_to_cpu(es->s_first_error_block));
				2920	printk(KERN_CONT "\n");
				2921	}
				2922	if (es->s_last_error_time) {
				2923	printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
				2924	sb->s_id, le32_to_cpu(es->s_last_error_time),
				2925	(int) sizeof(es->s_last_error_func),
				2926	es->s_last_error_func,
				2927	le32_to_cpu(es->s_last_error_line));
				2928	if (es->s_last_error_ino)
				2929	printk(KERN_CONT ": inode %u",
				2930	le32_to_cpu(es->s_last_error_ino));
				2931	if (es->s_last_error_block)
				2932	printk(KERN_CONT ": block %llu", (unsigned long long)
				2933	le64_to_cpu(es->s_last_error_block));
				2934	printk(KERN_CONT "\n");
				2935	}
				2936	mod_timer(&sbi->s_err_report, jiffies + 246060HZ); / Once a day */
				2937	}
				2938
				2939	/* Find next suitable group and run ext4_init_inode_table */
				2940	static int ext4_run_li_request(struct ext4_li_request *elr)
				2941	{
				2942	struct ext4_group_desc *gdp = NULL;
				2943	ext4_group_t group, ngroups;
				2944	struct super_block *sb;
				2945	unsigned long timeout = 0;
				2946	int ret = 0;
				2947
				2948	sb = elr->lr_super;
				2949	ngroups = EXT4_SB(sb)->s_groups_count;
				2950
				2951	for (group = elr->lr_next_group; group < ngroups; group++) {
				2952	gdp = ext4_get_group_desc(sb, group, NULL);
				2953	if (!gdp) {
				2954	ret = 1;
				2955	break;
				2956	}
				2957
				2958	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				2959	break;
				2960	}
				2961
				2962	if (group >= ngroups)
				2963	ret = 1;
				2964
				2965	if (!ret) {
				2966	timeout = jiffies;
				2967	ret = ext4_init_inode_table(sb, group,
				2968	elr->lr_timeout ? 0 : 1);
				2969	if (elr->lr_timeout == 0) {
				2970	timeout = (jiffies - timeout) *
				2971	elr->lr_sbi->s_li_wait_mult;
				2972	elr->lr_timeout = timeout;
				2973	}
				2974	elr->lr_next_sched = jiffies + elr->lr_timeout;
				2975	elr->lr_next_group = group + 1;
				2976	}
				2977	return ret;
				2978	}
				2979
				2980	/*
				2981	* Remove lr_request from the list_request and free the
				2982	* request structure. Should be called with li_list_mtx held
				2983	*/
				2984	static void ext4_remove_li_request(struct ext4_li_request *elr)
				2985	{
				2986	struct ext4_sb_info *sbi;
				2987
				2988	if (!elr)
				2989	return;
				2990
				2991	sbi = elr->lr_sbi;
				2992
				2993	list_del(&elr->lr_request);
				2994	sbi->s_li_request = NULL;
				2995	kfree(elr);
				2996	}
				2997
				2998	static void ext4_unregister_li_request(struct super_block *sb)
				2999	{
				3000	mutex_lock(&ext4_li_mtx);
				3001	if (!ext4_li_info) {
				3002	mutex_unlock(&ext4_li_mtx);
				3003	return;
				3004	}
				3005
				3006	mutex_lock(&ext4_li_info->li_list_mtx);
				3007	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
				3008	mutex_unlock(&ext4_li_info->li_list_mtx);
				3009	mutex_unlock(&ext4_li_mtx);
				3010	}
				3011
				3012	static struct task_struct *ext4_lazyinit_task;
				3013
				3014	/*
				3015	* This is the function where ext4lazyinit thread lives. It walks
				3016	* through the request list searching for next scheduled filesystem.
				3017	* When such a fs is found, run the lazy initialization request
				3018	* (ext4_rn_li_request) and keep track of the time spend in this
				3019	* function. Based on that time we compute next schedule time of
				3020	* the request. When walking through the list is complete, compute
				3021	* next waking time and put itself into sleep.
				3022	*/
				3023	static int ext4_lazyinit_thread(void *arg)
				3024	{
				3025	struct ext4_lazy_init eli = (struct ext4_lazy_init )arg;
				3026	struct list_head pos, n;
				3027	struct ext4_li_request *elr;
				3028	unsigned long next_wakeup, cur;
				3029
				3030	BUG_ON(NULL == eli);
				3031
				3032	cont_thread:
				3033	while (true) {
				3034	next_wakeup = MAX_JIFFY_OFFSET;
				3035
				3036	mutex_lock(&eli->li_list_mtx);
				3037	if (list_empty(&eli->li_request_list)) {
				3038	mutex_unlock(&eli->li_list_mtx);
				3039	goto exit_thread;
				3040	}
				3041	list_for_each_safe(pos, n, &eli->li_request_list) {
				3042	int err = 0;
				3043	int progress = 0;
				3044	elr = list_entry(pos, struct ext4_li_request,
				3045	lr_request);
				3046
				3047	if (time_before(jiffies, elr->lr_next_sched)) {
				3048	if (time_before(elr->lr_next_sched, next_wakeup))
				3049	next_wakeup = elr->lr_next_sched;
				3050	continue;
				3051	}
				3052	if (down_read_trylock(&elr->lr_super->s_umount)) {
				3053	if (sb_start_write_trylock(elr->lr_super)) {
				3054	progress = 1;
				3055	/*
				3056	* We hold sb->s_umount, sb can not
				3057	* be removed from the list, it is
				3058	* now safe to drop li_list_mtx
				3059	*/
				3060	mutex_unlock(&eli->li_list_mtx);
				3061	err = ext4_run_li_request(elr);
				3062	sb_end_write(elr->lr_super);
				3063	mutex_lock(&eli->li_list_mtx);
				3064	n = pos->next;
				3065	}
				3066	up_read((&elr->lr_super->s_umount));
				3067	}
				3068	/* error, remove the lazy_init job */
				3069	if (err) {
				3070	ext4_remove_li_request(elr);
				3071	continue;
				3072	}
				3073	if (!progress) {
				3074	elr->lr_next_sched = jiffies +
				3075	(prandom_u32()
				3076	% (EXT4_DEF_LI_MAX_START_DELAY * HZ));
				3077	}
				3078	if (time_before(elr->lr_next_sched, next_wakeup))
				3079	next_wakeup = elr->lr_next_sched;
				3080	}
				3081	mutex_unlock(&eli->li_list_mtx);
				3082
				3083	try_to_freeze();
				3084
				3085	cur = jiffies;
				3086	if ((time_after_eq(cur, next_wakeup)) \|\|
				3087	(MAX_JIFFY_OFFSET == next_wakeup)) {
				3088	cond_resched();
				3089	continue;
				3090	}
				3091
				3092	schedule_timeout_interruptible(next_wakeup - cur);
				3093
				3094	if (kthread_should_stop()) {
				3095	ext4_clear_request_list();
				3096	goto exit_thread;
				3097	}
				3098	}
				3099
				3100	exit_thread:
				3101	/*
				3102	* It looks like the request list is empty, but we need
				3103	* to check it under the li_list_mtx lock, to prevent any
				3104	* additions into it, and of course we should lock ext4_li_mtx
				3105	* to atomically free the list and ext4_li_info, because at
				3106	* this point another ext4 filesystem could be registering
				3107	* new one.
				3108	*/
				3109	mutex_lock(&ext4_li_mtx);
				3110	mutex_lock(&eli->li_list_mtx);
				3111	if (!list_empty(&eli->li_request_list)) {
				3112	mutex_unlock(&eli->li_list_mtx);
				3113	mutex_unlock(&ext4_li_mtx);
				3114	goto cont_thread;
				3115	}
				3116	mutex_unlock(&eli->li_list_mtx);
				3117	kfree(ext4_li_info);
				3118	ext4_li_info = NULL;
				3119	mutex_unlock(&ext4_li_mtx);
				3120
				3121	return 0;
				3122	}
				3123
				3124	static void ext4_clear_request_list(void)
				3125	{
				3126	struct list_head pos, n;
				3127	struct ext4_li_request *elr;
				3128
				3129	mutex_lock(&ext4_li_info->li_list_mtx);
				3130	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
				3131	elr = list_entry(pos, struct ext4_li_request,
				3132	lr_request);
				3133	ext4_remove_li_request(elr);
				3134	}
				3135	mutex_unlock(&ext4_li_info->li_list_mtx);
				3136	}
				3137
				3138	static int ext4_run_lazyinit_thread(void)
				3139	{
				3140	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
				3141	ext4_li_info, "ext4lazyinit");
				3142	if (IS_ERR(ext4_lazyinit_task)) {
				3143	int err = PTR_ERR(ext4_lazyinit_task);
				3144	ext4_clear_request_list();
				3145	kfree(ext4_li_info);
				3146	ext4_li_info = NULL;
				3147	printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
				3148	"initialization thread\n",
				3149	err);
				3150	return err;
				3151	}
				3152	ext4_li_info->li_state \|= EXT4_LAZYINIT_RUNNING;
				3153	return 0;
				3154	}
				3155
				3156	/*
				3157	* Check whether it make sense to run itable init. thread or not.
				3158	* If there is at least one uninitialized inode table, return
				3159	* corresponding group number, else the loop goes through all
				3160	* groups and return total number of groups.
				3161	*/
				3162	static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
				3163	{
				3164	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
				3165	struct ext4_group_desc *gdp = NULL;
				3166
				3167	if (!ext4_has_group_desc_csum(sb))
				3168	return ngroups;
				3169
				3170	for (group = 0; group < ngroups; group++) {
				3171	gdp = ext4_get_group_desc(sb, group, NULL);
				3172	if (!gdp)
				3173	continue;
				3174
				3175	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				3176	break;
				3177	}
				3178
				3179	return group;
				3180	}
				3181
				3182	static int ext4_li_info_new(void)
				3183	{
				3184	struct ext4_lazy_init *eli = NULL;
				3185
				3186	eli = kzalloc(sizeof(*eli), GFP_KERNEL);
				3187	if (!eli)
				3188	return -ENOMEM;
				3189
				3190	INIT_LIST_HEAD(&eli->li_request_list);
				3191	mutex_init(&eli->li_list_mtx);
				3192
				3193	eli->li_state \|= EXT4_LAZYINIT_QUIT;
				3194
				3195	ext4_li_info = eli;
				3196
				3197	return 0;
				3198	}
				3199
				3200	static struct ext4_li_request ext4_li_request_new(struct super_block sb,
				3201	ext4_group_t start)
				3202	{
				3203	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3204	struct ext4_li_request *elr;
				3205
				3206	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
				3207	if (!elr)
				3208	return NULL;
				3209
				3210	elr->lr_super = sb;
				3211	elr->lr_sbi = sbi;
				3212	elr->lr_next_group = start;
				3213
				3214	/*
				3215	* Randomize first schedule time of the request to
				3216	* spread the inode table initialization requests
				3217	* better.
				3218	*/
				3219	elr->lr_next_sched = jiffies + (prandom_u32() %
				3220	(EXT4_DEF_LI_MAX_START_DELAY * HZ));
				3221	return elr;
				3222	}
				3223
				3224	int ext4_register_li_request(struct super_block *sb,
				3225	ext4_group_t first_not_zeroed)
				3226	{
				3227	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3228	struct ext4_li_request *elr = NULL;
				3229	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
				3230	int ret = 0;
				3231
				3232	mutex_lock(&ext4_li_mtx);
				3233	if (sbi->s_li_request != NULL) {
				3234	/*
				3235	* Reset timeout so it can be computed again, because
				3236	* s_li_wait_mult might have changed.
				3237	*/
				3238	sbi->s_li_request->lr_timeout = 0;
				3239	goto out;
				3240	}
				3241
				3242	if (first_not_zeroed == ngroups \|\| sb_rdonly(sb) \|\|
				3243	!test_opt(sb, INIT_INODE_TABLE))
				3244	goto out;
				3245
				3246	elr = ext4_li_request_new(sb, first_not_zeroed);
				3247	if (!elr) {
				3248	ret = -ENOMEM;
				3249	goto out;
				3250	}
				3251
				3252	if (NULL == ext4_li_info) {
				3253	ret = ext4_li_info_new();
				3254	if (ret)
				3255	goto out;
				3256	}
				3257
				3258	mutex_lock(&ext4_li_info->li_list_mtx);
				3259	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
				3260	mutex_unlock(&ext4_li_info->li_list_mtx);
				3261
				3262	sbi->s_li_request = elr;
				3263	/*
				3264	* set elr to NULL here since it has been inserted to
				3265	* the request_list and the removal and free of it is
				3266	* handled by ext4_clear_request_list from now on.
				3267	*/
				3268	elr = NULL;
				3269
				3270	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
				3271	ret = ext4_run_lazyinit_thread();
				3272	if (ret)
				3273	goto out;
				3274	}
				3275	out:
				3276	mutex_unlock(&ext4_li_mtx);
				3277	if (ret)
				3278	kfree(elr);
				3279	return ret;
				3280	}
				3281
				3282	/*
				3283	* We do not need to lock anything since this is called on
				3284	* module unload.
				3285	*/
				3286	static void ext4_destroy_lazyinit_thread(void)
				3287	{
				3288	/*
				3289	* If thread exited earlier
				3290	* there's nothing to be done.
				3291	*/
				3292	if (!ext4_li_info \|\| !ext4_lazyinit_task)
				3293	return;
				3294
				3295	kthread_stop(ext4_lazyinit_task);
				3296	}
				3297
				3298	static int set_journal_csum_feature_set(struct super_block *sb)
				3299	{
				3300	int ret = 1;
				3301	int compat, incompat;
				3302	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3303
				3304	if (ext4_has_metadata_csum(sb)) {
				3305	/* journal checksum v3 */
				3306	compat = 0;
				3307	incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
				3308	} else {
				3309	/* journal checksum v1 */
				3310	compat = JBD2_FEATURE_COMPAT_CHECKSUM;
				3311	incompat = 0;
				3312	}
				3313
				3314	jbd2_journal_clear_features(sbi->s_journal,
				3315	JBD2_FEATURE_COMPAT_CHECKSUM, 0,
				3316	JBD2_FEATURE_INCOMPAT_CSUM_V3 \|
				3317	JBD2_FEATURE_INCOMPAT_CSUM_V2);
				3318	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				3319	ret = jbd2_journal_set_features(sbi->s_journal,
				3320	compat, 0,
				3321	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT \|
				3322	incompat);
				3323	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
				3324	ret = jbd2_journal_set_features(sbi->s_journal,
				3325	compat, 0,
				3326	incompat);
				3327	jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				3328	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3329	} else {
				3330	jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				3331	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3332	}
				3333
				3334	return ret;
				3335	}
				3336
				3337	/*
				3338	* Note: calculating the overhead so we can be compatible with
				3339	* historical BSD practice is quite difficult in the face of
				3340	* clusters/bigalloc. This is because multiple metadata blocks from
				3341	* different block group can end up in the same allocation cluster.
				3342	* Calculating the exact overhead in the face of clustered allocation
				3343	* requires either O(all block bitmaps) in memory or O(number of block
				3344	* groups**2) in time. We will still calculate the superblock for
				3345	* older file systems --- and if we come across with a bigalloc file
				3346	* system with zero in s_overhead_clusters the estimate will be close to
				3347	* correct especially for very large cluster sizes --- but for newer
				3348	* file systems, it's better to calculate this figure once at mkfs
				3349	* time, and store it in the superblock. If the superblock value is
				3350	* present (even for non-bigalloc file systems), we will use it.
				3351	*/
				3352	static int count_overhead(struct super_block *sb, ext4_group_t grp,
				3353	char *buf)
				3354	{
				3355	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3356	struct ext4_group_desc *gdp;
				3357	ext4_fsblk_t first_block, last_block, b;
				3358	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
				3359	int s, j, count = 0;
				3360
				3361	if (!ext4_has_feature_bigalloc(sb))
				3362	return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
				3363	sbi->s_itb_per_group + 2);
				3364
				3365	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
				3366	(grp * EXT4_BLOCKS_PER_GROUP(sb));
				3367	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
				3368	for (i = 0; i < ngroups; i++) {
				3369	gdp = ext4_get_group_desc(sb, i, NULL);
				3370	b = ext4_block_bitmap(sb, gdp);
				3371	if (b >= first_block && b <= last_block) {
				3372	ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
				3373	count++;
				3374	}
				3375	b = ext4_inode_bitmap(sb, gdp);
				3376	if (b >= first_block && b <= last_block) {
				3377	ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
				3378	count++;
				3379	}
				3380	b = ext4_inode_table(sb, gdp);
				3381	if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
				3382	for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				3383	int c = EXT4_B2C(sbi, b - first_block);
				3384	ext4_set_bit(c, buf);
				3385	count++;
				3386	}
				3387	if (i != grp)
				3388	continue;
				3389	s = 0;
				3390	if (ext4_bg_has_super(sb, grp)) {
				3391	ext4_set_bit(s++, buf);
				3392	count++;
				3393	}
				3394	j = ext4_bg_num_gdb(sb, grp);
				3395	if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
				3396	ext4_error(sb, "Invalid number of block group "
				3397	"descriptor blocks: %d", j);
				3398	j = EXT4_BLOCKS_PER_GROUP(sb) - s;
				3399	}
				3400	count += j;
				3401	for (; j > 0; j--)
				3402	ext4_set_bit(EXT4_B2C(sbi, s++), buf);
				3403	}
				3404	if (!count)
				3405	return 0;
				3406	return EXT4_CLUSTERS_PER_GROUP(sb) -
				3407	ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
				3408	}
				3409
				3410	/*
				3411	* Compute the overhead and stash it in sbi->s_overhead
				3412	*/
				3413	int ext4_calculate_overhead(struct super_block *sb)
				3414	{
				3415	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3416	struct ext4_super_block *es = sbi->s_es;
				3417	struct inode *j_inode;
				3418	unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
				3419	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
				3420	ext4_fsblk_t overhead = 0;
				3421	char buf = (char ) get_zeroed_page(GFP_NOFS);
				3422
				3423	if (!buf)
				3424	return -ENOMEM;
				3425
				3426	/*
				3427	* Compute the overhead (FS structures). This is constant
				3428	* for a given filesystem unless the number of block groups
				3429	* changes so we cache the previous value until it does.
				3430	*/
				3431
				3432	/*
				3433	* All of the blocks before first_data_block are overhead
				3434	*/
				3435	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
				3436
				3437	/*
				3438	* Add the overhead found in each block group
				3439	*/
				3440	for (i = 0; i < ngroups; i++) {
				3441	int blks;
				3442
				3443	blks = count_overhead(sb, i, buf);
				3444	overhead += blks;
				3445	if (blks)
				3446	memset(buf, 0, PAGE_SIZE);
				3447	cond_resched();
				3448	}
				3449
				3450	/*
				3451	* Add the internal journal blocks whether the journal has been
				3452	* loaded or not
				3453	*/
				3454	if (sbi->s_journal && !sbi->journal_bdev)
				3455	overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
				3456	else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
				3457	/* j_inum for internal journal is non-zero */
				3458	j_inode = ext4_get_journal_inode(sb, j_inum);
				3459	if (j_inode) {
				3460	j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
				3461	overhead += EXT4_NUM_B2C(sbi, j_blocks);
				3462	iput(j_inode);
				3463	} else {
				3464	ext4_msg(sb, KERN_ERR, "can't get journal size");
				3465	}
				3466	}
				3467	sbi->s_overhead = overhead;
				3468	smp_wmb();
				3469	free_page((unsigned long) buf);
				3470	return 0;
				3471	}
				3472
				3473	static void ext4_set_resv_clusters(struct super_block *sb)
				3474	{
				3475	ext4_fsblk_t resv_clusters;
				3476	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3477
				3478	/*
				3479	* There's no need to reserve anything when we aren't using extents.
				3480	* The space estimates are exact, there are no unwritten extents,
				3481	* hole punching doesn't need new metadata... This is needed especially
				3482	* to keep ext2/3 backward compatibility.
				3483	*/
				3484	if (!ext4_has_feature_extents(sb))
				3485	return;
				3486	/*
				3487	* By default we reserve 2% or 4096 clusters, whichever is smaller.
				3488	* This should cover the situations where we can not afford to run
				3489	* out of space like for example punch hole, or converting
				3490	* unwritten extents in delalloc path. In most cases such
				3491	* allocation would require 1, or 2 blocks, higher numbers are
				3492	* very rare.
				3493	*/
				3494	resv_clusters = (ext4_blocks_count(sbi->s_es) >>
				3495	sbi->s_cluster_bits);
				3496
				3497	do_div(resv_clusters, 50);
				3498	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
				3499
				3500	atomic64_set(&sbi->s_resv_clusters, resv_clusters);
				3501	}
				3502
				3503	static int ext4_fill_super(struct super_block sb, void data, int silent)
				3504	{
				3505	struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
				3506	char *orig_data = kstrdup(data, GFP_KERNEL);
				3507	struct buffer_head bh, *group_desc;
				3508	struct ext4_super_block *es = NULL;
				3509	struct ext4_sb_info sbi = kzalloc(sizeof(sbi), GFP_KERNEL);
				3510	struct flex_groups **flex_groups;
				3511	ext4_fsblk_t block;
				3512	ext4_fsblk_t sb_block = get_sb_block(&data);
				3513	ext4_fsblk_t logical_sb_block;
				3514	unsigned long offset = 0;
				3515	unsigned long journal_devnum = 0;
				3516	unsigned long def_mount_opts;
				3517	struct inode *root;
				3518	const char *descr;
				3519	int ret = -ENOMEM;
				3520	int blocksize, clustersize;
				3521	unsigned int db_count;
				3522	unsigned int i;
				3523	int needs_recovery, has_huge_files, has_bigalloc;
				3524	__u64 blocks_count;
				3525	int err = 0;
				3526	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
				3527	ext4_group_t first_not_zeroed;
				3528
				3529	if ((data && !orig_data) \|\| !sbi)
				3530	goto out_free_base;
				3531
				3532	sbi->s_daxdev = dax_dev;
				3533	sbi->s_blockgroup_lock =
				3534	kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
				3535	if (!sbi->s_blockgroup_lock)
				3536	goto out_free_base;
				3537
				3538	sb->s_fs_info = sbi;
				3539	sbi->s_sb = sb;
				3540	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
				3541	sbi->s_sb_block = sb_block;
				3542	if (sb->s_bdev->bd_part)
				3543	sbi->s_sectors_written_start =
				3544	part_stat_read(sb->s_bdev->bd_part, sectors[1]);
				3545
				3546	/* Cleanup superblock name */
				3547	strreplace(sb->s_id, '/', '!');
				3548
				3549	/* -EINVAL is default */
				3550	ret = -EINVAL;
				3551	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
				3552	if (!blocksize) {
				3553	ext4_msg(sb, KERN_ERR, "unable to set blocksize");
				3554	goto out_fail;
				3555	}
				3556
				3557	/*
				3558	* The ext4 superblock will not be buffer aligned for other than 1kB
				3559	* block sizes. We need to calculate the offset from buffer start.
				3560	*/
				3561	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
				3562	logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
				3563	offset = do_div(logical_sb_block, blocksize);
				3564	} else {
				3565	logical_sb_block = sb_block;
				3566	}
				3567
				3568	if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
				3569	ext4_msg(sb, KERN_ERR, "unable to read superblock");
				3570	goto out_fail;
				3571	}
				3572	/*
				3573	* Note: s_es must be initialized as soon as possible because
				3574	* some ext4 macro-instructions depend on its value
				3575	*/
				3576	es = (struct ext4_super_block *) (bh->b_data + offset);
				3577	sbi->s_es = es;
				3578	sb->s_magic = le16_to_cpu(es->s_magic);
				3579	if (sb->s_magic != EXT4_SUPER_MAGIC)
				3580	goto cantfind_ext4;
				3581	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
				3582
				3583	/* Warn if metadata_csum and gdt_csum are both set. */
				3584	if (ext4_has_feature_metadata_csum(sb) &&
				3585	ext4_has_feature_gdt_csum(sb))
				3586	ext4_warning(sb, "metadata_csum and uninit_bg are "
				3587	"redundant flags; please run fsck.");
				3588
				3589	/* Check for a known checksum algorithm */
				3590	if (!ext4_verify_csum_type(sb, es)) {
				3591	ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
				3592	"unknown checksum algorithm.");
				3593	silent = 1;
				3594	goto cantfind_ext4;
				3595	}
				3596
				3597	/* Load the checksum driver */
				3598	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
				3599	if (IS_ERR(sbi->s_chksum_driver)) {
				3600	ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
				3601	ret = PTR_ERR(sbi->s_chksum_driver);
				3602	sbi->s_chksum_driver = NULL;
				3603	goto failed_mount;
				3604	}
				3605
				3606	/* Check superblock checksum */
				3607	if (!ext4_superblock_csum_verify(sb, es)) {
				3608	ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
				3609	"invalid superblock checksum. Run e2fsck?");
				3610	silent = 1;
				3611	ret = -EFSBADCRC;
				3612	goto cantfind_ext4;
				3613	}
				3614
				3615	/* Precompute checksum seed for all metadata */
				3616	if (ext4_has_feature_csum_seed(sb))
				3617	sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
				3618	else if (ext4_has_metadata_csum(sb) \|\| ext4_has_feature_ea_inode(sb))
				3619	sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
				3620	sizeof(es->s_uuid));
				3621
				3622	/* Set defaults before we parse the mount options */
				3623	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				3624	set_opt(sb, INIT_INODE_TABLE);
				3625	if (def_mount_opts & EXT4_DEFM_DEBUG)
				3626	set_opt(sb, DEBUG);
				3627	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
				3628	set_opt(sb, GRPID);
				3629	if (def_mount_opts & EXT4_DEFM_UID16)
				3630	set_opt(sb, NO_UID32);
				3631	/* xattr user namespace & acls are now defaulted on */
				3632	set_opt(sb, XATTR_USER);
				3633	#ifdef CONFIG_EXT4_FS_POSIX_ACL
				3634	set_opt(sb, POSIX_ACL);
				3635	#endif
				3636	/* don't forget to enable journal_csum when metadata_csum is enabled. */
				3637	if (ext4_has_metadata_csum(sb))
				3638	set_opt(sb, JOURNAL_CHECKSUM);
				3639
				3640	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
				3641	set_opt(sb, JOURNAL_DATA);
				3642	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
				3643	set_opt(sb, ORDERED_DATA);
				3644	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
				3645	set_opt(sb, WRITEBACK_DATA);
				3646
				3647	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
				3648	set_opt(sb, ERRORS_PANIC);
				3649	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
				3650	set_opt(sb, ERRORS_CONT);
				3651	else
				3652	set_opt(sb, ERRORS_RO);
				3653	/* block_validity enabled by default; disable with noblock_validity */
				3654	set_opt(sb, BLOCK_VALIDITY);
				3655	if (def_mount_opts & EXT4_DEFM_DISCARD)
				3656	set_opt(sb, DISCARD);
				3657
				3658	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
				3659	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
				3660	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
				3661	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
				3662	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
				3663
				3664	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
				3665	set_opt(sb, BARRIER);
				3666
				3667	/*
				3668	* enable delayed allocation by default
				3669	* Use -o nodelalloc to turn it off
				3670	*/
				3671	if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
				3672	((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
				3673	set_opt(sb, DELALLOC);
				3674
				3675	/*
				3676	* set default s_li_wait_mult for lazyinit, for the case there is
				3677	* no mount option specified.
				3678	*/
				3679	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
				3680
				3681	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
				3682	if (blocksize < EXT4_MIN_BLOCK_SIZE \|\|
				3683	blocksize > EXT4_MAX_BLOCK_SIZE) {
				3684	ext4_msg(sb, KERN_ERR,
				3685	"Unsupported filesystem blocksize %d (%d log_block_size)",
				3686	blocksize, le32_to_cpu(es->s_log_block_size));
				3687	goto failed_mount;
				3688	}
				3689
				3690	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
				3691	sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
				3692	sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
				3693	} else {
				3694	sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
				3695	sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
				3696	if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
				3697	ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
				3698	sbi->s_first_ino);
				3699	goto failed_mount;
				3700	}
				3701	if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) \|\|
				3702	(!is_power_of_2(sbi->s_inode_size)) \|\|
				3703	(sbi->s_inode_size > blocksize)) {
				3704	ext4_msg(sb, KERN_ERR,
				3705	"unsupported inode size: %d",
				3706	sbi->s_inode_size);
				3707	ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
				3708	goto failed_mount;
				3709	}
				3710	/*
				3711	* i_atime_extra is the last extra field available for
				3712	* [acm]times in struct ext4_inode. Checking for that
				3713	* field should suffice to ensure we have extra space
				3714	* for all three.
				3715	*/
				3716	if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
				3717	sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
				3718	sb->s_time_gran = 1;
				3719	} else {
				3720	sb->s_time_gran = NSEC_PER_SEC;
				3721	}
				3722	}
				3723	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
				3724	sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
				3725	EXT4_GOOD_OLD_INODE_SIZE;
				3726	if (ext4_has_feature_extra_isize(sb)) {
				3727	unsigned v, max = (sbi->s_inode_size -
				3728	EXT4_GOOD_OLD_INODE_SIZE);
				3729
				3730	v = le16_to_cpu(es->s_want_extra_isize);
				3731	if (v > max) {
				3732	ext4_msg(sb, KERN_ERR,
				3733	"bad s_want_extra_isize: %d", v);
				3734	goto failed_mount;
				3735	}
				3736	if (sbi->s_want_extra_isize < v)
				3737	sbi->s_want_extra_isize = v;
				3738
				3739	v = le16_to_cpu(es->s_min_extra_isize);
				3740	if (v > max) {
				3741	ext4_msg(sb, KERN_ERR,
				3742	"bad s_min_extra_isize: %d", v);
				3743	goto failed_mount;
				3744	}
				3745	if (sbi->s_want_extra_isize < v)
				3746	sbi->s_want_extra_isize = v;
				3747	}
				3748	}
				3749
				3750	if (sbi->s_es->s_mount_opts[0]) {
				3751	char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
				3752	sizeof(sbi->s_es->s_mount_opts),
				3753	GFP_KERNEL);
				3754	if (!s_mount_opts)
				3755	goto failed_mount;
				3756	if (!parse_options(s_mount_opts, sb, &journal_devnum,
				3757	&journal_ioprio, 0)) {
				3758	ext4_msg(sb, KERN_WARNING,
				3759	"failed to parse options in superblock: %s",
				3760	s_mount_opts);
				3761	}
				3762	kfree(s_mount_opts);
				3763	}
				3764	sbi->s_def_mount_opt = sbi->s_mount_opt;
				3765	if (!parse_options((char *) data, sb, &journal_devnum,
				3766	&journal_ioprio, 0))
				3767	goto failed_mount;
				3768
				3769	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
				3770	printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
				3771	"with data=journal disables delayed "
				3772	"allocation and O_DIRECT support!\n");
				3773	if (test_opt2(sb, EXPLICIT_DELALLOC)) {
				3774	ext4_msg(sb, KERN_ERR, "can't mount with "
				3775	"both data=journal and delalloc");
				3776	goto failed_mount;
				3777	}
				3778	if (test_opt(sb, DIOREAD_NOLOCK)) {
				3779	ext4_msg(sb, KERN_ERR, "can't mount with "
				3780	"both data=journal and dioread_nolock");
				3781	goto failed_mount;
				3782	}
				3783	if (test_opt(sb, DAX)) {
				3784	ext4_msg(sb, KERN_ERR, "can't mount with "
				3785	"both data=journal and dax");
				3786	goto failed_mount;
				3787	}
				3788	if (ext4_has_feature_encrypt(sb)) {
				3789	ext4_msg(sb, KERN_WARNING,
				3790	"encrypted files will use data=ordered "
				3791	"instead of data journaling mode");
				3792	}
				3793	if (test_opt(sb, DELALLOC))
				3794	clear_opt(sb, DELALLOC);
				3795	} else {
				3796	sb->s_iflags \|= SB_I_CGROUPWB;
				3797	}
				3798
				3799	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				3800	(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
				3801
				3802	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
				3803	(ext4_has_compat_features(sb) \|\|
				3804	ext4_has_ro_compat_features(sb) \|\|
				3805	ext4_has_incompat_features(sb)))
				3806	ext4_msg(sb, KERN_WARNING,
				3807	"feature flags set on rev 0 fs, "
				3808	"running e2fsck is recommended");
				3809
				3810	if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
				3811	set_opt2(sb, HURD_COMPAT);
				3812	if (ext4_has_feature_64bit(sb)) {
				3813	ext4_msg(sb, KERN_ERR,
				3814	"The Hurd can't support 64-bit file systems");
				3815	goto failed_mount;
				3816	}
				3817
				3818	/*
				3819	* ea_inode feature uses l_i_version field which is not
				3820	* available in HURD_COMPAT mode.
				3821	*/
				3822	if (ext4_has_feature_ea_inode(sb)) {
				3823	ext4_msg(sb, KERN_ERR,
				3824	"ea_inode feature is not supported for Hurd");
				3825	goto failed_mount;
				3826	}
				3827	}
				3828
				3829	if (IS_EXT2_SB(sb)) {
				3830	if (ext2_feature_set_ok(sb))
				3831	ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
				3832	"using the ext4 subsystem");
				3833	else {
				3834	/*
				3835	* If we're probing be silent, if this looks like
				3836	* it's actually an ext[34] filesystem.
				3837	*/
				3838	if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
				3839	goto failed_mount;
				3840	ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
				3841	"to feature incompatibilities");
				3842	goto failed_mount;
				3843	}
				3844	}
				3845
				3846	if (IS_EXT3_SB(sb)) {
				3847	if (ext3_feature_set_ok(sb))
				3848	ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
				3849	"using the ext4 subsystem");
				3850	else {
				3851	/*
				3852	* If we're probing be silent, if this looks like
				3853	* it's actually an ext4 filesystem.
				3854	*/
				3855	if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
				3856	goto failed_mount;
				3857	ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
				3858	"to feature incompatibilities");
				3859	goto failed_mount;
				3860	}
				3861	}
				3862
				3863	/*
				3864	* Check feature flags regardless of the revision level, since we
				3865	* previously didn't change the revision level when setting the flags,
				3866	* so there is a chance incompat flags are set on a rev 0 filesystem.
				3867	*/
				3868	if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
				3869	goto failed_mount;
				3870
				3871	if (le32_to_cpu(es->s_log_block_size) >
				3872	(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
				3873	ext4_msg(sb, KERN_ERR,
				3874	"Invalid log block size: %u",
				3875	le32_to_cpu(es->s_log_block_size));
				3876	goto failed_mount;
				3877	}
				3878	if (le32_to_cpu(es->s_log_cluster_size) >
				3879	(EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
				3880	ext4_msg(sb, KERN_ERR,
				3881	"Invalid log cluster size: %u",
				3882	le32_to_cpu(es->s_log_cluster_size));
				3883	goto failed_mount;
				3884	}
				3885
				3886	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
				3887	ext4_msg(sb, KERN_ERR,
				3888	"Number of reserved GDT blocks insanely large: %d",
				3889	le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
				3890	goto failed_mount;
				3891	}
				3892
				3893	if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
				3894	if (ext4_has_feature_inline_data(sb)) {
				3895	ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
				3896	" that may contain inline data");
				3897	goto failed_mount;
				3898	}
				3899	if (!bdev_dax_supported(sb->s_bdev, blocksize))
				3900	goto failed_mount;
				3901	}
				3902
				3903	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
				3904	ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
				3905	es->s_encryption_level);
				3906	goto failed_mount;
				3907	}
				3908
				3909	if (sb->s_blocksize != blocksize) {
				3910	/* Validate the filesystem blocksize */
				3911	if (!sb_set_blocksize(sb, blocksize)) {
				3912	ext4_msg(sb, KERN_ERR, "bad block size %d",
				3913	blocksize);
				3914	goto failed_mount;
				3915	}
				3916
				3917	brelse(bh);
				3918	logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
				3919	offset = do_div(logical_sb_block, blocksize);
				3920	bh = sb_bread_unmovable(sb, logical_sb_block);
				3921	if (!bh) {
				3922	ext4_msg(sb, KERN_ERR,
				3923	"Can't read superblock on 2nd try");
				3924	goto failed_mount;
				3925	}
				3926	es = (struct ext4_super_block *)(bh->b_data + offset);
				3927	sbi->s_es = es;
				3928	if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
				3929	ext4_msg(sb, KERN_ERR,
				3930	"Magic mismatch, very weird!");
				3931	goto failed_mount;
				3932	}
				3933	}
				3934
				3935	has_huge_files = ext4_has_feature_huge_file(sb);
				3936	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
				3937	has_huge_files);
				3938	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
				3939
				3940	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
				3941	if (ext4_has_feature_64bit(sb)) {
				3942	if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT \|\|
				3943	sbi->s_desc_size > EXT4_MAX_DESC_SIZE \|\|
				3944	!is_power_of_2(sbi->s_desc_size)) {
				3945	ext4_msg(sb, KERN_ERR,
				3946	"unsupported descriptor size %lu",
				3947	sbi->s_desc_size);
				3948	goto failed_mount;
				3949	}
				3950	} else
				3951	sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
				3952
				3953	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
				3954	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
				3955
				3956	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
				3957	if (sbi->s_inodes_per_block == 0)
				3958	goto cantfind_ext4;
				3959	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block \|\|
				3960	sbi->s_inodes_per_group > blocksize * 8) {
				3961	ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
				3962	sbi->s_inodes_per_group);
				3963	goto failed_mount;
				3964	}
				3965	sbi->s_itb_per_group = sbi->s_inodes_per_group /
				3966	sbi->s_inodes_per_block;
				3967	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
				3968	sbi->s_sbh = bh;
				3969	sbi->s_mount_state = le16_to_cpu(es->s_state);
				3970	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
				3971	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
				3972
				3973	for (i = 0; i < 4; i++)
				3974	sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
				3975	sbi->s_def_hash_version = es->s_def_hash_version;
				3976	if (ext4_has_feature_dir_index(sb)) {
				3977	i = le32_to_cpu(es->s_flags);
				3978	if (i & EXT2_FLAGS_UNSIGNED_HASH)
				3979	sbi->s_hash_unsigned = 3;
				3980	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
				3981	#ifdef __CHAR_UNSIGNED__
				3982	if (!sb_rdonly(sb))
				3983	es->s_flags \|=
				3984	cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
				3985	sbi->s_hash_unsigned = 3;
				3986	#else
				3987	if (!sb_rdonly(sb))
				3988	es->s_flags \|=
				3989	cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
				3990	#endif
				3991	}
				3992	}
				3993
				3994	/* Handle clustersize */
				3995	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
				3996	has_bigalloc = ext4_has_feature_bigalloc(sb);
				3997	if (has_bigalloc) {
				3998	if (clustersize < blocksize) {
				3999	ext4_msg(sb, KERN_ERR,
				4000	"cluster size (%d) smaller than "
				4001	"block size (%d)", clustersize, blocksize);
				4002	goto failed_mount;
				4003	}
				4004	sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
				4005	le32_to_cpu(es->s_log_block_size);
				4006	sbi->s_clusters_per_group =
				4007	le32_to_cpu(es->s_clusters_per_group);
				4008	if (sbi->s_clusters_per_group > blocksize * 8) {
				4009	ext4_msg(sb, KERN_ERR,
				4010	"#clusters per group too big: %lu",
				4011	sbi->s_clusters_per_group);
				4012	goto failed_mount;
				4013	}
				4014	if (sbi->s_blocks_per_group !=
				4015	(sbi->s_clusters_per_group * (clustersize / blocksize))) {
				4016	ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
				4017	"clusters per group (%lu) inconsistent",
				4018	sbi->s_blocks_per_group,
				4019	sbi->s_clusters_per_group);
				4020	goto failed_mount;
				4021	}
				4022	} else {
				4023	if (clustersize != blocksize) {
				4024	ext4_msg(sb, KERN_ERR,
				4025	"fragment/cluster size (%d) != "
				4026	"block size (%d)", clustersize, blocksize);
				4027	goto failed_mount;
				4028	}
				4029	if (sbi->s_blocks_per_group > blocksize * 8) {
				4030	ext4_msg(sb, KERN_ERR,
				4031	"#blocks per group too big: %lu",
				4032	sbi->s_blocks_per_group);
				4033	goto failed_mount;
				4034	}
				4035	sbi->s_clusters_per_group = sbi->s_blocks_per_group;
				4036	sbi->s_cluster_bits = 0;
				4037	}
				4038	sbi->s_cluster_ratio = clustersize / blocksize;
				4039
				4040	/* Do we have standard group size of clustersize * 8 blocks ? */
				4041	if (sbi->s_blocks_per_group == clustersize << 3)
				4042	set_opt2(sb, STD_GROUP_SIZE);
				4043
				4044	/*
				4045	* Test whether we have more sectors than will fit in sector_t,
				4046	* and whether the max offset is addressable by the page cache.
				4047	*/
				4048	err = generic_check_addressable(sb->s_blocksize_bits,
				4049	ext4_blocks_count(es));
				4050	if (err) {
				4051	ext4_msg(sb, KERN_ERR, "filesystem"
				4052	" too large to mount safely on this system");
				4053	if (sizeof(sector_t) < 8)
				4054	ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
				4055	goto failed_mount;
				4056	}
				4057
				4058	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
				4059	goto cantfind_ext4;
				4060
				4061	/* check blocks count against device size */
				4062	blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
				4063	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
				4064	ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
				4065	"exceeds size of device (%llu blocks)",
				4066	ext4_blocks_count(es), blocks_count);
				4067	goto failed_mount;
				4068	}
				4069
				4070	/*
				4071	* It makes no sense for the first data block to be beyond the end
				4072	* of the filesystem.
				4073	*/
				4074	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
				4075	ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
				4076	"block %u is beyond end of filesystem (%llu)",
				4077	le32_to_cpu(es->s_first_data_block),
				4078	ext4_blocks_count(es));
				4079	goto failed_mount;
				4080	}
				4081	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
				4082	(sbi->s_cluster_ratio == 1)) {
				4083	ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
				4084	"block is 0 with a 1k block and cluster size");
				4085	goto failed_mount;
				4086	}
				4087
				4088	blocks_count = (ext4_blocks_count(es) -
				4089	le32_to_cpu(es->s_first_data_block) +
				4090	EXT4_BLOCKS_PER_GROUP(sb) - 1);
				4091	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
				4092	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
				4093	ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
				4094	"(block count %llu, first data block %u, "
				4095	"blocks per group %lu)", blocks_count,
				4096	ext4_blocks_count(es),
				4097	le32_to_cpu(es->s_first_data_block),
				4098	EXT4_BLOCKS_PER_GROUP(sb));
				4099	goto failed_mount;
				4100	}
				4101	sbi->s_groups_count = blocks_count;
				4102	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
				4103	(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
				4104	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
				4105	le32_to_cpu(es->s_inodes_count)) {
				4106	ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
				4107	le32_to_cpu(es->s_inodes_count),
				4108	((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
				4109	ret = -EINVAL;
				4110	goto failed_mount;
				4111	}
				4112	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
				4113	EXT4_DESC_PER_BLOCK(sb);
				4114	if (ext4_has_feature_meta_bg(sb)) {
				4115	if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
				4116	ext4_msg(sb, KERN_WARNING,
				4117	"first meta block group too large: %u "
				4118	"(group descriptor block count %u)",
				4119	le32_to_cpu(es->s_first_meta_bg), db_count);
				4120	goto failed_mount;
				4121	}
				4122	}
				4123	rcu_assign_pointer(sbi->s_group_desc,
				4124	kvmalloc_array(db_count,
				4125	sizeof(struct buffer_head *),
				4126	GFP_KERNEL));
				4127	if (sbi->s_group_desc == NULL) {
				4128	ext4_msg(sb, KERN_ERR, "not enough memory");
				4129	ret = -ENOMEM;
				4130	goto failed_mount;
				4131	}
				4132
				4133	bgl_lock_init(sbi->s_blockgroup_lock);
				4134
				4135	/* Pre-read the descriptors into the buffer cache */
				4136	for (i = 0; i < db_count; i++) {
				4137	block = descriptor_loc(sb, logical_sb_block, i);
				4138	sb_breadahead_unmovable(sb, block);
				4139	}
				4140
				4141	for (i = 0; i < db_count; i++) {
				4142	struct buffer_head *bh;
				4143
				4144	block = descriptor_loc(sb, logical_sb_block, i);
				4145	bh = sb_bread_unmovable(sb, block);
				4146	if (!bh) {
				4147	ext4_msg(sb, KERN_ERR,
				4148	"can't read group descriptor %d", i);
				4149	db_count = i;
				4150	goto failed_mount2;
				4151	}
				4152	rcu_read_lock();
				4153	rcu_dereference(sbi->s_group_desc)[i] = bh;
				4154	rcu_read_unlock();
				4155	}
				4156	sbi->s_gdb_count = db_count;
				4157	if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
				4158	ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
				4159	ret = -EFSCORRUPTED;
				4160	goto failed_mount2;
				4161	}
				4162
				4163	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
				4164	spin_lock_init(&sbi->s_next_gen_lock);
				4165
				4166	setup_timer(&sbi->s_err_report, print_daily_error_info,
				4167	(unsigned long) sb);
				4168
				4169	/* Register extent status tree shrinker */
				4170	if (ext4_es_register_shrinker(sbi))
				4171	goto failed_mount3;
				4172
				4173	sbi->s_stripe = ext4_get_stripe_size(sbi);
				4174	sbi->s_extent_max_zeroout_kb = 32;
				4175
				4176	/*
				4177	* set up enough so that it can read an inode
				4178	*/
				4179	sb->s_op = &ext4_sops;
				4180	sb->s_export_op = &ext4_export_ops;
				4181	sb->s_xattr = ext4_xattr_handlers;
				4182	#ifdef CONFIG_EXT4_FS_ENCRYPTION
				4183	sb->s_cop = &ext4_cryptops;
				4184	#endif
				4185	#ifdef CONFIG_QUOTA
				4186	sb->dq_op = &ext4_quota_operations;
				4187	if (ext4_has_feature_quota(sb))
				4188	sb->s_qcop = &dquot_quotactl_sysfile_ops;
				4189	else
				4190	sb->s_qcop = &ext4_qctl_operations;
				4191	sb->s_quota_types = QTYPE_MASK_USR \| QTYPE_MASK_GRP \| QTYPE_MASK_PRJ;
				4192	#endif
				4193	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
				4194
				4195	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
				4196	mutex_init(&sbi->s_orphan_lock);
				4197
				4198	sb->s_root = NULL;
				4199
				4200	needs_recovery = (es->s_last_orphan != 0 \|\|
				4201	ext4_has_feature_journal_needs_recovery(sb));
				4202
				4203	if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
				4204	if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
				4205	goto failed_mount3a;
				4206
				4207	/*
				4208	* The first inode we look at is the journal inode. Don't try
				4209	* root first: it may be modified in the journal!
				4210	*/
				4211	if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
				4212	err = ext4_load_journal(sb, es, journal_devnum);
				4213	if (err)
				4214	goto failed_mount3a;
				4215	} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
				4216	ext4_has_feature_journal_needs_recovery(sb)) {
				4217	ext4_msg(sb, KERN_ERR, "required journal recovery "
				4218	"suppressed and not mounted read-only");
				4219	goto failed_mount_wq;
				4220	} else {
				4221	/* Nojournal mode, all journal mount options are illegal */
				4222	if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
				4223	ext4_msg(sb, KERN_ERR, "can't mount with "
				4224	"journal_checksum, fs mounted w/o journal");
				4225	goto failed_mount_wq;
				4226	}
				4227	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				4228	ext4_msg(sb, KERN_ERR, "can't mount with "
				4229	"journal_async_commit, fs mounted w/o journal");
				4230	goto failed_mount_wq;
				4231	}
				4232	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
				4233	ext4_msg(sb, KERN_ERR, "can't mount with "
				4234	"commit=%lu, fs mounted w/o journal",
				4235	sbi->s_commit_interval / HZ);
				4236	goto failed_mount_wq;
				4237	}
				4238	if (EXT4_MOUNT_DATA_FLAGS &
				4239	(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
				4240	ext4_msg(sb, KERN_ERR, "can't mount with "
				4241	"data=, fs mounted w/o journal");
				4242	goto failed_mount_wq;
				4243	}
				4244	sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
				4245	clear_opt(sb, JOURNAL_CHECKSUM);
				4246	clear_opt(sb, DATA_FLAGS);
				4247	sbi->s_journal = NULL;
				4248	needs_recovery = 0;
				4249	goto no_journal;
				4250	}
				4251
				4252	if (ext4_has_feature_64bit(sb) &&
				4253	!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
				4254	JBD2_FEATURE_INCOMPAT_64BIT)) {
				4255	ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
				4256	goto failed_mount_wq;
				4257	}
				4258
				4259	if (!set_journal_csum_feature_set(sb)) {
				4260	ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
				4261	"feature set");
				4262	goto failed_mount_wq;
				4263	}
				4264
				4265	/* We have now updated the journal if required, so we can
				4266	* validate the data journaling mode. */
				4267	switch (test_opt(sb, DATA_FLAGS)) {
				4268	case 0:
				4269	/* No mode set, assume a default based on the journal
				4270	* capabilities: ORDERED_DATA if the journal can
				4271	* cope, else JOURNAL_DATA
				4272	*/
				4273	if (jbd2_journal_check_available_features
				4274	(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
				4275	set_opt(sb, ORDERED_DATA);
				4276	else
				4277	set_opt(sb, JOURNAL_DATA);
				4278	break;
				4279
				4280	case EXT4_MOUNT_ORDERED_DATA:
				4281	case EXT4_MOUNT_WRITEBACK_DATA:
				4282	if (!jbd2_journal_check_available_features
				4283	(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
				4284	ext4_msg(sb, KERN_ERR, "Journal does not support "
				4285	"requested data journaling mode");
				4286	goto failed_mount_wq;
				4287	}
				4288	default:
				4289	break;
				4290	}
				4291
				4292	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
				4293	test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				4294	ext4_msg(sb, KERN_ERR, "can't mount with "
				4295	"journal_async_commit in data=ordered mode");
				4296	goto failed_mount_wq;
				4297	}
				4298
				4299	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
				4300
				4301	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
				4302
				4303	no_journal:
				4304	if (!test_opt(sb, NO_MBCACHE)) {
				4305	sbi->s_ea_block_cache = ext4_xattr_create_cache();
				4306	if (!sbi->s_ea_block_cache) {
				4307	ext4_msg(sb, KERN_ERR,
				4308	"Failed to create ea_block_cache");
				4309	goto failed_mount_wq;
				4310	}
				4311
				4312	if (ext4_has_feature_ea_inode(sb)) {
				4313	sbi->s_ea_inode_cache = ext4_xattr_create_cache();
				4314	if (!sbi->s_ea_inode_cache) {
				4315	ext4_msg(sb, KERN_ERR,
				4316	"Failed to create ea_inode_cache");
				4317	goto failed_mount_wq;
				4318	}
				4319	}
				4320	}
				4321
				4322	if ((DUMMY_ENCRYPTION_ENABLED(sbi) \|\| ext4_has_feature_encrypt(sb)) &&
				4323	(blocksize != PAGE_SIZE)) {
				4324	ext4_msg(sb, KERN_ERR,
				4325	"Unsupported blocksize for fs encryption");
				4326	goto failed_mount_wq;
				4327	}
				4328
				4329	if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
				4330	!ext4_has_feature_encrypt(sb)) {
				4331	ext4_set_feature_encrypt(sb);
				4332	ext4_commit_super(sb, 1);
				4333	}
				4334
				4335	/*
				4336	* Get the # of file system overhead blocks from the
				4337	* superblock if present.
				4338	*/
				4339	if (es->s_overhead_clusters)
				4340	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
				4341	else {
				4342	err = ext4_calculate_overhead(sb);
				4343	if (err)
				4344	goto failed_mount_wq;
				4345	}
				4346
				4347	/*
				4348	* The maximum number of concurrent works can be high and
				4349	* concurrency isn't really necessary. Limit it to 1.
				4350	*/
				4351	EXT4_SB(sb)->rsv_conversion_wq =
				4352	alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM \| WQ_UNBOUND, 1);
				4353	if (!EXT4_SB(sb)->rsv_conversion_wq) {
				4354	printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
				4355	ret = -ENOMEM;
				4356	goto failed_mount4;
				4357	}
				4358
				4359	/*
				4360	* The jbd2_journal_load will have done any necessary log recovery,
				4361	* so we can safely mount the rest of the filesystem now.
				4362	*/
				4363
				4364	root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
				4365	if (IS_ERR(root)) {
				4366	ext4_msg(sb, KERN_ERR, "get root inode failed");
				4367	ret = PTR_ERR(root);
				4368	root = NULL;
				4369	goto failed_mount4;
				4370	}
				4371	if (!S_ISDIR(root->i_mode) \|\| !root->i_blocks \|\| !root->i_size) {
				4372	ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
				4373	iput(root);
				4374	goto failed_mount4;
				4375	}
				4376	sb->s_root = d_make_root(root);
				4377	if (!sb->s_root) {
				4378	ext4_msg(sb, KERN_ERR, "get root dentry failed");
				4379	ret = -ENOMEM;
				4380	goto failed_mount4;
				4381	}
				4382
				4383	if (ext4_setup_super(sb, es, sb_rdonly(sb)))
				4384	sb->s_flags \|= MS_RDONLY;
				4385
				4386	ext4_set_resv_clusters(sb);
				4387
				4388	err = ext4_setup_system_zone(sb);
				4389	if (err) {
				4390	ext4_msg(sb, KERN_ERR, "failed to initialize system "
				4391	"zone (%d)", err);
				4392	goto failed_mount4a;
				4393	}
				4394
				4395	ext4_ext_init(sb);
				4396	err = ext4_mb_init(sb);
				4397	if (err) {
				4398	ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
				4399	err);
				4400	goto failed_mount5;
				4401	}
				4402
				4403	block = ext4_count_free_clusters(sb);
				4404	ext4_free_blocks_count_set(sbi->s_es,
				4405	EXT4_C2B(sbi, block));
				4406	ext4_superblock_csum_set(sb);
				4407	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
				4408	GFP_KERNEL);
				4409	if (!err) {
				4410	unsigned long freei = ext4_count_free_inodes(sb);
				4411	sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
				4412	ext4_superblock_csum_set(sb);
				4413	err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
				4414	GFP_KERNEL);
				4415	}
				4416	if (!err)
				4417	err = percpu_counter_init(&sbi->s_dirs_counter,
				4418	ext4_count_dirs(sb), GFP_KERNEL);
				4419	if (!err)
				4420	err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
				4421	GFP_KERNEL);
				4422	if (!err)
				4423	err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
				4424
				4425	if (err) {
				4426	ext4_msg(sb, KERN_ERR, "insufficient memory");
				4427	goto failed_mount6;
				4428	}
				4429
				4430	if (ext4_has_feature_flex_bg(sb))
				4431	if (!ext4_fill_flex_info(sb)) {
				4432	ext4_msg(sb, KERN_ERR,
				4433	"unable to initialize "
				4434	"flex_bg meta info!");
				4435	goto failed_mount6;
				4436	}
				4437
				4438	err = ext4_register_li_request(sb, first_not_zeroed);
				4439	if (err)
				4440	goto failed_mount6;
				4441
				4442	err = ext4_register_sysfs(sb);
				4443	if (err)
				4444	goto failed_mount7;
				4445
				4446	#ifdef CONFIG_QUOTA
				4447	/* Enable quota usage during mount. */
				4448	if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
				4449	err = ext4_enable_quotas(sb);
				4450	if (err)
				4451	goto failed_mount8;
				4452	}
				4453	#endif /* CONFIG_QUOTA */
				4454
				4455	EXT4_SB(sb)->s_mount_state \|= EXT4_ORPHAN_FS;
				4456	ext4_orphan_cleanup(sb, es);
				4457	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
				4458	if (needs_recovery) {
				4459	ext4_msg(sb, KERN_INFO, "recovery complete");
				4460	err = ext4_mark_recovery_complete(sb, es);
				4461	if (err)
				4462	goto failed_mount8;
				4463	}
				4464	if (EXT4_SB(sb)->s_journal) {
				4465	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
				4466	descr = " journalled data mode";
				4467	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
				4468	descr = " ordered data mode";
				4469	else
				4470	descr = " writeback data mode";
				4471	} else
				4472	descr = "out journal";
				4473
				4474	if (test_opt(sb, DISCARD)) {
				4475	struct request_queue *q = bdev_get_queue(sb->s_bdev);
				4476	if (!blk_queue_discard(q))
				4477	ext4_msg(sb, KERN_WARNING,
				4478	"mounting with \"discard\" option, but "
				4479	"the device does not support discard");
				4480	}
				4481
				4482	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
				4483	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
				4484	"Opts: %.*s%s%s", descr,
				4485	(int) sizeof(sbi->s_es->s_mount_opts),
				4486	sbi->s_es->s_mount_opts,
				4487	*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
				4488
				4489	if (es->s_error_count)
				4490	mod_timer(&sbi->s_err_report, jiffies + 300HZ); / 5 minutes */
				4491
				4492	/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
				4493	ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
				4494	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
				4495	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
				4496
				4497	kfree(orig_data);
				4498	return 0;
				4499
				4500	cantfind_ext4:
				4501	if (!silent)
				4502	ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
				4503	goto failed_mount;
				4504
				4505	failed_mount8:
				4506	ext4_unregister_sysfs(sb);
				4507	failed_mount7:
				4508	ext4_unregister_li_request(sb);
				4509	failed_mount6:
				4510	ext4_mb_release(sb);
				4511	rcu_read_lock();
				4512	flex_groups = rcu_dereference(sbi->s_flex_groups);
				4513	if (flex_groups) {
				4514	for (i = 0; i < sbi->s_flex_groups_allocated; i++)
				4515	kvfree(flex_groups[i]);
				4516	kvfree(flex_groups);
				4517	}
				4518	rcu_read_unlock();
				4519	percpu_counter_destroy(&sbi->s_freeclusters_counter);
				4520	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				4521	percpu_counter_destroy(&sbi->s_dirs_counter);
				4522	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
				4523	percpu_free_rwsem(&sbi->s_writepages_rwsem);
				4524	failed_mount5:
				4525	ext4_ext_release(sb);
				4526	ext4_release_system_zone(sb);
				4527	failed_mount4a:
				4528	dput(sb->s_root);
				4529	sb->s_root = NULL;
				4530	failed_mount4:
				4531	ext4_msg(sb, KERN_ERR, "mount failed");
				4532	if (EXT4_SB(sb)->rsv_conversion_wq)
				4533	destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
				4534	failed_mount_wq:
				4535	if (sbi->s_ea_inode_cache) {
				4536	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
				4537	sbi->s_ea_inode_cache = NULL;
				4538	}
				4539	if (sbi->s_ea_block_cache) {
				4540	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
				4541	sbi->s_ea_block_cache = NULL;
				4542	}
				4543	if (sbi->s_journal) {
				4544	jbd2_journal_destroy(sbi->s_journal);
				4545	sbi->s_journal = NULL;
				4546	}
				4547	failed_mount3a:
				4548	ext4_es_unregister_shrinker(sbi);
				4549	failed_mount3:
				4550	del_timer_sync(&sbi->s_err_report);
				4551	if (sbi->s_mmp_tsk)
				4552	kthread_stop(sbi->s_mmp_tsk);
				4553	failed_mount2:
				4554	rcu_read_lock();
				4555	group_desc = rcu_dereference(sbi->s_group_desc);
				4556	for (i = 0; i < db_count; i++)
				4557	brelse(group_desc[i]);
				4558	kvfree(group_desc);
				4559	rcu_read_unlock();
				4560	failed_mount:
				4561	if (sbi->s_chksum_driver)
				4562	crypto_free_shash(sbi->s_chksum_driver);
				4563	#ifdef CONFIG_QUOTA
				4564	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				4565	kfree(sbi->s_qf_names[i]);
				4566	#endif
				4567	ext4_blkdev_remove(sbi);
				4568	brelse(bh);
				4569	out_fail:
				4570	sb->s_fs_info = NULL;
				4571	kfree(sbi->s_blockgroup_lock);
				4572	out_free_base:
				4573	kfree(sbi);
				4574	kfree(orig_data);
				4575	fs_put_dax(dax_dev);
				4576	return err ? err : ret;
				4577	}
				4578
				4579	/*
				4580	* Setup any per-fs journal parameters now. We'll do this both on
				4581	* initial mount, once the journal has been initialised but before we've
				4582	* done any recovery; and again on any subsequent remount.
				4583	*/
				4584	static void ext4_init_journal_params(struct super_block sb, journal_t journal)
				4585	{
				4586	struct ext4_sb_info *sbi = EXT4_SB(sb);
				4587
				4588	journal->j_commit_interval = sbi->s_commit_interval;
				4589	journal->j_min_batch_time = sbi->s_min_batch_time;
				4590	journal->j_max_batch_time = sbi->s_max_batch_time;
				4591
				4592	write_lock(&journal->j_state_lock);
				4593	if (test_opt(sb, BARRIER))
				4594	journal->j_flags \|= JBD2_BARRIER;
				4595	else
				4596	journal->j_flags &= ~JBD2_BARRIER;
				4597	if (test_opt(sb, DATA_ERR_ABORT))
				4598	journal->j_flags \|= JBD2_ABORT_ON_SYNCDATA_ERR;
				4599	else
				4600	journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
				4601	write_unlock(&journal->j_state_lock);
				4602	}
				4603
				4604	static struct inode ext4_get_journal_inode(struct super_block sb,
				4605	unsigned int journal_inum)
				4606	{
				4607	struct inode *journal_inode;
				4608
				4609	/*
				4610	* Test for the existence of a valid inode on disk. Bad things
				4611	* happen if we iget() an unused inode, as the subsequent iput()
				4612	* will try to delete it.
				4613	*/
				4614	journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
				4615	if (IS_ERR(journal_inode)) {
				4616	ext4_msg(sb, KERN_ERR, "no journal found");
				4617	return NULL;
				4618	}
				4619	if (!journal_inode->i_nlink) {
				4620	make_bad_inode(journal_inode);
				4621	iput(journal_inode);
				4622	ext4_msg(sb, KERN_ERR, "journal inode is deleted");
				4623	return NULL;
				4624	}
				4625
				4626	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
				4627	journal_inode, journal_inode->i_size);
				4628	if (!S_ISREG(journal_inode->i_mode)) {
				4629	ext4_msg(sb, KERN_ERR, "invalid journal inode");
				4630	iput(journal_inode);
				4631	return NULL;
				4632	}
				4633	return journal_inode;
				4634	}
				4635
				4636	static journal_t ext4_get_journal(struct super_block sb,
				4637	unsigned int journal_inum)
				4638	{
				4639	struct inode *journal_inode;
				4640	journal_t *journal;
				4641
				4642	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
				4643	return NULL;
				4644
				4645	journal_inode = ext4_get_journal_inode(sb, journal_inum);
				4646	if (!journal_inode)
				4647	return NULL;
				4648
				4649	journal = jbd2_journal_init_inode(journal_inode);
				4650	if (!journal) {
				4651	ext4_msg(sb, KERN_ERR, "Could not load journal inode");
				4652	iput(journal_inode);
				4653	return NULL;
				4654	}
				4655	journal->j_private = sb;
				4656	ext4_init_journal_params(sb, journal);
				4657	return journal;
				4658	}
				4659
				4660	static journal_t ext4_get_dev_journal(struct super_block sb,
				4661	dev_t j_dev)
				4662	{
				4663	struct buffer_head *bh;
				4664	journal_t *journal;
				4665	ext4_fsblk_t start;
				4666	ext4_fsblk_t len;
				4667	int hblock, blocksize;
				4668	ext4_fsblk_t sb_block;
				4669	unsigned long offset;
				4670	struct ext4_super_block *es;
				4671	struct block_device *bdev;
				4672
				4673	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
				4674	return NULL;
				4675
				4676	bdev = ext4_blkdev_get(j_dev, sb);
				4677	if (bdev == NULL)
				4678	return NULL;
				4679
				4680	blocksize = sb->s_blocksize;
				4681	hblock = bdev_logical_block_size(bdev);
				4682	if (blocksize < hblock) {
				4683	ext4_msg(sb, KERN_ERR,
				4684	"blocksize too small for journal device");
				4685	goto out_bdev;
				4686	}
				4687
				4688	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
				4689	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
				4690	set_blocksize(bdev, blocksize);
				4691	if (!(bh = __bread(bdev, sb_block, blocksize))) {
				4692	ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
				4693	"external journal");
				4694	goto out_bdev;
				4695	}
				4696
				4697	es = (struct ext4_super_block *) (bh->b_data + offset);
				4698	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) \|\|
				4699	!(le32_to_cpu(es->s_feature_incompat) &
				4700	EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
				4701	ext4_msg(sb, KERN_ERR, "external journal has "
				4702	"bad superblock");
				4703	brelse(bh);
				4704	goto out_bdev;
				4705	}
				4706
				4707	if ((le32_to_cpu(es->s_feature_ro_compat) &
				4708	EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
				4709	es->s_checksum != ext4_superblock_csum(sb, es)) {
				4710	ext4_msg(sb, KERN_ERR, "external journal has "
				4711	"corrupt superblock");
				4712	brelse(bh);
				4713	goto out_bdev;
				4714	}
				4715
				4716	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
				4717	ext4_msg(sb, KERN_ERR, "journal UUID does not match");
				4718	brelse(bh);
				4719	goto out_bdev;
				4720	}
				4721
				4722	len = ext4_blocks_count(es);
				4723	start = sb_block + 1;
				4724	brelse(bh); /* we're done with the superblock */
				4725
				4726	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
				4727	start, len, blocksize);
				4728	if (!journal) {
				4729	ext4_msg(sb, KERN_ERR, "failed to create device journal");
				4730	goto out_bdev;
				4731	}
				4732	journal->j_private = sb;
				4733	ll_rw_block(REQ_OP_READ, REQ_META \| REQ_PRIO, 1, &journal->j_sb_buffer);
				4734	wait_on_buffer(journal->j_sb_buffer);
				4735	if (!buffer_uptodate(journal->j_sb_buffer)) {
				4736	ext4_msg(sb, KERN_ERR, "I/O error on journal device");
				4737	goto out_journal;
				4738	}
				4739	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
				4740	ext4_msg(sb, KERN_ERR, "External journal has more than one "
				4741	"user (unsupported) - %d",
				4742	be32_to_cpu(journal->j_superblock->s_nr_users));
				4743	goto out_journal;
				4744	}
				4745	EXT4_SB(sb)->journal_bdev = bdev;
				4746	ext4_init_journal_params(sb, journal);
				4747	return journal;
				4748
				4749	out_journal:
				4750	jbd2_journal_destroy(journal);
				4751	out_bdev:
				4752	ext4_blkdev_put(bdev);
				4753	return NULL;
				4754	}
				4755
				4756	static int ext4_load_journal(struct super_block *sb,
				4757	struct ext4_super_block *es,
				4758	unsigned long journal_devnum)
				4759	{
				4760	journal_t *journal;
				4761	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
				4762	dev_t journal_dev;
				4763	int err = 0;
				4764	int really_read_only;
				4765
				4766	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
				4767	return -EFSCORRUPTED;
				4768
				4769	if (journal_devnum &&
				4770	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				4771	ext4_msg(sb, KERN_INFO, "external journal device major/minor "
				4772	"numbers have changed");
				4773	journal_dev = new_decode_dev(journal_devnum);
				4774	} else
				4775	journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
				4776
				4777	really_read_only = bdev_read_only(sb->s_bdev);
				4778
				4779	/*
				4780	* Are we loading a blank journal or performing recovery after a
				4781	* crash? For recovery, we need to check in advance whether we
				4782	* can get read-write access to the device.
				4783	*/
				4784	if (ext4_has_feature_journal_needs_recovery(sb)) {
				4785	if (sb_rdonly(sb)) {
				4786	ext4_msg(sb, KERN_INFO, "INFO: recovery "
				4787	"required on readonly filesystem");
				4788	if (really_read_only) {
				4789	ext4_msg(sb, KERN_ERR, "write access "
				4790	"unavailable, cannot proceed");
				4791	return -EROFS;
				4792	}
				4793	ext4_msg(sb, KERN_INFO, "write access will "
				4794	"be enabled during recovery");
				4795	}
				4796	}
				4797
				4798	if (journal_inum && journal_dev) {
				4799	ext4_msg(sb, KERN_ERR, "filesystem has both journal "
				4800	"and inode journals!");
				4801	return -EINVAL;
				4802	}
				4803
				4804	if (journal_inum) {
				4805	if (!(journal = ext4_get_journal(sb, journal_inum)))
				4806	return -EINVAL;
				4807	} else {
				4808	if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
				4809	return -EINVAL;
				4810	}
				4811
				4812	if (!(journal->j_flags & JBD2_BARRIER))
				4813	ext4_msg(sb, KERN_INFO, "barriers disabled");
				4814
				4815	if (!ext4_has_feature_journal_needs_recovery(sb))
				4816	err = jbd2_journal_wipe(journal, !really_read_only);
				4817	if (!err) {
				4818	char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
				4819	if (save)
				4820	memcpy(save, ((char *) es) +
				4821	EXT4_S_ERR_START, EXT4_S_ERR_LEN);
				4822	err = jbd2_journal_load(journal);
				4823	if (save)
				4824	memcpy(((char *) es) + EXT4_S_ERR_START,
				4825	save, EXT4_S_ERR_LEN);
				4826	kfree(save);
				4827	}
				4828
				4829	if (err) {
				4830	ext4_msg(sb, KERN_ERR, "error loading journal");
				4831	jbd2_journal_destroy(journal);
				4832	return err;
				4833	}
				4834
				4835	EXT4_SB(sb)->s_journal = journal;
				4836	err = ext4_clear_journal_err(sb, es);
				4837	if (err) {
				4838	EXT4_SB(sb)->s_journal = NULL;
				4839	jbd2_journal_destroy(journal);
				4840	return err;
				4841	}
				4842
				4843	if (!really_read_only && journal_devnum &&
				4844	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				4845	es->s_journal_dev = cpu_to_le32(journal_devnum);
				4846
				4847	/* Make sure we flush the recovery flag to disk. */
				4848	ext4_commit_super(sb, 1);
				4849	}
				4850
				4851	return 0;
				4852	}
				4853
				4854	static int ext4_commit_super(struct super_block *sb, int sync)
				4855	{
				4856	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				4857	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
				4858	int error = 0;
				4859
				4860	if (!sbh \|\| block_device_ejected(sb))
				4861	return error;
				4862
				4863	/*
				4864	* If the file system is mounted read-only, don't update the
				4865	* superblock write time. This avoids updating the superblock
				4866	* write time when we are mounting the root file system
				4867	* read/only but we need to replay the journal; at that point,
				4868	* for people who are east of GMT and who make their clock
				4869	* tick in localtime for Windows bug-for-bug compatibility,
				4870	* the clock is set in the future, and this will cause e2fsck
				4871	* to complain and force a full file system check.
				4872	*/
				4873	if (!(sb->s_flags & MS_RDONLY))
				4874	es->s_wtime = cpu_to_le32(get_seconds());
				4875	if (sb->s_bdev->bd_part)
				4876	es->s_kbytes_written =
				4877	cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
				4878	((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
				4879	EXT4_SB(sb)->s_sectors_written_start) >> 1));
				4880	else
				4881	es->s_kbytes_written =
				4882	cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
				4883	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
				4884	ext4_free_blocks_count_set(es,
				4885	EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
				4886	&EXT4_SB(sb)->s_freeclusters_counter)));
				4887	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
				4888	es->s_free_inodes_count =
				4889	cpu_to_le32(percpu_counter_sum_positive(
				4890	&EXT4_SB(sb)->s_freeinodes_counter));
				4891	BUFFER_TRACE(sbh, "marking dirty");
				4892	ext4_superblock_csum_set(sb);
				4893	if (sync)
				4894	lock_buffer(sbh);
				4895	if (buffer_write_io_error(sbh) \|\| !buffer_uptodate(sbh)) {
				4896	/*
				4897	* Oh, dear. A previous attempt to write the
				4898	* superblock failed. This could happen because the
				4899	* USB device was yanked out. Or it could happen to
				4900	* be a transient write error and maybe the block will
				4901	* be remapped. Nothing we can do but to retry the
				4902	* write and hope for the best.
				4903	*/
				4904	ext4_msg(sb, KERN_ERR, "previous I/O error to "
				4905	"superblock detected");
				4906	clear_buffer_write_io_error(sbh);
				4907	set_buffer_uptodate(sbh);
				4908	}
				4909	mark_buffer_dirty(sbh);
				4910	if (sync) {
				4911	unlock_buffer(sbh);
				4912	error = __sync_dirty_buffer(sbh,
				4913	REQ_SYNC \| (test_opt(sb, BARRIER) ? REQ_FUA : 0));
				4914	if (error)
				4915	return error;
				4916
				4917	error = buffer_write_io_error(sbh);
				4918	if (error) {
				4919	ext4_msg(sb, KERN_ERR, "I/O error while writing "
				4920	"superblock");
				4921	clear_buffer_write_io_error(sbh);
				4922	set_buffer_uptodate(sbh);
				4923	}
				4924	}
				4925	return error;
				4926	}
				4927
				4928	/*
				4929	* Have we just finished recovery? If so, and if we are mounting (or
				4930	* remounting) the filesystem readonly, then we will end up with a
				4931	* consistent fs on disk. Record that fact.
				4932	*/
				4933	static int ext4_mark_recovery_complete(struct super_block *sb,
				4934	struct ext4_super_block *es)
				4935	{
				4936	int err;
				4937	journal_t *journal = EXT4_SB(sb)->s_journal;
				4938
				4939	if (!ext4_has_feature_journal(sb)) {
				4940	if (journal != NULL) {
				4941	ext4_error(sb, "Journal got removed while the fs was "
				4942	"mounted!");
				4943	return -EFSCORRUPTED;
				4944	}
				4945	return 0;
				4946	}
				4947	jbd2_journal_lock_updates(journal);
				4948	err = jbd2_journal_flush(journal);
				4949	if (err < 0)
				4950	goto out;
				4951
				4952	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
				4953	ext4_clear_feature_journal_needs_recovery(sb);
				4954	ext4_commit_super(sb, 1);
				4955	}
				4956	out:
				4957	jbd2_journal_unlock_updates(journal);
				4958	return err;
				4959	}
				4960
				4961	/*
				4962	* If we are mounting (or read-write remounting) a filesystem whose journal
				4963	* has recorded an error from a previous lifetime, move that error to the
				4964	* main filesystem now.
				4965	*/
				4966	static int ext4_clear_journal_err(struct super_block *sb,
				4967	struct ext4_super_block *es)
				4968	{
				4969	journal_t *journal;
				4970	int j_errno;
				4971	const char *errstr;
				4972
				4973	if (!ext4_has_feature_journal(sb)) {
				4974	ext4_error(sb, "Journal got removed while the fs was mounted!");
				4975	return -EFSCORRUPTED;
				4976	}
				4977
				4978	journal = EXT4_SB(sb)->s_journal;
				4979
				4980	/*
				4981	* Now check for any error status which may have been recorded in the
				4982	* journal by a prior ext4_error() or ext4_abort()
				4983	*/
				4984
				4985	j_errno = jbd2_journal_errno(journal);
				4986	if (j_errno) {
				4987	char nbuf[16];
				4988
				4989	errstr = ext4_decode_error(sb, j_errno, nbuf);
				4990	ext4_warning(sb, "Filesystem error recorded "
				4991	"from previous mount: %s", errstr);
				4992	ext4_warning(sb, "Marking fs in need of filesystem check.");
				4993
				4994	EXT4_SB(sb)->s_mount_state \|= EXT4_ERROR_FS;
				4995	es->s_state \|= cpu_to_le16(EXT4_ERROR_FS);
				4996	ext4_commit_super(sb, 1);
				4997
				4998	jbd2_journal_clear_err(journal);
				4999	jbd2_journal_update_sb_errno(journal);
				5000	}
				5001	return 0;
				5002	}
				5003
				5004	/*
				5005	* Force the running and committing transactions to commit,
				5006	* and wait on the commit.
				5007	*/
				5008	int ext4_force_commit(struct super_block *sb)
				5009	{
				5010	journal_t *journal;
				5011
				5012	if (sb_rdonly(sb))
				5013	return 0;
				5014
				5015	journal = EXT4_SB(sb)->s_journal;
				5016	return ext4_journal_force_commit(journal);
				5017	}
				5018
				5019	static int ext4_sync_fs(struct super_block *sb, int wait)
				5020	{
				5021	int ret = 0;
				5022	tid_t target;
				5023	bool needs_barrier = false;
				5024	struct ext4_sb_info *sbi = EXT4_SB(sb);
				5025
				5026	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				5027	return 0;
				5028
				5029	trace_ext4_sync_fs(sb, wait);
				5030	flush_workqueue(sbi->rsv_conversion_wq);
				5031	/*
				5032	* Writeback quota in non-journalled quota case - journalled quota has
				5033	* no dirty dquots
				5034	*/
				5035	dquot_writeback_dquots(sb, -1);
				5036	/*
				5037	* Data writeback is possible w/o journal transaction, so barrier must
				5038	* being sent at the end of the function. But we can skip it if
				5039	* transaction_commit will do it for us.
				5040	*/
				5041	if (sbi->s_journal) {
				5042	target = jbd2_get_latest_transaction(sbi->s_journal);
				5043	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
				5044	!jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
				5045	needs_barrier = true;
				5046
				5047	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
				5048	if (wait)
				5049	ret = jbd2_log_wait_commit(sbi->s_journal,
				5050	target);
				5051	}
				5052	} else if (wait && test_opt(sb, BARRIER))
				5053	needs_barrier = true;
				5054	if (needs_barrier) {
				5055	int err;
				5056	err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
				5057	if (!ret)
				5058	ret = err;
				5059	}
				5060
				5061	return ret;
				5062	}
				5063
				5064	/*
				5065	* LVM calls this function before a (read-only) snapshot is created. This
				5066	* gives us a chance to flush the journal completely and mark the fs clean.
				5067	*
				5068	* Note that only this function cannot bring a filesystem to be in a clean
				5069	* state independently. It relies on upper layer to stop all data & metadata
				5070	* modifications.
				5071	*/
				5072	static int ext4_freeze(struct super_block *sb)
				5073	{
				5074	int error = 0;
				5075	journal_t *journal;
				5076
				5077	if (sb_rdonly(sb))
				5078	return 0;
				5079
				5080	journal = EXT4_SB(sb)->s_journal;
				5081
				5082	if (journal) {
				5083	/* Now we set up the journal barrier. */
				5084	jbd2_journal_lock_updates(journal);
				5085
				5086	/*
				5087	* Don't clear the needs_recovery flag if we failed to
				5088	* flush the journal.
				5089	*/
				5090	error = jbd2_journal_flush(journal);
				5091	if (error < 0)
				5092	goto out;
				5093
				5094	/* Journal blocked and flushed, clear needs_recovery flag. */
				5095	ext4_clear_feature_journal_needs_recovery(sb);
				5096	}
				5097
				5098	error = ext4_commit_super(sb, 1);
				5099	out:
				5100	if (journal)
				5101	/* we rely on upper layer to stop further updates */
				5102	jbd2_journal_unlock_updates(journal);
				5103	return error;
				5104	}
				5105
				5106	/*
				5107	* Called by LVM after the snapshot is done. We need to reset the RECOVER
				5108	* flag here, even though the filesystem is not technically dirty yet.
				5109	*/
				5110	static int ext4_unfreeze(struct super_block *sb)
				5111	{
				5112	if (sb_rdonly(sb) \|\| ext4_forced_shutdown(EXT4_SB(sb)))
				5113	return 0;
				5114
				5115	if (EXT4_SB(sb)->s_journal) {
				5116	/* Reset the needs_recovery flag before the fs is unlocked. */
				5117	ext4_set_feature_journal_needs_recovery(sb);
				5118	}
				5119
				5120	ext4_commit_super(sb, 1);
				5121	return 0;
				5122	}
				5123
				5124	/*
				5125	* Structure to save mount options for ext4_remount's benefit
				5126	*/
				5127	struct ext4_mount_options {
				5128	unsigned long s_mount_opt;
				5129	unsigned long s_mount_opt2;
				5130	kuid_t s_resuid;
				5131	kgid_t s_resgid;
				5132	unsigned long s_commit_interval;
				5133	u32 s_min_batch_time, s_max_batch_time;
				5134	#ifdef CONFIG_QUOTA
				5135	int s_jquota_fmt;
				5136	char *s_qf_names[EXT4_MAXQUOTAS];
				5137	#endif
				5138	};
				5139
				5140	static int ext4_remount(struct super_block sb, int flags, char *data)
				5141	{
				5142	struct ext4_super_block *es;
				5143	struct ext4_sb_info *sbi = EXT4_SB(sb);
				5144	unsigned long old_sb_flags;
				5145	struct ext4_mount_options old_opts;
				5146	int enable_quota = 0;
				5147	ext4_group_t g;
				5148	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
				5149	int err = 0;
				5150	#ifdef CONFIG_QUOTA
				5151	int i, j;
				5152	char *to_free[EXT4_MAXQUOTAS];
				5153	#endif
				5154	char *orig_data = kstrdup(data, GFP_KERNEL);
				5155
				5156	/* Store the original options */
				5157	old_sb_flags = sb->s_flags;
				5158	old_opts.s_mount_opt = sbi->s_mount_opt;
				5159	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
				5160	old_opts.s_resuid = sbi->s_resuid;
				5161	old_opts.s_resgid = sbi->s_resgid;
				5162	old_opts.s_commit_interval = sbi->s_commit_interval;
				5163	old_opts.s_min_batch_time = sbi->s_min_batch_time;
				5164	old_opts.s_max_batch_time = sbi->s_max_batch_time;
				5165	#ifdef CONFIG_QUOTA
				5166	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
				5167	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				5168	if (sbi->s_qf_names[i]) {
				5169	char *qf_name = get_qf_name(sb, sbi, i);
				5170
				5171	old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
				5172	if (!old_opts.s_qf_names[i]) {
				5173	for (j = 0; j < i; j++)
				5174	kfree(old_opts.s_qf_names[j]);
				5175	kfree(orig_data);
				5176	return -ENOMEM;
				5177	}
				5178	} else
				5179	old_opts.s_qf_names[i] = NULL;
				5180	#endif
				5181	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
				5182	journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
				5183
				5184	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
				5185	err = -EINVAL;
				5186	goto restore_opts;
				5187	}
				5188
				5189	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
				5190	test_opt(sb, JOURNAL_CHECKSUM)) {
				5191	ext4_msg(sb, KERN_ERR, "changing journal_checksum "
				5192	"during remount not supported; ignoring");
				5193	sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
				5194	}
				5195
				5196	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
				5197	if (test_opt2(sb, EXPLICIT_DELALLOC)) {
				5198	ext4_msg(sb, KERN_ERR, "can't mount with "
				5199	"both data=journal and delalloc");
				5200	err = -EINVAL;
				5201	goto restore_opts;
				5202	}
				5203	if (test_opt(sb, DIOREAD_NOLOCK)) {
				5204	ext4_msg(sb, KERN_ERR, "can't mount with "
				5205	"both data=journal and dioread_nolock");
				5206	err = -EINVAL;
				5207	goto restore_opts;
				5208	}
				5209	if (test_opt(sb, DAX)) {
				5210	ext4_msg(sb, KERN_ERR, "can't mount with "
				5211	"both data=journal and dax");
				5212	err = -EINVAL;
				5213	goto restore_opts;
				5214	}
				5215	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
				5216	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				5217	ext4_msg(sb, KERN_ERR, "can't mount with "
				5218	"journal_async_commit in data=ordered mode");
				5219	err = -EINVAL;
				5220	goto restore_opts;
				5221	}
				5222	}
				5223
				5224	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
				5225	ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
				5226	err = -EINVAL;
				5227	goto restore_opts;
				5228	}
				5229
				5230	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
				5231	ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
				5232	"dax flag with busy inodes while remounting");
				5233	sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
				5234	}
				5235
				5236	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
				5237	ext4_abort(sb, "Abort forced by user");
				5238
				5239	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				5240	(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
				5241
				5242	es = sbi->s_es;
				5243
				5244	if (sbi->s_journal) {
				5245	ext4_init_journal_params(sb, sbi->s_journal);
				5246	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
				5247	}
				5248
				5249	if (*flags & MS_LAZYTIME)
				5250	sb->s_flags \|= MS_LAZYTIME;
				5251
				5252	if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) {
				5253	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
				5254	err = -EROFS;
				5255	goto restore_opts;
				5256	}
				5257
				5258	if (*flags & MS_RDONLY) {
				5259	err = sync_filesystem(sb);
				5260	if (err < 0)
				5261	goto restore_opts;
				5262	err = dquot_suspend(sb, -1);
				5263	if (err < 0)
				5264	goto restore_opts;
				5265
				5266	/*
				5267	* First of all, the unconditional stuff we have to do
				5268	* to disable replay of the journal when we next remount
				5269	*/
				5270	sb->s_flags \|= MS_RDONLY;
				5271
				5272	/*
				5273	* OK, test if we are remounting a valid rw partition
				5274	* readonly, and if so set the rdonly flag and then
				5275	* mark the partition as valid again.
				5276	*/
				5277	if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
				5278	(sbi->s_mount_state & EXT4_VALID_FS))
				5279	es->s_state = cpu_to_le16(sbi->s_mount_state);
				5280
				5281	if (sbi->s_journal) {
				5282	/*
				5283	* We let remount-ro finish even if marking fs
				5284	* as clean failed...
				5285	*/
				5286	ext4_mark_recovery_complete(sb, es);
				5287	}
				5288	if (sbi->s_mmp_tsk)
				5289	kthread_stop(sbi->s_mmp_tsk);
				5290	} else {
				5291	/* Make sure we can mount this feature set readwrite */
				5292	if (ext4_has_feature_readonly(sb) \|\|
				5293	!ext4_feature_set_ok(sb, 0)) {
				5294	err = -EROFS;
				5295	goto restore_opts;
				5296	}
				5297	/*
				5298	* Make sure the group descriptor checksums
				5299	* are sane. If they aren't, refuse to remount r/w.
				5300	*/
				5301	for (g = 0; g < sbi->s_groups_count; g++) {
				5302	struct ext4_group_desc *gdp =
				5303	ext4_get_group_desc(sb, g, NULL);
				5304
				5305	if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
				5306	ext4_msg(sb, KERN_ERR,
				5307	"ext4_remount: Checksum for group %u failed (%u!=%u)",
				5308	g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
				5309	le16_to_cpu(gdp->bg_checksum));
				5310	err = -EFSBADCRC;
				5311	goto restore_opts;
				5312	}
				5313	}
				5314
				5315	/*
				5316	* If we have an unprocessed orphan list hanging
				5317	* around from a previously readonly bdev mount,
				5318	* require a full umount/remount for now.
				5319	*/
				5320	if (es->s_last_orphan) {
				5321	ext4_msg(sb, KERN_WARNING, "Couldn't "
				5322	"remount RDWR because of unprocessed "
				5323	"orphan inode list. Please "
				5324	"umount/remount instead");
				5325	err = -EINVAL;
				5326	goto restore_opts;
				5327	}
				5328
				5329	/*
				5330	* Mounting a RDONLY partition read-write, so reread
				5331	* and store the current valid flag. (It may have
				5332	* been changed by e2fsck since we originally mounted
				5333	* the partition.)
				5334	*/
				5335	if (sbi->s_journal) {
				5336	err = ext4_clear_journal_err(sb, es);
				5337	if (err)
				5338	goto restore_opts;
				5339	}
				5340	sbi->s_mount_state = le16_to_cpu(es->s_state);
				5341	if (!ext4_setup_super(sb, es, 0))
				5342	sb->s_flags &= ~MS_RDONLY;
				5343	if (ext4_has_feature_mmp(sb))
				5344	if (ext4_multi_mount_protect(sb,
				5345	le64_to_cpu(es->s_mmp_block))) {
				5346	err = -EROFS;
				5347	goto restore_opts;
				5348	}
				5349	enable_quota = 1;
				5350	}
				5351	}
				5352
				5353	/*
				5354	* Reinitialize lazy itable initialization thread based on
				5355	* current settings
				5356	*/
				5357	if (sb_rdonly(sb) \|\| !test_opt(sb, INIT_INODE_TABLE))
				5358	ext4_unregister_li_request(sb);
				5359	else {
				5360	ext4_group_t first_not_zeroed;
				5361	first_not_zeroed = ext4_has_uninit_itable(sb);
				5362	ext4_register_li_request(sb, first_not_zeroed);
				5363	}
				5364
				5365	ext4_setup_system_zone(sb);
				5366	if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
				5367	ext4_commit_super(sb, 1);
				5368
				5369	#ifdef CONFIG_QUOTA
				5370	/* Release old quota file names */
				5371	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				5372	kfree(old_opts.s_qf_names[i]);
				5373	if (enable_quota) {
				5374	if (sb_any_quota_suspended(sb))
				5375	dquot_resume(sb, -1);
				5376	else if (ext4_has_feature_quota(sb)) {
				5377	err = ext4_enable_quotas(sb);
				5378	if (err)
				5379	goto restore_opts;
				5380	}
				5381	}
				5382	#endif
				5383
				5384	flags = (flags & ~MS_LAZYTIME) \| (sb->s_flags & MS_LAZYTIME);
				5385	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
				5386	kfree(orig_data);
				5387	return 0;
				5388
				5389	restore_opts:
				5390	sb->s_flags = old_sb_flags;
				5391	sbi->s_mount_opt = old_opts.s_mount_opt;
				5392	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
				5393	sbi->s_resuid = old_opts.s_resuid;
				5394	sbi->s_resgid = old_opts.s_resgid;
				5395	sbi->s_commit_interval = old_opts.s_commit_interval;
				5396	sbi->s_min_batch_time = old_opts.s_min_batch_time;
				5397	sbi->s_max_batch_time = old_opts.s_max_batch_time;
				5398	#ifdef CONFIG_QUOTA
				5399	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
				5400	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
				5401	to_free[i] = get_qf_name(sb, sbi, i);
				5402	rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
				5403	}
				5404	synchronize_rcu();
				5405	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				5406	kfree(to_free[i]);
				5407	#endif
				5408	kfree(orig_data);
				5409	return err;
				5410	}
				5411
				5412	#ifdef CONFIG_QUOTA
				5413	static int ext4_statfs_project(struct super_block *sb,
				5414	kprojid_t projid, struct kstatfs *buf)
				5415	{
				5416	struct kqid qid;
				5417	struct dquot *dquot;
				5418	u64 limit;
				5419	u64 curblock;
				5420
				5421	qid = make_kqid_projid(projid);
				5422	dquot = dqget(sb, qid);
				5423	if (IS_ERR(dquot))
				5424	return PTR_ERR(dquot);
				5425	spin_lock(&dquot->dq_dqb_lock);
				5426
				5427	limit = (dquot->dq_dqb.dqb_bsoftlimit ?
				5428	dquot->dq_dqb.dqb_bsoftlimit :
				5429	dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
				5430	if (limit && buf->f_blocks > limit) {
				5431	curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
				5432	buf->f_blocks = limit;
				5433	buf->f_bfree = buf->f_bavail =
				5434	(buf->f_blocks > curblock) ?
				5435	(buf->f_blocks - curblock) : 0;
				5436	}
				5437
				5438	limit = dquot->dq_dqb.dqb_isoftlimit ?
				5439	dquot->dq_dqb.dqb_isoftlimit :
				5440	dquot->dq_dqb.dqb_ihardlimit;
				5441	if (limit && buf->f_files > limit) {
				5442	buf->f_files = limit;
				5443	buf->f_ffree =
				5444	(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
				5445	(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
				5446	}
				5447
				5448	spin_unlock(&dquot->dq_dqb_lock);
				5449	dqput(dquot);
				5450	return 0;
				5451	}
				5452	#endif
				5453
				5454	static int ext4_statfs(struct dentry dentry, struct kstatfs buf)
				5455	{
				5456	struct super_block *sb = dentry->d_sb;
				5457	struct ext4_sb_info *sbi = EXT4_SB(sb);
				5458	struct ext4_super_block *es = sbi->s_es;
				5459	ext4_fsblk_t overhead = 0, resv_blocks;
				5460	u64 fsid;
				5461	s64 bfree;
				5462	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
				5463
				5464	if (!test_opt(sb, MINIX_DF))
				5465	overhead = sbi->s_overhead;
				5466
				5467	buf->f_type = EXT4_SUPER_MAGIC;
				5468	buf->f_bsize = sb->s_blocksize;
				5469	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
				5470	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
				5471	percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
				5472	/* prevent underflow in case that few free space is available */
				5473	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
				5474	buf->f_bavail = buf->f_bfree -
				5475	(ext4_r_blocks_count(es) + resv_blocks);
				5476	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
				5477	buf->f_bavail = 0;
				5478	buf->f_files = le32_to_cpu(es->s_inodes_count);
				5479	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
				5480	buf->f_namelen = EXT4_NAME_LEN;
				5481	fsid = le64_to_cpup((void *)es->s_uuid) ^
				5482	le64_to_cpup((void *)es->s_uuid + sizeof(u64));
				5483	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
				5484	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
				5485
				5486	#ifdef CONFIG_QUOTA
				5487	if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
				5488	sb_has_quota_limits_enabled(sb, PRJQUOTA))
				5489	ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
				5490	#endif
				5491	return 0;
				5492	}
				5493
				5494
				5495	#ifdef CONFIG_QUOTA
				5496
				5497	/*
				5498	* Helper functions so that transaction is started before we acquire dqio_sem
				5499	* to keep correct lock ordering of transaction > dqio_sem
				5500	*/
				5501	static inline struct inode dquot_to_inode(struct dquot dquot)
				5502	{
				5503	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
				5504	}
				5505
				5506	static int ext4_write_dquot(struct dquot *dquot)
				5507	{
				5508	int ret, err;
				5509	handle_t *handle;
				5510	struct inode *inode;
				5511
				5512	inode = dquot_to_inode(dquot);
				5513	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
				5514	EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
				5515	if (IS_ERR(handle))
				5516	return PTR_ERR(handle);
				5517	ret = dquot_commit(dquot);
				5518	err = ext4_journal_stop(handle);
				5519	if (!ret)
				5520	ret = err;
				5521	return ret;
				5522	}
				5523
				5524	static int ext4_acquire_dquot(struct dquot *dquot)
				5525	{
				5526	int ret, err;
				5527	handle_t *handle;
				5528
				5529	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				5530	EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
				5531	if (IS_ERR(handle))
				5532	return PTR_ERR(handle);
				5533	ret = dquot_acquire(dquot);
				5534	err = ext4_journal_stop(handle);
				5535	if (!ret)
				5536	ret = err;
				5537	return ret;
				5538	}
				5539
				5540	static int ext4_release_dquot(struct dquot *dquot)
				5541	{
				5542	int ret, err;
				5543	handle_t *handle;
				5544
				5545	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				5546	EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
				5547	if (IS_ERR(handle)) {
				5548	/* Release dquot anyway to avoid endless cycle in dqput() */
				5549	dquot_release(dquot);
				5550	return PTR_ERR(handle);
				5551	}
				5552	ret = dquot_release(dquot);
				5553	err = ext4_journal_stop(handle);
				5554	if (!ret)
				5555	ret = err;
				5556	return ret;
				5557	}
				5558
				5559	static int ext4_mark_dquot_dirty(struct dquot *dquot)
				5560	{
				5561	struct super_block *sb = dquot->dq_sb;
				5562	struct ext4_sb_info *sbi = EXT4_SB(sb);
				5563
				5564	/* Are we journaling quotas? */
				5565	if (ext4_has_feature_quota(sb) \|\|
				5566	sbi->s_qf_names[USRQUOTA] \|\| sbi->s_qf_names[GRPQUOTA]) {
				5567	dquot_mark_dquot_dirty(dquot);
				5568	return ext4_write_dquot(dquot);
				5569	} else {
				5570	return dquot_mark_dquot_dirty(dquot);
				5571	}
				5572	}
				5573
				5574	static int ext4_write_info(struct super_block *sb, int type)
				5575	{
				5576	int ret, err;
				5577	handle_t *handle;
				5578
				5579	/* Data block + inode block */
				5580	handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
				5581	if (IS_ERR(handle))
				5582	return PTR_ERR(handle);
				5583	ret = dquot_commit_info(sb, type);
				5584	err = ext4_journal_stop(handle);
				5585	if (!ret)
				5586	ret = err;
				5587	return ret;
				5588	}
				5589
				5590	/*
				5591	* Turn on quotas during mount time - we need to find
				5592	* the quota file and such...
				5593	*/
				5594	static int ext4_quota_on_mount(struct super_block *sb, int type)
				5595	{
				5596	return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
				5597	EXT4_SB(sb)->s_jquota_fmt, type);
				5598	}
				5599
				5600	static void lockdep_set_quota_inode(struct inode *inode, int subclass)
				5601	{
				5602	struct ext4_inode_info *ei = EXT4_I(inode);
				5603
				5604	/* The first argument of lockdep_set_subclass has to be
				5605	* exactly the same as the argument to init_rwsem() --- in
				5606	* this case, in init_once() --- or lockdep gets unhappy
				5607	* because the name of the lock is set using the
				5608	* stringification of the argument to init_rwsem().
				5609	*/
				5610	(void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
				5611	lockdep_set_subclass(&ei->i_data_sem, subclass);
				5612	}
				5613
				5614	/*
				5615	* Standard function to be called on quota_on
				5616	*/
				5617	static int ext4_quota_on(struct super_block *sb, int type, int format_id,
				5618	const struct path *path)
				5619	{
				5620	int err;
				5621
				5622	if (!test_opt(sb, QUOTA))
				5623	return -EINVAL;
				5624
				5625	/* Quotafile not on the same filesystem? */
				5626	if (path->dentry->d_sb != sb)
				5627	return -EXDEV;
				5628	/* Journaling quota? */
				5629	if (EXT4_SB(sb)->s_qf_names[type]) {
				5630	/* Quotafile not in fs root? */
				5631	if (path->dentry->d_parent != sb->s_root)
				5632	ext4_msg(sb, KERN_WARNING,
				5633	"Quota file not on filesystem root. "
				5634	"Journaled quota will not work");
				5635	sb_dqopt(sb)->flags \|= DQUOT_NOLIST_DIRTY;
				5636	} else {
				5637	/*
				5638	* Clear the flag just in case mount options changed since
				5639	* last time.
				5640	*/
				5641	sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
				5642	}
				5643
				5644	/*
				5645	* When we journal data on quota file, we have to flush journal to see
				5646	* all updates to the file when we bypass pagecache...
				5647	*/
				5648	if (EXT4_SB(sb)->s_journal &&
				5649	ext4_should_journal_data(d_inode(path->dentry))) {
				5650	/*
				5651	* We don't need to lock updates but journal_flush() could
				5652	* otherwise be livelocked...
				5653	*/
				5654	jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
				5655	err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
				5656	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
				5657	if (err)
				5658	return err;
				5659	}
				5660
				5661	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
				5662	err = dquot_quota_on(sb, type, format_id, path);
				5663	if (err) {
				5664	lockdep_set_quota_inode(path->dentry->d_inode,
				5665	I_DATA_SEM_NORMAL);
				5666	} else {
				5667	struct inode *inode = d_inode(path->dentry);
				5668	handle_t *handle;
				5669
				5670	/*
				5671	* Set inode flags to prevent userspace from messing with quota
				5672	* files. If this fails, we return success anyway since quotas
				5673	* are already enabled and this is not a hard failure.
				5674	*/
				5675	inode_lock(inode);
				5676	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
				5677	if (IS_ERR(handle))
				5678	goto unlock_inode;
				5679	EXT4_I(inode)->i_flags \|= EXT4_NOATIME_FL \| EXT4_IMMUTABLE_FL;
				5680	inode_set_flags(inode, S_NOATIME \| S_IMMUTABLE,
				5681	S_NOATIME \| S_IMMUTABLE);
				5682	ext4_mark_inode_dirty(handle, inode);
				5683	ext4_journal_stop(handle);
				5684	unlock_inode:
				5685	inode_unlock(inode);
				5686	}
				5687	return err;
				5688	}
				5689
				5690	static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
				5691	unsigned int flags)
				5692	{
				5693	int err;
				5694	struct inode *qf_inode;
				5695	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
				5696	le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
				5697	le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
				5698	le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
				5699	};
				5700
				5701	BUG_ON(!ext4_has_feature_quota(sb));
				5702
				5703	if (!qf_inums[type])
				5704	return -EPERM;
				5705
				5706	qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
				5707	if (IS_ERR(qf_inode)) {
				5708	ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
				5709	return PTR_ERR(qf_inode);
				5710	}
				5711
				5712	/* Don't account quota for quota files to avoid recursion */
				5713	qf_inode->i_flags \|= S_NOQUOTA;
				5714	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
				5715	err = dquot_enable(qf_inode, type, format_id, flags);
				5716	if (err)
				5717	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
				5718	iput(qf_inode);
				5719
				5720	return err;
				5721	}
				5722
				5723	/* Enable usage tracking for all quota types. */
				5724	static int ext4_enable_quotas(struct super_block *sb)
				5725	{
				5726	int type, err = 0;
				5727	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
				5728	le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
				5729	le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
				5730	le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
				5731	};
				5732	bool quota_mopt[EXT4_MAXQUOTAS] = {
				5733	test_opt(sb, USRQUOTA),
				5734	test_opt(sb, GRPQUOTA),
				5735	test_opt(sb, PRJQUOTA),
				5736	};
				5737
				5738	sb_dqopt(sb)->flags \|= DQUOT_QUOTA_SYS_FILE \| DQUOT_NOLIST_DIRTY;
				5739	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
				5740	if (qf_inums[type]) {
				5741	err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
				5742	DQUOT_USAGE_ENABLED \|
				5743	(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
				5744	if (err) {
				5745	for (type--; type >= 0; type--)
				5746	dquot_quota_off(sb, type);
				5747
				5748	ext4_warning(sb,
				5749	"Failed to enable quota tracking "
				5750	"(type=%d, err=%d). Please run "
				5751	"e2fsck to fix.", type, err);
				5752	return err;
				5753	}
				5754	}
				5755	}
				5756	return 0;
				5757	}
				5758
				5759	static int ext4_quota_off(struct super_block *sb, int type)
				5760	{
				5761	struct inode *inode = sb_dqopt(sb)->files[type];
				5762	handle_t *handle;
				5763	int err;
				5764
				5765	/* Force all delayed allocation blocks to be allocated.
				5766	* Caller already holds s_umount sem */
				5767	if (test_opt(sb, DELALLOC))
				5768	sync_filesystem(sb);
				5769
				5770	if (!inode \|\| !igrab(inode))
				5771	goto out;
				5772
				5773	err = dquot_quota_off(sb, type);
				5774	if (err \|\| ext4_has_feature_quota(sb))
				5775	goto out_put;
				5776
				5777	inode_lock(inode);
				5778	/*
				5779	* Update modification times of quota files when userspace can
				5780	* start looking at them. If we fail, we return success anyway since
				5781	* this is not a hard failure and quotas are already disabled.
				5782	*/
				5783	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
				5784	if (IS_ERR(handle))
				5785	goto out_unlock;
				5786	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL \| EXT4_IMMUTABLE_FL);
				5787	inode_set_flags(inode, 0, S_NOATIME \| S_IMMUTABLE);
				5788	inode->i_mtime = inode->i_ctime = current_time(inode);
				5789	ext4_mark_inode_dirty(handle, inode);
				5790	ext4_journal_stop(handle);
				5791	out_unlock:
				5792	inode_unlock(inode);
				5793	out_put:
				5794	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
				5795	iput(inode);
				5796	return err;
				5797	out:
				5798	return dquot_quota_off(sb, type);
				5799	}
				5800
				5801	/* Read data from quotafile - avoid pagecache and such because we cannot afford
				5802	* acquiring the locks... As quota files are never truncated and quota code
				5803	* itself serializes the operations (and no one else should touch the files)
				5804	* we don't have to be afraid of races */
				5805	static ssize_t ext4_quota_read(struct super_block sb, int type, char data,
				5806	size_t len, loff_t off)
				5807	{
				5808	struct inode *inode = sb_dqopt(sb)->files[type];
				5809	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
				5810	int offset = off & (sb->s_blocksize - 1);
				5811	int tocopy;
				5812	size_t toread;
				5813	struct buffer_head *bh;
				5814	loff_t i_size = i_size_read(inode);
				5815
				5816	if (off > i_size)
				5817	return 0;
				5818	if (off+len > i_size)
				5819	len = i_size-off;
				5820	toread = len;
				5821	while (toread > 0) {
				5822	tocopy = sb->s_blocksize - offset < toread ?
				5823	sb->s_blocksize - offset : toread;
				5824	bh = ext4_bread(NULL, inode, blk, 0);
				5825	if (IS_ERR(bh))
				5826	return PTR_ERR(bh);
				5827	if (!bh) /* A hole? */
				5828	memset(data, 0, tocopy);
				5829	else
				5830	memcpy(data, bh->b_data+offset, tocopy);
				5831	brelse(bh);
				5832	offset = 0;
				5833	toread -= tocopy;
				5834	data += tocopy;
				5835	blk++;
				5836	}
				5837	return len;
				5838	}
				5839
				5840	/* Write to quotafile (we know the transaction is already started and has
				5841	* enough credits) */
				5842	static ssize_t ext4_quota_write(struct super_block *sb, int type,
				5843	const char *data, size_t len, loff_t off)
				5844	{
				5845	struct inode *inode = sb_dqopt(sb)->files[type];
				5846	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
				5847	int err, offset = off & (sb->s_blocksize - 1);
				5848	int retries = 0;
				5849	struct buffer_head *bh;
				5850	handle_t *handle = journal_current_handle();
				5851
				5852	if (EXT4_SB(sb)->s_journal && !handle) {
				5853	ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				5854	" cancelled because transaction is not started",
				5855	(unsigned long long)off, (unsigned long long)len);
				5856	return -EIO;
				5857	}
				5858	/*
				5859	* Since we account only one data block in transaction credits,
				5860	* then it is impossible to cross a block boundary.
				5861	*/
				5862	if (sb->s_blocksize - offset < len) {
				5863	ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				5864	" cancelled because not block aligned",
				5865	(unsigned long long)off, (unsigned long long)len);
				5866	return -EIO;
				5867	}
				5868
				5869	do {
				5870	bh = ext4_bread(handle, inode, blk,
				5871	EXT4_GET_BLOCKS_CREATE \|
				5872	EXT4_GET_BLOCKS_METADATA_NOFAIL);
				5873	} while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
				5874	ext4_should_retry_alloc(inode->i_sb, &retries));
				5875	if (IS_ERR(bh))
				5876	return PTR_ERR(bh);
				5877	if (!bh)
				5878	goto out;
				5879	BUFFER_TRACE(bh, "get write access");
				5880	err = ext4_journal_get_write_access(handle, bh);
				5881	if (err) {
				5882	brelse(bh);
				5883	return err;
				5884	}
				5885	lock_buffer(bh);
				5886	memcpy(bh->b_data+offset, data, len);
				5887	flush_dcache_page(bh->b_page);
				5888	unlock_buffer(bh);
				5889	err = ext4_handle_dirty_metadata(handle, NULL, bh);
				5890	brelse(bh);
				5891	out:
				5892	if (inode->i_size < off + len) {
				5893	i_size_write(inode, off + len);
				5894	EXT4_I(inode)->i_disksize = inode->i_size;
				5895	ext4_mark_inode_dirty(handle, inode);
				5896	}
				5897	return len;
				5898	}
				5899
				5900	static int ext4_get_next_id(struct super_block sb, struct kqid qid)
				5901	{
				5902	const struct quota_format_ops *ops;
				5903
				5904	if (!sb_has_quota_loaded(sb, qid->type))
				5905	return -ESRCH;
				5906	ops = sb_dqopt(sb)->ops[qid->type];
				5907	if (!ops \|\| !ops->get_next_id)
				5908	return -ENOSYS;
				5909	return dquot_get_next_id(sb, qid);
				5910	}
				5911	#endif
				5912
				5913	static struct dentry ext4_mount(struct file_system_type fs_type, int flags,
				5914	const char dev_name, void data)
				5915	{
				5916	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
				5917	}
				5918
				5919	#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
				5920	static inline void register_as_ext2(void)
				5921	{
				5922	int err = register_filesystem(&ext2_fs_type);
				5923	if (err)
				5924	printk(KERN_WARNING
				5925	"EXT4-fs: Unable to register as ext2 (%d)\n", err);
				5926	}
				5927
				5928	static inline void unregister_as_ext2(void)
				5929	{
				5930	unregister_filesystem(&ext2_fs_type);
				5931	}
				5932
				5933	static inline int ext2_feature_set_ok(struct super_block *sb)
				5934	{
				5935	if (ext4_has_unknown_ext2_incompat_features(sb))
				5936	return 0;
				5937	if (sb_rdonly(sb))
				5938	return 1;
				5939	if (ext4_has_unknown_ext2_ro_compat_features(sb))
				5940	return 0;
				5941	return 1;
				5942	}
				5943	#else
				5944	static inline void register_as_ext2(void) { }
				5945	static inline void unregister_as_ext2(void) { }
				5946	static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
				5947	#endif
				5948
				5949	static inline void register_as_ext3(void)
				5950	{
				5951	int err = register_filesystem(&ext3_fs_type);
				5952	if (err)
				5953	printk(KERN_WARNING
				5954	"EXT4-fs: Unable to register as ext3 (%d)\n", err);
				5955	}
				5956
				5957	static inline void unregister_as_ext3(void)
				5958	{
				5959	unregister_filesystem(&ext3_fs_type);
				5960	}
				5961
				5962	static inline int ext3_feature_set_ok(struct super_block *sb)
				5963	{
				5964	if (ext4_has_unknown_ext3_incompat_features(sb))
				5965	return 0;
				5966	if (!ext4_has_feature_journal(sb))
				5967	return 0;
				5968	if (sb_rdonly(sb))
				5969	return 1;
				5970	if (ext4_has_unknown_ext3_ro_compat_features(sb))
				5971	return 0;
				5972	return 1;
				5973	}
				5974
				5975	static struct file_system_type ext4_fs_type = {
				5976	.owner = THIS_MODULE,
				5977	.name = "ext4",
				5978	.mount = ext4_mount,
				5979	.kill_sb = kill_block_super,
				5980	.fs_flags = FS_REQUIRES_DEV,
				5981	};
				5982	MODULE_ALIAS_FS("ext4");
				5983
				5984	/* Shared across all ext4 file systems */
				5985	wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
				5986
				5987	static int __init ext4_init_fs(void)
				5988	{
				5989	int i, err;
				5990
				5991	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
				5992	ext4_li_info = NULL;
				5993	mutex_init(&ext4_li_mtx);
				5994
				5995	/* Build-time check for flags consistency */
				5996	ext4_check_flag_values();
				5997
				5998	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
				5999	init_waitqueue_head(&ext4__ioend_wq[i]);
				6000
				6001	err = ext4_init_es();
				6002	if (err)
				6003	return err;
				6004
				6005	err = ext4_init_pageio();
				6006	if (err)
				6007	goto out5;
				6008
				6009	err = ext4_init_system_zone();
				6010	if (err)
				6011	goto out4;
				6012
				6013	err = ext4_init_sysfs();
				6014	if (err)
				6015	goto out3;
				6016
				6017	err = ext4_init_mballoc();
				6018	if (err)
				6019	goto out2;
				6020	err = init_inodecache();
				6021	if (err)
				6022	goto out1;
				6023	register_as_ext3();
				6024	register_as_ext2();
				6025	err = register_filesystem(&ext4_fs_type);
				6026	if (err)
				6027	goto out;
				6028
				6029	return 0;
				6030	out:
				6031	unregister_as_ext2();
				6032	unregister_as_ext3();
				6033	destroy_inodecache();
				6034	out1:
				6035	ext4_exit_mballoc();
				6036	out2:
				6037	ext4_exit_sysfs();
				6038	out3:
				6039	ext4_exit_system_zone();
				6040	out4:
				6041	ext4_exit_pageio();
				6042	out5:
				6043	ext4_exit_es();
				6044
				6045	return err;
				6046	}
				6047
				6048	static void __exit ext4_exit_fs(void)
				6049	{
				6050	ext4_destroy_lazyinit_thread();
				6051	unregister_as_ext2();
				6052	unregister_as_ext3();
				6053	unregister_filesystem(&ext4_fs_type);
				6054	destroy_inodecache();
				6055	ext4_exit_mballoc();
				6056	ext4_exit_sysfs();
				6057	ext4_exit_system_zone();
				6058	ext4_exit_pageio();
				6059	ext4_exit_es();
				6060	}
				6061
				6062	MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
				6063	MODULE_DESCRIPTION("Fourth Extended Filesystem");
				6064	MODULE_LICENSE("GPL");
				6065	MODULE_SOFTDEP("pre: crc32c");
				6066	module_init(ext4_init_fs)
				6067	module_exit(ext4_exit_fs)