Blame - ap/os/linux/linux-3.4.x/fs/ext4/super.c - T106_DC

blob: 2e26a542c8189470784ce28c64ab052fa9855ba1 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* linux/fs/ext4/super.c
				3	*
				4	* Copyright (C) 1992, 1993, 1994, 1995
				5	* Remy Card (card@masi.ibp.fr)
				6	* Laboratoire MASI - Institut Blaise Pascal
				7	* Universite Pierre et Marie Curie (Paris VI)
				8	*
				9	* from
				10	*
				11	* linux/fs/minix/inode.c
				12	*
				13	* Copyright (C) 1991, 1992 Linus Torvalds
				14	*
				15	* Big-endian to little-endian byte-swapping/bitmaps by
				16	* David S. Miller (davem@caip.rutgers.edu), 1995
				17	*/
				18
				19	#include <linux/module.h>
				20	#include <linux/string.h>
				21	#include <linux/fs.h>
				22	#include <linux/time.h>
				23	#include <linux/vmalloc.h>
				24	#include <linux/jbd2.h>
				25	#include <linux/slab.h>
				26	#include <linux/init.h>
				27	#include <linux/blkdev.h>
				28	#include <linux/parser.h>
				29	#include <linux/buffer_head.h>
				30	#include <linux/exportfs.h>
				31	#include <linux/vfs.h>
				32	#include <linux/random.h>
				33	#include <linux/mount.h>
				34	#include <linux/namei.h>
				35	#include <linux/quotaops.h>
				36	#include <linux/seq_file.h>
				37	#include <linux/proc_fs.h>
				38	#include <linux/ctype.h>
				39	#include <linux/log2.h>
				40	#include <linux/crc16.h>
				41	#include <linux/cleancache.h>
				42	#include <asm/uaccess.h>
				43
				44	#include <linux/kthread.h>
				45	#include <linux/freezer.h>
				46
				47	#include "ext4.h"
				48	#include "ext4_extents.h"
				49	#include "ext4_jbd2.h"
				50	#include "xattr.h"
				51	#include "acl.h"
				52	#include "mballoc.h"
				53
				54	#define CREATE_TRACE_POINTS
				55	#include <trace/events/ext4.h>
				56
				57	static struct proc_dir_entry *ext4_proc_root;
				58	static struct kset *ext4_kset;
				59	static struct ext4_lazy_init *ext4_li_info;
				60	static struct mutex ext4_li_mtx;
				61	static struct ext4_features *ext4_feat;
				62
				63	static int ext4_load_journal(struct super_block , struct ext4_super_block ,
				64	unsigned long journal_devnum);
				65	static int ext4_show_options(struct seq_file seq, struct dentry root);
				66	static int ext4_commit_super(struct super_block *sb, int sync);
				67	static void ext4_mark_recovery_complete(struct super_block *sb,
				68	struct ext4_super_block *es);
				69	static void ext4_clear_journal_err(struct super_block *sb,
				70	struct ext4_super_block *es);
				71	static int ext4_sync_fs(struct super_block *sb, int wait);
				72	static const char ext4_decode_error(struct super_block sb, int errno,
				73	char nbuf[16]);
				74	static int ext4_remount(struct super_block sb, int flags, char *data);
				75	static int ext4_statfs(struct dentry dentry, struct kstatfs buf);
				76	static int ext4_unfreeze(struct super_block *sb);
				77	static void ext4_write_super(struct super_block *sb);
				78	static int ext4_freeze(struct super_block *sb);
				79	static struct dentry ext4_mount(struct file_system_type fs_type, int flags,
				80	const char dev_name, void data);
				81	static inline int ext2_feature_set_ok(struct super_block *sb);
				82	static inline int ext3_feature_set_ok(struct super_block *sb);
				83	static int ext4_feature_set_ok(struct super_block *sb, int readonly);
				84	static void ext4_destroy_lazyinit_thread(void);
				85	static void ext4_unregister_li_request(struct super_block *sb);
				86	static void ext4_clear_request_list(void);
				87
				88	#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
				89	static struct file_system_type ext2_fs_type = {
				90	.owner = THIS_MODULE,
				91	.name = "ext2",
				92	.mount = ext4_mount,
				93	.kill_sb = kill_block_super,
				94	.fs_flags = FS_REQUIRES_DEV,
				95	};
				96	#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
				97	#else
				98	#define IS_EXT2_SB(sb) (0)
				99	#endif
				100
				101
				102	#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
				103	static struct file_system_type ext3_fs_type = {
				104	.owner = THIS_MODULE,
				105	.name = "ext3",
				106	.mount = ext4_mount,
				107	.kill_sb = kill_block_super,
				108	.fs_flags = FS_REQUIRES_DEV,
				109	};
				110	#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
				111	#else
				112	#define IS_EXT3_SB(sb) (0)
				113	#endif
				114
				115	void *ext4_kvmalloc(size_t size, gfp_t flags)
				116	{
				117	void *ret;
				118
				119	ret = kmalloc(size, flags);
				120	if (!ret)
				121	ret = __vmalloc(size, flags, PAGE_KERNEL);
				122	return ret;
				123	}
				124
				125	void *ext4_kvzalloc(size_t size, gfp_t flags)
				126	{
				127	void *ret;
				128
				129	ret = kzalloc(size, flags);
				130	if (!ret)
				131	ret = __vmalloc(size, flags \| __GFP_ZERO, PAGE_KERNEL);
				132	return ret;
				133	}
				134
				135	void ext4_kvfree(void *ptr)
				136	{
				137	if (is_vmalloc_addr(ptr))
				138	vfree(ptr);
				139	else
				140	kfree(ptr);
				141
				142	}
				143
				144	ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
				145	struct ext4_group_desc *bg)
				146	{
				147	return le32_to_cpu(bg->bg_block_bitmap_lo) \|
				148	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				149	(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
				150	}
				151
				152	ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
				153	struct ext4_group_desc *bg)
				154	{
				155	return le32_to_cpu(bg->bg_inode_bitmap_lo) \|
				156	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				157	(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
				158	}
				159
				160	ext4_fsblk_t ext4_inode_table(struct super_block *sb,
				161	struct ext4_group_desc *bg)
				162	{
				163	return le32_to_cpu(bg->bg_inode_table_lo) \|
				164	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				165	(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
				166	}
				167
				168	__u32 ext4_free_group_clusters(struct super_block *sb,
				169	struct ext4_group_desc *bg)
				170	{
				171	return le16_to_cpu(bg->bg_free_blocks_count_lo) \|
				172	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				173	(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
				174	}
				175
				176	__u32 ext4_free_inodes_count(struct super_block *sb,
				177	struct ext4_group_desc *bg)
				178	{
				179	return le16_to_cpu(bg->bg_free_inodes_count_lo) \|
				180	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				181	(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
				182	}
				183
				184	__u32 ext4_used_dirs_count(struct super_block *sb,
				185	struct ext4_group_desc *bg)
				186	{
				187	return le16_to_cpu(bg->bg_used_dirs_count_lo) \|
				188	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				189	(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
				190	}
				191
				192	__u32 ext4_itable_unused_count(struct super_block *sb,
				193	struct ext4_group_desc *bg)
				194	{
				195	return le16_to_cpu(bg->bg_itable_unused_lo) \|
				196	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				197	(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
				198	}
				199
				200	void ext4_block_bitmap_set(struct super_block *sb,
				201	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				202	{
				203	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
				204	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				205	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
				206	}
				207
				208	void ext4_inode_bitmap_set(struct super_block *sb,
				209	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				210	{
				211	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
				212	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				213	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
				214	}
				215
				216	void ext4_inode_table_set(struct super_block *sb,
				217	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				218	{
				219	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
				220	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				221	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
				222	}
				223
				224	void ext4_free_group_clusters_set(struct super_block *sb,
				225	struct ext4_group_desc *bg, __u32 count)
				226	{
				227	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
				228	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				229	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
				230	}
				231
				232	void ext4_free_inodes_set(struct super_block *sb,
				233	struct ext4_group_desc *bg, __u32 count)
				234	{
				235	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
				236	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				237	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
				238	}
				239
				240	void ext4_used_dirs_set(struct super_block *sb,
				241	struct ext4_group_desc *bg, __u32 count)
				242	{
				243	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
				244	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				245	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
				246	}
				247
				248	void ext4_itable_unused_set(struct super_block *sb,
				249	struct ext4_group_desc *bg, __u32 count)
				250	{
				251	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
				252	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				253	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
				254	}
				255
				256
				257	/* Just increment the non-pointer handle value */
				258	static handle_t *ext4_get_nojournal(void)
				259	{
				260	handle_t *handle = current->journal_info;
				261	unsigned long ref_cnt = (unsigned long)handle;
				262
				263	BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
				264
				265	ref_cnt++;
				266	handle = (handle_t *)ref_cnt;
				267
				268	current->journal_info = handle;
				269	return handle;
				270	}
				271
				272
				273	/* Decrement the non-pointer handle value */
				274	static void ext4_put_nojournal(handle_t *handle)
				275	{
				276	unsigned long ref_cnt = (unsigned long)handle;
				277
				278	BUG_ON(ref_cnt == 0);
				279
				280	ref_cnt--;
				281	handle = (handle_t *)ref_cnt;
				282
				283	current->journal_info = handle;
				284	}
				285
				286	/*
				287	* Wrappers for jbd2_journal_start/end.
				288	*
				289	* The only special thing we need to do here is to make sure that all
				290	* journal_end calls result in the superblock being marked dirty, so
				291	* that sync() will call the filesystem's write_super callback if
				292	* appropriate.
				293	*
				294	* To avoid j_barrier hold in userspace when a user calls freeze(),
				295	* ext4 prevents a new handle from being started by s_frozen, which
				296	* is in an upper layer.
				297	*/
				298	handle_t ext4_journal_start_sb(struct super_block sb, int nblocks)
				299	{
				300	journal_t *journal;
				301	handle_t *handle;
				302
				303	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
				304	if (sb->s_flags & MS_RDONLY)
				305	return ERR_PTR(-EROFS);
				306
				307	journal = EXT4_SB(sb)->s_journal;
				308	handle = ext4_journal_current_handle();
				309
				310	/*
				311	* If a handle has been started, it should be allowed to
				312	* finish, otherwise deadlock could happen between freeze
				313	* and others(e.g. truncate) due to the restart of the
				314	* journal handle if the filesystem is forzen and active
				315	* handles are not stopped.
				316	*/
				317	if (!handle)
				318	vfs_check_frozen(sb, SB_FREEZE_TRANS);
				319
				320	if (!journal)
				321	return ext4_get_nojournal();
				322	/*
				323	* Special case here: if the journal has aborted behind our
				324	* backs (eg. EIO in the commit thread), then we still need to
				325	* take the FS itself readonly cleanly.
				326	*/
				327	if (is_journal_aborted(journal)) {
				328	ext4_abort(sb, "Detected aborted journal");
				329	return ERR_PTR(-EROFS);
				330	}
				331	return jbd2_journal_start(journal, nblocks);
				332	}
				333
				334	/*
				335	* The only special thing we need to do here is to make sure that all
				336	* jbd2_journal_stop calls result in the superblock being marked dirty, so
				337	* that sync() will call the filesystem's write_super callback if
				338	* appropriate.
				339	*/
				340	int __ext4_journal_stop(const char where, unsigned int line, handle_t handle)
				341	{
				342	struct super_block *sb;
				343	int err;
				344	int rc;
				345
				346	if (!ext4_handle_valid(handle)) {
				347	ext4_put_nojournal(handle);
				348	return 0;
				349	}
				350	sb = handle->h_transaction->t_journal->j_private;
				351	err = handle->h_err;
				352	rc = jbd2_journal_stop(handle);
				353
				354	if (!err)
				355	err = rc;
				356	if (err)
				357	__ext4_std_error(sb, where, line, err);
				358	return err;
				359	}
				360
				361	void ext4_journal_abort_handle(const char *caller, unsigned int line,
				362	const char err_fn, struct buffer_head bh,
				363	handle_t *handle, int err)
				364	{
				365	char nbuf[16];
				366	const char *errstr = ext4_decode_error(NULL, err, nbuf);
				367
				368	BUG_ON(!ext4_handle_valid(handle));
				369
				370	if (bh)
				371	BUFFER_TRACE(bh, "abort");
				372
				373	if (!handle->h_err)
				374	handle->h_err = err;
				375
				376	if (is_handle_aborted(handle))
				377	return;
				378
				379	printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
				380	caller, line, errstr, err_fn);
				381
				382	jbd2_journal_abort_handle(handle);
				383	}
				384
				385	static void __save_error_info(struct super_block sb, const char func,
				386	unsigned int line)
				387	{
				388	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				389
				390	EXT4_SB(sb)->s_mount_state \|= EXT4_ERROR_FS;
				391	es->s_state \|= cpu_to_le16(EXT4_ERROR_FS);
				392	es->s_last_error_time = cpu_to_le32(get_seconds());
				393	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
				394	es->s_last_error_line = cpu_to_le32(line);
				395	if (!es->s_first_error_time) {
				396	es->s_first_error_time = es->s_last_error_time;
				397	strncpy(es->s_first_error_func, func,
				398	sizeof(es->s_first_error_func));
				399	es->s_first_error_line = cpu_to_le32(line);
				400	es->s_first_error_ino = es->s_last_error_ino;
				401	es->s_first_error_block = es->s_last_error_block;
				402	}
				403	/*
				404	* Start the daily error reporting function if it hasn't been
				405	* started already
				406	*/
				407	if (!es->s_error_count)
				408	mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 246060*HZ);
				409	es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
				410	}
				411
				412	static void save_error_info(struct super_block sb, const char func,
				413	unsigned int line)
				414	{
				415	__save_error_info(sb, func, line);
				416	ext4_commit_super(sb, 1);
				417	}
				418
				419	/*
				420	* The del_gendisk() function uninitializes the disk-specific data
				421	* structures, including the bdi structure, without telling anyone
				422	* else. Once this happens, any attempt to call mark_buffer_dirty()
				423	* (for example, by ext4_commit_super), will cause a kernel OOPS.
				424	* This is a kludge to prevent these oops until we can put in a proper
				425	* hook in del_gendisk() to inform the VFS and file system layers.
				426	*/
				427	static int block_device_ejected(struct super_block *sb)
				428	{
				429	struct inode *bd_inode = sb->s_bdev->bd_inode;
				430	struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
				431
				432	return bdi->dev == NULL;
				433	}
				434
				435	static void ext4_journal_commit_callback(journal_t journal, transaction_t txn)
				436	{
				437	struct super_block *sb = journal->j_private;
				438	struct ext4_sb_info *sbi = EXT4_SB(sb);
				439	int error = is_journal_aborted(journal);
				440	struct ext4_journal_cb_entry *jce;
				441
				442	BUG_ON(txn->t_state == T_FINISHED);
				443	spin_lock(&sbi->s_md_lock);
				444	while (!list_empty(&txn->t_private_list)) {
				445	jce = list_entry(txn->t_private_list.next,
				446	struct ext4_journal_cb_entry, jce_list);
				447	list_del_init(&jce->jce_list);
				448	spin_unlock(&sbi->s_md_lock);
				449	jce->jce_func(sb, jce, error);
				450	spin_lock(&sbi->s_md_lock);
				451	}
				452	spin_unlock(&sbi->s_md_lock);
				453	}
				454
				455	/* Deal with the reporting of failure conditions on a filesystem such as
				456	* inconsistencies detected or read IO failures.
				457	*
				458	* On ext2, we can store the error state of the filesystem in the
				459	* superblock. That is not possible on ext4, because we may have other
				460	* write ordering constraints on the superblock which prevent us from
				461	* writing it out straight away; and given that the journal is about to
				462	* be aborted, we can't rely on the current, or future, transactions to
				463	* write out the superblock safely.
				464	*
				465	* We'll just use the jbd2_journal_abort() error code to record an error in
				466	* the journal instead. On recovery, the journal will complain about
				467	* that error until we've noted it down and cleared it.
				468	*/
				469
				470	static void ext4_handle_error(struct super_block *sb)
				471	{
				472	if (sb->s_flags & MS_RDONLY)
				473	return;
				474
				475	if (!test_opt(sb, ERRORS_CONT)) {
				476	journal_t *journal = EXT4_SB(sb)->s_journal;
				477
				478	EXT4_SB(sb)->s_mount_flags \|= EXT4_MF_FS_ABORTED;
				479	if (journal)
				480	jbd2_journal_abort(journal, -EIO);
				481	}
				482	if (test_opt(sb, ERRORS_RO)) {
				483	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
				484	sb->s_flags \|= MS_RDONLY;
				485	}
				486	if (test_opt(sb, ERRORS_PANIC))
				487	panic("EXT4-fs (device %s): panic forced after error\n",
				488	sb->s_id);
				489	}
				490
				491	void __ext4_error(struct super_block sb, const char function,
				492	unsigned int line, const char *fmt, ...)
				493	{
				494	struct va_format vaf;
				495	va_list args;
				496
				497	va_start(args, fmt);
				498	vaf.fmt = fmt;
				499	vaf.va = &args;
				500	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
				501	sb->s_id, function, line, current->comm, &vaf);
				502	va_end(args);
				503	save_error_info(sb, function, line);
				504
				505	ext4_handle_error(sb);
				506	}
				507
				508	void ext4_error_inode(struct inode inode, const char function,
				509	unsigned int line, ext4_fsblk_t block,
				510	const char *fmt, ...)
				511	{
				512	va_list args;
				513	struct va_format vaf;
				514	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
				515
				516	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
				517	es->s_last_error_block = cpu_to_le64(block);
				518	save_error_info(inode->i_sb, function, line);
				519	va_start(args, fmt);
				520	vaf.fmt = fmt;
				521	vaf.va = &args;
				522	if (block)
				523	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
				524	"inode #%lu: block %llu: comm %s: %pV\n",
				525	inode->i_sb->s_id, function, line, inode->i_ino,
				526	block, current->comm, &vaf);
				527	else
				528	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
				529	"inode #%lu: comm %s: %pV\n",
				530	inode->i_sb->s_id, function, line, inode->i_ino,
				531	current->comm, &vaf);
				532	va_end(args);
				533
				534	ext4_handle_error(inode->i_sb);
				535	}
				536
				537	void ext4_error_file(struct file file, const char function,
				538	unsigned int line, ext4_fsblk_t block,
				539	const char *fmt, ...)
				540	{
				541	va_list args;
				542	struct va_format vaf;
				543	struct ext4_super_block *es;
				544	struct inode *inode = file->f_dentry->d_inode;
				545	char pathname[80], *path;
				546
				547	es = EXT4_SB(inode->i_sb)->s_es;
				548	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
				549	save_error_info(inode->i_sb, function, line);
				550	path = d_path(&(file->f_path), pathname, sizeof(pathname));
				551	if (IS_ERR(path))
				552	path = "(unknown)";
				553	va_start(args, fmt);
				554	vaf.fmt = fmt;
				555	vaf.va = &args;
				556	if (block)
				557	printk(KERN_CRIT
				558	"EXT4-fs error (device %s): %s:%d: inode #%lu: "
				559	"block %llu: comm %s: path %s: %pV\n",
				560	inode->i_sb->s_id, function, line, inode->i_ino,
				561	block, current->comm, path, &vaf);
				562	else
				563	printk(KERN_CRIT
				564	"EXT4-fs error (device %s): %s:%d: inode #%lu: "
				565	"comm %s: path %s: %pV\n",
				566	inode->i_sb->s_id, function, line, inode->i_ino,
				567	current->comm, path, &vaf);
				568	va_end(args);
				569
				570	ext4_handle_error(inode->i_sb);
				571	}
				572
				573	static const char ext4_decode_error(struct super_block sb, int errno,
				574	char nbuf[16])
				575	{
				576	char *errstr = NULL;
				577
				578	switch (errno) {
				579	case -EIO:
				580	errstr = "IO failure";
				581	break;
				582	case -ENOMEM:
				583	errstr = "Out of memory";
				584	break;
				585	case -EROFS:
				586	if (!sb \|\| (EXT4_SB(sb)->s_journal &&
				587	EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
				588	errstr = "Journal has aborted";
				589	else
				590	errstr = "Readonly filesystem";
				591	break;
				592	default:
				593	/* If the caller passed in an extra buffer for unknown
				594	* errors, textualise them now. Else we just return
				595	* NULL. */
				596	if (nbuf) {
				597	/* Check for truncated error codes... */
				598	if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				599	errstr = nbuf;
				600	}
				601	break;
				602	}
				603
				604	return errstr;
				605	}
				606
				607	/* __ext4_std_error decodes expected errors from journaling functions
				608	* automatically and invokes the appropriate error response. */
				609
				610	void __ext4_std_error(struct super_block sb, const char function,
				611	unsigned int line, int errno)
				612	{
				613	char nbuf[16];
				614	const char *errstr;
				615
				616	/* Special case: if the error is EROFS, and we're not already
				617	* inside a transaction, then there's really no point in logging
				618	* an error. */
				619	if (errno == -EROFS && journal_current_handle() == NULL &&
				620	(sb->s_flags & MS_RDONLY))
				621	return;
				622
				623	errstr = ext4_decode_error(sb, errno, nbuf);
				624	printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
				625	sb->s_id, function, line, errstr);
				626	save_error_info(sb, function, line);
				627
				628	ext4_handle_error(sb);
				629	}
				630
				631	/*
				632	* ext4_abort is a much stronger failure handler than ext4_error. The
				633	* abort function may be used to deal with unrecoverable failures such
				634	* as journal IO errors or ENOMEM at a critical moment in log management.
				635	*
				636	* We unconditionally force the filesystem into an ABORT\|READONLY state,
				637	* unless the error response on the fs has been set to panic in which
				638	* case we take the easy way out and panic immediately.
				639	*/
				640
				641	void __ext4_abort(struct super_block sb, const char function,
				642	unsigned int line, const char *fmt, ...)
				643	{
				644	va_list args;
				645
				646	save_error_info(sb, function, line);
				647	va_start(args, fmt);
				648	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
				649	function, line);
				650	vprintk(fmt, args);
				651	printk("\n");
				652	va_end(args);
				653
				654	if ((sb->s_flags & MS_RDONLY) == 0) {
				655	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
				656	sb->s_flags \|= MS_RDONLY;
				657	EXT4_SB(sb)->s_mount_flags \|= EXT4_MF_FS_ABORTED;
				658	if (EXT4_SB(sb)->s_journal)
				659	jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
				660	save_error_info(sb, function, line);
				661	}
				662	if (test_opt(sb, ERRORS_PANIC))
				663	panic("EXT4-fs panic from previous error\n");
				664	}
				665
				666	void ext4_msg(struct super_block sb, const char prefix, const char *fmt, ...)
				667	{
				668	struct va_format vaf;
				669	va_list args;
				670
				671	va_start(args, fmt);
				672	vaf.fmt = fmt;
				673	vaf.va = &args;
				674	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
				675	va_end(args);
				676	}
				677
				678	void __ext4_warning(struct super_block sb, const char function,
				679	unsigned int line, const char *fmt, ...)
				680	{
				681	struct va_format vaf;
				682	va_list args;
				683
				684	va_start(args, fmt);
				685	vaf.fmt = fmt;
				686	vaf.va = &args;
				687	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
				688	sb->s_id, function, line, &vaf);
				689	va_end(args);
				690	}
				691
				692	void __ext4_grp_locked_error(const char *function, unsigned int line,
				693	struct super_block *sb, ext4_group_t grp,
				694	unsigned long ino, ext4_fsblk_t block,
				695	const char *fmt, ...)
				696	__releases(bitlock)
				697	__acquires(bitlock)
				698	{
				699	struct va_format vaf;
				700	va_list args;
				701	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				702
				703	es->s_last_error_ino = cpu_to_le32(ino);
				704	es->s_last_error_block = cpu_to_le64(block);
				705	__save_error_info(sb, function, line);
				706
				707	va_start(args, fmt);
				708
				709	vaf.fmt = fmt;
				710	vaf.va = &args;
				711	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
				712	sb->s_id, function, line, grp);
				713	if (ino)
				714	printk(KERN_CONT "inode %lu: ", ino);
				715	if (block)
				716	printk(KERN_CONT "block %llu:", (unsigned long long) block);
				717	printk(KERN_CONT "%pV\n", &vaf);
				718	va_end(args);
				719
				720	if (test_opt(sb, ERRORS_CONT)) {
				721	ext4_commit_super(sb, 0);
				722	return;
				723	}
				724
				725	ext4_unlock_group(sb, grp);
				726	ext4_handle_error(sb);
				727	/*
				728	* We only get here in the ERRORS_RO case; relocking the group
				729	* may be dangerous, but nothing bad will happen since the
				730	* filesystem will have already been marked read/only and the
				731	* journal has been aborted. We return 1 as a hint to callers
				732	* who might what to use the return value from
				733	* ext4_grp_locked_error() to distinguish between the
				734	* ERRORS_CONT and ERRORS_RO case, and perhaps return more
				735	* aggressively from the ext4 function in question, with a
				736	* more appropriate error code.
				737	*/
				738	ext4_lock_group(sb, grp);
				739	return;
				740	}
				741
				742	void ext4_update_dynamic_rev(struct super_block *sb)
				743	{
				744	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				745
				746	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
				747	return;
				748
				749	ext4_warning(sb,
				750	"updating to rev %d because of new feature flag, "
				751	"running e2fsck is recommended",
				752	EXT4_DYNAMIC_REV);
				753
				754	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
				755	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
				756	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
				757	/* leave es->s_feature_compat flags alone /
				758	/* es->s_uuid will be set by e2fsck if empty */
				759
				760	/*
				761	* The rest of the superblock fields should be zero, and if not it
				762	* means they are likely already in use, so leave them alone. We
				763	* can leave it up to e2fsck to clean up any inconsistencies there.
				764	*/
				765	}
				766
				767	/*
				768	* Open the external journal device
				769	*/
				770	static struct block_device ext4_blkdev_get(dev_t dev, struct super_block sb)
				771	{
				772	struct block_device *bdev;
				773	char b[BDEVNAME_SIZE];
				774
				775	bdev = blkdev_get_by_dev(dev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL, sb);
				776	if (IS_ERR(bdev))
				777	goto fail;
				778	return bdev;
				779
				780	fail:
				781	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
				782	__bdevname(dev, b), PTR_ERR(bdev));
				783	return NULL;
				784	}
				785
				786	/*
				787	* Release the journal device
				788	*/
				789	static int ext4_blkdev_put(struct block_device *bdev)
				790	{
				791	return blkdev_put(bdev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL);
				792	}
				793
				794	static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
				795	{
				796	struct block_device *bdev;
				797	int ret = -ENODEV;
				798
				799	bdev = sbi->journal_bdev;
				800	if (bdev) {
				801	ret = ext4_blkdev_put(bdev);
				802	sbi->journal_bdev = NULL;
				803	}
				804	return ret;
				805	}
				806
				807	static inline struct inode orphan_list_entry(struct list_head l)
				808	{
				809	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
				810	}
				811
				812	static void dump_orphan_list(struct super_block sb, struct ext4_sb_info sbi)
				813	{
				814	struct list_head *l;
				815
				816	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
				817	le32_to_cpu(sbi->s_es->s_last_orphan));
				818
				819	printk(KERN_ERR "sb_info orphan list:\n");
				820	list_for_each(l, &sbi->s_orphan) {
				821	struct inode *inode = orphan_list_entry(l);
				822	printk(KERN_ERR " "
				823	"inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
				824	inode->i_sb->s_id, inode->i_ino, inode,
				825	inode->i_mode, inode->i_nlink,
				826	NEXT_ORPHAN(inode));
				827	}
				828	}
				829
				830	static void ext4_put_super(struct super_block *sb)
				831	{
				832	struct ext4_sb_info *sbi = EXT4_SB(sb);
				833	struct ext4_super_block *es = sbi->s_es;
				834	int i, err;
				835
				836	ext4_unregister_li_request(sb);
				837	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED \| DQUOT_LIMITS_ENABLED);
				838
				839	flush_workqueue(sbi->dio_unwritten_wq);
				840	destroy_workqueue(sbi->dio_unwritten_wq);
				841
				842	lock_super(sb);
				843	if (sbi->s_journal) {
				844	err = jbd2_journal_destroy(sbi->s_journal);
				845	sbi->s_journal = NULL;
				846	if (err < 0)
				847	ext4_abort(sb, "Couldn't clean up the journal");
				848	}
				849
				850	del_timer(&sbi->s_err_report);
				851	ext4_release_system_zone(sb);
				852	ext4_mb_release(sb);
				853	ext4_ext_release(sb);
				854	ext4_xattr_put_super(sb);
				855
				856	if (!(sb->s_flags & MS_RDONLY)) {
				857	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
				858	es->s_state = cpu_to_le16(sbi->s_mount_state);
				859	}
				860	if (sb->s_dirt \|\| !(sb->s_flags & MS_RDONLY))
				861	ext4_commit_super(sb, 1);
				862
				863	if (sbi->s_proc) {
				864	remove_proc_entry("options", sbi->s_proc);
				865	remove_proc_entry(sb->s_id, ext4_proc_root);
				866	}
				867	kobject_del(&sbi->s_kobj);
				868
				869	for (i = 0; i < sbi->s_gdb_count; i++)
				870	brelse(sbi->s_group_desc[i]);
				871	ext4_kvfree(sbi->s_group_desc);
				872	ext4_kvfree(sbi->s_flex_groups);
				873	percpu_counter_destroy(&sbi->s_freeclusters_counter);
				874	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				875	percpu_counter_destroy(&sbi->s_dirs_counter);
				876	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
				877	brelse(sbi->s_sbh);
				878	#ifdef CONFIG_QUOTA
				879	for (i = 0; i < MAXQUOTAS; i++)
				880	kfree(sbi->s_qf_names[i]);
				881	#endif
				882
				883	/* Debugging code just in case the in-memory inode orphan list
				884	* isn't empty. The on-disk one can be non-empty if we've
				885	* detected an error and taken the fs readonly, but the
				886	* in-memory list had better be clean by this point. */
				887	if (!list_empty(&sbi->s_orphan))
				888	dump_orphan_list(sb, sbi);
				889	J_ASSERT(list_empty(&sbi->s_orphan));
				890
				891	sync_blockdev(sb->s_bdev);
				892	invalidate_bdev(sb->s_bdev);
				893	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
				894	/*
				895	* Invalidate the journal device's buffers. We don't want them
				896	* floating about in memory - the physical journal device may
				897	* hotswapped, and it breaks the `ro-after' testing code.
				898	*/
				899	sync_blockdev(sbi->journal_bdev);
				900	invalidate_bdev(sbi->journal_bdev);
				901	ext4_blkdev_remove(sbi);
				902	}
				903	if (sbi->s_mmp_tsk)
				904	kthread_stop(sbi->s_mmp_tsk);
				905	sb->s_fs_info = NULL;
				906	/*
				907	* Now that we are completely done shutting down the
				908	* superblock, we need to actually destroy the kobject.
				909	*/
				910	unlock_super(sb);
				911	kobject_put(&sbi->s_kobj);
				912	wait_for_completion(&sbi->s_kobj_unregister);
				913	kfree(sbi->s_blockgroup_lock);
				914	kfree(sbi);
				915	}
				916
				917	static struct kmem_cache *ext4_inode_cachep;
				918
				919	/*
				920	* Called inside transaction, so use GFP_NOFS
				921	*/
				922	static struct inode ext4_alloc_inode(struct super_block sb)
				923	{
				924	struct ext4_inode_info *ei;
				925
				926	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
				927	if (!ei)
				928	return NULL;
				929
				930	ei->vfs_inode.i_version = 1;
				931	ei->vfs_inode.i_data.writeback_index = 0;
				932	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
				933	INIT_LIST_HEAD(&ei->i_prealloc_list);
				934	spin_lock_init(&ei->i_prealloc_lock);
				935	ei->i_reserved_data_blocks = 0;
				936	ei->i_reserved_meta_blocks = 0;
				937	ei->i_allocated_meta_blocks = 0;
				938	ei->i_da_metadata_calc_len = 0;
				939	ei->i_da_metadata_calc_last_lblock = 0;
				940	spin_lock_init(&(ei->i_block_reservation_lock));
				941	#ifdef CONFIG_QUOTA
				942	ei->i_reserved_quota = 0;
				943	#endif
				944	ei->jinode = NULL;
				945	INIT_LIST_HEAD(&ei->i_completed_io_list);
				946	spin_lock_init(&ei->i_completed_io_lock);
				947	ei->cur_aio_dio = NULL;
				948	ei->i_sync_tid = 0;
				949	ei->i_datasync_tid = 0;
				950	atomic_set(&ei->i_ioend_count, 0);
				951	atomic_set(&ei->i_aiodio_unwritten, 0);
				952
				953	return &ei->vfs_inode;
				954	}
				955
				956	static int ext4_drop_inode(struct inode *inode)
				957	{
				958	int drop = generic_drop_inode(inode);
				959
				960	trace_ext4_drop_inode(inode, drop);
				961	return drop;
				962	}
				963
				964	static void ext4_i_callback(struct rcu_head *head)
				965	{
				966	struct inode *inode = container_of(head, struct inode, i_rcu);
				967	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
				968	}
				969
				970	static void ext4_destroy_inode(struct inode *inode)
				971	{
				972	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
				973	ext4_msg(inode->i_sb, KERN_ERR,
				974	"Inode %lu (%p): orphan list check failed!",
				975	inode->i_ino, EXT4_I(inode));
				976	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				977	EXT4_I(inode), sizeof(struct ext4_inode_info),
				978	true);
				979	dump_stack();
				980	}
				981	call_rcu(&inode->i_rcu, ext4_i_callback);
				982	}
				983
				984	static void init_once(void *foo)
				985	{
				986	struct ext4_inode_info ei = (struct ext4_inode_info ) foo;
				987
				988	INIT_LIST_HEAD(&ei->i_orphan);
				989	#ifdef CONFIG_EXT4_FS_XATTR
				990	init_rwsem(&ei->xattr_sem);
				991	#endif
				992	init_rwsem(&ei->i_data_sem);
				993	inode_init_once(&ei->vfs_inode);
				994	}
				995
				996	static int init_inodecache(void)
				997	{
				998	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
				999	sizeof(struct ext4_inode_info),
				1000	0, (SLAB_RECLAIM_ACCOUNT\|
				1001	SLAB_MEM_SPREAD),
				1002	init_once);
				1003	if (ext4_inode_cachep == NULL)
				1004	return -ENOMEM;
				1005	return 0;
				1006	}
				1007
				1008	static void destroy_inodecache(void)
				1009	{
				1010	kmem_cache_destroy(ext4_inode_cachep);
				1011	}
				1012
				1013	void ext4_clear_inode(struct inode *inode)
				1014	{
				1015	invalidate_inode_buffers(inode);
				1016	end_writeback(inode);
				1017	dquot_drop(inode);
				1018	ext4_discard_preallocations(inode);
				1019	if (EXT4_I(inode)->jinode) {
				1020	jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
				1021	EXT4_I(inode)->jinode);
				1022	jbd2_free_inode(EXT4_I(inode)->jinode);
				1023	EXT4_I(inode)->jinode = NULL;
				1024	}
				1025	}
				1026
				1027	static struct inode ext4_nfs_get_inode(struct super_block sb,
				1028	u64 ino, u32 generation)
				1029	{
				1030	struct inode *inode;
				1031
				1032	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
				1033	return ERR_PTR(-ESTALE);
				1034	if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
				1035	return ERR_PTR(-ESTALE);
				1036
				1037	/* iget isn't really right if the inode is currently unallocated!!
				1038	*
				1039	* ext4_read_inode will return a bad_inode if the inode had been
				1040	* deleted, so we should be safe.
				1041	*
				1042	* Currently we don't know the generation for parent directory, so
				1043	* a generation of 0 means "accept any"
				1044	*/
				1045	inode = ext4_iget_normal(sb, ino);
				1046	if (IS_ERR(inode))
				1047	return ERR_CAST(inode);
				1048	if (generation && inode->i_generation != generation) {
				1049	iput(inode);
				1050	return ERR_PTR(-ESTALE);
				1051	}
				1052
				1053	return inode;
				1054	}
				1055
				1056	static struct dentry ext4_fh_to_dentry(struct super_block sb, struct fid *fid,
				1057	int fh_len, int fh_type)
				1058	{
				1059	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				1060	ext4_nfs_get_inode);
				1061	}
				1062
				1063	static struct dentry ext4_fh_to_parent(struct super_block sb, struct fid *fid,
				1064	int fh_len, int fh_type)
				1065	{
				1066	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				1067	ext4_nfs_get_inode);
				1068	}
				1069
				1070	/*
				1071	* Try to release metadata pages (indirect blocks, directories) which are
				1072	* mapped via the block device. Since these pages could have journal heads
				1073	* which would prevent try_to_free_buffers() from freeing them, we must use
				1074	* jbd2 layer's try_to_free_buffers() function to release them.
				1075	*/
				1076	static int bdev_try_to_free_page(struct super_block sb, struct page page,
				1077	gfp_t wait)
				1078	{
				1079	journal_t *journal = EXT4_SB(sb)->s_journal;
				1080
				1081	WARN_ON(PageChecked(page));
				1082	if (!page_has_buffers(page))
				1083	return 0;
				1084	if (journal)
				1085	return jbd2_journal_try_to_free_buffers(journal, page,
				1086	wait & ~__GFP_WAIT);
				1087	return try_to_free_buffers(page);
				1088	}
				1089
				1090	#ifdef CONFIG_QUOTA
				1091	#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
				1092	#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
				1093
				1094	static int ext4_write_dquot(struct dquot *dquot);
				1095	static int ext4_acquire_dquot(struct dquot *dquot);
				1096	static int ext4_release_dquot(struct dquot *dquot);
				1097	static int ext4_mark_dquot_dirty(struct dquot *dquot);
				1098	static int ext4_write_info(struct super_block *sb, int type);
				1099	static int ext4_quota_on(struct super_block *sb, int type, int format_id,
				1100	struct path *path);
				1101	static int ext4_quota_off(struct super_block *sb, int type);
				1102	static int ext4_quota_on_mount(struct super_block *sb, int type);
				1103	static ssize_t ext4_quota_read(struct super_block sb, int type, char data,
				1104	size_t len, loff_t off);
				1105	static ssize_t ext4_quota_write(struct super_block *sb, int type,
				1106	const char *data, size_t len, loff_t off);
				1107
				1108	static const struct dquot_operations ext4_quota_operations = {
				1109	.get_reserved_space = ext4_get_reserved_space,
				1110	.write_dquot = ext4_write_dquot,
				1111	.acquire_dquot = ext4_acquire_dquot,
				1112	.release_dquot = ext4_release_dquot,
				1113	.mark_dirty = ext4_mark_dquot_dirty,
				1114	.write_info = ext4_write_info,
				1115	.alloc_dquot = dquot_alloc,
				1116	.destroy_dquot = dquot_destroy,
				1117	};
				1118
				1119	static const struct quotactl_ops ext4_qctl_operations = {
				1120	.quota_on = ext4_quota_on,
				1121	.quota_off = ext4_quota_off,
				1122	.quota_sync = dquot_quota_sync,
				1123	.get_info = dquot_get_dqinfo,
				1124	.set_info = dquot_set_dqinfo,
				1125	.get_dqblk = dquot_get_dqblk,
				1126	.set_dqblk = dquot_set_dqblk
				1127	};
				1128	#endif
				1129
				1130	static const struct super_operations ext4_sops = {
				1131	.alloc_inode = ext4_alloc_inode,
				1132	.destroy_inode = ext4_destroy_inode,
				1133	.write_inode = ext4_write_inode,
				1134	.dirty_inode = ext4_dirty_inode,
				1135	.drop_inode = ext4_drop_inode,
				1136	.evict_inode = ext4_evict_inode,
				1137	.put_super = ext4_put_super,
				1138	.sync_fs = ext4_sync_fs,
				1139	.freeze_fs = ext4_freeze,
				1140	.unfreeze_fs = ext4_unfreeze,
				1141	.statfs = ext4_statfs,
				1142	.remount_fs = ext4_remount,
				1143	.show_options = ext4_show_options,
				1144	#ifdef CONFIG_QUOTA
				1145	.quota_read = ext4_quota_read,
				1146	.quota_write = ext4_quota_write,
				1147	#endif
				1148	.bdev_try_to_free_page = bdev_try_to_free_page,
				1149	};
				1150
				1151	static const struct super_operations ext4_nojournal_sops = {
				1152	.alloc_inode = ext4_alloc_inode,
				1153	.destroy_inode = ext4_destroy_inode,
				1154	.write_inode = ext4_write_inode,
				1155	.dirty_inode = ext4_dirty_inode,
				1156	.drop_inode = ext4_drop_inode,
				1157	.evict_inode = ext4_evict_inode,
				1158	.write_super = ext4_write_super,
				1159	.put_super = ext4_put_super,
				1160	.statfs = ext4_statfs,
				1161	.remount_fs = ext4_remount,
				1162	.show_options = ext4_show_options,
				1163	#ifdef CONFIG_QUOTA
				1164	.quota_read = ext4_quota_read,
				1165	.quota_write = ext4_quota_write,
				1166	#endif
				1167	.bdev_try_to_free_page = bdev_try_to_free_page,
				1168	};
				1169
				1170	static const struct export_operations ext4_export_ops = {
				1171	.fh_to_dentry = ext4_fh_to_dentry,
				1172	.fh_to_parent = ext4_fh_to_parent,
				1173	.get_parent = ext4_get_parent,
				1174	};
				1175
				1176	enum {
				1177	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
				1178	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
				1179	Opt_nouid32, Opt_debug, Opt_removed,
				1180	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
				1181	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
				1182	Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
				1183	Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
				1184	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
				1185	Opt_data_err_abort, Opt_data_err_ignore,
				1186	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
				1187	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
				1188	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
				1189	Opt_usrquota, Opt_grpquota, Opt_i_version,
				1190	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
				1191	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
				1192	Opt_inode_readahead_blks, Opt_journal_ioprio,
				1193	Opt_dioread_nolock, Opt_dioread_lock,
				1194	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
				1195	};
				1196
				1197	static const match_table_t tokens = {
				1198	{Opt_bsd_df, "bsddf"},
				1199	{Opt_minix_df, "minixdf"},
				1200	{Opt_grpid, "grpid"},
				1201	{Opt_grpid, "bsdgroups"},
				1202	{Opt_nogrpid, "nogrpid"},
				1203	{Opt_nogrpid, "sysvgroups"},
				1204	{Opt_resgid, "resgid=%u"},
				1205	{Opt_resuid, "resuid=%u"},
				1206	{Opt_sb, "sb=%u"},
				1207	{Opt_err_cont, "errors=continue"},
				1208	{Opt_err_panic, "errors=panic"},
				1209	{Opt_err_ro, "errors=remount-ro"},
				1210	{Opt_nouid32, "nouid32"},
				1211	{Opt_debug, "debug"},
				1212	{Opt_removed, "oldalloc"},
				1213	{Opt_removed, "orlov"},
				1214	{Opt_user_xattr, "user_xattr"},
				1215	{Opt_nouser_xattr, "nouser_xattr"},
				1216	{Opt_acl, "acl"},
				1217	{Opt_noacl, "noacl"},
				1218	{Opt_noload, "norecovery"},
				1219	{Opt_noload, "noload"},
				1220	{Opt_removed, "nobh"},
				1221	{Opt_removed, "bh"},
				1222	{Opt_commit, "commit=%u"},
				1223	{Opt_min_batch_time, "min_batch_time=%u"},
				1224	{Opt_max_batch_time, "max_batch_time=%u"},
				1225	{Opt_journal_dev, "journal_dev=%u"},
				1226	{Opt_journal_checksum, "journal_checksum"},
				1227	{Opt_journal_async_commit, "journal_async_commit"},
				1228	{Opt_abort, "abort"},
				1229	{Opt_data_journal, "data=journal"},
				1230	{Opt_data_ordered, "data=ordered"},
				1231	{Opt_data_writeback, "data=writeback"},
				1232	{Opt_data_err_abort, "data_err=abort"},
				1233	{Opt_data_err_ignore, "data_err=ignore"},
				1234	{Opt_offusrjquota, "usrjquota="},
				1235	{Opt_usrjquota, "usrjquota=%s"},
				1236	{Opt_offgrpjquota, "grpjquota="},
				1237	{Opt_grpjquota, "grpjquota=%s"},
				1238	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
				1239	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
				1240	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
				1241	{Opt_grpquota, "grpquota"},
				1242	{Opt_noquota, "noquota"},
				1243	{Opt_quota, "quota"},
				1244	{Opt_usrquota, "usrquota"},
				1245	{Opt_barrier, "barrier=%u"},
				1246	{Opt_barrier, "barrier"},
				1247	{Opt_nobarrier, "nobarrier"},
				1248	{Opt_i_version, "i_version"},
				1249	{Opt_stripe, "stripe=%u"},
				1250	{Opt_delalloc, "delalloc"},
				1251	{Opt_nodelalloc, "nodelalloc"},
				1252	{Opt_mblk_io_submit, "mblk_io_submit"},
				1253	{Opt_nomblk_io_submit, "nomblk_io_submit"},
				1254	{Opt_block_validity, "block_validity"},
				1255	{Opt_noblock_validity, "noblock_validity"},
				1256	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
				1257	{Opt_journal_ioprio, "journal_ioprio=%u"},
				1258	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
				1259	{Opt_auto_da_alloc, "auto_da_alloc"},
				1260	{Opt_noauto_da_alloc, "noauto_da_alloc"},
				1261	{Opt_dioread_nolock, "dioread_nolock"},
				1262	{Opt_dioread_lock, "dioread_lock"},
				1263	{Opt_discard, "discard"},
				1264	{Opt_nodiscard, "nodiscard"},
				1265	{Opt_init_itable, "init_itable=%u"},
				1266	{Opt_init_itable, "init_itable"},
				1267	{Opt_noinit_itable, "noinit_itable"},
				1268	{Opt_removed, "check=none"}, /* mount option from ext2/3 */
				1269	{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
				1270	{Opt_removed, "reservation"}, /* mount option from ext2/3 */
				1271	{Opt_removed, "noreservation"}, /* mount option from ext2/3 */
				1272	{Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
				1273	{Opt_err, NULL},
				1274	};
				1275
				1276	static ext4_fsblk_t get_sb_block(void **data)
				1277	{
				1278	ext4_fsblk_t sb_block;
				1279	char options = (char ) *data;
				1280
				1281	if (!options \|\| strncmp(options, "sb=", 3) != 0)
				1282	return 1; /* Default location */
				1283
				1284	options += 3;
				1285	/* TODO: use simple_strtoll with >32bit ext4 */
				1286	sb_block = simple_strtoul(options, &options, 0);
				1287	if (options && options != ',') {
				1288	printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
				1289	(char ) data);
				1290	return 1;
				1291	}
				1292	if (*options == ',')
				1293	options++;
				1294	data = (void ) options;
				1295
				1296	return sb_block;
				1297	}
				1298
				1299	#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
				1300	static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
				1301	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
				1302
				1303	#ifdef CONFIG_QUOTA
				1304	static int set_qf_name(struct super_block sb, int qtype, substring_t args)
				1305	{
				1306	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1307	char *qname;
				1308
				1309	if (sb_any_quota_loaded(sb) &&
				1310	!sbi->s_qf_names[qtype]) {
				1311	ext4_msg(sb, KERN_ERR,
				1312	"Cannot change journaled "
				1313	"quota options when quota turned on");
				1314	return -1;
				1315	}
				1316	qname = match_strdup(args);
				1317	if (!qname) {
				1318	ext4_msg(sb, KERN_ERR,
				1319	"Not enough memory for storing quotafile name");
				1320	return -1;
				1321	}
				1322	if (sbi->s_qf_names[qtype] &&
				1323	strcmp(sbi->s_qf_names[qtype], qname)) {
				1324	ext4_msg(sb, KERN_ERR,
				1325	"%s quota file already specified", QTYPE2NAME(qtype));
				1326	kfree(qname);
				1327	return -1;
				1328	}
				1329	sbi->s_qf_names[qtype] = qname;
				1330	if (strchr(sbi->s_qf_names[qtype], '/')) {
				1331	ext4_msg(sb, KERN_ERR,
				1332	"quotafile must be on filesystem root");
				1333	kfree(sbi->s_qf_names[qtype]);
				1334	sbi->s_qf_names[qtype] = NULL;
				1335	return -1;
				1336	}
				1337	set_opt(sb, QUOTA);
				1338	return 1;
				1339	}
				1340
				1341	static int clear_qf_name(struct super_block *sb, int qtype)
				1342	{
				1343
				1344	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1345
				1346	if (sb_any_quota_loaded(sb) &&
				1347	sbi->s_qf_names[qtype]) {
				1348	ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
				1349	" when quota turned on");
				1350	return -1;
				1351	}
				1352	/*
				1353	* The space will be released later when all options are confirmed
				1354	* to be correct
				1355	*/
				1356	sbi->s_qf_names[qtype] = NULL;
				1357	return 1;
				1358	}
				1359	#endif
				1360
				1361	#define MOPT_SET 0x0001
				1362	#define MOPT_CLEAR 0x0002
				1363	#define MOPT_NOSUPPORT 0x0004
				1364	#define MOPT_EXPLICIT 0x0008
				1365	#define MOPT_CLEAR_ERR 0x0010
				1366	#define MOPT_GTE0 0x0020
				1367	#ifdef CONFIG_QUOTA
				1368	#define MOPT_Q 0
				1369	#define MOPT_QFMT 0x0040
				1370	#else
				1371	#define MOPT_Q MOPT_NOSUPPORT
				1372	#define MOPT_QFMT MOPT_NOSUPPORT
				1373	#endif
				1374	#define MOPT_DATAJ 0x0080
				1375
				1376	static const struct mount_opts {
				1377	int token;
				1378	int mount_opt;
				1379	int flags;
				1380	} ext4_mount_opts[] = {
				1381	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
				1382	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
				1383	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
				1384	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
				1385	{Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
				1386	{Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
				1387	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
				1388	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
				1389	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
				1390	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR},
				1391	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
				1392	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
				1393	{Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET \| MOPT_EXPLICIT},
				1394	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR \| MOPT_EXPLICIT},
				1395	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET},
				1396	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT \|
				1397	EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET},
				1398	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET},
				1399	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET \| MOPT_CLEAR_ERR},
				1400	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET \| MOPT_CLEAR_ERR},
				1401	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET \| MOPT_CLEAR_ERR},
				1402	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET},
				1403	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR},
				1404	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
				1405	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
				1406	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
				1407	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
				1408	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
				1409	{Opt_commit, 0, MOPT_GTE0},
				1410	{Opt_max_batch_time, 0, MOPT_GTE0},
				1411	{Opt_min_batch_time, 0, MOPT_GTE0},
				1412	{Opt_inode_readahead_blks, 0, MOPT_GTE0},
				1413	{Opt_init_itable, 0, MOPT_GTE0},
				1414	{Opt_stripe, 0, MOPT_GTE0},
				1415	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
				1416	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
				1417	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
				1418	#ifdef CONFIG_EXT4_FS_XATTR
				1419	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
				1420	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
				1421	#else
				1422	{Opt_user_xattr, 0, MOPT_NOSUPPORT},
				1423	{Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
				1424	#endif
				1425	#ifdef CONFIG_EXT4_FS_POSIX_ACL
				1426	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
				1427	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
				1428	#else
				1429	{Opt_acl, 0, MOPT_NOSUPPORT},
				1430	{Opt_noacl, 0, MOPT_NOSUPPORT},
				1431	#endif
				1432	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
				1433	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
				1434	{Opt_quota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA, MOPT_SET \| MOPT_Q},
				1435	{Opt_usrquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA,
				1436	MOPT_SET \| MOPT_Q},
				1437	{Opt_grpquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_GRPQUOTA,
				1438	MOPT_SET \| MOPT_Q},
				1439	{Opt_noquota, (EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA \|
				1440	EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR \| MOPT_Q},
				1441	{Opt_usrjquota, 0, MOPT_Q},
				1442	{Opt_grpjquota, 0, MOPT_Q},
				1443	{Opt_offusrjquota, 0, MOPT_Q},
				1444	{Opt_offgrpjquota, 0, MOPT_Q},
				1445	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
				1446	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
				1447	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
				1448	{Opt_err, 0, 0}
				1449	};
				1450
				1451	static int handle_mount_opt(struct super_block sb, char opt, int token,
				1452	substring_t args, unsigned long journal_devnum,
				1453	unsigned int *journal_ioprio, int is_remount)
				1454	{
				1455	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1456	const struct mount_opts *m;
				1457	int arg = 0;
				1458
				1459	#ifdef CONFIG_QUOTA
				1460	if (token == Opt_usrjquota)
				1461	return set_qf_name(sb, USRQUOTA, &args[0]);
				1462	else if (token == Opt_grpjquota)
				1463	return set_qf_name(sb, GRPQUOTA, &args[0]);
				1464	else if (token == Opt_offusrjquota)
				1465	return clear_qf_name(sb, USRQUOTA);
				1466	else if (token == Opt_offgrpjquota)
				1467	return clear_qf_name(sb, GRPQUOTA);
				1468	#endif
				1469	if (args->from && match_int(args, &arg))
				1470	return -1;
				1471	switch (token) {
				1472	case Opt_noacl:
				1473	case Opt_nouser_xattr:
				1474	ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
				1475	break;
				1476	case Opt_sb:
				1477	return 1; /* handled by get_sb_block() */
				1478	case Opt_removed:
				1479	ext4_msg(sb, KERN_WARNING,
				1480	"Ignoring removed %s option", opt);
				1481	return 1;
				1482	case Opt_resuid:
				1483	sbi->s_resuid = arg;
				1484	return 1;
				1485	case Opt_resgid:
				1486	sbi->s_resgid = arg;
				1487	return 1;
				1488	case Opt_abort:
				1489	sbi->s_mount_flags \|= EXT4_MF_FS_ABORTED;
				1490	return 1;
				1491	case Opt_i_version:
				1492	sb->s_flags \|= MS_I_VERSION;
				1493	return 1;
				1494	case Opt_journal_dev:
				1495	if (is_remount) {
				1496	ext4_msg(sb, KERN_ERR,
				1497	"Cannot specify journal on remount");
				1498	return -1;
				1499	}
				1500	*journal_devnum = arg;
				1501	return 1;
				1502	case Opt_journal_ioprio:
				1503	if (arg < 0 \|\| arg > 7)
				1504	return -1;
				1505	*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
				1506	return 1;
				1507	}
				1508
				1509	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
				1510	if (token != m->token)
				1511	continue;
				1512	if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
				1513	return -1;
				1514	if (m->flags & MOPT_EXPLICIT)
				1515	set_opt2(sb, EXPLICIT_DELALLOC);
				1516	if (m->flags & MOPT_CLEAR_ERR)
				1517	clear_opt(sb, ERRORS_MASK);
				1518	if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
				1519	ext4_msg(sb, KERN_ERR, "Cannot change quota "
				1520	"options when quota turned on");
				1521	return -1;
				1522	}
				1523
				1524	if (m->flags & MOPT_NOSUPPORT) {
				1525	ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
				1526	} else if (token == Opt_commit) {
				1527	if (arg == 0)
				1528	arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
				1529	sbi->s_commit_interval = HZ * arg;
				1530	} else if (token == Opt_max_batch_time) {
				1531	if (arg == 0)
				1532	arg = EXT4_DEF_MAX_BATCH_TIME;
				1533	sbi->s_max_batch_time = arg;
				1534	} else if (token == Opt_min_batch_time) {
				1535	sbi->s_min_batch_time = arg;
				1536	} else if (token == Opt_inode_readahead_blks) {
				1537	if (arg > (1 << 30))
				1538	return -1;
				1539	if (arg && !is_power_of_2(arg)) {
				1540	ext4_msg(sb, KERN_ERR,
				1541	"EXT4-fs: inode_readahead_blks"
				1542	" must be a power of 2");
				1543	return -1;
				1544	}
				1545	sbi->s_inode_readahead_blks = arg;
				1546	} else if (token == Opt_init_itable) {
				1547	set_opt(sb, INIT_INODE_TABLE);
				1548	if (!args->from)
				1549	arg = EXT4_DEF_LI_WAIT_MULT;
				1550	sbi->s_li_wait_mult = arg;
				1551	} else if (token == Opt_stripe) {
				1552	sbi->s_stripe = arg;
				1553	} else if (m->flags & MOPT_DATAJ) {
				1554	if (is_remount) {
				1555	if (!sbi->s_journal)
				1556	ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
				1557	else if (test_opt(sb, DATA_FLAGS) !=
				1558	m->mount_opt) {
				1559	ext4_msg(sb, KERN_ERR,
				1560	"Cannot change data mode on remount");
				1561	return -1;
				1562	}
				1563	} else {
				1564	clear_opt(sb, DATA_FLAGS);
				1565	sbi->s_mount_opt \|= m->mount_opt;
				1566	}
				1567	#ifdef CONFIG_QUOTA
				1568	} else if (m->flags & MOPT_QFMT) {
				1569	if (sb_any_quota_loaded(sb) &&
				1570	sbi->s_jquota_fmt != m->mount_opt) {
				1571	ext4_msg(sb, KERN_ERR, "Cannot "
				1572	"change journaled quota options "
				1573	"when quota turned on");
				1574	return -1;
				1575	}
				1576	sbi->s_jquota_fmt = m->mount_opt;
				1577	#endif
				1578	} else {
				1579	if (!args->from)
				1580	arg = 1;
				1581	if (m->flags & MOPT_CLEAR)
				1582	arg = !arg;
				1583	else if (unlikely(!(m->flags & MOPT_SET))) {
				1584	ext4_msg(sb, KERN_WARNING,
				1585	"buggy handling of option %s", opt);
				1586	WARN_ON(1);
				1587	return -1;
				1588	}
				1589	if (arg != 0)
				1590	sbi->s_mount_opt \|= m->mount_opt;
				1591	else
				1592	sbi->s_mount_opt &= ~m->mount_opt;
				1593	}
				1594	return 1;
				1595	}
				1596	ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
				1597	"or missing value", opt);
				1598	return -1;
				1599	}
				1600
				1601	static int parse_options(char options, struct super_block sb,
				1602	unsigned long *journal_devnum,
				1603	unsigned int *journal_ioprio,
				1604	int is_remount)
				1605	{
				1606	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1607	char *p;
				1608	substring_t args[MAX_OPT_ARGS];
				1609	int token;
				1610
				1611	if (!options)
				1612	return 1;
				1613
				1614	while ((p = strsep(&options, ",")) != NULL) {
				1615	if (!*p)
				1616	continue;
				1617	/*
				1618	* Initialize args struct so we know whether arg was
				1619	* found; some options take optional arguments.
				1620	*/
				1621	args[0].to = args[0].from = 0;
				1622	token = match_token(p, tokens, args);
				1623	if (handle_mount_opt(sb, p, token, args, journal_devnum,
				1624	journal_ioprio, is_remount) < 0)
				1625	return 0;
				1626	}
				1627	#ifdef CONFIG_QUOTA
				1628	if (sbi->s_qf_names[USRQUOTA] \|\| sbi->s_qf_names[GRPQUOTA]) {
				1629	if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
				1630	clear_opt(sb, USRQUOTA);
				1631
				1632	if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
				1633	clear_opt(sb, GRPQUOTA);
				1634
				1635	if (test_opt(sb, GRPQUOTA) \|\| test_opt(sb, USRQUOTA)) {
				1636	ext4_msg(sb, KERN_ERR, "old and new quota "
				1637	"format mixing");
				1638	return 0;
				1639	}
				1640
				1641	if (!sbi->s_jquota_fmt) {
				1642	ext4_msg(sb, KERN_ERR, "journaled quota format "
				1643	"not specified");
				1644	return 0;
				1645	}
				1646	}
				1647	#endif
				1648	if (test_opt(sb, DIOREAD_NOLOCK)) {
				1649	int blocksize =
				1650	BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
				1651
				1652	if (blocksize < PAGE_CACHE_SIZE) {
				1653	ext4_msg(sb, KERN_ERR, "can't mount with "
				1654	"dioread_nolock if block size != PAGE_SIZE");
				1655	return 0;
				1656	}
				1657	}
				1658	return 1;
				1659	}
				1660
				1661	static inline void ext4_show_quota_options(struct seq_file *seq,
				1662	struct super_block *sb)
				1663	{
				1664	#if defined(CONFIG_QUOTA)
				1665	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1666
				1667	if (sbi->s_jquota_fmt) {
				1668	char *fmtname = "";
				1669
				1670	switch (sbi->s_jquota_fmt) {
				1671	case QFMT_VFS_OLD:
				1672	fmtname = "vfsold";
				1673	break;
				1674	case QFMT_VFS_V0:
				1675	fmtname = "vfsv0";
				1676	break;
				1677	case QFMT_VFS_V1:
				1678	fmtname = "vfsv1";
				1679	break;
				1680	}
				1681	seq_printf(seq, ",jqfmt=%s", fmtname);
				1682	}
				1683
				1684	if (sbi->s_qf_names[USRQUOTA])
				1685	seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
				1686
				1687	if (sbi->s_qf_names[GRPQUOTA])
				1688	seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
				1689
				1690	if (test_opt(sb, USRQUOTA))
				1691	seq_puts(seq, ",usrquota");
				1692
				1693	if (test_opt(sb, GRPQUOTA))
				1694	seq_puts(seq, ",grpquota");
				1695	#endif
				1696	}
				1697
				1698	static const char *token2str(int token)
				1699	{
				1700	const struct match_token *t;
				1701
				1702	for (t = tokens; t->token != Opt_err; t++)
				1703	if (t->token == token && !strchr(t->pattern, '='))
				1704	break;
				1705	return t->pattern;
				1706	}
				1707
				1708	/*
				1709	* Show an option if
				1710	* - it's set to a non-default value OR
				1711	* - if the per-sb default is different from the global default
				1712	*/
				1713	static int _ext4_show_options(struct seq_file seq, struct super_block sb,
				1714	int nodefs)
				1715	{
				1716	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1717	struct ext4_super_block *es = sbi->s_es;
				1718	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
				1719	const struct mount_opts *m;
				1720	char sep = nodefs ? '\n' : ',';
				1721
				1722	#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
				1723	#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
				1724
				1725	if (sbi->s_sb_block != 1)
				1726	SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
				1727
				1728	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
				1729	int want_set = m->flags & MOPT_SET;
				1730	if (((m->flags & (MOPT_SET\|MOPT_CLEAR)) == 0) \|\|
				1731	(m->flags & MOPT_CLEAR_ERR))
				1732	continue;
				1733	if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
				1734	continue; /* skip if same as the default */
				1735	if ((want_set &&
				1736	(sbi->s_mount_opt & m->mount_opt) != m->mount_opt) \|\|
				1737	(!want_set && (sbi->s_mount_opt & m->mount_opt)))
				1738	continue; /* select Opt_noFoo vs Opt_Foo */
				1739	SEQ_OPTS_PRINT("%s", token2str(m->token));
				1740	}
				1741
				1742	if (nodefs \|\| sbi->s_resuid != EXT4_DEF_RESUID \|\|
				1743	le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
				1744	SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid);
				1745	if (nodefs \|\| sbi->s_resgid != EXT4_DEF_RESGID \|\|
				1746	le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
				1747	SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid);
				1748	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
				1749	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
				1750	SEQ_OPTS_PUTS("errors=remount-ro");
				1751	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
				1752	SEQ_OPTS_PUTS("errors=continue");
				1753	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
				1754	SEQ_OPTS_PUTS("errors=panic");
				1755	if (nodefs \|\| sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
				1756	SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
				1757	if (nodefs \|\| sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
				1758	SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
				1759	if (nodefs \|\| sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
				1760	SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
				1761	if (sb->s_flags & MS_I_VERSION)
				1762	SEQ_OPTS_PUTS("i_version");
				1763	if (nodefs \|\| sbi->s_stripe)
				1764	SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
				1765	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
				1766	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
				1767	SEQ_OPTS_PUTS("data=journal");
				1768	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
				1769	SEQ_OPTS_PUTS("data=ordered");
				1770	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
				1771	SEQ_OPTS_PUTS("data=writeback");
				1772	}
				1773	if (nodefs \|\|
				1774	sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
				1775	SEQ_OPTS_PRINT("inode_readahead_blks=%u",
				1776	sbi->s_inode_readahead_blks);
				1777
				1778	if (nodefs \|\| (test_opt(sb, INIT_INODE_TABLE) &&
				1779	(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
				1780	SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
				1781
				1782	ext4_show_quota_options(seq, sb);
				1783	return 0;
				1784	}
				1785
				1786	static int ext4_show_options(struct seq_file seq, struct dentry root)
				1787	{
				1788	return _ext4_show_options(seq, root->d_sb, 0);
				1789	}
				1790
				1791	static int options_seq_show(struct seq_file seq, void offset)
				1792	{
				1793	struct super_block *sb = seq->private;
				1794	int rc;
				1795
				1796	seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
				1797	rc = _ext4_show_options(seq, sb, 1);
				1798	seq_puts(seq, "\n");
				1799	return rc;
				1800	}
				1801
				1802	static int options_open_fs(struct inode inode, struct file file)
				1803	{
				1804	return single_open(file, options_seq_show, PDE(inode)->data);
				1805	}
				1806
				1807	static const struct file_operations ext4_seq_options_fops = {
				1808	.owner = THIS_MODULE,
				1809	.open = options_open_fs,
				1810	.read = seq_read,
				1811	.llseek = seq_lseek,
				1812	.release = single_release,
				1813	};
				1814
				1815	static int ext4_setup_super(struct super_block sb, struct ext4_super_block es,
				1816	int read_only)
				1817	{
				1818	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1819	int res = 0;
				1820
				1821	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
				1822	ext4_msg(sb, KERN_ERR, "revision level too high, "
				1823	"forcing read-only mode");
				1824	res = MS_RDONLY;
				1825	}
				1826	if (read_only)
				1827	goto done;
				1828	if (!(sbi->s_mount_state & EXT4_VALID_FS))
				1829	ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
				1830	"running e2fsck is recommended");
				1831	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
				1832	ext4_msg(sb, KERN_WARNING,
				1833	"warning: mounting fs with errors, "
				1834	"running e2fsck is recommended");
				1835	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
				1836	le16_to_cpu(es->s_mnt_count) >=
				1837	(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
				1838	ext4_msg(sb, KERN_WARNING,
				1839	"warning: maximal mount count reached, "
				1840	"running e2fsck is recommended");
				1841	else if (le32_to_cpu(es->s_checkinterval) &&
				1842	(le32_to_cpu(es->s_lastcheck) +
				1843	le32_to_cpu(es->s_checkinterval) <= get_seconds()))
				1844	ext4_msg(sb, KERN_WARNING,
				1845	"warning: checktime reached, "
				1846	"running e2fsck is recommended");
				1847	if (!sbi->s_journal)
				1848	es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
				1849	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
				1850	es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
				1851	le16_add_cpu(&es->s_mnt_count, 1);
				1852	es->s_mtime = cpu_to_le32(get_seconds());
				1853	ext4_update_dynamic_rev(sb);
				1854	if (sbi->s_journal)
				1855	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
				1856
				1857	ext4_commit_super(sb, 1);
				1858	done:
				1859	if (test_opt(sb, DEBUG))
				1860	printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				1861	"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
				1862	sb->s_blocksize,
				1863	sbi->s_groups_count,
				1864	EXT4_BLOCKS_PER_GROUP(sb),
				1865	EXT4_INODES_PER_GROUP(sb),
				1866	sbi->s_mount_opt, sbi->s_mount_opt2);
				1867
				1868	cleancache_init_fs(sb);
				1869	return res;
				1870	}
				1871
				1872	static int ext4_fill_flex_info(struct super_block *sb)
				1873	{
				1874	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1875	struct ext4_group_desc *gdp = NULL;
				1876	ext4_group_t flex_group_count;
				1877	ext4_group_t flex_group;
				1878	unsigned int groups_per_flex = 0;
				1879	size_t size;
				1880	int i;
				1881
				1882	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
				1883	if (sbi->s_log_groups_per_flex < 1 \|\| sbi->s_log_groups_per_flex > 31) {
				1884	sbi->s_log_groups_per_flex = 0;
				1885	return 1;
				1886	}
				1887	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
				1888
				1889	/* We allocate both existing and potentially added groups */
				1890	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
				1891	((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
				1892	EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
				1893	size = flex_group_count * sizeof(struct flex_groups);
				1894	sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
				1895	if (sbi->s_flex_groups == NULL) {
				1896	ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
				1897	flex_group_count);
				1898	goto failed;
				1899	}
				1900
				1901	for (i = 0; i < sbi->s_groups_count; i++) {
				1902	gdp = ext4_get_group_desc(sb, i, NULL);
				1903
				1904	flex_group = ext4_flex_group(sbi, i);
				1905	atomic_add(ext4_free_inodes_count(sb, gdp),
				1906	&sbi->s_flex_groups[flex_group].free_inodes);
				1907	atomic64_add(ext4_free_group_clusters(sb, gdp),
				1908	&sbi->s_flex_groups[flex_group].free_clusters);
				1909	atomic_add(ext4_used_dirs_count(sb, gdp),
				1910	&sbi->s_flex_groups[flex_group].used_dirs);
				1911	}
				1912
				1913	return 1;
				1914	failed:
				1915	return 0;
				1916	}
				1917
				1918	__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
				1919	struct ext4_group_desc *gdp)
				1920	{
				1921	__u16 crc = 0;
				1922
				1923	if (sbi->s_es->s_feature_ro_compat &
				1924	cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
				1925	int offset = offsetof(struct ext4_group_desc, bg_checksum);
				1926	__le32 le_group = cpu_to_le32(block_group);
				1927
				1928	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
				1929	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
				1930	crc = crc16(crc, (__u8 *)gdp, offset);
				1931	offset += sizeof(gdp->bg_checksum); /* skip checksum */
				1932	/* for checksum of struct ext4_group_desc do the rest...*/
				1933	if ((sbi->s_es->s_feature_incompat &
				1934	cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
				1935	offset < le16_to_cpu(sbi->s_es->s_desc_size))
				1936	crc = crc16(crc, (__u8 *)gdp + offset,
				1937	le16_to_cpu(sbi->s_es->s_desc_size) -
				1938	offset);
				1939	}
				1940
				1941	return cpu_to_le16(crc);
				1942	}
				1943
				1944	int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
				1945	struct ext4_group_desc *gdp)
				1946	{
				1947	if ((sbi->s_es->s_feature_ro_compat &
				1948	cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
				1949	(gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
				1950	return 0;
				1951
				1952	return 1;
				1953	}
				1954
				1955	/* Called at mount-time, super-block is locked */
				1956	static int ext4_check_descriptors(struct super_block *sb,
				1957	ext4_group_t *first_not_zeroed)
				1958	{
				1959	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1960	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
				1961	ext4_fsblk_t last_block;
				1962	ext4_fsblk_t block_bitmap;
				1963	ext4_fsblk_t inode_bitmap;
				1964	ext4_fsblk_t inode_table;
				1965	int flexbg_flag = 0;
				1966	ext4_group_t i, grp = sbi->s_groups_count;
				1967
				1968	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
				1969	flexbg_flag = 1;
				1970
				1971	ext4_debug("Checking group descriptors");
				1972
				1973	for (i = 0; i < sbi->s_groups_count; i++) {
				1974	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
				1975
				1976	if (i == sbi->s_groups_count - 1 \|\| flexbg_flag)
				1977	last_block = ext4_blocks_count(sbi->s_es) - 1;
				1978	else
				1979	last_block = first_block +
				1980	(EXT4_BLOCKS_PER_GROUP(sb) - 1);
				1981
				1982	if ((grp == sbi->s_groups_count) &&
				1983	!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				1984	grp = i;
				1985
				1986	block_bitmap = ext4_block_bitmap(sb, gdp);
				1987	if (block_bitmap < first_block \|\| block_bitmap > last_block) {
				1988	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				1989	"Block bitmap for group %u not in group "
				1990	"(block %llu)!", i, block_bitmap);
				1991	return 0;
				1992	}
				1993	inode_bitmap = ext4_inode_bitmap(sb, gdp);
				1994	if (inode_bitmap < first_block \|\| inode_bitmap > last_block) {
				1995	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				1996	"Inode bitmap for group %u not in group "
				1997	"(block %llu)!", i, inode_bitmap);
				1998	return 0;
				1999	}
				2000	inode_table = ext4_inode_table(sb, gdp);
				2001	if (inode_table < first_block \|\|
				2002	inode_table + sbi->s_itb_per_group - 1 > last_block) {
				2003	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2004	"Inode table for group %u not in group "
				2005	"(block %llu)!", i, inode_table);
				2006	return 0;
				2007	}
				2008	ext4_lock_group(sb, i);
				2009	if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
				2010	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2011	"Checksum for group %u failed (%u!=%u)",
				2012	i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
				2013	gdp)), le16_to_cpu(gdp->bg_checksum));
				2014	if (!(sb->s_flags & MS_RDONLY)) {
				2015	ext4_unlock_group(sb, i);
				2016	return 0;
				2017	}
				2018	}
				2019	ext4_unlock_group(sb, i);
				2020	if (!flexbg_flag)
				2021	first_block += EXT4_BLOCKS_PER_GROUP(sb);
				2022	}
				2023	if (NULL != first_not_zeroed)
				2024	*first_not_zeroed = grp;
				2025
				2026	ext4_free_blocks_count_set(sbi->s_es,
				2027	EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
				2028	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
				2029	return 1;
				2030	}
				2031
				2032	/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
				2033	* the superblock) which were deleted from all directories, but held open by
				2034	* a process at the time of a crash. We walk the list and try to delete these
				2035	* inodes at recovery time (only with a read-write filesystem).
				2036	*
				2037	* In order to keep the orphan inode chain consistent during traversal (in
				2038	* case of crash during recovery), we link each inode into the superblock
				2039	* orphan list_head and handle it the same way as an inode deletion during
				2040	* normal operation (which journals the operations for us).
				2041	*
				2042	* We only do an iget() and an iput() on each inode, which is very safe if we
				2043	* accidentally point at an in-use or already deleted inode. The worst that
				2044	* can happen in this case is that we get a "bit already cleared" message from
				2045	* ext4_free_inode(). The only reason we would point at a wrong inode is if
				2046	* e2fsck was run on this filesystem, and it must have already done the orphan
				2047	* inode cleanup for us, so we can safely abort without any further action.
				2048	*/
				2049	static void ext4_orphan_cleanup(struct super_block *sb,
				2050	struct ext4_super_block *es)
				2051	{
				2052	unsigned int s_flags = sb->s_flags;
				2053	int nr_orphans = 0, nr_truncates = 0;
				2054	#ifdef CONFIG_QUOTA
				2055	int i;
				2056	#endif
				2057	if (!es->s_last_orphan) {
				2058	jbd_debug(4, "no orphan inodes to clean up\n");
				2059	return;
				2060	}
				2061
				2062	if (bdev_read_only(sb->s_bdev)) {
				2063	ext4_msg(sb, KERN_ERR, "write access "
				2064	"unavailable, skipping orphan cleanup");
				2065	return;
				2066	}
				2067
				2068	/* Check if feature set would not allow a r/w mount */
				2069	if (!ext4_feature_set_ok(sb, 0)) {
				2070	ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
				2071	"unknown ROCOMPAT features");
				2072	return;
				2073	}
				2074
				2075	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
				2076	if (es->s_last_orphan)
				2077	jbd_debug(1, "Errors on filesystem, "
				2078	"clearing orphan list.\n");
				2079	es->s_last_orphan = 0;
				2080	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				2081	return;
				2082	}
				2083
				2084	if (s_flags & MS_RDONLY) {
				2085	ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
				2086	sb->s_flags &= ~MS_RDONLY;
				2087	}
				2088	#ifdef CONFIG_QUOTA
				2089	/* Needed for iput() to work correctly and not trash data */
				2090	sb->s_flags \|= MS_ACTIVE;
				2091	/* Turn on quotas so that they are updated correctly */
				2092	for (i = 0; i < MAXQUOTAS; i++) {
				2093	if (EXT4_SB(sb)->s_qf_names[i]) {
				2094	int ret = ext4_quota_on_mount(sb, i);
				2095	if (ret < 0)
				2096	ext4_msg(sb, KERN_ERR,
				2097	"Cannot turn on journaled "
				2098	"quota: error %d", ret);
				2099	}
				2100	}
				2101	#endif
				2102
				2103	while (es->s_last_orphan) {
				2104	struct inode *inode;
				2105
				2106	inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
				2107	if (IS_ERR(inode)) {
				2108	es->s_last_orphan = 0;
				2109	break;
				2110	}
				2111
				2112	list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
				2113	dquot_initialize(inode);
				2114	if (inode->i_nlink) {
				2115	ext4_msg(sb, KERN_DEBUG,
				2116	"%s: truncating inode %lu to %lld bytes",
				2117	__func__, inode->i_ino, inode->i_size);
				2118	jbd_debug(2, "truncating inode %lu to %lld bytes\n",
				2119	inode->i_ino, inode->i_size);
				2120	mutex_lock(&inode->i_mutex);
				2121	ext4_truncate(inode);
				2122	mutex_unlock(&inode->i_mutex);
				2123	nr_truncates++;
				2124	} else {
				2125	ext4_msg(sb, KERN_DEBUG,
				2126	"%s: deleting unreferenced inode %lu",
				2127	__func__, inode->i_ino);
				2128	jbd_debug(2, "deleting unreferenced inode %lu\n",
				2129	inode->i_ino);
				2130	nr_orphans++;
				2131	}
				2132	iput(inode); /* The delete magic happens here! */
				2133	}
				2134
				2135	#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
				2136
				2137	if (nr_orphans)
				2138	ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
				2139	PLURAL(nr_orphans));
				2140	if (nr_truncates)
				2141	ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
				2142	PLURAL(nr_truncates));
				2143	#ifdef CONFIG_QUOTA
				2144	/* Turn quotas off */
				2145	for (i = 0; i < MAXQUOTAS; i++) {
				2146	if (sb_dqopt(sb)->files[i])
				2147	dquot_quota_off(sb, i);
				2148	}
				2149	#endif
				2150	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
				2151	}
				2152
				2153	/*
				2154	* Maximal extent format file size.
				2155	* Resulting logical blkno at s_maxbytes must fit in our on-disk
				2156	* extent format containers, within a sector_t, and within i_blocks
				2157	* in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
				2158	* so that won't be a limiting factor.
				2159	*
				2160	* However there is other limiting factor. We do store extents in the form
				2161	* of starting block and length, hence the resulting length of the extent
				2162	* covering maximum file size must fit into on-disk format containers as
				2163	* well. Given that length is always by 1 unit bigger than max unit (because
				2164	* we count 0 as well) we have to lower the s_maxbytes by one fs block.
				2165	*
				2166	* Note, this does not consider any metadata overhead for vfs i_blocks.
				2167	*/
				2168	static loff_t ext4_max_size(int blkbits, int has_huge_files)
				2169	{
				2170	loff_t res;
				2171	loff_t upper_limit = MAX_LFS_FILESIZE;
				2172
				2173	/* small i_blocks in vfs inode? */
				2174	if (!has_huge_files \|\| sizeof(blkcnt_t) < sizeof(u64)) {
				2175	/*
				2176	* CONFIG_LBDAF is not enabled implies the inode
				2177	* i_block represent total blocks in 512 bytes
				2178	* 32 == size of vfs inode i_blocks * 8
				2179	*/
				2180	upper_limit = (1LL << 32) - 1;
				2181
				2182	/* total blocks in file system block size */
				2183	upper_limit >>= (blkbits - 9);
				2184	upper_limit <<= blkbits;
				2185	}
				2186
				2187	/*
				2188	* 32-bit extent-start container, ee_block. We lower the maxbytes
				2189	* by one fs block, so ee_len can cover the extent of maximum file
				2190	* size
				2191	*/
				2192	res = (1LL << 32) - 1;
				2193	res <<= blkbits;
				2194
				2195	/* Sanity check against vm- & vfs- imposed limits */
				2196	if (res > upper_limit)
				2197	res = upper_limit;
				2198
				2199	return res;
				2200	}
				2201
				2202	/*
				2203	* Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
				2204	* block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
				2205	* We need to be 1 filesystem block less than the 2^48 sector limit.
				2206	*/
				2207	static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
				2208	{
				2209	loff_t res = EXT4_NDIR_BLOCKS;
				2210	int meta_blocks;
				2211	loff_t upper_limit;
				2212	/* This is calculated to be the largest file size for a dense, block
				2213	* mapped file such that the file's total number of 512-byte sectors,
				2214	* including data and all indirect blocks, does not exceed (2^48 - 1).
				2215	*
				2216	* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
				2217	* number of 512-byte sectors of the file.
				2218	*/
				2219
				2220	if (!has_huge_files \|\| sizeof(blkcnt_t) < sizeof(u64)) {
				2221	/*
				2222	* !has_huge_files or CONFIG_LBDAF not enabled implies that
				2223	* the inode i_block field represents total file blocks in
				2224	* 2^32 512-byte sectors == size of vfs inode i_blocks * 8
				2225	*/
				2226	upper_limit = (1LL << 32) - 1;
				2227
				2228	/* total blocks in file system block size */
				2229	upper_limit >>= (bits - 9);
				2230
				2231	} else {
				2232	/*
				2233	* We use 48 bit ext4_inode i_blocks
				2234	* With EXT4_HUGE_FILE_FL set the i_blocks
				2235	* represent total number of blocks in
				2236	* file system block size
				2237	*/
				2238	upper_limit = (1LL << 48) - 1;
				2239
				2240	}
				2241
				2242	/* indirect blocks */
				2243	meta_blocks = 1;
				2244	/* double indirect blocks */
				2245	meta_blocks += 1 + (1LL << (bits-2));
				2246	/* tripple indirect blocks */
				2247	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
				2248
				2249	upper_limit -= meta_blocks;
				2250	upper_limit <<= bits;
				2251
				2252	res += 1LL << (bits-2);
				2253	res += 1LL << (2*(bits-2));
				2254	res += 1LL << (3*(bits-2));
				2255	res <<= bits;
				2256	if (res > upper_limit)
				2257	res = upper_limit;
				2258
				2259	if (res > MAX_LFS_FILESIZE)
				2260	res = MAX_LFS_FILESIZE;
				2261
				2262	return res;
				2263	}
				2264
				2265	static ext4_fsblk_t descriptor_loc(struct super_block *sb,
				2266	ext4_fsblk_t logical_sb_block, int nr)
				2267	{
				2268	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2269	ext4_group_t bg, first_meta_bg;
				2270	int has_super = 0;
				2271
				2272	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
				2273
				2274	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) \|\|
				2275	nr < first_meta_bg)
				2276	return logical_sb_block + nr + 1;
				2277	bg = sbi->s_desc_per_block * nr;
				2278	if (ext4_bg_has_super(sb, bg))
				2279	has_super = 1;
				2280
				2281	return (has_super + ext4_group_first_block_no(sb, bg));
				2282	}
				2283
				2284	/**
				2285	* ext4_get_stripe_size: Get the stripe size.
				2286	* @sbi: In memory super block info
				2287	*
				2288	* If we have specified it via mount option, then
				2289	* use the mount option value. If the value specified at mount time is
				2290	* greater than the blocks per group use the super block value.
				2291	* If the super block value is greater than blocks per group return 0.
				2292	* Allocator needs it be less than blocks per group.
				2293	*
				2294	*/
				2295	static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
				2296	{
				2297	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
				2298	unsigned long stripe_width =
				2299	le32_to_cpu(sbi->s_es->s_raid_stripe_width);
				2300	int ret;
				2301
				2302	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
				2303	ret = sbi->s_stripe;
				2304	else if (stripe_width <= sbi->s_blocks_per_group)
				2305	ret = stripe_width;
				2306	else if (stride <= sbi->s_blocks_per_group)
				2307	ret = stride;
				2308	else
				2309	ret = 0;
				2310
				2311	/*
				2312	* If the stripe width is 1, this makes no sense and
				2313	* we set it to 0 to turn off stripe handling code.
				2314	*/
				2315	if (ret <= 1)
				2316	ret = 0;
				2317
				2318	return ret;
				2319	}
				2320
				2321	/* sysfs supprt */
				2322
				2323	struct ext4_attr {
				2324	struct attribute attr;
				2325	ssize_t (show)(struct ext4_attr , struct ext4_sb_info , char );
				2326	ssize_t (store)(struct ext4_attr , struct ext4_sb_info *,
				2327	const char *, size_t);
				2328	int offset;
				2329	};
				2330
				2331	static int parse_strtoul(const char *buf,
				2332	unsigned long max, unsigned long *value)
				2333	{
				2334	char *endp;
				2335
				2336	*value = simple_strtoul(skip_spaces(buf), &endp, 0);
				2337	endp = skip_spaces(endp);
				2338	if (endp \|\| value > max)
				2339	return -EINVAL;
				2340
				2341	return 0;
				2342	}
				2343
				2344	static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
				2345	struct ext4_sb_info *sbi,
				2346	char *buf)
				2347	{
				2348	return snprintf(buf, PAGE_SIZE, "%llu\n",
				2349	(s64) EXT4_C2B(sbi,
				2350	percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
				2351	}
				2352
				2353	static ssize_t session_write_kbytes_show(struct ext4_attr *a,
				2354	struct ext4_sb_info sbi, char buf)
				2355	{
				2356	struct super_block *sb = sbi->s_buddy_cache->i_sb;
				2357
				2358	if (!sb->s_bdev->bd_part)
				2359	return snprintf(buf, PAGE_SIZE, "0\n");
				2360	return snprintf(buf, PAGE_SIZE, "%lu\n",
				2361	(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
				2362	sbi->s_sectors_written_start) >> 1);
				2363	}
				2364
				2365	static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
				2366	struct ext4_sb_info sbi, char buf)
				2367	{
				2368	struct super_block *sb = sbi->s_buddy_cache->i_sb;
				2369
				2370	if (!sb->s_bdev->bd_part)
				2371	return snprintf(buf, PAGE_SIZE, "0\n");
				2372	return snprintf(buf, PAGE_SIZE, "%llu\n",
				2373	(unsigned long long)(sbi->s_kbytes_written +
				2374	((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
				2375	EXT4_SB(sb)->s_sectors_written_start) >> 1)));
				2376	}
				2377
				2378	static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
				2379	struct ext4_sb_info *sbi,
				2380	const char *buf, size_t count)
				2381	{
				2382	unsigned long t;
				2383
				2384	if (parse_strtoul(buf, 0x40000000, &t))
				2385	return -EINVAL;
				2386
				2387	if (t && !is_power_of_2(t))
				2388	return -EINVAL;
				2389
				2390	sbi->s_inode_readahead_blks = t;
				2391	return count;
				2392	}
				2393
				2394	static ssize_t sbi_ui_show(struct ext4_attr *a,
				2395	struct ext4_sb_info sbi, char buf)
				2396	{
				2397	unsigned int ui = (unsigned int ) (((char *) sbi) + a->offset);
				2398
				2399	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
				2400	}
				2401
				2402	static ssize_t sbi_ui_store(struct ext4_attr *a,
				2403	struct ext4_sb_info *sbi,
				2404	const char *buf, size_t count)
				2405	{
				2406	unsigned int ui = (unsigned int ) (((char *) sbi) + a->offset);
				2407	unsigned long t;
				2408
				2409	if (parse_strtoul(buf, 0xffffffff, &t))
				2410	return -EINVAL;
				2411	*ui = t;
				2412	return count;
				2413	}
				2414
				2415	#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
				2416	static struct ext4_attr ext4_attr_##_name = { \
				2417	.attr = {.name = __stringify(_name), .mode = _mode }, \
				2418	.show = _show, \
				2419	.store = _store, \
				2420	.offset = offsetof(struct ext4_sb_info, _elname), \
				2421	}
				2422	#define EXT4_ATTR(name, mode, show, store) \
				2423	static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
				2424
				2425	#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
				2426	#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
				2427	#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
				2428	#define EXT4_RW_ATTR_SBI_UI(name, elname) \
				2429	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
				2430	#define ATTR_LIST(name) &ext4_attr_##name.attr
				2431
				2432	EXT4_RO_ATTR(delayed_allocation_blocks);
				2433	EXT4_RO_ATTR(session_write_kbytes);
				2434	EXT4_RO_ATTR(lifetime_write_kbytes);
				2435	EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
				2436	inode_readahead_blks_store, s_inode_readahead_blks);
				2437	EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
				2438	EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
				2439	EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
				2440	EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
				2441	EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
				2442	EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
				2443	EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
				2444	EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
				2445
				2446	static struct attribute *ext4_attrs[] = {
				2447	ATTR_LIST(delayed_allocation_blocks),
				2448	ATTR_LIST(session_write_kbytes),
				2449	ATTR_LIST(lifetime_write_kbytes),
				2450	ATTR_LIST(inode_readahead_blks),
				2451	ATTR_LIST(inode_goal),
				2452	ATTR_LIST(mb_stats),
				2453	ATTR_LIST(mb_max_to_scan),
				2454	ATTR_LIST(mb_min_to_scan),
				2455	ATTR_LIST(mb_order2_req),
				2456	ATTR_LIST(mb_stream_req),
				2457	ATTR_LIST(mb_group_prealloc),
				2458	ATTR_LIST(max_writeback_mb_bump),
				2459	NULL,
				2460	};
				2461
				2462	/* Features this copy of ext4 supports */
				2463	EXT4_INFO_ATTR(lazy_itable_init);
				2464	EXT4_INFO_ATTR(batched_discard);
				2465
				2466	static struct attribute *ext4_feat_attrs[] = {
				2467	ATTR_LIST(lazy_itable_init),
				2468	ATTR_LIST(batched_discard),
				2469	NULL,
				2470	};
				2471
				2472	static ssize_t ext4_attr_show(struct kobject *kobj,
				2473	struct attribute attr, char buf)
				2474	{
				2475	struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
				2476	s_kobj);
				2477	struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
				2478
				2479	return a->show ? a->show(a, sbi, buf) : 0;
				2480	}
				2481
				2482	static ssize_t ext4_attr_store(struct kobject *kobj,
				2483	struct attribute *attr,
				2484	const char *buf, size_t len)
				2485	{
				2486	struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
				2487	s_kobj);
				2488	struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
				2489
				2490	return a->store ? a->store(a, sbi, buf, len) : 0;
				2491	}
				2492
				2493	static void ext4_sb_release(struct kobject *kobj)
				2494	{
				2495	struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
				2496	s_kobj);
				2497	complete(&sbi->s_kobj_unregister);
				2498	}
				2499
				2500	static const struct sysfs_ops ext4_attr_ops = {
				2501	.show = ext4_attr_show,
				2502	.store = ext4_attr_store,
				2503	};
				2504
				2505	static struct kobj_type ext4_ktype = {
				2506	.default_attrs = ext4_attrs,
				2507	.sysfs_ops = &ext4_attr_ops,
				2508	.release = ext4_sb_release,
				2509	};
				2510
				2511	static void ext4_feat_release(struct kobject *kobj)
				2512	{
				2513	complete(&ext4_feat->f_kobj_unregister);
				2514	}
				2515
				2516	static struct kobj_type ext4_feat_ktype = {
				2517	.default_attrs = ext4_feat_attrs,
				2518	.sysfs_ops = &ext4_attr_ops,
				2519	.release = ext4_feat_release,
				2520	};
				2521
				2522	/*
				2523	* Check whether this filesystem can be mounted based on
				2524	* the features present and the RDONLY/RDWR mount requested.
				2525	* Returns 1 if this filesystem can be mounted as requested,
				2526	* 0 if it cannot be.
				2527	*/
				2528	static int ext4_feature_set_ok(struct super_block *sb, int readonly)
				2529	{
				2530	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
				2531	ext4_msg(sb, KERN_ERR,
				2532	"Couldn't mount because of "
				2533	"unsupported optional features (%x)",
				2534	(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
				2535	~EXT4_FEATURE_INCOMPAT_SUPP));
				2536	return 0;
				2537	}
				2538
				2539	if (readonly)
				2540	return 1;
				2541
				2542	/* Check that feature set is OK for a read-write mount */
				2543	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
				2544	ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
				2545	"unsupported optional features (%x)",
				2546	(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
				2547	~EXT4_FEATURE_RO_COMPAT_SUPP));
				2548	return 0;
				2549	}
				2550	/*
				2551	* Large file size enabled file system can only be mounted
				2552	* read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
				2553	*/
				2554	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
				2555	if (sizeof(blkcnt_t) < sizeof(u64)) {
				2556	ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
				2557	"cannot be mounted RDWR without "
				2558	"CONFIG_LBDAF");
				2559	return 0;
				2560	}
				2561	}
				2562	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
				2563	!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
				2564	ext4_msg(sb, KERN_ERR,
				2565	"Can't support bigalloc feature without "
				2566	"extents feature\n");
				2567	return 0;
				2568	}
				2569	return 1;
				2570	}
				2571
				2572	/*
				2573	* This function is called once a day if we have errors logged
				2574	* on the file system
				2575	*/
				2576	static void print_daily_error_info(unsigned long arg)
				2577	{
				2578	struct super_block sb = (struct super_block ) arg;
				2579	struct ext4_sb_info *sbi;
				2580	struct ext4_super_block *es;
				2581
				2582	sbi = EXT4_SB(sb);
				2583	es = sbi->s_es;
				2584
				2585	if (es->s_error_count)
				2586	/* fsck newer than v1.41.13 is needed to clean this condition. */
				2587	ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
				2588	le32_to_cpu(es->s_error_count));
				2589	if (es->s_first_error_time) {
				2590	printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
				2591	sb->s_id, le32_to_cpu(es->s_first_error_time),
				2592	(int) sizeof(es->s_first_error_func),
				2593	es->s_first_error_func,
				2594	le32_to_cpu(es->s_first_error_line));
				2595	if (es->s_first_error_ino)
				2596	printk(": inode %u",
				2597	le32_to_cpu(es->s_first_error_ino));
				2598	if (es->s_first_error_block)
				2599	printk(": block %llu", (unsigned long long)
				2600	le64_to_cpu(es->s_first_error_block));
				2601	printk("\n");
				2602	}
				2603	if (es->s_last_error_time) {
				2604	printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
				2605	sb->s_id, le32_to_cpu(es->s_last_error_time),
				2606	(int) sizeof(es->s_last_error_func),
				2607	es->s_last_error_func,
				2608	le32_to_cpu(es->s_last_error_line));
				2609	if (es->s_last_error_ino)
				2610	printk(": inode %u",
				2611	le32_to_cpu(es->s_last_error_ino));
				2612	if (es->s_last_error_block)
				2613	printk(": block %llu", (unsigned long long)
				2614	le64_to_cpu(es->s_last_error_block));
				2615	printk("\n");
				2616	}
				2617	mod_timer(&sbi->s_err_report, jiffies + 246060HZ); / Once a day */
				2618	}
				2619
				2620	/* Find next suitable group and run ext4_init_inode_table */
				2621	static int ext4_run_li_request(struct ext4_li_request *elr)
				2622	{
				2623	struct ext4_group_desc *gdp = NULL;
				2624	ext4_group_t group, ngroups;
				2625	struct super_block *sb;
				2626	unsigned long timeout = 0;
				2627	int ret = 0;
				2628
				2629	sb = elr->lr_super;
				2630	ngroups = EXT4_SB(sb)->s_groups_count;
				2631
				2632	for (group = elr->lr_next_group; group < ngroups; group++) {
				2633	gdp = ext4_get_group_desc(sb, group, NULL);
				2634	if (!gdp) {
				2635	ret = 1;
				2636	break;
				2637	}
				2638
				2639	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				2640	break;
				2641	}
				2642
				2643	if (group == ngroups)
				2644	ret = 1;
				2645
				2646	if (!ret) {
				2647	timeout = jiffies;
				2648	ret = ext4_init_inode_table(sb, group,
				2649	elr->lr_timeout ? 0 : 1);
				2650	if (elr->lr_timeout == 0) {
				2651	timeout = (jiffies - timeout) *
				2652	elr->lr_sbi->s_li_wait_mult;
				2653	elr->lr_timeout = timeout;
				2654	}
				2655	elr->lr_next_sched = jiffies + elr->lr_timeout;
				2656	elr->lr_next_group = group + 1;
				2657	}
				2658
				2659	return ret;
				2660	}
				2661
				2662	/*
				2663	* Remove lr_request from the list_request and free the
				2664	* request structure. Should be called with li_list_mtx held
				2665	*/
				2666	static void ext4_remove_li_request(struct ext4_li_request *elr)
				2667	{
				2668	struct ext4_sb_info *sbi;
				2669
				2670	if (!elr)
				2671	return;
				2672
				2673	sbi = elr->lr_sbi;
				2674
				2675	list_del(&elr->lr_request);
				2676	sbi->s_li_request = NULL;
				2677	kfree(elr);
				2678	}
				2679
				2680	static void ext4_unregister_li_request(struct super_block *sb)
				2681	{
				2682	mutex_lock(&ext4_li_mtx);
				2683	if (!ext4_li_info) {
				2684	mutex_unlock(&ext4_li_mtx);
				2685	return;
				2686	}
				2687
				2688	mutex_lock(&ext4_li_info->li_list_mtx);
				2689	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
				2690	mutex_unlock(&ext4_li_info->li_list_mtx);
				2691	mutex_unlock(&ext4_li_mtx);
				2692	}
				2693
				2694	static struct task_struct *ext4_lazyinit_task;
				2695
				2696	/*
				2697	* This is the function where ext4lazyinit thread lives. It walks
				2698	* through the request list searching for next scheduled filesystem.
				2699	* When such a fs is found, run the lazy initialization request
				2700	* (ext4_rn_li_request) and keep track of the time spend in this
				2701	* function. Based on that time we compute next schedule time of
				2702	* the request. When walking through the list is complete, compute
				2703	* next waking time and put itself into sleep.
				2704	*/
				2705	static int ext4_lazyinit_thread(void *arg)
				2706	{
				2707	struct ext4_lazy_init eli = (struct ext4_lazy_init )arg;
				2708	struct list_head pos, n;
				2709	struct ext4_li_request *elr;
				2710	unsigned long next_wakeup, cur;
				2711
				2712	BUG_ON(NULL == eli);
				2713
				2714	cont_thread:
				2715	while (true) {
				2716	next_wakeup = MAX_JIFFY_OFFSET;
				2717
				2718	mutex_lock(&eli->li_list_mtx);
				2719	if (list_empty(&eli->li_request_list)) {
				2720	mutex_unlock(&eli->li_list_mtx);
				2721	goto exit_thread;
				2722	}
				2723
				2724	list_for_each_safe(pos, n, &eli->li_request_list) {
				2725	elr = list_entry(pos, struct ext4_li_request,
				2726	lr_request);
				2727
				2728	if (time_after_eq(jiffies, elr->lr_next_sched)) {
				2729	if (ext4_run_li_request(elr) != 0) {
				2730	/* error, remove the lazy_init job */
				2731	ext4_remove_li_request(elr);
				2732	continue;
				2733	}
				2734	}
				2735
				2736	if (time_before(elr->lr_next_sched, next_wakeup))
				2737	next_wakeup = elr->lr_next_sched;
				2738	}
				2739	mutex_unlock(&eli->li_list_mtx);
				2740
				2741	try_to_freeze();
				2742
				2743	cur = jiffies;
				2744	if ((time_after_eq(cur, next_wakeup)) \|\|
				2745	(MAX_JIFFY_OFFSET == next_wakeup)) {
				2746	cond_resched();
				2747	continue;
				2748	}
				2749
				2750	schedule_timeout_interruptible(next_wakeup - cur);
				2751
				2752	if (kthread_should_stop()) {
				2753	ext4_clear_request_list();
				2754	goto exit_thread;
				2755	}
				2756	}
				2757
				2758	exit_thread:
				2759	/*
				2760	* It looks like the request list is empty, but we need
				2761	* to check it under the li_list_mtx lock, to prevent any
				2762	* additions into it, and of course we should lock ext4_li_mtx
				2763	* to atomically free the list and ext4_li_info, because at
				2764	* this point another ext4 filesystem could be registering
				2765	* new one.
				2766	*/
				2767	mutex_lock(&ext4_li_mtx);
				2768	mutex_lock(&eli->li_list_mtx);
				2769	if (!list_empty(&eli->li_request_list)) {
				2770	mutex_unlock(&eli->li_list_mtx);
				2771	mutex_unlock(&ext4_li_mtx);
				2772	goto cont_thread;
				2773	}
				2774	mutex_unlock(&eli->li_list_mtx);
				2775	kfree(ext4_li_info);
				2776	ext4_li_info = NULL;
				2777	mutex_unlock(&ext4_li_mtx);
				2778
				2779	return 0;
				2780	}
				2781
				2782	static void ext4_clear_request_list(void)
				2783	{
				2784	struct list_head pos, n;
				2785	struct ext4_li_request *elr;
				2786
				2787	mutex_lock(&ext4_li_info->li_list_mtx);
				2788	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
				2789	elr = list_entry(pos, struct ext4_li_request,
				2790	lr_request);
				2791	ext4_remove_li_request(elr);
				2792	}
				2793	mutex_unlock(&ext4_li_info->li_list_mtx);
				2794	}
				2795
				2796	static int ext4_run_lazyinit_thread(void)
				2797	{
				2798	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
				2799	ext4_li_info, "ext4lazyinit");
				2800	if (IS_ERR(ext4_lazyinit_task)) {
				2801	int err = PTR_ERR(ext4_lazyinit_task);
				2802	ext4_clear_request_list();
				2803	kfree(ext4_li_info);
				2804	ext4_li_info = NULL;
				2805	printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
				2806	"initialization thread\n",
				2807	err);
				2808	return err;
				2809	}
				2810	ext4_li_info->li_state \|= EXT4_LAZYINIT_RUNNING;
				2811	return 0;
				2812	}
				2813
				2814	/*
				2815	* Check whether it make sense to run itable init. thread or not.
				2816	* If there is at least one uninitialized inode table, return
				2817	* corresponding group number, else the loop goes through all
				2818	* groups and return total number of groups.
				2819	*/
				2820	static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
				2821	{
				2822	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
				2823	struct ext4_group_desc *gdp = NULL;
				2824
				2825	for (group = 0; group < ngroups; group++) {
				2826	gdp = ext4_get_group_desc(sb, group, NULL);
				2827	if (!gdp)
				2828	continue;
				2829
				2830	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				2831	break;
				2832	}
				2833
				2834	return group;
				2835	}
				2836
				2837	static int ext4_li_info_new(void)
				2838	{
				2839	struct ext4_lazy_init *eli = NULL;
				2840
				2841	eli = kzalloc(sizeof(*eli), GFP_KERNEL);
				2842	if (!eli)
				2843	return -ENOMEM;
				2844
				2845	INIT_LIST_HEAD(&eli->li_request_list);
				2846	mutex_init(&eli->li_list_mtx);
				2847
				2848	eli->li_state \|= EXT4_LAZYINIT_QUIT;
				2849
				2850	ext4_li_info = eli;
				2851
				2852	return 0;
				2853	}
				2854
				2855	static struct ext4_li_request ext4_li_request_new(struct super_block sb,
				2856	ext4_group_t start)
				2857	{
				2858	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2859	struct ext4_li_request *elr;
				2860	unsigned long rnd;
				2861
				2862	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
				2863	if (!elr)
				2864	return NULL;
				2865
				2866	elr->lr_super = sb;
				2867	elr->lr_sbi = sbi;
				2868	elr->lr_next_group = start;
				2869
				2870	/*
				2871	* Randomize first schedule time of the request to
				2872	* spread the inode table initialization requests
				2873	* better.
				2874	*/
				2875	get_random_bytes(&rnd, sizeof(rnd));
				2876	elr->lr_next_sched = jiffies + (unsigned long)rnd %
				2877	(EXT4_DEF_LI_MAX_START_DELAY * HZ);
				2878
				2879	return elr;
				2880	}
				2881
				2882	static int ext4_register_li_request(struct super_block *sb,
				2883	ext4_group_t first_not_zeroed)
				2884	{
				2885	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2886	struct ext4_li_request *elr;
				2887	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
				2888	int ret = 0;
				2889
				2890	if (sbi->s_li_request != NULL) {
				2891	/*
				2892	* Reset timeout so it can be computed again, because
				2893	* s_li_wait_mult might have changed.
				2894	*/
				2895	sbi->s_li_request->lr_timeout = 0;
				2896	return 0;
				2897	}
				2898
				2899	if (first_not_zeroed == ngroups \|\|
				2900	(sb->s_flags & MS_RDONLY) \|\|
				2901	!test_opt(sb, INIT_INODE_TABLE))
				2902	return 0;
				2903
				2904	elr = ext4_li_request_new(sb, first_not_zeroed);
				2905	if (!elr)
				2906	return -ENOMEM;
				2907
				2908	mutex_lock(&ext4_li_mtx);
				2909
				2910	if (NULL == ext4_li_info) {
				2911	ret = ext4_li_info_new();
				2912	if (ret)
				2913	goto out;
				2914	}
				2915
				2916	mutex_lock(&ext4_li_info->li_list_mtx);
				2917	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
				2918	mutex_unlock(&ext4_li_info->li_list_mtx);
				2919
				2920	sbi->s_li_request = elr;
				2921	/*
				2922	* set elr to NULL here since it has been inserted to
				2923	* the request_list and the removal and free of it is
				2924	* handled by ext4_clear_request_list from now on.
				2925	*/
				2926	elr = NULL;
				2927
				2928	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
				2929	ret = ext4_run_lazyinit_thread();
				2930	if (ret)
				2931	goto out;
				2932	}
				2933	out:
				2934	mutex_unlock(&ext4_li_mtx);
				2935	if (ret)
				2936	kfree(elr);
				2937	return ret;
				2938	}
				2939
				2940	/*
				2941	* We do not need to lock anything since this is called on
				2942	* module unload.
				2943	*/
				2944	static void ext4_destroy_lazyinit_thread(void)
				2945	{
				2946	/*
				2947	* If thread exited earlier
				2948	* there's nothing to be done.
				2949	*/
				2950	if (!ext4_li_info \|\| !ext4_lazyinit_task)
				2951	return;
				2952
				2953	kthread_stop(ext4_lazyinit_task);
				2954	}
				2955
				2956	/*
				2957	* Note: calculating the overhead so we can be compatible with
				2958	* historical BSD practice is quite difficult in the face of
				2959	* clusters/bigalloc. This is because multiple metadata blocks from
				2960	* different block group can end up in the same allocation cluster.
				2961	* Calculating the exact overhead in the face of clustered allocation
				2962	* requires either O(all block bitmaps) in memory or O(number of block
				2963	* groups**2) in time. We will still calculate the superblock for
				2964	* older file systems --- and if we come across with a bigalloc file
				2965	* system with zero in s_overhead_clusters the estimate will be close to
				2966	* correct especially for very large cluster sizes --- but for newer
				2967	* file systems, it's better to calculate this figure once at mkfs
				2968	* time, and store it in the superblock. If the superblock value is
				2969	* present (even for non-bigalloc file systems), we will use it.
				2970	*/
				2971	static int count_overhead(struct super_block *sb, ext4_group_t grp,
				2972	char *buf)
				2973	{
				2974	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2975	struct ext4_group_desc *gdp;
				2976	ext4_fsblk_t first_block, last_block, b;
				2977	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
				2978	int s, j, count = 0;
				2979
				2980	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
				2981	return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
				2982	sbi->s_itb_per_group + 2);
				2983
				2984	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
				2985	(grp * EXT4_BLOCKS_PER_GROUP(sb));
				2986	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
				2987	for (i = 0; i < ngroups; i++) {
				2988	gdp = ext4_get_group_desc(sb, i, NULL);
				2989	b = ext4_block_bitmap(sb, gdp);
				2990	if (b >= first_block && b <= last_block) {
				2991	ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
				2992	count++;
				2993	}
				2994	b = ext4_inode_bitmap(sb, gdp);
				2995	if (b >= first_block && b <= last_block) {
				2996	ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
				2997	count++;
				2998	}
				2999	b = ext4_inode_table(sb, gdp);
				3000	if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
				3001	for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				3002	int c = EXT4_B2C(sbi, b - first_block);
				3003	ext4_set_bit(c, buf);
				3004	count++;
				3005	}
				3006	if (i != grp)
				3007	continue;
				3008	s = 0;
				3009	if (ext4_bg_has_super(sb, grp)) {
				3010	ext4_set_bit(s++, buf);
				3011	count++;
				3012	}
				3013	for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
				3014	ext4_set_bit(EXT4_B2C(sbi, s++), buf);
				3015	count++;
				3016	}
				3017	}
				3018	if (!count)
				3019	return 0;
				3020	return EXT4_CLUSTERS_PER_GROUP(sb) -
				3021	ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
				3022	}
				3023
				3024	/*
				3025	* Compute the overhead and stash it in sbi->s_overhead
				3026	*/
				3027	int ext4_calculate_overhead(struct super_block *sb)
				3028	{
				3029	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3030	struct ext4_super_block *es = sbi->s_es;
				3031	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
				3032	ext4_fsblk_t overhead = 0;
				3033	char buf = (char ) get_zeroed_page(GFP_KERNEL);
				3034
				3035	memset(buf, 0, PAGE_SIZE);
				3036	if (!buf)
				3037	return -ENOMEM;
				3038
				3039	/*
				3040	* Compute the overhead (FS structures). This is constant
				3041	* for a given filesystem unless the number of block groups
				3042	* changes so we cache the previous value until it does.
				3043	*/
				3044
				3045	/*
				3046	* All of the blocks before first_data_block are overhead
				3047	*/
				3048	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
				3049
				3050	/*
				3051	* Add the overhead found in each block group
				3052	*/
				3053	for (i = 0; i < ngroups; i++) {
				3054	int blks;
				3055
				3056	blks = count_overhead(sb, i, buf);
				3057	overhead += blks;
				3058	if (blks)
				3059	memset(buf, 0, PAGE_SIZE);
				3060	cond_resched();
				3061	}
				3062	sbi->s_overhead = overhead;
				3063	smp_wmb();
				3064	free_page((unsigned long) buf);
				3065	return 0;
				3066	}
				3067
				3068	static int ext4_fill_super(struct super_block sb, void data, int silent)
				3069	{
				3070	char *orig_data = kstrdup(data, GFP_KERNEL);
				3071	struct buffer_head *bh;
				3072	struct ext4_super_block *es = NULL;
				3073	struct ext4_sb_info *sbi;
				3074	ext4_fsblk_t block;
				3075	ext4_fsblk_t sb_block = get_sb_block(&data);
				3076	ext4_fsblk_t logical_sb_block;
				3077	unsigned long offset = 0;
				3078	unsigned long journal_devnum = 0;
				3079	unsigned long def_mount_opts;
				3080	struct inode *root;
				3081	char *cp;
				3082	const char *descr;
				3083	int ret = -ENOMEM;
				3084	int blocksize, clustersize;
				3085	unsigned int db_count;
				3086	unsigned int i;
				3087	int needs_recovery, has_huge_files, has_bigalloc;
				3088	__u64 blocks_count;
				3089	int err;
				3090	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
				3091	ext4_group_t first_not_zeroed;
				3092
				3093	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
				3094	if (!sbi)
				3095	goto out_free_orig;
				3096
				3097	sbi->s_blockgroup_lock =
				3098	kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
				3099	if (!sbi->s_blockgroup_lock) {
				3100	kfree(sbi);
				3101	goto out_free_orig;
				3102	}
				3103	sb->s_fs_info = sbi;
				3104	sbi->s_mount_opt = 0;
				3105	sbi->s_resuid = EXT4_DEF_RESUID;
				3106	sbi->s_resgid = EXT4_DEF_RESGID;
				3107	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
				3108	sbi->s_sb_block = sb_block;
				3109	if (sb->s_bdev->bd_part)
				3110	sbi->s_sectors_written_start =
				3111	part_stat_read(sb->s_bdev->bd_part, sectors[1]);
				3112
				3113	/* Cleanup superblock name */
				3114	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
				3115	*cp = '!';
				3116
				3117	ret = -EINVAL;
				3118	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
				3119	if (!blocksize) {
				3120	ext4_msg(sb, KERN_ERR, "unable to set blocksize");
				3121	goto out_fail;
				3122	}
				3123
				3124	/*
				3125	* The ext4 superblock will not be buffer aligned for other than 1kB
				3126	* block sizes. We need to calculate the offset from buffer start.
				3127	*/
				3128	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
				3129	logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
				3130	offset = do_div(logical_sb_block, blocksize);
				3131	} else {
				3132	logical_sb_block = sb_block;
				3133	}
				3134
				3135	if (!(bh = sb_bread(sb, logical_sb_block))) {
				3136	ext4_msg(sb, KERN_ERR, "unable to read superblock");
				3137	goto out_fail;
				3138	}
				3139	/*
				3140	* Note: s_es must be initialized as soon as possible because
				3141	* some ext4 macro-instructions depend on its value
				3142	*/
				3143	es = (struct ext4_super_block ) (((char )bh->b_data) + offset);
				3144	sbi->s_es = es;
				3145	sb->s_magic = le16_to_cpu(es->s_magic);
				3146	if (sb->s_magic != EXT4_SUPER_MAGIC)
				3147	goto cantfind_ext4;
				3148	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
				3149
				3150	/* Set defaults before we parse the mount options */
				3151	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				3152	set_opt(sb, INIT_INODE_TABLE);
				3153	if (def_mount_opts & EXT4_DEFM_DEBUG)
				3154	set_opt(sb, DEBUG);
				3155	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
				3156	set_opt(sb, GRPID);
				3157	if (def_mount_opts & EXT4_DEFM_UID16)
				3158	set_opt(sb, NO_UID32);
				3159	/* xattr user namespace & acls are now defaulted on */
				3160	#ifdef CONFIG_EXT4_FS_XATTR
				3161	set_opt(sb, XATTR_USER);
				3162	#endif
				3163	#ifdef CONFIG_EXT4_FS_POSIX_ACL
				3164	set_opt(sb, POSIX_ACL);
				3165	#endif
				3166	set_opt(sb, MBLK_IO_SUBMIT);
				3167	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
				3168	set_opt(sb, JOURNAL_DATA);
				3169	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
				3170	set_opt(sb, ORDERED_DATA);
				3171	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
				3172	set_opt(sb, WRITEBACK_DATA);
				3173
				3174	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
				3175	set_opt(sb, ERRORS_PANIC);
				3176	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
				3177	set_opt(sb, ERRORS_CONT);
				3178	else
				3179	set_opt(sb, ERRORS_RO);
				3180	if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
				3181	set_opt(sb, BLOCK_VALIDITY);
				3182	if (def_mount_opts & EXT4_DEFM_DISCARD)
				3183	set_opt(sb, DISCARD);
				3184
				3185	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
				3186	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
				3187	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
				3188	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
				3189	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
				3190
				3191	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
				3192	set_opt(sb, BARRIER);
				3193
				3194	/*
				3195	* enable delayed allocation by default
				3196	* Use -o nodelalloc to turn it off
				3197	*/
				3198	if (!IS_EXT3_SB(sb) &&
				3199	((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
				3200	set_opt(sb, DELALLOC);
				3201
				3202	/*
				3203	* set default s_li_wait_mult for lazyinit, for the case there is
				3204	* no mount option specified.
				3205	*/
				3206	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
				3207
				3208	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
				3209	&journal_devnum, &journal_ioprio, 0)) {
				3210	ext4_msg(sb, KERN_WARNING,
				3211	"failed to parse options in superblock: %s",
				3212	sbi->s_es->s_mount_opts);
				3213	}
				3214	sbi->s_def_mount_opt = sbi->s_mount_opt;
				3215	if (!parse_options((char *) data, sb, &journal_devnum,
				3216	&journal_ioprio, 0))
				3217	goto failed_mount;
				3218
				3219	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
				3220	printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
				3221	"with data=journal disables delayed "
				3222	"allocation and O_DIRECT support!\n");
				3223	if (test_opt2(sb, EXPLICIT_DELALLOC)) {
				3224	ext4_msg(sb, KERN_ERR, "can't mount with "
				3225	"both data=journal and delalloc");
				3226	goto failed_mount;
				3227	}
				3228	if (test_opt(sb, DIOREAD_NOLOCK)) {
				3229	ext4_msg(sb, KERN_ERR, "can't mount with "
				3230	"both data=journal and dioread_nolock");
				3231	goto failed_mount;
				3232	}
				3233	if (test_opt(sb, DELALLOC))
				3234	clear_opt(sb, DELALLOC);
				3235	}
				3236
				3237	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				3238	(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
				3239
				3240	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
				3241	(EXT4_HAS_COMPAT_FEATURE(sb, ~0U) \|\|
				3242	EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) \|\|
				3243	EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
				3244	ext4_msg(sb, KERN_WARNING,
				3245	"feature flags set on rev 0 fs, "
				3246	"running e2fsck is recommended");
				3247
				3248	if (IS_EXT2_SB(sb)) {
				3249	if (ext2_feature_set_ok(sb))
				3250	ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
				3251	"using the ext4 subsystem");
				3252	else {
				3253	ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
				3254	"to feature incompatibilities");
				3255	goto failed_mount;
				3256	}
				3257	}
				3258
				3259	if (IS_EXT3_SB(sb)) {
				3260	if (ext3_feature_set_ok(sb))
				3261	ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
				3262	"using the ext4 subsystem");
				3263	else {
				3264	ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
				3265	"to feature incompatibilities");
				3266	goto failed_mount;
				3267	}
				3268	}
				3269
				3270	/*
				3271	* Check feature flags regardless of the revision level, since we
				3272	* previously didn't change the revision level when setting the flags,
				3273	* so there is a chance incompat flags are set on a rev 0 filesystem.
				3274	*/
				3275	if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
				3276	goto failed_mount;
				3277
				3278	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
				3279	if (blocksize < EXT4_MIN_BLOCK_SIZE \|\|
				3280	blocksize > EXT4_MAX_BLOCK_SIZE) {
				3281	ext4_msg(sb, KERN_ERR,
				3282	"Unsupported filesystem blocksize %d", blocksize);
				3283	goto failed_mount;
				3284	}
				3285
				3286	if (sb->s_blocksize != blocksize) {
				3287	/* Validate the filesystem blocksize */
				3288	if (!sb_set_blocksize(sb, blocksize)) {
				3289	ext4_msg(sb, KERN_ERR, "bad block size %d",
				3290	blocksize);
				3291	goto failed_mount;
				3292	}
				3293
				3294	brelse(bh);
				3295	logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
				3296	offset = do_div(logical_sb_block, blocksize);
				3297	bh = sb_bread(sb, logical_sb_block);
				3298	if (!bh) {
				3299	ext4_msg(sb, KERN_ERR,
				3300	"Can't read superblock on 2nd try");
				3301	goto failed_mount;
				3302	}
				3303	es = (struct ext4_super_block )(((char )bh->b_data) + offset);
				3304	sbi->s_es = es;
				3305	if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
				3306	ext4_msg(sb, KERN_ERR,
				3307	"Magic mismatch, very weird!");
				3308	goto failed_mount;
				3309	}
				3310	}
				3311
				3312	has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
				3313	EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
				3314	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
				3315	has_huge_files);
				3316	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
				3317
				3318	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
				3319	sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
				3320	sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
				3321	} else {
				3322	sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
				3323	sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
				3324	if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) \|\|
				3325	(!is_power_of_2(sbi->s_inode_size)) \|\|
				3326	(sbi->s_inode_size > blocksize)) {
				3327	ext4_msg(sb, KERN_ERR,
				3328	"unsupported inode size: %d",
				3329	sbi->s_inode_size);
				3330	goto failed_mount;
				3331	}
				3332	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
				3333	sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
				3334	}
				3335
				3336	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
				3337	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
				3338	if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT \|\|
				3339	sbi->s_desc_size > EXT4_MAX_DESC_SIZE \|\|
				3340	!is_power_of_2(sbi->s_desc_size)) {
				3341	ext4_msg(sb, KERN_ERR,
				3342	"unsupported descriptor size %lu",
				3343	sbi->s_desc_size);
				3344	goto failed_mount;
				3345	}
				3346	} else
				3347	sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
				3348
				3349	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
				3350	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
				3351	if (EXT4_INODE_SIZE(sb) == 0 \|\| EXT4_INODES_PER_GROUP(sb) == 0)
				3352	goto cantfind_ext4;
				3353
				3354	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
				3355	if (sbi->s_inodes_per_block == 0)
				3356	goto cantfind_ext4;
				3357	sbi->s_itb_per_group = sbi->s_inodes_per_group /
				3358	sbi->s_inodes_per_block;
				3359	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
				3360	sbi->s_sbh = bh;
				3361	sbi->s_mount_state = le16_to_cpu(es->s_state);
				3362	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
				3363	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
				3364
				3365	for (i = 0; i < 4; i++)
				3366	sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
				3367	sbi->s_def_hash_version = es->s_def_hash_version;
				3368	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
				3369	i = le32_to_cpu(es->s_flags);
				3370	if (i & EXT2_FLAGS_UNSIGNED_HASH)
				3371	sbi->s_hash_unsigned = 3;
				3372	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
				3373	#ifdef __CHAR_UNSIGNED__
				3374	if (!(sb->s_flags & MS_RDONLY))
				3375	es->s_flags \|=
				3376	cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
				3377	sbi->s_hash_unsigned = 3;
				3378	#else
				3379	if (!(sb->s_flags & MS_RDONLY))
				3380	es->s_flags \|=
				3381	cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
				3382	#endif
				3383	}
				3384	}
				3385
				3386	/* Handle clustersize */
				3387	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
				3388	has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
				3389	EXT4_FEATURE_RO_COMPAT_BIGALLOC);
				3390	if (has_bigalloc) {
				3391	if (clustersize < blocksize) {
				3392	ext4_msg(sb, KERN_ERR,
				3393	"cluster size (%d) smaller than "
				3394	"block size (%d)", clustersize, blocksize);
				3395	goto failed_mount;
				3396	}
				3397	sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
				3398	le32_to_cpu(es->s_log_block_size);
				3399	sbi->s_clusters_per_group =
				3400	le32_to_cpu(es->s_clusters_per_group);
				3401	if (sbi->s_clusters_per_group > blocksize * 8) {
				3402	ext4_msg(sb, KERN_ERR,
				3403	"#clusters per group too big: %lu",
				3404	sbi->s_clusters_per_group);
				3405	goto failed_mount;
				3406	}
				3407	if (sbi->s_blocks_per_group !=
				3408	(sbi->s_clusters_per_group * (clustersize / blocksize))) {
				3409	ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
				3410	"clusters per group (%lu) inconsistent",
				3411	sbi->s_blocks_per_group,
				3412	sbi->s_clusters_per_group);
				3413	goto failed_mount;
				3414	}
				3415	} else {
				3416	if (clustersize != blocksize) {
				3417	ext4_warning(sb, "fragment/cluster size (%d) != "
				3418	"block size (%d)", clustersize,
				3419	blocksize);
				3420	clustersize = blocksize;
				3421	}
				3422	if (sbi->s_blocks_per_group > blocksize * 8) {
				3423	ext4_msg(sb, KERN_ERR,
				3424	"#blocks per group too big: %lu",
				3425	sbi->s_blocks_per_group);
				3426	goto failed_mount;
				3427	}
				3428	sbi->s_clusters_per_group = sbi->s_blocks_per_group;
				3429	sbi->s_cluster_bits = 0;
				3430	}
				3431	sbi->s_cluster_ratio = clustersize / blocksize;
				3432
				3433	if (sbi->s_inodes_per_group > blocksize * 8) {
				3434	ext4_msg(sb, KERN_ERR,
				3435	"#inodes per group too big: %lu",
				3436	sbi->s_inodes_per_group);
				3437	goto failed_mount;
				3438	}
				3439
				3440	/*
				3441	* Test whether we have more sectors than will fit in sector_t,
				3442	* and whether the max offset is addressable by the page cache.
				3443	*/
				3444	err = generic_check_addressable(sb->s_blocksize_bits,
				3445	ext4_blocks_count(es));
				3446	if (err) {
				3447	ext4_msg(sb, KERN_ERR, "filesystem"
				3448	" too large to mount safely on this system");
				3449	if (sizeof(sector_t) < 8)
				3450	ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
				3451	ret = err;
				3452	goto failed_mount;
				3453	}
				3454
				3455	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
				3456	goto cantfind_ext4;
				3457
				3458	/* check blocks count against device size */
				3459	blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
				3460	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
				3461	ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
				3462	"exceeds size of device (%llu blocks)",
				3463	ext4_blocks_count(es), blocks_count);
				3464	goto failed_mount;
				3465	}
				3466
				3467	/*
				3468	* It makes no sense for the first data block to be beyond the end
				3469	* of the filesystem.
				3470	*/
				3471	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
				3472	ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
				3473	"block %u is beyond end of filesystem (%llu)",
				3474	le32_to_cpu(es->s_first_data_block),
				3475	ext4_blocks_count(es));
				3476	goto failed_mount;
				3477	}
				3478	blocks_count = (ext4_blocks_count(es) -
				3479	le32_to_cpu(es->s_first_data_block) +
				3480	EXT4_BLOCKS_PER_GROUP(sb) - 1);
				3481	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
				3482	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
				3483	ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
				3484	"(block count %llu, first data block %u, "
				3485	"blocks per group %lu)", sbi->s_groups_count,
				3486	ext4_blocks_count(es),
				3487	le32_to_cpu(es->s_first_data_block),
				3488	EXT4_BLOCKS_PER_GROUP(sb));
				3489	goto failed_mount;
				3490	}
				3491	sbi->s_groups_count = blocks_count;
				3492	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
				3493	(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
				3494	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
				3495	EXT4_DESC_PER_BLOCK(sb);
				3496	sbi->s_group_desc = ext4_kvmalloc(db_count *
				3497	sizeof(struct buffer_head *),
				3498	GFP_KERNEL);
				3499	if (sbi->s_group_desc == NULL) {
				3500	ext4_msg(sb, KERN_ERR, "not enough memory");
				3501	goto failed_mount;
				3502	}
				3503
				3504	if (ext4_proc_root)
				3505	sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
				3506
				3507	if (sbi->s_proc)
				3508	proc_create_data("options", S_IRUGO, sbi->s_proc,
				3509	&ext4_seq_options_fops, sb);
				3510
				3511	bgl_lock_init(sbi->s_blockgroup_lock);
				3512
				3513	for (i = 0; i < db_count; i++) {
				3514	block = descriptor_loc(sb, logical_sb_block, i);
				3515	sbi->s_group_desc[i] = sb_bread(sb, block);
				3516	if (!sbi->s_group_desc[i]) {
				3517	ext4_msg(sb, KERN_ERR,
				3518	"can't read group descriptor %d", i);
				3519	db_count = i;
				3520	goto failed_mount2;
				3521	}
				3522	}
				3523	if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
				3524	ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
				3525	goto failed_mount2;
				3526	}
				3527	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
				3528	if (!ext4_fill_flex_info(sb)) {
				3529	ext4_msg(sb, KERN_ERR,
				3530	"unable to initialize "
				3531	"flex_bg meta info!");
				3532	goto failed_mount2;
				3533	}
				3534
				3535	sbi->s_gdb_count = db_count;
				3536	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
				3537	spin_lock_init(&sbi->s_next_gen_lock);
				3538
				3539	init_timer(&sbi->s_err_report);
				3540	sbi->s_err_report.function = print_daily_error_info;
				3541	sbi->s_err_report.data = (unsigned long) sb;
				3542
				3543	err = percpu_counter_init(&sbi->s_freeclusters_counter,
				3544	ext4_count_free_clusters(sb));
				3545	if (!err) {
				3546	err = percpu_counter_init(&sbi->s_freeinodes_counter,
				3547	ext4_count_free_inodes(sb));
				3548	}
				3549	if (!err) {
				3550	err = percpu_counter_init(&sbi->s_dirs_counter,
				3551	ext4_count_dirs(sb));
				3552	}
				3553	if (!err) {
				3554	err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
				3555	}
				3556	if (err) {
				3557	ext4_msg(sb, KERN_ERR, "insufficient memory");
				3558	goto failed_mount3;
				3559	}
				3560
				3561	sbi->s_stripe = ext4_get_stripe_size(sbi);
				3562	sbi->s_max_writeback_mb_bump = 128;
				3563
				3564	/*
				3565	* set up enough so that it can read an inode
				3566	*/
				3567	if (!test_opt(sb, NOLOAD) &&
				3568	EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
				3569	sb->s_op = &ext4_sops;
				3570	else
				3571	sb->s_op = &ext4_nojournal_sops;
				3572	sb->s_export_op = &ext4_export_ops;
				3573	sb->s_xattr = ext4_xattr_handlers;
				3574	#ifdef CONFIG_QUOTA
				3575	sb->s_qcop = &ext4_qctl_operations;
				3576	sb->dq_op = &ext4_quota_operations;
				3577	#endif
				3578	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
				3579
				3580	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
				3581	mutex_init(&sbi->s_orphan_lock);
				3582	sbi->s_resize_flags = 0;
				3583
				3584	sb->s_root = NULL;
				3585
				3586	needs_recovery = (es->s_last_orphan != 0 \|\|
				3587	EXT4_HAS_INCOMPAT_FEATURE(sb,
				3588	EXT4_FEATURE_INCOMPAT_RECOVER));
				3589
				3590	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
				3591	!(sb->s_flags & MS_RDONLY))
				3592	if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
				3593	goto failed_mount3;
				3594
				3595	/*
				3596	* The first inode we look at is the journal inode. Don't try
				3597	* root first: it may be modified in the journal!
				3598	*/
				3599	if (!test_opt(sb, NOLOAD) &&
				3600	EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
				3601	if (ext4_load_journal(sb, es, journal_devnum))
				3602	goto failed_mount3;
				3603	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
				3604	EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
				3605	ext4_msg(sb, KERN_ERR, "required journal recovery "
				3606	"suppressed and not mounted read-only");
				3607	goto failed_mount_wq;
				3608	} else {
				3609	clear_opt(sb, DATA_FLAGS);
				3610	sbi->s_journal = NULL;
				3611	needs_recovery = 0;
				3612	goto no_journal;
				3613	}
				3614
				3615	if (ext4_blocks_count(es) > 0xffffffffULL &&
				3616	!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
				3617	JBD2_FEATURE_INCOMPAT_64BIT)) {
				3618	ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
				3619	goto failed_mount_wq;
				3620	}
				3621
				3622	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				3623	jbd2_journal_set_features(sbi->s_journal,
				3624	JBD2_FEATURE_COMPAT_CHECKSUM, 0,
				3625	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3626	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
				3627	jbd2_journal_set_features(sbi->s_journal,
				3628	JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
				3629	jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				3630	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3631	} else {
				3632	jbd2_journal_clear_features(sbi->s_journal,
				3633	JBD2_FEATURE_COMPAT_CHECKSUM, 0,
				3634	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3635	}
				3636
				3637	/* We have now updated the journal if required, so we can
				3638	* validate the data journaling mode. */
				3639	switch (test_opt(sb, DATA_FLAGS)) {
				3640	case 0:
				3641	/* No mode set, assume a default based on the journal
				3642	* capabilities: ORDERED_DATA if the journal can
				3643	* cope, else JOURNAL_DATA
				3644	*/
				3645	if (jbd2_journal_check_available_features
				3646	(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
				3647	set_opt(sb, ORDERED_DATA);
				3648	else
				3649	set_opt(sb, JOURNAL_DATA);
				3650	break;
				3651
				3652	case EXT4_MOUNT_ORDERED_DATA:
				3653	case EXT4_MOUNT_WRITEBACK_DATA:
				3654	if (!jbd2_journal_check_available_features
				3655	(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
				3656	ext4_msg(sb, KERN_ERR, "Journal does not support "
				3657	"requested data journaling mode");
				3658	goto failed_mount_wq;
				3659	}
				3660	default:
				3661	break;
				3662	}
				3663	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
				3664
				3665	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
				3666
				3667	/*
				3668	* The journal may have updated the bg summary counts, so we
				3669	* need to update the global counters.
				3670	*/
				3671	percpu_counter_set(&sbi->s_freeclusters_counter,
				3672	ext4_count_free_clusters(sb));
				3673	percpu_counter_set(&sbi->s_freeinodes_counter,
				3674	ext4_count_free_inodes(sb));
				3675	percpu_counter_set(&sbi->s_dirs_counter,
				3676	ext4_count_dirs(sb));
				3677	percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
				3678
				3679	no_journal:
				3680	/*
				3681	* Get the # of file system overhead blocks from the
				3682	* superblock if present.
				3683	*/
				3684	if (es->s_overhead_clusters)
				3685	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
				3686	else {
				3687	ret = ext4_calculate_overhead(sb);
				3688	if (ret)
				3689	goto failed_mount_wq;
				3690	}
				3691
				3692	/*
				3693	* The maximum number of concurrent works can be high and
				3694	* concurrency isn't really necessary. Limit it to 1.
				3695	*/
				3696	EXT4_SB(sb)->dio_unwritten_wq =
				3697	alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM \| WQ_UNBOUND, 1);
				3698	if (!EXT4_SB(sb)->dio_unwritten_wq) {
				3699	printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
				3700	goto failed_mount_wq;
				3701	}
				3702
				3703	/*
				3704	* The jbd2_journal_load will have done any necessary log recovery,
				3705	* so we can safely mount the rest of the filesystem now.
				3706	*/
				3707
				3708	root = ext4_iget(sb, EXT4_ROOT_INO);
				3709	if (IS_ERR(root)) {
				3710	ext4_msg(sb, KERN_ERR, "get root inode failed");
				3711	ret = PTR_ERR(root);
				3712	root = NULL;
				3713	goto failed_mount4;
				3714	}
				3715	if (!S_ISDIR(root->i_mode) \|\| !root->i_blocks \|\| !root->i_size) {
				3716	ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
				3717	iput(root);
				3718	goto failed_mount4;
				3719	}
				3720	sb->s_root = d_make_root(root);
				3721	if (!sb->s_root) {
				3722	ext4_msg(sb, KERN_ERR, "get root dentry failed");
				3723	ret = -ENOMEM;
				3724	goto failed_mount4;
				3725	}
				3726
				3727	if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
				3728	sb->s_flags \|= MS_RDONLY;
				3729
				3730	/* determine the minimum size of new large inodes, if present */
				3731	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
				3732	sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
				3733	EXT4_GOOD_OLD_INODE_SIZE;
				3734	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				3735	EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
				3736	if (sbi->s_want_extra_isize <
				3737	le16_to_cpu(es->s_want_extra_isize))
				3738	sbi->s_want_extra_isize =
				3739	le16_to_cpu(es->s_want_extra_isize);
				3740	if (sbi->s_want_extra_isize <
				3741	le16_to_cpu(es->s_min_extra_isize))
				3742	sbi->s_want_extra_isize =
				3743	le16_to_cpu(es->s_min_extra_isize);
				3744	}
				3745	}
				3746	/* Check if enough inode space is available */
				3747	if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
				3748	sbi->s_inode_size) {
				3749	sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
				3750	EXT4_GOOD_OLD_INODE_SIZE;
				3751	ext4_msg(sb, KERN_INFO, "required extra inode space not"
				3752	"available");
				3753	}
				3754
				3755	err = ext4_setup_system_zone(sb);
				3756	if (err) {
				3757	ext4_msg(sb, KERN_ERR, "failed to initialize system "
				3758	"zone (%d)", err);
				3759	goto failed_mount4a;
				3760	}
				3761
				3762	ext4_ext_init(sb);
				3763	err = ext4_mb_init(sb, needs_recovery);
				3764	if (err) {
				3765	ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
				3766	err);
				3767	goto failed_mount5;
				3768	}
				3769
				3770	err = ext4_register_li_request(sb, first_not_zeroed);
				3771	if (err)
				3772	goto failed_mount6;
				3773
				3774	sbi->s_kobj.kset = ext4_kset;
				3775	init_completion(&sbi->s_kobj_unregister);
				3776	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
				3777	"%s", sb->s_id);
				3778	if (err)
				3779	goto failed_mount7;
				3780
				3781	EXT4_SB(sb)->s_mount_state \|= EXT4_ORPHAN_FS;
				3782	ext4_orphan_cleanup(sb, es);
				3783	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
				3784	if (needs_recovery) {
				3785	ext4_msg(sb, KERN_INFO, "recovery complete");
				3786	ext4_mark_recovery_complete(sb, es);
				3787	}
				3788	if (EXT4_SB(sb)->s_journal) {
				3789	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
				3790	descr = " journalled data mode";
				3791	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
				3792	descr = " ordered data mode";
				3793	else
				3794	descr = " writeback data mode";
				3795	} else
				3796	descr = "out journal";
				3797
				3798	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
				3799	"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
				3800	*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
				3801
				3802	if (es->s_error_count)
				3803	mod_timer(&sbi->s_err_report, jiffies + 300HZ); / 5 minutes */
				3804
				3805	kfree(orig_data);
				3806	return 0;
				3807
				3808	cantfind_ext4:
				3809	if (!silent)
				3810	ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
				3811	goto failed_mount;
				3812
				3813	failed_mount7:
				3814	ext4_unregister_li_request(sb);
				3815	failed_mount6:
				3816	ext4_mb_release(sb);
				3817	failed_mount5:
				3818	ext4_ext_release(sb);
				3819	ext4_release_system_zone(sb);
				3820	failed_mount4a:
				3821	dput(sb->s_root);
				3822	sb->s_root = NULL;
				3823	failed_mount4:
				3824	ext4_msg(sb, KERN_ERR, "mount failed");
				3825	destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
				3826	failed_mount_wq:
				3827	if (sbi->s_journal) {
				3828	jbd2_journal_destroy(sbi->s_journal);
				3829	sbi->s_journal = NULL;
				3830	}
				3831	failed_mount3:
				3832	del_timer(&sbi->s_err_report);
				3833	if (sbi->s_flex_groups)
				3834	ext4_kvfree(sbi->s_flex_groups);
				3835	percpu_counter_destroy(&sbi->s_freeclusters_counter);
				3836	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				3837	percpu_counter_destroy(&sbi->s_dirs_counter);
				3838	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
				3839	if (sbi->s_mmp_tsk)
				3840	kthread_stop(sbi->s_mmp_tsk);
				3841	failed_mount2:
				3842	for (i = 0; i < db_count; i++)
				3843	brelse(sbi->s_group_desc[i]);
				3844	ext4_kvfree(sbi->s_group_desc);
				3845	failed_mount:
				3846	if (sbi->s_proc) {
				3847	remove_proc_entry("options", sbi->s_proc);
				3848	remove_proc_entry(sb->s_id, ext4_proc_root);
				3849	}
				3850	#ifdef CONFIG_QUOTA
				3851	for (i = 0; i < MAXQUOTAS; i++)
				3852	kfree(sbi->s_qf_names[i]);
				3853	#endif
				3854	ext4_blkdev_remove(sbi);
				3855	brelse(bh);
				3856	out_fail:
				3857	sb->s_fs_info = NULL;
				3858	kfree(sbi->s_blockgroup_lock);
				3859	kfree(sbi);
				3860	out_free_orig:
				3861	kfree(orig_data);
				3862	return ret;
				3863	}
				3864
				3865	/*
				3866	* Setup any per-fs journal parameters now. We'll do this both on
				3867	* initial mount, once the journal has been initialised but before we've
				3868	* done any recovery; and again on any subsequent remount.
				3869	*/
				3870	static void ext4_init_journal_params(struct super_block sb, journal_t journal)
				3871	{
				3872	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3873
				3874	journal->j_commit_interval = sbi->s_commit_interval;
				3875	journal->j_min_batch_time = sbi->s_min_batch_time;
				3876	journal->j_max_batch_time = sbi->s_max_batch_time;
				3877
				3878	write_lock(&journal->j_state_lock);
				3879	if (test_opt(sb, BARRIER))
				3880	journal->j_flags \|= JBD2_BARRIER;
				3881	else
				3882	journal->j_flags &= ~JBD2_BARRIER;
				3883	if (test_opt(sb, DATA_ERR_ABORT))
				3884	journal->j_flags \|= JBD2_ABORT_ON_SYNCDATA_ERR;
				3885	else
				3886	journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
				3887	write_unlock(&journal->j_state_lock);
				3888	}
				3889
				3890	static journal_t ext4_get_journal(struct super_block sb,
				3891	unsigned int journal_inum)
				3892	{
				3893	struct inode *journal_inode;
				3894	journal_t *journal;
				3895
				3896	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
				3897
				3898	/* First, test for the existence of a valid inode on disk. Bad
				3899	* things happen if we iget() an unused inode, as the subsequent
				3900	* iput() will try to delete it. */
				3901
				3902	journal_inode = ext4_iget(sb, journal_inum);
				3903	if (IS_ERR(journal_inode)) {
				3904	ext4_msg(sb, KERN_ERR, "no journal found");
				3905	return NULL;
				3906	}
				3907	if (!journal_inode->i_nlink) {
				3908	make_bad_inode(journal_inode);
				3909	iput(journal_inode);
				3910	ext4_msg(sb, KERN_ERR, "journal inode is deleted");
				3911	return NULL;
				3912	}
				3913
				3914	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
				3915	journal_inode, journal_inode->i_size);
				3916	if (!S_ISREG(journal_inode->i_mode)) {
				3917	ext4_msg(sb, KERN_ERR, "invalid journal inode");
				3918	iput(journal_inode);
				3919	return NULL;
				3920	}
				3921
				3922	journal = jbd2_journal_init_inode(journal_inode);
				3923	if (!journal) {
				3924	ext4_msg(sb, KERN_ERR, "Could not load journal inode");
				3925	iput(journal_inode);
				3926	return NULL;
				3927	}
				3928	journal->j_private = sb;
				3929	ext4_init_journal_params(sb, journal);
				3930	return journal;
				3931	}
				3932
				3933	static journal_t ext4_get_dev_journal(struct super_block sb,
				3934	dev_t j_dev)
				3935	{
				3936	struct buffer_head *bh;
				3937	journal_t *journal;
				3938	ext4_fsblk_t start;
				3939	ext4_fsblk_t len;
				3940	int hblock, blocksize;
				3941	ext4_fsblk_t sb_block;
				3942	unsigned long offset;
				3943	struct ext4_super_block *es;
				3944	struct block_device *bdev;
				3945
				3946	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
				3947
				3948	bdev = ext4_blkdev_get(j_dev, sb);
				3949	if (bdev == NULL)
				3950	return NULL;
				3951
				3952	blocksize = sb->s_blocksize;
				3953	hblock = bdev_logical_block_size(bdev);
				3954	if (blocksize < hblock) {
				3955	ext4_msg(sb, KERN_ERR,
				3956	"blocksize too small for journal device");
				3957	goto out_bdev;
				3958	}
				3959
				3960	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
				3961	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
				3962	set_blocksize(bdev, blocksize);
				3963	if (!(bh = __bread(bdev, sb_block, blocksize))) {
				3964	ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
				3965	"external journal");
				3966	goto out_bdev;
				3967	}
				3968
				3969	es = (struct ext4_super_block ) (((char )bh->b_data) + offset);
				3970	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) \|\|
				3971	!(le32_to_cpu(es->s_feature_incompat) &
				3972	EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
				3973	ext4_msg(sb, KERN_ERR, "external journal has "
				3974	"bad superblock");
				3975	brelse(bh);
				3976	goto out_bdev;
				3977	}
				3978
				3979	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
				3980	ext4_msg(sb, KERN_ERR, "journal UUID does not match");
				3981	brelse(bh);
				3982	goto out_bdev;
				3983	}
				3984
				3985	len = ext4_blocks_count(es);
				3986	start = sb_block + 1;
				3987	brelse(bh); /* we're done with the superblock */
				3988
				3989	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
				3990	start, len, blocksize);
				3991	if (!journal) {
				3992	ext4_msg(sb, KERN_ERR, "failed to create device journal");
				3993	goto out_bdev;
				3994	}
				3995	journal->j_private = sb;
				3996	ll_rw_block(READ, 1, &journal->j_sb_buffer);
				3997	wait_on_buffer(journal->j_sb_buffer);
				3998	if (!buffer_uptodate(journal->j_sb_buffer)) {
				3999	ext4_msg(sb, KERN_ERR, "I/O error on journal device");
				4000	goto out_journal;
				4001	}
				4002	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
				4003	ext4_msg(sb, KERN_ERR, "External journal has more than one "
				4004	"user (unsupported) - %d",
				4005	be32_to_cpu(journal->j_superblock->s_nr_users));
				4006	goto out_journal;
				4007	}
				4008	EXT4_SB(sb)->journal_bdev = bdev;
				4009	ext4_init_journal_params(sb, journal);
				4010	return journal;
				4011
				4012	out_journal:
				4013	jbd2_journal_destroy(journal);
				4014	out_bdev:
				4015	ext4_blkdev_put(bdev);
				4016	return NULL;
				4017	}
				4018
				4019	static int ext4_load_journal(struct super_block *sb,
				4020	struct ext4_super_block *es,
				4021	unsigned long journal_devnum)
				4022	{
				4023	journal_t *journal;
				4024	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
				4025	dev_t journal_dev;
				4026	int err = 0;
				4027	int really_read_only;
				4028
				4029	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
				4030
				4031	if (journal_devnum &&
				4032	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				4033	ext4_msg(sb, KERN_INFO, "external journal device major/minor "
				4034	"numbers have changed");
				4035	journal_dev = new_decode_dev(journal_devnum);
				4036	} else
				4037	journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
				4038
				4039	really_read_only = bdev_read_only(sb->s_bdev);
				4040
				4041	/*
				4042	* Are we loading a blank journal or performing recovery after a
				4043	* crash? For recovery, we need to check in advance whether we
				4044	* can get read-write access to the device.
				4045	*/
				4046	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
				4047	if (sb->s_flags & MS_RDONLY) {
				4048	ext4_msg(sb, KERN_INFO, "INFO: recovery "
				4049	"required on readonly filesystem");
				4050	if (really_read_only) {
				4051	ext4_msg(sb, KERN_ERR, "write access "
				4052	"unavailable, cannot proceed");
				4053	return -EROFS;
				4054	}
				4055	ext4_msg(sb, KERN_INFO, "write access will "
				4056	"be enabled during recovery");
				4057	}
				4058	}
				4059
				4060	if (journal_inum && journal_dev) {
				4061	ext4_msg(sb, KERN_ERR, "filesystem has both journal "
				4062	"and inode journals!");
				4063	return -EINVAL;
				4064	}
				4065
				4066	if (journal_inum) {
				4067	if (!(journal = ext4_get_journal(sb, journal_inum)))
				4068	return -EINVAL;
				4069	} else {
				4070	if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
				4071	return -EINVAL;
				4072	}
				4073
				4074	if (!(journal->j_flags & JBD2_BARRIER))
				4075	ext4_msg(sb, KERN_INFO, "barriers disabled");
				4076
				4077	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
				4078	err = jbd2_journal_wipe(journal, !really_read_only);
				4079	if (!err) {
				4080	char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
				4081	if (save)
				4082	memcpy(save, ((char *) es) +
				4083	EXT4_S_ERR_START, EXT4_S_ERR_LEN);
				4084	err = jbd2_journal_load(journal);
				4085	if (save)
				4086	memcpy(((char *) es) + EXT4_S_ERR_START,
				4087	save, EXT4_S_ERR_LEN);
				4088	kfree(save);
				4089	}
				4090
				4091	if (err) {
				4092	ext4_msg(sb, KERN_ERR, "error loading journal");
				4093	jbd2_journal_destroy(journal);
				4094	return err;
				4095	}
				4096
				4097	EXT4_SB(sb)->s_journal = journal;
				4098	ext4_clear_journal_err(sb, es);
				4099
				4100	if (!really_read_only && journal_devnum &&
				4101	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				4102	es->s_journal_dev = cpu_to_le32(journal_devnum);
				4103
				4104	/* Make sure we flush the recovery flag to disk. */
				4105	ext4_commit_super(sb, 1);
				4106	}
				4107
				4108	return 0;
				4109	}
				4110
				4111	static int ext4_commit_super(struct super_block *sb, int sync)
				4112	{
				4113	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				4114	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
				4115	int error = 0;
				4116
				4117	if (!sbh \|\| block_device_ejected(sb))
				4118	return error;
				4119	if (buffer_write_io_error(sbh)) {
				4120	/*
				4121	* Oh, dear. A previous attempt to write the
				4122	* superblock failed. This could happen because the
				4123	* USB device was yanked out. Or it could happen to
				4124	* be a transient write error and maybe the block will
				4125	* be remapped. Nothing we can do but to retry the
				4126	* write and hope for the best.
				4127	*/
				4128	ext4_msg(sb, KERN_ERR, "previous I/O error to "
				4129	"superblock detected");
				4130	clear_buffer_write_io_error(sbh);
				4131	set_buffer_uptodate(sbh);
				4132	}
				4133	/*
				4134	* If the file system is mounted read-only, don't update the
				4135	* superblock write time. This avoids updating the superblock
				4136	* write time when we are mounting the root file system
				4137	* read/only but we need to replay the journal; at that point,
				4138	* for people who are east of GMT and who make their clock
				4139	* tick in localtime for Windows bug-for-bug compatibility,
				4140	* the clock is set in the future, and this will cause e2fsck
				4141	* to complain and force a full file system check.
				4142	*/
				4143	if (!(sb->s_flags & MS_RDONLY))
				4144	es->s_wtime = cpu_to_le32(get_seconds());
				4145	if (sb->s_bdev->bd_part)
				4146	es->s_kbytes_written =
				4147	cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
				4148	((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
				4149	EXT4_SB(sb)->s_sectors_written_start) >> 1));
				4150	else
				4151	es->s_kbytes_written =
				4152	cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
				4153	ext4_free_blocks_count_set(es,
				4154	EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
				4155	&EXT4_SB(sb)->s_freeclusters_counter)));
				4156	es->s_free_inodes_count =
				4157	cpu_to_le32(percpu_counter_sum_positive(
				4158	&EXT4_SB(sb)->s_freeinodes_counter));
				4159	sb->s_dirt = 0;
				4160	BUFFER_TRACE(sbh, "marking dirty");
				4161	mark_buffer_dirty(sbh);
				4162	if (sync) {
				4163	error = sync_dirty_buffer(sbh);
				4164	if (error)
				4165	return error;
				4166
				4167	error = buffer_write_io_error(sbh);
				4168	if (error) {
				4169	ext4_msg(sb, KERN_ERR, "I/O error while writing "
				4170	"superblock");
				4171	clear_buffer_write_io_error(sbh);
				4172	set_buffer_uptodate(sbh);
				4173	}
				4174	}
				4175	return error;
				4176	}
				4177
				4178	/*
				4179	* Have we just finished recovery? If so, and if we are mounting (or
				4180	* remounting) the filesystem readonly, then we will end up with a
				4181	* consistent fs on disk. Record that fact.
				4182	*/
				4183	static void ext4_mark_recovery_complete(struct super_block *sb,
				4184	struct ext4_super_block *es)
				4185	{
				4186	journal_t *journal = EXT4_SB(sb)->s_journal;
				4187
				4188	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
				4189	BUG_ON(journal != NULL);
				4190	return;
				4191	}
				4192	jbd2_journal_lock_updates(journal);
				4193	if (jbd2_journal_flush(journal) < 0)
				4194	goto out;
				4195
				4196	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
				4197	sb->s_flags & MS_RDONLY) {
				4198	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
				4199	ext4_commit_super(sb, 1);
				4200	}
				4201
				4202	out:
				4203	jbd2_journal_unlock_updates(journal);
				4204	}
				4205
				4206	/*
				4207	* If we are mounting (or read-write remounting) a filesystem whose journal
				4208	* has recorded an error from a previous lifetime, move that error to the
				4209	* main filesystem now.
				4210	*/
				4211	static void ext4_clear_journal_err(struct super_block *sb,
				4212	struct ext4_super_block *es)
				4213	{
				4214	journal_t *journal;
				4215	int j_errno;
				4216	const char *errstr;
				4217
				4218	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
				4219
				4220	journal = EXT4_SB(sb)->s_journal;
				4221
				4222	/*
				4223	* Now check for any error status which may have been recorded in the
				4224	* journal by a prior ext4_error() or ext4_abort()
				4225	*/
				4226
				4227	j_errno = jbd2_journal_errno(journal);
				4228	if (j_errno) {
				4229	char nbuf[16];
				4230
				4231	errstr = ext4_decode_error(sb, j_errno, nbuf);
				4232	ext4_warning(sb, "Filesystem error recorded "
				4233	"from previous mount: %s", errstr);
				4234	ext4_warning(sb, "Marking fs in need of filesystem check.");
				4235
				4236	EXT4_SB(sb)->s_mount_state \|= EXT4_ERROR_FS;
				4237	es->s_state \|= cpu_to_le16(EXT4_ERROR_FS);
				4238	ext4_commit_super(sb, 1);
				4239
				4240	jbd2_journal_clear_err(journal);
				4241	jbd2_journal_update_sb_errno(journal);
				4242	}
				4243	}
				4244
				4245	/*
				4246	* Force the running and committing transactions to commit,
				4247	* and wait on the commit.
				4248	*/
				4249	int ext4_force_commit(struct super_block *sb)
				4250	{
				4251	journal_t *journal;
				4252	int ret = 0;
				4253
				4254	if (sb->s_flags & MS_RDONLY)
				4255	return 0;
				4256
				4257	journal = EXT4_SB(sb)->s_journal;
				4258	if (journal) {
				4259	vfs_check_frozen(sb, SB_FREEZE_TRANS);
				4260	ret = ext4_journal_force_commit(journal);
				4261	}
				4262
				4263	return ret;
				4264	}
				4265
				/*
				 * Write the superblock out synchronously while holding the
				 * superblock lock.  The result of ext4_commit_super() is not reported.
				 */
				static void ext4_write_super(struct super_block *sb)
				{
					lock_super(sb);
					ext4_commit_super(sb, 1);	/* 1 == synchronous write */
					unlock_super(sb);
				}
				4272
				4273	static int ext4_sync_fs(struct super_block *sb, int wait)
				4274	{
				4275	int ret = 0;
				4276	tid_t target;
				4277	struct ext4_sb_info *sbi = EXT4_SB(sb);
				4278
				4279	trace_ext4_sync_fs(sb, wait);
				4280	flush_workqueue(sbi->dio_unwritten_wq);
				4281	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
				4282	if (wait)
				4283	jbd2_log_wait_commit(sbi->s_journal, target);
				4284	}
				4285	return ret;
				4286	}
				4287
				4288	/*
				4289	* LVM calls this function before a (read-only) snapshot is created. This
				4290	* gives us a chance to flush the journal completely and mark the fs clean.
				4291	*
				4292	* Note that only this function cannot bring a filesystem to be in a clean
				4293	* state independently, because ext4 prevents a new handle from being started
				4294	* by @sb->s_frozen, which stays in an upper layer. It thus needs help from
				4295	* the upper layer.
				4296	*/
				4297	static int ext4_freeze(struct super_block *sb)
				4298	{
				4299	int error = 0;
				4300	journal_t *journal;
				4301
				4302	if (sb->s_flags & MS_RDONLY)
				4303	return 0;
				4304
				4305	journal = EXT4_SB(sb)->s_journal;
				4306
				4307	/* Now we set up the journal barrier. */
				4308	jbd2_journal_lock_updates(journal);
				4309
				4310	/*
				4311	* Don't clear the needs_recovery flag if we failed to flush
				4312	* the journal.
				4313	*/
				4314	error = jbd2_journal_flush(journal);
				4315	if (error < 0)
				4316	goto out;
				4317
				4318	/* Journal blocked and flushed, clear needs_recovery flag. */
				4319	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
				4320	error = ext4_commit_super(sb, 1);
				4321	out:
				4322	/* we rely on s_frozen to stop further updates */
				4323	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
				4324	return error;
				4325	}
				4326
				4327	/*
				4328	* Called by LVM after the snapshot is done. We need to reset the RECOVER
				4329	* flag here, even though the filesystem is not technically dirty yet.
				4330	*/
				4331	static int ext4_unfreeze(struct super_block *sb)
				4332	{
				4333	if (sb->s_flags & MS_RDONLY)
				4334	return 0;
				4335
				4336	lock_super(sb);
				4337	/* Reset the needs_recovery flag before the fs is unlocked. */
				4338	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
				4339	ext4_commit_super(sb, 1);
				4340	unlock_super(sb);
				4341	return 0;
				4342	}
				4343
				4344	/*
				4345	* Structure to save mount options for ext4_remount's benefit
				4346	*/
				4347	struct ext4_mount_options {
				4348	unsigned long s_mount_opt;
				4349	unsigned long s_mount_opt2;
				4350	uid_t s_resuid;
				4351	gid_t s_resgid;
				4352	unsigned long s_commit_interval;
				4353	u32 s_min_batch_time, s_max_batch_time;
				4354	#ifdef CONFIG_QUOTA
				4355	int s_jquota_fmt;
				4356	char *s_qf_names[MAXQUOTAS];
				4357	#endif
				4358	};
				4359
				4360	static int ext4_remount(struct super_block sb, int flags, char *data)
				4361	{
				4362	struct ext4_super_block *es;
				4363	struct ext4_sb_info *sbi = EXT4_SB(sb);
				4364	unsigned long old_sb_flags;
				4365	struct ext4_mount_options old_opts;
				4366	int enable_quota = 0;
				4367	ext4_group_t g;
				4368	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
				4369	int err = 0;
				4370	#ifdef CONFIG_QUOTA
				4371	int i;
				4372	#endif
				4373	char *orig_data = kstrdup(data, GFP_KERNEL);
				4374
				4375	/* Store the original options */
				4376	lock_super(sb);
				4377	old_sb_flags = sb->s_flags;
				4378	old_opts.s_mount_opt = sbi->s_mount_opt;
				4379	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
				4380	old_opts.s_resuid = sbi->s_resuid;
				4381	old_opts.s_resgid = sbi->s_resgid;
				4382	old_opts.s_commit_interval = sbi->s_commit_interval;
				4383	old_opts.s_min_batch_time = sbi->s_min_batch_time;
				4384	old_opts.s_max_batch_time = sbi->s_max_batch_time;
				4385	#ifdef CONFIG_QUOTA
				4386	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
				4387	for (i = 0; i < MAXQUOTAS; i++)
				4388	old_opts.s_qf_names[i] = sbi->s_qf_names[i];
				4389	#endif
				4390	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
				4391	journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
				4392
				4393	/*
				4394	* Allow the "check" option to be passed as a remount option.
				4395	*/
				4396	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
				4397	err = -EINVAL;
				4398	goto restore_opts;
				4399	}
				4400
				4401	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
				4402	if (test_opt2(sb, EXPLICIT_DELALLOC)) {
				4403	ext4_msg(sb, KERN_ERR, "can't mount with "
				4404	"both data=journal and delalloc");
				4405	err = -EINVAL;
				4406	goto restore_opts;
				4407	}
				4408	if (test_opt(sb, DIOREAD_NOLOCK)) {
				4409	ext4_msg(sb, KERN_ERR, "can't mount with "
				4410	"both data=journal and dioread_nolock");
				4411	err = -EINVAL;
				4412	goto restore_opts;
				4413	}
				4414	}
				4415
				4416	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
				4417	ext4_abort(sb, "Abort forced by user");
				4418
				4419	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				4420	(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
				4421
				4422	es = sbi->s_es;
				4423
				4424	if (sbi->s_journal) {
				4425	ext4_init_journal_params(sb, sbi->s_journal);
				4426	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
				4427	}
				4428
				4429	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
				4430	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
				4431	err = -EROFS;
				4432	goto restore_opts;
				4433	}
				4434
				4435	if (*flags & MS_RDONLY) {
				4436	err = dquot_suspend(sb, -1);
				4437	if (err < 0)
				4438	goto restore_opts;
				4439
				4440	/*
				4441	* First of all, the unconditional stuff we have to do
				4442	* to disable replay of the journal when we next remount
				4443	*/
				4444	sb->s_flags \|= MS_RDONLY;
				4445
				4446	/*
				4447	* OK, test if we are remounting a valid rw partition
				4448	* readonly, and if so set the rdonly flag and then
				4449	* mark the partition as valid again.
				4450	*/
				4451	if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
				4452	(sbi->s_mount_state & EXT4_VALID_FS))
				4453	es->s_state = cpu_to_le16(sbi->s_mount_state);
				4454
				4455	if (sbi->s_journal)
				4456	ext4_mark_recovery_complete(sb, es);
				4457	} else {
				4458	/* Make sure we can mount this feature set readwrite */
				4459	if (!ext4_feature_set_ok(sb, 0)) {
				4460	err = -EROFS;
				4461	goto restore_opts;
				4462	}
				4463	/*
				4464	* Make sure the group descriptor checksums
				4465	* are sane. If they aren't, refuse to remount r/w.
				4466	*/
				4467	for (g = 0; g < sbi->s_groups_count; g++) {
				4468	struct ext4_group_desc *gdp =
				4469	ext4_get_group_desc(sb, g, NULL);
				4470
				4471	if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
				4472	ext4_msg(sb, KERN_ERR,
				4473	"ext4_remount: Checksum for group %u failed (%u!=%u)",
				4474	g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
				4475	le16_to_cpu(gdp->bg_checksum));
				4476	err = -EINVAL;
				4477	goto restore_opts;
				4478	}
				4479	}
				4480
				4481	/*
				4482	* If we have an unprocessed orphan list hanging
				4483	* around from a previously readonly bdev mount,
				4484	* require a full umount/remount for now.
				4485	*/
				4486	if (es->s_last_orphan) {
				4487	ext4_msg(sb, KERN_WARNING, "Couldn't "
				4488	"remount RDWR because of unprocessed "
				4489	"orphan inode list. Please "
				4490	"umount/remount instead");
				4491	err = -EINVAL;
				4492	goto restore_opts;
				4493	}
				4494
				4495	/*
				4496	* Mounting a RDONLY partition read-write, so reread
				4497	* and store the current valid flag. (It may have
				4498	* been changed by e2fsck since we originally mounted
				4499	* the partition.)
				4500	*/
				4501	if (sbi->s_journal)
				4502	ext4_clear_journal_err(sb, es);
				4503	sbi->s_mount_state = le16_to_cpu(es->s_state);
				4504	if (!ext4_setup_super(sb, es, 0))
				4505	sb->s_flags &= ~MS_RDONLY;
				4506	if (EXT4_HAS_INCOMPAT_FEATURE(sb,
				4507	EXT4_FEATURE_INCOMPAT_MMP))
				4508	if (ext4_multi_mount_protect(sb,
				4509	le64_to_cpu(es->s_mmp_block))) {
				4510	err = -EROFS;
				4511	goto restore_opts;
				4512	}
				4513	enable_quota = 1;
				4514	}
				4515	}
				4516
				4517	/*
				4518	* Reinitialize lazy itable initialization thread based on
				4519	* current settings
				4520	*/
				4521	if ((sb->s_flags & MS_RDONLY) \|\| !test_opt(sb, INIT_INODE_TABLE))
				4522	ext4_unregister_li_request(sb);
				4523	else {
				4524	ext4_group_t first_not_zeroed;
				4525	first_not_zeroed = ext4_has_uninit_itable(sb);
				4526	ext4_register_li_request(sb, first_not_zeroed);
				4527	}
				4528
				4529	ext4_setup_system_zone(sb);
				4530	if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
				4531	ext4_commit_super(sb, 1);
				4532
				4533	#ifdef CONFIG_QUOTA
				4534	/* Release old quota file names */
				4535	for (i = 0; i < MAXQUOTAS; i++)
				4536	if (old_opts.s_qf_names[i] &&
				4537	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				4538	kfree(old_opts.s_qf_names[i]);
				4539	#endif
				4540	unlock_super(sb);
				4541	if (enable_quota)
				4542	dquot_resume(sb, -1);
				4543
				4544	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
				4545	kfree(orig_data);
				4546	return 0;
				4547
				4548	restore_opts:
				4549	sb->s_flags = old_sb_flags;
				4550	sbi->s_mount_opt = old_opts.s_mount_opt;
				4551	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
				4552	sbi->s_resuid = old_opts.s_resuid;
				4553	sbi->s_resgid = old_opts.s_resgid;
				4554	sbi->s_commit_interval = old_opts.s_commit_interval;
				4555	sbi->s_min_batch_time = old_opts.s_min_batch_time;
				4556	sbi->s_max_batch_time = old_opts.s_max_batch_time;
				4557	#ifdef CONFIG_QUOTA
				4558	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
				4559	for (i = 0; i < MAXQUOTAS; i++) {
				4560	if (sbi->s_qf_names[i] &&
				4561	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				4562	kfree(sbi->s_qf_names[i]);
				4563	sbi->s_qf_names[i] = old_opts.s_qf_names[i];
				4564	}
				4565	#endif
				4566	unlock_super(sb);
				4567	kfree(orig_data);
				4568	return err;
				4569	}
				4570
				4571	static int ext4_statfs(struct dentry dentry, struct kstatfs buf)
				4572	{
				4573	struct super_block *sb = dentry->d_sb;
				4574	struct ext4_sb_info *sbi = EXT4_SB(sb);
				4575	struct ext4_super_block *es = sbi->s_es;
				4576	ext4_fsblk_t overhead = 0;
				4577	u64 fsid;
				4578	s64 bfree;
				4579
				4580	if (!test_opt(sb, MINIX_DF))
				4581	overhead = sbi->s_overhead;
				4582
				4583	buf->f_type = EXT4_SUPER_MAGIC;
				4584	buf->f_bsize = sb->s_blocksize;
				4585	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead);
				4586	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
				4587	percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
				4588	/* prevent underflow in case that few free space is available */
				4589	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
				4590	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
				4591	if (buf->f_bfree < ext4_r_blocks_count(es))
				4592	buf->f_bavail = 0;
				4593	buf->f_files = le32_to_cpu(es->s_inodes_count);
				4594	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
				4595	buf->f_namelen = EXT4_NAME_LEN;
				4596	fsid = le64_to_cpup((void *)es->s_uuid) ^
				4597	le64_to_cpup((void *)es->s_uuid + sizeof(u64));
				4598	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
				4599	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
				4600
				4601	return 0;
				4602	}
				4603
/* Helper function for writing quotas on sync - we need to start transaction
 * before quota file is locked for write. Otherwise there are possible
 * deadlocks:
 *
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()              write_dquot()
 *   dquot_initialize()                  down(dqio_mutex)
 *     down(dqio_mutex)                  jbd2_journal_start()
 *
 */
				4613
				4614	#ifdef CONFIG_QUOTA
				4615
				4616	static inline struct inode dquot_to_inode(struct dquot dquot)
				4617	{
				4618	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
				4619	}
				4620
				4621	static int ext4_write_dquot(struct dquot *dquot)
				4622	{
				4623	int ret, err;
				4624	handle_t *handle;
				4625	struct inode *inode;
				4626
				4627	inode = dquot_to_inode(dquot);
				4628	handle = ext4_journal_start(inode,
				4629	EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
				4630	if (IS_ERR(handle))
				4631	return PTR_ERR(handle);
				4632	ret = dquot_commit(dquot);
				4633	err = ext4_journal_stop(handle);
				4634	if (!ret)
				4635	ret = err;
				4636	return ret;
				4637	}
				4638
				4639	static int ext4_acquire_dquot(struct dquot *dquot)
				4640	{
				4641	int ret, err;
				4642	handle_t *handle;
				4643
				4644	handle = ext4_journal_start(dquot_to_inode(dquot),
				4645	EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
				4646	if (IS_ERR(handle))
				4647	return PTR_ERR(handle);
				4648	ret = dquot_acquire(dquot);
				4649	err = ext4_journal_stop(handle);
				4650	if (!ret)
				4651	ret = err;
				4652	return ret;
				4653	}
				4654
				4655	static int ext4_release_dquot(struct dquot *dquot)
				4656	{
				4657	int ret, err;
				4658	handle_t *handle;
				4659
				4660	handle = ext4_journal_start(dquot_to_inode(dquot),
				4661	EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
				4662	if (IS_ERR(handle)) {
				4663	/* Release dquot anyway to avoid endless cycle in dqput() */
				4664	dquot_release(dquot);
				4665	return PTR_ERR(handle);
				4666	}
				4667	ret = dquot_release(dquot);
				4668	err = ext4_journal_stop(handle);
				4669	if (!ret)
				4670	ret = err;
				4671	return ret;
				4672	}
				4673
				4674	static int ext4_mark_dquot_dirty(struct dquot *dquot)
				4675	{
				4676	/* Are we journaling quotas? */
				4677	if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] \|\|
				4678	EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
				4679	dquot_mark_dquot_dirty(dquot);
				4680	return ext4_write_dquot(dquot);
				4681	} else {
				4682	return dquot_mark_dquot_dirty(dquot);
				4683	}
				4684	}
				4685
				4686	static int ext4_write_info(struct super_block *sb, int type)
				4687	{
				4688	int ret, err;
				4689	handle_t *handle;
				4690
				4691	/* Data block + inode block */
				4692	handle = ext4_journal_start(sb->s_root->d_inode, 2);
				4693	if (IS_ERR(handle))
				4694	return PTR_ERR(handle);
				4695	ret = dquot_commit_info(sb, type);
				4696	err = ext4_journal_stop(handle);
				4697	if (!ret)
				4698	ret = err;
				4699	return ret;
				4700	}
				4701
				4702	/*
				4703	* Turn on quotas during mount time - we need to find
				4704	* the quota file and such...
				4705	*/
				4706	static int ext4_quota_on_mount(struct super_block *sb, int type)
				4707	{
				4708	return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
				4709	EXT4_SB(sb)->s_jquota_fmt, type);
				4710	}
				4711
				4712	/*
				4713	* Standard function to be called on quota_on
				4714	*/
				4715	static int ext4_quota_on(struct super_block *sb, int type, int format_id,
				4716	struct path *path)
				4717	{
				4718	int err;
				4719
				4720	if (!test_opt(sb, QUOTA))
				4721	return -EINVAL;
				4722
				4723	/* Quotafile not on the same filesystem? */
				4724	if (path->dentry->d_sb != sb)
				4725	return -EXDEV;
				4726	/* Journaling quota? */
				4727	if (EXT4_SB(sb)->s_qf_names[type]) {
				4728	/* Quotafile not in fs root? */
				4729	if (path->dentry->d_parent != sb->s_root)
				4730	ext4_msg(sb, KERN_WARNING,
				4731	"Quota file not on filesystem root. "
				4732	"Journaled quota will not work");
				4733	}
				4734
				4735	/*
				4736	* When we journal data on quota file, we have to flush journal to see
				4737	* all updates to the file when we bypass pagecache...
				4738	*/
				4739	if (EXT4_SB(sb)->s_journal &&
				4740	ext4_should_journal_data(path->dentry->d_inode)) {
				4741	/*
				4742	* We don't need to lock updates but journal_flush() could
				4743	* otherwise be livelocked...
				4744	*/
				4745	jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
				4746	err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
				4747	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
				4748	if (err)
				4749	return err;
				4750	}
				4751
				4752	return dquot_quota_on(sb, type, format_id, path);
				4753	}
				4754
				4755	static int ext4_quota_off(struct super_block *sb, int type)
				4756	{
				4757	struct inode *inode = sb_dqopt(sb)->files[type];
				4758	handle_t *handle;
				4759
				4760	/* Force all delayed allocation blocks to be allocated.
				4761	* Caller already holds s_umount sem */
				4762	if (test_opt(sb, DELALLOC))
				4763	sync_filesystem(sb);
				4764
				4765	if (!inode)
				4766	goto out;
				4767
				4768	/* Update modification times of quota files when userspace can
				4769	* start looking at them */
				4770	handle = ext4_journal_start(inode, 1);
				4771	if (IS_ERR(handle))
				4772	goto out;
				4773	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
				4774	ext4_mark_inode_dirty(handle, inode);
				4775	ext4_journal_stop(handle);
				4776
				4777	out:
				4778	return dquot_quota_off(sb, type);
				4779	}
				4780
				4781	/* Read data from quotafile - avoid pagecache and such because we cannot afford
				4782	* acquiring the locks... As quota files are never truncated and quota code
				4783	* itself serializes the operations (and no one else should touch the files)
				4784	* we don't have to be afraid of races */
				4785	static ssize_t ext4_quota_read(struct super_block sb, int type, char data,
				4786	size_t len, loff_t off)
				4787	{
				4788	struct inode *inode = sb_dqopt(sb)->files[type];
				4789	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
				4790	int err = 0;
				4791	int offset = off & (sb->s_blocksize - 1);
				4792	int tocopy;
				4793	size_t toread;
				4794	struct buffer_head *bh;
				4795	loff_t i_size = i_size_read(inode);
				4796
				4797	if (off > i_size)
				4798	return 0;
				4799	if (off+len > i_size)
				4800	len = i_size-off;
				4801	toread = len;
				4802	while (toread > 0) {
				4803	tocopy = sb->s_blocksize - offset < toread ?
				4804	sb->s_blocksize - offset : toread;
				4805	bh = ext4_bread(NULL, inode, blk, 0, &err);
				4806	if (err)
				4807	return err;
				4808	if (!bh) /* A hole? */
				4809	memset(data, 0, tocopy);
				4810	else
				4811	memcpy(data, bh->b_data+offset, tocopy);
				4812	brelse(bh);
				4813	offset = 0;
				4814	toread -= tocopy;
				4815	data += tocopy;
				4816	blk++;
				4817	}
				4818	return len;
				4819	}
				4820
				4821	/* Write to quotafile (we know the transaction is already started and has
				4822	* enough credits) */
				4823	static ssize_t ext4_quota_write(struct super_block *sb, int type,
				4824	const char *data, size_t len, loff_t off)
				4825	{
				4826	struct inode *inode = sb_dqopt(sb)->files[type];
				4827	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
				4828	int err = 0;
				4829	int offset = off & (sb->s_blocksize - 1);
				4830	struct buffer_head *bh;
				4831	handle_t *handle = journal_current_handle();
				4832
				4833	if (EXT4_SB(sb)->s_journal && !handle) {
				4834	ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				4835	" cancelled because transaction is not started",
				4836	(unsigned long long)off, (unsigned long long)len);
				4837	return -EIO;
				4838	}
				4839	/*
				4840	* Since we account only one data block in transaction credits,
				4841	* then it is impossible to cross a block boundary.
				4842	*/
				4843	if (sb->s_blocksize - offset < len) {
				4844	ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				4845	" cancelled because not block aligned",
				4846	(unsigned long long)off, (unsigned long long)len);
				4847	return -EIO;
				4848	}
				4849
				4850	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
				4851	bh = ext4_bread(handle, inode, blk, 1, &err);
				4852	if (!bh)
				4853	goto out;
				4854	err = ext4_journal_get_write_access(handle, bh);
				4855	if (err) {
				4856	brelse(bh);
				4857	goto out;
				4858	}
				4859	lock_buffer(bh);
				4860	memcpy(bh->b_data+offset, data, len);
				4861	flush_dcache_page(bh->b_page);
				4862	unlock_buffer(bh);
				4863	err = ext4_handle_dirty_metadata(handle, NULL, bh);
				4864	brelse(bh);
				4865	out:
				4866	if (err) {
				4867	mutex_unlock(&inode->i_mutex);
				4868	return err;
				4869	}
				4870	if (inode->i_size < off + len) {
				4871	i_size_write(inode, off + len);
				4872	EXT4_I(inode)->i_disksize = inode->i_size;
				4873	ext4_mark_inode_dirty(handle, inode);
				4874	}
				4875	mutex_unlock(&inode->i_mutex);
				4876	return len;
				4877	}
				4878
				4879	#endif
				4880
				4881	static struct dentry ext4_mount(struct file_system_type fs_type, int flags,
				4882	const char dev_name, void data)
				4883	{
				4884	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
				4885	}
				4886
				4887	#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
				4888	static inline void register_as_ext2(void)
				4889	{
				4890	int err = register_filesystem(&ext2_fs_type);
				4891	if (err)
				4892	printk(KERN_WARNING
				4893	"EXT4-fs: Unable to register as ext2 (%d)\n", err);
				4894	}
				4895
				4896	static inline void unregister_as_ext2(void)
				4897	{
				4898	unregister_filesystem(&ext2_fs_type);
				4899	}
				4900
				4901	static inline int ext2_feature_set_ok(struct super_block *sb)
				4902	{
				4903	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
				4904	return 0;
				4905	if (sb->s_flags & MS_RDONLY)
				4906	return 1;
				4907	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
				4908	return 0;
				4909	return 1;
				4910	}
				4911	MODULE_ALIAS("ext2");
				4912	#else
				4913	static inline void register_as_ext2(void) { }
				4914	static inline void unregister_as_ext2(void) { }
				4915	static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
				4916	#endif
				4917
				4918	#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
				4919	static inline void register_as_ext3(void)
				4920	{
				4921	int err = register_filesystem(&ext3_fs_type);
				4922	if (err)
				4923	printk(KERN_WARNING
				4924	"EXT4-fs: Unable to register as ext3 (%d)\n", err);
				4925	}
				4926
				4927	static inline void unregister_as_ext3(void)
				4928	{
				4929	unregister_filesystem(&ext3_fs_type);
				4930	}
				4931
				4932	static inline int ext3_feature_set_ok(struct super_block *sb)
				4933	{
				4934	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
				4935	return 0;
				4936	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
				4937	return 0;
				4938	if (sb->s_flags & MS_RDONLY)
				4939	return 1;
				4940	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
				4941	return 0;
				4942	return 1;
				4943	}
				4944	MODULE_ALIAS("ext3");
				4945	#else
				4946	static inline void register_as_ext3(void) { }
				4947	static inline void unregister_as_ext3(void) { }
				4948	static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
				4949	#endif
				4950
				4951	static struct file_system_type ext4_fs_type = {
				4952	.owner = THIS_MODULE,
				4953	.name = "ext4",
				4954	.mount = ext4_mount,
				4955	.kill_sb = kill_block_super,
				4956	.fs_flags = FS_REQUIRES_DEV,
				4957	};
				4958
				4959	static int __init ext4_init_feat_adverts(void)
				4960	{
				4961	struct ext4_features *ef;
				4962	int ret = -ENOMEM;
				4963
				4964	ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
				4965	if (!ef)
				4966	goto out;
				4967
				4968	ef->f_kobj.kset = ext4_kset;
				4969	init_completion(&ef->f_kobj_unregister);
				4970	ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
				4971	"features");
				4972	if (ret) {
				4973	kfree(ef);
				4974	goto out;
				4975	}
				4976
				4977	ext4_feat = ef;
				4978	ret = 0;
				4979	out:
				4980	return ret;
				4981	}
				4982
				4983	static void ext4_exit_feat_adverts(void)
				4984	{
				4985	kobject_put(&ext4_feat->f_kobj);
				4986	wait_for_completion(&ext4_feat->f_kobj_unregister);
				4987	kfree(ext4_feat);
				4988	}
				4989
				4990	/* Shared across all ext4 file systems */
				4991	wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
				4992	struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
				4993
				4994	static int __init ext4_init_fs(void)
				4995	{
				4996	int i, err;
				4997
				4998	ext4_li_info = NULL;
				4999	mutex_init(&ext4_li_mtx);
				5000
				5001	ext4_check_flag_values();
				5002
				5003	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
				5004	mutex_init(&ext4__aio_mutex[i]);
				5005	init_waitqueue_head(&ext4__ioend_wq[i]);
				5006	}
				5007
				5008	err = ext4_init_pageio();
				5009	if (err)
				5010	return err;
				5011	err = ext4_init_system_zone();
				5012	if (err)
				5013	goto out6;
				5014	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
				5015	if (!ext4_kset)
				5016	goto out5;
				5017	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
				5018
				5019	err = ext4_init_feat_adverts();
				5020	if (err)
				5021	goto out4;
				5022
				5023	err = ext4_init_mballoc();
				5024	if (err)
				5025	goto out3;
				5026
				5027	err = ext4_init_xattr();
				5028	if (err)
				5029	goto out2;
				5030	err = init_inodecache();
				5031	if (err)
				5032	goto out1;
				5033	register_as_ext3();
				5034	register_as_ext2();
				5035	err = register_filesystem(&ext4_fs_type);
				5036	if (err)
				5037	goto out;
				5038
				5039	return 0;
				5040	out:
				5041	unregister_as_ext2();
				5042	unregister_as_ext3();
				5043	destroy_inodecache();
				5044	out1:
				5045	ext4_exit_xattr();
				5046	out2:
				5047	ext4_exit_mballoc();
				5048	out3:
				5049	ext4_exit_feat_adverts();
				5050	out4:
				5051	if (ext4_proc_root)
				5052	remove_proc_entry("fs/ext4", NULL);
				5053	kset_unregister(ext4_kset);
				5054	out5:
				5055	ext4_exit_system_zone();
				5056	out6:
				5057	ext4_exit_pageio();
				5058	return err;
				5059	}
				5060
				5061	static void __exit ext4_exit_fs(void)
				5062	{
				5063	ext4_destroy_lazyinit_thread();
				5064	unregister_as_ext2();
				5065	unregister_as_ext3();
				5066	unregister_filesystem(&ext4_fs_type);
				5067	destroy_inodecache();
				5068	ext4_exit_xattr();
				5069	ext4_exit_mballoc();
				5070	ext4_exit_feat_adverts();
				5071	remove_proc_entry("fs/ext4", NULL);
				5072	kset_unregister(ext4_kset);
				5073	ext4_exit_system_zone();
				5074	ext4_exit_pageio();
				5075	}
				5076
/* Module metadata, and the init/exit entry points defined in this file. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)