Blame - ap/os/linux/linux-3.4.x/fs/ext3/super.c - T106_DC

blob: 564f9429b3b1ecb12660375c205be068928d7f51 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* linux/fs/ext3/super.c
				3	*
				4	* Copyright (C) 1992, 1993, 1994, 1995
				5	* Remy Card (card@masi.ibp.fr)
				6	* Laboratoire MASI - Institut Blaise Pascal
				7	* Universite Pierre et Marie Curie (Paris VI)
				8	*
				9	* from
				10	*
				11	* linux/fs/minix/inode.c
				12	*
				13	* Copyright (C) 1991, 1992 Linus Torvalds
				14	*
				15	* Big-endian to little-endian byte-swapping/bitmaps by
				16	* David S. Miller (davem@caip.rutgers.edu), 1995
				17	*/
				18
				19	#include <linux/module.h>
				20	#include <linux/blkdev.h>
				21	#include <linux/parser.h>
				22	#include <linux/exportfs.h>
				23	#include <linux/statfs.h>
				24	#include <linux/random.h>
				25	#include <linux/mount.h>
				26	#include <linux/quotaops.h>
				27	#include <linux/seq_file.h>
				28	#include <linux/log2.h>
				29	#include <linux/cleancache.h>
				30
				31	#include <asm/uaccess.h>
				32
				33	#define CREATE_TRACE_POINTS
				34
				35	#include "ext3.h"
				36	#include "xattr.h"
				37	#include "acl.h"
				38	#include "namei.h"
				39
				40	#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
				41	#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
				42	#else
				43	#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
				44	#endif
				45
				46	static int ext3_load_journal(struct super_block , struct ext3_super_block ,
				47	unsigned long journal_devnum);
				48	static int ext3_create_journal(struct super_block , struct ext3_super_block ,
				49	unsigned int);
				50	static int ext3_commit_super(struct super_block *sb,
				51	struct ext3_super_block *es,
				52	int sync);
				53	static void ext3_mark_recovery_complete(struct super_block * sb,
				54	struct ext3_super_block * es);
				55	static void ext3_clear_journal_err(struct super_block * sb,
				56	struct ext3_super_block * es);
				57	static int ext3_sync_fs(struct super_block *sb, int wait);
				58	static const char ext3_decode_error(struct super_block sb, int errno,
				59	char nbuf[16]);
				60	static int ext3_remount (struct super_block * sb, int * flags, char * data);
				61	static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
				62	static int ext3_unfreeze(struct super_block *sb);
				63	static int ext3_freeze(struct super_block *sb);
				64
				65	/*
				66	* Wrappers for journal_start/end.
				67	*
				68	* The only special thing we need to do here is to make sure that all
				69	* journal_end calls result in the superblock being marked dirty, so
				70	* that sync() will call the filesystem's write_super callback if
				71	* appropriate.
				72	*/
				73	handle_t ext3_journal_start_sb(struct super_block sb, int nblocks)
				74	{
				75	journal_t *journal;
				76
				77	if (sb->s_flags & MS_RDONLY)
				78	return ERR_PTR(-EROFS);
				79
				80	/* Special case here: if the journal has aborted behind our
				81	* backs (eg. EIO in the commit thread), then we still need to
				82	* take the FS itself readonly cleanly. */
				83	journal = EXT3_SB(sb)->s_journal;
				84	if (is_journal_aborted(journal)) {
				85	ext3_abort(sb, __func__,
				86	"Detected aborted journal");
				87	return ERR_PTR(-EROFS);
				88	}
				89
				90	return journal_start(journal, nblocks);
				91	}
				92
				93	/*
				94	* The only special thing we need to do here is to make sure that all
				95	* journal_stop calls result in the superblock being marked dirty, so
				96	* that sync() will call the filesystem's write_super callback if
				97	* appropriate.
				98	*/
				99	int __ext3_journal_stop(const char where, handle_t handle)
				100	{
				101	struct super_block *sb;
				102	int err;
				103	int rc;
				104
				105	sb = handle->h_transaction->t_journal->j_private;
				106	err = handle->h_err;
				107	rc = journal_stop(handle);
				108
				109	if (!err)
				110	err = rc;
				111	if (err)
				112	__ext3_std_error(sb, where, err);
				113	return err;
				114	}
				115
				116	void ext3_journal_abort_handle(const char caller, const char err_fn,
				117	struct buffer_head bh, handle_t handle, int err)
				118	{
				119	char nbuf[16];
				120	const char *errstr = ext3_decode_error(NULL, err, nbuf);
				121
				122	if (bh)
				123	BUFFER_TRACE(bh, "abort");
				124
				125	if (!handle->h_err)
				126	handle->h_err = err;
				127
				128	if (is_handle_aborted(handle))
				129	return;
				130
				131	printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
				132	caller, errstr, err_fn);
				133
				134	journal_abort_handle(handle);
				135	}
				136
				137	void ext3_msg(struct super_block sb, const char prefix,
				138	const char *fmt, ...)
				139	{
				140	struct va_format vaf;
				141	va_list args;
				142
				143	va_start(args, fmt);
				144
				145	vaf.fmt = fmt;
				146	vaf.va = &args;
				147
				148	printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
				149
				150	va_end(args);
				151	}
				152
				153	/* Deal with the reporting of failure conditions on a filesystem such as
				154	* inconsistencies detected or read IO failures.
				155	*
				156	* On ext2, we can store the error state of the filesystem in the
				157	* superblock. That is not possible on ext3, because we may have other
				158	* write ordering constraints on the superblock which prevent us from
				159	* writing it out straight away; and given that the journal is about to
				160	* be aborted, we can't rely on the current, or future, transactions to
				161	* write out the superblock safely.
				162	*
				163	* We'll just use the journal_abort() error code to record an error in
				164	* the journal instead. On recovery, the journal will complain about
				165	* that error until we've noted it down and cleared it.
				166	*/
				167
				168	static void ext3_handle_error(struct super_block *sb)
				169	{
				170	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				171
				172	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				173	es->s_state \|= cpu_to_le16(EXT3_ERROR_FS);
				174
				175	if (sb->s_flags & MS_RDONLY)
				176	return;
				177
				178	if (!test_opt (sb, ERRORS_CONT)) {
				179	journal_t *journal = EXT3_SB(sb)->s_journal;
				180
				181	set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
				182	if (journal)
				183	journal_abort(journal, -EIO);
				184	}
				185	if (test_opt (sb, ERRORS_RO)) {
				186	ext3_msg(sb, KERN_CRIT,
				187	"error: remounting filesystem read-only");
				188	sb->s_flags \|= MS_RDONLY;
				189	}
				190	ext3_commit_super(sb, es, 1);
				191	if (test_opt(sb, ERRORS_PANIC))
				192	panic("EXT3-fs (%s): panic forced after error\n",
				193	sb->s_id);
				194	}
				195
				196	void ext3_error(struct super_block sb, const char function,
				197	const char *fmt, ...)
				198	{
				199	struct va_format vaf;
				200	va_list args;
				201
				202	va_start(args, fmt);
				203
				204	vaf.fmt = fmt;
				205	vaf.va = &args;
				206
				207	printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
				208	sb->s_id, function, &vaf);
				209
				210	va_end(args);
				211
				212	ext3_handle_error(sb);
				213	}
				214
				215	static const char ext3_decode_error(struct super_block sb, int errno,
				216	char nbuf[16])
				217	{
				218	char *errstr = NULL;
				219
				220	switch (errno) {
				221	case -EIO:
				222	errstr = "IO failure";
				223	break;
				224	case -ENOMEM:
				225	errstr = "Out of memory";
				226	break;
				227	case -EROFS:
				228	if (!sb \|\| EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
				229	errstr = "Journal has aborted";
				230	else
				231	errstr = "Readonly filesystem";
				232	break;
				233	default:
				234	/* If the caller passed in an extra buffer for unknown
				235	* errors, textualise them now. Else we just return
				236	* NULL. */
				237	if (nbuf) {
				238	/* Check for truncated error codes... */
				239	if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				240	errstr = nbuf;
				241	}
				242	break;
				243	}
				244
				245	return errstr;
				246	}
				247
				248	/* __ext3_std_error decodes expected errors from journaling functions
				249	* automatically and invokes the appropriate error response. */
				250
				251	void __ext3_std_error (struct super_block * sb, const char * function,
				252	int errno)
				253	{
				254	char nbuf[16];
				255	const char *errstr;
				256
				257	/* Special case: if the error is EROFS, and we're not already
				258	* inside a transaction, then there's really no point in logging
				259	* an error. */
				260	if (errno == -EROFS && journal_current_handle() == NULL &&
				261	(sb->s_flags & MS_RDONLY))
				262	return;
				263
				264	errstr = ext3_decode_error(sb, errno, nbuf);
				265	ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
				266
				267	ext3_handle_error(sb);
				268	}
				269
				270	/*
				271	* ext3_abort is a much stronger failure handler than ext3_error. The
				272	* abort function may be used to deal with unrecoverable failures such
				273	* as journal IO errors or ENOMEM at a critical moment in log management.
				274	*
				275	* We unconditionally force the filesystem into an ABORT\|READONLY state,
				276	* unless the error response on the fs has been set to panic in which
				277	* case we take the easy way out and panic immediately.
				278	*/
				279
				280	void ext3_abort(struct super_block sb, const char function,
				281	const char *fmt, ...)
				282	{
				283	struct va_format vaf;
				284	va_list args;
				285
				286	va_start(args, fmt);
				287
				288	vaf.fmt = fmt;
				289	vaf.va = &args;
				290
				291	printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
				292	sb->s_id, function, &vaf);
				293
				294	va_end(args);
				295
				296	if (test_opt(sb, ERRORS_PANIC))
				297	panic("EXT3-fs: panic from previous error\n");
				298
				299	if (sb->s_flags & MS_RDONLY)
				300	return;
				301
				302	ext3_msg(sb, KERN_CRIT,
				303	"error: remounting filesystem read-only");
				304	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				305	sb->s_flags \|= MS_RDONLY;
				306	set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
				307	if (EXT3_SB(sb)->s_journal)
				308	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
				309	}
				310
				311	void ext3_warning(struct super_block sb, const char function,
				312	const char *fmt, ...)
				313	{
				314	struct va_format vaf;
				315	va_list args;
				316
				317	va_start(args, fmt);
				318
				319	vaf.fmt = fmt;
				320	vaf.va = &args;
				321
				322	printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
				323	sb->s_id, function, &vaf);
				324
				325	va_end(args);
				326	}
				327
				328	void ext3_update_dynamic_rev(struct super_block *sb)
				329	{
				330	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				331
				332	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
				333	return;
				334
				335	ext3_msg(sb, KERN_WARNING,
				336	"warning: updating to rev %d because of "
				337	"new feature flag, running e2fsck is recommended",
				338	EXT3_DYNAMIC_REV);
				339
				340	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
				341	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
				342	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
				343	/* leave es->s_feature_compat flags alone /
				344	/* es->s_uuid will be set by e2fsck if empty */
				345
				346	/*
				347	* The rest of the superblock fields should be zero, and if not it
				348	* means they are likely already in use, so leave them alone. We
				349	* can leave it up to e2fsck to clean up any inconsistencies there.
				350	*/
				351	}
				352
				353	/*
				354	* Open the external journal device
				355	*/
				356	static struct block_device ext3_blkdev_get(dev_t dev, struct super_block sb)
				357	{
				358	struct block_device *bdev;
				359	char b[BDEVNAME_SIZE];
				360
				361	bdev = blkdev_get_by_dev(dev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL, sb);
				362	if (IS_ERR(bdev))
				363	goto fail;
				364	return bdev;
				365
				366	fail:
				367	ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
				368	__bdevname(dev, b), PTR_ERR(bdev));
				369
				370	return NULL;
				371	}
				372
				373	/*
				374	* Release the journal device
				375	*/
				376	static int ext3_blkdev_put(struct block_device *bdev)
				377	{
				378	return blkdev_put(bdev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL);
				379	}
				380
				381	static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
				382	{
				383	struct block_device *bdev;
				384	int ret = -ENODEV;
				385
				386	bdev = sbi->journal_bdev;
				387	if (bdev) {
				388	ret = ext3_blkdev_put(bdev);
				389	sbi->journal_bdev = NULL;
				390	}
				391	return ret;
				392	}
				393
				394	static inline struct inode orphan_list_entry(struct list_head l)
				395	{
				396	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
				397	}
				398
				399	static void dump_orphan_list(struct super_block sb, struct ext3_sb_info sbi)
				400	{
				401	struct list_head *l;
				402
				403	ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
				404	le32_to_cpu(sbi->s_es->s_last_orphan));
				405
				406	ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
				407	list_for_each(l, &sbi->s_orphan) {
				408	struct inode *inode = orphan_list_entry(l);
				409	ext3_msg(sb, KERN_ERR, " "
				410	"inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
				411	inode->i_sb->s_id, inode->i_ino, inode,
				412	inode->i_mode, inode->i_nlink,
				413	NEXT_ORPHAN(inode));
				414	}
				415	}
				416
				417	static void ext3_put_super (struct super_block * sb)
				418	{
				419	struct ext3_sb_info *sbi = EXT3_SB(sb);
				420	struct ext3_super_block *es = sbi->s_es;
				421	int i, err;
				422
				423	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED \| DQUOT_LIMITS_ENABLED);
				424	ext3_xattr_put_super(sb);
				425	err = journal_destroy(sbi->s_journal);
				426	sbi->s_journal = NULL;
				427	if (err < 0)
				428	ext3_abort(sb, __func__, "Couldn't clean up the journal");
				429
				430	if (!(sb->s_flags & MS_RDONLY)) {
				431	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				432	es->s_state = cpu_to_le16(sbi->s_mount_state);
				433	BUFFER_TRACE(sbi->s_sbh, "marking dirty");
				434	mark_buffer_dirty(sbi->s_sbh);
				435	ext3_commit_super(sb, es, 1);
				436	}
				437
				438	for (i = 0; i < sbi->s_gdb_count; i++)
				439	brelse(sbi->s_group_desc[i]);
				440	kfree(sbi->s_group_desc);
				441	percpu_counter_destroy(&sbi->s_freeblocks_counter);
				442	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				443	percpu_counter_destroy(&sbi->s_dirs_counter);
				444	brelse(sbi->s_sbh);
				445	#ifdef CONFIG_QUOTA
				446	for (i = 0; i < MAXQUOTAS; i++)
				447	kfree(sbi->s_qf_names[i]);
				448	#endif
				449
				450	/* Debugging code just in case the in-memory inode orphan list
				451	* isn't empty. The on-disk one can be non-empty if we've
				452	* detected an error and taken the fs readonly, but the
				453	* in-memory list had better be clean by this point. */
				454	if (!list_empty(&sbi->s_orphan))
				455	dump_orphan_list(sb, sbi);
				456	J_ASSERT(list_empty(&sbi->s_orphan));
				457
				458	invalidate_bdev(sb->s_bdev);
				459	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
				460	/*
				461	* Invalidate the journal device's buffers. We don't want them
				462	* floating about in memory - the physical journal device may
				463	* hotswapped, and it breaks the `ro-after' testing code.
				464	*/
				465	sync_blockdev(sbi->journal_bdev);
				466	invalidate_bdev(sbi->journal_bdev);
				467	ext3_blkdev_remove(sbi);
				468	}
				469	sb->s_fs_info = NULL;
				470	kfree(sbi->s_blockgroup_lock);
				471	kfree(sbi);
				472	}
				473
				474	static struct kmem_cache *ext3_inode_cachep;
				475
				476	/*
				477	* Called inside transaction, so use GFP_NOFS
				478	*/
				479	static struct inode ext3_alloc_inode(struct super_block sb)
				480	{
				481	struct ext3_inode_info *ei;
				482
				483	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
				484	if (!ei)
				485	return NULL;
				486	ei->i_block_alloc_info = NULL;
				487	ei->vfs_inode.i_version = 1;
				488	atomic_set(&ei->i_datasync_tid, 0);
				489	atomic_set(&ei->i_sync_tid, 0);
				490	return &ei->vfs_inode;
				491	}
				492
				493	static int ext3_drop_inode(struct inode *inode)
				494	{
				495	int drop = generic_drop_inode(inode);
				496
				497	trace_ext3_drop_inode(inode, drop);
				498	return drop;
				499	}
				500
				501	static void ext3_i_callback(struct rcu_head *head)
				502	{
				503	struct inode *inode = container_of(head, struct inode, i_rcu);
				504	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
				505	}
				506
				507	static void ext3_destroy_inode(struct inode *inode)
				508	{
				509	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
				510	printk("EXT3 Inode %p: orphan list check failed!\n",
				511	EXT3_I(inode));
				512	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				513	EXT3_I(inode), sizeof(struct ext3_inode_info),
				514	false);
				515	dump_stack();
				516	}
				517	call_rcu(&inode->i_rcu, ext3_i_callback);
				518	}
				519
				520	static void init_once(void *foo)
				521	{
				522	struct ext3_inode_info ei = (struct ext3_inode_info ) foo;
				523
				524	INIT_LIST_HEAD(&ei->i_orphan);
				525	#ifdef CONFIG_EXT3_FS_XATTR
				526	init_rwsem(&ei->xattr_sem);
				527	#endif
				528	mutex_init(&ei->truncate_mutex);
				529	inode_init_once(&ei->vfs_inode);
				530	}
				531
				532	static int init_inodecache(void)
				533	{
				534	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
				535	sizeof(struct ext3_inode_info),
				536	0, (SLAB_RECLAIM_ACCOUNT\|
				537	SLAB_MEM_SPREAD),
				538	init_once);
				539	if (ext3_inode_cachep == NULL)
				540	return -ENOMEM;
				541	return 0;
				542	}
				543
				544	static void destroy_inodecache(void)
				545	{
				546	kmem_cache_destroy(ext3_inode_cachep);
				547	}
				548
				549	static inline void ext3_show_quota_options(struct seq_file seq, struct super_block sb)
				550	{
				551	#if defined(CONFIG_QUOTA)
				552	struct ext3_sb_info *sbi = EXT3_SB(sb);
				553
				554	if (sbi->s_jquota_fmt) {
				555	char *fmtname = "";
				556
				557	switch (sbi->s_jquota_fmt) {
				558	case QFMT_VFS_OLD:
				559	fmtname = "vfsold";
				560	break;
				561	case QFMT_VFS_V0:
				562	fmtname = "vfsv0";
				563	break;
				564	case QFMT_VFS_V1:
				565	fmtname = "vfsv1";
				566	break;
				567	}
				568	seq_printf(seq, ",jqfmt=%s", fmtname);
				569	}
				570
				571	if (sbi->s_qf_names[USRQUOTA])
				572	seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
				573
				574	if (sbi->s_qf_names[GRPQUOTA])
				575	seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
				576
				577	if (test_opt(sb, USRQUOTA))
				578	seq_puts(seq, ",usrquota");
				579
				580	if (test_opt(sb, GRPQUOTA))
				581	seq_puts(seq, ",grpquota");
				582	#endif
				583	}
				584
				585	static char *data_mode_string(unsigned long mode)
				586	{
				587	switch (mode) {
				588	case EXT3_MOUNT_JOURNAL_DATA:
				589	return "journal";
				590	case EXT3_MOUNT_ORDERED_DATA:
				591	return "ordered";
				592	case EXT3_MOUNT_WRITEBACK_DATA:
				593	return "writeback";
				594	}
				595	return "unknown";
				596	}
				597
				598	/*
				599	* Show an option if
				600	* - it's set to a non-default value OR
				601	* - if the per-sb default is different from the global default
				602	*/
				603	static int ext3_show_options(struct seq_file seq, struct dentry root)
				604	{
				605	struct super_block *sb = root->d_sb;
				606	struct ext3_sb_info *sbi = EXT3_SB(sb);
				607	struct ext3_super_block *es = sbi->s_es;
				608	unsigned long def_mount_opts;
				609
				610	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				611
				612	if (sbi->s_sb_block != 1)
				613	seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
				614	if (test_opt(sb, MINIX_DF))
				615	seq_puts(seq, ",minixdf");
				616	if (test_opt(sb, GRPID))
				617	seq_puts(seq, ",grpid");
				618	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
				619	seq_puts(seq, ",nogrpid");
				620	if (sbi->s_resuid != EXT3_DEF_RESUID \|\|
				621	le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
				622	seq_printf(seq, ",resuid=%u", sbi->s_resuid);
				623	}
				624	if (sbi->s_resgid != EXT3_DEF_RESGID \|\|
				625	le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
				626	seq_printf(seq, ",resgid=%u", sbi->s_resgid);
				627	}
				628	if (test_opt(sb, ERRORS_RO)) {
				629	int def_errors = le16_to_cpu(es->s_errors);
				630
				631	if (def_errors == EXT3_ERRORS_PANIC \|\|
				632	def_errors == EXT3_ERRORS_CONTINUE) {
				633	seq_puts(seq, ",errors=remount-ro");
				634	}
				635	}
				636	if (test_opt(sb, ERRORS_CONT))
				637	seq_puts(seq, ",errors=continue");
				638	if (test_opt(sb, ERRORS_PANIC))
				639	seq_puts(seq, ",errors=panic");
				640	if (test_opt(sb, NO_UID32))
				641	seq_puts(seq, ",nouid32");
				642	if (test_opt(sb, DEBUG))
				643	seq_puts(seq, ",debug");
				644	#ifdef CONFIG_EXT3_FS_XATTR
				645	if (test_opt(sb, XATTR_USER))
				646	seq_puts(seq, ",user_xattr");
				647	if (!test_opt(sb, XATTR_USER) &&
				648	(def_mount_opts & EXT3_DEFM_XATTR_USER)) {
				649	seq_puts(seq, ",nouser_xattr");
				650	}
				651	#endif
				652	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				653	if (test_opt(sb, POSIX_ACL))
				654	seq_puts(seq, ",acl");
				655	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
				656	seq_puts(seq, ",noacl");
				657	#endif
				658	if (!test_opt(sb, RESERVATION))
				659	seq_puts(seq, ",noreservation");
				660	if (sbi->s_commit_interval) {
				661	seq_printf(seq, ",commit=%u",
				662	(unsigned) (sbi->s_commit_interval / HZ));
				663	}
				664
				665	/*
				666	* Always display barrier state so it's clear what the status is.
				667	*/
				668	seq_puts(seq, ",barrier=");
				669	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
				670	seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
				671	if (test_opt(sb, DATA_ERR_ABORT))
				672	seq_puts(seq, ",data_err=abort");
				673
				674	if (test_opt(sb, NOLOAD))
				675	seq_puts(seq, ",norecovery");
				676
				677	ext3_show_quota_options(seq, sb);
				678
				679	return 0;
				680	}
				681
				682
				683	static struct inode ext3_nfs_get_inode(struct super_block sb,
				684	u64 ino, u32 generation)
				685	{
				686	struct inode *inode;
				687
				688	if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
				689	return ERR_PTR(-ESTALE);
				690	if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
				691	return ERR_PTR(-ESTALE);
				692
				693	/* iget isn't really right if the inode is currently unallocated!!
				694	*
				695	* ext3_read_inode will return a bad_inode if the inode had been
				696	* deleted, so we should be safe.
				697	*
				698	* Currently we don't know the generation for parent directory, so
				699	* a generation of 0 means "accept any"
				700	*/
				701	inode = ext3_iget(sb, ino);
				702	if (IS_ERR(inode))
				703	return ERR_CAST(inode);
				704	if (generation && inode->i_generation != generation) {
				705	iput(inode);
				706	return ERR_PTR(-ESTALE);
				707	}
				708
				709	return inode;
				710	}
				711
				712	static struct dentry ext3_fh_to_dentry(struct super_block sb, struct fid *fid,
				713	int fh_len, int fh_type)
				714	{
				715	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				716	ext3_nfs_get_inode);
				717	}
				718
				719	static struct dentry ext3_fh_to_parent(struct super_block sb, struct fid *fid,
				720	int fh_len, int fh_type)
				721	{
				722	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				723	ext3_nfs_get_inode);
				724	}
				725
				726	/*
				727	* Try to release metadata pages (indirect blocks, directories) which are
				728	* mapped via the block device. Since these pages could have journal heads
				729	* which would prevent try_to_free_buffers() from freeing them, we must use
				730	* jbd layer's try_to_free_buffers() function to release them.
				731	*/
				732	static int bdev_try_to_free_page(struct super_block sb, struct page page,
				733	gfp_t wait)
				734	{
				735	journal_t *journal = EXT3_SB(sb)->s_journal;
				736
				737	WARN_ON(PageChecked(page));
				738	if (!page_has_buffers(page))
				739	return 0;
				740	if (journal)
				741	return journal_try_to_free_buffers(journal, page,
				742	wait & ~__GFP_WAIT);
				743	return try_to_free_buffers(page);
				744	}
				745
				746	#ifdef CONFIG_QUOTA
				747	#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
				748	#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
				749
				750	static int ext3_write_dquot(struct dquot *dquot);
				751	static int ext3_acquire_dquot(struct dquot *dquot);
				752	static int ext3_release_dquot(struct dquot *dquot);
				753	static int ext3_mark_dquot_dirty(struct dquot *dquot);
				754	static int ext3_write_info(struct super_block *sb, int type);
				755	static int ext3_quota_on(struct super_block *sb, int type, int format_id,
				756	struct path *path);
				757	static int ext3_quota_on_mount(struct super_block *sb, int type);
				758	static ssize_t ext3_quota_read(struct super_block sb, int type, char data,
				759	size_t len, loff_t off);
				760	static ssize_t ext3_quota_write(struct super_block *sb, int type,
				761	const char *data, size_t len, loff_t off);
				762
				763	static const struct dquot_operations ext3_quota_operations = {
				764	.write_dquot = ext3_write_dquot,
				765	.acquire_dquot = ext3_acquire_dquot,
				766	.release_dquot = ext3_release_dquot,
				767	.mark_dirty = ext3_mark_dquot_dirty,
				768	.write_info = ext3_write_info,
				769	.alloc_dquot = dquot_alloc,
				770	.destroy_dquot = dquot_destroy,
				771	};
				772
				773	static const struct quotactl_ops ext3_qctl_operations = {
				774	.quota_on = ext3_quota_on,
				775	.quota_off = dquot_quota_off,
				776	.quota_sync = dquot_quota_sync,
				777	.get_info = dquot_get_dqinfo,
				778	.set_info = dquot_set_dqinfo,
				779	.get_dqblk = dquot_get_dqblk,
				780	.set_dqblk = dquot_set_dqblk
				781	};
				782	#endif
				783
				784	static const struct super_operations ext3_sops = {
				785	.alloc_inode = ext3_alloc_inode,
				786	.destroy_inode = ext3_destroy_inode,
				787	.write_inode = ext3_write_inode,
				788	.dirty_inode = ext3_dirty_inode,
				789	.drop_inode = ext3_drop_inode,
				790	.evict_inode = ext3_evict_inode,
				791	.put_super = ext3_put_super,
				792	.sync_fs = ext3_sync_fs,
				793	.freeze_fs = ext3_freeze,
				794	.unfreeze_fs = ext3_unfreeze,
				795	.statfs = ext3_statfs,
				796	.remount_fs = ext3_remount,
				797	.show_options = ext3_show_options,
				798	#ifdef CONFIG_QUOTA
				799	.quota_read = ext3_quota_read,
				800	.quota_write = ext3_quota_write,
				801	#endif
				802	.bdev_try_to_free_page = bdev_try_to_free_page,
				803	};
				804
				805	static const struct export_operations ext3_export_ops = {
				806	.fh_to_dentry = ext3_fh_to_dentry,
				807	.fh_to_parent = ext3_fh_to_parent,
				808	.get_parent = ext3_get_parent,
				809	};
				810
				811	enum {
				812	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
				813	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
				814	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
				815	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
				816	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
				817	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
				818	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
				819	Opt_data_err_abort, Opt_data_err_ignore,
				820	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
				821	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
				822	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
				823	Opt_resize, Opt_usrquota, Opt_grpquota
				824	};
				825
				826	static const match_table_t tokens = {
				827	{Opt_bsd_df, "bsddf"},
				828	{Opt_minix_df, "minixdf"},
				829	{Opt_grpid, "grpid"},
				830	{Opt_grpid, "bsdgroups"},
				831	{Opt_nogrpid, "nogrpid"},
				832	{Opt_nogrpid, "sysvgroups"},
				833	{Opt_resgid, "resgid=%u"},
				834	{Opt_resuid, "resuid=%u"},
				835	{Opt_sb, "sb=%u"},
				836	{Opt_err_cont, "errors=continue"},
				837	{Opt_err_panic, "errors=panic"},
				838	{Opt_err_ro, "errors=remount-ro"},
				839	{Opt_nouid32, "nouid32"},
				840	{Opt_nocheck, "nocheck"},
				841	{Opt_nocheck, "check=none"},
				842	{Opt_debug, "debug"},
				843	{Opt_oldalloc, "oldalloc"},
				844	{Opt_orlov, "orlov"},
				845	{Opt_user_xattr, "user_xattr"},
				846	{Opt_nouser_xattr, "nouser_xattr"},
				847	{Opt_acl, "acl"},
				848	{Opt_noacl, "noacl"},
				849	{Opt_reservation, "reservation"},
				850	{Opt_noreservation, "noreservation"},
				851	{Opt_noload, "noload"},
				852	{Opt_noload, "norecovery"},
				853	{Opt_nobh, "nobh"},
				854	{Opt_bh, "bh"},
				855	{Opt_commit, "commit=%u"},
				856	{Opt_journal_update, "journal=update"},
				857	{Opt_journal_inum, "journal=%u"},
				858	{Opt_journal_dev, "journal_dev=%u"},
				859	{Opt_abort, "abort"},
				860	{Opt_data_journal, "data=journal"},
				861	{Opt_data_ordered, "data=ordered"},
				862	{Opt_data_writeback, "data=writeback"},
				863	{Opt_data_err_abort, "data_err=abort"},
				864	{Opt_data_err_ignore, "data_err=ignore"},
				865	{Opt_offusrjquota, "usrjquota="},
				866	{Opt_usrjquota, "usrjquota=%s"},
				867	{Opt_offgrpjquota, "grpjquota="},
				868	{Opt_grpjquota, "grpjquota=%s"},
				869	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
				870	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
				871	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
				872	{Opt_grpquota, "grpquota"},
				873	{Opt_noquota, "noquota"},
				874	{Opt_quota, "quota"},
				875	{Opt_usrquota, "usrquota"},
				876	{Opt_barrier, "barrier=%u"},
				877	{Opt_barrier, "barrier"},
				878	{Opt_nobarrier, "nobarrier"},
				879	{Opt_resize, "resize"},
				880	{Opt_err, NULL},
				881	};
				882
				883	static ext3_fsblk_t get_sb_block(void *data, struct super_block sb)
				884	{
				885	ext3_fsblk_t sb_block;
				886	char options = (char ) *data;
				887
				888	if (!options \|\| strncmp(options, "sb=", 3) != 0)
				889	return 1; /* Default location */
				890	options += 3;
				891	/todo: use simple_strtoll with >32bit ext3 /
				892	sb_block = simple_strtoul(options, &options, 0);
				893	if (options && options != ',') {
				894	ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
				895	(char ) data);
				896	return 1;
				897	}
				898	if (*options == ',')
				899	options++;
				900	data = (void ) options;
				901	return sb_block;
				902	}
				903
				904	#ifdef CONFIG_QUOTA
				905	static int set_qf_name(struct super_block sb, int qtype, substring_t args)
				906	{
				907	struct ext3_sb_info *sbi = EXT3_SB(sb);
				908	char *qname;
				909
				910	if (sb_any_quota_loaded(sb) &&
				911	!sbi->s_qf_names[qtype]) {
				912	ext3_msg(sb, KERN_ERR,
				913	"Cannot change journaled "
				914	"quota options when quota turned on");
				915	return 0;
				916	}
				917	qname = match_strdup(args);
				918	if (!qname) {
				919	ext3_msg(sb, KERN_ERR,
				920	"Not enough memory for storing quotafile name");
				921	return 0;
				922	}
				923	if (sbi->s_qf_names[qtype] &&
				924	strcmp(sbi->s_qf_names[qtype], qname)) {
				925	ext3_msg(sb, KERN_ERR,
				926	"%s quota file already specified", QTYPE2NAME(qtype));
				927	kfree(qname);
				928	return 0;
				929	}
				930	sbi->s_qf_names[qtype] = qname;
				931	if (strchr(sbi->s_qf_names[qtype], '/')) {
				932	ext3_msg(sb, KERN_ERR,
				933	"quotafile must be on filesystem root");
				934	kfree(sbi->s_qf_names[qtype]);
				935	sbi->s_qf_names[qtype] = NULL;
				936	return 0;
				937	}
				938	set_opt(sbi->s_mount_opt, QUOTA);
				939	return 1;
				940	}
				941
				942	static int clear_qf_name(struct super_block *sb, int qtype) {
				943
				944	struct ext3_sb_info *sbi = EXT3_SB(sb);
				945
				946	if (sb_any_quota_loaded(sb) &&
				947	sbi->s_qf_names[qtype]) {
				948	ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
				949	" when quota turned on");
				950	return 0;
				951	}
				952	/*
				953	* The space will be released later when all options are confirmed
				954	* to be correct
				955	*/
				956	sbi->s_qf_names[qtype] = NULL;
				957	return 1;
				958	}
				959	#endif
				960
				961	static int parse_options (char options, struct super_block sb,
				962	unsigned int inum, unsigned long journal_devnum,
				963	ext3_fsblk_t *n_blocks_count, int is_remount)
				964	{
				965	struct ext3_sb_info *sbi = EXT3_SB(sb);
				966	char * p;
				967	substring_t args[MAX_OPT_ARGS];
				968	int data_opt = 0;
				969	int option;
				970	#ifdef CONFIG_QUOTA
				971	int qfmt;
				972	#endif
				973
				974	if (!options)
				975	return 1;
				976
				977	while ((p = strsep (&options, ",")) != NULL) {
				978	int token;
				979	if (!*p)
				980	continue;
				981	/*
				982	* Initialize args struct so we know whether arg was
				983	* found; some options take optional arguments.
				984	*/
				985	args[0].to = args[0].from = 0;
				986	token = match_token(p, tokens, args);
				987	switch (token) {
				988	case Opt_bsd_df:
				989	clear_opt (sbi->s_mount_opt, MINIX_DF);
				990	break;
				991	case Opt_minix_df:
				992	set_opt (sbi->s_mount_opt, MINIX_DF);
				993	break;
				994	case Opt_grpid:
				995	set_opt (sbi->s_mount_opt, GRPID);
				996	break;
				997	case Opt_nogrpid:
				998	clear_opt (sbi->s_mount_opt, GRPID);
				999	break;
				1000	case Opt_resuid:
				1001	if (match_int(&args[0], &option))
				1002	return 0;
				1003	sbi->s_resuid = option;
				1004	break;
				1005	case Opt_resgid:
				1006	if (match_int(&args[0], &option))
				1007	return 0;
				1008	sbi->s_resgid = option;
				1009	break;
				1010	case Opt_sb:
				1011	/* handled by get_sb_block() instead of here */
				1012	/* sb_block = match_int(&args[0]); /
				1013	break;
				1014	case Opt_err_panic:
				1015	clear_opt (sbi->s_mount_opt, ERRORS_CONT);
				1016	clear_opt (sbi->s_mount_opt, ERRORS_RO);
				1017	set_opt (sbi->s_mount_opt, ERRORS_PANIC);
				1018	break;
				1019	case Opt_err_ro:
				1020	clear_opt (sbi->s_mount_opt, ERRORS_CONT);
				1021	clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
				1022	set_opt (sbi->s_mount_opt, ERRORS_RO);
				1023	break;
				1024	case Opt_err_cont:
				1025	clear_opt (sbi->s_mount_opt, ERRORS_RO);
				1026	clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
				1027	set_opt (sbi->s_mount_opt, ERRORS_CONT);
				1028	break;
				1029	case Opt_nouid32:
				1030	set_opt (sbi->s_mount_opt, NO_UID32);
				1031	break;
				1032	case Opt_nocheck:
				1033	clear_opt (sbi->s_mount_opt, CHECK);
				1034	break;
				1035	case Opt_debug:
				1036	set_opt (sbi->s_mount_opt, DEBUG);
				1037	break;
				1038	case Opt_oldalloc:
				1039	ext3_msg(sb, KERN_WARNING,
				1040	"Ignoring deprecated oldalloc option");
				1041	break;
				1042	case Opt_orlov:
				1043	ext3_msg(sb, KERN_WARNING,
				1044	"Ignoring deprecated orlov option");
				1045	break;
				1046	#ifdef CONFIG_EXT3_FS_XATTR
				1047	case Opt_user_xattr:
				1048	set_opt (sbi->s_mount_opt, XATTR_USER);
				1049	break;
				1050	case Opt_nouser_xattr:
				1051	clear_opt (sbi->s_mount_opt, XATTR_USER);
				1052	break;
				1053	#else
				1054	case Opt_user_xattr:
				1055	case Opt_nouser_xattr:
				1056	ext3_msg(sb, KERN_INFO,
				1057	"(no)user_xattr options not supported");
				1058	break;
				1059	#endif
				1060	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				1061	case Opt_acl:
				1062	set_opt(sbi->s_mount_opt, POSIX_ACL);
				1063	break;
				1064	case Opt_noacl:
				1065	clear_opt(sbi->s_mount_opt, POSIX_ACL);
				1066	break;
				1067	#else
				1068	case Opt_acl:
				1069	case Opt_noacl:
				1070	ext3_msg(sb, KERN_INFO,
				1071	"(no)acl options not supported");
				1072	break;
				1073	#endif
				1074	case Opt_reservation:
				1075	set_opt(sbi->s_mount_opt, RESERVATION);
				1076	break;
				1077	case Opt_noreservation:
				1078	clear_opt(sbi->s_mount_opt, RESERVATION);
				1079	break;
				1080	case Opt_journal_update:
				1081	/* @@@ FIXME */
				1082	/* Eventually we will want to be able to create
				1083	a journal file here. For now, only allow the
				1084	user to specify an existing inode to be the
				1085	journal file. */
				1086	if (is_remount) {
				1087	ext3_msg(sb, KERN_ERR, "error: cannot specify "
				1088	"journal on remount");
				1089	return 0;
				1090	}
				1091	set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
				1092	break;
				1093	case Opt_journal_inum:
				1094	if (is_remount) {
				1095	ext3_msg(sb, KERN_ERR, "error: cannot specify "
				1096	"journal on remount");
				1097	return 0;
				1098	}
				1099	if (match_int(&args[0], &option))
				1100	return 0;
				1101	*inum = option;
				1102	break;
				1103	case Opt_journal_dev:
				1104	if (is_remount) {
				1105	ext3_msg(sb, KERN_ERR, "error: cannot specify "
				1106	"journal on remount");
				1107	return 0;
				1108	}
				1109	if (match_int(&args[0], &option))
				1110	return 0;
				1111	*journal_devnum = option;
				1112	break;
				1113	case Opt_noload:
				1114	set_opt (sbi->s_mount_opt, NOLOAD);
				1115	break;
				1116	case Opt_commit:
				1117	if (match_int(&args[0], &option))
				1118	return 0;
				1119	if (option < 0)
				1120	return 0;
				1121	if (option == 0)
				1122	option = JBD_DEFAULT_MAX_COMMIT_AGE;
				1123	sbi->s_commit_interval = HZ * option;
				1124	break;
				1125	case Opt_data_journal:
				1126	data_opt = EXT3_MOUNT_JOURNAL_DATA;
				1127	goto datacheck;
				1128	case Opt_data_ordered:
				1129	data_opt = EXT3_MOUNT_ORDERED_DATA;
				1130	goto datacheck;
				1131	case Opt_data_writeback:
				1132	data_opt = EXT3_MOUNT_WRITEBACK_DATA;
				1133	datacheck:
				1134	if (is_remount) {
				1135	if (test_opt(sb, DATA_FLAGS) == data_opt)
				1136	break;
				1137	ext3_msg(sb, KERN_ERR,
				1138	"error: cannot change "
				1139	"data mode on remount. The filesystem "
				1140	"is mounted in data=%s mode and you "
				1141	"try to remount it in data=%s mode.",
				1142	data_mode_string(test_opt(sb,
				1143	DATA_FLAGS)),
				1144	data_mode_string(data_opt));
				1145	return 0;
				1146	} else {
				1147	clear_opt(sbi->s_mount_opt, DATA_FLAGS);
				1148	sbi->s_mount_opt \|= data_opt;
				1149	}
				1150	break;
				1151	case Opt_data_err_abort:
				1152	set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
				1153	break;
				1154	case Opt_data_err_ignore:
				1155	clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
				1156	break;
				1157	#ifdef CONFIG_QUOTA
				1158	case Opt_usrjquota:
				1159	if (!set_qf_name(sb, USRQUOTA, &args[0]))
				1160	return 0;
				1161	break;
				1162	case Opt_grpjquota:
				1163	if (!set_qf_name(sb, GRPQUOTA, &args[0]))
				1164	return 0;
				1165	break;
				1166	case Opt_offusrjquota:
				1167	if (!clear_qf_name(sb, USRQUOTA))
				1168	return 0;
				1169	break;
				1170	case Opt_offgrpjquota:
				1171	if (!clear_qf_name(sb, GRPQUOTA))
				1172	return 0;
				1173	break;
				1174	case Opt_jqfmt_vfsold:
				1175	qfmt = QFMT_VFS_OLD;
				1176	goto set_qf_format;
				1177	case Opt_jqfmt_vfsv0:
				1178	qfmt = QFMT_VFS_V0;
				1179	goto set_qf_format;
				1180	case Opt_jqfmt_vfsv1:
				1181	qfmt = QFMT_VFS_V1;
				1182	set_qf_format:
				1183	if (sb_any_quota_loaded(sb) &&
				1184	sbi->s_jquota_fmt != qfmt) {
				1185	ext3_msg(sb, KERN_ERR, "error: cannot change "
				1186	"journaled quota options when "
				1187	"quota turned on.");
				1188	return 0;
				1189	}
				1190	sbi->s_jquota_fmt = qfmt;
				1191	break;
				1192	case Opt_quota:
				1193	case Opt_usrquota:
				1194	set_opt(sbi->s_mount_opt, QUOTA);
				1195	set_opt(sbi->s_mount_opt, USRQUOTA);
				1196	break;
				1197	case Opt_grpquota:
				1198	set_opt(sbi->s_mount_opt, QUOTA);
				1199	set_opt(sbi->s_mount_opt, GRPQUOTA);
				1200	break;
				1201	case Opt_noquota:
				1202	if (sb_any_quota_loaded(sb)) {
				1203	ext3_msg(sb, KERN_ERR, "error: cannot change "
				1204	"quota options when quota turned on.");
				1205	return 0;
				1206	}
				1207	clear_opt(sbi->s_mount_opt, QUOTA);
				1208	clear_opt(sbi->s_mount_opt, USRQUOTA);
				1209	clear_opt(sbi->s_mount_opt, GRPQUOTA);
				1210	break;
				1211	#else
				1212	case Opt_quota:
				1213	case Opt_usrquota:
				1214	case Opt_grpquota:
				1215	ext3_msg(sb, KERN_ERR,
				1216	"error: quota options not supported.");
				1217	break;
				1218	case Opt_usrjquota:
				1219	case Opt_grpjquota:
				1220	case Opt_offusrjquota:
				1221	case Opt_offgrpjquota:
				1222	case Opt_jqfmt_vfsold:
				1223	case Opt_jqfmt_vfsv0:
				1224	case Opt_jqfmt_vfsv1:
				1225	ext3_msg(sb, KERN_ERR,
				1226	"error: journaled quota options not "
				1227	"supported.");
				1228	break;
				1229	case Opt_noquota:
				1230	break;
				1231	#endif
				1232	case Opt_abort:
				1233	set_opt(sbi->s_mount_opt, ABORT);
				1234	break;
				1235	case Opt_nobarrier:
				1236	clear_opt(sbi->s_mount_opt, BARRIER);
				1237	break;
				1238	case Opt_barrier:
				1239	if (args[0].from) {
				1240	if (match_int(&args[0], &option))
				1241	return 0;
				1242	} else
				1243	option = 1; /* No argument, default to 1 */
				1244	if (option)
				1245	set_opt(sbi->s_mount_opt, BARRIER);
				1246	else
				1247	clear_opt(sbi->s_mount_opt, BARRIER);
				1248	break;
				1249	case Opt_ignore:
				1250	break;
				1251	case Opt_resize:
				1252	if (!is_remount) {
				1253	ext3_msg(sb, KERN_ERR,
				1254	"error: resize option only available "
				1255	"for remount");
				1256	return 0;
				1257	}
				1258	if (match_int(&args[0], &option) != 0)
				1259	return 0;
				1260	*n_blocks_count = option;
				1261	break;
				1262	case Opt_nobh:
				1263	ext3_msg(sb, KERN_WARNING,
				1264	"warning: ignoring deprecated nobh option");
				1265	break;
				1266	case Opt_bh:
				1267	ext3_msg(sb, KERN_WARNING,
				1268	"warning: ignoring deprecated bh option");
				1269	break;
				1270	default:
				1271	ext3_msg(sb, KERN_ERR,
				1272	"error: unrecognized mount option \"%s\" "
				1273	"or missing value", p);
				1274	return 0;
				1275	}
				1276	}
				1277	#ifdef CONFIG_QUOTA
				1278	if (sbi->s_qf_names[USRQUOTA] \|\| sbi->s_qf_names[GRPQUOTA]) {
				1279	if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
				1280	clear_opt(sbi->s_mount_opt, USRQUOTA);
				1281	if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
				1282	clear_opt(sbi->s_mount_opt, GRPQUOTA);
				1283
				1284	if (test_opt(sb, GRPQUOTA) \|\| test_opt(sb, USRQUOTA)) {
				1285	ext3_msg(sb, KERN_ERR, "error: old and new quota "
				1286	"format mixing.");
				1287	return 0;
				1288	}
				1289
				1290	if (!sbi->s_jquota_fmt) {
				1291	ext3_msg(sb, KERN_ERR, "error: journaled quota format "
				1292	"not specified.");
				1293	return 0;
				1294	}
				1295	}
				1296	#endif
				1297	return 1;
				1298	}
				1299
				1300	static int ext3_setup_super(struct super_block sb, struct ext3_super_block es,
				1301	int read_only)
				1302	{
				1303	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1304	int res = 0;
				1305
				1306	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
				1307	ext3_msg(sb, KERN_ERR,
				1308	"error: revision level too high, "
				1309	"forcing read-only mode");
				1310	res = MS_RDONLY;
				1311	}
				1312	if (read_only)
				1313	return res;
				1314	if (!(sbi->s_mount_state & EXT3_VALID_FS))
				1315	ext3_msg(sb, KERN_WARNING,
				1316	"warning: mounting unchecked fs, "
				1317	"running e2fsck is recommended");
				1318	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
				1319	ext3_msg(sb, KERN_WARNING,
				1320	"warning: mounting fs with errors, "
				1321	"running e2fsck is recommended");
				1322	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
				1323	le16_to_cpu(es->s_mnt_count) >=
				1324	le16_to_cpu(es->s_max_mnt_count))
				1325	ext3_msg(sb, KERN_WARNING,
				1326	"warning: maximal mount count reached, "
				1327	"running e2fsck is recommended");
				1328	else if (le32_to_cpu(es->s_checkinterval) &&
				1329	(le32_to_cpu(es->s_lastcheck) +
				1330	le32_to_cpu(es->s_checkinterval) <= get_seconds()))
				1331	ext3_msg(sb, KERN_WARNING,
				1332	"warning: checktime reached, "
				1333	"running e2fsck is recommended");
				1334	#if 0
				1335	/* @@@ We _will_ want to clear the valid bit if we find
				1336	inconsistencies, to force a fsck at reboot. But for
				1337	a plain journaled filesystem we can keep it set as
				1338	valid forever! :) */
				1339	es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
				1340	#endif
				1341	if (!le16_to_cpu(es->s_max_mnt_count))
				1342	es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
				1343	le16_add_cpu(&es->s_mnt_count, 1);
				1344	es->s_mtime = cpu_to_le32(get_seconds());
				1345	ext3_update_dynamic_rev(sb);
				1346	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				1347
				1348	ext3_commit_super(sb, es, 1);
				1349	if (test_opt(sb, DEBUG))
				1350	ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
				1351	"bpg=%lu, ipg=%lu, mo=%04lx]",
				1352	sb->s_blocksize,
				1353	sbi->s_groups_count,
				1354	EXT3_BLOCKS_PER_GROUP(sb),
				1355	EXT3_INODES_PER_GROUP(sb),
				1356	sbi->s_mount_opt);
				1357
				1358	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
				1359	char b[BDEVNAME_SIZE];
				1360	ext3_msg(sb, KERN_INFO, "using external journal on %s",
				1361	bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
				1362	} else {
				1363	ext3_msg(sb, KERN_INFO, "using internal journal");
				1364	}
				1365	cleancache_init_fs(sb);
				1366	return res;
				1367	}
				1368
				1369	/* Called at mount-time, super-block is locked */
				1370	static int ext3_check_descriptors(struct super_block *sb)
				1371	{
				1372	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1373	int i;
				1374
				1375	ext3_debug ("Checking group descriptors");
				1376
				1377	for (i = 0; i < sbi->s_groups_count; i++) {
				1378	struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
				1379	ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
				1380	ext3_fsblk_t last_block;
				1381
				1382	if (i == sbi->s_groups_count - 1)
				1383	last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
				1384	else
				1385	last_block = first_block +
				1386	(EXT3_BLOCKS_PER_GROUP(sb) - 1);
				1387
				1388	if (le32_to_cpu(gdp->bg_block_bitmap) < first_block \|\|
				1389	le32_to_cpu(gdp->bg_block_bitmap) > last_block)
				1390	{
				1391	ext3_error (sb, "ext3_check_descriptors",
				1392	"Block bitmap for group %d"
				1393	" not in group (block %lu)!",
				1394	i, (unsigned long)
				1395	le32_to_cpu(gdp->bg_block_bitmap));
				1396	return 0;
				1397	}
				1398	if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block \|\|
				1399	le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
				1400	{
				1401	ext3_error (sb, "ext3_check_descriptors",
				1402	"Inode bitmap for group %d"
				1403	" not in group (block %lu)!",
				1404	i, (unsigned long)
				1405	le32_to_cpu(gdp->bg_inode_bitmap));
				1406	return 0;
				1407	}
				1408	if (le32_to_cpu(gdp->bg_inode_table) < first_block \|\|
				1409	le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
				1410	last_block)
				1411	{
				1412	ext3_error (sb, "ext3_check_descriptors",
				1413	"Inode table for group %d"
				1414	" not in group (block %lu)!",
				1415	i, (unsigned long)
				1416	le32_to_cpu(gdp->bg_inode_table));
				1417	return 0;
				1418	}
				1419	}
				1420
				1421	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
				1422	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
				1423	return 1;
				1424	}
				1425
				1426
				1427	/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
				1428	* the superblock) which were deleted from all directories, but held open by
				1429	* a process at the time of a crash. We walk the list and try to delete these
				1430	* inodes at recovery time (only with a read-write filesystem).
				1431	*
				1432	* In order to keep the orphan inode chain consistent during traversal (in
				1433	* case of crash during recovery), we link each inode into the superblock
				1434	* orphan list_head and handle it the same way as an inode deletion during
				1435	* normal operation (which journals the operations for us).
				1436	*
				1437	* We only do an iget() and an iput() on each inode, which is very safe if we
				1438	* accidentally point at an in-use or already deleted inode. The worst that
				1439	* can happen in this case is that we get a "bit already cleared" message from
				1440	* ext3_free_inode(). The only reason we would point at a wrong inode is if
				1441	* e2fsck was run on this filesystem, and it must have already done the orphan
				1442	* inode cleanup for us, so we can safely abort without any further action.
				1443	*/
				1444	static void ext3_orphan_cleanup (struct super_block * sb,
				1445	struct ext3_super_block * es)
				1446	{
				1447	unsigned int s_flags = sb->s_flags;
				1448	int nr_orphans = 0, nr_truncates = 0;
				1449	#ifdef CONFIG_QUOTA
				1450	int i;
				1451	#endif
				1452	if (!es->s_last_orphan) {
				1453	jbd_debug(4, "no orphan inodes to clean up\n");
				1454	return;
				1455	}
				1456
				1457	if (bdev_read_only(sb->s_bdev)) {
				1458	ext3_msg(sb, KERN_ERR, "error: write access "
				1459	"unavailable, skipping orphan cleanup.");
				1460	return;
				1461	}
				1462
				1463	/* Check if feature set allows readwrite operations */
				1464	if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
				1465	ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
				1466	"unknown ROCOMPAT features");
				1467	return;
				1468	}
				1469
				1470	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
				1471	if (es->s_last_orphan)
				1472	jbd_debug(1, "Errors on filesystem, "
				1473	"clearing orphan list.\n");
				1474	es->s_last_orphan = 0;
				1475	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				1476	return;
				1477	}
				1478
				1479	if (s_flags & MS_RDONLY) {
				1480	ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
				1481	sb->s_flags &= ~MS_RDONLY;
				1482	}
				1483	#ifdef CONFIG_QUOTA
				1484	/* Needed for iput() to work correctly and not trash data */
				1485	sb->s_flags \|= MS_ACTIVE;
				1486	/* Turn on quotas so that they are updated correctly */
				1487	for (i = 0; i < MAXQUOTAS; i++) {
				1488	if (EXT3_SB(sb)->s_qf_names[i]) {
				1489	int ret = ext3_quota_on_mount(sb, i);
				1490	if (ret < 0)
				1491	ext3_msg(sb, KERN_ERR,
				1492	"error: cannot turn on journaled "
				1493	"quota: %d", ret);
				1494	}
				1495	}
				1496	#endif
				1497
				1498	while (es->s_last_orphan) {
				1499	struct inode *inode;
				1500
				1501	inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
				1502	if (IS_ERR(inode)) {
				1503	es->s_last_orphan = 0;
				1504	break;
				1505	}
				1506
				1507	list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
				1508	dquot_initialize(inode);
				1509	if (inode->i_nlink) {
				1510	printk(KERN_DEBUG
				1511	"%s: truncating inode %lu to %Ld bytes\n",
				1512	__func__, inode->i_ino, inode->i_size);
				1513	jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
				1514	inode->i_ino, inode->i_size);
				1515	ext3_truncate(inode);
				1516	nr_truncates++;
				1517	} else {
				1518	printk(KERN_DEBUG
				1519	"%s: deleting unreferenced inode %lu\n",
				1520	__func__, inode->i_ino);
				1521	jbd_debug(2, "deleting unreferenced inode %lu\n",
				1522	inode->i_ino);
				1523	nr_orphans++;
				1524	}
				1525	iput(inode); /* The delete magic happens here! */
				1526	}
				1527
				1528	#define PLURAL(x) (x), ((x)==1) ? "" : "s"
				1529
				1530	if (nr_orphans)
				1531	ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
				1532	PLURAL(nr_orphans));
				1533	if (nr_truncates)
				1534	ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
				1535	PLURAL(nr_truncates));
				1536	#ifdef CONFIG_QUOTA
				1537	/* Turn quotas off */
				1538	for (i = 0; i < MAXQUOTAS; i++) {
				1539	if (sb_dqopt(sb)->files[i])
				1540	dquot_quota_off(sb, i);
				1541	}
				1542	#endif
				1543	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
				1544	}
				1545
				1546	/*
				1547	* Maximal file size. There is a direct, and {,double-,triple-}indirect
				1548	* block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
				1549	* We need to be 1 filesystem block less than the 2^32 sector limit.
				1550	*/
				1551	static loff_t ext3_max_size(int bits)
				1552	{
				1553	loff_t res = EXT3_NDIR_BLOCKS;
				1554	int meta_blocks;
				1555	loff_t upper_limit;
				1556
				1557	/* This is calculated to be the largest file size for a
				1558	* dense, file such that the total number of
				1559	* sectors in the file, including data and all indirect blocks,
				1560	* does not exceed 2^32 -1
				1561	* __u32 i_blocks representing the total number of
				1562	* 512 bytes blocks of the file
				1563	*/
				1564	upper_limit = (1LL << 32) - 1;
				1565
				1566	/* total blocks in file system block size */
				1567	upper_limit >>= (bits - 9);
				1568
				1569
				1570	/* indirect blocks */
				1571	meta_blocks = 1;
				1572	/* double indirect blocks */
				1573	meta_blocks += 1 + (1LL << (bits-2));
				1574	/* tripple indirect blocks */
				1575	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
				1576
				1577	upper_limit -= meta_blocks;
				1578	upper_limit <<= bits;
				1579
				1580	res += 1LL << (bits-2);
				1581	res += 1LL << (2*(bits-2));
				1582	res += 1LL << (3*(bits-2));
				1583	res <<= bits;
				1584	if (res > upper_limit)
				1585	res = upper_limit;
				1586
				1587	if (res > MAX_LFS_FILESIZE)
				1588	res = MAX_LFS_FILESIZE;
				1589
				1590	return res;
				1591	}
				1592
				1593	static ext3_fsblk_t descriptor_loc(struct super_block *sb,
				1594	ext3_fsblk_t logic_sb_block,
				1595	int nr)
				1596	{
				1597	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1598	unsigned long bg, first_meta_bg;
				1599	int has_super = 0;
				1600
				1601	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
				1602
				1603	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) \|\|
				1604	nr < first_meta_bg)
				1605	return (logic_sb_block + nr + 1);
				1606	bg = sbi->s_desc_per_block * nr;
				1607	if (ext3_bg_has_super(sb, bg))
				1608	has_super = 1;
				1609	return (has_super + ext3_group_first_block_no(sb, bg));
				1610	}
				1611
				1612
				1613	static int ext3_fill_super (struct super_block sb, void data, int silent)
				1614	{
				1615	struct buffer_head * bh;
				1616	struct ext3_super_block *es = NULL;
				1617	struct ext3_sb_info *sbi;
				1618	ext3_fsblk_t block;
				1619	ext3_fsblk_t sb_block = get_sb_block(&data, sb);
				1620	ext3_fsblk_t logic_sb_block;
				1621	unsigned long offset = 0;
				1622	unsigned int journal_inum = 0;
				1623	unsigned long journal_devnum = 0;
				1624	unsigned long def_mount_opts;
				1625	struct inode *root;
				1626	int blocksize;
				1627	int hblock;
				1628	int db_count;
				1629	int i;
				1630	int needs_recovery;
				1631	int ret = -EINVAL;
				1632	__le32 features;
				1633	int err;
				1634
				1635	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
				1636	if (!sbi)
				1637	return -ENOMEM;
				1638
				1639	sbi->s_blockgroup_lock =
				1640	kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
				1641	if (!sbi->s_blockgroup_lock) {
				1642	kfree(sbi);
				1643	return -ENOMEM;
				1644	}
				1645	sb->s_fs_info = sbi;
				1646	sbi->s_mount_opt = 0;
				1647	sbi->s_resuid = EXT3_DEF_RESUID;
				1648	sbi->s_resgid = EXT3_DEF_RESGID;
				1649	sbi->s_sb_block = sb_block;
				1650
				1651	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
				1652	if (!blocksize) {
				1653	ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
				1654	goto out_fail;
				1655	}
				1656
				1657	/*
				1658	* The ext3 superblock will not be buffer aligned for other than 1kB
				1659	* block sizes. We need to calculate the offset from buffer start.
				1660	*/
				1661	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
				1662	logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
				1663	offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
				1664	} else {
				1665	logic_sb_block = sb_block;
				1666	}
				1667
				1668	if (!(bh = sb_bread(sb, logic_sb_block))) {
				1669	ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
				1670	goto out_fail;
				1671	}
				1672	/*
				1673	* Note: s_es must be initialized as soon as possible because
				1674	* some ext3 macro-instructions depend on its value
				1675	*/
				1676	es = (struct ext3_super_block *) (bh->b_data + offset);
				1677	sbi->s_es = es;
				1678	sb->s_magic = le16_to_cpu(es->s_magic);
				1679	if (sb->s_magic != EXT3_SUPER_MAGIC)
				1680	goto cantfind_ext3;
				1681
				1682	/* Set defaults before we parse the mount options */
				1683	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				1684	if (def_mount_opts & EXT3_DEFM_DEBUG)
				1685	set_opt(sbi->s_mount_opt, DEBUG);
				1686	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
				1687	set_opt(sbi->s_mount_opt, GRPID);
				1688	if (def_mount_opts & EXT3_DEFM_UID16)
				1689	set_opt(sbi->s_mount_opt, NO_UID32);
				1690	#ifdef CONFIG_EXT3_FS_XATTR
				1691	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
				1692	set_opt(sbi->s_mount_opt, XATTR_USER);
				1693	#endif
				1694	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				1695	if (def_mount_opts & EXT3_DEFM_ACL)
				1696	set_opt(sbi->s_mount_opt, POSIX_ACL);
				1697	#endif
				1698	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
				1699	set_opt(sbi->s_mount_opt, JOURNAL_DATA);
				1700	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
				1701	set_opt(sbi->s_mount_opt, ORDERED_DATA);
				1702	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
				1703	set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
				1704
				1705	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
				1706	set_opt(sbi->s_mount_opt, ERRORS_PANIC);
				1707	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
				1708	set_opt(sbi->s_mount_opt, ERRORS_CONT);
				1709	else
				1710	set_opt(sbi->s_mount_opt, ERRORS_RO);
				1711
				1712	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
				1713	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
				1714
				1715	/* enable barriers by default */
				1716	set_opt(sbi->s_mount_opt, BARRIER);
				1717	set_opt(sbi->s_mount_opt, RESERVATION);
				1718
				1719	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
				1720	NULL, 0))
				1721	goto failed_mount;
				1722
				1723	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				1724	(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
				1725
				1726	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
				1727	(EXT3_HAS_COMPAT_FEATURE(sb, ~0U) \|\|
				1728	EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) \|\|
				1729	EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
				1730	ext3_msg(sb, KERN_WARNING,
				1731	"warning: feature flags set on rev 0 fs, "
				1732	"running e2fsck is recommended");
				1733	/*
				1734	* Check feature flags regardless of the revision level, since we
				1735	* previously didn't change the revision level when setting the flags,
				1736	* so there is a chance incompat flags are set on a rev 0 filesystem.
				1737	*/
				1738	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
				1739	if (features) {
				1740	ext3_msg(sb, KERN_ERR,
				1741	"error: couldn't mount because of unsupported "
				1742	"optional features (%x)", le32_to_cpu(features));
				1743	goto failed_mount;
				1744	}
				1745	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
				1746	if (!(sb->s_flags & MS_RDONLY) && features) {
				1747	ext3_msg(sb, KERN_ERR,
				1748	"error: couldn't mount RDWR because of unsupported "
				1749	"optional features (%x)", le32_to_cpu(features));
				1750	goto failed_mount;
				1751	}
				1752	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
				1753
				1754	if (blocksize < EXT3_MIN_BLOCK_SIZE \|\|
				1755	blocksize > EXT3_MAX_BLOCK_SIZE) {
				1756	ext3_msg(sb, KERN_ERR,
				1757	"error: couldn't mount because of unsupported "
				1758	"filesystem blocksize %d", blocksize);
				1759	goto failed_mount;
				1760	}
				1761
				1762	hblock = bdev_logical_block_size(sb->s_bdev);
				1763	if (sb->s_blocksize != blocksize) {
				1764	/*
				1765	* Make sure the blocksize for the filesystem is larger
				1766	* than the hardware sectorsize for the machine.
				1767	*/
				1768	if (blocksize < hblock) {
				1769	ext3_msg(sb, KERN_ERR,
				1770	"error: fsblocksize %d too small for "
				1771	"hardware sectorsize %d", blocksize, hblock);
				1772	goto failed_mount;
				1773	}
				1774
				1775	brelse (bh);
				1776	if (!sb_set_blocksize(sb, blocksize)) {
				1777	ext3_msg(sb, KERN_ERR,
				1778	"error: bad blocksize %d", blocksize);
				1779	goto out_fail;
				1780	}
				1781	logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
				1782	offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
				1783	bh = sb_bread(sb, logic_sb_block);
				1784	if (!bh) {
				1785	ext3_msg(sb, KERN_ERR,
				1786	"error: can't read superblock on 2nd try");
				1787	goto failed_mount;
				1788	}
				1789	es = (struct ext3_super_block *)(bh->b_data + offset);
				1790	sbi->s_es = es;
				1791	if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
				1792	ext3_msg(sb, KERN_ERR,
				1793	"error: magic mismatch");
				1794	goto failed_mount;
				1795	}
				1796	}
				1797
				1798	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
				1799
				1800	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
				1801	sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
				1802	sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
				1803	} else {
				1804	sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
				1805	sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
				1806	if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) \|\|
				1807	(!is_power_of_2(sbi->s_inode_size)) \|\|
				1808	(sbi->s_inode_size > blocksize)) {
				1809	ext3_msg(sb, KERN_ERR,
				1810	"error: unsupported inode size: %d",
				1811	sbi->s_inode_size);
				1812	goto failed_mount;
				1813	}
				1814	}
				1815	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
				1816	le32_to_cpu(es->s_log_frag_size);
				1817	if (blocksize != sbi->s_frag_size) {
				1818	ext3_msg(sb, KERN_ERR,
				1819	"error: fragsize %lu != blocksize %u (unsupported)",
				1820	sbi->s_frag_size, blocksize);
				1821	goto failed_mount;
				1822	}
				1823	sbi->s_frags_per_block = 1;
				1824	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
				1825	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
				1826	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
				1827	if (EXT3_INODE_SIZE(sb) == 0 \|\| EXT3_INODES_PER_GROUP(sb) == 0)
				1828	goto cantfind_ext3;
				1829	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
				1830	if (sbi->s_inodes_per_block == 0)
				1831	goto cantfind_ext3;
				1832	sbi->s_itb_per_group = sbi->s_inodes_per_group /
				1833	sbi->s_inodes_per_block;
				1834	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
				1835	sbi->s_sbh = bh;
				1836	sbi->s_mount_state = le16_to_cpu(es->s_state);
				1837	sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
				1838	sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
				1839	for (i=0; i < 4; i++)
				1840	sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
				1841	sbi->s_def_hash_version = es->s_def_hash_version;
				1842	i = le32_to_cpu(es->s_flags);
				1843	if (i & EXT2_FLAGS_UNSIGNED_HASH)
				1844	sbi->s_hash_unsigned = 3;
				1845	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
				1846	#ifdef __CHAR_UNSIGNED__
				1847	es->s_flags \|= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
				1848	sbi->s_hash_unsigned = 3;
				1849	#else
				1850	es->s_flags \|= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
				1851	#endif
				1852	}
				1853
				1854	if (sbi->s_blocks_per_group > blocksize * 8) {
				1855	ext3_msg(sb, KERN_ERR,
				1856	"#blocks per group too big: %lu",
				1857	sbi->s_blocks_per_group);
				1858	goto failed_mount;
				1859	}
				1860	if (sbi->s_frags_per_group > blocksize * 8) {
				1861	ext3_msg(sb, KERN_ERR,
				1862	"error: #fragments per group too big: %lu",
				1863	sbi->s_frags_per_group);
				1864	goto failed_mount;
				1865	}
				1866	if (sbi->s_inodes_per_group > blocksize * 8) {
				1867	ext3_msg(sb, KERN_ERR,
				1868	"error: #inodes per group too big: %lu",
				1869	sbi->s_inodes_per_group);
				1870	goto failed_mount;
				1871	}
				1872
				1873	err = generic_check_addressable(sb->s_blocksize_bits,
				1874	le32_to_cpu(es->s_blocks_count));
				1875	if (err) {
				1876	ext3_msg(sb, KERN_ERR,
				1877	"error: filesystem is too large to mount safely");
				1878	if (sizeof(sector_t) < 8)
				1879	ext3_msg(sb, KERN_ERR,
				1880	"error: CONFIG_LBDAF not enabled");
				1881	ret = err;
				1882	goto failed_mount;
				1883	}
				1884
				1885	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
				1886	goto cantfind_ext3;
				1887	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
				1888	le32_to_cpu(es->s_first_data_block) - 1)
				1889	/ EXT3_BLOCKS_PER_GROUP(sb)) + 1;
				1890	db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
				1891	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
				1892	GFP_KERNEL);
				1893	if (sbi->s_group_desc == NULL) {
				1894	ext3_msg(sb, KERN_ERR,
				1895	"error: not enough memory");
				1896	ret = -ENOMEM;
				1897	goto failed_mount;
				1898	}
				1899
				1900	bgl_lock_init(sbi->s_blockgroup_lock);
				1901
				1902	for (i = 0; i < db_count; i++) {
				1903	block = descriptor_loc(sb, logic_sb_block, i);
				1904	sbi->s_group_desc[i] = sb_bread(sb, block);
				1905	if (!sbi->s_group_desc[i]) {
				1906	ext3_msg(sb, KERN_ERR,
				1907	"error: can't read group descriptor %d", i);
				1908	db_count = i;
				1909	goto failed_mount2;
				1910	}
				1911	}
				1912	if (!ext3_check_descriptors (sb)) {
				1913	ext3_msg(sb, KERN_ERR,
				1914	"error: group descriptors corrupted");
				1915	goto failed_mount2;
				1916	}
				1917	sbi->s_gdb_count = db_count;
				1918	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
				1919	spin_lock_init(&sbi->s_next_gen_lock);
				1920
				1921	/* per filesystem reservation list head & lock */
				1922	spin_lock_init(&sbi->s_rsv_window_lock);
				1923	sbi->s_rsv_window_root = RB_ROOT;
				1924	/* Add a single, static dummy reservation to the start of the
				1925	* reservation window list --- it gives us a placeholder for
				1926	* append-at-start-of-list which makes the allocation logic
				1927	* _much_ simpler. */
				1928	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
				1929	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
				1930	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
				1931	sbi->s_rsv_window_head.rsv_goal_size = 0;
				1932	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
				1933
				1934	/*
				1935	* set up enough so that it can read an inode
				1936	*/
				1937	sb->s_op = &ext3_sops;
				1938	sb->s_export_op = &ext3_export_ops;
				1939	sb->s_xattr = ext3_xattr_handlers;
				1940	#ifdef CONFIG_QUOTA
				1941	sb->s_qcop = &ext3_qctl_operations;
				1942	sb->dq_op = &ext3_quota_operations;
				1943	#endif
				1944	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
				1945	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
				1946	mutex_init(&sbi->s_orphan_lock);
				1947	mutex_init(&sbi->s_resize_lock);
				1948
				1949	sb->s_root = NULL;
				1950
				1951	needs_recovery = (es->s_last_orphan != 0 \|\|
				1952	EXT3_HAS_INCOMPAT_FEATURE(sb,
				1953	EXT3_FEATURE_INCOMPAT_RECOVER));
				1954
				1955	/*
				1956	* The first inode we look at is the journal inode. Don't try
				1957	* root first: it may be modified in the journal!
				1958	*/
				1959	if (!test_opt(sb, NOLOAD) &&
				1960	EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
				1961	if (ext3_load_journal(sb, es, journal_devnum))
				1962	goto failed_mount2;
				1963	} else if (journal_inum) {
				1964	if (ext3_create_journal(sb, es, journal_inum))
				1965	goto failed_mount2;
				1966	} else {
				1967	if (!silent)
				1968	ext3_msg(sb, KERN_ERR,
				1969	"error: no journal found. "
				1970	"mounting ext3 over ext2?");
				1971	goto failed_mount2;
				1972	}
				1973	err = percpu_counter_init(&sbi->s_freeblocks_counter,
				1974	ext3_count_free_blocks(sb));
				1975	if (!err) {
				1976	err = percpu_counter_init(&sbi->s_freeinodes_counter,
				1977	ext3_count_free_inodes(sb));
				1978	}
				1979	if (!err) {
				1980	err = percpu_counter_init(&sbi->s_dirs_counter,
				1981	ext3_count_dirs(sb));
				1982	}
				1983	if (err) {
				1984	ext3_msg(sb, KERN_ERR, "error: insufficient memory");
				1985	ret = err;
				1986	goto failed_mount3;
				1987	}
				1988
				1989	/* We have now updated the journal if required, so we can
				1990	* validate the data journaling mode. */
				1991	switch (test_opt(sb, DATA_FLAGS)) {
				1992	case 0:
				1993	/* No mode set, assume a default based on the journal
				1994	capabilities: ORDERED_DATA if the journal can
				1995	cope, else JOURNAL_DATA */
				1996	if (journal_check_available_features
				1997	(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
				1998	set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
				1999	else
				2000	set_opt(sbi->s_mount_opt, JOURNAL_DATA);
				2001	break;
				2002
				2003	case EXT3_MOUNT_ORDERED_DATA:
				2004	case EXT3_MOUNT_WRITEBACK_DATA:
				2005	if (!journal_check_available_features
				2006	(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
				2007	ext3_msg(sb, KERN_ERR,
				2008	"error: journal does not support "
				2009	"requested data journaling mode");
				2010	goto failed_mount3;
				2011	}
				2012	default:
				2013	break;
				2014	}
				2015
				2016	/*
				2017	* The journal_load will have done any necessary log recovery,
				2018	* so we can safely mount the rest of the filesystem now.
				2019	*/
				2020
				2021	root = ext3_iget(sb, EXT3_ROOT_INO);
				2022	if (IS_ERR(root)) {
				2023	ext3_msg(sb, KERN_ERR, "error: get root inode failed");
				2024	ret = PTR_ERR(root);
				2025	goto failed_mount3;
				2026	}
				2027	if (!S_ISDIR(root->i_mode) \|\| !root->i_blocks \|\| !root->i_size) {
				2028	iput(root);
				2029	ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
				2030	goto failed_mount3;
				2031	}
				2032	sb->s_root = d_make_root(root);
				2033	if (!sb->s_root) {
				2034	ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
				2035	ret = -ENOMEM;
				2036	goto failed_mount3;
				2037	}
				2038
				2039	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
				2040
				2041	EXT3_SB(sb)->s_mount_state \|= EXT3_ORPHAN_FS;
				2042	ext3_orphan_cleanup(sb, es);
				2043	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
				2044	if (needs_recovery) {
				2045	ext3_mark_recovery_complete(sb, es);
				2046	ext3_msg(sb, KERN_INFO, "recovery complete");
				2047	}
				2048	ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
				2049	test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
				2050	test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
				2051	"writeback");
				2052
				2053	return 0;
				2054
				2055	cantfind_ext3:
				2056	if (!silent)
				2057	ext3_msg(sb, KERN_INFO,
				2058	"error: can't find ext3 filesystem on dev %s.",
				2059	sb->s_id);
				2060	goto failed_mount;
				2061
				2062	failed_mount3:
				2063	percpu_counter_destroy(&sbi->s_freeblocks_counter);
				2064	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				2065	percpu_counter_destroy(&sbi->s_dirs_counter);
				2066	journal_destroy(sbi->s_journal);
				2067	failed_mount2:
				2068	for (i = 0; i < db_count; i++)
				2069	brelse(sbi->s_group_desc[i]);
				2070	kfree(sbi->s_group_desc);
				2071	failed_mount:
				2072	#ifdef CONFIG_QUOTA
				2073	for (i = 0; i < MAXQUOTAS; i++)
				2074	kfree(sbi->s_qf_names[i]);
				2075	#endif
				2076	ext3_blkdev_remove(sbi);
				2077	brelse(bh);
				2078	out_fail:
				2079	sb->s_fs_info = NULL;
				2080	kfree(sbi->s_blockgroup_lock);
				2081	kfree(sbi);
				2082	return ret;
				2083	}
				2084
				2085	/*
				2086	* Setup any per-fs journal parameters now. We'll do this both on
				2087	* initial mount, once the journal has been initialised but before we've
				2088	* done any recovery; and again on any subsequent remount.
				2089	*/
				2090	static void ext3_init_journal_params(struct super_block sb, journal_t journal)
				2091	{
				2092	struct ext3_sb_info *sbi = EXT3_SB(sb);
				2093
				2094	if (sbi->s_commit_interval)
				2095	journal->j_commit_interval = sbi->s_commit_interval;
				2096	/* We could also set up an ext3-specific default for the commit
				2097	* interval here, but for now we'll just fall back to the jbd
				2098	* default. */
				2099
				2100	spin_lock(&journal->j_state_lock);
				2101	if (test_opt(sb, BARRIER))
				2102	journal->j_flags \|= JFS_BARRIER;
				2103	else
				2104	journal->j_flags &= ~JFS_BARRIER;
				2105	if (test_opt(sb, DATA_ERR_ABORT))
				2106	journal->j_flags \|= JFS_ABORT_ON_SYNCDATA_ERR;
				2107	else
				2108	journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
				2109	spin_unlock(&journal->j_state_lock);
				2110	}
				2111
				2112	static journal_t ext3_get_journal(struct super_block sb,
				2113	unsigned int journal_inum)
				2114	{
				2115	struct inode *journal_inode;
				2116	journal_t *journal;
				2117
				2118	/* First, test for the existence of a valid inode on disk. Bad
				2119	* things happen if we iget() an unused inode, as the subsequent
				2120	* iput() will try to delete it. */
				2121
				2122	journal_inode = ext3_iget(sb, journal_inum);
				2123	if (IS_ERR(journal_inode)) {
				2124	ext3_msg(sb, KERN_ERR, "error: no journal found");
				2125	return NULL;
				2126	}
				2127	if (!journal_inode->i_nlink) {
				2128	make_bad_inode(journal_inode);
				2129	iput(journal_inode);
				2130	ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
				2131	return NULL;
				2132	}
				2133
				2134	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
				2135	journal_inode, journal_inode->i_size);
				2136	if (!S_ISREG(journal_inode->i_mode)) {
				2137	ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
				2138	iput(journal_inode);
				2139	return NULL;
				2140	}
				2141
				2142	journal = journal_init_inode(journal_inode);
				2143	if (!journal) {
				2144	ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
				2145	iput(journal_inode);
				2146	return NULL;
				2147	}
				2148	journal->j_private = sb;
				2149	ext3_init_journal_params(sb, journal);
				2150	return journal;
				2151	}
				2152
				2153	static journal_t ext3_get_dev_journal(struct super_block sb,
				2154	dev_t j_dev)
				2155	{
				2156	struct buffer_head * bh;
				2157	journal_t *journal;
				2158	ext3_fsblk_t start;
				2159	ext3_fsblk_t len;
				2160	int hblock, blocksize;
				2161	ext3_fsblk_t sb_block;
				2162	unsigned long offset;
				2163	struct ext3_super_block * es;
				2164	struct block_device *bdev;
				2165
				2166	bdev = ext3_blkdev_get(j_dev, sb);
				2167	if (bdev == NULL)
				2168	return NULL;
				2169
				2170	blocksize = sb->s_blocksize;
				2171	hblock = bdev_logical_block_size(bdev);
				2172	if (blocksize < hblock) {
				2173	ext3_msg(sb, KERN_ERR,
				2174	"error: blocksize too small for journal device");
				2175	goto out_bdev;
				2176	}
				2177
				2178	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
				2179	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
				2180	set_blocksize(bdev, blocksize);
				2181	if (!(bh = __bread(bdev, sb_block, blocksize))) {
				2182	ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
				2183	"external journal");
				2184	goto out_bdev;
				2185	}
				2186
				2187	es = (struct ext3_super_block *) (bh->b_data + offset);
				2188	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) \|\|
				2189	!(le32_to_cpu(es->s_feature_incompat) &
				2190	EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
				2191	ext3_msg(sb, KERN_ERR, "error: external journal has "
				2192	"bad superblock");
				2193	brelse(bh);
				2194	goto out_bdev;
				2195	}
				2196
				2197	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
				2198	ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
				2199	brelse(bh);
				2200	goto out_bdev;
				2201	}
				2202
				2203	len = le32_to_cpu(es->s_blocks_count);
				2204	start = sb_block + 1;
				2205	brelse(bh); /* we're done with the superblock */
				2206
				2207	journal = journal_init_dev(bdev, sb->s_bdev,
				2208	start, len, blocksize);
				2209	if (!journal) {
				2210	ext3_msg(sb, KERN_ERR,
				2211	"error: failed to create device journal");
				2212	goto out_bdev;
				2213	}
				2214	journal->j_private = sb;
				2215	if (!bh_uptodate_or_lock(journal->j_sb_buffer)) {
				2216	if (bh_submit_read(journal->j_sb_buffer)) {
				2217	ext3_msg(sb, KERN_ERR, "I/O error on journal device");
				2218	goto out_journal;
				2219	}
				2220	}
				2221	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
				2222	ext3_msg(sb, KERN_ERR,
				2223	"error: external journal has more than one "
				2224	"user (unsupported) - %d",
				2225	be32_to_cpu(journal->j_superblock->s_nr_users));
				2226	goto out_journal;
				2227	}
				2228	EXT3_SB(sb)->journal_bdev = bdev;
				2229	ext3_init_journal_params(sb, journal);
				2230	return journal;
				2231	out_journal:
				2232	journal_destroy(journal);
				2233	out_bdev:
				2234	ext3_blkdev_put(bdev);
				2235	return NULL;
				2236	}
				2237
				2238	static int ext3_load_journal(struct super_block *sb,
				2239	struct ext3_super_block *es,
				2240	unsigned long journal_devnum)
				2241	{
				2242	journal_t *journal;
				2243	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
				2244	dev_t journal_dev;
				2245	int err = 0;
				2246	int really_read_only;
				2247
				2248	if (journal_devnum &&
				2249	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				2250	ext3_msg(sb, KERN_INFO, "external journal device major/minor "
				2251	"numbers have changed");
				2252	journal_dev = new_decode_dev(journal_devnum);
				2253	} else
				2254	journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
				2255
				2256	really_read_only = bdev_read_only(sb->s_bdev);
				2257
				2258	/*
				2259	* Are we loading a blank journal or performing recovery after a
				2260	* crash? For recovery, we need to check in advance whether we
				2261	* can get read-write access to the device.
				2262	*/
				2263
				2264	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
				2265	if (sb->s_flags & MS_RDONLY) {
				2266	ext3_msg(sb, KERN_INFO,
				2267	"recovery required on readonly filesystem");
				2268	if (really_read_only) {
				2269	ext3_msg(sb, KERN_ERR, "error: write access "
				2270	"unavailable, cannot proceed");
				2271	return -EROFS;
				2272	}
				2273	ext3_msg(sb, KERN_INFO,
				2274	"write access will be enabled during recovery");
				2275	}
				2276	}
				2277
				2278	if (journal_inum && journal_dev) {
				2279	ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
				2280	"and inode journals");
				2281	return -EINVAL;
				2282	}
				2283
				2284	if (journal_inum) {
				2285	if (!(journal = ext3_get_journal(sb, journal_inum)))
				2286	return -EINVAL;
				2287	} else {
				2288	if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
				2289	return -EINVAL;
				2290	}
				2291
				2292	if (!(journal->j_flags & JFS_BARRIER))
				2293	printk(KERN_INFO "EXT3-fs: barriers not enabled\n");
				2294
				2295	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
				2296	err = journal_update_format(journal);
				2297	if (err) {
				2298	ext3_msg(sb, KERN_ERR, "error updating journal");
				2299	journal_destroy(journal);
				2300	return err;
				2301	}
				2302	}
				2303
				2304	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
				2305	err = journal_wipe(journal, !really_read_only);
				2306	if (!err)
				2307	err = journal_load(journal);
				2308
				2309	if (err) {
				2310	ext3_msg(sb, KERN_ERR, "error loading journal");
				2311	journal_destroy(journal);
				2312	return err;
				2313	}
				2314
				2315	EXT3_SB(sb)->s_journal = journal;
				2316	ext3_clear_journal_err(sb, es);
				2317
				2318	if (!really_read_only && journal_devnum &&
				2319	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				2320	es->s_journal_dev = cpu_to_le32(journal_devnum);
				2321
				2322	/* Make sure we flush the recovery flag to disk. */
				2323	ext3_commit_super(sb, es, 1);
				2324	}
				2325
				2326	return 0;
				2327	}
				2328
				2329	static int ext3_create_journal(struct super_block *sb,
				2330	struct ext3_super_block *es,
				2331	unsigned int journal_inum)
				2332	{
				2333	journal_t *journal;
				2334	int err;
				2335
				2336	if (sb->s_flags & MS_RDONLY) {
				2337	ext3_msg(sb, KERN_ERR,
				2338	"error: readonly filesystem when trying to "
				2339	"create journal");
				2340	return -EROFS;
				2341	}
				2342
				2343	journal = ext3_get_journal(sb, journal_inum);
				2344	if (!journal)
				2345	return -EINVAL;
				2346
				2347	ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
				2348	journal_inum);
				2349
				2350	err = journal_create(journal);
				2351	if (err) {
				2352	ext3_msg(sb, KERN_ERR, "error creating journal");
				2353	journal_destroy(journal);
				2354	return -EIO;
				2355	}
				2356
				2357	EXT3_SB(sb)->s_journal = journal;
				2358
				2359	ext3_update_dynamic_rev(sb);
				2360	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2361	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
				2362
				2363	es->s_journal_inum = cpu_to_le32(journal_inum);
				2364
				2365	/* Make sure we flush the recovery flag to disk. */
				2366	ext3_commit_super(sb, es, 1);
				2367
				2368	return 0;
				2369	}
				2370
				2371	static int ext3_commit_super(struct super_block *sb,
				2372	struct ext3_super_block *es,
				2373	int sync)
				2374	{
				2375	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
				2376	int error = 0;
				2377
				2378	if (!sbh)
				2379	return error;
				2380
				2381	if (buffer_write_io_error(sbh)) {
				2382	/*
				2383	* Oh, dear. A previous attempt to write the
				2384	* superblock failed. This could happen because the
				2385	* USB device was yanked out. Or it could happen to
				2386	* be a transient write error and maybe the block will
				2387	* be remapped. Nothing we can do but to retry the
				2388	* write and hope for the best.
				2389	*/
				2390	ext3_msg(sb, KERN_ERR, "previous I/O error to "
				2391	"superblock detected");
				2392	clear_buffer_write_io_error(sbh);
				2393	set_buffer_uptodate(sbh);
				2394	}
				2395	/*
				2396	* If the file system is mounted read-only, don't update the
				2397	* superblock write time. This avoids updating the superblock
				2398	* write time when we are mounting the root file system
				2399	* read/only but we need to replay the journal; at that point,
				2400	* for people who are east of GMT and who make their clock
				2401	* tick in localtime for Windows bug-for-bug compatibility,
				2402	* the clock is set in the future, and this will cause e2fsck
				2403	* to complain and force a full file system check.
				2404	*/
				2405	if (!(sb->s_flags & MS_RDONLY))
				2406	es->s_wtime = cpu_to_le32(get_seconds());
				2407	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
				2408	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
				2409	BUFFER_TRACE(sbh, "marking dirty");
				2410	mark_buffer_dirty(sbh);
				2411	if (sync) {
				2412	error = sync_dirty_buffer(sbh);
				2413	if (buffer_write_io_error(sbh)) {
				2414	ext3_msg(sb, KERN_ERR, "I/O error while writing "
				2415	"superblock");
				2416	clear_buffer_write_io_error(sbh);
				2417	set_buffer_uptodate(sbh);
				2418	}
				2419	}
				2420	return error;
				2421	}
				2422
				2423
				2424	/*
				2425	* Have we just finished recovery? If so, and if we are mounting (or
				2426	* remounting) the filesystem readonly, then we will end up with a
				2427	* consistent fs on disk. Record that fact.
				2428	*/
				2429	static void ext3_mark_recovery_complete(struct super_block * sb,
				2430	struct ext3_super_block * es)
				2431	{
				2432	journal_t *journal = EXT3_SB(sb)->s_journal;
				2433
				2434	journal_lock_updates(journal);
				2435	if (journal_flush(journal) < 0)
				2436	goto out;
				2437
				2438	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
				2439	sb->s_flags & MS_RDONLY) {
				2440	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2441	ext3_commit_super(sb, es, 1);
				2442	}
				2443
				2444	out:
				2445	journal_unlock_updates(journal);
				2446	}
				2447
				2448	/*
				2449	* If we are mounting (or read-write remounting) a filesystem whose journal
				2450	* has recorded an error from a previous lifetime, move that error to the
				2451	* main filesystem now.
				2452	*/
				2453	static void ext3_clear_journal_err(struct super_block *sb,
				2454	struct ext3_super_block *es)
				2455	{
				2456	journal_t *journal;
				2457	int j_errno;
				2458	const char *errstr;
				2459
				2460	journal = EXT3_SB(sb)->s_journal;
				2461
				2462	/*
				2463	* Now check for any error status which may have been recorded in the
				2464	* journal by a prior ext3_error() or ext3_abort()
				2465	*/
				2466
				2467	j_errno = journal_errno(journal);
				2468	if (j_errno) {
				2469	char nbuf[16];
				2470
				2471	errstr = ext3_decode_error(sb, j_errno, nbuf);
				2472	ext3_warning(sb, __func__, "Filesystem error recorded "
				2473	"from previous mount: %s", errstr);
				2474	ext3_warning(sb, __func__, "Marking fs in need of "
				2475	"filesystem check.");
				2476
				2477	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				2478	es->s_state \|= cpu_to_le16(EXT3_ERROR_FS);
				2479	ext3_commit_super (sb, es, 1);
				2480
				2481	journal_clear_err(journal);
				2482	}
				2483	}
				2484
				2485	/*
				2486	* Force the running and committing transactions to commit,
				2487	* and wait on the commit.
				2488	*/
				2489	int ext3_force_commit(struct super_block *sb)
				2490	{
				2491	journal_t *journal;
				2492	int ret;
				2493
				2494	if (sb->s_flags & MS_RDONLY)
				2495	return 0;
				2496
				2497	journal = EXT3_SB(sb)->s_journal;
				2498	ret = ext3_journal_force_commit(journal);
				2499	return ret;
				2500	}
				2501
				2502	static int ext3_sync_fs(struct super_block *sb, int wait)
				2503	{
				2504	tid_t target;
				2505
				2506	trace_ext3_sync_fs(sb, wait);
				2507	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
				2508	if (wait)
				2509	log_wait_commit(EXT3_SB(sb)->s_journal, target);
				2510	}
				2511	return 0;
				2512	}
				2513
				2514	/*
				2515	* LVM calls this function before a (read-only) snapshot is created. This
				2516	* gives us a chance to flush the journal completely and mark the fs clean.
				2517	*/
				2518	static int ext3_freeze(struct super_block *sb)
				2519	{
				2520	int error = 0;
				2521	journal_t *journal;
				2522
				2523	if (!(sb->s_flags & MS_RDONLY)) {
				2524	journal = EXT3_SB(sb)->s_journal;
				2525
				2526	/* Now we set up the journal barrier. */
				2527	journal_lock_updates(journal);
				2528
				2529	/*
				2530	* We don't want to clear needs_recovery flag when we failed
				2531	* to flush the journal.
				2532	*/
				2533	error = journal_flush(journal);
				2534	if (error < 0)
				2535	goto out;
				2536
				2537	/* Journal blocked and flushed, clear needs_recovery flag. */
				2538	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2539	error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
				2540	if (error)
				2541	goto out;
				2542	}
				2543	return 0;
				2544
				2545	out:
				2546	journal_unlock_updates(journal);
				2547	return error;
				2548	}
				2549
				2550	/*
				2551	* Called by LVM after the snapshot is done. We need to reset the RECOVER
				2552	* flag here, even though the filesystem is not technically dirty yet.
				2553	*/
				2554	static int ext3_unfreeze(struct super_block *sb)
				2555	{
				2556	if (!(sb->s_flags & MS_RDONLY)) {
				2557	lock_super(sb);
				2558	/* Reser the needs_recovery flag before the fs is unlocked. */
				2559	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2560	ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
				2561	unlock_super(sb);
				2562	journal_unlock_updates(EXT3_SB(sb)->s_journal);
				2563	}
				2564	return 0;
				2565	}
				2566
				2567	static int ext3_remount (struct super_block * sb, int * flags, char * data)
				2568	{
				2569	struct ext3_super_block * es;
				2570	struct ext3_sb_info *sbi = EXT3_SB(sb);
				2571	ext3_fsblk_t n_blocks_count = 0;
				2572	unsigned long old_sb_flags;
				2573	struct ext3_mount_options old_opts;
				2574	int enable_quota = 0;
				2575	int err;
				2576	#ifdef CONFIG_QUOTA
				2577	int i;
				2578	#endif
				2579
				2580	/* Store the original options */
				2581	lock_super(sb);
				2582	old_sb_flags = sb->s_flags;
				2583	old_opts.s_mount_opt = sbi->s_mount_opt;
				2584	old_opts.s_resuid = sbi->s_resuid;
				2585	old_opts.s_resgid = sbi->s_resgid;
				2586	old_opts.s_commit_interval = sbi->s_commit_interval;
				2587	#ifdef CONFIG_QUOTA
				2588	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
				2589	for (i = 0; i < MAXQUOTAS; i++)
				2590	old_opts.s_qf_names[i] = sbi->s_qf_names[i];
				2591	#endif
				2592
				2593	/*
				2594	* Allow the "check" option to be passed as a remount option.
				2595	*/
				2596	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
				2597	err = -EINVAL;
				2598	goto restore_opts;
				2599	}
				2600
				2601	if (test_opt(sb, ABORT))
				2602	ext3_abort(sb, __func__, "Abort forced by user");
				2603
				2604	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				2605	(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
				2606
				2607	es = sbi->s_es;
				2608
				2609	ext3_init_journal_params(sb, sbi->s_journal);
				2610
				2611	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) \|\|
				2612	n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
				2613	if (test_opt(sb, ABORT)) {
				2614	err = -EROFS;
				2615	goto restore_opts;
				2616	}
				2617
				2618	if (*flags & MS_RDONLY) {
				2619	err = dquot_suspend(sb, -1);
				2620	if (err < 0)
				2621	goto restore_opts;
				2622
				2623	/*
				2624	* First of all, the unconditional stuff we have to do
				2625	* to disable replay of the journal when we next remount
				2626	*/
				2627	sb->s_flags \|= MS_RDONLY;
				2628
				2629	/*
				2630	* OK, test if we are remounting a valid rw partition
				2631	* readonly, and if so set the rdonly flag and then
				2632	* mark the partition as valid again.
				2633	*/
				2634	if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
				2635	(sbi->s_mount_state & EXT3_VALID_FS))
				2636	es->s_state = cpu_to_le16(sbi->s_mount_state);
				2637
				2638	ext3_mark_recovery_complete(sb, es);
				2639	} else {
				2640	__le32 ret;
				2641	if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
				2642	~EXT3_FEATURE_RO_COMPAT_SUPP))) {
				2643	ext3_msg(sb, KERN_WARNING,
				2644	"warning: couldn't remount RDWR "
				2645	"because of unsupported optional "
				2646	"features (%x)", le32_to_cpu(ret));
				2647	err = -EROFS;
				2648	goto restore_opts;
				2649	}
				2650
				2651	/*
				2652	* If we have an unprocessed orphan list hanging
				2653	* around from a previously readonly bdev mount,
				2654	* require a full umount & mount for now.
				2655	*/
				2656	if (es->s_last_orphan) {
				2657	ext3_msg(sb, KERN_WARNING, "warning: couldn't "
				2658	"remount RDWR because of unprocessed "
				2659	"orphan inode list. Please "
				2660	"umount & mount instead.");
				2661	err = -EINVAL;
				2662	goto restore_opts;
				2663	}
				2664
				2665	/*
				2666	* Mounting a RDONLY partition read-write, so reread
				2667	* and store the current valid flag. (It may have
				2668	* been changed by e2fsck since we originally mounted
				2669	* the partition.)
				2670	*/
				2671	ext3_clear_journal_err(sb, es);
				2672	sbi->s_mount_state = le16_to_cpu(es->s_state);
				2673	if ((err = ext3_group_extend(sb, es, n_blocks_count)))
				2674	goto restore_opts;
				2675	if (!ext3_setup_super (sb, es, 0))
				2676	sb->s_flags &= ~MS_RDONLY;
				2677	enable_quota = 1;
				2678	}
				2679	}
				2680	#ifdef CONFIG_QUOTA
				2681	/* Release old quota file names */
				2682	for (i = 0; i < MAXQUOTAS; i++)
				2683	if (old_opts.s_qf_names[i] &&
				2684	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				2685	kfree(old_opts.s_qf_names[i]);
				2686	#endif
				2687	unlock_super(sb);
				2688
				2689	if (enable_quota)
				2690	dquot_resume(sb, -1);
				2691	return 0;
				2692	restore_opts:
				2693	sb->s_flags = old_sb_flags;
				2694	sbi->s_mount_opt = old_opts.s_mount_opt;
				2695	sbi->s_resuid = old_opts.s_resuid;
				2696	sbi->s_resgid = old_opts.s_resgid;
				2697	sbi->s_commit_interval = old_opts.s_commit_interval;
				2698	#ifdef CONFIG_QUOTA
				2699	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
				2700	for (i = 0; i < MAXQUOTAS; i++) {
				2701	if (sbi->s_qf_names[i] &&
				2702	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				2703	kfree(sbi->s_qf_names[i]);
				2704	sbi->s_qf_names[i] = old_opts.s_qf_names[i];
				2705	}
				2706	#endif
				2707	unlock_super(sb);
				2708	return err;
				2709	}
				2710
				2711	static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
				2712	{
				2713	struct super_block *sb = dentry->d_sb;
				2714	struct ext3_sb_info *sbi = EXT3_SB(sb);
				2715	struct ext3_super_block *es = sbi->s_es;
				2716	u64 fsid;
				2717
				2718	if (test_opt(sb, MINIX_DF)) {
				2719	sbi->s_overhead_last = 0;
				2720	} else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
				2721	unsigned long ngroups = sbi->s_groups_count, i;
				2722	ext3_fsblk_t overhead = 0;
				2723	smp_rmb();
				2724
				2725	/*
				2726	* Compute the overhead (FS structures). This is constant
				2727	* for a given filesystem unless the number of block groups
				2728	* changes so we cache the previous value until it does.
				2729	*/
				2730
				2731	/*
				2732	* All of the blocks before first_data_block are
				2733	* overhead
				2734	*/
				2735	overhead = le32_to_cpu(es->s_first_data_block);
				2736
				2737	/*
				2738	* Add the overhead attributed to the superblock and
				2739	* block group descriptors. If the sparse superblocks
				2740	* feature is turned on, then not all groups have this.
				2741	*/
				2742	for (i = 0; i < ngroups; i++) {
				2743	overhead += ext3_bg_has_super(sb, i) +
				2744	ext3_bg_num_gdb(sb, i);
				2745	cond_resched();
				2746	}
				2747
				2748	/*
				2749	* Every block group has an inode bitmap, a block
				2750	* bitmap, and an inode table.
				2751	*/
				2752	overhead += ngroups * (2 + sbi->s_itb_per_group);
				2753	sbi->s_overhead_last = overhead;
				2754	smp_wmb();
				2755	sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
				2756	}
				2757
				2758	buf->f_type = EXT3_SUPER_MAGIC;
				2759	buf->f_bsize = sb->s_blocksize;
				2760	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
				2761	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
				2762	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
				2763	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
				2764	buf->f_bavail = 0;
				2765	buf->f_files = le32_to_cpu(es->s_inodes_count);
				2766	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
				2767	buf->f_namelen = EXT3_NAME_LEN;
				2768	fsid = le64_to_cpup((void *)es->s_uuid) ^
				2769	le64_to_cpup((void *)es->s_uuid + sizeof(u64));
				2770	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
				2771	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
				2772	return 0;
				2773	}
				2774
				/*
				 * Helper functions for writing quotas on sync - we need to start a
				 * transaction before the quota file is locked for write. Otherwise there
				 * are possible deadlocks:
				 *
				 *   Process 1                         Process 2
				 *   ext3_create()                     quota_sync()
				 *     journal_start()                   write_dquot()
				 *     dquot_initialize()                  down(dqio_mutex)
				 *       down(dqio_mutex)                  journal_start()
				 */
				2784
				2785	#ifdef CONFIG_QUOTA
				2786
				2787	static inline struct inode dquot_to_inode(struct dquot dquot)
				2788	{
				2789	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
				2790	}
				2791
				2792	static int ext3_write_dquot(struct dquot *dquot)
				2793	{
				2794	int ret, err;
				2795	handle_t *handle;
				2796	struct inode *inode;
				2797
				2798	inode = dquot_to_inode(dquot);
				2799	handle = ext3_journal_start(inode,
				2800	EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
				2801	if (IS_ERR(handle))
				2802	return PTR_ERR(handle);
				2803	ret = dquot_commit(dquot);
				2804	err = ext3_journal_stop(handle);
				2805	if (!ret)
				2806	ret = err;
				2807	return ret;
				2808	}
				2809
				2810	static int ext3_acquire_dquot(struct dquot *dquot)
				2811	{
				2812	int ret, err;
				2813	handle_t *handle;
				2814
				2815	handle = ext3_journal_start(dquot_to_inode(dquot),
				2816	EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
				2817	if (IS_ERR(handle))
				2818	return PTR_ERR(handle);
				2819	ret = dquot_acquire(dquot);
				2820	err = ext3_journal_stop(handle);
				2821	if (!ret)
				2822	ret = err;
				2823	return ret;
				2824	}
				2825
				2826	static int ext3_release_dquot(struct dquot *dquot)
				2827	{
				2828	int ret, err;
				2829	handle_t *handle;
				2830
				2831	handle = ext3_journal_start(dquot_to_inode(dquot),
				2832	EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
				2833	if (IS_ERR(handle)) {
				2834	/* Release dquot anyway to avoid endless cycle in dqput() */
				2835	dquot_release(dquot);
				2836	return PTR_ERR(handle);
				2837	}
				2838	ret = dquot_release(dquot);
				2839	err = ext3_journal_stop(handle);
				2840	if (!ret)
				2841	ret = err;
				2842	return ret;
				2843	}
				2844
				2845	static int ext3_mark_dquot_dirty(struct dquot *dquot)
				2846	{
				2847	/* Are we journaling quotas? */
				2848	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] \|\|
				2849	EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
				2850	dquot_mark_dquot_dirty(dquot);
				2851	return ext3_write_dquot(dquot);
				2852	} else {
				2853	return dquot_mark_dquot_dirty(dquot);
				2854	}
				2855	}
				2856
				2857	static int ext3_write_info(struct super_block *sb, int type)
				2858	{
				2859	int ret, err;
				2860	handle_t *handle;
				2861
				2862	/* Data block + inode block */
				2863	handle = ext3_journal_start(sb->s_root->d_inode, 2);
				2864	if (IS_ERR(handle))
				2865	return PTR_ERR(handle);
				2866	ret = dquot_commit_info(sb, type);
				2867	err = ext3_journal_stop(handle);
				2868	if (!ret)
				2869	ret = err;
				2870	return ret;
				2871	}
				2872
				2873	/*
				2874	* Turn on quotas during mount time - we need to find
				2875	* the quota file and such...
				2876	*/
				2877	static int ext3_quota_on_mount(struct super_block *sb, int type)
				2878	{
				2879	return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
				2880	EXT3_SB(sb)->s_jquota_fmt, type);
				2881	}
				2882
				2883	/*
				2884	* Standard function to be called on quota_on
				2885	*/
				2886	static int ext3_quota_on(struct super_block *sb, int type, int format_id,
				2887	struct path *path)
				2888	{
				2889	int err;
				2890
				2891	if (!test_opt(sb, QUOTA))
				2892	return -EINVAL;
				2893
				2894	/* Quotafile not on the same filesystem? */
				2895	if (path->dentry->d_sb != sb)
				2896	return -EXDEV;
				2897	/* Journaling quota? */
				2898	if (EXT3_SB(sb)->s_qf_names[type]) {
				2899	/* Quotafile not of fs root? */
				2900	if (path->dentry->d_parent != sb->s_root)
				2901	ext3_msg(sb, KERN_WARNING,
				2902	"warning: Quota file not on filesystem root. "
				2903	"Journaled quota will not work.");
				2904	}
				2905
				2906	/*
				2907	* When we journal data on quota file, we have to flush journal to see
				2908	* all updates to the file when we bypass pagecache...
				2909	*/
				2910	if (ext3_should_journal_data(path->dentry->d_inode)) {
				2911	/*
				2912	* We don't need to lock updates but journal_flush() could
				2913	* otherwise be livelocked...
				2914	*/
				2915	journal_lock_updates(EXT3_SB(sb)->s_journal);
				2916	err = journal_flush(EXT3_SB(sb)->s_journal);
				2917	journal_unlock_updates(EXT3_SB(sb)->s_journal);
				2918	if (err)
				2919	return err;
				2920	}
				2921
				2922	return dquot_quota_on(sb, type, format_id, path);
				2923	}
				2924
				2925	/* Read data from quotafile - avoid pagecache and such because we cannot afford
				2926	* acquiring the locks... As quota files are never truncated and quota code
				2927	* itself serializes the operations (and no one else should touch the files)
				2928	* we don't have to be afraid of races */
				2929	static ssize_t ext3_quota_read(struct super_block sb, int type, char data,
				2930	size_t len, loff_t off)
				2931	{
				2932	struct inode *inode = sb_dqopt(sb)->files[type];
				2933	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
				2934	int err = 0;
				2935	int offset = off & (sb->s_blocksize - 1);
				2936	int tocopy;
				2937	size_t toread;
				2938	struct buffer_head *bh;
				2939	loff_t i_size = i_size_read(inode);
				2940
				2941	if (off > i_size)
				2942	return 0;
				2943	if (off+len > i_size)
				2944	len = i_size-off;
				2945	toread = len;
				2946	while (toread > 0) {
				2947	tocopy = sb->s_blocksize - offset < toread ?
				2948	sb->s_blocksize - offset : toread;
				2949	bh = ext3_bread(NULL, inode, blk, 0, &err);
				2950	if (err)
				2951	return err;
				2952	if (!bh) /* A hole? */
				2953	memset(data, 0, tocopy);
				2954	else
				2955	memcpy(data, bh->b_data+offset, tocopy);
				2956	brelse(bh);
				2957	offset = 0;
				2958	toread -= tocopy;
				2959	data += tocopy;
				2960	blk++;
				2961	}
				2962	return len;
				2963	}
				2964
				2965	/* Write to quotafile (we know the transaction is already started and has
				2966	* enough credits) */
				2967	static ssize_t ext3_quota_write(struct super_block *sb, int type,
				2968	const char *data, size_t len, loff_t off)
				2969	{
				2970	struct inode *inode = sb_dqopt(sb)->files[type];
				2971	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
				2972	int err = 0;
				2973	int offset = off & (sb->s_blocksize - 1);
				2974	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
				2975	struct buffer_head *bh;
				2976	handle_t *handle = journal_current_handle();
				2977
				2978	if (!handle) {
				2979	ext3_msg(sb, KERN_WARNING,
				2980	"warning: quota write (off=%llu, len=%llu)"
				2981	" cancelled because transaction is not started.",
				2982	(unsigned long long)off, (unsigned long long)len);
				2983	return -EIO;
				2984	}
				2985
				2986	/*
				2987	* Since we account only one data block in transaction credits,
				2988	* then it is impossible to cross a block boundary.
				2989	*/
				2990	if (sb->s_blocksize - offset < len) {
				2991	ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				2992	" cancelled because not block aligned",
				2993	(unsigned long long)off, (unsigned long long)len);
				2994	return -EIO;
				2995	}
				2996	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
				2997	bh = ext3_bread(handle, inode, blk, 1, &err);
				2998	if (!bh)
				2999	goto out;
				3000	if (journal_quota) {
				3001	err = ext3_journal_get_write_access(handle, bh);
				3002	if (err) {
				3003	brelse(bh);
				3004	goto out;
				3005	}
				3006	}
				3007	lock_buffer(bh);
				3008	memcpy(bh->b_data+offset, data, len);
				3009	flush_dcache_page(bh->b_page);
				3010	unlock_buffer(bh);
				3011	if (journal_quota)
				3012	err = ext3_journal_dirty_metadata(handle, bh);
				3013	else {
				3014	/* Always do at least ordered writes for quotas */
				3015	err = ext3_journal_dirty_data(handle, bh);
				3016	mark_buffer_dirty(bh);
				3017	}
				3018	brelse(bh);
				3019	out:
				3020	if (err) {
				3021	mutex_unlock(&inode->i_mutex);
				3022	return err;
				3023	}
				3024	if (inode->i_size < off + len) {
				3025	i_size_write(inode, off + len);
				3026	EXT3_I(inode)->i_disksize = inode->i_size;
				3027	}
				3028	inode->i_version++;
				3029	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
				3030	ext3_mark_inode_dirty(handle, inode);
				3031	mutex_unlock(&inode->i_mutex);
				3032	return len;
				3033	}
				3034
				3035	#endif
				3036
				3037	static struct dentry ext3_mount(struct file_system_type fs_type,
				3038	int flags, const char dev_name, void data)
				3039	{
				3040	return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
				3041	}
				3042
				3043	static struct file_system_type ext3_fs_type = {
				3044	.owner = THIS_MODULE,
				3045	.name = "ext3",
				3046	.mount = ext3_mount,
				3047	.kill_sb = kill_block_super,
				3048	.fs_flags = FS_REQUIRES_DEV,
				3049	};
				3050
				3051	static int __init init_ext3_fs(void)
				3052	{
				3053	int err = init_ext3_xattr();
				3054	if (err)
				3055	return err;
				3056	err = init_inodecache();
				3057	if (err)
				3058	goto out1;
				3059	err = register_filesystem(&ext3_fs_type);
				3060	if (err)
				3061	goto out;
				3062	return 0;
				3063	out:
				3064	destroy_inodecache();
				3065	out1:
				3066	exit_ext3_xattr();
				3067	return err;
				3068	}
				3069
				3070	static void __exit exit_ext3_fs(void)
				3071	{
				3072	unregister_filesystem(&ext3_fs_type);
				3073	destroy_inodecache();
				3074	exit_ext3_xattr();
				3075	}
				3076
				3077	MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
				3078	MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
				3079	MODULE_LICENSE("GPL");
				3080	module_init(init_ext3_fs)
				3081	module_exit(exit_ext3_fs)