Blame - src/kernel/linux/v4.19/fs/btrfs/super.c - T800

blob: ddbad8d5094906af82109ad196dcc43203dd3422 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright (C) 2007 Oracle. All rights reserved.
				4	*/
				5
				6	#include <linux/blkdev.h>
				7	#include <linux/module.h>
				8	#include <linux/fs.h>
				9	#include <linux/pagemap.h>
				10	#include <linux/highmem.h>
				11	#include <linux/time.h>
				12	#include <linux/init.h>
				13	#include <linux/seq_file.h>
				14	#include <linux/string.h>
				15	#include <linux/backing-dev.h>
				16	#include <linux/mount.h>
				17	#include <linux/writeback.h>
				18	#include <linux/statfs.h>
				19	#include <linux/compat.h>
				20	#include <linux/parser.h>
				21	#include <linux/ctype.h>
				22	#include <linux/namei.h>
				23	#include <linux/miscdevice.h>
				24	#include <linux/magic.h>
				25	#include <linux/slab.h>
				26	#include <linux/cleancache.h>
				27	#include <linux/ratelimit.h>
				28	#include <linux/crc32c.h>
				29	#include <linux/btrfs.h>
				30	#include "delayed-inode.h"
				31	#include "ctree.h"
				32	#include "disk-io.h"
				33	#include "transaction.h"
				34	#include "btrfs_inode.h"
				35	#include "print-tree.h"
				36	#include "props.h"
				37	#include "xattr.h"
				38	#include "volumes.h"
				39	#include "export.h"
				40	#include "compression.h"
				41	#include "rcu-string.h"
				42	#include "dev-replace.h"
				43	#include "free-space-cache.h"
				44	#include "backref.h"
				45	#include "tests/btrfs-tests.h"
				46
				47	#include "qgroup.h"
				48	#define CREATE_TRACE_POINTS
				49	#include <trace/events/btrfs.h>
				50
				51	static const struct super_operations btrfs_super_ops;
				52
				53	/*
				54	* Types for mounting the default subvolume and a subvolume explicitly
				55	* requested by subvol=/path. That way the callchain is straightforward and we
				56	* don't have to play tricks with the mount options and recursive calls to
				57	* btrfs_mount.
				58	*
				59	* The new btrfs_root_fs_type also servers as a tag for the bdev_holder.
				60	*/
				61	static struct file_system_type btrfs_fs_type;
				62	static struct file_system_type btrfs_root_fs_type;
				63
				64	static int btrfs_remount(struct super_block sb, int flags, char *data);
				65
				66	const char *btrfs_decode_error(int errno)
				67	{
				68	char *errstr = "unknown";
				69
				70	switch (errno) {
				71	case -EIO:
				72	errstr = "IO failure";
				73	break;
				74	case -ENOMEM:
				75	errstr = "Out of memory";
				76	break;
				77	case -EROFS:
				78	errstr = "Readonly filesystem";
				79	break;
				80	case -EEXIST:
				81	errstr = "Object already exists";
				82	break;
				83	case -ENOSPC:
				84	errstr = "No space left";
				85	break;
				86	case -ENOENT:
				87	errstr = "No such entry";
				88	break;
				89	}
				90
				91	return errstr;
				92	}
				93
				94	/*
				95	* __btrfs_handle_fs_error decodes expected errors from the caller and
				96	* invokes the approciate error response.
				97	*/
				98	__cold
				99	void __btrfs_handle_fs_error(struct btrfs_fs_info fs_info, const char function,
				100	unsigned int line, int errno, const char *fmt, ...)
				101	{
				102	struct super_block *sb = fs_info->sb;
				103	#ifdef CONFIG_PRINTK
				104	const char *errstr;
				105	#endif
				106
				107	/*
				108	* Special case: if the error is EROFS, and we're already
				109	* under SB_RDONLY, then it is safe here.
				110	*/
				111	if (errno == -EROFS && sb_rdonly(sb))
				112	return;
				113
				114	#ifdef CONFIG_PRINTK
				115	errstr = btrfs_decode_error(errno);
				116	if (fmt) {
				117	struct va_format vaf;
				118	va_list args;
				119
				120	va_start(args, fmt);
				121	vaf.fmt = fmt;
				122	vaf.va = &args;
				123
				124	pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
				125	sb->s_id, function, line, errno, errstr, &vaf);
				126	va_end(args);
				127	} else {
				128	pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
				129	sb->s_id, function, line, errno, errstr);
				130	}
				131	#endif
				132
				133	/*
				134	* Today we only save the error info to memory. Long term we'll
				135	* also send it down to the disk
				136	*/
				137	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
				138
				139	/* Don't go through full error handling during mount */
				140	if (!(sb->s_flags & SB_BORN))
				141	return;
				142
				143	if (sb_rdonly(sb))
				144	return;
				145
				146	/* btrfs handle error by forcing the filesystem readonly */
				147	sb->s_flags \|= SB_RDONLY;
				148	btrfs_info(fs_info, "forced readonly");
				149	/*
				150	* Note that a running device replace operation is not canceled here
				151	* although there is no way to update the progress. It would add the
				152	* risk of a deadlock, therefore the canceling is omitted. The only
				153	* penalty is that some I/O remains active until the procedure
				154	* completes. The next time when the filesystem is mounted writeable
				155	* again, the device replace operation continues.
				156	*/
				157	}
				158
				159	#ifdef CONFIG_PRINTK
				160	static const char * const logtypes[] = {
				161	"emergency",
				162	"alert",
				163	"critical",
				164	"error",
				165	"warning",
				166	"notice",
				167	"info",
				168	"debug",
				169	};
				170
				171
				172	/*
				173	* Use one ratelimit state per log level so that a flood of less important
				174	* messages doesn't cause more important ones to be dropped.
				175	*/
				176	static struct ratelimit_state printk_limits[] = {
				177	RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
				178	RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
				179	RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
				180	RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
				181	RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
				182	RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
				183	RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
				184	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
				185	};
				186
				187	void btrfs_printk(const struct btrfs_fs_info fs_info, const char fmt, ...)
				188	{
				189	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
				190	struct va_format vaf;
				191	va_list args;
				192	int kern_level;
				193	const char *type = logtypes[4];
				194	struct ratelimit_state *ratelimit = &printk_limits[4];
				195
				196	va_start(args, fmt);
				197
				198	while ((kern_level = printk_get_level(fmt)) != 0) {
				199	size_t size = printk_skip_level(fmt) - fmt;
				200
				201	if (kern_level >= '0' && kern_level <= '7') {
				202	memcpy(lvl, fmt, size);
				203	lvl[size] = '\0';
				204	type = logtypes[kern_level - '0'];
				205	ratelimit = &printk_limits[kern_level - '0'];
				206	}
				207	fmt += size;
				208	}
				209
				210	vaf.fmt = fmt;
				211	vaf.va = &args;
				212
				213	if (__ratelimit(ratelimit))
				214	printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
				215	fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
				216
				217	va_end(args);
				218	}
				219	#endif
				220
				221	/*
				222	* We only mark the transaction aborted and then set the file system read-only.
				223	* This will prevent new transactions from starting or trying to join this
				224	* one.
				225	*
				226	* This means that error recovery at the call site is limited to freeing
				227	* any local memory allocations and passing the error code up without
				228	* further cleanup. The transaction should complete as it normally would
				229	* in the call path but will return -EIO.
				230	*
				231	* We'll complete the cleanup in btrfs_end_transaction and
				232	* btrfs_commit_transaction.
				233	*/
				234	__cold
				235	void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
				236	const char *function,
				237	unsigned int line, int errno)
				238	{
				239	struct btrfs_fs_info *fs_info = trans->fs_info;
				240
				241	trans->aborted = errno;
				242	/* Nothing used. The other threads that have joined this
				243	* transaction may be able to continue. */
				244	if (!trans->dirty && list_empty(&trans->new_bgs)) {
				245	const char *errstr;
				246
				247	errstr = btrfs_decode_error(errno);
				248	btrfs_warn(fs_info,
				249	"%s:%d: Aborting unused transaction(%s).",
				250	function, line, errstr);
				251	return;
				252	}
				253	WRITE_ONCE(trans->transaction->aborted, errno);
				254	/* Wake up anybody who may be waiting on this transaction */
				255	wake_up(&fs_info->transaction_wait);
				256	wake_up(&fs_info->transaction_blocked_wait);
				257	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
				258	}
				259	/*
				260	* __btrfs_panic decodes unexpected, fatal errors from the caller,
				261	* issues an alert, and either panics or BUGs, depending on mount options.
				262	*/
				263	__cold
				264	void __btrfs_panic(struct btrfs_fs_info fs_info, const char function,
				265	unsigned int line, int errno, const char *fmt, ...)
				266	{
				267	char *s_id = "<unknown>";
				268	const char *errstr;
				269	struct va_format vaf = { .fmt = fmt };
				270	va_list args;
				271
				272	if (fs_info)
				273	s_id = fs_info->sb->s_id;
				274
				275	va_start(args, fmt);
				276	vaf.va = &args;
				277
				278	errstr = btrfs_decode_error(errno);
				279	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
				280	panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
				281	s_id, function, line, &vaf, errno, errstr);
				282
				283	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
				284	function, line, &vaf, errno, errstr);
				285	va_end(args);
				286	/* Caller calls BUG() */
				287	}
				288
				289	static void btrfs_put_super(struct super_block *sb)
				290	{
				291	close_ctree(btrfs_sb(sb));
				292	}
				293
				294	enum {
				295	Opt_acl, Opt_noacl,
				296	Opt_clear_cache,
				297	Opt_commit_interval,
				298	Opt_compress,
				299	Opt_compress_force,
				300	Opt_compress_force_type,
				301	Opt_compress_type,
				302	Opt_degraded,
				303	Opt_device,
				304	Opt_fatal_errors,
				305	Opt_flushoncommit, Opt_noflushoncommit,
				306	Opt_inode_cache, Opt_noinode_cache,
				307	Opt_max_inline,
				308	Opt_barrier, Opt_nobarrier,
				309	Opt_datacow, Opt_nodatacow,
				310	Opt_datasum, Opt_nodatasum,
				311	Opt_defrag, Opt_nodefrag,
				312	Opt_discard, Opt_nodiscard,
				313	Opt_nologreplay,
				314	Opt_norecovery,
				315	Opt_ratio,
				316	Opt_rescan_uuid_tree,
				317	Opt_skip_balance,
				318	Opt_space_cache, Opt_no_space_cache,
				319	Opt_space_cache_version,
				320	Opt_ssd, Opt_nossd,
				321	Opt_ssd_spread, Opt_nossd_spread,
				322	Opt_subvol,
				323	Opt_subvol_empty,
				324	Opt_subvolid,
				325	Opt_thread_pool,
				326	Opt_treelog, Opt_notreelog,
				327	Opt_usebackuproot,
				328	Opt_user_subvol_rm_allowed,
				329
				330	/* Deprecated options */
				331	Opt_alloc_start,
				332	Opt_recovery,
				333	Opt_subvolrootid,
				334
				335	/* Debugging options */
				336	Opt_check_integrity,
				337	Opt_check_integrity_including_extent_data,
				338	Opt_check_integrity_print_mask,
				339	Opt_enospc_debug, Opt_noenospc_debug,
				340	#ifdef CONFIG_BTRFS_DEBUG
				341	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
				342	#endif
				343	#ifdef CONFIG_BTRFS_FS_REF_VERIFY
				344	Opt_ref_verify,
				345	#endif
				346	Opt_err,
				347	};
				348
				349	static const match_table_t tokens = {
				350	{Opt_acl, "acl"},
				351	{Opt_noacl, "noacl"},
				352	{Opt_clear_cache, "clear_cache"},
				353	{Opt_commit_interval, "commit=%u"},
				354	{Opt_compress, "compress"},
				355	{Opt_compress_type, "compress=%s"},
				356	{Opt_compress_force, "compress-force"},
				357	{Opt_compress_force_type, "compress-force=%s"},
				358	{Opt_degraded, "degraded"},
				359	{Opt_device, "device=%s"},
				360	{Opt_fatal_errors, "fatal_errors=%s"},
				361	{Opt_flushoncommit, "flushoncommit"},
				362	{Opt_noflushoncommit, "noflushoncommit"},
				363	{Opt_inode_cache, "inode_cache"},
				364	{Opt_noinode_cache, "noinode_cache"},
				365	{Opt_max_inline, "max_inline=%s"},
				366	{Opt_barrier, "barrier"},
				367	{Opt_nobarrier, "nobarrier"},
				368	{Opt_datacow, "datacow"},
				369	{Opt_nodatacow, "nodatacow"},
				370	{Opt_datasum, "datasum"},
				371	{Opt_nodatasum, "nodatasum"},
				372	{Opt_defrag, "autodefrag"},
				373	{Opt_nodefrag, "noautodefrag"},
				374	{Opt_discard, "discard"},
				375	{Opt_nodiscard, "nodiscard"},
				376	{Opt_nologreplay, "nologreplay"},
				377	{Opt_norecovery, "norecovery"},
				378	{Opt_ratio, "metadata_ratio=%u"},
				379	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
				380	{Opt_skip_balance, "skip_balance"},
				381	{Opt_space_cache, "space_cache"},
				382	{Opt_no_space_cache, "nospace_cache"},
				383	{Opt_space_cache_version, "space_cache=%s"},
				384	{Opt_ssd, "ssd"},
				385	{Opt_nossd, "nossd"},
				386	{Opt_ssd_spread, "ssd_spread"},
				387	{Opt_nossd_spread, "nossd_spread"},
				388	{Opt_subvol, "subvol=%s"},
				389	{Opt_subvol_empty, "subvol="},
				390	{Opt_subvolid, "subvolid=%s"},
				391	{Opt_thread_pool, "thread_pool=%u"},
				392	{Opt_treelog, "treelog"},
				393	{Opt_notreelog, "notreelog"},
				394	{Opt_usebackuproot, "usebackuproot"},
				395	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
				396
				397	/* Deprecated options */
				398	{Opt_alloc_start, "alloc_start=%s"},
				399	{Opt_recovery, "recovery"},
				400	{Opt_subvolrootid, "subvolrootid=%d"},
				401
				402	/* Debugging options */
				403	{Opt_check_integrity, "check_int"},
				404	{Opt_check_integrity_including_extent_data, "check_int_data"},
				405	{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
				406	{Opt_enospc_debug, "enospc_debug"},
				407	{Opt_noenospc_debug, "noenospc_debug"},
				408	#ifdef CONFIG_BTRFS_DEBUG
				409	{Opt_fragment_data, "fragment=data"},
				410	{Opt_fragment_metadata, "fragment=metadata"},
				411	{Opt_fragment_all, "fragment=all"},
				412	#endif
				413	#ifdef CONFIG_BTRFS_FS_REF_VERIFY
				414	{Opt_ref_verify, "ref_verify"},
				415	#endif
				416	{Opt_err, NULL},
				417	};
				418
				419	/*
				420	* Regular mount options parser. Everything that is needed only when
				421	* reading in a new superblock is parsed here.
				422	* XXX JDM: This needs to be cleaned up for remount.
				423	*/
				424	int btrfs_parse_options(struct btrfs_fs_info info, char options,
				425	unsigned long new_flags)
				426	{
				427	substring_t args[MAX_OPT_ARGS];
				428	char p, num;
				429	u64 cache_gen;
				430	int intarg;
				431	int ret = 0;
				432	char *compress_type;
				433	bool compress_force = false;
				434	enum btrfs_compression_type saved_compress_type;
				435	bool saved_compress_force;
				436	int no_compress = 0;
				437
				438	cache_gen = btrfs_super_cache_generation(info->super_copy);
				439	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
				440	btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
				441	else if (cache_gen)
				442	btrfs_set_opt(info->mount_opt, SPACE_CACHE);
				443
				444	/*
				445	* Even the options are empty, we still need to do extra check
				446	* against new flags
				447	*/
				448	if (!options)
				449	goto check;
				450
				451	while ((p = strsep(&options, ",")) != NULL) {
				452	int token;
				453	if (!*p)
				454	continue;
				455
				456	token = match_token(p, tokens, args);
				457	switch (token) {
				458	case Opt_degraded:
				459	btrfs_info(info, "allowing degraded mounts");
				460	btrfs_set_opt(info->mount_opt, DEGRADED);
				461	break;
				462	case Opt_subvol:
				463	case Opt_subvol_empty:
				464	case Opt_subvolid:
				465	case Opt_subvolrootid:
				466	case Opt_device:
				467	/*
				468	* These are parsed by btrfs_parse_subvol_options or
				469	* btrfs_parse_device_options and can be ignored here.
				470	*/
				471	break;
				472	case Opt_nodatasum:
				473	btrfs_set_and_info(info, NODATASUM,
				474	"setting nodatasum");
				475	break;
				476	case Opt_datasum:
				477	if (btrfs_test_opt(info, NODATASUM)) {
				478	if (btrfs_test_opt(info, NODATACOW))
				479	btrfs_info(info,
				480	"setting datasum, datacow enabled");
				481	else
				482	btrfs_info(info, "setting datasum");
				483	}
				484	btrfs_clear_opt(info->mount_opt, NODATACOW);
				485	btrfs_clear_opt(info->mount_opt, NODATASUM);
				486	break;
				487	case Opt_nodatacow:
				488	if (!btrfs_test_opt(info, NODATACOW)) {
				489	if (!btrfs_test_opt(info, COMPRESS) \|\|
				490	!btrfs_test_opt(info, FORCE_COMPRESS)) {
				491	btrfs_info(info,
				492	"setting nodatacow, compression disabled");
				493	} else {
				494	btrfs_info(info, "setting nodatacow");
				495	}
				496	}
				497	btrfs_clear_opt(info->mount_opt, COMPRESS);
				498	btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				499	btrfs_set_opt(info->mount_opt, NODATACOW);
				500	btrfs_set_opt(info->mount_opt, NODATASUM);
				501	break;
				502	case Opt_datacow:
				503	btrfs_clear_and_info(info, NODATACOW,
				504	"setting datacow");
				505	break;
				506	case Opt_compress_force:
				507	case Opt_compress_force_type:
				508	compress_force = true;
				509	/* Fallthrough */
				510	case Opt_compress:
				511	case Opt_compress_type:
				512	saved_compress_type = btrfs_test_opt(info,
				513	COMPRESS) ?
				514	info->compress_type : BTRFS_COMPRESS_NONE;
				515	saved_compress_force =
				516	btrfs_test_opt(info, FORCE_COMPRESS);
				517	if (token == Opt_compress \|\|
				518	token == Opt_compress_force \|\|
				519	strncmp(args[0].from, "zlib", 4) == 0) {
				520	compress_type = "zlib";
				521
				522	info->compress_type = BTRFS_COMPRESS_ZLIB;
				523	info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
				524	/*
				525	* args[0] contains uninitialized data since
				526	* for these tokens we don't expect any
				527	* parameter.
				528	*/
				529	if (token != Opt_compress &&
				530	token != Opt_compress_force)
				531	info->compress_level =
				532	btrfs_compress_str2level(args[0].from);
				533	btrfs_set_opt(info->mount_opt, COMPRESS);
				534	btrfs_clear_opt(info->mount_opt, NODATACOW);
				535	btrfs_clear_opt(info->mount_opt, NODATASUM);
				536	no_compress = 0;
				537	} else if (strncmp(args[0].from, "lzo", 3) == 0) {
				538	compress_type = "lzo";
				539	info->compress_type = BTRFS_COMPRESS_LZO;
				540	btrfs_set_opt(info->mount_opt, COMPRESS);
				541	btrfs_clear_opt(info->mount_opt, NODATACOW);
				542	btrfs_clear_opt(info->mount_opt, NODATASUM);
				543	btrfs_set_fs_incompat(info, COMPRESS_LZO);
				544	no_compress = 0;
				545	} else if (strcmp(args[0].from, "zstd") == 0) {
				546	compress_type = "zstd";
				547	info->compress_type = BTRFS_COMPRESS_ZSTD;
				548	btrfs_set_opt(info->mount_opt, COMPRESS);
				549	btrfs_clear_opt(info->mount_opt, NODATACOW);
				550	btrfs_clear_opt(info->mount_opt, NODATASUM);
				551	btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
				552	no_compress = 0;
				553	} else if (strncmp(args[0].from, "no", 2) == 0) {
				554	compress_type = "no";
				555	btrfs_clear_opt(info->mount_opt, COMPRESS);
				556	btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				557	compress_force = false;
				558	no_compress++;
				559	} else {
				560	ret = -EINVAL;
				561	goto out;
				562	}
				563
				564	if (compress_force) {
				565	btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
				566	} else {
				567	/*
				568	* If we remount from compress-force=xxx to
				569	* compress=xxx, we need clear FORCE_COMPRESS
				570	* flag, otherwise, there is no way for users
				571	* to disable forcible compression separately.
				572	*/
				573	btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				574	}
				575	if ((btrfs_test_opt(info, COMPRESS) &&
				576	(info->compress_type != saved_compress_type \|\|
				577	compress_force != saved_compress_force)) \|\|
				578	(!btrfs_test_opt(info, COMPRESS) &&
				579	no_compress == 1)) {
				580	btrfs_info(info, "%s %s compression, level %d",
				581	(compress_force) ? "force" : "use",
				582	compress_type, info->compress_level);
				583	}
				584	compress_force = false;
				585	break;
				586	case Opt_ssd:
				587	btrfs_set_and_info(info, SSD,
				588	"enabling ssd optimizations");
				589	btrfs_clear_opt(info->mount_opt, NOSSD);
				590	break;
				591	case Opt_ssd_spread:
				592	btrfs_set_and_info(info, SSD,
				593	"enabling ssd optimizations");
				594	btrfs_set_and_info(info, SSD_SPREAD,
				595	"using spread ssd allocation scheme");
				596	btrfs_clear_opt(info->mount_opt, NOSSD);
				597	break;
				598	case Opt_nossd:
				599	btrfs_set_opt(info->mount_opt, NOSSD);
				600	btrfs_clear_and_info(info, SSD,
				601	"not using ssd optimizations");
				602	/* Fallthrough */
				603	case Opt_nossd_spread:
				604	btrfs_clear_and_info(info, SSD_SPREAD,
				605	"not using spread ssd allocation scheme");
				606	break;
				607	case Opt_barrier:
				608	btrfs_clear_and_info(info, NOBARRIER,
				609	"turning on barriers");
				610	break;
				611	case Opt_nobarrier:
				612	btrfs_set_and_info(info, NOBARRIER,
				613	"turning off barriers");
				614	break;
				615	case Opt_thread_pool:
				616	ret = match_int(&args[0], &intarg);
				617	if (ret) {
				618	goto out;
				619	} else if (intarg == 0) {
				620	ret = -EINVAL;
				621	goto out;
				622	}
				623	info->thread_pool_size = intarg;
				624	break;
				625	case Opt_max_inline:
				626	num = match_strdup(&args[0]);
				627	if (num) {
				628	info->max_inline = memparse(num, NULL);
				629	kfree(num);
				630
				631	if (info->max_inline) {
				632	info->max_inline = min_t(u64,
				633	info->max_inline,
				634	info->sectorsize);
				635	}
				636	btrfs_info(info, "max_inline at %llu",
				637	info->max_inline);
				638	} else {
				639	ret = -ENOMEM;
				640	goto out;
				641	}
				642	break;
				643	case Opt_alloc_start:
				644	btrfs_info(info,
				645	"option alloc_start is obsolete, ignored");
				646	break;
				647	case Opt_acl:
				648	#ifdef CONFIG_BTRFS_FS_POSIX_ACL
				649	info->sb->s_flags \|= SB_POSIXACL;
				650	break;
				651	#else
				652	btrfs_err(info, "support for ACL not compiled in!");
				653	ret = -EINVAL;
				654	goto out;
				655	#endif
				656	case Opt_noacl:
				657	info->sb->s_flags &= ~SB_POSIXACL;
				658	break;
				659	case Opt_notreelog:
				660	btrfs_set_and_info(info, NOTREELOG,
				661	"disabling tree log");
				662	break;
				663	case Opt_treelog:
				664	btrfs_clear_and_info(info, NOTREELOG,
				665	"enabling tree log");
				666	break;
				667	case Opt_norecovery:
				668	case Opt_nologreplay:
				669	btrfs_set_and_info(info, NOLOGREPLAY,
				670	"disabling log replay at mount time");
				671	break;
				672	case Opt_flushoncommit:
				673	btrfs_set_and_info(info, FLUSHONCOMMIT,
				674	"turning on flush-on-commit");
				675	break;
				676	case Opt_noflushoncommit:
				677	btrfs_clear_and_info(info, FLUSHONCOMMIT,
				678	"turning off flush-on-commit");
				679	break;
				680	case Opt_ratio:
				681	ret = match_int(&args[0], &intarg);
				682	if (ret)
				683	goto out;
				684	info->metadata_ratio = intarg;
				685	btrfs_info(info, "metadata ratio %u",
				686	info->metadata_ratio);
				687	break;
				688	case Opt_discard:
				689	btrfs_set_and_info(info, DISCARD,
				690	"turning on discard");
				691	break;
				692	case Opt_nodiscard:
				693	btrfs_clear_and_info(info, DISCARD,
				694	"turning off discard");
				695	break;
				696	case Opt_space_cache:
				697	case Opt_space_cache_version:
				698	if (token == Opt_space_cache \|\|
				699	strcmp(args[0].from, "v1") == 0) {
				700	btrfs_clear_opt(info->mount_opt,
				701	FREE_SPACE_TREE);
				702	btrfs_set_and_info(info, SPACE_CACHE,
				703	"enabling disk space caching");
				704	} else if (strcmp(args[0].from, "v2") == 0) {
				705	btrfs_clear_opt(info->mount_opt,
				706	SPACE_CACHE);
				707	btrfs_set_and_info(info, FREE_SPACE_TREE,
				708	"enabling free space tree");
				709	} else {
				710	ret = -EINVAL;
				711	goto out;
				712	}
				713	break;
				714	case Opt_rescan_uuid_tree:
				715	btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
				716	break;
				717	case Opt_no_space_cache:
				718	if (btrfs_test_opt(info, SPACE_CACHE)) {
				719	btrfs_clear_and_info(info, SPACE_CACHE,
				720	"disabling disk space caching");
				721	}
				722	if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
				723	btrfs_clear_and_info(info, FREE_SPACE_TREE,
				724	"disabling free space tree");
				725	}
				726	break;
				727	case Opt_inode_cache:
				728	btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
				729	"enabling inode map caching");
				730	break;
				731	case Opt_noinode_cache:
				732	btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
				733	"disabling inode map caching");
				734	break;
				735	case Opt_clear_cache:
				736	btrfs_set_and_info(info, CLEAR_CACHE,
				737	"force clearing of disk cache");
				738	break;
				739	case Opt_user_subvol_rm_allowed:
				740	btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
				741	break;
				742	case Opt_enospc_debug:
				743	btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
				744	break;
				745	case Opt_noenospc_debug:
				746	btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
				747	break;
				748	case Opt_defrag:
				749	btrfs_set_and_info(info, AUTO_DEFRAG,
				750	"enabling auto defrag");
				751	break;
				752	case Opt_nodefrag:
				753	btrfs_clear_and_info(info, AUTO_DEFRAG,
				754	"disabling auto defrag");
				755	break;
				756	case Opt_recovery:
				757	btrfs_warn(info,
				758	"'recovery' is deprecated, use 'usebackuproot' instead");
				759	/* fall through */
				760	case Opt_usebackuproot:
				761	btrfs_info(info,
				762	"trying to use backup root at mount time");
				763	btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
				764	break;
				765	case Opt_skip_balance:
				766	btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
				767	break;
				768	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
				769	case Opt_check_integrity_including_extent_data:
				770	btrfs_info(info,
				771	"enabling check integrity including extent data");
				772	btrfs_set_opt(info->mount_opt,
				773	CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
				774	btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
				775	break;
				776	case Opt_check_integrity:
				777	btrfs_info(info, "enabling check integrity");
				778	btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
				779	break;
				780	case Opt_check_integrity_print_mask:
				781	ret = match_int(&args[0], &intarg);
				782	if (ret)
				783	goto out;
				784	info->check_integrity_print_mask = intarg;
				785	btrfs_info(info, "check_integrity_print_mask 0x%x",
				786	info->check_integrity_print_mask);
				787	break;
				788	#else
				789	case Opt_check_integrity_including_extent_data:
				790	case Opt_check_integrity:
				791	case Opt_check_integrity_print_mask:
				792	btrfs_err(info,
				793	"support for check_integrity* not compiled in!");
				794	ret = -EINVAL;
				795	goto out;
				796	#endif
				797	case Opt_fatal_errors:
				798	if (strcmp(args[0].from, "panic") == 0)
				799	btrfs_set_opt(info->mount_opt,
				800	PANIC_ON_FATAL_ERROR);
				801	else if (strcmp(args[0].from, "bug") == 0)
				802	btrfs_clear_opt(info->mount_opt,
				803	PANIC_ON_FATAL_ERROR);
				804	else {
				805	ret = -EINVAL;
				806	goto out;
				807	}
				808	break;
				809	case Opt_commit_interval:
				810	intarg = 0;
				811	ret = match_int(&args[0], &intarg);
				812	if (ret)
				813	goto out;
				814	if (intarg == 0) {
				815	btrfs_info(info,
				816	"using default commit interval %us",
				817	BTRFS_DEFAULT_COMMIT_INTERVAL);
				818	intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
				819	} else if (intarg > 300) {
				820	btrfs_warn(info, "excessive commit interval %d",
				821	intarg);
				822	}
				823	info->commit_interval = intarg;
				824	break;
				825	#ifdef CONFIG_BTRFS_DEBUG
				826	case Opt_fragment_all:
				827	btrfs_info(info, "fragmenting all space");
				828	btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
				829	btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
				830	break;
				831	case Opt_fragment_metadata:
				832	btrfs_info(info, "fragmenting metadata");
				833	btrfs_set_opt(info->mount_opt,
				834	FRAGMENT_METADATA);
				835	break;
				836	case Opt_fragment_data:
				837	btrfs_info(info, "fragmenting data");
				838	btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
				839	break;
				840	#endif
				841	#ifdef CONFIG_BTRFS_FS_REF_VERIFY
				842	case Opt_ref_verify:
				843	btrfs_info(info, "doing ref verification");
				844	btrfs_set_opt(info->mount_opt, REF_VERIFY);
				845	break;
				846	#endif
				847	case Opt_err:
				848	btrfs_info(info, "unrecognized mount option '%s'", p);
				849	ret = -EINVAL;
				850	goto out;
				851	default:
				852	break;
				853	}
				854	}
				855	check:
				856	/*
				857	* Extra check for current option against current flag
				858	*/
				859	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
				860	btrfs_err(info,
				861	"nologreplay must be used with ro mount option");
				862	ret = -EINVAL;
				863	}
				864	out:
				865	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
				866	!btrfs_test_opt(info, FREE_SPACE_TREE) &&
				867	!btrfs_test_opt(info, CLEAR_CACHE)) {
				868	btrfs_err(info, "cannot disable free space tree");
				869	ret = -EINVAL;
				870
				871	}
				872	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
				873	btrfs_info(info, "disk space caching is enabled");
				874	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
				875	btrfs_info(info, "using free space tree");
				876	return ret;
				877	}
				878
				879	/*
				880	* Parse mount options that are required early in the mount process.
				881	*
				882	* All other options will be parsed on much later in the mount process and
				883	* only when we need to allocate a new super block.
				884	*/
				885	static int btrfs_parse_device_options(const char *options, fmode_t flags,
				886	void *holder)
				887	{
				888	substring_t args[MAX_OPT_ARGS];
				889	char device_name, opts, orig, p;
				890	struct btrfs_device *device = NULL;
				891	int error = 0;
				892
				893	lockdep_assert_held(&uuid_mutex);
				894
				895	if (!options)
				896	return 0;
				897
				898	/*
				899	* strsep changes the string, duplicate it because btrfs_parse_options
				900	* gets called later
				901	*/
				902	opts = kstrdup(options, GFP_KERNEL);
				903	if (!opts)
				904	return -ENOMEM;
				905	orig = opts;
				906
				907	while ((p = strsep(&opts, ",")) != NULL) {
				908	int token;
				909
				910	if (!*p)
				911	continue;
				912
				913	token = match_token(p, tokens, args);
				914	if (token == Opt_device) {
				915	device_name = match_strdup(&args[0]);
				916	if (!device_name) {
				917	error = -ENOMEM;
				918	goto out;
				919	}
				920	device = btrfs_scan_one_device(device_name, flags,
				921	holder);
				922	kfree(device_name);
				923	if (IS_ERR(device)) {
				924	error = PTR_ERR(device);
				925	goto out;
				926	}
				927	}
				928	}
				929
				930	out:
				931	kfree(orig);
				932	return error;
				933	}
				934
				935	/*
				936	* Parse mount options that are related to subvolume id
				937	*
				938	* The value is later passed to mount_subvol()
				939	*/
				940	static int btrfs_parse_subvol_options(const char options, char *subvol_name,
				941	u64 *subvol_objectid)
				942	{
				943	substring_t args[MAX_OPT_ARGS];
				944	char opts, orig, *p;
				945	int error = 0;
				946	u64 subvolid;
				947
				948	if (!options)
				949	return 0;
				950
				951	/*
				952	* strsep changes the string, duplicate it because
				953	* btrfs_parse_device_options gets called later
				954	*/
				955	opts = kstrdup(options, GFP_KERNEL);
				956	if (!opts)
				957	return -ENOMEM;
				958	orig = opts;
				959
				960	while ((p = strsep(&opts, ",")) != NULL) {
				961	int token;
				962	if (!*p)
				963	continue;
				964
				965	token = match_token(p, tokens, args);
				966	switch (token) {
				967	case Opt_subvol:
				968	kfree(*subvol_name);
				969	*subvol_name = match_strdup(&args[0]);
				970	if (!*subvol_name) {
				971	error = -ENOMEM;
				972	goto out;
				973	}
				974	break;
				975	case Opt_subvolid:
				976	error = match_u64(&args[0], &subvolid);
				977	if (error)
				978	goto out;
				979
				980	/* we want the original fs_tree */
				981	if (subvolid == 0)
				982	subvolid = BTRFS_FS_TREE_OBJECTID;
				983
				984	*subvol_objectid = subvolid;
				985	break;
				986	case Opt_subvolrootid:
				987	pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
				988	break;
				989	default:
				990	break;
				991	}
				992	}
				993
				994	out:
				995	kfree(orig);
				996	return error;
				997	}
				998
				999	static char get_subvol_name_from_objectid(struct btrfs_fs_info fs_info,
				1000	u64 subvol_objectid)
				1001	{
				1002	struct btrfs_root *root = fs_info->tree_root;
				1003	struct btrfs_root *fs_root;
				1004	struct btrfs_root_ref *root_ref;
				1005	struct btrfs_inode_ref *inode_ref;
				1006	struct btrfs_key key;
				1007	struct btrfs_path *path = NULL;
				1008	char name = NULL, ptr;
				1009	u64 dirid;
				1010	int len;
				1011	int ret;
				1012
				1013	path = btrfs_alloc_path();
				1014	if (!path) {
				1015	ret = -ENOMEM;
				1016	goto err;
				1017	}
				1018	path->leave_spinning = 1;
				1019
				1020	name = kmalloc(PATH_MAX, GFP_KERNEL);
				1021	if (!name) {
				1022	ret = -ENOMEM;
				1023	goto err;
				1024	}
				1025	ptr = name + PATH_MAX - 1;
				1026	ptr[0] = '\0';
				1027
				1028	/*
				1029	* Walk up the subvolume trees in the tree of tree roots by root
				1030	* backrefs until we hit the top-level subvolume.
				1031	*/
				1032	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
				1033	key.objectid = subvol_objectid;
				1034	key.type = BTRFS_ROOT_BACKREF_KEY;
				1035	key.offset = (u64)-1;
				1036
				1037	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				1038	if (ret < 0) {
				1039	goto err;
				1040	} else if (ret > 0) {
				1041	ret = btrfs_previous_item(root, path, subvol_objectid,
				1042	BTRFS_ROOT_BACKREF_KEY);
				1043	if (ret < 0) {
				1044	goto err;
				1045	} else if (ret > 0) {
				1046	ret = -ENOENT;
				1047	goto err;
				1048	}
				1049	}
				1050
				1051	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				1052	subvol_objectid = key.offset;
				1053
				1054	root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				1055	struct btrfs_root_ref);
				1056	len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
				1057	ptr -= len + 1;
				1058	if (ptr < name) {
				1059	ret = -ENAMETOOLONG;
				1060	goto err;
				1061	}
				1062	read_extent_buffer(path->nodes[0], ptr + 1,
				1063	(unsigned long)(root_ref + 1), len);
				1064	ptr[0] = '/';
				1065	dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
				1066	btrfs_release_path(path);
				1067
				1068	key.objectid = subvol_objectid;
				1069	key.type = BTRFS_ROOT_ITEM_KEY;
				1070	key.offset = (u64)-1;
				1071	fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
				1072	if (IS_ERR(fs_root)) {
				1073	ret = PTR_ERR(fs_root);
				1074	goto err;
				1075	}
				1076
				1077	/*
				1078	* Walk up the filesystem tree by inode refs until we hit the
				1079	* root directory.
				1080	*/
				1081	while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
				1082	key.objectid = dirid;
				1083	key.type = BTRFS_INODE_REF_KEY;
				1084	key.offset = (u64)-1;
				1085
				1086	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
				1087	if (ret < 0) {
				1088	goto err;
				1089	} else if (ret > 0) {
				1090	ret = btrfs_previous_item(fs_root, path, dirid,
				1091	BTRFS_INODE_REF_KEY);
				1092	if (ret < 0) {
				1093	goto err;
				1094	} else if (ret > 0) {
				1095	ret = -ENOENT;
				1096	goto err;
				1097	}
				1098	}
				1099
				1100	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				1101	dirid = key.offset;
				1102
				1103	inode_ref = btrfs_item_ptr(path->nodes[0],
				1104	path->slots[0],
				1105	struct btrfs_inode_ref);
				1106	len = btrfs_inode_ref_name_len(path->nodes[0],
				1107	inode_ref);
				1108	ptr -= len + 1;
				1109	if (ptr < name) {
				1110	ret = -ENAMETOOLONG;
				1111	goto err;
				1112	}
				1113	read_extent_buffer(path->nodes[0], ptr + 1,
				1114	(unsigned long)(inode_ref + 1), len);
				1115	ptr[0] = '/';
				1116	btrfs_release_path(path);
				1117	}
				1118	}
				1119
				1120	btrfs_free_path(path);
				1121	if (ptr == name + PATH_MAX - 1) {
				1122	name[0] = '/';
				1123	name[1] = '\0';
				1124	} else {
				1125	memmove(name, ptr, name + PATH_MAX - ptr);
				1126	}
				1127	return name;
				1128
				1129	err:
				1130	btrfs_free_path(path);
				1131	kfree(name);
				1132	return ERR_PTR(ret);
				1133	}
				1134
				1135	static int get_default_subvol_objectid(struct btrfs_fs_info fs_info, u64 objectid)
				1136	{
				1137	struct btrfs_root *root = fs_info->tree_root;
				1138	struct btrfs_dir_item *di;
				1139	struct btrfs_path *path;
				1140	struct btrfs_key location;
				1141	u64 dir_id;
				1142
				1143	path = btrfs_alloc_path();
				1144	if (!path)
				1145	return -ENOMEM;
				1146	path->leave_spinning = 1;
				1147
				1148	/*
				1149	* Find the "default" dir item which points to the root item that we
				1150	* will mount by default if we haven't been given a specific subvolume
				1151	* to mount.
				1152	*/
				1153	dir_id = btrfs_super_root_dir(fs_info->super_copy);
				1154	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
				1155	if (IS_ERR(di)) {
				1156	btrfs_free_path(path);
				1157	return PTR_ERR(di);
				1158	}
				1159	if (!di) {
				1160	/*
				1161	* Ok the default dir item isn't there. This is weird since
				1162	* it's always been there, but don't freak out, just try and
				1163	* mount the top-level subvolume.
				1164	*/
				1165	btrfs_free_path(path);
				1166	*objectid = BTRFS_FS_TREE_OBJECTID;
				1167	return 0;
				1168	}
				1169
				1170	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
				1171	btrfs_free_path(path);
				1172	*objectid = location.objectid;
				1173	return 0;
				1174	}
				1175
				1176	static int btrfs_fill_super(struct super_block *sb,
				1177	struct btrfs_fs_devices *fs_devices,
				1178	void *data)
				1179	{
				1180	struct inode *inode;
				1181	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				1182	struct btrfs_key key;
				1183	int err;
				1184
				1185	sb->s_maxbytes = MAX_LFS_FILESIZE;
				1186	sb->s_magic = BTRFS_SUPER_MAGIC;
				1187	sb->s_op = &btrfs_super_ops;
				1188	sb->s_d_op = &btrfs_dentry_operations;
				1189	sb->s_export_op = &btrfs_export_ops;
				1190	sb->s_xattr = btrfs_xattr_handlers;
				1191	sb->s_time_gran = 1;
				1192	#ifdef CONFIG_BTRFS_FS_POSIX_ACL
				1193	sb->s_flags \|= SB_POSIXACL;
				1194	#endif
				1195	sb->s_flags \|= SB_I_VERSION;
				1196	sb->s_iflags \|= SB_I_CGROUPWB;
				1197
				1198	err = super_setup_bdi(sb);
				1199	if (err) {
				1200	btrfs_err(fs_info, "super_setup_bdi failed");
				1201	return err;
				1202	}
				1203
				1204	err = open_ctree(sb, fs_devices, (char *)data);
				1205	if (err) {
				1206	btrfs_err(fs_info, "open_ctree failed");
				1207	return err;
				1208	}
				1209
				1210	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
				1211	key.type = BTRFS_INODE_ITEM_KEY;
				1212	key.offset = 0;
				1213	inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
				1214	if (IS_ERR(inode)) {
				1215	err = PTR_ERR(inode);
				1216	goto fail_close;
				1217	}
				1218
				1219	sb->s_root = d_make_root(inode);
				1220	if (!sb->s_root) {
				1221	err = -ENOMEM;
				1222	goto fail_close;
				1223	}
				1224
				1225	cleancache_init_fs(sb);
				1226	sb->s_flags \|= SB_ACTIVE;
				1227	return 0;
				1228
				1229	fail_close:
				1230	close_ctree(fs_info);
				1231	return err;
				1232	}
				1233
				1234	int btrfs_sync_fs(struct super_block *sb, int wait)
				1235	{
				1236	struct btrfs_trans_handle *trans;
				1237	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				1238	struct btrfs_root *root = fs_info->tree_root;
				1239
				1240	trace_btrfs_sync_fs(fs_info, wait);
				1241
				1242	if (!wait) {
				1243	filemap_flush(fs_info->btree_inode->i_mapping);
				1244	return 0;
				1245	}
				1246
				1247	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
				1248
				1249	trans = btrfs_attach_transaction_barrier(root);
				1250	if (IS_ERR(trans)) {
				1251	/* no transaction, don't bother */
				1252	if (PTR_ERR(trans) == -ENOENT) {
				1253	/*
				1254	* Exit unless we have some pending changes
				1255	* that need to go through commit
				1256	*/
				1257	if (fs_info->pending_changes == 0)
				1258	return 0;
				1259	/*
				1260	* A non-blocking test if the fs is frozen. We must not
				1261	* start a new transaction here otherwise a deadlock
				1262	* happens. The pending operations are delayed to the
				1263	* next commit after thawing.
				1264	*/
				1265	if (sb_start_write_trylock(sb))
				1266	sb_end_write(sb);
				1267	else
				1268	return 0;
				1269	trans = btrfs_start_transaction(root, 0);
				1270	}
				1271	if (IS_ERR(trans))
				1272	return PTR_ERR(trans);
				1273	}
				1274	return btrfs_commit_transaction(trans);
				1275	}
				1276
				1277	static int btrfs_show_options(struct seq_file seq, struct dentry dentry)
				1278	{
				1279	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
				1280	const char *compress_type;
				1281
				1282	if (btrfs_test_opt(info, DEGRADED))
				1283	seq_puts(seq, ",degraded");
				1284	if (btrfs_test_opt(info, NODATASUM))
				1285	seq_puts(seq, ",nodatasum");
				1286	if (btrfs_test_opt(info, NODATACOW))
				1287	seq_puts(seq, ",nodatacow");
				1288	if (btrfs_test_opt(info, NOBARRIER))
				1289	seq_puts(seq, ",nobarrier");
				1290	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
				1291	seq_printf(seq, ",max_inline=%llu", info->max_inline);
				1292	if (info->thread_pool_size != min_t(unsigned long,
				1293	num_online_cpus() + 2, 8))
				1294	seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
				1295	if (btrfs_test_opt(info, COMPRESS)) {
				1296	compress_type = btrfs_compress_type2str(info->compress_type);
				1297	if (btrfs_test_opt(info, FORCE_COMPRESS))
				1298	seq_printf(seq, ",compress-force=%s", compress_type);
				1299	else
				1300	seq_printf(seq, ",compress=%s", compress_type);
				1301	if (info->compress_level)
				1302	seq_printf(seq, ":%d", info->compress_level);
				1303	}
				1304	if (btrfs_test_opt(info, NOSSD))
				1305	seq_puts(seq, ",nossd");
				1306	if (btrfs_test_opt(info, SSD_SPREAD))
				1307	seq_puts(seq, ",ssd_spread");
				1308	else if (btrfs_test_opt(info, SSD))
				1309	seq_puts(seq, ",ssd");
				1310	if (btrfs_test_opt(info, NOTREELOG))
				1311	seq_puts(seq, ",notreelog");
				1312	if (btrfs_test_opt(info, NOLOGREPLAY))
				1313	seq_puts(seq, ",nologreplay");
				1314	if (btrfs_test_opt(info, FLUSHONCOMMIT))
				1315	seq_puts(seq, ",flushoncommit");
				1316	if (btrfs_test_opt(info, DISCARD))
				1317	seq_puts(seq, ",discard");
				1318	if (!(info->sb->s_flags & SB_POSIXACL))
				1319	seq_puts(seq, ",noacl");
				1320	if (btrfs_test_opt(info, SPACE_CACHE))
				1321	seq_puts(seq, ",space_cache");
				1322	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
				1323	seq_puts(seq, ",space_cache=v2");
				1324	else
				1325	seq_puts(seq, ",nospace_cache");
				1326	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
				1327	seq_puts(seq, ",rescan_uuid_tree");
				1328	if (btrfs_test_opt(info, CLEAR_CACHE))
				1329	seq_puts(seq, ",clear_cache");
				1330	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
				1331	seq_puts(seq, ",user_subvol_rm_allowed");
				1332	if (btrfs_test_opt(info, ENOSPC_DEBUG))
				1333	seq_puts(seq, ",enospc_debug");
				1334	if (btrfs_test_opt(info, AUTO_DEFRAG))
				1335	seq_puts(seq, ",autodefrag");
				1336	if (btrfs_test_opt(info, INODE_MAP_CACHE))
				1337	seq_puts(seq, ",inode_cache");
				1338	if (btrfs_test_opt(info, SKIP_BALANCE))
				1339	seq_puts(seq, ",skip_balance");
				1340	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
				1341	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
				1342	seq_puts(seq, ",check_int_data");
				1343	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
				1344	seq_puts(seq, ",check_int");
				1345	if (info->check_integrity_print_mask)
				1346	seq_printf(seq, ",check_int_print_mask=%d",
				1347	info->check_integrity_print_mask);
				1348	#endif
				1349	if (info->metadata_ratio)
				1350	seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
				1351	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
				1352	seq_puts(seq, ",fatal_errors=panic");
				1353	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
				1354	seq_printf(seq, ",commit=%u", info->commit_interval);
				1355	#ifdef CONFIG_BTRFS_DEBUG
				1356	if (btrfs_test_opt(info, FRAGMENT_DATA))
				1357	seq_puts(seq, ",fragment=data");
				1358	if (btrfs_test_opt(info, FRAGMENT_METADATA))
				1359	seq_puts(seq, ",fragment=metadata");
				1360	#endif
				1361	if (btrfs_test_opt(info, REF_VERIFY))
				1362	seq_puts(seq, ",ref_verify");
				1363	seq_printf(seq, ",subvolid=%llu",
				1364	BTRFS_I(d_inode(dentry))->root->root_key.objectid);
				1365	seq_puts(seq, ",subvol=");
				1366	seq_dentry(seq, dentry, " \t\n\\");
				1367	return 0;
				1368	}
				1369
				1370	static int btrfs_test_super(struct super_block s, void data)
				1371	{
				1372	struct btrfs_fs_info *p = data;
				1373	struct btrfs_fs_info *fs_info = btrfs_sb(s);
				1374
				1375	return fs_info->fs_devices == p->fs_devices;
				1376	}
				1377
				1378	static int btrfs_set_super(struct super_block s, void data)
				1379	{
				1380	int err = set_anon_super(s, data);
				1381	if (!err)
				1382	s->s_fs_info = data;
				1383	return err;
				1384	}
				1385
				1386	/*
				1387	* subvolumes are identified by ino 256
				1388	*/
				1389	static inline int is_subvolume_inode(struct inode *inode)
				1390	{
				1391	if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
				1392	return 1;
				1393	return 0;
				1394	}
				1395
				1396	static struct dentry mount_subvol(const char subvol_name, u64 subvol_objectid,
				1397	const char device_name, struct vfsmount mnt)
				1398	{
				1399	struct dentry *root;
				1400	int ret;
				1401
				1402	if (!subvol_name) {
				1403	if (!subvol_objectid) {
				1404	ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
				1405	&subvol_objectid);
				1406	if (ret) {
				1407	root = ERR_PTR(ret);
				1408	goto out;
				1409	}
				1410	}
				1411	subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
				1412	subvol_objectid);
				1413	if (IS_ERR(subvol_name)) {
				1414	root = ERR_CAST(subvol_name);
				1415	subvol_name = NULL;
				1416	goto out;
				1417	}
				1418
				1419	}
				1420
				1421	root = mount_subtree(mnt, subvol_name);
				1422	/* mount_subtree() drops our reference on the vfsmount. */
				1423	mnt = NULL;
				1424
				1425	if (!IS_ERR(root)) {
				1426	struct super_block *s = root->d_sb;
				1427	struct btrfs_fs_info *fs_info = btrfs_sb(s);
				1428	struct inode *root_inode = d_inode(root);
				1429	u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
				1430
				1431	ret = 0;
				1432	if (!is_subvolume_inode(root_inode)) {
				1433	btrfs_err(fs_info, "'%s' is not a valid subvolume",
				1434	subvol_name);
				1435	ret = -EINVAL;
				1436	}
				1437	if (subvol_objectid && root_objectid != subvol_objectid) {
				1438	/*
				1439	* This will also catch a race condition where a
				1440	* subvolume which was passed by ID is renamed and
				1441	* another subvolume is renamed over the old location.
				1442	*/
				1443	btrfs_err(fs_info,
				1444	"subvol '%s' does not match subvolid %llu",
				1445	subvol_name, subvol_objectid);
				1446	ret = -EINVAL;
				1447	}
				1448	if (ret) {
				1449	dput(root);
				1450	root = ERR_PTR(ret);
				1451	deactivate_locked_super(s);
				1452	}
				1453	}
				1454
				1455	out:
				1456	mntput(mnt);
				1457	kfree(subvol_name);
				1458	return root;
				1459	}
				1460
				1461	static int parse_security_options(char *orig_opts,
				1462	struct security_mnt_opts *sec_opts)
				1463	{
				1464	char *secdata = NULL;
				1465	int ret = 0;
				1466
				1467	secdata = alloc_secdata();
				1468	if (!secdata)
				1469	return -ENOMEM;
				1470	ret = security_sb_copy_data(orig_opts, secdata);
				1471	if (ret) {
				1472	free_secdata(secdata);
				1473	return ret;
				1474	}
				1475	ret = security_sb_parse_opts_str(secdata, sec_opts);
				1476	free_secdata(secdata);
				1477	return ret;
				1478	}
				1479
				1480	static int setup_security_options(struct btrfs_fs_info *fs_info,
				1481	struct super_block *sb,
				1482	struct security_mnt_opts *sec_opts)
				1483	{
				1484	int ret = 0;
				1485
				1486	/*
				1487	* Call security_sb_set_mnt_opts() to check whether new sec_opts
				1488	* is valid.
				1489	*/
				1490	ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
				1491	if (ret)
				1492	return ret;
				1493
				1494	#ifdef CONFIG_SECURITY
				1495	if (!fs_info->security_opts.num_mnt_opts) {
				1496	/* first time security setup, copy sec_opts to fs_info */
				1497	memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
				1498	} else {
				1499	/*
				1500	* Since SELinux (the only one supporting security_mnt_opts)
				1501	* does NOT support changing context during remount/mount of
				1502	* the same sb, this must be the same or part of the same
				1503	* security options, just free it.
				1504	*/
				1505	security_free_mnt_opts(sec_opts);
				1506	}
				1507	#endif
				1508	return ret;
				1509	}
				1510
				1511	/*
				1512	* Find a superblock for the given device / mount point.
				1513	*
				1514	* Note: This is based on mount_bdev from fs/super.c with a few additions
				1515	* for multiple device setup. Make sure to keep it in sync.
				1516	*/
				1517	static struct dentry btrfs_mount_root(struct file_system_type fs_type,
				1518	int flags, const char device_name, void data)
				1519	{
				1520	struct block_device *bdev = NULL;
				1521	struct super_block *s;
				1522	struct btrfs_device *device = NULL;
				1523	struct btrfs_fs_devices *fs_devices = NULL;
				1524	struct btrfs_fs_info *fs_info = NULL;
				1525	struct security_mnt_opts new_sec_opts;
				1526	fmode_t mode = FMODE_READ;
				1527	int error = 0;
				1528
				1529	if (!(flags & SB_RDONLY))
				1530	mode \|= FMODE_WRITE;
				1531
				1532	security_init_mnt_opts(&new_sec_opts);
				1533	if (data) {
				1534	error = parse_security_options(data, &new_sec_opts);
				1535	if (error)
				1536	return ERR_PTR(error);
				1537	}
				1538
				1539	/*
				1540	* Setup a dummy root and fs_info for test/set super. This is because
				1541	* we don't actually fill this stuff out until open_ctree, but we need
				1542	* it for searching for existing supers, so this lets us do that and
				1543	* then open_ctree will properly initialize everything later.
				1544	*/
				1545	fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
				1546	if (!fs_info) {
				1547	error = -ENOMEM;
				1548	goto error_sec_opts;
				1549	}
				1550
				1551	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
				1552	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
				1553	security_init_mnt_opts(&fs_info->security_opts);
				1554	if (!fs_info->super_copy \|\| !fs_info->super_for_commit) {
				1555	error = -ENOMEM;
				1556	goto error_fs_info;
				1557	}
				1558
				1559	mutex_lock(&uuid_mutex);
				1560	error = btrfs_parse_device_options(data, mode, fs_type);
				1561	if (error) {
				1562	mutex_unlock(&uuid_mutex);
				1563	goto error_fs_info;
				1564	}
				1565
				1566	device = btrfs_scan_one_device(device_name, mode, fs_type);
				1567	if (IS_ERR(device)) {
				1568	mutex_unlock(&uuid_mutex);
				1569	error = PTR_ERR(device);
				1570	goto error_fs_info;
				1571	}
				1572
				1573	fs_devices = device->fs_devices;
				1574	fs_info->fs_devices = fs_devices;
				1575
				1576	error = btrfs_open_devices(fs_devices, mode, fs_type);
				1577	mutex_unlock(&uuid_mutex);
				1578	if (error)
				1579	goto error_fs_info;
				1580
				1581	if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
				1582	error = -EACCES;
				1583	goto error_close_devices;
				1584	}
				1585
				1586	bdev = fs_devices->latest_bdev;
				1587	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags \| SB_NOSEC,
				1588	fs_info);
				1589	if (IS_ERR(s)) {
				1590	error = PTR_ERR(s);
				1591	goto error_close_devices;
				1592	}
				1593
				1594	if (s->s_root) {
				1595	btrfs_close_devices(fs_devices);
				1596	free_fs_info(fs_info);
				1597	if ((flags ^ s->s_flags) & SB_RDONLY)
				1598	error = -EBUSY;
				1599	} else {
				1600	snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
				1601	btrfs_sb(s)->bdev_holder = fs_type;
				1602	error = btrfs_fill_super(s, fs_devices, data);
				1603	}
				1604	if (error) {
				1605	deactivate_locked_super(s);
				1606	goto error_sec_opts;
				1607	}
				1608
				1609	fs_info = btrfs_sb(s);
				1610	error = setup_security_options(fs_info, s, &new_sec_opts);
				1611	if (error) {
				1612	deactivate_locked_super(s);
				1613	goto error_sec_opts;
				1614	}
				1615
				1616	return dget(s->s_root);
				1617
				1618	error_close_devices:
				1619	btrfs_close_devices(fs_devices);
				1620	error_fs_info:
				1621	free_fs_info(fs_info);
				1622	error_sec_opts:
				1623	security_free_mnt_opts(&new_sec_opts);
				1624	return ERR_PTR(error);
				1625	}
				1626
				1627	/*
				1628	* Mount function which is called by VFS layer.
				1629	*
				1630	* In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
				1631	* which needs vfsmount* of device's root (/). This means device's root has to
				1632	* be mounted internally in any case.
				1633	*
				1634	* Operation flow:
				1635	* 1. Parse subvol id related options for later use in mount_subvol().
				1636	*
				1637	* 2. Mount device's root (/) by calling vfs_kern_mount().
				1638	*
				1639	* NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
				1640	* first place. In order to avoid calling btrfs_mount() again, we use
				1641	* different file_system_type which is not registered to VFS by
				1642	* register_filesystem() (btrfs_root_fs_type). As a result,
				1643	* btrfs_mount_root() is called. The return value will be used by
				1644	* mount_subtree() in mount_subvol().
				1645	*
				1646	* 3. Call mount_subvol() to get the dentry of subvolume. Since there is
				1647	* "btrfs subvolume set-default", mount_subvol() is called always.
				1648	*/
				1649	static struct dentry btrfs_mount(struct file_system_type fs_type, int flags,
				1650	const char device_name, void data)
				1651	{
				1652	struct vfsmount *mnt_root;
				1653	struct dentry *root;
				1654	fmode_t mode = FMODE_READ;
				1655	char *subvol_name = NULL;
				1656	u64 subvol_objectid = 0;
				1657	int error = 0;
				1658
				1659	if (!(flags & SB_RDONLY))
				1660	mode \|= FMODE_WRITE;
				1661
				1662	error = btrfs_parse_subvol_options(data, &subvol_name,
				1663	&subvol_objectid);
				1664	if (error) {
				1665	kfree(subvol_name);
				1666	return ERR_PTR(error);
				1667	}
				1668
				1669	/* mount device's root (/) */
				1670	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
				1671	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
				1672	if (flags & SB_RDONLY) {
				1673	mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				1674	flags & ~SB_RDONLY, device_name, data);
				1675	} else {
				1676	mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				1677	flags \| SB_RDONLY, device_name, data);
				1678	if (IS_ERR(mnt_root)) {
				1679	root = ERR_CAST(mnt_root);
				1680	kfree(subvol_name);
				1681	goto out;
				1682	}
				1683
				1684	down_write(&mnt_root->mnt_sb->s_umount);
				1685	error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
				1686	up_write(&mnt_root->mnt_sb->s_umount);
				1687	if (error < 0) {
				1688	root = ERR_PTR(error);
				1689	mntput(mnt_root);
				1690	kfree(subvol_name);
				1691	goto out;
				1692	}
				1693	}
				1694	}
				1695	if (IS_ERR(mnt_root)) {
				1696	root = ERR_CAST(mnt_root);
				1697	kfree(subvol_name);
				1698	goto out;
				1699	}
				1700
				1701	/* mount_subvol() will free subvol_name and mnt_root */
				1702	root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
				1703
				1704	out:
				1705	return root;
				1706	}
				1707
				1708	static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				1709	u32 new_pool_size, u32 old_pool_size)
				1710	{
				1711	if (new_pool_size == old_pool_size)
				1712	return;
				1713
				1714	fs_info->thread_pool_size = new_pool_size;
				1715
				1716	btrfs_info(fs_info, "resize thread pool %d -> %d",
				1717	old_pool_size, new_pool_size);
				1718
				1719	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
				1720	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
				1721	btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
				1722	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
				1723	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
				1724	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
				1725	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				1726	new_pool_size);
				1727	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
				1728	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
				1729	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
				1730	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
				1731	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				1732	new_pool_size);
				1733	}
				1734
				1735	static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
				1736	{
				1737	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
				1738	}
				1739
				1740	static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				1741	unsigned long old_opts, int flags)
				1742	{
				1743	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
				1744	(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) \|\|
				1745	(flags & SB_RDONLY))) {
				1746	/* wait for any defraggers to finish */
				1747	wait_event(fs_info->transaction_wait,
				1748	(atomic_read(&fs_info->defrag_running) == 0));
				1749	if (flags & SB_RDONLY)
				1750	sync_filesystem(fs_info->sb);
				1751	}
				1752	}
				1753
				1754	static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
				1755	unsigned long old_opts)
				1756	{
				1757	/*
				1758	* We need to cleanup all defragable inodes if the autodefragment is
				1759	* close or the filesystem is read only.
				1760	*/
				1761	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
				1762	(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) \|\| sb_rdonly(fs_info->sb))) {
				1763	btrfs_cleanup_defrag_inodes(fs_info);
				1764	}
				1765
				1766	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
				1767	}
				1768
				1769	static int btrfs_remount(struct super_block sb, int flags, char *data)
				1770	{
				1771	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				1772	struct btrfs_root *root = fs_info->tree_root;
				1773	unsigned old_flags = sb->s_flags;
				1774	unsigned long old_opts = fs_info->mount_opt;
				1775	unsigned long old_compress_type = fs_info->compress_type;
				1776	u64 old_max_inline = fs_info->max_inline;
				1777	u32 old_thread_pool_size = fs_info->thread_pool_size;
				1778	u32 old_metadata_ratio = fs_info->metadata_ratio;
				1779	int ret;
				1780
				1781	sync_filesystem(sb);
				1782	btrfs_remount_prepare(fs_info);
				1783
				1784	if (data) {
				1785	struct security_mnt_opts new_sec_opts;
				1786
				1787	security_init_mnt_opts(&new_sec_opts);
				1788	ret = parse_security_options(data, &new_sec_opts);
				1789	if (ret)
				1790	goto restore;
				1791	ret = setup_security_options(fs_info, sb,
				1792	&new_sec_opts);
				1793	if (ret) {
				1794	security_free_mnt_opts(&new_sec_opts);
				1795	goto restore;
				1796	}
				1797	}
				1798
				1799	ret = btrfs_parse_options(fs_info, data, *flags);
				1800	if (ret)
				1801	goto restore;
				1802
				1803	btrfs_remount_begin(fs_info, old_opts, *flags);
				1804	btrfs_resize_thread_pool(fs_info,
				1805	fs_info->thread_pool_size, old_thread_pool_size);
				1806
				1807	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
				1808	goto out;
				1809
				1810	if (*flags & SB_RDONLY) {
				1811	/*
				1812	* this also happens on 'umount -rf' or on shutdown, when
				1813	* the filesystem is busy.
				1814	*/
				1815	cancel_work_sync(&fs_info->async_reclaim_work);
				1816
				1817	/* wait for the uuid_scan task to finish */
				1818	down(&fs_info->uuid_tree_rescan_sem);
				1819	/* avoid complains from lockdep et al. */
				1820	up(&fs_info->uuid_tree_rescan_sem);
				1821
				1822	sb->s_flags \|= SB_RDONLY;
				1823
				1824	/*
				1825	* Setting SB_RDONLY will put the cleaner thread to
				1826	* sleep at the next loop if it's already active.
				1827	* If it's already asleep, we'll leave unused block
				1828	* groups on disk until we're mounted read-write again
				1829	* unless we clean them up here.
				1830	*/
				1831	btrfs_delete_unused_bgs(fs_info);
				1832
				1833	btrfs_dev_replace_suspend_for_unmount(fs_info);
				1834	btrfs_scrub_cancel(fs_info);
				1835	btrfs_pause_balance(fs_info);
				1836
				1837	ret = btrfs_commit_super(fs_info);
				1838	if (ret)
				1839	goto restore;
				1840	} else {
				1841	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
				1842	btrfs_err(fs_info,
				1843	"Remounting read-write after error is not allowed");
				1844	ret = -EINVAL;
				1845	goto restore;
				1846	}
				1847	if (fs_info->fs_devices->rw_devices == 0) {
				1848	ret = -EACCES;
				1849	goto restore;
				1850	}
				1851
				1852	if (!btrfs_check_rw_degradable(fs_info, NULL)) {
				1853	btrfs_warn(fs_info,
				1854	"too many missing devices, writeable remount is not allowed");
				1855	ret = -EACCES;
				1856	goto restore;
				1857	}
				1858
				1859	if (btrfs_super_log_root(fs_info->super_copy) != 0) {
				1860	ret = -EINVAL;
				1861	goto restore;
				1862	}
				1863
				1864	ret = btrfs_cleanup_fs_roots(fs_info);
				1865	if (ret)
				1866	goto restore;
				1867
				1868	/* recover relocation */
				1869	mutex_lock(&fs_info->cleaner_mutex);
				1870	ret = btrfs_recover_relocation(root);
				1871	mutex_unlock(&fs_info->cleaner_mutex);
				1872	if (ret)
				1873	goto restore;
				1874
				1875	ret = btrfs_resume_balance_async(fs_info);
				1876	if (ret)
				1877	goto restore;
				1878
				1879	ret = btrfs_resume_dev_replace_async(fs_info);
				1880	if (ret) {
				1881	btrfs_warn(fs_info, "failed to resume dev_replace");
				1882	goto restore;
				1883	}
				1884
				1885	btrfs_qgroup_rescan_resume(fs_info);
				1886
				1887	if (!fs_info->uuid_root) {
				1888	btrfs_info(fs_info, "creating UUID tree");
				1889	ret = btrfs_create_uuid_tree(fs_info);
				1890	if (ret) {
				1891	btrfs_warn(fs_info,
				1892	"failed to create the UUID tree %d",
				1893	ret);
				1894	goto restore;
				1895	}
				1896	}
				1897	sb->s_flags &= ~SB_RDONLY;
				1898
				1899	set_bit(BTRFS_FS_OPEN, &fs_info->flags);
				1900	}
				1901	out:
				1902	wake_up_process(fs_info->transaction_kthread);
				1903	btrfs_remount_cleanup(fs_info, old_opts);
				1904	return 0;
				1905
				1906	restore:
				1907	/* We've hit an error - don't reset SB_RDONLY */
				1908	if (sb_rdonly(sb))
				1909	old_flags \|= SB_RDONLY;
				1910	sb->s_flags = old_flags;
				1911	fs_info->mount_opt = old_opts;
				1912	fs_info->compress_type = old_compress_type;
				1913	fs_info->max_inline = old_max_inline;
				1914	btrfs_resize_thread_pool(fs_info,
				1915	old_thread_pool_size, fs_info->thread_pool_size);
				1916	fs_info->metadata_ratio = old_metadata_ratio;
				1917	btrfs_remount_cleanup(fs_info, old_opts);
				1918	return ret;
				1919	}
				1920
				1921	/* Used to sort the devices by max_avail(descending sort) */
				1922	static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
				1923	const void *dev_info2)
				1924	{
				1925	if (((struct btrfs_device_info *)dev_info1)->max_avail >
				1926	((struct btrfs_device_info *)dev_info2)->max_avail)
				1927	return -1;
				1928	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
				1929	((struct btrfs_device_info *)dev_info2)->max_avail)
				1930	return 1;
				1931	else
				1932	return 0;
				1933	}
				1934
				1935	/*
				1936	* sort the devices by max_avail, in which max free extent size of each device
				1937	* is stored.(Descending Sort)
				1938	*/
				1939	static inline void btrfs_descending_sort_devices(
				1940	struct btrfs_device_info *devices,
				1941	size_t nr_devices)
				1942	{
				1943	sort(devices, nr_devices, sizeof(struct btrfs_device_info),
				1944	btrfs_cmp_device_free_bytes, NULL);
				1945	}
				1946
				1947	/*
				1948	* The helper to calc the free space on the devices that can be used to store
				1949	* file data.
				1950	*/
				1951	static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
				1952	u64 *free_bytes)
				1953	{
				1954	struct btrfs_device_info *devices_info;
				1955	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
				1956	struct btrfs_device *device;
				1957	u64 skip_space;
				1958	u64 type;
				1959	u64 avail_space;
				1960	u64 min_stripe_size;
				1961	int min_stripes = 1, num_stripes = 1;
				1962	int i = 0, nr_devices;
				1963
				1964	/*
				1965	* We aren't under the device list lock, so this is racy-ish, but good
				1966	* enough for our purposes.
				1967	*/
				1968	nr_devices = fs_info->fs_devices->open_devices;
				1969	if (!nr_devices) {
				1970	smp_mb();
				1971	nr_devices = fs_info->fs_devices->open_devices;
				1972	ASSERT(nr_devices);
				1973	if (!nr_devices) {
				1974	*free_bytes = 0;
				1975	return 0;
				1976	}
				1977	}
				1978
				1979	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
				1980	GFP_KERNEL);
				1981	if (!devices_info)
				1982	return -ENOMEM;
				1983
				1984	/* calc min stripe number for data space allocation */
				1985	type = btrfs_data_alloc_profile(fs_info);
				1986	if (type & BTRFS_BLOCK_GROUP_RAID0) {
				1987	min_stripes = 2;
				1988	num_stripes = nr_devices;
				1989	} else if (type & BTRFS_BLOCK_GROUP_RAID1) {
				1990	min_stripes = 2;
				1991	num_stripes = 2;
				1992	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
				1993	min_stripes = 4;
				1994	num_stripes = 4;
				1995	}
				1996
				1997	if (type & BTRFS_BLOCK_GROUP_DUP)
				1998	min_stripe_size = 2 * BTRFS_STRIPE_LEN;
				1999	else
				2000	min_stripe_size = BTRFS_STRIPE_LEN;
				2001
				2002	rcu_read_lock();
				2003	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
				2004	if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				2005	&device->dev_state) \|\|
				2006	!device->bdev \|\|
				2007	test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
				2008	continue;
				2009
				2010	if (i >= nr_devices)
				2011	break;
				2012
				2013	avail_space = device->total_bytes - device->bytes_used;
				2014
				2015	/* align with stripe_len */
				2016	avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
				2017	avail_space *= BTRFS_STRIPE_LEN;
				2018
				2019	/*
				2020	* In order to avoid overwriting the superblock on the drive,
				2021	* btrfs starts at an offset of at least 1MB when doing chunk
				2022	* allocation.
				2023	*/
				2024	skip_space = SZ_1M;
				2025
				2026	/*
				2027	* we can use the free space in [0, skip_space - 1], subtract
				2028	* it from the total.
				2029	*/
				2030	if (avail_space && avail_space >= skip_space)
				2031	avail_space -= skip_space;
				2032	else
				2033	avail_space = 0;
				2034
				2035	if (avail_space < min_stripe_size)
				2036	continue;
				2037
				2038	devices_info[i].dev = device;
				2039	devices_info[i].max_avail = avail_space;
				2040
				2041	i++;
				2042	}
				2043	rcu_read_unlock();
				2044
				2045	nr_devices = i;
				2046
				2047	btrfs_descending_sort_devices(devices_info, nr_devices);
				2048
				2049	i = nr_devices - 1;
				2050	avail_space = 0;
				2051	while (nr_devices >= min_stripes) {
				2052	if (num_stripes > nr_devices)
				2053	num_stripes = nr_devices;
				2054
				2055	if (devices_info[i].max_avail >= min_stripe_size) {
				2056	int j;
				2057	u64 alloc_size;
				2058
				2059	avail_space += devices_info[i].max_avail * num_stripes;
				2060	alloc_size = devices_info[i].max_avail;
				2061	for (j = i + 1 - num_stripes; j <= i; j++)
				2062	devices_info[j].max_avail -= alloc_size;
				2063	}
				2064	i--;
				2065	nr_devices--;
				2066	}
				2067
				2068	kfree(devices_info);
				2069	*free_bytes = avail_space;
				2070	return 0;
				2071	}
				2072
				2073	/*
				2074	* Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
				2075	*
				2076	* If there's a redundant raid level at DATA block groups, use the respective
				2077	* multiplier to scale the sizes.
				2078	*
				2079	* Unused device space usage is based on simulating the chunk allocator
				2080	* algorithm that respects the device sizes and order of allocations. This is
				2081	* a close approximation of the actual use but there are other factors that may
				2082	* change the result (like a new metadata chunk).
				2083	*
				2084	* If metadata is exhausted, f_bavail will be 0.
				2085	*/
				2086	static int btrfs_statfs(struct dentry dentry, struct kstatfs buf)
				2087	{
				2088	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
				2089	struct btrfs_super_block *disk_super = fs_info->super_copy;
				2090	struct list_head *head = &fs_info->space_info;
				2091	struct btrfs_space_info *found;
				2092	u64 total_used = 0;
				2093	u64 total_free_data = 0;
				2094	u64 total_free_meta = 0;
				2095	int bits = dentry->d_sb->s_blocksize_bits;
				2096	__be32 fsid = (__be32 )fs_info->fsid;
				2097	unsigned factor = 1;
				2098	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
				2099	int ret;
				2100	u64 thresh = 0;
				2101	int mixed = 0;
				2102
				2103	rcu_read_lock();
				2104	list_for_each_entry_rcu(found, head, list) {
				2105	if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
				2106	int i;
				2107
				2108	total_free_data += found->disk_total - found->disk_used;
				2109	total_free_data -=
				2110	btrfs_account_ro_block_groups_free_space(found);
				2111
				2112	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
				2113	if (!list_empty(&found->block_groups[i]))
				2114	factor = btrfs_bg_type_to_factor(
				2115	btrfs_raid_array[i].bg_flag);
				2116	}
				2117	}
				2118
				2119	/*
				2120	* Metadata in mixed block goup profiles are accounted in data
				2121	*/
				2122	if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
				2123	if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				2124	mixed = 1;
				2125	else
				2126	total_free_meta += found->disk_total -
				2127	found->disk_used;
				2128	}
				2129
				2130	total_used += found->disk_used;
				2131	}
				2132
				2133	rcu_read_unlock();
				2134
				2135	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
				2136	buf->f_blocks >>= bits;
				2137	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
				2138
				2139	/* Account global block reserve as used, it's in logical size already */
				2140	spin_lock(&block_rsv->lock);
				2141	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
				2142	if (buf->f_bfree >= block_rsv->size >> bits)
				2143	buf->f_bfree -= block_rsv->size >> bits;
				2144	else
				2145	buf->f_bfree = 0;
				2146	spin_unlock(&block_rsv->lock);
				2147
				2148	buf->f_bavail = div_u64(total_free_data, factor);
				2149	ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
				2150	if (ret)
				2151	return ret;
				2152	buf->f_bavail += div_u64(total_free_data, factor);
				2153	buf->f_bavail = buf->f_bavail >> bits;
				2154
				2155	/*
				2156	* We calculate the remaining metadata space minus global reserve. If
				2157	* this is (supposedly) smaller than zero, there's no space. But this
				2158	* does not hold in practice, the exhausted state happens where's still
				2159	* some positive delta. So we apply some guesswork and compare the
				2160	* delta to a 4M threshold. (Practically observed delta was ~2M.)
				2161	*
				2162	* We probably cannot calculate the exact threshold value because this
				2163	* depends on the internal reservations requested by various
				2164	* operations, so some operations that consume a few metadata will
				2165	* succeed even if the Avail is zero. But this is better than the other
				2166	* way around.
				2167	*/
				2168	thresh = SZ_4M;
				2169
				2170	if (!mixed && total_free_meta - thresh < block_rsv->size)
				2171	buf->f_bavail = 0;
				2172
				2173	buf->f_type = BTRFS_SUPER_MAGIC;
				2174	buf->f_bsize = dentry->d_sb->s_blocksize;
				2175	buf->f_namelen = BTRFS_NAME_LEN;
				2176
				2177	/* We treat it as constant endianness (it doesn't matter _which_)
				2178	because we want the fsid to come out the same whether mounted
				2179	on a big-endian or little-endian host */
				2180	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
				2181	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
				2182	/* Mask in the root object ID too, to disambiguate subvols */
				2183	buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
				2184	buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
				2185
				2186	return 0;
				2187	}
				2188
				/*
				 * ->kill_sb handler: tear down the anonymous super block, then release the
				 * btrfs_fs_info that was attached to it.
				 */
				static void btrfs_kill_super(struct super_block *sb)
				{
					/* Fetch the pointer before the super block is killed. */
					struct btrfs_fs_info *info = btrfs_sb(sb);

					kill_anon_super(sb);
					free_fs_info(info);
				}
				2195
				2196	static struct file_system_type btrfs_fs_type = {
				2197	.owner = THIS_MODULE,
				2198	.name = "btrfs",
				2199	.mount = btrfs_mount,
				2200	.kill_sb = btrfs_kill_super,
				2201	.fs_flags = FS_REQUIRES_DEV \| FS_BINARY_MOUNTDATA,
				2202	};
				2203
				2204	static struct file_system_type btrfs_root_fs_type = {
				2205	.owner = THIS_MODULE,
				2206	.name = "btrfs",
				2207	.mount = btrfs_mount_root,
				2208	.kill_sb = btrfs_kill_super,
				2209	.fs_flags = FS_REQUIRES_DEV \| FS_BINARY_MOUNTDATA,
				2210	};
				2211
				2212	MODULE_ALIAS_FS("btrfs");
				2213
				2214	static int btrfs_control_open(struct inode inode, struct file file)
				2215	{
				2216	/*
				2217	* The control file's private_data is used to hold the
				2218	* transaction when it is started and is used to keep
				2219	* track of whether a transaction is already in progress.
				2220	*/
				2221	file->private_data = NULL;
				2222	return 0;
				2223	}
				2224
				2225	/*
				2226	* used by btrfsctl to scan devices when no FS is mounted
				2227	*/
				2228	static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				2229	unsigned long arg)
				2230	{
				2231	struct btrfs_ioctl_vol_args *vol;
				2232	struct btrfs_device *device = NULL;
				2233	int ret = -ENOTTY;
				2234
				2235	if (!capable(CAP_SYS_ADMIN))
				2236	return -EPERM;
				2237
				2238	vol = memdup_user((void __user )arg, sizeof(vol));
				2239	if (IS_ERR(vol))
				2240	return PTR_ERR(vol);
				2241	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
				2242
				2243	switch (cmd) {
				2244	case BTRFS_IOC_SCAN_DEV:
				2245	mutex_lock(&uuid_mutex);
				2246	device = btrfs_scan_one_device(vol->name, FMODE_READ,
				2247	&btrfs_root_fs_type);
				2248	ret = PTR_ERR_OR_ZERO(device);
				2249	mutex_unlock(&uuid_mutex);
				2250	break;
				2251	case BTRFS_IOC_DEVICES_READY:
				2252	mutex_lock(&uuid_mutex);
				2253	device = btrfs_scan_one_device(vol->name, FMODE_READ,
				2254	&btrfs_root_fs_type);
				2255	if (IS_ERR(device)) {
				2256	mutex_unlock(&uuid_mutex);
				2257	ret = PTR_ERR(device);
				2258	break;
				2259	}
				2260	ret = !(device->fs_devices->num_devices ==
				2261	device->fs_devices->total_devices);
				2262	mutex_unlock(&uuid_mutex);
				2263	break;
				2264	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
				2265	ret = btrfs_ioctl_get_supported_features((void __user*)arg);
				2266	break;
				2267	}
				2268
				2269	kfree(vol);
				2270	return ret;
				2271	}
				2272
				2273	static int btrfs_freeze(struct super_block *sb)
				2274	{
				2275	struct btrfs_trans_handle *trans;
				2276	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				2277	struct btrfs_root *root = fs_info->tree_root;
				2278
				2279	set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
				2280	/*
				2281	* We don't need a barrier here, we'll wait for any transaction that
				2282	* could be in progress on other threads (and do delayed iputs that
				2283	* we want to avoid on a frozen filesystem), or do the commit
				2284	* ourselves.
				2285	*/
				2286	trans = btrfs_attach_transaction_barrier(root);
				2287	if (IS_ERR(trans)) {
				2288	/* no transaction, don't bother */
				2289	if (PTR_ERR(trans) == -ENOENT)
				2290	return 0;
				2291	return PTR_ERR(trans);
				2292	}
				2293	return btrfs_commit_transaction(trans);
				2294	}
				2295
				2296	static int btrfs_unfreeze(struct super_block *sb)
				2297	{
				2298	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				2299
				2300	clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
				2301	return 0;
				2302	}
				2303
				2304	static int btrfs_show_devname(struct seq_file m, struct dentry root)
				2305	{
				2306	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
				2307	struct btrfs_fs_devices *cur_devices;
				2308	struct btrfs_device dev, first_dev = NULL;
				2309	struct list_head *head;
				2310
				2311	/*
				2312	* Lightweight locking of the devices. We should not need
				2313	* device_list_mutex here as we only read the device data and the list
				2314	* is protected by RCU. Even if a device is deleted during the list
				2315	* traversals, we'll get valid data, the freeing callback will wait at
				2316	* least until until the rcu_read_unlock.
				2317	*/
				2318	rcu_read_lock();
				2319	cur_devices = fs_info->fs_devices;
				2320	while (cur_devices) {
				2321	head = &cur_devices->devices;
				2322	list_for_each_entry_rcu(dev, head, dev_list) {
				2323	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
				2324	continue;
				2325	if (!dev->name)
				2326	continue;
				2327	if (!first_dev \|\| dev->devid < first_dev->devid)
				2328	first_dev = dev;
				2329	}
				2330	cur_devices = cur_devices->seed;
				2331	}
				2332
				2333	if (first_dev)
				2334	seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
				2335	else
				2336	WARN_ON(1);
				2337	rcu_read_unlock();
				2338	return 0;
				2339	}
				2340
				2341	static const struct super_operations btrfs_super_ops = {
				2342	.drop_inode = btrfs_drop_inode,
				2343	.evict_inode = btrfs_evict_inode,
				2344	.put_super = btrfs_put_super,
				2345	.sync_fs = btrfs_sync_fs,
				2346	.show_options = btrfs_show_options,
				2347	.show_devname = btrfs_show_devname,
				2348	.alloc_inode = btrfs_alloc_inode,
				2349	.destroy_inode = btrfs_destroy_inode,
				2350	.statfs = btrfs_statfs,
				2351	.remount_fs = btrfs_remount,
				2352	.freeze_fs = btrfs_freeze,
				2353	.unfreeze_fs = btrfs_unfreeze,
				2354	};
				2355
				2356	static const struct file_operations btrfs_ctl_fops = {
				2357	.open = btrfs_control_open,
				2358	.unlocked_ioctl = btrfs_control_ioctl,
				2359	.compat_ioctl = btrfs_control_ioctl,
				2360	.owner = THIS_MODULE,
				2361	.llseek = noop_llseek,
				2362	};
				2363
				2364	static struct miscdevice btrfs_misc = {
				2365	.minor = BTRFS_MINOR,
				2366	.name = "btrfs-control",
				2367	.fops = &btrfs_ctl_fops
				2368	};
				2369
				2370	MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
				2371	MODULE_ALIAS("devname:btrfs-control");
				2372
				2373	static int __init btrfs_interface_init(void)
				2374	{
				2375	return misc_register(&btrfs_misc);
				2376	}
				2377
				2378	static __cold void btrfs_interface_exit(void)
				2379	{
				2380	misc_deregister(&btrfs_misc);
				2381	}
				2382
				2383	static void __init btrfs_print_mod_info(void)
				2384	{
				2385	static const char options[] = ""
				2386	#ifdef CONFIG_BTRFS_DEBUG
				2387	", debug=on"
				2388	#endif
				2389	#ifdef CONFIG_BTRFS_ASSERT
				2390	", assert=on"
				2391	#endif
				2392	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
				2393	", integrity-checker=on"
				2394	#endif
				2395	#ifdef CONFIG_BTRFS_FS_REF_VERIFY
				2396	", ref-verify=on"
				2397	#endif
				2398	;
				2399	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
				2400	}
				2401
				2402	static int __init init_btrfs_fs(void)
				2403	{
				2404	int err;
				2405
				2406	btrfs_props_init();
				2407
				2408	err = btrfs_init_sysfs();
				2409	if (err)
				2410	return err;
				2411
				2412	btrfs_init_compress();
				2413
				2414	err = btrfs_init_cachep();
				2415	if (err)
				2416	goto free_compress;
				2417
				2418	err = extent_io_init();
				2419	if (err)
				2420	goto free_cachep;
				2421
				2422	err = extent_map_init();
				2423	if (err)
				2424	goto free_extent_io;
				2425
				2426	err = ordered_data_init();
				2427	if (err)
				2428	goto free_extent_map;
				2429
				2430	err = btrfs_delayed_inode_init();
				2431	if (err)
				2432	goto free_ordered_data;
				2433
				2434	err = btrfs_auto_defrag_init();
				2435	if (err)
				2436	goto free_delayed_inode;
				2437
				2438	err = btrfs_delayed_ref_init();
				2439	if (err)
				2440	goto free_auto_defrag;
				2441
				2442	err = btrfs_prelim_ref_init();
				2443	if (err)
				2444	goto free_delayed_ref;
				2445
				2446	err = btrfs_end_io_wq_init();
				2447	if (err)
				2448	goto free_prelim_ref;
				2449
				2450	err = btrfs_interface_init();
				2451	if (err)
				2452	goto free_end_io_wq;
				2453
				2454	btrfs_init_lockdep();
				2455
				2456	btrfs_print_mod_info();
				2457
				2458	err = btrfs_run_sanity_tests();
				2459	if (err)
				2460	goto unregister_ioctl;
				2461
				2462	err = register_filesystem(&btrfs_fs_type);
				2463	if (err)
				2464	goto unregister_ioctl;
				2465
				2466	return 0;
				2467
				2468	unregister_ioctl:
				2469	btrfs_interface_exit();
				2470	free_end_io_wq:
				2471	btrfs_end_io_wq_exit();
				2472	free_prelim_ref:
				2473	btrfs_prelim_ref_exit();
				2474	free_delayed_ref:
				2475	btrfs_delayed_ref_exit();
				2476	free_auto_defrag:
				2477	btrfs_auto_defrag_exit();
				2478	free_delayed_inode:
				2479	btrfs_delayed_inode_exit();
				2480	free_ordered_data:
				2481	ordered_data_exit();
				2482	free_extent_map:
				2483	extent_map_exit();
				2484	free_extent_io:
				2485	extent_io_exit();
				2486	free_cachep:
				2487	btrfs_destroy_cachep();
				2488	free_compress:
				2489	btrfs_exit_compress();
				2490	btrfs_exit_sysfs();
				2491
				2492	return err;
				2493	}
				2494
				2495	static void __exit exit_btrfs_fs(void)
				2496	{
				2497	btrfs_destroy_cachep();
				2498	btrfs_delayed_ref_exit();
				2499	btrfs_auto_defrag_exit();
				2500	btrfs_delayed_inode_exit();
				2501	btrfs_prelim_ref_exit();
				2502	ordered_data_exit();
				2503	extent_map_exit();
				2504	extent_io_exit();
				2505	btrfs_interface_exit();
				2506	btrfs_end_io_wq_exit();
				2507	unregister_filesystem(&btrfs_fs_type);
				2508	btrfs_exit_sysfs();
				2509	btrfs_cleanup_fs_uuids();
				2510	btrfs_exit_compress();
				2511	}
				2512
				2513	late_initcall(init_btrfs_fs);
				2514	module_exit(exit_btrfs_fs)
				2515
				2516	MODULE_LICENSE("GPL");