Blame - src/kernel/linux/v4.14/fs/btrfs/super.c - T103

blob: eb64d4b159e07580bf6b966e216b5b48600ea0c2 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* Copyright (C) 2007 Oracle. All rights reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public
				6	* License v2 as published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				11	* General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public
				14	* License along with this program; if not, write to the
				15	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				16	* Boston, MA 021110-1307, USA.
				17	*/
				18
				19	#include <linux/blkdev.h>
				20	#include <linux/module.h>
				21	#include <linux/buffer_head.h>
				22	#include <linux/fs.h>
				23	#include <linux/pagemap.h>
				24	#include <linux/highmem.h>
				25	#include <linux/time.h>
				26	#include <linux/init.h>
				27	#include <linux/seq_file.h>
				28	#include <linux/string.h>
				29	#include <linux/backing-dev.h>
				30	#include <linux/mount.h>
				31	#include <linux/mpage.h>
				32	#include <linux/swap.h>
				33	#include <linux/writeback.h>
				34	#include <linux/statfs.h>
				35	#include <linux/compat.h>
				36	#include <linux/parser.h>
				37	#include <linux/ctype.h>
				38	#include <linux/namei.h>
				39	#include <linux/miscdevice.h>
				40	#include <linux/magic.h>
				41	#include <linux/slab.h>
				42	#include <linux/cleancache.h>
				43	#include <linux/ratelimit.h>
				44	#include <linux/btrfs.h>
				45	#include "delayed-inode.h"
				46	#include "ctree.h"
				47	#include "disk-io.h"
				48	#include "transaction.h"
				49	#include "btrfs_inode.h"
				50	#include "print-tree.h"
				51	#include "hash.h"
				52	#include "props.h"
				53	#include "xattr.h"
				54	#include "volumes.h"
				55	#include "export.h"
				56	#include "compression.h"
				57	#include "rcu-string.h"
				58	#include "dev-replace.h"
				59	#include "free-space-cache.h"
				60	#include "backref.h"
				61	#include "tests/btrfs-tests.h"
				62
				63	#include "qgroup.h"
				64	#include "backref.h"
				65	#define CREATE_TRACE_POINTS
				66	#include <trace/events/btrfs.h>
				67
				68	static const struct super_operations btrfs_super_ops;
				69	static struct file_system_type btrfs_fs_type;
				70
				71	static int btrfs_remount(struct super_block sb, int flags, char *data);
				72
				73	const char *btrfs_decode_error(int errno)
				74	{
				75	char *errstr = "unknown";
				76
				77	switch (errno) {
				78	case -EIO:
				79	errstr = "IO failure";
				80	break;
				81	case -ENOMEM:
				82	errstr = "Out of memory";
				83	break;
				84	case -EROFS:
				85	errstr = "Readonly filesystem";
				86	break;
				87	case -EEXIST:
				88	errstr = "Object already exists";
				89	break;
				90	case -ENOSPC:
				91	errstr = "No space left";
				92	break;
				93	case -ENOENT:
				94	errstr = "No such entry";
				95	break;
				96	}
				97
				98	return errstr;
				99	}
				100
				101	/* btrfs handle error by forcing the filesystem readonly */
				102	static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
				103	{
				104	struct super_block *sb = fs_info->sb;
				105
				106	if (sb_rdonly(sb))
				107	return;
				108
				109	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
				110	sb->s_flags \|= MS_RDONLY;
				111	btrfs_info(fs_info, "forced readonly");
				112	/*
				113	* Note that a running device replace operation is not
				114	* canceled here although there is no way to update
				115	* the progress. It would add the risk of a deadlock,
				116	* therefore the canceling is omitted. The only penalty
				117	* is that some I/O remains active until the procedure
				118	* completes. The next time when the filesystem is
				119	* mounted writeable again, the device replace
				120	* operation continues.
				121	*/
				122	}
				123	}
				124
				125	/*
				126	* __btrfs_handle_fs_error decodes expected errors from the caller and
				127	* invokes the approciate error response.
				128	*/
				129	__cold
				130	void __btrfs_handle_fs_error(struct btrfs_fs_info fs_info, const char function,
				131	unsigned int line, int errno, const char *fmt, ...)
				132	{
				133	struct super_block *sb = fs_info->sb;
				134	#ifdef CONFIG_PRINTK
				135	const char *errstr;
				136	#endif
				137
				138	/*
				139	* Special case: if the error is EROFS, and we're already
				140	* under MS_RDONLY, then it is safe here.
				141	*/
				142	if (errno == -EROFS && sb_rdonly(sb))
				143	return;
				144
				145	#ifdef CONFIG_PRINTK
				146	errstr = btrfs_decode_error(errno);
				147	if (fmt) {
				148	struct va_format vaf;
				149	va_list args;
				150
				151	va_start(args, fmt);
				152	vaf.fmt = fmt;
				153	vaf.va = &args;
				154
				155	pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
				156	sb->s_id, function, line, errno, errstr, &vaf);
				157	va_end(args);
				158	} else {
				159	pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
				160	sb->s_id, function, line, errno, errstr);
				161	}
				162	#endif
				163
				164	/*
				165	* Today we only save the error info to memory. Long term we'll
				166	* also send it down to the disk
				167	*/
				168	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
				169
				170	/* Don't go through full error handling during mount */
				171	if (sb->s_flags & MS_BORN)
				172	btrfs_handle_error(fs_info);
				173	}
				174
				175	#ifdef CONFIG_PRINTK
				176	static const char * const logtypes[] = {
				177	"emergency",
				178	"alert",
				179	"critical",
				180	"error",
				181	"warning",
				182	"notice",
				183	"info",
				184	"debug",
				185	};
				186
				187
				188	/*
				189	* Use one ratelimit state per log level so that a flood of less important
				190	* messages doesn't cause more important ones to be dropped.
				191	*/
				192	static struct ratelimit_state printk_limits[] = {
				193	RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
				194	RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
				195	RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
				196	RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
				197	RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
				198	RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
				199	RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
				200	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
				201	};
				202
				203	void btrfs_printk(const struct btrfs_fs_info fs_info, const char fmt, ...)
				204	{
				205	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
				206	struct va_format vaf;
				207	va_list args;
				208	int kern_level;
				209	const char *type = logtypes[4];
				210	struct ratelimit_state *ratelimit = &printk_limits[4];
				211
				212	va_start(args, fmt);
				213
				214	while ((kern_level = printk_get_level(fmt)) != 0) {
				215	size_t size = printk_skip_level(fmt) - fmt;
				216
				217	if (kern_level >= '0' && kern_level <= '7') {
				218	memcpy(lvl, fmt, size);
				219	lvl[size] = '\0';
				220	type = logtypes[kern_level - '0'];
				221	ratelimit = &printk_limits[kern_level - '0'];
				222	}
				223	fmt += size;
				224	}
				225
				226	vaf.fmt = fmt;
				227	vaf.va = &args;
				228
				229	if (__ratelimit(ratelimit))
				230	printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
				231	fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
				232
				233	va_end(args);
				234	}
				235	#endif
				236
				237	/*
				238	* We only mark the transaction aborted and then set the file system read-only.
				239	* This will prevent new transactions from starting or trying to join this
				240	* one.
				241	*
				242	* This means that error recovery at the call site is limited to freeing
				243	* any local memory allocations and passing the error code up without
				244	* further cleanup. The transaction should complete as it normally would
				245	* in the call path but will return -EIO.
				246	*
				247	* We'll complete the cleanup in btrfs_end_transaction and
				248	* btrfs_commit_transaction.
				249	*/
				250	__cold
				251	void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
				252	const char *function,
				253	unsigned int line, int errno)
				254	{
				255	struct btrfs_fs_info *fs_info = trans->fs_info;
				256
				257	trans->aborted = errno;
				258	/* Nothing used. The other threads that have joined this
				259	* transaction may be able to continue. */
				260	if (!trans->dirty && list_empty(&trans->new_bgs)) {
				261	const char *errstr;
				262
				263	errstr = btrfs_decode_error(errno);
				264	btrfs_warn(fs_info,
				265	"%s:%d: Aborting unused transaction(%s).",
				266	function, line, errstr);
				267	return;
				268	}
				269	WRITE_ONCE(trans->transaction->aborted, errno);
				270	/* Wake up anybody who may be waiting on this transaction */
				271	wake_up(&fs_info->transaction_wait);
				272	wake_up(&fs_info->transaction_blocked_wait);
				273	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
				274	}
				275	/*
				276	* __btrfs_panic decodes unexpected, fatal errors from the caller,
				277	* issues an alert, and either panics or BUGs, depending on mount options.
				278	*/
				279	__cold
				280	void __btrfs_panic(struct btrfs_fs_info fs_info, const char function,
				281	unsigned int line, int errno, const char *fmt, ...)
				282	{
				283	char *s_id = "<unknown>";
				284	const char *errstr;
				285	struct va_format vaf = { .fmt = fmt };
				286	va_list args;
				287
				288	if (fs_info)
				289	s_id = fs_info->sb->s_id;
				290
				291	va_start(args, fmt);
				292	vaf.va = &args;
				293
				294	errstr = btrfs_decode_error(errno);
				295	if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
				296	panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
				297	s_id, function, line, &vaf, errno, errstr);
				298
				299	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
				300	function, line, &vaf, errno, errstr);
				301	va_end(args);
				302	/* Caller calls BUG() */
				303	}
				304
				305	static void btrfs_put_super(struct super_block *sb)
				306	{
				307	close_ctree(btrfs_sb(sb));
				308	}
				309
				310	enum {
				311	Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
				312	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
				313	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
				314	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
				315	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
				316	Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
				317	Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
				318	Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
				319	Opt_skip_balance, Opt_check_integrity,
				320	Opt_check_integrity_including_extent_data,
				321	Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
				322	Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
				323	Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
				324	Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
				325	Opt_nologreplay, Opt_norecovery,
				326	#ifdef CONFIG_BTRFS_DEBUG
				327	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
				328	#endif
				329	Opt_err,
				330	};
				331
				332	static const match_table_t tokens = {
				333	{Opt_degraded, "degraded"},
				334	{Opt_subvol, "subvol=%s"},
				335	{Opt_subvolid, "subvolid=%s"},
				336	{Opt_device, "device=%s"},
				337	{Opt_nodatasum, "nodatasum"},
				338	{Opt_datasum, "datasum"},
				339	{Opt_nodatacow, "nodatacow"},
				340	{Opt_datacow, "datacow"},
				341	{Opt_nobarrier, "nobarrier"},
				342	{Opt_barrier, "barrier"},
				343	{Opt_max_inline, "max_inline=%s"},
				344	{Opt_alloc_start, "alloc_start=%s"},
				345	{Opt_thread_pool, "thread_pool=%d"},
				346	{Opt_compress, "compress"},
				347	{Opt_compress_type, "compress=%s"},
				348	{Opt_compress_force, "compress-force"},
				349	{Opt_compress_force_type, "compress-force=%s"},
				350	{Opt_ssd, "ssd"},
				351	{Opt_ssd_spread, "ssd_spread"},
				352	{Opt_nossd, "nossd"},
				353	{Opt_acl, "acl"},
				354	{Opt_noacl, "noacl"},
				355	{Opt_notreelog, "notreelog"},
				356	{Opt_treelog, "treelog"},
				357	{Opt_nologreplay, "nologreplay"},
				358	{Opt_norecovery, "norecovery"},
				359	{Opt_flushoncommit, "flushoncommit"},
				360	{Opt_noflushoncommit, "noflushoncommit"},
				361	{Opt_ratio, "metadata_ratio=%d"},
				362	{Opt_discard, "discard"},
				363	{Opt_nodiscard, "nodiscard"},
				364	{Opt_space_cache, "space_cache"},
				365	{Opt_space_cache_version, "space_cache=%s"},
				366	{Opt_clear_cache, "clear_cache"},
				367	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
				368	{Opt_enospc_debug, "enospc_debug"},
				369	{Opt_noenospc_debug, "noenospc_debug"},
				370	{Opt_subvolrootid, "subvolrootid=%d"},
				371	{Opt_defrag, "autodefrag"},
				372	{Opt_nodefrag, "noautodefrag"},
				373	{Opt_inode_cache, "inode_cache"},
				374	{Opt_noinode_cache, "noinode_cache"},
				375	{Opt_no_space_cache, "nospace_cache"},
				376	{Opt_recovery, "recovery"}, /* deprecated */
				377	{Opt_usebackuproot, "usebackuproot"},
				378	{Opt_skip_balance, "skip_balance"},
				379	{Opt_check_integrity, "check_int"},
				380	{Opt_check_integrity_including_extent_data, "check_int_data"},
				381	{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
				382	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
				383	{Opt_fatal_errors, "fatal_errors=%s"},
				384	{Opt_commit_interval, "commit=%d"},
				385	#ifdef CONFIG_BTRFS_DEBUG
				386	{Opt_fragment_data, "fragment=data"},
				387	{Opt_fragment_metadata, "fragment=metadata"},
				388	{Opt_fragment_all, "fragment=all"},
				389	#endif
				390	{Opt_err, NULL},
				391	};
				392
				393	/*
				394	* Regular mount options parser. Everything that is needed only when
				395	* reading in a new superblock is parsed here.
				396	* XXX JDM: This needs to be cleaned up for remount.
				397	*/
				398	int btrfs_parse_options(struct btrfs_fs_info info, char options,
				399	unsigned long new_flags)
				400	{
				401	substring_t args[MAX_OPT_ARGS];
				402	char p, num, *orig = NULL;
				403	u64 cache_gen;
				404	int intarg;
				405	int ret = 0;
				406	char *compress_type;
				407	bool compress_force = false;
				408	enum btrfs_compression_type saved_compress_type;
				409	bool saved_compress_force;
				410	int no_compress = 0;
				411
				412	cache_gen = btrfs_super_cache_generation(info->super_copy);
				413	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
				414	btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
				415	else if (cache_gen)
				416	btrfs_set_opt(info->mount_opt, SPACE_CACHE);
				417
				418	/*
				419	* Even the options are empty, we still need to do extra check
				420	* against new flags
				421	*/
				422	if (!options)
				423	goto check;
				424
				425	/*
				426	* strsep changes the string, duplicate it because parse_options
				427	* gets called twice
				428	*/
				429	options = kstrdup(options, GFP_KERNEL);
				430	if (!options)
				431	return -ENOMEM;
				432
				433	orig = options;
				434
				435	while ((p = strsep(&options, ",")) != NULL) {
				436	int token;
				437	if (!*p)
				438	continue;
				439
				440	token = match_token(p, tokens, args);
				441	switch (token) {
				442	case Opt_degraded:
				443	btrfs_info(info, "allowing degraded mounts");
				444	btrfs_set_opt(info->mount_opt, DEGRADED);
				445	break;
				446	case Opt_subvol:
				447	case Opt_subvolid:
				448	case Opt_subvolrootid:
				449	case Opt_device:
				450	/*
				451	* These are parsed by btrfs_parse_early_options
				452	* and can be happily ignored here.
				453	*/
				454	break;
				455	case Opt_nodatasum:
				456	btrfs_set_and_info(info, NODATASUM,
				457	"setting nodatasum");
				458	break;
				459	case Opt_datasum:
				460	if (btrfs_test_opt(info, NODATASUM)) {
				461	if (btrfs_test_opt(info, NODATACOW))
				462	btrfs_info(info,
				463	"setting datasum, datacow enabled");
				464	else
				465	btrfs_info(info, "setting datasum");
				466	}
				467	btrfs_clear_opt(info->mount_opt, NODATACOW);
				468	btrfs_clear_opt(info->mount_opt, NODATASUM);
				469	break;
				470	case Opt_nodatacow:
				471	if (!btrfs_test_opt(info, NODATACOW)) {
				472	if (!btrfs_test_opt(info, COMPRESS) \|\|
				473	!btrfs_test_opt(info, FORCE_COMPRESS)) {
				474	btrfs_info(info,
				475	"setting nodatacow, compression disabled");
				476	} else {
				477	btrfs_info(info, "setting nodatacow");
				478	}
				479	}
				480	btrfs_clear_opt(info->mount_opt, COMPRESS);
				481	btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				482	btrfs_set_opt(info->mount_opt, NODATACOW);
				483	btrfs_set_opt(info->mount_opt, NODATASUM);
				484	break;
				485	case Opt_datacow:
				486	btrfs_clear_and_info(info, NODATACOW,
				487	"setting datacow");
				488	break;
				489	case Opt_compress_force:
				490	case Opt_compress_force_type:
				491	compress_force = true;
				492	/* Fallthrough */
				493	case Opt_compress:
				494	case Opt_compress_type:
				495	saved_compress_type = btrfs_test_opt(info,
				496	COMPRESS) ?
				497	info->compress_type : BTRFS_COMPRESS_NONE;
				498	saved_compress_force =
				499	btrfs_test_opt(info, FORCE_COMPRESS);
				500	if (token == Opt_compress \|\|
				501	token == Opt_compress_force \|\|
				502	strncmp(args[0].from, "zlib", 4) == 0) {
				503	compress_type = "zlib";
				504	info->compress_type = BTRFS_COMPRESS_ZLIB;
				505	btrfs_set_opt(info->mount_opt, COMPRESS);
				506	btrfs_clear_opt(info->mount_opt, NODATACOW);
				507	btrfs_clear_opt(info->mount_opt, NODATASUM);
				508	no_compress = 0;
				509	} else if (strncmp(args[0].from, "lzo", 3) == 0) {
				510	compress_type = "lzo";
				511	info->compress_type = BTRFS_COMPRESS_LZO;
				512	btrfs_set_opt(info->mount_opt, COMPRESS);
				513	btrfs_clear_opt(info->mount_opt, NODATACOW);
				514	btrfs_clear_opt(info->mount_opt, NODATASUM);
				515	btrfs_set_fs_incompat(info, COMPRESS_LZO);
				516	no_compress = 0;
				517	} else if (strcmp(args[0].from, "zstd") == 0) {
				518	compress_type = "zstd";
				519	info->compress_type = BTRFS_COMPRESS_ZSTD;
				520	btrfs_set_opt(info->mount_opt, COMPRESS);
				521	btrfs_clear_opt(info->mount_opt, NODATACOW);
				522	btrfs_clear_opt(info->mount_opt, NODATASUM);
				523	btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
				524	no_compress = 0;
				525	} else if (strncmp(args[0].from, "no", 2) == 0) {
				526	compress_type = "no";
				527	btrfs_clear_opt(info->mount_opt, COMPRESS);
				528	btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				529	compress_force = false;
				530	no_compress++;
				531	} else {
				532	ret = -EINVAL;
				533	goto out;
				534	}
				535
				536	if (compress_force) {
				537	btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
				538	} else {
				539	/*
				540	* If we remount from compress-force=xxx to
				541	* compress=xxx, we need clear FORCE_COMPRESS
				542	* flag, otherwise, there is no way for users
				543	* to disable forcible compression separately.
				544	*/
				545	btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				546	}
				547	if ((btrfs_test_opt(info, COMPRESS) &&
				548	(info->compress_type != saved_compress_type \|\|
				549	compress_force != saved_compress_force)) \|\|
				550	(!btrfs_test_opt(info, COMPRESS) &&
				551	no_compress == 1)) {
				552	btrfs_info(info, "%s %s compression",
				553	(compress_force) ? "force" : "use",
				554	compress_type);
				555	}
				556	compress_force = false;
				557	break;
				558	case Opt_ssd:
				559	btrfs_set_and_info(info, SSD,
				560	"enabling ssd optimizations");
				561	btrfs_clear_opt(info->mount_opt, NOSSD);
				562	break;
				563	case Opt_ssd_spread:
				564	btrfs_set_and_info(info, SSD,
				565	"enabling ssd optimizations");
				566	btrfs_set_and_info(info, SSD_SPREAD,
				567	"using spread ssd allocation scheme");
				568	btrfs_clear_opt(info->mount_opt, NOSSD);
				569	break;
				570	case Opt_nossd:
				571	btrfs_set_opt(info->mount_opt, NOSSD);
				572	btrfs_clear_and_info(info, SSD,
				573	"not using ssd optimizations");
				574	btrfs_clear_and_info(info, SSD_SPREAD,
				575	"not using spread ssd allocation scheme");
				576	break;
				577	case Opt_barrier:
				578	btrfs_clear_and_info(info, NOBARRIER,
				579	"turning on barriers");
				580	break;
				581	case Opt_nobarrier:
				582	btrfs_set_and_info(info, NOBARRIER,
				583	"turning off barriers");
				584	break;
				585	case Opt_thread_pool:
				586	ret = match_int(&args[0], &intarg);
				587	if (ret) {
				588	goto out;
				589	} else if (intarg > 0) {
				590	info->thread_pool_size = intarg;
				591	} else {
				592	ret = -EINVAL;
				593	goto out;
				594	}
				595	break;
				596	case Opt_max_inline:
				597	num = match_strdup(&args[0]);
				598	if (num) {
				599	info->max_inline = memparse(num, NULL);
				600	kfree(num);
				601
				602	if (info->max_inline) {
				603	info->max_inline = min_t(u64,
				604	info->max_inline,
				605	info->sectorsize);
				606	}
				607	btrfs_info(info, "max_inline at %llu",
				608	info->max_inline);
				609	} else {
				610	ret = -ENOMEM;
				611	goto out;
				612	}
				613	break;
				614	case Opt_alloc_start:
				615	btrfs_info(info,
				616	"option alloc_start is obsolete, ignored");
				617	break;
				618	case Opt_acl:
				619	#ifdef CONFIG_BTRFS_FS_POSIX_ACL
				620	info->sb->s_flags \|= MS_POSIXACL;
				621	break;
				622	#else
				623	btrfs_err(info, "support for ACL not compiled in!");
				624	ret = -EINVAL;
				625	goto out;
				626	#endif
				627	case Opt_noacl:
				628	info->sb->s_flags &= ~MS_POSIXACL;
				629	break;
				630	case Opt_notreelog:
				631	btrfs_set_and_info(info, NOTREELOG,
				632	"disabling tree log");
				633	break;
				634	case Opt_treelog:
				635	btrfs_clear_and_info(info, NOTREELOG,
				636	"enabling tree log");
				637	break;
				638	case Opt_norecovery:
				639	case Opt_nologreplay:
				640	btrfs_set_and_info(info, NOLOGREPLAY,
				641	"disabling log replay at mount time");
				642	break;
				643	case Opt_flushoncommit:
				644	btrfs_set_and_info(info, FLUSHONCOMMIT,
				645	"turning on flush-on-commit");
				646	break;
				647	case Opt_noflushoncommit:
				648	btrfs_clear_and_info(info, FLUSHONCOMMIT,
				649	"turning off flush-on-commit");
				650	break;
				651	case Opt_ratio:
				652	ret = match_int(&args[0], &intarg);
				653	if (ret) {
				654	goto out;
				655	} else if (intarg >= 0) {
				656	info->metadata_ratio = intarg;
				657	btrfs_info(info, "metadata ratio %d",
				658	info->metadata_ratio);
				659	} else {
				660	ret = -EINVAL;
				661	goto out;
				662	}
				663	break;
				664	case Opt_discard:
				665	btrfs_set_and_info(info, DISCARD,
				666	"turning on discard");
				667	break;
				668	case Opt_nodiscard:
				669	btrfs_clear_and_info(info, DISCARD,
				670	"turning off discard");
				671	break;
				672	case Opt_space_cache:
				673	case Opt_space_cache_version:
				674	if (token == Opt_space_cache \|\|
				675	strcmp(args[0].from, "v1") == 0) {
				676	btrfs_clear_opt(info->mount_opt,
				677	FREE_SPACE_TREE);
				678	btrfs_set_and_info(info, SPACE_CACHE,
				679	"enabling disk space caching");
				680	} else if (strcmp(args[0].from, "v2") == 0) {
				681	btrfs_clear_opt(info->mount_opt,
				682	SPACE_CACHE);
				683	btrfs_set_and_info(info, FREE_SPACE_TREE,
				684	"enabling free space tree");
				685	} else {
				686	ret = -EINVAL;
				687	goto out;
				688	}
				689	break;
				690	case Opt_rescan_uuid_tree:
				691	btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
				692	break;
				693	case Opt_no_space_cache:
				694	if (btrfs_test_opt(info, SPACE_CACHE)) {
				695	btrfs_clear_and_info(info, SPACE_CACHE,
				696	"disabling disk space caching");
				697	}
				698	if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
				699	btrfs_clear_and_info(info, FREE_SPACE_TREE,
				700	"disabling free space tree");
				701	}
				702	break;
				703	case Opt_inode_cache:
				704	btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
				705	"enabling inode map caching");
				706	break;
				707	case Opt_noinode_cache:
				708	btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
				709	"disabling inode map caching");
				710	break;
				711	case Opt_clear_cache:
				712	btrfs_set_and_info(info, CLEAR_CACHE,
				713	"force clearing of disk cache");
				714	break;
				715	case Opt_user_subvol_rm_allowed:
				716	btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
				717	break;
				718	case Opt_enospc_debug:
				719	btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
				720	break;
				721	case Opt_noenospc_debug:
				722	btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
				723	break;
				724	case Opt_defrag:
				725	btrfs_set_and_info(info, AUTO_DEFRAG,
				726	"enabling auto defrag");
				727	break;
				728	case Opt_nodefrag:
				729	btrfs_clear_and_info(info, AUTO_DEFRAG,
				730	"disabling auto defrag");
				731	break;
				732	case Opt_recovery:
				733	btrfs_warn(info,
				734	"'recovery' is deprecated, use 'usebackuproot' instead");
				735	case Opt_usebackuproot:
				736	btrfs_info(info,
				737	"trying to use backup root at mount time");
				738	btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
				739	break;
				740	case Opt_skip_balance:
				741	btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
				742	break;
				743	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
				744	case Opt_check_integrity_including_extent_data:
				745	btrfs_info(info,
				746	"enabling check integrity including extent data");
				747	btrfs_set_opt(info->mount_opt,
				748	CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
				749	btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
				750	break;
				751	case Opt_check_integrity:
				752	btrfs_info(info, "enabling check integrity");
				753	btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
				754	break;
				755	case Opt_check_integrity_print_mask:
				756	ret = match_int(&args[0], &intarg);
				757	if (ret) {
				758	goto out;
				759	} else if (intarg >= 0) {
				760	info->check_integrity_print_mask = intarg;
				761	btrfs_info(info,
				762	"check_integrity_print_mask 0x%x",
				763	info->check_integrity_print_mask);
				764	} else {
				765	ret = -EINVAL;
				766	goto out;
				767	}
				768	break;
				769	#else
				770	case Opt_check_integrity_including_extent_data:
				771	case Opt_check_integrity:
				772	case Opt_check_integrity_print_mask:
				773	btrfs_err(info,
				774	"support for check_integrity* not compiled in!");
				775	ret = -EINVAL;
				776	goto out;
				777	#endif
				778	case Opt_fatal_errors:
				779	if (strcmp(args[0].from, "panic") == 0)
				780	btrfs_set_opt(info->mount_opt,
				781	PANIC_ON_FATAL_ERROR);
				782	else if (strcmp(args[0].from, "bug") == 0)
				783	btrfs_clear_opt(info->mount_opt,
				784	PANIC_ON_FATAL_ERROR);
				785	else {
				786	ret = -EINVAL;
				787	goto out;
				788	}
				789	break;
				790	case Opt_commit_interval:
				791	intarg = 0;
				792	ret = match_int(&args[0], &intarg);
				793	if (ret < 0) {
				794	btrfs_err(info, "invalid commit interval");
				795	ret = -EINVAL;
				796	goto out;
				797	}
				798	if (intarg > 0) {
				799	if (intarg > 300) {
				800	btrfs_warn(info,
				801	"excessive commit interval %d",
				802	intarg);
				803	}
				804	info->commit_interval = intarg;
				805	} else {
				806	btrfs_info(info,
				807	"using default commit interval %ds",
				808	BTRFS_DEFAULT_COMMIT_INTERVAL);
				809	info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
				810	}
				811	break;
				812	#ifdef CONFIG_BTRFS_DEBUG
				813	case Opt_fragment_all:
				814	btrfs_info(info, "fragmenting all space");
				815	btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
				816	btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
				817	break;
				818	case Opt_fragment_metadata:
				819	btrfs_info(info, "fragmenting metadata");
				820	btrfs_set_opt(info->mount_opt,
				821	FRAGMENT_METADATA);
				822	break;
				823	case Opt_fragment_data:
				824	btrfs_info(info, "fragmenting data");
				825	btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
				826	break;
				827	#endif
				828	case Opt_err:
				829	btrfs_info(info, "unrecognized mount option '%s'", p);
				830	ret = -EINVAL;
				831	goto out;
				832	default:
				833	break;
				834	}
				835	}
				836	check:
				837	/*
				838	* Extra check for current option against current flag
				839	*/
				840	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
				841	btrfs_err(info,
				842	"nologreplay must be used with ro mount option");
				843	ret = -EINVAL;
				844	}
				845	out:
				846	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
				847	!btrfs_test_opt(info, FREE_SPACE_TREE) &&
				848	!btrfs_test_opt(info, CLEAR_CACHE)) {
				849	btrfs_err(info, "cannot disable free space tree");
				850	ret = -EINVAL;
				851
				852	}
				853	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
				854	btrfs_info(info, "disk space caching is enabled");
				855	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
				856	btrfs_info(info, "using free space tree");
				857	kfree(orig);
				858	return ret;
				859	}
				860
				861	/*
				862	* Parse mount options that are required early in the mount process.
				863	*
				864	* All other options will be parsed on much later in the mount process and
				865	* only when we need to allocate a new super block.
				866	*/
				867	static int btrfs_parse_early_options(const char *options, fmode_t flags,
				868	void holder, char subvol_name, u64 subvol_objectid,
				869	struct btrfs_fs_devices **fs_devices)
				870	{
				871	substring_t args[MAX_OPT_ARGS];
				872	char device_name, opts, orig, p;
				873	char *num = NULL;
				874	int error = 0;
				875
				876	if (!options)
				877	return 0;
				878
				879	/*
				880	* strsep changes the string, duplicate it because parse_options
				881	* gets called twice
				882	*/
				883	opts = kstrdup(options, GFP_KERNEL);
				884	if (!opts)
				885	return -ENOMEM;
				886	orig = opts;
				887
				888	while ((p = strsep(&opts, ",")) != NULL) {
				889	int token;
				890	if (!*p)
				891	continue;
				892
				893	token = match_token(p, tokens, args);
				894	switch (token) {
				895	case Opt_subvol:
				896	kfree(*subvol_name);
				897	*subvol_name = match_strdup(&args[0]);
				898	if (!*subvol_name) {
				899	error = -ENOMEM;
				900	goto out;
				901	}
				902	break;
				903	case Opt_subvolid:
				904	num = match_strdup(&args[0]);
				905	if (num) {
				906	*subvol_objectid = memparse(num, NULL);
				907	kfree(num);
				908	/* we want the original fs_tree */
				909	if (!*subvol_objectid)
				910	*subvol_objectid =
				911	BTRFS_FS_TREE_OBJECTID;
				912	} else {
				913	error = -EINVAL;
				914	goto out;
				915	}
				916	break;
				917	case Opt_subvolrootid:
				918	pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
				919	break;
				920	case Opt_device:
				921	device_name = match_strdup(&args[0]);
				922	if (!device_name) {
				923	error = -ENOMEM;
				924	goto out;
				925	}
				926	error = btrfs_scan_one_device(device_name,
				927	flags, holder, fs_devices);
				928	kfree(device_name);
				929	if (error)
				930	goto out;
				931	break;
				932	default:
				933	break;
				934	}
				935	}
				936
				937	out:
				938	kfree(orig);
				939	return error;
				940	}
				941
				942	char btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info fs_info,
				943	u64 subvol_objectid)
				944	{
				945	struct btrfs_root *root = fs_info->tree_root;
				946	struct btrfs_root *fs_root;
				947	struct btrfs_root_ref *root_ref;
				948	struct btrfs_inode_ref *inode_ref;
				949	struct btrfs_key key;
				950	struct btrfs_path *path = NULL;
				951	char name = NULL, ptr;
				952	u64 dirid;
				953	int len;
				954	int ret;
				955
				956	path = btrfs_alloc_path();
				957	if (!path) {
				958	ret = -ENOMEM;
				959	goto err;
				960	}
				961	path->leave_spinning = 1;
				962
				963	name = kmalloc(PATH_MAX, GFP_KERNEL);
				964	if (!name) {
				965	ret = -ENOMEM;
				966	goto err;
				967	}
				968	ptr = name + PATH_MAX - 1;
				969	ptr[0] = '\0';
				970
				971	/*
				972	* Walk up the subvolume trees in the tree of tree roots by root
				973	* backrefs until we hit the top-level subvolume.
				974	*/
				975	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
				976	key.objectid = subvol_objectid;
				977	key.type = BTRFS_ROOT_BACKREF_KEY;
				978	key.offset = (u64)-1;
				979
				980	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				981	if (ret < 0) {
				982	goto err;
				983	} else if (ret > 0) {
				984	ret = btrfs_previous_item(root, path, subvol_objectid,
				985	BTRFS_ROOT_BACKREF_KEY);
				986	if (ret < 0) {
				987	goto err;
				988	} else if (ret > 0) {
				989	ret = -ENOENT;
				990	goto err;
				991	}
				992	}
				993
				994	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				995	subvol_objectid = key.offset;
				996
				997	root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				998	struct btrfs_root_ref);
				999	len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
				1000	ptr -= len + 1;
				1001	if (ptr < name) {
				1002	ret = -ENAMETOOLONG;
				1003	goto err;
				1004	}
				1005	read_extent_buffer(path->nodes[0], ptr + 1,
				1006	(unsigned long)(root_ref + 1), len);
				1007	ptr[0] = '/';
				1008	dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
				1009	btrfs_release_path(path);
				1010
				1011	key.objectid = subvol_objectid;
				1012	key.type = BTRFS_ROOT_ITEM_KEY;
				1013	key.offset = (u64)-1;
				1014	fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
				1015	if (IS_ERR(fs_root)) {
				1016	ret = PTR_ERR(fs_root);
				1017	goto err;
				1018	}
				1019
				1020	/*
				1021	* Walk up the filesystem tree by inode refs until we hit the
				1022	* root directory.
				1023	*/
				1024	while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
				1025	key.objectid = dirid;
				1026	key.type = BTRFS_INODE_REF_KEY;
				1027	key.offset = (u64)-1;
				1028
				1029	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
				1030	if (ret < 0) {
				1031	goto err;
				1032	} else if (ret > 0) {
				1033	ret = btrfs_previous_item(fs_root, path, dirid,
				1034	BTRFS_INODE_REF_KEY);
				1035	if (ret < 0) {
				1036	goto err;
				1037	} else if (ret > 0) {
				1038	ret = -ENOENT;
				1039	goto err;
				1040	}
				1041	}
				1042
				1043	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				1044	dirid = key.offset;
				1045
				1046	inode_ref = btrfs_item_ptr(path->nodes[0],
				1047	path->slots[0],
				1048	struct btrfs_inode_ref);
				1049	len = btrfs_inode_ref_name_len(path->nodes[0],
				1050	inode_ref);
				1051	ptr -= len + 1;
				1052	if (ptr < name) {
				1053	ret = -ENAMETOOLONG;
				1054	goto err;
				1055	}
				1056	read_extent_buffer(path->nodes[0], ptr + 1,
				1057	(unsigned long)(inode_ref + 1), len);
				1058	ptr[0] = '/';
				1059	btrfs_release_path(path);
				1060	}
				1061	}
				1062
				1063	btrfs_free_path(path);
				1064	if (ptr == name + PATH_MAX - 1) {
				1065	name[0] = '/';
				1066	name[1] = '\0';
				1067	} else {
				1068	memmove(name, ptr, name + PATH_MAX - ptr);
				1069	}
				1070	return name;
				1071
				1072	err:
				1073	btrfs_free_path(path);
				1074	kfree(name);
				1075	return ERR_PTR(ret);
				1076	}
				1077
				1078	static int get_default_subvol_objectid(struct btrfs_fs_info fs_info, u64 objectid)
				1079	{
				1080	struct btrfs_root *root = fs_info->tree_root;
				1081	struct btrfs_dir_item *di;
				1082	struct btrfs_path *path;
				1083	struct btrfs_key location;
				1084	u64 dir_id;
				1085
				1086	path = btrfs_alloc_path();
				1087	if (!path)
				1088	return -ENOMEM;
				1089	path->leave_spinning = 1;
				1090
				1091	/*
				1092	* Find the "default" dir item which points to the root item that we
				1093	* will mount by default if we haven't been given a specific subvolume
				1094	* to mount.
				1095	*/
				1096	dir_id = btrfs_super_root_dir(fs_info->super_copy);
				1097	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
				1098	if (IS_ERR(di)) {
				1099	btrfs_free_path(path);
				1100	return PTR_ERR(di);
				1101	}
				1102	if (!di) {
				1103	/*
				1104	* Ok the default dir item isn't there. This is weird since
				1105	* it's always been there, but don't freak out, just try and
				1106	* mount the top-level subvolume.
				1107	*/
				1108	btrfs_free_path(path);
				1109	*objectid = BTRFS_FS_TREE_OBJECTID;
				1110	return 0;
				1111	}
				1112
				1113	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
				1114	btrfs_free_path(path);
				1115	*objectid = location.objectid;
				1116	return 0;
				1117	}
				1118
				1119	static int btrfs_fill_super(struct super_block *sb,
				1120	struct btrfs_fs_devices *fs_devices,
				1121	void *data)
				1122	{
				1123	struct inode *inode;
				1124	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				1125	struct btrfs_key key;
				1126	int err;
				1127
				1128	sb->s_maxbytes = MAX_LFS_FILESIZE;
				1129	sb->s_magic = BTRFS_SUPER_MAGIC;
				1130	sb->s_op = &btrfs_super_ops;
				1131	sb->s_d_op = &btrfs_dentry_operations;
				1132	sb->s_export_op = &btrfs_export_ops;
				1133	sb->s_xattr = btrfs_xattr_handlers;
				1134	sb->s_time_gran = 1;
				1135	#ifdef CONFIG_BTRFS_FS_POSIX_ACL
				1136	sb->s_flags \|= MS_POSIXACL;
				1137	#endif
				1138	sb->s_flags \|= SB_I_VERSION;
				1139	sb->s_iflags \|= SB_I_CGROUPWB;
				1140
				1141	err = super_setup_bdi(sb);
				1142	if (err) {
				1143	btrfs_err(fs_info, "super_setup_bdi failed");
				1144	return err;
				1145	}
				1146
				1147	err = open_ctree(sb, fs_devices, (char *)data);
				1148	if (err) {
				1149	btrfs_err(fs_info, "open_ctree failed");
				1150	return err;
				1151	}
				1152
				1153	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
				1154	key.type = BTRFS_INODE_ITEM_KEY;
				1155	key.offset = 0;
				1156	inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
				1157	if (IS_ERR(inode)) {
				1158	err = PTR_ERR(inode);
				1159	goto fail_close;
				1160	}
				1161
				1162	sb->s_root = d_make_root(inode);
				1163	if (!sb->s_root) {
				1164	err = -ENOMEM;
				1165	goto fail_close;
				1166	}
				1167
				1168	cleancache_init_fs(sb);
				1169	sb->s_flags \|= MS_ACTIVE;
				1170	return 0;
				1171
				1172	fail_close:
				1173	close_ctree(fs_info);
				1174	return err;
				1175	}
				1176
				1177	int btrfs_sync_fs(struct super_block *sb, int wait)
				1178	{
				1179	struct btrfs_trans_handle *trans;
				1180	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				1181	struct btrfs_root *root = fs_info->tree_root;
				1182
				1183	trace_btrfs_sync_fs(fs_info, wait);
				1184
				1185	if (!wait) {
				1186	filemap_flush(fs_info->btree_inode->i_mapping);
				1187	return 0;
				1188	}
				1189
				1190	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
				1191
				1192	trans = btrfs_attach_transaction_barrier(root);
				1193	if (IS_ERR(trans)) {
				1194	/* no transaction, don't bother */
				1195	if (PTR_ERR(trans) == -ENOENT) {
				1196	/*
				1197	* Exit unless we have some pending changes
				1198	* that need to go through commit
				1199	*/
				1200	if (fs_info->pending_changes == 0)
				1201	return 0;
				1202	/*
				1203	* A non-blocking test if the fs is frozen. We must not
				1204	* start a new transaction here otherwise a deadlock
				1205	* happens. The pending operations are delayed to the
				1206	* next commit after thawing.
				1207	*/
				1208	if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
				1209	__sb_end_write(sb, SB_FREEZE_WRITE);
				1210	else
				1211	return 0;
				1212	trans = btrfs_start_transaction(root, 0);
				1213	}
				1214	if (IS_ERR(trans))
				1215	return PTR_ERR(trans);
				1216	}
				1217	return btrfs_commit_transaction(trans);
				1218	}
				1219
				1220	static int btrfs_show_options(struct seq_file seq, struct dentry dentry)
				1221	{
				1222	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
				1223	char *compress_type;
				1224	const char *subvol_name;
				1225
				1226	if (btrfs_test_opt(info, DEGRADED))
				1227	seq_puts(seq, ",degraded");
				1228	if (btrfs_test_opt(info, NODATASUM))
				1229	seq_puts(seq, ",nodatasum");
				1230	if (btrfs_test_opt(info, NODATACOW))
				1231	seq_puts(seq, ",nodatacow");
				1232	if (btrfs_test_opt(info, NOBARRIER))
				1233	seq_puts(seq, ",nobarrier");
				1234	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
				1235	seq_printf(seq, ",max_inline=%llu", info->max_inline);
				1236	if (info->thread_pool_size != min_t(unsigned long,
				1237	num_online_cpus() + 2, 8))
				1238	seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
				1239	if (btrfs_test_opt(info, COMPRESS)) {
				1240	if (info->compress_type == BTRFS_COMPRESS_ZLIB)
				1241	compress_type = "zlib";
				1242	else if (info->compress_type == BTRFS_COMPRESS_LZO)
				1243	compress_type = "lzo";
				1244	else
				1245	compress_type = "zstd";
				1246	if (btrfs_test_opt(info, FORCE_COMPRESS))
				1247	seq_printf(seq, ",compress-force=%s", compress_type);
				1248	else
				1249	seq_printf(seq, ",compress=%s", compress_type);
				1250	}
				1251	if (btrfs_test_opt(info, NOSSD))
				1252	seq_puts(seq, ",nossd");
				1253	if (btrfs_test_opt(info, SSD_SPREAD))
				1254	seq_puts(seq, ",ssd_spread");
				1255	else if (btrfs_test_opt(info, SSD))
				1256	seq_puts(seq, ",ssd");
				1257	if (btrfs_test_opt(info, NOTREELOG))
				1258	seq_puts(seq, ",notreelog");
				1259	if (btrfs_test_opt(info, NOLOGREPLAY))
				1260	seq_puts(seq, ",nologreplay");
				1261	if (btrfs_test_opt(info, FLUSHONCOMMIT))
				1262	seq_puts(seq, ",flushoncommit");
				1263	if (btrfs_test_opt(info, DISCARD))
				1264	seq_puts(seq, ",discard");
				1265	if (!(info->sb->s_flags & MS_POSIXACL))
				1266	seq_puts(seq, ",noacl");
				1267	if (btrfs_test_opt(info, SPACE_CACHE))
				1268	seq_puts(seq, ",space_cache");
				1269	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
				1270	seq_puts(seq, ",space_cache=v2");
				1271	else
				1272	seq_puts(seq, ",nospace_cache");
				1273	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
				1274	seq_puts(seq, ",rescan_uuid_tree");
				1275	if (btrfs_test_opt(info, CLEAR_CACHE))
				1276	seq_puts(seq, ",clear_cache");
				1277	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
				1278	seq_puts(seq, ",user_subvol_rm_allowed");
				1279	if (btrfs_test_opt(info, ENOSPC_DEBUG))
				1280	seq_puts(seq, ",enospc_debug");
				1281	if (btrfs_test_opt(info, AUTO_DEFRAG))
				1282	seq_puts(seq, ",autodefrag");
				1283	if (btrfs_test_opt(info, INODE_MAP_CACHE))
				1284	seq_puts(seq, ",inode_cache");
				1285	if (btrfs_test_opt(info, SKIP_BALANCE))
				1286	seq_puts(seq, ",skip_balance");
				1287	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
				1288	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
				1289	seq_puts(seq, ",check_int_data");
				1290	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
				1291	seq_puts(seq, ",check_int");
				1292	if (info->check_integrity_print_mask)
				1293	seq_printf(seq, ",check_int_print_mask=%d",
				1294	info->check_integrity_print_mask);
				1295	#endif
				1296	if (info->metadata_ratio)
				1297	seq_printf(seq, ",metadata_ratio=%d",
				1298	info->metadata_ratio);
				1299	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
				1300	seq_puts(seq, ",fatal_errors=panic");
				1301	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
				1302	seq_printf(seq, ",commit=%d", info->commit_interval);
				1303	#ifdef CONFIG_BTRFS_DEBUG
				1304	if (btrfs_test_opt(info, FRAGMENT_DATA))
				1305	seq_puts(seq, ",fragment=data");
				1306	if (btrfs_test_opt(info, FRAGMENT_METADATA))
				1307	seq_puts(seq, ",fragment=metadata");
				1308	#endif
				1309	seq_printf(seq, ",subvolid=%llu",
				1310	BTRFS_I(d_inode(dentry))->root->root_key.objectid);
				1311	subvol_name = btrfs_get_subvol_name_from_objectid(info,
				1312	BTRFS_I(d_inode(dentry))->root->root_key.objectid);
				1313	if (!IS_ERR(subvol_name)) {
				1314	seq_puts(seq, ",subvol=");
				1315	seq_escape(seq, subvol_name, " \t\n\\");
				1316	kfree(subvol_name);
				1317	}
				1318	return 0;
				1319	}
				1320
				1321	static int btrfs_test_super(struct super_block s, void data)
				1322	{
				1323	struct btrfs_fs_info *p = data;
				1324	struct btrfs_fs_info *fs_info = btrfs_sb(s);
				1325
				1326	return fs_info->fs_devices == p->fs_devices;
				1327	}
				1328
				1329	static int btrfs_set_super(struct super_block s, void data)
				1330	{
				1331	int err = set_anon_super(s, data);
				1332	if (!err)
				1333	s->s_fs_info = data;
				1334	return err;
				1335	}
				1336
				1337	/*
				1338	* subvolumes are identified by ino 256
				1339	*/
				1340	static inline int is_subvolume_inode(struct inode *inode)
				1341	{
				1342	if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
				1343	return 1;
				1344	return 0;
				1345	}
				1346
				1347	/*
				1348	* This will add subvolid=0 to the argument string while removing any subvol=
				1349	* and subvolid= arguments to make sure we get the top-level root for path
				1350	* walking to the subvol we want.
				1351	*/
				1352	static char setup_root_args(char args)
				1353	{
				1354	char buf, dst, *sep;
				1355
				1356	if (!args)
				1357	return kstrdup("subvolid=0", GFP_KERNEL);
				1358
				1359	/* The worst case is that we add ",subvolid=0" to the end. */
				1360	buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
				1361	GFP_KERNEL);
				1362	if (!buf)
				1363	return NULL;
				1364
				1365	while (1) {
				1366	sep = strchrnul(args, ',');
				1367	if (!strstarts(args, "subvol=") &&
				1368	!strstarts(args, "subvolid=")) {
				1369	memcpy(dst, args, sep - args);
				1370	dst += sep - args;
				1371	*dst++ = ',';
				1372	}
				1373	if (*sep)
				1374	args = sep + 1;
				1375	else
				1376	break;
				1377	}
				1378	strcpy(dst, "subvolid=0");
				1379
				1380	return buf;
				1381	}
				1382
				1383	static struct dentry mount_subvol(const char subvol_name, u64 subvol_objectid,
				1384	int flags, const char *device_name,
				1385	char *data)
				1386	{
				1387	struct dentry *root;
				1388	struct vfsmount *mnt = NULL;
				1389	char *newargs;
				1390	int ret;
				1391
				1392	newargs = setup_root_args(data);
				1393	if (!newargs) {
				1394	root = ERR_PTR(-ENOMEM);
				1395	goto out;
				1396	}
				1397
				1398	mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
				1399	if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
				1400	if (flags & MS_RDONLY) {
				1401	mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
				1402	device_name, newargs);
				1403	} else {
				1404	mnt = vfs_kern_mount(&btrfs_fs_type, flags \| MS_RDONLY,
				1405	device_name, newargs);
				1406	if (IS_ERR(mnt)) {
				1407	root = ERR_CAST(mnt);
				1408	mnt = NULL;
				1409	goto out;
				1410	}
				1411
				1412	down_write(&mnt->mnt_sb->s_umount);
				1413	ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
				1414	up_write(&mnt->mnt_sb->s_umount);
				1415	if (ret < 0) {
				1416	root = ERR_PTR(ret);
				1417	goto out;
				1418	}
				1419	}
				1420	}
				1421	if (IS_ERR(mnt)) {
				1422	root = ERR_CAST(mnt);
				1423	mnt = NULL;
				1424	goto out;
				1425	}
				1426
				1427	if (!subvol_name) {
				1428	if (!subvol_objectid) {
				1429	ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
				1430	&subvol_objectid);
				1431	if (ret) {
				1432	root = ERR_PTR(ret);
				1433	goto out;
				1434	}
				1435	}
				1436	subvol_name = btrfs_get_subvol_name_from_objectid(
				1437	btrfs_sb(mnt->mnt_sb), subvol_objectid);
				1438	if (IS_ERR(subvol_name)) {
				1439	root = ERR_CAST(subvol_name);
				1440	subvol_name = NULL;
				1441	goto out;
				1442	}
				1443
				1444	}
				1445
				1446	root = mount_subtree(mnt, subvol_name);
				1447	/* mount_subtree() drops our reference on the vfsmount. */
				1448	mnt = NULL;
				1449
				1450	if (!IS_ERR(root)) {
				1451	struct super_block *s = root->d_sb;
				1452	struct btrfs_fs_info *fs_info = btrfs_sb(s);
				1453	struct inode *root_inode = d_inode(root);
				1454	u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
				1455
				1456	ret = 0;
				1457	if (!is_subvolume_inode(root_inode)) {
				1458	btrfs_err(fs_info, "'%s' is not a valid subvolume",
				1459	subvol_name);
				1460	ret = -EINVAL;
				1461	}
				1462	if (subvol_objectid && root_objectid != subvol_objectid) {
				1463	/*
				1464	* This will also catch a race condition where a
				1465	* subvolume which was passed by ID is renamed and
				1466	* another subvolume is renamed over the old location.
				1467	*/
				1468	btrfs_err(fs_info,
				1469	"subvol '%s' does not match subvolid %llu",
				1470	subvol_name, subvol_objectid);
				1471	ret = -EINVAL;
				1472	}
				1473	if (ret) {
				1474	dput(root);
				1475	root = ERR_PTR(ret);
				1476	deactivate_locked_super(s);
				1477	}
				1478	}
				1479
				1480	out:
				1481	mntput(mnt);
				1482	kfree(newargs);
				1483	kfree(subvol_name);
				1484	return root;
				1485	}
				1486
				1487	static int parse_security_options(char *orig_opts,
				1488	struct security_mnt_opts *sec_opts)
				1489	{
				1490	char *secdata = NULL;
				1491	int ret = 0;
				1492
				1493	secdata = alloc_secdata();
				1494	if (!secdata)
				1495	return -ENOMEM;
				1496	ret = security_sb_copy_data(orig_opts, secdata);
				1497	if (ret) {
				1498	free_secdata(secdata);
				1499	return ret;
				1500	}
				1501	ret = security_sb_parse_opts_str(secdata, sec_opts);
				1502	free_secdata(secdata);
				1503	return ret;
				1504	}
				1505
				1506	static int setup_security_options(struct btrfs_fs_info *fs_info,
				1507	struct super_block *sb,
				1508	struct security_mnt_opts *sec_opts)
				1509	{
				1510	int ret = 0;
				1511
				1512	/*
				1513	* Call security_sb_set_mnt_opts() to check whether new sec_opts
				1514	* is valid.
				1515	*/
				1516	ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
				1517	if (ret)
				1518	return ret;
				1519
				1520	#ifdef CONFIG_SECURITY
				1521	if (!fs_info->security_opts.num_mnt_opts) {
				1522	/* first time security setup, copy sec_opts to fs_info */
				1523	memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
				1524	} else {
				1525	/*
				1526	* Since SELinux (the only one supporting security_mnt_opts)
				1527	* does NOT support changing context during remount/mount of
				1528	* the same sb, this must be the same or part of the same
				1529	* security options, just free it.
				1530	*/
				1531	security_free_mnt_opts(sec_opts);
				1532	}
				1533	#endif
				1534	return ret;
				1535	}
				1536
				1537	/*
				1538	* Find a superblock for the given device / mount point.
				1539	*
				1540	* Note: This is based on get_sb_bdev from fs/super.c with a few additions
				1541	* for multiple device setup. Make sure to keep it in sync.
				1542	*/
				1543	static struct dentry btrfs_mount(struct file_system_type fs_type, int flags,
				1544	const char device_name, void data)
				1545	{
				1546	struct block_device *bdev = NULL;
				1547	struct super_block *s;
				1548	struct btrfs_fs_devices *fs_devices = NULL;
				1549	struct btrfs_fs_info *fs_info = NULL;
				1550	struct security_mnt_opts new_sec_opts;
				1551	fmode_t mode = FMODE_READ;
				1552	char *subvol_name = NULL;
				1553	u64 subvol_objectid = 0;
				1554	int error = 0;
				1555
				1556	if (!(flags & MS_RDONLY))
				1557	mode \|= FMODE_WRITE;
				1558
				1559	error = btrfs_parse_early_options(data, mode, fs_type,
				1560	&subvol_name, &subvol_objectid,
				1561	&fs_devices);
				1562	if (error) {
				1563	kfree(subvol_name);
				1564	return ERR_PTR(error);
				1565	}
				1566
				1567	if (subvol_name \|\| subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
				1568	/* mount_subvol() will free subvol_name. */
				1569	return mount_subvol(subvol_name, subvol_objectid, flags,
				1570	device_name, data);
				1571	}
				1572
				1573	security_init_mnt_opts(&new_sec_opts);
				1574	if (data) {
				1575	error = parse_security_options(data, &new_sec_opts);
				1576	if (error)
				1577	return ERR_PTR(error);
				1578	}
				1579
				1580	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
				1581	if (error)
				1582	goto error_sec_opts;
				1583
				1584	/*
				1585	* Setup a dummy root and fs_info for test/set super. This is because
				1586	* we don't actually fill this stuff out until open_ctree, but we need
				1587	* it for searching for existing supers, so this lets us do that and
				1588	* then open_ctree will properly initialize everything later.
				1589	*/
				1590	fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
				1591	if (!fs_info) {
				1592	error = -ENOMEM;
				1593	goto error_sec_opts;
				1594	}
				1595
				1596	fs_info->fs_devices = fs_devices;
				1597
				1598	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
				1599	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
				1600	security_init_mnt_opts(&fs_info->security_opts);
				1601	if (!fs_info->super_copy \|\| !fs_info->super_for_commit) {
				1602	error = -ENOMEM;
				1603	goto error_fs_info;
				1604	}
				1605
				1606	error = btrfs_open_devices(fs_devices, mode, fs_type);
				1607	if (error)
				1608	goto error_fs_info;
				1609
				1610	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
				1611	error = -EACCES;
				1612	goto error_close_devices;
				1613	}
				1614
				1615	bdev = fs_devices->latest_bdev;
				1616	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags \| MS_NOSEC,
				1617	fs_info);
				1618	if (IS_ERR(s)) {
				1619	error = PTR_ERR(s);
				1620	goto error_close_devices;
				1621	}
				1622
				1623	if (s->s_root) {
				1624	btrfs_close_devices(fs_devices);
				1625	free_fs_info(fs_info);
				1626	if ((flags ^ s->s_flags) & MS_RDONLY)
				1627	error = -EBUSY;
				1628	} else {
				1629	snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
				1630	btrfs_sb(s)->bdev_holder = fs_type;
				1631	error = btrfs_fill_super(s, fs_devices, data);
				1632	}
				1633	if (error) {
				1634	deactivate_locked_super(s);
				1635	goto error_sec_opts;
				1636	}
				1637
				1638	fs_info = btrfs_sb(s);
				1639	error = setup_security_options(fs_info, s, &new_sec_opts);
				1640	if (error) {
				1641	deactivate_locked_super(s);
				1642	goto error_sec_opts;
				1643	}
				1644
				1645	return dget(s->s_root);
				1646
				1647	error_close_devices:
				1648	btrfs_close_devices(fs_devices);
				1649	error_fs_info:
				1650	free_fs_info(fs_info);
				1651	error_sec_opts:
				1652	security_free_mnt_opts(&new_sec_opts);
				1653	return ERR_PTR(error);
				1654	}
				1655
				1656	static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				1657	int new_pool_size, int old_pool_size)
				1658	{
				1659	if (new_pool_size == old_pool_size)
				1660	return;
				1661
				1662	fs_info->thread_pool_size = new_pool_size;
				1663
				1664	btrfs_info(fs_info, "resize thread pool %d -> %d",
				1665	old_pool_size, new_pool_size);
				1666
				1667	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
				1668	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
				1669	btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
				1670	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
				1671	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
				1672	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
				1673	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				1674	new_pool_size);
				1675	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
				1676	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
				1677	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
				1678	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
				1679	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				1680	new_pool_size);
				1681	}
				1682
				1683	static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
				1684	{
				1685	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
				1686	}
				1687
				1688	static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				1689	unsigned long old_opts, int flags)
				1690	{
				1691	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
				1692	(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) \|\|
				1693	(flags & MS_RDONLY))) {
				1694	/* wait for any defraggers to finish */
				1695	wait_event(fs_info->transaction_wait,
				1696	(atomic_read(&fs_info->defrag_running) == 0));
				1697	if (flags & MS_RDONLY)
				1698	sync_filesystem(fs_info->sb);
				1699	}
				1700	}
				1701
				1702	static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
				1703	unsigned long old_opts)
				1704	{
				1705	/*
				1706	* We need to cleanup all defragable inodes if the autodefragment is
				1707	* close or the filesystem is read only.
				1708	*/
				1709	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
				1710	(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) \|\| sb_rdonly(fs_info->sb))) {
				1711	btrfs_cleanup_defrag_inodes(fs_info);
				1712	}
				1713
				1714	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
				1715	}
				1716
				1717	static int btrfs_remount(struct super_block sb, int flags, char *data)
				1718	{
				1719	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				1720	struct btrfs_root *root = fs_info->tree_root;
				1721	unsigned old_flags = sb->s_flags;
				1722	unsigned long old_opts = fs_info->mount_opt;
				1723	unsigned long old_compress_type = fs_info->compress_type;
				1724	u64 old_max_inline = fs_info->max_inline;
				1725	int old_thread_pool_size = fs_info->thread_pool_size;
				1726	unsigned int old_metadata_ratio = fs_info->metadata_ratio;
				1727	int ret;
				1728
				1729	sync_filesystem(sb);
				1730	btrfs_remount_prepare(fs_info);
				1731
				1732	if (data) {
				1733	struct security_mnt_opts new_sec_opts;
				1734
				1735	security_init_mnt_opts(&new_sec_opts);
				1736	ret = parse_security_options(data, &new_sec_opts);
				1737	if (ret)
				1738	goto restore;
				1739	ret = setup_security_options(fs_info, sb,
				1740	&new_sec_opts);
				1741	if (ret) {
				1742	security_free_mnt_opts(&new_sec_opts);
				1743	goto restore;
				1744	}
				1745	}
				1746
				1747	ret = btrfs_parse_options(fs_info, data, *flags);
				1748	if (ret) {
				1749	ret = -EINVAL;
				1750	goto restore;
				1751	}
				1752
				1753	btrfs_remount_begin(fs_info, old_opts, *flags);
				1754	btrfs_resize_thread_pool(fs_info,
				1755	fs_info->thread_pool_size, old_thread_pool_size);
				1756
				1757	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
				1758	goto out;
				1759
				1760	if (*flags & MS_RDONLY) {
				1761	/*
				1762	* this also happens on 'umount -rf' or on shutdown, when
				1763	* the filesystem is busy.
				1764	*/
				1765	cancel_work_sync(&fs_info->async_reclaim_work);
				1766
				1767	/* wait for the uuid_scan task to finish */
				1768	down(&fs_info->uuid_tree_rescan_sem);
				1769	/* avoid complains from lockdep et al. */
				1770	up(&fs_info->uuid_tree_rescan_sem);
				1771
				1772	sb->s_flags \|= MS_RDONLY;
				1773
				1774	/*
				1775	* Setting MS_RDONLY will put the cleaner thread to
				1776	* sleep at the next loop if it's already active.
				1777	* If it's already asleep, we'll leave unused block
				1778	* groups on disk until we're mounted read-write again
				1779	* unless we clean them up here.
				1780	*/
				1781	btrfs_delete_unused_bgs(fs_info);
				1782
				1783	btrfs_dev_replace_suspend_for_unmount(fs_info);
				1784	btrfs_scrub_cancel(fs_info);
				1785	btrfs_pause_balance(fs_info);
				1786
				1787	ret = btrfs_commit_super(fs_info);
				1788	if (ret)
				1789	goto restore;
				1790	} else {
				1791	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
				1792	btrfs_err(fs_info,
				1793	"Remounting read-write after error is not allowed");
				1794	ret = -EINVAL;
				1795	goto restore;
				1796	}
				1797	if (fs_info->fs_devices->rw_devices == 0) {
				1798	ret = -EACCES;
				1799	goto restore;
				1800	}
				1801
				1802	if (!btrfs_check_rw_degradable(fs_info)) {
				1803	btrfs_warn(fs_info,
				1804	"too many missing devices, writeable remount is not allowed");
				1805	ret = -EACCES;
				1806	goto restore;
				1807	}
				1808
				1809	if (btrfs_super_log_root(fs_info->super_copy) != 0) {
				1810	btrfs_warn(fs_info,
				1811	"mount required to replay tree-log, cannot remount read-write");
				1812	ret = -EINVAL;
				1813	goto restore;
				1814	}
				1815
				1816	ret = btrfs_cleanup_fs_roots(fs_info);
				1817	if (ret)
				1818	goto restore;
				1819
				1820	/* recover relocation */
				1821	mutex_lock(&fs_info->cleaner_mutex);
				1822	ret = btrfs_recover_relocation(root);
				1823	mutex_unlock(&fs_info->cleaner_mutex);
				1824	if (ret)
				1825	goto restore;
				1826
				1827	ret = btrfs_resume_balance_async(fs_info);
				1828	if (ret)
				1829	goto restore;
				1830
				1831	ret = btrfs_resume_dev_replace_async(fs_info);
				1832	if (ret) {
				1833	btrfs_warn(fs_info, "failed to resume dev_replace");
				1834	goto restore;
				1835	}
				1836
				1837	btrfs_qgroup_rescan_resume(fs_info);
				1838
				1839	if (!fs_info->uuid_root) {
				1840	btrfs_info(fs_info, "creating UUID tree");
				1841	ret = btrfs_create_uuid_tree(fs_info);
				1842	if (ret) {
				1843	btrfs_warn(fs_info,
				1844	"failed to create the UUID tree %d",
				1845	ret);
				1846	goto restore;
				1847	}
				1848	}
				1849	sb->s_flags &= ~MS_RDONLY;
				1850
				1851	set_bit(BTRFS_FS_OPEN, &fs_info->flags);
				1852	}
				1853	out:
				1854	wake_up_process(fs_info->transaction_kthread);
				1855	btrfs_remount_cleanup(fs_info, old_opts);
				1856	return 0;
				1857
				1858	restore:
				1859	/* We've hit an error - don't reset MS_RDONLY */
				1860	if (sb_rdonly(sb))
				1861	old_flags \|= MS_RDONLY;
				1862	sb->s_flags = old_flags;
				1863	fs_info->mount_opt = old_opts;
				1864	fs_info->compress_type = old_compress_type;
				1865	fs_info->max_inline = old_max_inline;
				1866	btrfs_resize_thread_pool(fs_info,
				1867	old_thread_pool_size, fs_info->thread_pool_size);
				1868	fs_info->metadata_ratio = old_metadata_ratio;
				1869	btrfs_remount_cleanup(fs_info, old_opts);
				1870	return ret;
				1871	}
				1872
				1873	/* Used to sort the devices by max_avail(descending sort) */
				1874	static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
				1875	const void *dev_info2)
				1876	{
				1877	if (((struct btrfs_device_info *)dev_info1)->max_avail >
				1878	((struct btrfs_device_info *)dev_info2)->max_avail)
				1879	return -1;
				1880	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
				1881	((struct btrfs_device_info *)dev_info2)->max_avail)
				1882	return 1;
				1883	else
				1884	return 0;
				1885	}
				1886
				1887	/*
				1888	* sort the devices by max_avail, in which max free extent size of each device
				1889	* is stored.(Descending Sort)
				1890	*/
				1891	static inline void btrfs_descending_sort_devices(
				1892	struct btrfs_device_info *devices,
				1893	size_t nr_devices)
				1894	{
				1895	sort(devices, nr_devices, sizeof(struct btrfs_device_info),
				1896	btrfs_cmp_device_free_bytes, NULL);
				1897	}
				1898
				1899	/*
				1900	* The helper to calc the free space on the devices that can be used to store
				1901	* file data.
				1902	*/
				1903	static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
				1904	u64 *free_bytes)
				1905	{
				1906	struct btrfs_device_info *devices_info;
				1907	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
				1908	struct btrfs_device *device;
				1909	u64 skip_space;
				1910	u64 type;
				1911	u64 avail_space;
				1912	u64 min_stripe_size;
				1913	int min_stripes = 1, num_stripes = 1;
				1914	int i = 0, nr_devices;
				1915
				1916	/*
				1917	* We aren't under the device list lock, so this is racy-ish, but good
				1918	* enough for our purposes.
				1919	*/
				1920	nr_devices = fs_info->fs_devices->open_devices;
				1921	if (!nr_devices) {
				1922	smp_mb();
				1923	nr_devices = fs_info->fs_devices->open_devices;
				1924	ASSERT(nr_devices);
				1925	if (!nr_devices) {
				1926	*free_bytes = 0;
				1927	return 0;
				1928	}
				1929	}
				1930
				1931	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
				1932	GFP_KERNEL);
				1933	if (!devices_info)
				1934	return -ENOMEM;
				1935
				1936	/* calc min stripe number for data space allocation */
				1937	type = btrfs_data_alloc_profile(fs_info);
				1938	if (type & BTRFS_BLOCK_GROUP_RAID0) {
				1939	min_stripes = 2;
				1940	num_stripes = nr_devices;
				1941	} else if (type & BTRFS_BLOCK_GROUP_RAID1) {
				1942	min_stripes = 2;
				1943	num_stripes = 2;
				1944	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
				1945	min_stripes = 4;
				1946	num_stripes = 4;
				1947	}
				1948
				1949	if (type & BTRFS_BLOCK_GROUP_DUP)
				1950	min_stripe_size = 2 * BTRFS_STRIPE_LEN;
				1951	else
				1952	min_stripe_size = BTRFS_STRIPE_LEN;
				1953
				1954	rcu_read_lock();
				1955	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
				1956	if (!device->in_fs_metadata \|\| !device->bdev \|\|
				1957	device->is_tgtdev_for_dev_replace)
				1958	continue;
				1959
				1960	if (i >= nr_devices)
				1961	break;
				1962
				1963	avail_space = device->total_bytes - device->bytes_used;
				1964
				1965	/* align with stripe_len */
				1966	avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
				1967	avail_space *= BTRFS_STRIPE_LEN;
				1968
				1969	/*
				1970	* In order to avoid overwriting the superblock on the drive,
				1971	* btrfs starts at an offset of at least 1MB when doing chunk
				1972	* allocation.
				1973	*/
				1974	skip_space = SZ_1M;
				1975
				1976	/*
				1977	* we can use the free space in [0, skip_space - 1], subtract
				1978	* it from the total.
				1979	*/
				1980	if (avail_space && avail_space >= skip_space)
				1981	avail_space -= skip_space;
				1982	else
				1983	avail_space = 0;
				1984
				1985	if (avail_space < min_stripe_size)
				1986	continue;
				1987
				1988	devices_info[i].dev = device;
				1989	devices_info[i].max_avail = avail_space;
				1990
				1991	i++;
				1992	}
				1993	rcu_read_unlock();
				1994
				1995	nr_devices = i;
				1996
				1997	btrfs_descending_sort_devices(devices_info, nr_devices);
				1998
				1999	i = nr_devices - 1;
				2000	avail_space = 0;
				2001	while (nr_devices >= min_stripes) {
				2002	if (num_stripes > nr_devices)
				2003	num_stripes = nr_devices;
				2004
				2005	if (devices_info[i].max_avail >= min_stripe_size) {
				2006	int j;
				2007	u64 alloc_size;
				2008
				2009	avail_space += devices_info[i].max_avail * num_stripes;
				2010	alloc_size = devices_info[i].max_avail;
				2011	for (j = i + 1 - num_stripes; j <= i; j++)
				2012	devices_info[j].max_avail -= alloc_size;
				2013	}
				2014	i--;
				2015	nr_devices--;
				2016	}
				2017
				2018	kfree(devices_info);
				2019	*free_bytes = avail_space;
				2020	return 0;
				2021	}
				2022
				2023	/*
				2024	* Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
				2025	*
				2026	* If there's a redundant raid level at DATA block groups, use the respective
				2027	* multiplier to scale the sizes.
				2028	*
				2029	* Unused device space usage is based on simulating the chunk allocator
				2030	* algorithm that respects the device sizes and order of allocations. This is
				2031	* a close approximation of the actual use but there are other factors that may
				2032	* change the result (like a new metadata chunk).
				2033	*
				2034	* If metadata is exhausted, f_bavail will be 0.
				2035	*/
				2036	static int btrfs_statfs(struct dentry dentry, struct kstatfs buf)
				2037	{
				2038	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
				2039	struct btrfs_super_block *disk_super = fs_info->super_copy;
				2040	struct list_head *head = &fs_info->space_info;
				2041	struct btrfs_space_info *found;
				2042	u64 total_used = 0;
				2043	u64 total_free_data = 0;
				2044	u64 total_free_meta = 0;
				2045	int bits = dentry->d_sb->s_blocksize_bits;
				2046	__be32 fsid = (__be32 )fs_info->fsid;
				2047	unsigned factor = 1;
				2048	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
				2049	int ret;
				2050	u64 thresh = 0;
				2051	int mixed = 0;
				2052
				2053	rcu_read_lock();
				2054	list_for_each_entry_rcu(found, head, list) {
				2055	if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
				2056	int i;
				2057
				2058	total_free_data += found->disk_total - found->disk_used;
				2059	total_free_data -=
				2060	btrfs_account_ro_block_groups_free_space(found);
				2061
				2062	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
				2063	if (!list_empty(&found->block_groups[i])) {
				2064	switch (i) {
				2065	case BTRFS_RAID_DUP:
				2066	case BTRFS_RAID_RAID1:
				2067	case BTRFS_RAID_RAID10:
				2068	factor = 2;
				2069	}
				2070	}
				2071	}
				2072	}
				2073
				2074	/*
				2075	* Metadata in mixed block goup profiles are accounted in data
				2076	*/
				2077	if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
				2078	if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				2079	mixed = 1;
				2080	else
				2081	total_free_meta += found->disk_total -
				2082	found->disk_used;
				2083	}
				2084
				2085	total_used += found->disk_used;
				2086	}
				2087
				2088	rcu_read_unlock();
				2089
				2090	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
				2091	buf->f_blocks >>= bits;
				2092	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
				2093
				2094	/* Account global block reserve as used, it's in logical size already */
				2095	spin_lock(&block_rsv->lock);
				2096	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
				2097	if (buf->f_bfree >= block_rsv->size >> bits)
				2098	buf->f_bfree -= block_rsv->size >> bits;
				2099	else
				2100	buf->f_bfree = 0;
				2101	spin_unlock(&block_rsv->lock);
				2102
				2103	buf->f_bavail = div_u64(total_free_data, factor);
				2104	ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
				2105	if (ret)
				2106	return ret;
				2107	buf->f_bavail += div_u64(total_free_data, factor);
				2108	buf->f_bavail = buf->f_bavail >> bits;
				2109
				2110	/*
				2111	* We calculate the remaining metadata space minus global reserve. If
				2112	* this is (supposedly) smaller than zero, there's no space. But this
				2113	* does not hold in practice, the exhausted state happens where's still
				2114	* some positive delta. So we apply some guesswork and compare the
				2115	* delta to a 4M threshold. (Practically observed delta was ~2M.)
				2116	*
				2117	* We probably cannot calculate the exact threshold value because this
				2118	* depends on the internal reservations requested by various
				2119	* operations, so some operations that consume a few metadata will
				2120	* succeed even if the Avail is zero. But this is better than the other
				2121	* way around.
				2122	*/
				2123	thresh = 4 * 1024 * 1024;
				2124
				2125	/*
				2126	* We only want to claim there's no available space if we can no longer
				2127	* allocate chunks for our metadata profile and our global reserve will
				2128	* not fit in the free metadata space. If we aren't ->full then we
				2129	* still can allocate chunks and thus are fine using the currently
				2130	* calculated f_bavail.
				2131	*/
				2132	if (!mixed && block_rsv->space_info->full &&
				2133	total_free_meta - thresh < block_rsv->size)
				2134	buf->f_bavail = 0;
				2135
				2136	buf->f_type = BTRFS_SUPER_MAGIC;
				2137	buf->f_bsize = dentry->d_sb->s_blocksize;
				2138	buf->f_namelen = BTRFS_NAME_LEN;
				2139
				2140	/* We treat it as constant endianness (it doesn't matter _which_)
				2141	because we want the fsid to come out the same whether mounted
				2142	on a big-endian or little-endian host */
				2143	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
				2144	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
				2145	/* Mask in the root object ID too, to disambiguate subvols */
				2146	buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
				2147	buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
				2148
				2149	return 0;
				2150	}
				2151
				/*
				 * ->kill_sb handler: tear the anonymous superblock down, then release the
				 * btrfs_fs_info that was attached to it.
				 */
				static void btrfs_kill_super(struct super_block *sb)
				{
					struct btrfs_fs_info *fs_info;

					/* Fetch fs_info before the superblock is killed. */
					fs_info = btrfs_sb(sb);

					kill_anon_super(sb);
					free_fs_info(fs_info);
				}
				2158
				2159	static struct file_system_type btrfs_fs_type = {
				2160	.owner = THIS_MODULE,
				2161	.name = "btrfs",
				2162	.mount = btrfs_mount,
				2163	.kill_sb = btrfs_kill_super,
				2164	.fs_flags = FS_REQUIRES_DEV \| FS_BINARY_MOUNTDATA,
				2165	};
				2166	MODULE_ALIAS_FS("btrfs");
				2167
				2168	static int btrfs_control_open(struct inode inode, struct file file)
				2169	{
				2170	/*
				2171	* The control file's private_data is used to hold the
				2172	* transaction when it is started and is used to keep
				2173	* track of whether a transaction is already in progress.
				2174	*/
				2175	file->private_data = NULL;
				2176	return 0;
				2177	}
				2178
				2179	/*
				2180	* used by btrfsctl to scan devices when no FS is mounted
				2181	*/
				2182	static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				2183	unsigned long arg)
				2184	{
				2185	struct btrfs_ioctl_vol_args *vol;
				2186	struct btrfs_fs_devices *fs_devices;
				2187	int ret = -ENOTTY;
				2188
				2189	if (!capable(CAP_SYS_ADMIN))
				2190	return -EPERM;
				2191
				2192	vol = memdup_user((void __user )arg, sizeof(vol));
				2193	if (IS_ERR(vol))
				2194	return PTR_ERR(vol);
				2195	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
				2196
				2197	switch (cmd) {
				2198	case BTRFS_IOC_SCAN_DEV:
				2199	ret = btrfs_scan_one_device(vol->name, FMODE_READ,
				2200	&btrfs_fs_type, &fs_devices);
				2201	break;
				2202	case BTRFS_IOC_DEVICES_READY:
				2203	ret = btrfs_scan_one_device(vol->name, FMODE_READ,
				2204	&btrfs_fs_type, &fs_devices);
				2205	if (ret)
				2206	break;
				2207	ret = !(fs_devices->num_devices == fs_devices->total_devices);
				2208	break;
				2209	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
				2210	ret = btrfs_ioctl_get_supported_features((void __user*)arg);
				2211	break;
				2212	}
				2213
				2214	kfree(vol);
				2215	return ret;
				2216	}
				2217
				2218	static int btrfs_freeze(struct super_block *sb)
				2219	{
				2220	struct btrfs_trans_handle *trans;
				2221	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				2222	struct btrfs_root *root = fs_info->tree_root;
				2223
				2224	set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
				2225	/*
				2226	* We don't need a barrier here, we'll wait for any transaction that
				2227	* could be in progress on other threads (and do delayed iputs that
				2228	* we want to avoid on a frozen filesystem), or do the commit
				2229	* ourselves.
				2230	*/
				2231	trans = btrfs_attach_transaction_barrier(root);
				2232	if (IS_ERR(trans)) {
				2233	/* no transaction, don't bother */
				2234	if (PTR_ERR(trans) == -ENOENT)
				2235	return 0;
				2236	return PTR_ERR(trans);
				2237	}
				2238	return btrfs_commit_transaction(trans);
				2239	}
				2240
				2241	static int btrfs_unfreeze(struct super_block *sb)
				2242	{
				2243	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
				2244
				2245	clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
				2246	return 0;
				2247	}
				2248
				2249	static int btrfs_show_devname(struct seq_file m, struct dentry root)
				2250	{
				2251	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
				2252	struct btrfs_fs_devices *cur_devices;
				2253	struct btrfs_device dev, first_dev = NULL;
				2254	struct list_head *head;
				2255	struct rcu_string *name;
				2256
				2257	mutex_lock(&fs_info->fs_devices->device_list_mutex);
				2258	cur_devices = fs_info->fs_devices;
				2259	while (cur_devices) {
				2260	head = &cur_devices->devices;
				2261	list_for_each_entry(dev, head, dev_list) {
				2262	if (dev->missing)
				2263	continue;
				2264	if (!dev->name)
				2265	continue;
				2266	if (!first_dev \|\| dev->devid < first_dev->devid)
				2267	first_dev = dev;
				2268	}
				2269	cur_devices = cur_devices->seed;
				2270	}
				2271
				2272	if (first_dev) {
				2273	rcu_read_lock();
				2274	name = rcu_dereference(first_dev->name);
				2275	seq_escape(m, name->str, " \t\n\\");
				2276	rcu_read_unlock();
				2277	} else {
				2278	WARN_ON(1);
				2279	}
				2280	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
				2281	return 0;
				2282	}
				2283
				2284	static const struct super_operations btrfs_super_ops = {
				2285	.drop_inode = btrfs_drop_inode,
				2286	.evict_inode = btrfs_evict_inode,
				2287	.put_super = btrfs_put_super,
				2288	.sync_fs = btrfs_sync_fs,
				2289	.show_options = btrfs_show_options,
				2290	.show_devname = btrfs_show_devname,
				2291	.alloc_inode = btrfs_alloc_inode,
				2292	.destroy_inode = btrfs_destroy_inode,
				2293	.statfs = btrfs_statfs,
				2294	.remount_fs = btrfs_remount,
				2295	.freeze_fs = btrfs_freeze,
				2296	.unfreeze_fs = btrfs_unfreeze,
				2297	};
				2298
				2299	static const struct file_operations btrfs_ctl_fops = {
				2300	.open = btrfs_control_open,
				2301	.unlocked_ioctl = btrfs_control_ioctl,
				2302	.compat_ioctl = btrfs_control_ioctl,
				2303	.owner = THIS_MODULE,
				2304	.llseek = noop_llseek,
				2305	};
				2306
				2307	static struct miscdevice btrfs_misc = {
				2308	.minor = BTRFS_MINOR,
				2309	.name = "btrfs-control",
				2310	.fops = &btrfs_ctl_fops
				2311	};
				2312
				2313	MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
				2314	MODULE_ALIAS("devname:btrfs-control");
				2315
				2316	static int btrfs_interface_init(void)
				2317	{
				2318	return misc_register(&btrfs_misc);
				2319	}
				2320
				2321	static void btrfs_interface_exit(void)
				2322	{
				2323	misc_deregister(&btrfs_misc);
				2324	}
				2325
				/*
				 * Log a one-line banner naming the crc32c implementation in use plus the
				 * optional build-time features (debug, assert, integrity checker) that were
				 * compiled in.
				 */
				static void btrfs_print_mod_info(void)
				{
					const char *crc_impl = btrfs_crc32c_impl();

					pr_info("Btrfs loaded, crc32c=%s"
				#ifdef CONFIG_BTRFS_DEBUG
						", debug=on"
				#endif
				#ifdef CONFIG_BTRFS_ASSERT
						", assert=on"
				#endif
				#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
						", integrity-checker=on"
				#endif
						"\n",
						crc_impl);
				}
				2341
				2342	static int __init init_btrfs_fs(void)
				2343	{
				2344	int err;
				2345
				2346	err = btrfs_hash_init();
				2347	if (err)
				2348	return err;
				2349
				2350	btrfs_props_init();
				2351
				2352	err = btrfs_init_sysfs();
				2353	if (err)
				2354	goto free_hash;
				2355
				2356	btrfs_init_compress();
				2357
				2358	err = btrfs_init_cachep();
				2359	if (err)
				2360	goto free_compress;
				2361
				2362	err = extent_io_init();
				2363	if (err)
				2364	goto free_cachep;
				2365
				2366	err = extent_map_init();
				2367	if (err)
				2368	goto free_extent_io;
				2369
				2370	err = ordered_data_init();
				2371	if (err)
				2372	goto free_extent_map;
				2373
				2374	err = btrfs_delayed_inode_init();
				2375	if (err)
				2376	goto free_ordered_data;
				2377
				2378	err = btrfs_auto_defrag_init();
				2379	if (err)
				2380	goto free_delayed_inode;
				2381
				2382	err = btrfs_delayed_ref_init();
				2383	if (err)
				2384	goto free_auto_defrag;
				2385
				2386	err = btrfs_prelim_ref_init();
				2387	if (err)
				2388	goto free_delayed_ref;
				2389
				2390	err = btrfs_end_io_wq_init();
				2391	if (err)
				2392	goto free_prelim_ref;
				2393
				2394	err = btrfs_interface_init();
				2395	if (err)
				2396	goto free_end_io_wq;
				2397
				2398	btrfs_init_lockdep();
				2399
				2400	btrfs_print_mod_info();
				2401
				2402	err = btrfs_run_sanity_tests();
				2403	if (err)
				2404	goto unregister_ioctl;
				2405
				2406	err = register_filesystem(&btrfs_fs_type);
				2407	if (err)
				2408	goto unregister_ioctl;
				2409
				2410	return 0;
				2411
				2412	unregister_ioctl:
				2413	btrfs_interface_exit();
				2414	free_end_io_wq:
				2415	btrfs_end_io_wq_exit();
				2416	free_prelim_ref:
				2417	btrfs_prelim_ref_exit();
				2418	free_delayed_ref:
				2419	btrfs_delayed_ref_exit();
				2420	free_auto_defrag:
				2421	btrfs_auto_defrag_exit();
				2422	free_delayed_inode:
				2423	btrfs_delayed_inode_exit();
				2424	free_ordered_data:
				2425	ordered_data_exit();
				2426	free_extent_map:
				2427	extent_map_exit();
				2428	free_extent_io:
				2429	extent_io_exit();
				2430	free_cachep:
				2431	btrfs_destroy_cachep();
				2432	free_compress:
				2433	btrfs_exit_compress();
				2434	btrfs_exit_sysfs();
				2435	free_hash:
				2436	btrfs_hash_exit();
				2437	return err;
				2438	}
				2439
				2440	static void __exit exit_btrfs_fs(void)
				2441	{
				2442	btrfs_destroy_cachep();
				2443	btrfs_delayed_ref_exit();
				2444	btrfs_auto_defrag_exit();
				2445	btrfs_delayed_inode_exit();
				2446	btrfs_prelim_ref_exit();
				2447	ordered_data_exit();
				2448	extent_map_exit();
				2449	extent_io_exit();
				2450	btrfs_interface_exit();
				2451	btrfs_end_io_wq_exit();
				2452	unregister_filesystem(&btrfs_fs_type);
				2453	btrfs_exit_sysfs();
				2454	btrfs_cleanup_fs_uuids();
				2455	btrfs_exit_compress();
				2456	btrfs_hash_exit();
				2457	}
				2458
				2459	late_initcall(init_btrfs_fs);
				2460	module_exit(exit_btrfs_fs)
				2461
				2462	MODULE_LICENSE("GPL");