Blame - src/kernel/linux/v4.19/fs/btrfs/qgroup.c - T800

blob: 0cd043f03081e1ad3df197b6bdbd1273d53bf9ad [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright (C) 2011 STRATO. All rights reserved.
				4	*/
				5
				6	#include <linux/sched.h>
				7	#include <linux/pagemap.h>
				8	#include <linux/writeback.h>
				9	#include <linux/blkdev.h>
				10	#include <linux/rbtree.h>
				11	#include <linux/slab.h>
				12	#include <linux/workqueue.h>
				13	#include <linux/btrfs.h>
				14	#include <linux/sizes.h>
				15
				16	#include "ctree.h"
				17	#include "transaction.h"
				18	#include "disk-io.h"
				19	#include "locking.h"
				20	#include "ulist.h"
				21	#include "backref.h"
				22	#include "extent_io.h"
				23	#include "qgroup.h"
				24
				25
				26	/* TODO XXX FIXME
				27	* - subvol delete -> delete when ref goes to 0? delete limits also?
				28	* - reorganize keys
				29	* - compressed
				30	* - sync
				31	* - copy also limits on subvol creation
				32	* - limit
				33	* - caches for ulists
				34	* - performance benchmarks
				35	* - check all ioctl parameters
				36	*/
				37
				38	/*
				39	* Helpers to access qgroup reservation
				40	*
				41	* Callers should ensure the lock context and type are valid
				42	*/
				43
				44	static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
				45	{
				46	u64 ret = 0;
				47	int i;
				48
				49	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
				50	ret += qgroup->rsv.values[i];
				51
				52	return ret;
				53	}
				54
				55	#ifdef CONFIG_BTRFS_DEBUG
				56	static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
				57	{
				58	if (type == BTRFS_QGROUP_RSV_DATA)
				59	return "data";
				60	if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
				61	return "meta_pertrans";
				62	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
				63	return "meta_prealloc";
				64	return NULL;
				65	}
				66	#endif
				67
				68	static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
				69	struct btrfs_qgroup *qgroup, u64 num_bytes,
				70	enum btrfs_qgroup_rsv_type type)
				71	{
				72	trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
				73	qgroup->rsv.values[type] += num_bytes;
				74	}
				75
				76	static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
				77	struct btrfs_qgroup *qgroup, u64 num_bytes,
				78	enum btrfs_qgroup_rsv_type type)
				79	{
				80	trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
				81	if (qgroup->rsv.values[type] >= num_bytes) {
				82	qgroup->rsv.values[type] -= num_bytes;
				83	return;
				84	}
				85	#ifdef CONFIG_BTRFS_DEBUG
				86	WARN_RATELIMIT(1,
				87	"qgroup %llu %s reserved space underflow, have %llu to free %llu",
				88	qgroup->qgroupid, qgroup_rsv_type_str(type),
				89	qgroup->rsv.values[type], num_bytes);
				90	#endif
				91	qgroup->rsv.values[type] = 0;
				92	}
				93
				94	static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
				95	struct btrfs_qgroup *dest,
				96	struct btrfs_qgroup *src)
				97	{
				98	int i;
				99
				100	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
				101	qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
				102	}
				103
				104	static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
				105	struct btrfs_qgroup *dest,
				106	struct btrfs_qgroup *src)
				107	{
				108	int i;
				109
				110	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
				111	qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
				112	}
				113
				114	static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
				115	int mod)
				116	{
				117	if (qg->old_refcnt < seq)
				118	qg->old_refcnt = seq;
				119	qg->old_refcnt += mod;
				120	}
				121
				122	static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
				123	int mod)
				124	{
				125	if (qg->new_refcnt < seq)
				126	qg->new_refcnt = seq;
				127	qg->new_refcnt += mod;
				128	}
				129
				130	static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
				131	{
				132	if (qg->old_refcnt < seq)
				133	return 0;
				134	return qg->old_refcnt - seq;
				135	}
				136
				137	static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
				138	{
				139	if (qg->new_refcnt < seq)
				140	return 0;
				141	return qg->new_refcnt - seq;
				142	}
				143
				144	/*
				145	* glue structure to represent the relations between qgroups.
				146	*/
				147	struct btrfs_qgroup_list {
				148	struct list_head next_group;
				149	struct list_head next_member;
				150	struct btrfs_qgroup *group;
				151	struct btrfs_qgroup *member;
				152	};
				153
				154	static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
				155	{
				156	return (u64)(uintptr_t)qg;
				157	}
				158
				159	static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
				160	{
				161	return (struct btrfs_qgroup *)(uintptr_t)n->aux;
				162	}
				163
				164	static int
				165	qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
				166	int init_flags);
				167	static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
				168
				169	/* must be called with qgroup_ioctl_lock held */
				170	static struct btrfs_qgroup find_qgroup_rb(struct btrfs_fs_info fs_info,
				171	u64 qgroupid)
				172	{
				173	struct rb_node *n = fs_info->qgroup_tree.rb_node;
				174	struct btrfs_qgroup *qgroup;
				175
				176	while (n) {
				177	qgroup = rb_entry(n, struct btrfs_qgroup, node);
				178	if (qgroup->qgroupid < qgroupid)
				179	n = n->rb_left;
				180	else if (qgroup->qgroupid > qgroupid)
				181	n = n->rb_right;
				182	else
				183	return qgroup;
				184	}
				185	return NULL;
				186	}
				187
				188	/* must be called with qgroup_lock held */
				189	static struct btrfs_qgroup add_qgroup_rb(struct btrfs_fs_info fs_info,
				190	u64 qgroupid)
				191	{
				192	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
				193	struct rb_node *parent = NULL;
				194	struct btrfs_qgroup *qgroup;
				195
				196	while (*p) {
				197	parent = *p;
				198	qgroup = rb_entry(parent, struct btrfs_qgroup, node);
				199
				200	if (qgroup->qgroupid < qgroupid)
				201	p = &(*p)->rb_left;
				202	else if (qgroup->qgroupid > qgroupid)
				203	p = &(*p)->rb_right;
				204	else
				205	return qgroup;
				206	}
				207
				208	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
				209	if (!qgroup)
				210	return ERR_PTR(-ENOMEM);
				211
				212	qgroup->qgroupid = qgroupid;
				213	INIT_LIST_HEAD(&qgroup->groups);
				214	INIT_LIST_HEAD(&qgroup->members);
				215	INIT_LIST_HEAD(&qgroup->dirty);
				216
				217	rb_link_node(&qgroup->node, parent, p);
				218	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
				219
				220	return qgroup;
				221	}
				222
				223	static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
				224	{
				225	struct btrfs_qgroup_list *list;
				226
				227	list_del(&qgroup->dirty);
				228	while (!list_empty(&qgroup->groups)) {
				229	list = list_first_entry(&qgroup->groups,
				230	struct btrfs_qgroup_list, next_group);
				231	list_del(&list->next_group);
				232	list_del(&list->next_member);
				233	kfree(list);
				234	}
				235
				236	while (!list_empty(&qgroup->members)) {
				237	list = list_first_entry(&qgroup->members,
				238	struct btrfs_qgroup_list, next_member);
				239	list_del(&list->next_group);
				240	list_del(&list->next_member);
				241	kfree(list);
				242	}
				243	kfree(qgroup);
				244	}
				245
				246	/* must be called with qgroup_lock held */
				247	static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
				248	{
				249	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
				250
				251	if (!qgroup)
				252	return -ENOENT;
				253
				254	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
				255	__del_qgroup_rb(qgroup);
				256	return 0;
				257	}
				258
				259	/* must be called with qgroup_lock held */
				260	static int add_relation_rb(struct btrfs_fs_info *fs_info,
				261	u64 memberid, u64 parentid)
				262	{
				263	struct btrfs_qgroup *member;
				264	struct btrfs_qgroup *parent;
				265	struct btrfs_qgroup_list *list;
				266
				267	member = find_qgroup_rb(fs_info, memberid);
				268	parent = find_qgroup_rb(fs_info, parentid);
				269	if (!member \|\| !parent)
				270	return -ENOENT;
				271
				272	list = kzalloc(sizeof(*list), GFP_ATOMIC);
				273	if (!list)
				274	return -ENOMEM;
				275
				276	list->group = parent;
				277	list->member = member;
				278	list_add_tail(&list->next_group, &member->groups);
				279	list_add_tail(&list->next_member, &parent->members);
				280
				281	return 0;
				282	}
				283
				284	/* must be called with qgroup_lock held */
				285	static int del_relation_rb(struct btrfs_fs_info *fs_info,
				286	u64 memberid, u64 parentid)
				287	{
				288	struct btrfs_qgroup *member;
				289	struct btrfs_qgroup *parent;
				290	struct btrfs_qgroup_list *list;
				291
				292	member = find_qgroup_rb(fs_info, memberid);
				293	parent = find_qgroup_rb(fs_info, parentid);
				294	if (!member \|\| !parent)
				295	return -ENOENT;
				296
				297	list_for_each_entry(list, &member->groups, next_group) {
				298	if (list->group == parent) {
				299	list_del(&list->next_group);
				300	list_del(&list->next_member);
				301	kfree(list);
				302	return 0;
				303	}
				304	}
				305	return -ENOENT;
				306	}
				307
				308	#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
				309	int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
				310	u64 rfer, u64 excl)
				311	{
				312	struct btrfs_qgroup *qgroup;
				313
				314	qgroup = find_qgroup_rb(fs_info, qgroupid);
				315	if (!qgroup)
				316	return -EINVAL;
				317	if (qgroup->rfer != rfer \|\| qgroup->excl != excl)
				318	return -EINVAL;
				319	return 0;
				320	}
				321	#endif
				322
				323	/*
				324	* The full config is read in one go, only called from open_ctree()
				325	* It doesn't use any locking, as at this point we're still single-threaded
				326	*/
				327	int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
				328	{
				329	struct btrfs_key key;
				330	struct btrfs_key found_key;
				331	struct btrfs_root *quota_root = fs_info->quota_root;
				332	struct btrfs_path *path = NULL;
				333	struct extent_buffer *l;
				334	int slot;
				335	int ret = 0;
				336	u64 flags = 0;
				337	u64 rescan_progress = 0;
				338
				339	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				340	return 0;
				341
				342	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
				343	if (!fs_info->qgroup_ulist) {
				344	ret = -ENOMEM;
				345	goto out;
				346	}
				347
				348	path = btrfs_alloc_path();
				349	if (!path) {
				350	ret = -ENOMEM;
				351	goto out;
				352	}
				353
				354	/* default this to quota off, in case no status key is found */
				355	fs_info->qgroup_flags = 0;
				356
				357	/*
				358	* pass 1: read status, all qgroup infos and limits
				359	*/
				360	key.objectid = 0;
				361	key.type = 0;
				362	key.offset = 0;
				363	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
				364	if (ret)
				365	goto out;
				366
				367	while (1) {
				368	struct btrfs_qgroup *qgroup;
				369
				370	slot = path->slots[0];
				371	l = path->nodes[0];
				372	btrfs_item_key_to_cpu(l, &found_key, slot);
				373
				374	if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
				375	struct btrfs_qgroup_status_item *ptr;
				376
				377	ptr = btrfs_item_ptr(l, slot,
				378	struct btrfs_qgroup_status_item);
				379
				380	if (btrfs_qgroup_status_version(l, ptr) !=
				381	BTRFS_QGROUP_STATUS_VERSION) {
				382	btrfs_err(fs_info,
				383	"old qgroup version, quota disabled");
				384	goto out;
				385	}
				386	if (btrfs_qgroup_status_generation(l, ptr) !=
				387	fs_info->generation) {
				388	flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				389	btrfs_err(fs_info,
				390	"qgroup generation mismatch, marked as inconsistent");
				391	}
				392	fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
				393	ptr);
				394	rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
				395	goto next1;
				396	}
				397
				398	if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
				399	found_key.type != BTRFS_QGROUP_LIMIT_KEY)
				400	goto next1;
				401
				402	qgroup = find_qgroup_rb(fs_info, found_key.offset);
				403	if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) \|\|
				404	(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
				405	btrfs_err(fs_info, "inconsistent qgroup config");
				406	flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				407	}
				408	if (!qgroup) {
				409	qgroup = add_qgroup_rb(fs_info, found_key.offset);
				410	if (IS_ERR(qgroup)) {
				411	ret = PTR_ERR(qgroup);
				412	goto out;
				413	}
				414	}
				415	switch (found_key.type) {
				416	case BTRFS_QGROUP_INFO_KEY: {
				417	struct btrfs_qgroup_info_item *ptr;
				418
				419	ptr = btrfs_item_ptr(l, slot,
				420	struct btrfs_qgroup_info_item);
				421	qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
				422	qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
				423	qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
				424	qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
				425	/* generation currently unused */
				426	break;
				427	}
				428	case BTRFS_QGROUP_LIMIT_KEY: {
				429	struct btrfs_qgroup_limit_item *ptr;
				430
				431	ptr = btrfs_item_ptr(l, slot,
				432	struct btrfs_qgroup_limit_item);
				433	qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
				434	qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
				435	qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
				436	qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
				437	qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
				438	break;
				439	}
				440	}
				441	next1:
				442	ret = btrfs_next_item(quota_root, path);
				443	if (ret < 0)
				444	goto out;
				445	if (ret)
				446	break;
				447	}
				448	btrfs_release_path(path);
				449
				450	/*
				451	* pass 2: read all qgroup relations
				452	*/
				453	key.objectid = 0;
				454	key.type = BTRFS_QGROUP_RELATION_KEY;
				455	key.offset = 0;
				456	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
				457	if (ret)
				458	goto out;
				459	while (1) {
				460	slot = path->slots[0];
				461	l = path->nodes[0];
				462	btrfs_item_key_to_cpu(l, &found_key, slot);
				463
				464	if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
				465	goto next2;
				466
				467	if (found_key.objectid > found_key.offset) {
				468	/* parent <- member, not needed to build config */
				469	/* FIXME should we omit the key completely? */
				470	goto next2;
				471	}
				472
				473	ret = add_relation_rb(fs_info, found_key.objectid,
				474	found_key.offset);
				475	if (ret == -ENOENT) {
				476	btrfs_warn(fs_info,
				477	"orphan qgroup relation 0x%llx->0x%llx",
				478	found_key.objectid, found_key.offset);
				479	ret = 0; /* ignore the error */
				480	}
				481	if (ret)
				482	goto out;
				483	next2:
				484	ret = btrfs_next_item(quota_root, path);
				485	if (ret < 0)
				486	goto out;
				487	if (ret)
				488	break;
				489	}
				490	out:
				491	fs_info->qgroup_flags \|= flags;
				492	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
				493	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
				494	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
				495	ret >= 0)
				496	ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
				497	btrfs_free_path(path);
				498
				499	if (ret < 0) {
				500	ulist_free(fs_info->qgroup_ulist);
				501	fs_info->qgroup_ulist = NULL;
				502	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				503	}
				504
				505	return ret < 0 ? ret : 0;
				506	}
				507
				508	/*
				509	* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
				510	* first two are in single-threaded paths.And for the third one, we have set
				511	* quota_root to be null with qgroup_lock held before, so it is safe to clean
				512	* up the in-memory structures without qgroup_lock held.
				513	*/
				514	void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
				515	{
				516	struct rb_node *n;
				517	struct btrfs_qgroup *qgroup;
				518
				519	while ((n = rb_first(&fs_info->qgroup_tree))) {
				520	qgroup = rb_entry(n, struct btrfs_qgroup, node);
				521	rb_erase(n, &fs_info->qgroup_tree);
				522	__del_qgroup_rb(qgroup);
				523	}
				524	/*
				525	* we call btrfs_free_qgroup_config() when umounting
				526	* filesystem and disabling quota, so we set qgroup_ulist
				527	* to be null here to avoid double free.
				528	*/
				529	ulist_free(fs_info->qgroup_ulist);
				530	fs_info->qgroup_ulist = NULL;
				531	}
				532
				533	static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
				534	u64 dst)
				535	{
				536	int ret;
				537	struct btrfs_root *quota_root = trans->fs_info->quota_root;
				538	struct btrfs_path *path;
				539	struct btrfs_key key;
				540
				541	path = btrfs_alloc_path();
				542	if (!path)
				543	return -ENOMEM;
				544
				545	key.objectid = src;
				546	key.type = BTRFS_QGROUP_RELATION_KEY;
				547	key.offset = dst;
				548
				549	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
				550
				551	btrfs_mark_buffer_dirty(path->nodes[0]);
				552
				553	btrfs_free_path(path);
				554	return ret;
				555	}
				556
				557	static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
				558	u64 dst)
				559	{
				560	int ret;
				561	struct btrfs_root *quota_root = trans->fs_info->quota_root;
				562	struct btrfs_path *path;
				563	struct btrfs_key key;
				564
				565	path = btrfs_alloc_path();
				566	if (!path)
				567	return -ENOMEM;
				568
				569	key.objectid = src;
				570	key.type = BTRFS_QGROUP_RELATION_KEY;
				571	key.offset = dst;
				572
				573	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
				574	if (ret < 0)
				575	goto out;
				576
				577	if (ret > 0) {
				578	ret = -ENOENT;
				579	goto out;
				580	}
				581
				582	ret = btrfs_del_item(trans, quota_root, path);
				583	out:
				584	btrfs_free_path(path);
				585	return ret;
				586	}
				587
				588	static int add_qgroup_item(struct btrfs_trans_handle *trans,
				589	struct btrfs_root *quota_root, u64 qgroupid)
				590	{
				591	int ret;
				592	struct btrfs_path *path;
				593	struct btrfs_qgroup_info_item *qgroup_info;
				594	struct btrfs_qgroup_limit_item *qgroup_limit;
				595	struct extent_buffer *leaf;
				596	struct btrfs_key key;
				597
				598	if (btrfs_is_testing(quota_root->fs_info))
				599	return 0;
				600
				601	path = btrfs_alloc_path();
				602	if (!path)
				603	return -ENOMEM;
				604
				605	key.objectid = 0;
				606	key.type = BTRFS_QGROUP_INFO_KEY;
				607	key.offset = qgroupid;
				608
				609	/*
				610	* Avoid a transaction abort by catching -EEXIST here. In that
				611	* case, we proceed by re-initializing the existing structure
				612	* on disk.
				613	*/
				614
				615	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				616	sizeof(*qgroup_info));
				617	if (ret && ret != -EEXIST)
				618	goto out;
				619
				620	leaf = path->nodes[0];
				621	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				622	struct btrfs_qgroup_info_item);
				623	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
				624	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
				625	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
				626	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
				627	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
				628
				629	btrfs_mark_buffer_dirty(leaf);
				630
				631	btrfs_release_path(path);
				632
				633	key.type = BTRFS_QGROUP_LIMIT_KEY;
				634	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				635	sizeof(*qgroup_limit));
				636	if (ret && ret != -EEXIST)
				637	goto out;
				638
				639	leaf = path->nodes[0];
				640	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				641	struct btrfs_qgroup_limit_item);
				642	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
				643	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
				644	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
				645	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
				646	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
				647
				648	btrfs_mark_buffer_dirty(leaf);
				649
				650	ret = 0;
				651	out:
				652	btrfs_free_path(path);
				653	return ret;
				654	}
				655
				656	static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
				657	{
				658	int ret;
				659	struct btrfs_root *quota_root = trans->fs_info->quota_root;
				660	struct btrfs_path *path;
				661	struct btrfs_key key;
				662
				663	path = btrfs_alloc_path();
				664	if (!path)
				665	return -ENOMEM;
				666
				667	key.objectid = 0;
				668	key.type = BTRFS_QGROUP_INFO_KEY;
				669	key.offset = qgroupid;
				670	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
				671	if (ret < 0)
				672	goto out;
				673
				674	if (ret > 0) {
				675	ret = -ENOENT;
				676	goto out;
				677	}
				678
				679	ret = btrfs_del_item(trans, quota_root, path);
				680	if (ret)
				681	goto out;
				682
				683	btrfs_release_path(path);
				684
				685	key.type = BTRFS_QGROUP_LIMIT_KEY;
				686	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
				687	if (ret < 0)
				688	goto out;
				689
				690	if (ret > 0) {
				691	ret = -ENOENT;
				692	goto out;
				693	}
				694
				695	ret = btrfs_del_item(trans, quota_root, path);
				696
				697	out:
				698	btrfs_free_path(path);
				699	return ret;
				700	}
				701
				702	static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				703	struct btrfs_qgroup *qgroup)
				704	{
				705	struct btrfs_root *quota_root = trans->fs_info->quota_root;
				706	struct btrfs_path *path;
				707	struct btrfs_key key;
				708	struct extent_buffer *l;
				709	struct btrfs_qgroup_limit_item *qgroup_limit;
				710	int ret;
				711	int slot;
				712
				713	key.objectid = 0;
				714	key.type = BTRFS_QGROUP_LIMIT_KEY;
				715	key.offset = qgroup->qgroupid;
				716
				717	path = btrfs_alloc_path();
				718	if (!path)
				719	return -ENOMEM;
				720
				721	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
				722	if (ret > 0)
				723	ret = -ENOENT;
				724
				725	if (ret)
				726	goto out;
				727
				728	l = path->nodes[0];
				729	slot = path->slots[0];
				730	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
				731	btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
				732	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
				733	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
				734	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
				735	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
				736
				737	btrfs_mark_buffer_dirty(l);
				738
				739	out:
				740	btrfs_free_path(path);
				741	return ret;
				742	}
				743
				744	static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				745	struct btrfs_qgroup *qgroup)
				746	{
				747	struct btrfs_fs_info *fs_info = trans->fs_info;
				748	struct btrfs_root *quota_root = fs_info->quota_root;
				749	struct btrfs_path *path;
				750	struct btrfs_key key;
				751	struct extent_buffer *l;
				752	struct btrfs_qgroup_info_item *qgroup_info;
				753	int ret;
				754	int slot;
				755
				756	if (btrfs_is_testing(fs_info))
				757	return 0;
				758
				759	key.objectid = 0;
				760	key.type = BTRFS_QGROUP_INFO_KEY;
				761	key.offset = qgroup->qgroupid;
				762
				763	path = btrfs_alloc_path();
				764	if (!path)
				765	return -ENOMEM;
				766
				767	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
				768	if (ret > 0)
				769	ret = -ENOENT;
				770
				771	if (ret)
				772	goto out;
				773
				774	l = path->nodes[0];
				775	slot = path->slots[0];
				776	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
				777	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
				778	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
				779	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
				780	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
				781	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
				782
				783	btrfs_mark_buffer_dirty(l);
				784
				785	out:
				786	btrfs_free_path(path);
				787	return ret;
				788	}
				789
				790	static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
				791	{
				792	struct btrfs_fs_info *fs_info = trans->fs_info;
				793	struct btrfs_root *quota_root = fs_info->quota_root;
				794	struct btrfs_path *path;
				795	struct btrfs_key key;
				796	struct extent_buffer *l;
				797	struct btrfs_qgroup_status_item *ptr;
				798	int ret;
				799	int slot;
				800
				801	key.objectid = 0;
				802	key.type = BTRFS_QGROUP_STATUS_KEY;
				803	key.offset = 0;
				804
				805	path = btrfs_alloc_path();
				806	if (!path)
				807	return -ENOMEM;
				808
				809	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
				810	if (ret > 0)
				811	ret = -ENOENT;
				812
				813	if (ret)
				814	goto out;
				815
				816	l = path->nodes[0];
				817	slot = path->slots[0];
				818	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
				819	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
				820	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
				821	btrfs_set_qgroup_status_rescan(l, ptr,
				822	fs_info->qgroup_rescan_progress.objectid);
				823
				824	btrfs_mark_buffer_dirty(l);
				825
				826	out:
				827	btrfs_free_path(path);
				828	return ret;
				829	}
				830
				831	/*
				832	* called with qgroup_lock held
				833	*/
				834	static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				835	struct btrfs_root *root)
				836	{
				837	struct btrfs_path *path;
				838	struct btrfs_key key;
				839	struct extent_buffer *leaf = NULL;
				840	int ret;
				841	int nr = 0;
				842
				843	path = btrfs_alloc_path();
				844	if (!path)
				845	return -ENOMEM;
				846
				847	path->leave_spinning = 1;
				848
				849	key.objectid = 0;
				850	key.offset = 0;
				851	key.type = 0;
				852
				853	while (1) {
				854	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
				855	if (ret < 0)
				856	goto out;
				857	leaf = path->nodes[0];
				858	nr = btrfs_header_nritems(leaf);
				859	if (!nr)
				860	break;
				861	/*
				862	* delete the leaf one by one
				863	* since the whole tree is going
				864	* to be deleted.
				865	*/
				866	path->slots[0] = 0;
				867	ret = btrfs_del_items(trans, root, path, 0, nr);
				868	if (ret)
				869	goto out;
				870
				871	btrfs_release_path(path);
				872	}
				873	ret = 0;
				874	out:
				875	btrfs_free_path(path);
				876	return ret;
				877	}
				878
				879	int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
				880	{
				881	struct btrfs_root *quota_root;
				882	struct btrfs_root *tree_root = fs_info->tree_root;
				883	struct btrfs_path *path = NULL;
				884	struct btrfs_qgroup_status_item *ptr;
				885	struct extent_buffer *leaf;
				886	struct btrfs_key key;
				887	struct btrfs_key found_key;
				888	struct btrfs_qgroup *qgroup = NULL;
				889	struct btrfs_trans_handle *trans = NULL;
				890	int ret = 0;
				891	int slot;
				892
				893	mutex_lock(&fs_info->qgroup_ioctl_lock);
				894	if (fs_info->quota_root)
				895	goto out;
				896
				897	/*
				898	* 1 for quota root item
				899	* 1 for BTRFS_QGROUP_STATUS item
				900	*
				901	* Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
				902	* per subvolume. However those are not currently reserved since it
				903	* would be a lot of overkill.
				904	*/
				905	trans = btrfs_start_transaction(tree_root, 2);
				906	if (IS_ERR(trans)) {
				907	ret = PTR_ERR(trans);
				908	trans = NULL;
				909	goto out;
				910	}
				911
				912	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
				913	if (!fs_info->qgroup_ulist) {
				914	ret = -ENOMEM;
				915	btrfs_abort_transaction(trans, ret);
				916	goto out;
				917	}
				918
				919	/*
				920	* initially create the quota tree
				921	*/
				922	quota_root = btrfs_create_tree(trans, fs_info,
				923	BTRFS_QUOTA_TREE_OBJECTID);
				924	if (IS_ERR(quota_root)) {
				925	ret = PTR_ERR(quota_root);
				926	btrfs_abort_transaction(trans, ret);
				927	goto out;
				928	}
				929
				930	path = btrfs_alloc_path();
				931	if (!path) {
				932	ret = -ENOMEM;
				933	btrfs_abort_transaction(trans, ret);
				934	goto out_free_root;
				935	}
				936
				937	key.objectid = 0;
				938	key.type = BTRFS_QGROUP_STATUS_KEY;
				939	key.offset = 0;
				940
				941	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				942	sizeof(*ptr));
				943	if (ret) {
				944	btrfs_abort_transaction(trans, ret);
				945	goto out_free_path;
				946	}
				947
				948	leaf = path->nodes[0];
				949	ptr = btrfs_item_ptr(leaf, path->slots[0],
				950	struct btrfs_qgroup_status_item);
				951	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
				952	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
				953	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON \|
				954	BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				955	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
				956	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
				957
				958	btrfs_mark_buffer_dirty(leaf);
				959
				960	key.objectid = 0;
				961	key.type = BTRFS_ROOT_REF_KEY;
				962	key.offset = 0;
				963
				964	btrfs_release_path(path);
				965	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
				966	if (ret > 0)
				967	goto out_add_root;
				968	if (ret < 0) {
				969	btrfs_abort_transaction(trans, ret);
				970	goto out_free_path;
				971	}
				972
				973	while (1) {
				974	slot = path->slots[0];
				975	leaf = path->nodes[0];
				976	btrfs_item_key_to_cpu(leaf, &found_key, slot);
				977
				978	if (found_key.type == BTRFS_ROOT_REF_KEY) {
				979	ret = add_qgroup_item(trans, quota_root,
				980	found_key.offset);
				981	if (ret) {
				982	btrfs_abort_transaction(trans, ret);
				983	goto out_free_path;
				984	}
				985
				986	qgroup = add_qgroup_rb(fs_info, found_key.offset);
				987	if (IS_ERR(qgroup)) {
				988	ret = PTR_ERR(qgroup);
				989	btrfs_abort_transaction(trans, ret);
				990	goto out_free_path;
				991	}
				992	}
				993	ret = btrfs_next_item(tree_root, path);
				994	if (ret < 0) {
				995	btrfs_abort_transaction(trans, ret);
				996	goto out_free_path;
				997	}
				998	if (ret)
				999	break;
				1000	}
				1001
				1002	out_add_root:
				1003	btrfs_release_path(path);
				1004	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
				1005	if (ret) {
				1006	btrfs_abort_transaction(trans, ret);
				1007	goto out_free_path;
				1008	}
				1009
				1010	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
				1011	if (IS_ERR(qgroup)) {
				1012	ret = PTR_ERR(qgroup);
				1013	btrfs_abort_transaction(trans, ret);
				1014	goto out_free_path;
				1015	}
				1016
				1017	ret = btrfs_commit_transaction(trans);
				1018	trans = NULL;
				1019	if (ret)
				1020	goto out_free_path;
				1021
				1022	/*
				1023	* Set quota enabled flag after committing the transaction, to avoid
				1024	* deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
				1025	* creation.
				1026	*/
				1027	spin_lock(&fs_info->qgroup_lock);
				1028	fs_info->quota_root = quota_root;
				1029	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
				1030	spin_unlock(&fs_info->qgroup_lock);
				1031
				1032	ret = qgroup_rescan_init(fs_info, 0, 1);
				1033	if (!ret) {
				1034	qgroup_rescan_zero_tracking(fs_info);
				1035	btrfs_queue_work(fs_info->qgroup_rescan_workers,
				1036	&fs_info->qgroup_rescan_work);
				1037	}
				1038
				1039	out_free_path:
				1040	btrfs_free_path(path);
				1041	out_free_root:
				1042	if (ret) {
				1043	free_extent_buffer(quota_root->node);
				1044	free_extent_buffer(quota_root->commit_root);
				1045	kfree(quota_root);
				1046	}
				1047	out:
				1048	if (ret) {
				1049	ulist_free(fs_info->qgroup_ulist);
				1050	fs_info->qgroup_ulist = NULL;
				1051	if (trans)
				1052	btrfs_end_transaction(trans);
				1053	}
				1054	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1055	return ret;
				1056	}
				1057
				1058	int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
				1059	{
				1060	struct btrfs_root *quota_root;
				1061	struct btrfs_trans_handle *trans = NULL;
				1062	int ret = 0;
				1063
				1064	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1065	if (!fs_info->quota_root)
				1066	goto out;
				1067
				1068	/*
				1069	* 1 For the root item
				1070	*
				1071	* We should also reserve enough items for the quota tree deletion in
				1072	* btrfs_clean_quota_tree but this is not done.
				1073	*/
				1074	trans = btrfs_start_transaction(fs_info->tree_root, 1);
				1075	if (IS_ERR(trans)) {
				1076	ret = PTR_ERR(trans);
				1077	goto out;
				1078	}
				1079
				1080	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
				1081	btrfs_qgroup_wait_for_completion(fs_info, false);
				1082	spin_lock(&fs_info->qgroup_lock);
				1083	quota_root = fs_info->quota_root;
				1084	fs_info->quota_root = NULL;
				1085	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
				1086	spin_unlock(&fs_info->qgroup_lock);
				1087
				1088	btrfs_free_qgroup_config(fs_info);
				1089
				1090	ret = btrfs_clean_quota_tree(trans, quota_root);
				1091	if (ret) {
				1092	btrfs_abort_transaction(trans, ret);
				1093	goto end_trans;
				1094	}
				1095
				1096	ret = btrfs_del_root(trans, &quota_root->root_key);
				1097	if (ret) {
				1098	btrfs_abort_transaction(trans, ret);
				1099	goto end_trans;
				1100	}
				1101
				1102	list_del(&quota_root->dirty_list);
				1103
				1104	btrfs_tree_lock(quota_root->node);
				1105	clean_tree_block(fs_info, quota_root->node);
				1106	btrfs_tree_unlock(quota_root->node);
				1107	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
				1108
				1109	free_extent_buffer(quota_root->node);
				1110	free_extent_buffer(quota_root->commit_root);
				1111	kfree(quota_root);
				1112
				1113	end_trans:
				1114	ret = btrfs_end_transaction(trans);
				1115	out:
				1116	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1117	return ret;
				1118	}
				1119
				1120	static void qgroup_dirty(struct btrfs_fs_info *fs_info,
				1121	struct btrfs_qgroup *qgroup)
				1122	{
				1123	if (list_empty(&qgroup->dirty))
				1124	list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
				1125	}
				1126
				1127	/*
				1128	* The easy accounting, we're updating qgroup relationship whose child qgroup
				1129	* only has exclusive extents.
				1130	*
				1131	* In this case, all exclsuive extents will also be exlusive for parent, so
				1132	* excl/rfer just get added/removed.
				1133	*
				1134	* So is qgroup reservation space, which should also be added/removed to
				1135	* parent.
				1136	* Or when child tries to release reservation space, parent will underflow its
				1137	* reservation (for relationship adding case).
				1138	*
				1139	* Caller should hold fs_info->qgroup_lock.
				1140	*/
				1141	static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				1142	struct ulist *tmp, u64 ref_root,
				1143	struct btrfs_qgroup *src, int sign)
				1144	{
				1145	struct btrfs_qgroup *qgroup;
				1146	struct btrfs_qgroup_list *glist;
				1147	struct ulist_node *unode;
				1148	struct ulist_iterator uiter;
				1149	u64 num_bytes = src->excl;
				1150	int ret = 0;
				1151
				1152	qgroup = find_qgroup_rb(fs_info, ref_root);
				1153	if (!qgroup)
				1154	goto out;
				1155
				1156	qgroup->rfer += sign * num_bytes;
				1157	qgroup->rfer_cmpr += sign * num_bytes;
				1158
				1159	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
				1160	qgroup->excl += sign * num_bytes;
				1161	qgroup->excl_cmpr += sign * num_bytes;
				1162
				1163	if (sign > 0)
				1164	qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
				1165	else
				1166	qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
				1167
				1168	qgroup_dirty(fs_info, qgroup);
				1169
				1170	/* Get all of the parent groups that contain this qgroup */
				1171	list_for_each_entry(glist, &qgroup->groups, next_group) {
				1172	ret = ulist_add(tmp, glist->group->qgroupid,
				1173	qgroup_to_aux(glist->group), GFP_ATOMIC);
				1174	if (ret < 0)
				1175	goto out;
				1176	}
				1177
				1178	/* Iterate all of the parents and adjust their reference counts */
				1179	ULIST_ITER_INIT(&uiter);
				1180	while ((unode = ulist_next(tmp, &uiter))) {
				1181	qgroup = unode_aux_to_qgroup(unode);
				1182	qgroup->rfer += sign * num_bytes;
				1183	qgroup->rfer_cmpr += sign * num_bytes;
				1184	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
				1185	qgroup->excl += sign * num_bytes;
				1186	if (sign > 0)
				1187	qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
				1188	else
				1189	qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
				1190	qgroup->excl_cmpr += sign * num_bytes;
				1191	qgroup_dirty(fs_info, qgroup);
				1192
				1193	/* Add any parents of the parents */
				1194	list_for_each_entry(glist, &qgroup->groups, next_group) {
				1195	ret = ulist_add(tmp, glist->group->qgroupid,
				1196	qgroup_to_aux(glist->group), GFP_ATOMIC);
				1197	if (ret < 0)
				1198	goto out;
				1199	}
				1200	}
				1201	ret = 0;
				1202	out:
				1203	return ret;
				1204	}
				1205
				1206
				1207	/*
				1208	* Quick path for updating qgroup with only excl refs.
				1209	*
				1210	* In that case, just update all parent will be enough.
				1211	* Or we needs to do a full rescan.
				1212	* Caller should also hold fs_info->qgroup_lock.
				1213	*
				1214	* Return 0 for quick update, return >0 for need to full rescan
				1215	* and mark INCONSISTENT flag.
				1216	* Return < 0 for other error.
				1217	*/
				1218	static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				1219	struct ulist *tmp, u64 src, u64 dst,
				1220	int sign)
				1221	{
				1222	struct btrfs_qgroup *qgroup;
				1223	int ret = 1;
				1224	int err = 0;
				1225
				1226	qgroup = find_qgroup_rb(fs_info, src);
				1227	if (!qgroup)
				1228	goto out;
				1229	if (qgroup->excl == qgroup->rfer) {
				1230	ret = 0;
				1231	err = __qgroup_excl_accounting(fs_info, tmp, dst,
				1232	qgroup, sign);
				1233	if (err < 0) {
				1234	ret = err;
				1235	goto out;
				1236	}
				1237	}
				1238	out:
				1239	if (ret)
				1240	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				1241	return ret;
				1242	}
				1243
				1244	int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
				1245	u64 dst)
				1246	{
				1247	struct btrfs_fs_info *fs_info = trans->fs_info;
				1248	struct btrfs_root *quota_root;
				1249	struct btrfs_qgroup *parent;
				1250	struct btrfs_qgroup *member;
				1251	struct btrfs_qgroup_list *list;
				1252	struct ulist *tmp;
				1253	int ret = 0;
				1254
				1255	/* Check the level of src and dst first */
				1256	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
				1257	return -EINVAL;
				1258
				1259	tmp = ulist_alloc(GFP_KERNEL);
				1260	if (!tmp)
				1261	return -ENOMEM;
				1262
				1263	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1264	quota_root = fs_info->quota_root;
				1265	if (!quota_root) {
				1266	ret = -EINVAL;
				1267	goto out;
				1268	}
				1269	member = find_qgroup_rb(fs_info, src);
				1270	parent = find_qgroup_rb(fs_info, dst);
				1271	if (!member \|\| !parent) {
				1272	ret = -EINVAL;
				1273	goto out;
				1274	}
				1275
				1276	/* check if such qgroup relation exist firstly */
				1277	list_for_each_entry(list, &member->groups, next_group) {
				1278	if (list->group == parent) {
				1279	ret = -EEXIST;
				1280	goto out;
				1281	}
				1282	}
				1283
				1284	ret = add_qgroup_relation_item(trans, src, dst);
				1285	if (ret)
				1286	goto out;
				1287
				1288	ret = add_qgroup_relation_item(trans, dst, src);
				1289	if (ret) {
				1290	del_qgroup_relation_item(trans, src, dst);
				1291	goto out;
				1292	}
				1293
				1294	spin_lock(&fs_info->qgroup_lock);
				1295	ret = add_relation_rb(fs_info, src, dst);
				1296	if (ret < 0) {
				1297	spin_unlock(&fs_info->qgroup_lock);
				1298	goto out;
				1299	}
				1300	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
				1301	spin_unlock(&fs_info->qgroup_lock);
				1302	out:
				1303	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1304	ulist_free(tmp);
				1305	return ret;
				1306	}
				1307
				1308	static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
				1309	u64 dst)
				1310	{
				1311	struct btrfs_fs_info *fs_info = trans->fs_info;
				1312	struct btrfs_root *quota_root;
				1313	struct btrfs_qgroup *parent;
				1314	struct btrfs_qgroup *member;
				1315	struct btrfs_qgroup_list *list;
				1316	struct ulist *tmp;
				1317	int ret = 0;
				1318	int err;
				1319
				1320	tmp = ulist_alloc(GFP_KERNEL);
				1321	if (!tmp)
				1322	return -ENOMEM;
				1323
				1324	quota_root = fs_info->quota_root;
				1325	if (!quota_root) {
				1326	ret = -EINVAL;
				1327	goto out;
				1328	}
				1329
				1330	member = find_qgroup_rb(fs_info, src);
				1331	parent = find_qgroup_rb(fs_info, dst);
				1332	if (!member \|\| !parent) {
				1333	ret = -EINVAL;
				1334	goto out;
				1335	}
				1336
				1337	/* check if such qgroup relation exist firstly */
				1338	list_for_each_entry(list, &member->groups, next_group) {
				1339	if (list->group == parent)
				1340	goto exist;
				1341	}
				1342	ret = -ENOENT;
				1343	goto out;
				1344	exist:
				1345	ret = del_qgroup_relation_item(trans, src, dst);
				1346	err = del_qgroup_relation_item(trans, dst, src);
				1347	if (err && !ret)
				1348	ret = err;
				1349
				1350	spin_lock(&fs_info->qgroup_lock);
				1351	del_relation_rb(fs_info, src, dst);
				1352	ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
				1353	spin_unlock(&fs_info->qgroup_lock);
				1354	out:
				1355	ulist_free(tmp);
				1356	return ret;
				1357	}
				1358
				1359	int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
				1360	u64 dst)
				1361	{
				1362	struct btrfs_fs_info *fs_info = trans->fs_info;
				1363	int ret = 0;
				1364
				1365	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1366	ret = __del_qgroup_relation(trans, src, dst);
				1367	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1368
				1369	return ret;
				1370	}
				1371
				1372	int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
				1373	{
				1374	struct btrfs_fs_info *fs_info = trans->fs_info;
				1375	struct btrfs_root *quota_root;
				1376	struct btrfs_qgroup *qgroup;
				1377	int ret = 0;
				1378
				1379	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1380	quota_root = fs_info->quota_root;
				1381	if (!quota_root) {
				1382	ret = -EINVAL;
				1383	goto out;
				1384	}
				1385	qgroup = find_qgroup_rb(fs_info, qgroupid);
				1386	if (qgroup) {
				1387	ret = -EEXIST;
				1388	goto out;
				1389	}
				1390
				1391	ret = add_qgroup_item(trans, quota_root, qgroupid);
				1392	if (ret)
				1393	goto out;
				1394
				1395	spin_lock(&fs_info->qgroup_lock);
				1396	qgroup = add_qgroup_rb(fs_info, qgroupid);
				1397	spin_unlock(&fs_info->qgroup_lock);
				1398
				1399	if (IS_ERR(qgroup))
				1400	ret = PTR_ERR(qgroup);
				1401	out:
				1402	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1403	return ret;
				1404	}
				1405
				1406	int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
				1407	{
				1408	struct btrfs_fs_info *fs_info = trans->fs_info;
				1409	struct btrfs_root *quota_root;
				1410	struct btrfs_qgroup *qgroup;
				1411	struct btrfs_qgroup_list *list;
				1412	int ret = 0;
				1413
				1414	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1415	quota_root = fs_info->quota_root;
				1416	if (!quota_root) {
				1417	ret = -EINVAL;
				1418	goto out;
				1419	}
				1420
				1421	qgroup = find_qgroup_rb(fs_info, qgroupid);
				1422	if (!qgroup) {
				1423	ret = -ENOENT;
				1424	goto out;
				1425	} else {
				1426	/* check if there are no children of this qgroup */
				1427	if (!list_empty(&qgroup->members)) {
				1428	ret = -EBUSY;
				1429	goto out;
				1430	}
				1431	}
				1432	ret = del_qgroup_item(trans, qgroupid);
				1433	if (ret && ret != -ENOENT)
				1434	goto out;
				1435
				1436	while (!list_empty(&qgroup->groups)) {
				1437	list = list_first_entry(&qgroup->groups,
				1438	struct btrfs_qgroup_list, next_group);
				1439	ret = __del_qgroup_relation(trans, qgroupid,
				1440	list->group->qgroupid);
				1441	if (ret)
				1442	goto out;
				1443	}
				1444
				1445	spin_lock(&fs_info->qgroup_lock);
				1446	del_qgroup_rb(fs_info, qgroupid);
				1447	spin_unlock(&fs_info->qgroup_lock);
				1448	out:
				1449	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1450	return ret;
				1451	}
				1452
				1453	int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
				1454	struct btrfs_qgroup_limit *limit)
				1455	{
				1456	struct btrfs_fs_info *fs_info = trans->fs_info;
				1457	struct btrfs_root *quota_root;
				1458	struct btrfs_qgroup *qgroup;
				1459	int ret = 0;
				1460	/* Sometimes we would want to clear the limit on this qgroup.
				1461	* To meet this requirement, we treat the -1 as a special value
				1462	* which tell kernel to clear the limit on this qgroup.
				1463	*/
				1464	const u64 CLEAR_VALUE = -1;
				1465
				1466	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1467	quota_root = fs_info->quota_root;
				1468	if (!quota_root) {
				1469	ret = -EINVAL;
				1470	goto out;
				1471	}
				1472
				1473	qgroup = find_qgroup_rb(fs_info, qgroupid);
				1474	if (!qgroup) {
				1475	ret = -ENOENT;
				1476	goto out;
				1477	}
				1478
				1479	spin_lock(&fs_info->qgroup_lock);
				1480	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
				1481	if (limit->max_rfer == CLEAR_VALUE) {
				1482	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
				1483	limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
				1484	qgroup->max_rfer = 0;
				1485	} else {
				1486	qgroup->max_rfer = limit->max_rfer;
				1487	}
				1488	}
				1489	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
				1490	if (limit->max_excl == CLEAR_VALUE) {
				1491	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
				1492	limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
				1493	qgroup->max_excl = 0;
				1494	} else {
				1495	qgroup->max_excl = limit->max_excl;
				1496	}
				1497	}
				1498	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
				1499	if (limit->rsv_rfer == CLEAR_VALUE) {
				1500	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
				1501	limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
				1502	qgroup->rsv_rfer = 0;
				1503	} else {
				1504	qgroup->rsv_rfer = limit->rsv_rfer;
				1505	}
				1506	}
				1507	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
				1508	if (limit->rsv_excl == CLEAR_VALUE) {
				1509	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
				1510	limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
				1511	qgroup->rsv_excl = 0;
				1512	} else {
				1513	qgroup->rsv_excl = limit->rsv_excl;
				1514	}
				1515	}
				1516	qgroup->lim_flags \|= limit->flags;
				1517
				1518	spin_unlock(&fs_info->qgroup_lock);
				1519
				1520	ret = update_qgroup_limit_item(trans, qgroup);
				1521	if (ret) {
				1522	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				1523	btrfs_info(fs_info, "unable to update quota limit for %llu",
				1524	qgroupid);
				1525	}
				1526
				1527	out:
				1528	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1529	return ret;
				1530	}
				1531
				1532	int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
				1533	struct btrfs_delayed_ref_root *delayed_refs,
				1534	struct btrfs_qgroup_extent_record *record)
				1535	{
				1536	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
				1537	struct rb_node *parent_node = NULL;
				1538	struct btrfs_qgroup_extent_record *entry;
				1539	u64 bytenr = record->bytenr;
				1540
				1541	lockdep_assert_held(&delayed_refs->lock);
				1542	trace_btrfs_qgroup_trace_extent(fs_info, record);
				1543
				1544	while (*p) {
				1545	parent_node = *p;
				1546	entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
				1547	node);
				1548	if (bytenr < entry->bytenr)
				1549	p = &(*p)->rb_left;
				1550	else if (bytenr > entry->bytenr)
				1551	p = &(*p)->rb_right;
				1552	else
				1553	return 1;
				1554	}
				1555
				1556	rb_link_node(&record->node, parent_node, p);
				1557	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
				1558	return 0;
				1559	}
				1560
				1561	int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				1562	struct btrfs_qgroup_extent_record *qrecord)
				1563	{
				1564	struct ulist *old_root;
				1565	u64 bytenr = qrecord->bytenr;
				1566	int ret;
				1567
				1568	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
				1569	if (ret < 0) {
				1570	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				1571	btrfs_warn(fs_info,
				1572	"error accounting new delayed refs extent (err code: %d), quota inconsistent",
				1573	ret);
				1574	return 0;
				1575	}
				1576
				1577	/*
				1578	* Here we don't need to get the lock of
				1579	* trans->transaction->delayed_refs, since inserted qrecord won't
				1580	* be deleted, only qrecord->node may be modified (new qrecord insert)
				1581	*
				1582	* So modifying qrecord->old_roots is safe here
				1583	*/
				1584	qrecord->old_roots = old_root;
				1585	return 0;
				1586	}
				1587
				1588	int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				1589	u64 num_bytes, gfp_t gfp_flag)
				1590	{
				1591	struct btrfs_fs_info *fs_info = trans->fs_info;
				1592	struct btrfs_qgroup_extent_record *record;
				1593	struct btrfs_delayed_ref_root *delayed_refs;
				1594	int ret;
				1595
				1596	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
				1597	\|\| bytenr == 0 \|\| num_bytes == 0)
				1598	return 0;
				1599	record = kmalloc(sizeof(*record), gfp_flag);
				1600	if (!record)
				1601	return -ENOMEM;
				1602
				1603	delayed_refs = &trans->transaction->delayed_refs;
				1604	record->bytenr = bytenr;
				1605	record->num_bytes = num_bytes;
				1606	record->old_roots = NULL;
				1607
				1608	spin_lock(&delayed_refs->lock);
				1609	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
				1610	spin_unlock(&delayed_refs->lock);
				1611	if (ret > 0) {
				1612	kfree(record);
				1613	return 0;
				1614	}
				1615	return btrfs_qgroup_trace_extent_post(fs_info, record);
				1616	}
				1617
				1618	int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				1619	struct extent_buffer *eb)
				1620	{
				1621	struct btrfs_fs_info *fs_info = trans->fs_info;
				1622	int nr = btrfs_header_nritems(eb);
				1623	int i, extent_type, ret;
				1624	struct btrfs_key key;
				1625	struct btrfs_file_extent_item *fi;
				1626	u64 bytenr, num_bytes;
				1627
				1628	/* We can be called directly from walk_up_proc() */
				1629	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				1630	return 0;
				1631
				1632	for (i = 0; i < nr; i++) {
				1633	btrfs_item_key_to_cpu(eb, &key, i);
				1634
				1635	if (key.type != BTRFS_EXTENT_DATA_KEY)
				1636	continue;
				1637
				1638	fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
				1639	/* filter out non qgroup-accountable extents */
				1640	extent_type = btrfs_file_extent_type(eb, fi);
				1641
				1642	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
				1643	continue;
				1644
				1645	bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
				1646	if (!bytenr)
				1647	continue;
				1648
				1649	num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
				1650
				1651	ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
				1652	GFP_NOFS);
				1653	if (ret)
				1654	return ret;
				1655	}
				1656	cond_resched();
				1657	return 0;
				1658	}
				1659
				1660	/*
				1661	* Walk up the tree from the bottom, freeing leaves and any interior
				1662	* nodes which have had all slots visited. If a node (leaf or
				1663	* interior) is freed, the node above it will have it's slot
				1664	* incremented. The root node will never be freed.
				1665	*
				1666	* At the end of this function, we should have a path which has all
				1667	* slots incremented to the next position for a search. If we need to
				1668	* read a new node it will be NULL and the node above it will have the
				1669	* correct slot selected for a later read.
				1670	*
				1671	* If we increment the root nodes slot counter past the number of
				1672	* elements, 1 is returned to signal completion of the search.
				1673	*/
				1674	static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
				1675	{
				1676	int level = 0;
				1677	int nr, slot;
				1678	struct extent_buffer *eb;
				1679
				1680	if (root_level == 0)
				1681	return 1;
				1682
				1683	while (level <= root_level) {
				1684	eb = path->nodes[level];
				1685	nr = btrfs_header_nritems(eb);
				1686	path->slots[level]++;
				1687	slot = path->slots[level];
				1688	if (slot >= nr \|\| level == 0) {
				1689	/*
				1690	* Don't free the root - we will detect this
				1691	* condition after our loop and return a
				1692	* positive value for caller to stop walking the tree.
				1693	*/
				1694	if (level != root_level) {
				1695	btrfs_tree_unlock_rw(eb, path->locks[level]);
				1696	path->locks[level] = 0;
				1697
				1698	free_extent_buffer(eb);
				1699	path->nodes[level] = NULL;
				1700	path->slots[level] = 0;
				1701	}
				1702	} else {
				1703	/*
				1704	* We have a valid slot to walk back down
				1705	* from. Stop here so caller can process these
				1706	* new nodes.
				1707	*/
				1708	break;
				1709	}
				1710
				1711	level++;
				1712	}
				1713
				1714	eb = path->nodes[root_level];
				1715	if (path->slots[root_level] >= btrfs_header_nritems(eb))
				1716	return 1;
				1717
				1718	return 0;
				1719	}
				1720
				1721	int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
				1722	struct extent_buffer *root_eb,
				1723	u64 root_gen, int root_level)
				1724	{
				1725	struct btrfs_fs_info *fs_info = trans->fs_info;
				1726	int ret = 0;
				1727	int level;
				1728	struct extent_buffer *eb = root_eb;
				1729	struct btrfs_path *path = NULL;
				1730
				1731	BUG_ON(root_level < 0 \|\| root_level >= BTRFS_MAX_LEVEL);
				1732	BUG_ON(root_eb == NULL);
				1733
				1734	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				1735	return 0;
				1736
				1737	if (!extent_buffer_uptodate(root_eb)) {
				1738	ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
				1739	if (ret)
				1740	goto out;
				1741	}
				1742
				1743	if (root_level == 0) {
				1744	ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
				1745	goto out;
				1746	}
				1747
				1748	path = btrfs_alloc_path();
				1749	if (!path)
				1750	return -ENOMEM;
				1751
				1752	/*
				1753	* Walk down the tree. Missing extent blocks are filled in as
				1754	* we go. Metadata is accounted every time we read a new
				1755	* extent block.
				1756	*
				1757	* When we reach a leaf, we account for file extent items in it,
				1758	* walk back up the tree (adjusting slot pointers as we go)
				1759	* and restart the search process.
				1760	*/
				1761	extent_buffer_get(root_eb); /* For path */
				1762	path->nodes[root_level] = root_eb;
				1763	path->slots[root_level] = 0;
				1764	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
				1765	walk_down:
				1766	level = root_level;
				1767	while (level >= 0) {
				1768	if (path->nodes[level] == NULL) {
				1769	struct btrfs_key first_key;
				1770	int parent_slot;
				1771	u64 child_gen;
				1772	u64 child_bytenr;
				1773
				1774	/*
				1775	* We need to get child blockptr/gen from parent before
				1776	* we can read it.
				1777	*/
				1778	eb = path->nodes[level + 1];
				1779	parent_slot = path->slots[level + 1];
				1780	child_bytenr = btrfs_node_blockptr(eb, parent_slot);
				1781	child_gen = btrfs_node_ptr_generation(eb, parent_slot);
				1782	btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
				1783
				1784	eb = read_tree_block(fs_info, child_bytenr, child_gen,
				1785	level, &first_key);
				1786	if (IS_ERR(eb)) {
				1787	ret = PTR_ERR(eb);
				1788	goto out;
				1789	} else if (!extent_buffer_uptodate(eb)) {
				1790	free_extent_buffer(eb);
				1791	ret = -EIO;
				1792	goto out;
				1793	}
				1794
				1795	path->nodes[level] = eb;
				1796	path->slots[level] = 0;
				1797
				1798	btrfs_tree_read_lock(eb);
				1799	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
				1800	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
				1801
				1802	ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
				1803	fs_info->nodesize,
				1804	GFP_NOFS);
				1805	if (ret)
				1806	goto out;
				1807	}
				1808
				1809	if (level == 0) {
				1810	ret = btrfs_qgroup_trace_leaf_items(trans,
				1811	path->nodes[level]);
				1812	if (ret)
				1813	goto out;
				1814
				1815	/* Nonzero return here means we completed our search */
				1816	ret = adjust_slots_upwards(path, root_level);
				1817	if (ret)
				1818	break;
				1819
				1820	/* Restart search with new slots */
				1821	goto walk_down;
				1822	}
				1823
				1824	level--;
				1825	}
				1826
				1827	ret = 0;
				1828	out:
				1829	btrfs_free_path(path);
				1830
				1831	return ret;
				1832	}
				1833
				1834	#define UPDATE_NEW 0
				1835	#define UPDATE_OLD 1
				1836	/*
				1837	* Walk all of the roots that points to the bytenr and adjust their refcnts.
				1838	*/
				1839	static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				1840	struct ulist roots, struct ulist tmp,
				1841	struct ulist *qgroups, u64 seq, int update_old)
				1842	{
				1843	struct ulist_node *unode;
				1844	struct ulist_iterator uiter;
				1845	struct ulist_node *tmp_unode;
				1846	struct ulist_iterator tmp_uiter;
				1847	struct btrfs_qgroup *qg;
				1848	int ret = 0;
				1849
				1850	if (!roots)
				1851	return 0;
				1852	ULIST_ITER_INIT(&uiter);
				1853	while ((unode = ulist_next(roots, &uiter))) {
				1854	qg = find_qgroup_rb(fs_info, unode->val);
				1855	if (!qg)
				1856	continue;
				1857
				1858	ulist_reinit(tmp);
				1859	ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				1860	GFP_ATOMIC);
				1861	if (ret < 0)
				1862	return ret;
				1863	ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
				1864	if (ret < 0)
				1865	return ret;
				1866	ULIST_ITER_INIT(&tmp_uiter);
				1867	while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
				1868	struct btrfs_qgroup_list *glist;
				1869
				1870	qg = unode_aux_to_qgroup(tmp_unode);
				1871	if (update_old)
				1872	btrfs_qgroup_update_old_refcnt(qg, seq, 1);
				1873	else
				1874	btrfs_qgroup_update_new_refcnt(qg, seq, 1);
				1875	list_for_each_entry(glist, &qg->groups, next_group) {
				1876	ret = ulist_add(qgroups, glist->group->qgroupid,
				1877	qgroup_to_aux(glist->group),
				1878	GFP_ATOMIC);
				1879	if (ret < 0)
				1880	return ret;
				1881	ret = ulist_add(tmp, glist->group->qgroupid,
				1882	qgroup_to_aux(glist->group),
				1883	GFP_ATOMIC);
				1884	if (ret < 0)
				1885	return ret;
				1886	}
				1887	}
				1888	}
				1889	return 0;
				1890	}
				1891
				1892	/*
				1893	* Update qgroup rfer/excl counters.
				1894	* Rfer update is easy, codes can explain themselves.
				1895	*
				1896	* Excl update is tricky, the update is split into 2 part.
				1897	* Part 1: Possible exclusive <-> sharing detect:
				1898	* \| A \| !A \|
				1899	* -------------------------------------
				1900	* B \| * \| - \|
				1901	* -------------------------------------
				1902	* !B \| + \| ** \|
				1903	* -------------------------------------
				1904	*
				1905	* Conditions:
				1906	* A: cur_old_roots < nr_old_roots (not exclusive before)
				1907	* !A: cur_old_roots == nr_old_roots (possible exclusive before)
				1908	* B: cur_new_roots < nr_new_roots (not exclusive now)
				1909	* !B: cur_new_roots == nr_new_roots (possible exclusive now)
				1910	*
				1911	* Results:
				1912	* +: Possible sharing -> exclusive -: Possible exclusive -> sharing
				1913	* : Definitely not changed. *: Possible unchanged.
				1914	*
				1915	* For !A and !B condition, the exception is cur_old/new_roots == 0 case.
				1916	*
				1917	* To make the logic clear, we first use condition A and B to split
				1918	* combination into 4 results.
				1919	*
				1920	* Then, for result "+" and "-", check old/new_roots == 0 case, as in them
				1921	* only on variant maybe 0.
				1922	*
				1923	* Lastly, check result **, since there are 2 variants maybe 0, split them
				1924	* again(2x2).
				1925	* But this time we don't need to consider other things, the codes and logic
				1926	* is easy to understand now.
				1927	*/
				1928	static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
				1929	struct ulist *qgroups,
				1930	u64 nr_old_roots,
				1931	u64 nr_new_roots,
				1932	u64 num_bytes, u64 seq)
				1933	{
				1934	struct ulist_node *unode;
				1935	struct ulist_iterator uiter;
				1936	struct btrfs_qgroup *qg;
				1937	u64 cur_new_count, cur_old_count;
				1938
				1939	ULIST_ITER_INIT(&uiter);
				1940	while ((unode = ulist_next(qgroups, &uiter))) {
				1941	bool dirty = false;
				1942
				1943	qg = unode_aux_to_qgroup(unode);
				1944	cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
				1945	cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
				1946
				1947	trace_qgroup_update_counters(fs_info, qg, cur_old_count,
				1948	cur_new_count);
				1949
				1950	/* Rfer update part */
				1951	if (cur_old_count == 0 && cur_new_count > 0) {
				1952	qg->rfer += num_bytes;
				1953	qg->rfer_cmpr += num_bytes;
				1954	dirty = true;
				1955	}
				1956	if (cur_old_count > 0 && cur_new_count == 0) {
				1957	qg->rfer -= num_bytes;
				1958	qg->rfer_cmpr -= num_bytes;
				1959	dirty = true;
				1960	}
				1961
				1962	/* Excl update part */
				1963	/* Exclusive/none -> shared case */
				1964	if (cur_old_count == nr_old_roots &&
				1965	cur_new_count < nr_new_roots) {
				1966	/* Exclusive -> shared */
				1967	if (cur_old_count != 0) {
				1968	qg->excl -= num_bytes;
				1969	qg->excl_cmpr -= num_bytes;
				1970	dirty = true;
				1971	}
				1972	}
				1973
				1974	/* Shared -> exclusive/none case */
				1975	if (cur_old_count < nr_old_roots &&
				1976	cur_new_count == nr_new_roots) {
				1977	/* Shared->exclusive */
				1978	if (cur_new_count != 0) {
				1979	qg->excl += num_bytes;
				1980	qg->excl_cmpr += num_bytes;
				1981	dirty = true;
				1982	}
				1983	}
				1984
				1985	/* Exclusive/none -> exclusive/none case */
				1986	if (cur_old_count == nr_old_roots &&
				1987	cur_new_count == nr_new_roots) {
				1988	if (cur_old_count == 0) {
				1989	/* None -> exclusive/none */
				1990
				1991	if (cur_new_count != 0) {
				1992	/* None -> exclusive */
				1993	qg->excl += num_bytes;
				1994	qg->excl_cmpr += num_bytes;
				1995	dirty = true;
				1996	}
				1997	/* None -> none, nothing changed */
				1998	} else {
				1999	/* Exclusive -> exclusive/none */
				2000
				2001	if (cur_new_count == 0) {
				2002	/* Exclusive -> none */
				2003	qg->excl -= num_bytes;
				2004	qg->excl_cmpr -= num_bytes;
				2005	dirty = true;
				2006	}
				2007	/* Exclusive -> exclusive, nothing changed */
				2008	}
				2009	}
				2010
				2011	if (dirty)
				2012	qgroup_dirty(fs_info, qg);
				2013	}
				2014	return 0;
				2015	}
				2016
				2017	/*
				2018	* Check if the @roots potentially is a list of fs tree roots
				2019	*
				2020	* Return 0 for definitely not a fs/subvol tree roots ulist
				2021	* Return 1 for possible fs/subvol tree roots in the list (considering an empty
				2022	* one as well)
				2023	*/
				2024	static int maybe_fs_roots(struct ulist *roots)
				2025	{
				2026	struct ulist_node *unode;
				2027	struct ulist_iterator uiter;
				2028
				2029	/* Empty one, still possible for fs roots */
				2030	if (!roots \|\| roots->nnodes == 0)
				2031	return 1;
				2032
				2033	ULIST_ITER_INIT(&uiter);
				2034	unode = ulist_next(roots, &uiter);
				2035	if (!unode)
				2036	return 1;
				2037
				2038	/*
				2039	* If it contains fs tree roots, then it must belong to fs/subvol
				2040	* trees.
				2041	* If it contains a non-fs tree, it won't be shared with fs/subvol trees.
				2042	*/
				2043	return is_fstree(unode->val);
				2044	}
				2045
				2046	int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				2047	u64 num_bytes, struct ulist *old_roots,
				2048	struct ulist *new_roots)
				2049	{
				2050	struct btrfs_fs_info *fs_info = trans->fs_info;
				2051	struct ulist *qgroups = NULL;
				2052	struct ulist *tmp = NULL;
				2053	u64 seq;
				2054	u64 nr_new_roots = 0;
				2055	u64 nr_old_roots = 0;
				2056	int ret = 0;
				2057
				2058	/*
				2059	* If quotas get disabled meanwhile, the resouces need to be freed and
				2060	* we can't just exit here.
				2061	*/
				2062	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				2063	goto out_free;
				2064
				2065	if (new_roots) {
				2066	if (!maybe_fs_roots(new_roots))
				2067	goto out_free;
				2068	nr_new_roots = new_roots->nnodes;
				2069	}
				2070	if (old_roots) {
				2071	if (!maybe_fs_roots(old_roots))
				2072	goto out_free;
				2073	nr_old_roots = old_roots->nnodes;
				2074	}
				2075
				2076	/* Quick exit, either not fs tree roots, or won't affect any qgroup */
				2077	if (nr_old_roots == 0 && nr_new_roots == 0)
				2078	goto out_free;
				2079
				2080	BUG_ON(!fs_info->quota_root);
				2081
				2082	trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
				2083	num_bytes, nr_old_roots, nr_new_roots);
				2084
				2085	qgroups = ulist_alloc(GFP_NOFS);
				2086	if (!qgroups) {
				2087	ret = -ENOMEM;
				2088	goto out_free;
				2089	}
				2090	tmp = ulist_alloc(GFP_NOFS);
				2091	if (!tmp) {
				2092	ret = -ENOMEM;
				2093	goto out_free;
				2094	}
				2095
				2096	mutex_lock(&fs_info->qgroup_rescan_lock);
				2097	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
				2098	if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
				2099	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2100	ret = 0;
				2101	goto out_free;
				2102	}
				2103	}
				2104	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2105
				2106	spin_lock(&fs_info->qgroup_lock);
				2107	seq = fs_info->qgroup_seq;
				2108
				2109	/* Update old refcnts using old_roots */
				2110	ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
				2111	UPDATE_OLD);
				2112	if (ret < 0)
				2113	goto out;
				2114
				2115	/* Update new refcnts using new_roots */
				2116	ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
				2117	UPDATE_NEW);
				2118	if (ret < 0)
				2119	goto out;
				2120
				2121	qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
				2122	num_bytes, seq);
				2123
				2124	/*
				2125	* Bump qgroup_seq to avoid seq overlap
				2126	*/
				2127	fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
				2128	out:
				2129	spin_unlock(&fs_info->qgroup_lock);
				2130	out_free:
				2131	ulist_free(tmp);
				2132	ulist_free(qgroups);
				2133	ulist_free(old_roots);
				2134	ulist_free(new_roots);
				2135	return ret;
				2136	}
				2137
				2138	int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
				2139	{
				2140	struct btrfs_fs_info *fs_info = trans->fs_info;
				2141	struct btrfs_qgroup_extent_record *record;
				2142	struct btrfs_delayed_ref_root *delayed_refs;
				2143	struct ulist *new_roots = NULL;
				2144	struct rb_node *node;
				2145	u64 qgroup_to_skip;
				2146	int ret = 0;
				2147
				2148	delayed_refs = &trans->transaction->delayed_refs;
				2149	qgroup_to_skip = delayed_refs->qgroup_to_skip;
				2150	while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
				2151	record = rb_entry(node, struct btrfs_qgroup_extent_record,
				2152	node);
				2153
				2154	trace_btrfs_qgroup_account_extents(fs_info, record);
				2155
				2156	if (!ret) {
				2157	/*
				2158	* Old roots should be searched when inserting qgroup
				2159	* extent record
				2160	*/
				2161	if (WARN_ON(!record->old_roots)) {
				2162	/* Search commit root to find old_roots */
				2163	ret = btrfs_find_all_roots(NULL, fs_info,
				2164	record->bytenr, 0,
				2165	&record->old_roots, false);
				2166	if (ret < 0)
				2167	goto cleanup;
				2168	}
				2169
				2170	/*
				2171	* Use SEQ_LAST as time_seq to do special search, which
				2172	* doesn't lock tree or delayed_refs and search current
				2173	* root. It's safe inside commit_transaction().
				2174	*/
				2175	ret = btrfs_find_all_roots(trans, fs_info,
				2176	record->bytenr, SEQ_LAST, &new_roots, false);
				2177	if (ret < 0)
				2178	goto cleanup;
				2179	if (qgroup_to_skip) {
				2180	ulist_del(new_roots, qgroup_to_skip, 0);
				2181	ulist_del(record->old_roots, qgroup_to_skip,
				2182	0);
				2183	}
				2184	ret = btrfs_qgroup_account_extent(trans, record->bytenr,
				2185	record->num_bytes,
				2186	record->old_roots,
				2187	new_roots);
				2188	record->old_roots = NULL;
				2189	new_roots = NULL;
				2190	}
				2191	cleanup:
				2192	ulist_free(record->old_roots);
				2193	ulist_free(new_roots);
				2194	new_roots = NULL;
				2195	rb_erase(node, &delayed_refs->dirty_extent_root);
				2196	kfree(record);
				2197
				2198	}
				2199	return ret;
				2200	}
				2201
				2202	/*
				2203	* called from commit_transaction. Writes all changed qgroups to disk.
				2204	*/
				2205	int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
				2206	{
				2207	struct btrfs_fs_info *fs_info = trans->fs_info;
				2208	struct btrfs_root *quota_root = fs_info->quota_root;
				2209	int ret = 0;
				2210
				2211	if (!quota_root)
				2212	return ret;
				2213
				2214	spin_lock(&fs_info->qgroup_lock);
				2215	while (!list_empty(&fs_info->dirty_qgroups)) {
				2216	struct btrfs_qgroup *qgroup;
				2217	qgroup = list_first_entry(&fs_info->dirty_qgroups,
				2218	struct btrfs_qgroup, dirty);
				2219	list_del_init(&qgroup->dirty);
				2220	spin_unlock(&fs_info->qgroup_lock);
				2221	ret = update_qgroup_info_item(trans, qgroup);
				2222	if (ret)
				2223	fs_info->qgroup_flags \|=
				2224	BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2225	ret = update_qgroup_limit_item(trans, qgroup);
				2226	if (ret)
				2227	fs_info->qgroup_flags \|=
				2228	BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2229	spin_lock(&fs_info->qgroup_lock);
				2230	}
				2231	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				2232	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_ON;
				2233	else
				2234	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
				2235	spin_unlock(&fs_info->qgroup_lock);
				2236
				2237	ret = update_qgroup_status_item(trans);
				2238	if (ret)
				2239	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2240
				2241	return ret;
				2242	}
				2243
				2244	/*
				2245	* Copy the accounting information between qgroups. This is necessary
				2246	* when a snapshot or a subvolume is created. Throwing an error will
				2247	* cause a transaction abort so we take extra care here to only error
				2248	* when a readonly fs is a reasonable outcome.
				2249	*/
				2250	int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
				2251	u64 objectid, struct btrfs_qgroup_inherit *inherit)
				2252	{
				2253	int ret = 0;
				2254	int i;
				2255	u64 *i_qgroups;
				2256	bool committing = false;
				2257	struct btrfs_fs_info *fs_info = trans->fs_info;
				2258	struct btrfs_root *quota_root;
				2259	struct btrfs_qgroup *srcgroup;
				2260	struct btrfs_qgroup *dstgroup;
				2261	u32 level_size = 0;
				2262	u64 nums;
				2263
				2264	/*
				2265	* There are only two callers of this function.
				2266	*
				2267	* One in create_subvol() in the ioctl context, which needs to hold
				2268	* the qgroup_ioctl_lock.
				2269	*
				2270	* The other one in create_pending_snapshot() where no other qgroup
				2271	* code can modify the fs as they all need to either start a new trans
				2272	* or hold a trans handler, thus we don't need to hold
				2273	* qgroup_ioctl_lock.
				2274	* This would avoid long and complex lock chain and make lockdep happy.
				2275	*/
				2276	spin_lock(&fs_info->trans_lock);
				2277	if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
				2278	committing = true;
				2279	spin_unlock(&fs_info->trans_lock);
				2280
				2281	if (!committing)
				2282	mutex_lock(&fs_info->qgroup_ioctl_lock);
				2283	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				2284	goto out;
				2285
				2286	quota_root = fs_info->quota_root;
				2287	if (!quota_root) {
				2288	ret = -EINVAL;
				2289	goto out;
				2290	}
				2291
				2292	if (inherit) {
				2293	i_qgroups = (u64 *)(inherit + 1);
				2294	nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
				2295	2 * inherit->num_excl_copies;
				2296	for (i = 0; i < nums; ++i) {
				2297	srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
				2298
				2299	/*
				2300	* Zero out invalid groups so we can ignore
				2301	* them later.
				2302	*/
				2303	if (!srcgroup \|\|
				2304	((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
				2305	*i_qgroups = 0ULL;
				2306
				2307	++i_qgroups;
				2308	}
				2309	}
				2310
				2311	/*
				2312	* create a tracking group for the subvol itself
				2313	*/
				2314	ret = add_qgroup_item(trans, quota_root, objectid);
				2315	if (ret)
				2316	goto out;
				2317
				2318	/*
				2319	* add qgroup to all inherited groups
				2320	*/
				2321	if (inherit) {
				2322	i_qgroups = (u64 *)(inherit + 1);
				2323	for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
				2324	if (*i_qgroups == 0)
				2325	continue;
				2326	ret = add_qgroup_relation_item(trans, objectid,
				2327	*i_qgroups);
				2328	if (ret && ret != -EEXIST)
				2329	goto out;
				2330	ret = add_qgroup_relation_item(trans, *i_qgroups,
				2331	objectid);
				2332	if (ret && ret != -EEXIST)
				2333	goto out;
				2334	}
				2335	ret = 0;
				2336	}
				2337
				2338
				2339	spin_lock(&fs_info->qgroup_lock);
				2340
				2341	dstgroup = add_qgroup_rb(fs_info, objectid);
				2342	if (IS_ERR(dstgroup)) {
				2343	ret = PTR_ERR(dstgroup);
				2344	goto unlock;
				2345	}
				2346
				2347	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
				2348	dstgroup->lim_flags = inherit->lim.flags;
				2349	dstgroup->max_rfer = inherit->lim.max_rfer;
				2350	dstgroup->max_excl = inherit->lim.max_excl;
				2351	dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
				2352	dstgroup->rsv_excl = inherit->lim.rsv_excl;
				2353
				2354	ret = update_qgroup_limit_item(trans, dstgroup);
				2355	if (ret) {
				2356	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2357	btrfs_info(fs_info,
				2358	"unable to update quota limit for %llu",
				2359	dstgroup->qgroupid);
				2360	goto unlock;
				2361	}
				2362	}
				2363
				2364	if (srcid) {
				2365	srcgroup = find_qgroup_rb(fs_info, srcid);
				2366	if (!srcgroup)
				2367	goto unlock;
				2368
				2369	/*
				2370	* We call inherit after we clone the root in order to make sure
				2371	* our counts don't go crazy, so at this point the only
				2372	* difference between the two roots should be the root node.
				2373	*/
				2374	level_size = fs_info->nodesize;
				2375	dstgroup->rfer = srcgroup->rfer;
				2376	dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
				2377	dstgroup->excl = level_size;
				2378	dstgroup->excl_cmpr = level_size;
				2379	srcgroup->excl = level_size;
				2380	srcgroup->excl_cmpr = level_size;
				2381
				2382	/* inherit the limit info */
				2383	dstgroup->lim_flags = srcgroup->lim_flags;
				2384	dstgroup->max_rfer = srcgroup->max_rfer;
				2385	dstgroup->max_excl = srcgroup->max_excl;
				2386	dstgroup->rsv_rfer = srcgroup->rsv_rfer;
				2387	dstgroup->rsv_excl = srcgroup->rsv_excl;
				2388
				2389	qgroup_dirty(fs_info, dstgroup);
				2390	qgroup_dirty(fs_info, srcgroup);
				2391	}
				2392
				2393	if (!inherit)
				2394	goto unlock;
				2395
				2396	i_qgroups = (u64 *)(inherit + 1);
				2397	for (i = 0; i < inherit->num_qgroups; ++i) {
				2398	if (*i_qgroups) {
				2399	ret = add_relation_rb(fs_info, objectid, *i_qgroups);
				2400	if (ret)
				2401	goto unlock;
				2402	}
				2403	++i_qgroups;
				2404	}
				2405
				2406	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
				2407	struct btrfs_qgroup *src;
				2408	struct btrfs_qgroup *dst;
				2409
				2410	if (!i_qgroups[0] \|\| !i_qgroups[1])
				2411	continue;
				2412
				2413	src = find_qgroup_rb(fs_info, i_qgroups[0]);
				2414	dst = find_qgroup_rb(fs_info, i_qgroups[1]);
				2415
				2416	if (!src \|\| !dst) {
				2417	ret = -EINVAL;
				2418	goto unlock;
				2419	}
				2420
				2421	dst->rfer = src->rfer - level_size;
				2422	dst->rfer_cmpr = src->rfer_cmpr - level_size;
				2423	}
				2424	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
				2425	struct btrfs_qgroup *src;
				2426	struct btrfs_qgroup *dst;
				2427
				2428	if (!i_qgroups[0] \|\| !i_qgroups[1])
				2429	continue;
				2430
				2431	src = find_qgroup_rb(fs_info, i_qgroups[0]);
				2432	dst = find_qgroup_rb(fs_info, i_qgroups[1]);
				2433
				2434	if (!src \|\| !dst) {
				2435	ret = -EINVAL;
				2436	goto unlock;
				2437	}
				2438
				2439	dst->excl = src->excl + level_size;
				2440	dst->excl_cmpr = src->excl_cmpr + level_size;
				2441	}
				2442
				2443	unlock:
				2444	spin_unlock(&fs_info->qgroup_lock);
				2445	out:
				2446	if (!committing)
				2447	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				2448	return ret;
				2449	}
				2450
				2451	/*
				2452	* Two limits to commit transaction in advance.
				2453	*
				2454	* For RATIO, it will be 1/RATIO of the remaining limit as threshold.
				2455	* For SIZE, it will be in byte unit as threshold.
				2456	*/
				2457	#define QGROUP_FREE_RATIO 32
				2458	#define QGROUP_FREE_SIZE SZ_32M
				2459	static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
				2460	const struct btrfs_qgroup *qg, u64 num_bytes)
				2461	{
				2462	u64 free;
				2463	u64 threshold;
				2464
				2465	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
				2466	qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
				2467	return false;
				2468
				2469	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
				2470	qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
				2471	return false;
				2472
				2473	/*
				2474	* Even if we passed the check, it's better to check if reservation
				2475	* for meta_pertrans is pushing us near limit.
				2476	* If there is too much pertrans reservation or it's near the limit,
				2477	* let's try commit transaction to free some, using transaction_kthread
				2478	*/
				2479	if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER \|
				2480	BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
				2481	if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
				2482	free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl;
				2483	threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO,
				2484	QGROUP_FREE_SIZE);
				2485	} else {
				2486	free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer;
				2487	threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO,
				2488	QGROUP_FREE_SIZE);
				2489	}
				2490
				2491	/*
				2492	* Use transaction_kthread to commit transaction, so we no
				2493	* longer need to bother nested transaction nor lock context.
				2494	*/
				2495	if (free < threshold)
				2496	btrfs_commit_transaction_locksafe(fs_info);
				2497	}
				2498
				2499	return true;
				2500	}
				2501
				2502	static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
				2503	enum btrfs_qgroup_rsv_type type)
				2504	{
				2505	struct btrfs_root *quota_root;
				2506	struct btrfs_qgroup *qgroup;
				2507	struct btrfs_fs_info *fs_info = root->fs_info;
				2508	u64 ref_root = root->root_key.objectid;
				2509	int ret = 0;
				2510	struct ulist_node *unode;
				2511	struct ulist_iterator uiter;
				2512
				2513	if (!is_fstree(ref_root))
				2514	return 0;
				2515
				2516	if (num_bytes == 0)
				2517	return 0;
				2518
				2519	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
				2520	capable(CAP_SYS_RESOURCE))
				2521	enforce = false;
				2522
				2523	spin_lock(&fs_info->qgroup_lock);
				2524	quota_root = fs_info->quota_root;
				2525	if (!quota_root)
				2526	goto out;
				2527
				2528	qgroup = find_qgroup_rb(fs_info, ref_root);
				2529	if (!qgroup)
				2530	goto out;
				2531
				2532	/*
				2533	* in a first step, we check all affected qgroups if any limits would
				2534	* be exceeded
				2535	*/
				2536	ulist_reinit(fs_info->qgroup_ulist);
				2537	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
				2538	qgroup_to_aux(qgroup), GFP_ATOMIC);
				2539	if (ret < 0)
				2540	goto out;
				2541	ULIST_ITER_INIT(&uiter);
				2542	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				2543	struct btrfs_qgroup *qg;
				2544	struct btrfs_qgroup_list *glist;
				2545
				2546	qg = unode_aux_to_qgroup(unode);
				2547
				2548	if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
				2549	ret = -EDQUOT;
				2550	goto out;
				2551	}
				2552
				2553	list_for_each_entry(glist, &qg->groups, next_group) {
				2554	ret = ulist_add(fs_info->qgroup_ulist,
				2555	glist->group->qgroupid,
				2556	qgroup_to_aux(glist->group), GFP_ATOMIC);
				2557	if (ret < 0)
				2558	goto out;
				2559	}
				2560	}
				2561	ret = 0;
				2562	/*
				2563	* no limits exceeded, now record the reservation into all qgroups
				2564	*/
				2565	ULIST_ITER_INIT(&uiter);
				2566	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				2567	struct btrfs_qgroup *qg;
				2568
				2569	qg = unode_aux_to_qgroup(unode);
				2570
				2571	trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
				2572	qgroup_rsv_add(fs_info, qg, num_bytes, type);
				2573	}
				2574
				2575	out:
				2576	spin_unlock(&fs_info->qgroup_lock);
				2577	return ret;
				2578	}
				2579
				2580	/*
				2581	* Free @num_bytes of reserved space with @type for qgroup. (Normally level 0
				2582	* qgroup).
				2583	*
				2584	* Will handle all higher level qgroup too.
				2585	*
				2586	* NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
				2587	* This special case is only used for META_PERTRANS type.
				2588	*/
				2589	void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
				2590	u64 ref_root, u64 num_bytes,
				2591	enum btrfs_qgroup_rsv_type type)
				2592	{
				2593	struct btrfs_root *quota_root;
				2594	struct btrfs_qgroup *qgroup;
				2595	struct ulist_node *unode;
				2596	struct ulist_iterator uiter;
				2597	int ret = 0;
				2598
				2599	if (!is_fstree(ref_root))
				2600	return;
				2601
				2602	if (num_bytes == 0)
				2603	return;
				2604
				2605	if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
				2606	WARN(1, "%s: Invalid type to free", __func__);
				2607	return;
				2608	}
				2609	spin_lock(&fs_info->qgroup_lock);
				2610
				2611	quota_root = fs_info->quota_root;
				2612	if (!quota_root)
				2613	goto out;
				2614
				2615	qgroup = find_qgroup_rb(fs_info, ref_root);
				2616	if (!qgroup)
				2617	goto out;
				2618
				2619	if (num_bytes == (u64)-1)
				2620	/*
				2621	* We're freeing all pertrans rsv, get reserved value from
				2622	* level 0 qgroup as real num_bytes to free.
				2623	*/
				2624	num_bytes = qgroup->rsv.values[type];
				2625
				2626	ulist_reinit(fs_info->qgroup_ulist);
				2627	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
				2628	qgroup_to_aux(qgroup), GFP_ATOMIC);
				2629	if (ret < 0)
				2630	goto out;
				2631	ULIST_ITER_INIT(&uiter);
				2632	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				2633	struct btrfs_qgroup *qg;
				2634	struct btrfs_qgroup_list *glist;
				2635
				2636	qg = unode_aux_to_qgroup(unode);
				2637
				2638	trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
				2639	qgroup_rsv_release(fs_info, qg, num_bytes, type);
				2640
				2641	list_for_each_entry(glist, &qg->groups, next_group) {
				2642	ret = ulist_add(fs_info->qgroup_ulist,
				2643	glist->group->qgroupid,
				2644	qgroup_to_aux(glist->group), GFP_ATOMIC);
				2645	if (ret < 0)
				2646	goto out;
				2647	}
				2648	}
				2649
				2650	out:
				2651	spin_unlock(&fs_info->qgroup_lock);
				2652	}
				2653
				2654	/*
				2655	* Check if the leaf is the last leaf. Which means all node pointers
				2656	* are at their last position.
				2657	*/
				2658	static bool is_last_leaf(struct btrfs_path *path)
				2659	{
				2660	int i;
				2661
				2662	for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
				2663	if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
				2664	return false;
				2665	}
				2666	return true;
				2667	}
				2668
				2669	/*
				2670	* returns < 0 on error, 0 when more leafs are to be scanned.
				2671	* returns 1 when done.
				2672	*/
				2673	static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
				2674	struct btrfs_path *path)
				2675	{
				2676	struct btrfs_fs_info *fs_info = trans->fs_info;
				2677	struct btrfs_key found;
				2678	struct extent_buffer *scratch_leaf = NULL;
				2679	struct ulist *roots = NULL;
				2680	u64 num_bytes;
				2681	bool done;
				2682	int slot;
				2683	int ret;
				2684
				2685	mutex_lock(&fs_info->qgroup_rescan_lock);
				2686	ret = btrfs_search_slot_for_read(fs_info->extent_root,
				2687	&fs_info->qgroup_rescan_progress,
				2688	path, 1, 0);
				2689
				2690	btrfs_debug(fs_info,
				2691	"current progress key (%llu %u %llu), search_slot ret %d",
				2692	fs_info->qgroup_rescan_progress.objectid,
				2693	fs_info->qgroup_rescan_progress.type,
				2694	fs_info->qgroup_rescan_progress.offset, ret);
				2695
				2696	if (ret) {
				2697	/*
				2698	* The rescan is about to end, we will not be scanning any
				2699	* further blocks. We cannot unset the RESCAN flag here, because
				2700	* we want to commit the transaction if everything went well.
				2701	* To make the live accounting work in this phase, we set our
				2702	* scan progress pointer such that every real extent objectid
				2703	* will be smaller.
				2704	*/
				2705	fs_info->qgroup_rescan_progress.objectid = (u64)-1;
				2706	btrfs_release_path(path);
				2707	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2708	return ret;
				2709	}
				2710	done = is_last_leaf(path);
				2711
				2712	btrfs_item_key_to_cpu(path->nodes[0], &found,
				2713	btrfs_header_nritems(path->nodes[0]) - 1);
				2714	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
				2715
				2716	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
				2717	if (!scratch_leaf) {
				2718	ret = -ENOMEM;
				2719	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2720	goto out;
				2721	}
				2722	extent_buffer_get(scratch_leaf);
				2723	btrfs_tree_read_lock(scratch_leaf);
				2724	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
				2725	slot = path->slots[0];
				2726	btrfs_release_path(path);
				2727	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2728
				2729	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
				2730	btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
				2731	if (found.type != BTRFS_EXTENT_ITEM_KEY &&
				2732	found.type != BTRFS_METADATA_ITEM_KEY)
				2733	continue;
				2734	if (found.type == BTRFS_METADATA_ITEM_KEY)
				2735	num_bytes = fs_info->nodesize;
				2736	else
				2737	num_bytes = found.offset;
				2738
				2739	ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
				2740	&roots, false);
				2741	if (ret < 0)
				2742	goto out;
				2743	/* For rescan, just pass old_roots as NULL */
				2744	ret = btrfs_qgroup_account_extent(trans, found.objectid,
				2745	num_bytes, NULL, roots);
				2746	if (ret < 0)
				2747	goto out;
				2748	}
				2749	out:
				2750	if (scratch_leaf) {
				2751	btrfs_tree_read_unlock_blocking(scratch_leaf);
				2752	free_extent_buffer(scratch_leaf);
				2753	}
				2754
				2755	if (done && !ret) {
				2756	ret = 1;
				2757	fs_info->qgroup_rescan_progress.objectid = (u64)-1;
				2758	}
				2759	return ret;
				2760	}
				2761
				2762	static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
				2763	{
				2764	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
				2765	qgroup_rescan_work);
				2766	struct btrfs_path *path;
				2767	struct btrfs_trans_handle *trans = NULL;
				2768	int err = -ENOMEM;
				2769	int ret = 0;
				2770
				2771	path = btrfs_alloc_path();
				2772	if (!path)
				2773	goto out;
				2774	/*
				2775	* Rescan should only search for commit root, and any later difference
				2776	* should be recorded by qgroup
				2777	*/
				2778	path->search_commit_root = 1;
				2779	path->skip_locking = 1;
				2780
				2781	err = 0;
				2782	while (!err && !btrfs_fs_closing(fs_info)) {
				2783	trans = btrfs_start_transaction(fs_info->fs_root, 0);
				2784	if (IS_ERR(trans)) {
				2785	err = PTR_ERR(trans);
				2786	break;
				2787	}
				2788	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
				2789	err = -EINTR;
				2790	} else {
				2791	err = qgroup_rescan_leaf(trans, path);
				2792	}
				2793	if (err > 0)
				2794	btrfs_commit_transaction(trans);
				2795	else
				2796	btrfs_end_transaction(trans);
				2797	}
				2798
				2799	out:
				2800	btrfs_free_path(path);
				2801
				2802	mutex_lock(&fs_info->qgroup_rescan_lock);
				2803	if (err > 0 &&
				2804	fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
				2805	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2806	} else if (err < 0) {
				2807	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2808	}
				2809	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2810
				2811	/*
				2812	* only update status, since the previous part has already updated the
				2813	* qgroup info.
				2814	*/
				2815	trans = btrfs_start_transaction(fs_info->quota_root, 1);
				2816	if (IS_ERR(trans)) {
				2817	err = PTR_ERR(trans);
				2818	trans = NULL;
				2819	btrfs_err(fs_info,
				2820	"fail to start transaction for status update: %d",
				2821	err);
				2822	}
				2823
				2824	mutex_lock(&fs_info->qgroup_rescan_lock);
				2825	if (!btrfs_fs_closing(fs_info))
				2826	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2827	if (trans) {
				2828	ret = update_qgroup_status_item(trans);
				2829	if (ret < 0) {
				2830	err = ret;
				2831	btrfs_err(fs_info, "fail to update qgroup status: %d",
				2832	err);
				2833	}
				2834	}
				2835	fs_info->qgroup_rescan_running = false;
				2836	complete_all(&fs_info->qgroup_rescan_completion);
				2837	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2838
				2839	if (!trans)
				2840	return;
				2841
				2842	btrfs_end_transaction(trans);
				2843
				2844	if (btrfs_fs_closing(fs_info)) {
				2845	btrfs_info(fs_info, "qgroup scan paused");
				2846	} else if (err >= 0) {
				2847	btrfs_info(fs_info, "qgroup scan completed%s",
				2848	err > 0 ? " (inconsistency flag cleared)" : "");
				2849	} else {
				2850	btrfs_err(fs_info, "qgroup scan failed with %d", err);
				2851	}
				2852	}
				2853
				2854	/*
				2855	* Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
				2856	* memory required for the rescan context.
				2857	*/
				2858	static int
				2859	qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
				2860	int init_flags)
				2861	{
				2862	int ret = 0;
				2863
				2864	if (!init_flags) {
				2865	/* we're resuming qgroup rescan at mount time */
				2866	if (!(fs_info->qgroup_flags &
				2867	BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
				2868	btrfs_warn(fs_info,
				2869	"qgroup rescan init failed, qgroup rescan is not queued");
				2870	ret = -EINVAL;
				2871	} else if (!(fs_info->qgroup_flags &
				2872	BTRFS_QGROUP_STATUS_FLAG_ON)) {
				2873	btrfs_warn(fs_info,
				2874	"qgroup rescan init failed, qgroup is not enabled");
				2875	ret = -EINVAL;
				2876	}
				2877
				2878	if (ret)
				2879	return ret;
				2880	}
				2881
				2882	mutex_lock(&fs_info->qgroup_rescan_lock);
				2883	spin_lock(&fs_info->qgroup_lock);
				2884
				2885	if (init_flags) {
				2886	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
				2887	btrfs_warn(fs_info,
				2888	"qgroup rescan is already in progress");
				2889	ret = -EINPROGRESS;
				2890	} else if (!(fs_info->qgroup_flags &
				2891	BTRFS_QGROUP_STATUS_FLAG_ON)) {
				2892	btrfs_warn(fs_info,
				2893	"qgroup rescan init failed, qgroup is not enabled");
				2894	ret = -EINVAL;
				2895	}
				2896
				2897	if (ret) {
				2898	spin_unlock(&fs_info->qgroup_lock);
				2899	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2900	return ret;
				2901	}
				2902	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2903	}
				2904
				2905	memset(&fs_info->qgroup_rescan_progress, 0,
				2906	sizeof(fs_info->qgroup_rescan_progress));
				2907	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
				2908	init_completion(&fs_info->qgroup_rescan_completion);
				2909	fs_info->qgroup_rescan_running = true;
				2910
				2911	spin_unlock(&fs_info->qgroup_lock);
				2912	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2913
				2914	memset(&fs_info->qgroup_rescan_work, 0,
				2915	sizeof(fs_info->qgroup_rescan_work));
				2916	btrfs_init_work(&fs_info->qgroup_rescan_work,
				2917	btrfs_qgroup_rescan_helper,
				2918	btrfs_qgroup_rescan_worker, NULL, NULL);
				2919	return 0;
				2920	}
				2921
				2922	static void
				2923	qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
				2924	{
				2925	struct rb_node *n;
				2926	struct btrfs_qgroup *qgroup;
				2927
				2928	spin_lock(&fs_info->qgroup_lock);
				2929	/* clear all current qgroup tracking information */
				2930	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
				2931	qgroup = rb_entry(n, struct btrfs_qgroup, node);
				2932	qgroup->rfer = 0;
				2933	qgroup->rfer_cmpr = 0;
				2934	qgroup->excl = 0;
				2935	qgroup->excl_cmpr = 0;
				2936	qgroup_dirty(fs_info, qgroup);
				2937	}
				2938	spin_unlock(&fs_info->qgroup_lock);
				2939	}
				2940
				2941	int
				2942	btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
				2943	{
				2944	int ret = 0;
				2945	struct btrfs_trans_handle *trans;
				2946
				2947	ret = qgroup_rescan_init(fs_info, 0, 1);
				2948	if (ret)
				2949	return ret;
				2950
				2951	/*
				2952	* We have set the rescan_progress to 0, which means no more
				2953	* delayed refs will be accounted by btrfs_qgroup_account_ref.
				2954	* However, btrfs_qgroup_account_ref may be right after its call
				2955	* to btrfs_find_all_roots, in which case it would still do the
				2956	* accounting.
				2957	* To solve this, we're committing the transaction, which will
				2958	* ensure we run all delayed refs and only after that, we are
				2959	* going to clear all tracking information for a clean start.
				2960	*/
				2961
				2962	trans = btrfs_join_transaction(fs_info->fs_root);
				2963	if (IS_ERR(trans)) {
				2964	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2965	return PTR_ERR(trans);
				2966	}
				2967	ret = btrfs_commit_transaction(trans);
				2968	if (ret) {
				2969	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2970	return ret;
				2971	}
				2972
				2973	qgroup_rescan_zero_tracking(fs_info);
				2974
				2975	btrfs_queue_work(fs_info->qgroup_rescan_workers,
				2976	&fs_info->qgroup_rescan_work);
				2977
				2978	return 0;
				2979	}
				2980
				2981	int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				2982	bool interruptible)
				2983	{
				2984	int running;
				2985	int ret = 0;
				2986
				2987	mutex_lock(&fs_info->qgroup_rescan_lock);
				2988	spin_lock(&fs_info->qgroup_lock);
				2989	running = fs_info->qgroup_rescan_running;
				2990	spin_unlock(&fs_info->qgroup_lock);
				2991	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2992
				2993	if (!running)
				2994	return 0;
				2995
				2996	if (interruptible)
				2997	ret = wait_for_completion_interruptible(
				2998	&fs_info->qgroup_rescan_completion);
				2999	else
				3000	wait_for_completion(&fs_info->qgroup_rescan_completion);
				3001
				3002	return ret;
				3003	}
				3004
				3005	/*
				3006	* this is only called from open_ctree where we're still single threaded, thus
				3007	* locking is omitted here.
				3008	*/
				3009	void
				3010	btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
				3011	{
				3012	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
				3013	btrfs_queue_work(fs_info->qgroup_rescan_workers,
				3014	&fs_info->qgroup_rescan_work);
				3015	}
				3016
				3017	/*
				3018	* Reserve qgroup space for range [start, start + len).
				3019	*
				3020	* This function will either reserve space from related qgroups or doing
				3021	* nothing if the range is already reserved.
				3022	*
				3023	* Return 0 for successful reserve
				3024	* Return <0 for error (including -EQUOT)
				3025	*
				3026	* NOTE: this function may sleep for memory allocation.
				3027	* if btrfs_qgroup_reserve_data() is called multiple times with
				3028	* same @reserved, caller must ensure when error happens it's OK
				3029	* to free ALL reserved space.
				3030	*/
				3031	int btrfs_qgroup_reserve_data(struct inode *inode,
				3032	struct extent_changeset **reserved_ret, u64 start,
				3033	u64 len)
				3034	{
				3035	struct btrfs_root *root = BTRFS_I(inode)->root;
				3036	struct ulist_node *unode;
				3037	struct ulist_iterator uiter;
				3038	struct extent_changeset *reserved;
				3039	u64 orig_reserved;
				3040	u64 to_reserve;
				3041	int ret;
				3042
				3043	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) \|\|
				3044	!is_fstree(root->objectid) \|\| len == 0)
				3045	return 0;
				3046
				3047	/* @reserved parameter is mandatory for qgroup */
				3048	if (WARN_ON(!reserved_ret))
				3049	return -EINVAL;
				3050	if (!*reserved_ret) {
				3051	*reserved_ret = extent_changeset_alloc();
				3052	if (!*reserved_ret)
				3053	return -ENOMEM;
				3054	}
				3055	reserved = *reserved_ret;
				3056	/* Record already reserved space */
				3057	orig_reserved = reserved->bytes_changed;
				3058	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
				3059	start + len -1, EXTENT_QGROUP_RESERVED, reserved);
				3060
				3061	/* Newly reserved space */
				3062	to_reserve = reserved->bytes_changed - orig_reserved;
				3063	trace_btrfs_qgroup_reserve_data(inode, start, len,
				3064	to_reserve, QGROUP_RESERVE);
				3065	if (ret < 0)
				3066	goto cleanup;
				3067	ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
				3068	if (ret < 0)
				3069	goto cleanup;
				3070
				3071	return ret;
				3072
				3073	cleanup:
				3074	/* cleanup ALL already reserved ranges */
				3075	ULIST_ITER_INIT(&uiter);
				3076	while ((unode = ulist_next(&reserved->range_changed, &uiter)))
				3077	clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				3078	unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
				3079	/* Also free data bytes of already reserved one */
				3080	btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid,
				3081	orig_reserved, BTRFS_QGROUP_RSV_DATA);
				3082	extent_changeset_release(reserved);
				3083	return ret;
				3084	}
				3085
				3086	/* Free ranges specified by @reserved, normally in error path */
				3087	static int qgroup_free_reserved_data(struct inode *inode,
				3088	struct extent_changeset *reserved, u64 start, u64 len)
				3089	{
				3090	struct btrfs_root *root = BTRFS_I(inode)->root;
				3091	struct ulist_node *unode;
				3092	struct ulist_iterator uiter;
				3093	struct extent_changeset changeset;
				3094	int freed = 0;
				3095	int ret;
				3096
				3097	extent_changeset_init(&changeset);
				3098	len = round_up(start + len, root->fs_info->sectorsize);
				3099	start = round_down(start, root->fs_info->sectorsize);
				3100
				3101	ULIST_ITER_INIT(&uiter);
				3102	while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
				3103	u64 range_start = unode->val;
				3104	/* unode->aux is the inclusive end */
				3105	u64 range_len = unode->aux - range_start + 1;
				3106	u64 free_start;
				3107	u64 free_len;
				3108
				3109	extent_changeset_release(&changeset);
				3110
				3111	/* Only free range in range [start, start + len) */
				3112	if (range_start >= start + len \|\|
				3113	range_start + range_len <= start)
				3114	continue;
				3115	free_start = max(range_start, start);
				3116	free_len = min(start + len, range_start + range_len) -
				3117	free_start;
				3118	/*
				3119	* TODO: To also modify reserved->ranges_reserved to reflect
				3120	* the modification.
				3121	*
				3122	* However as long as we free qgroup reserved according to
				3123	* EXTENT_QGROUP_RESERVED, we won't double free.
				3124	* So not need to rush.
				3125	*/
				3126	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
				3127	free_start, free_start + free_len - 1,
				3128	EXTENT_QGROUP_RESERVED, &changeset);
				3129	if (ret < 0)
				3130	goto out;
				3131	freed += changeset.bytes_changed;
				3132	}
				3133	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
				3134	BTRFS_QGROUP_RSV_DATA);
				3135	ret = freed;
				3136	out:
				3137	extent_changeset_release(&changeset);
				3138	return ret;
				3139	}
				3140
				3141	static int __btrfs_qgroup_release_data(struct inode *inode,
				3142	struct extent_changeset *reserved, u64 start, u64 len,
				3143	int free)
				3144	{
				3145	struct extent_changeset changeset;
				3146	int trace_op = QGROUP_RELEASE;
				3147	int ret;
				3148
				3149	if (!test_bit(BTRFS_FS_QUOTA_ENABLED,
				3150	&BTRFS_I(inode)->root->fs_info->flags))
				3151	return 0;
				3152
				3153	/* In release case, we shouldn't have @reserved */
				3154	WARN_ON(!free && reserved);
				3155	if (free && reserved)
				3156	return qgroup_free_reserved_data(inode, reserved, start, len);
				3157	extent_changeset_init(&changeset);
				3158	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
				3159	start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
				3160	if (ret < 0)
				3161	goto out;
				3162
				3163	if (free)
				3164	trace_op = QGROUP_FREE;
				3165	trace_btrfs_qgroup_release_data(inode, start, len,
				3166	changeset.bytes_changed, trace_op);
				3167	if (free)
				3168	btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				3169	BTRFS_I(inode)->root->objectid,
				3170	changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
				3171	ret = changeset.bytes_changed;
				3172	out:
				3173	extent_changeset_release(&changeset);
				3174	return ret;
				3175	}
				3176
				3177	/*
				3178	* Free a reserved space range from io_tree and related qgroups
				3179	*
				3180	* Should be called when a range of pages get invalidated before reaching disk.
				3181	* Or for error cleanup case.
				3182	* if @reserved is given, only reserved range in [@start, @start + @len) will
				3183	* be freed.
				3184	*
				3185	* For data written to disk, use btrfs_qgroup_release_data().
				3186	*
				3187	* NOTE: This function may sleep for memory allocation.
				3188	*/
				3189	int btrfs_qgroup_free_data(struct inode *inode,
				3190	struct extent_changeset *reserved, u64 start, u64 len)
				3191	{
				3192	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
				3193	}
				3194
				3195	/*
				3196	* Release a reserved space range from io_tree only.
				3197	*
				3198	* Should be called when a range of pages get written to disk and corresponding
				3199	* FILE_EXTENT is inserted into corresponding root.
				3200	*
				3201	* Since new qgroup accounting framework will only update qgroup numbers at
				3202	* commit_transaction() time, its reserved space shouldn't be freed from
				3203	* related qgroups.
				3204	*
				3205	* But we should release the range from io_tree, to allow further write to be
				3206	* COWed.
				3207	*
				3208	* NOTE: This function may sleep for memory allocation.
				3209	*/
				3210	int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
				3211	{
				3212	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
				3213	}
				3214
				3215	static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
				3216	enum btrfs_qgroup_rsv_type type)
				3217	{
				3218	if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
				3219	type != BTRFS_QGROUP_RSV_META_PERTRANS)
				3220	return;
				3221	if (num_bytes == 0)
				3222	return;
				3223
				3224	spin_lock(&root->qgroup_meta_rsv_lock);
				3225	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
				3226	root->qgroup_meta_rsv_prealloc += num_bytes;
				3227	else
				3228	root->qgroup_meta_rsv_pertrans += num_bytes;
				3229	spin_unlock(&root->qgroup_meta_rsv_lock);
				3230	}
				3231
				3232	static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
				3233	enum btrfs_qgroup_rsv_type type)
				3234	{
				3235	if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
				3236	type != BTRFS_QGROUP_RSV_META_PERTRANS)
				3237	return 0;
				3238	if (num_bytes == 0)
				3239	return 0;
				3240
				3241	spin_lock(&root->qgroup_meta_rsv_lock);
				3242	if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
				3243	num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
				3244	num_bytes);
				3245	root->qgroup_meta_rsv_prealloc -= num_bytes;
				3246	} else {
				3247	num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
				3248	num_bytes);
				3249	root->qgroup_meta_rsv_pertrans -= num_bytes;
				3250	}
				3251	spin_unlock(&root->qgroup_meta_rsv_lock);
				3252	return num_bytes;
				3253	}
				3254
				3255	int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				3256	enum btrfs_qgroup_rsv_type type, bool enforce)
				3257	{
				3258	struct btrfs_fs_info *fs_info = root->fs_info;
				3259	int ret;
				3260
				3261	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3262	!is_fstree(root->objectid) \|\| num_bytes == 0)
				3263	return 0;
				3264
				3265	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
				3266	trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
				3267	ret = qgroup_reserve(root, num_bytes, enforce, type);
				3268	if (ret < 0)
				3269	return ret;
				3270	/*
				3271	* Record what we have reserved into root.
				3272	*
				3273	* To avoid quota disabled->enabled underflow.
				3274	* In that case, we may try to free space we haven't reserved
				3275	* (since quota was disabled), so record what we reserved into root.
				3276	* And ensure later release won't underflow this number.
				3277	*/
				3278	add_root_meta_rsv(root, num_bytes, type);
				3279	return ret;
				3280	}
				3281
				3282	void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
				3283	{
				3284	struct btrfs_fs_info *fs_info = root->fs_info;
				3285
				3286	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3287	!is_fstree(root->objectid))
				3288	return;
				3289
				3290	/* TODO: Update trace point to handle such free */
				3291	trace_qgroup_meta_free_all_pertrans(root);
				3292	/* Special value -1 means to free all reserved space */
				3293	btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1,
				3294	BTRFS_QGROUP_RSV_META_PERTRANS);
				3295	}
				3296
				3297	void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
				3298	enum btrfs_qgroup_rsv_type type)
				3299	{
				3300	struct btrfs_fs_info *fs_info = root->fs_info;
				3301
				3302	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3303	!is_fstree(root->objectid))
				3304	return;
				3305
				3306	/*
				3307	* reservation for META_PREALLOC can happen before quota is enabled,
				3308	* which can lead to underflow.
				3309	* Here ensure we will only free what we really have reserved.
				3310	*/
				3311	num_bytes = sub_root_meta_rsv(root, num_bytes, type);
				3312	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
				3313	trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
				3314	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type);
				3315	}
				3316
				3317	static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
				3318	int num_bytes)
				3319	{
				3320	struct btrfs_root *quota_root = fs_info->quota_root;
				3321	struct btrfs_qgroup *qgroup;
				3322	struct ulist_node *unode;
				3323	struct ulist_iterator uiter;
				3324	int ret = 0;
				3325
				3326	if (num_bytes == 0)
				3327	return;
				3328	if (!quota_root)
				3329	return;
				3330
				3331	spin_lock(&fs_info->qgroup_lock);
				3332	qgroup = find_qgroup_rb(fs_info, ref_root);
				3333	if (!qgroup)
				3334	goto out;
				3335	ulist_reinit(fs_info->qgroup_ulist);
				3336	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
				3337	qgroup_to_aux(qgroup), GFP_ATOMIC);
				3338	if (ret < 0)
				3339	goto out;
				3340	ULIST_ITER_INIT(&uiter);
				3341	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				3342	struct btrfs_qgroup *qg;
				3343	struct btrfs_qgroup_list *glist;
				3344
				3345	qg = unode_aux_to_qgroup(unode);
				3346
				3347	qgroup_rsv_release(fs_info, qg, num_bytes,
				3348	BTRFS_QGROUP_RSV_META_PREALLOC);
				3349	qgroup_rsv_add(fs_info, qg, num_bytes,
				3350	BTRFS_QGROUP_RSV_META_PERTRANS);
				3351	list_for_each_entry(glist, &qg->groups, next_group) {
				3352	ret = ulist_add(fs_info->qgroup_ulist,
				3353	glist->group->qgroupid,
				3354	qgroup_to_aux(glist->group), GFP_ATOMIC);
				3355	if (ret < 0)
				3356	goto out;
				3357	}
				3358	}
				3359	out:
				3360	spin_unlock(&fs_info->qgroup_lock);
				3361	}
				3362
				3363	void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
				3364	{
				3365	struct btrfs_fs_info *fs_info = root->fs_info;
				3366
				3367	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3368	!is_fstree(root->objectid))
				3369	return;
				3370	/* Same as btrfs_qgroup_free_meta_prealloc() */
				3371	num_bytes = sub_root_meta_rsv(root, num_bytes,
				3372	BTRFS_QGROUP_RSV_META_PREALLOC);
				3373	trace_qgroup_meta_convert(root, num_bytes);
				3374	qgroup_convert_meta(fs_info, root->objectid, num_bytes);
				3375	}
				3376
				3377	/*
				3378	* Check qgroup reserved space leaking, normally at destroy inode
				3379	* time
				3380	*/
				3381	void btrfs_qgroup_check_reserved_leak(struct inode *inode)
				3382	{
				3383	struct extent_changeset changeset;
				3384	struct ulist_node *unode;
				3385	struct ulist_iterator iter;
				3386	int ret;
				3387
				3388	extent_changeset_init(&changeset);
				3389	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
				3390	EXTENT_QGROUP_RESERVED, &changeset);
				3391
				3392	WARN_ON(ret < 0);
				3393	if (WARN_ON(changeset.bytes_changed)) {
				3394	ULIST_ITER_INIT(&iter);
				3395	while ((unode = ulist_next(&changeset.range_changed, &iter))) {
				3396	btrfs_warn(BTRFS_I(inode)->root->fs_info,
				3397	"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
				3398	inode->i_ino, unode->val, unode->aux);
				3399	}
				3400	btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				3401	BTRFS_I(inode)->root->objectid,
				3402	changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
				3403
				3404	}
				3405	extent_changeset_release(&changeset);
				3406	}