Blame - src/kernel/linux/v4.14/fs/btrfs/qgroup.c - T103

blob: 39a00b57ff016c1e334808123b39c33c192e1757 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* Copyright (C) 2011 STRATO. All rights reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public
				6	* License v2 as published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				11	* General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public
				14	* License along with this program; if not, write to the
				15	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				16	* Boston, MA 021110-1307, USA.
				17	*/
				18
				19	#include <linux/sched.h>
				20	#include <linux/pagemap.h>
				21	#include <linux/writeback.h>
				22	#include <linux/blkdev.h>
				23	#include <linux/rbtree.h>
				24	#include <linux/slab.h>
				25	#include <linux/workqueue.h>
				26	#include <linux/btrfs.h>
				27
				28	#include "ctree.h"
				29	#include "transaction.h"
				30	#include "disk-io.h"
				31	#include "locking.h"
				32	#include "ulist.h"
				33	#include "backref.h"
				34	#include "extent_io.h"
				35	#include "qgroup.h"
				36
				37
				38	/* TODO XXX FIXME
				39	* - subvol delete -> delete when ref goes to 0? delete limits also?
				40	* - reorganize keys
				41	* - compressed
				42	* - sync
				43	* - copy also limits on subvol creation
				44	* - limit
				45	* - caches for ulists
				46	* - performance benchmarks
				47	* - check all ioctl parameters
				48	*/
				49
				50	static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
				51	int mod)
				52	{
				53	if (qg->old_refcnt < seq)
				54	qg->old_refcnt = seq;
				55	qg->old_refcnt += mod;
				56	}
				57
				58	static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
				59	int mod)
				60	{
				61	if (qg->new_refcnt < seq)
				62	qg->new_refcnt = seq;
				63	qg->new_refcnt += mod;
				64	}
				65
				66	static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
				67	{
				68	if (qg->old_refcnt < seq)
				69	return 0;
				70	return qg->old_refcnt - seq;
				71	}
				72
				73	static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
				74	{
				75	if (qg->new_refcnt < seq)
				76	return 0;
				77	return qg->new_refcnt - seq;
				78	}
				79
				80	/*
				81	* glue structure to represent the relations between qgroups.
				82	*/
				83	struct btrfs_qgroup_list {
				84	struct list_head next_group;
				85	struct list_head next_member;
				86	struct btrfs_qgroup *group;
				87	struct btrfs_qgroup *member;
				88	};
				89
				90	static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
				91	{
				92	return (u64)(uintptr_t)qg;
				93	}
				94
				95	static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
				96	{
				97	return (struct btrfs_qgroup *)(uintptr_t)n->aux;
				98	}
				99
				100	static int
				101	qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
				102	int init_flags);
				103	static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
				104
				105	/* must be called with qgroup_ioctl_lock held */
				106	static struct btrfs_qgroup find_qgroup_rb(struct btrfs_fs_info fs_info,
				107	u64 qgroupid)
				108	{
				109	struct rb_node *n = fs_info->qgroup_tree.rb_node;
				110	struct btrfs_qgroup *qgroup;
				111
				112	while (n) {
				113	qgroup = rb_entry(n, struct btrfs_qgroup, node);
				114	if (qgroup->qgroupid < qgroupid)
				115	n = n->rb_left;
				116	else if (qgroup->qgroupid > qgroupid)
				117	n = n->rb_right;
				118	else
				119	return qgroup;
				120	}
				121	return NULL;
				122	}
				123
				124	/* must be called with qgroup_lock held */
				125	static struct btrfs_qgroup add_qgroup_rb(struct btrfs_fs_info fs_info,
				126	u64 qgroupid)
				127	{
				128	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
				129	struct rb_node *parent = NULL;
				130	struct btrfs_qgroup *qgroup;
				131
				132	while (*p) {
				133	parent = *p;
				134	qgroup = rb_entry(parent, struct btrfs_qgroup, node);
				135
				136	if (qgroup->qgroupid < qgroupid)
				137	p = &(*p)->rb_left;
				138	else if (qgroup->qgroupid > qgroupid)
				139	p = &(*p)->rb_right;
				140	else
				141	return qgroup;
				142	}
				143
				144	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
				145	if (!qgroup)
				146	return ERR_PTR(-ENOMEM);
				147
				148	qgroup->qgroupid = qgroupid;
				149	INIT_LIST_HEAD(&qgroup->groups);
				150	INIT_LIST_HEAD(&qgroup->members);
				151	INIT_LIST_HEAD(&qgroup->dirty);
				152
				153	rb_link_node(&qgroup->node, parent, p);
				154	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
				155
				156	return qgroup;
				157	}
				158
				159	static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
				160	{
				161	struct btrfs_qgroup_list *list;
				162
				163	list_del(&qgroup->dirty);
				164	while (!list_empty(&qgroup->groups)) {
				165	list = list_first_entry(&qgroup->groups,
				166	struct btrfs_qgroup_list, next_group);
				167	list_del(&list->next_group);
				168	list_del(&list->next_member);
				169	kfree(list);
				170	}
				171
				172	while (!list_empty(&qgroup->members)) {
				173	list = list_first_entry(&qgroup->members,
				174	struct btrfs_qgroup_list, next_member);
				175	list_del(&list->next_group);
				176	list_del(&list->next_member);
				177	kfree(list);
				178	}
				179	kfree(qgroup);
				180	}
				181
				182	/* must be called with qgroup_lock held */
				183	static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
				184	{
				185	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
				186
				187	if (!qgroup)
				188	return -ENOENT;
				189
				190	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
				191	__del_qgroup_rb(qgroup);
				192	return 0;
				193	}
				194
				195	/* must be called with qgroup_lock held */
				196	static int add_relation_rb(struct btrfs_fs_info *fs_info,
				197	u64 memberid, u64 parentid)
				198	{
				199	struct btrfs_qgroup *member;
				200	struct btrfs_qgroup *parent;
				201	struct btrfs_qgroup_list *list;
				202
				203	member = find_qgroup_rb(fs_info, memberid);
				204	parent = find_qgroup_rb(fs_info, parentid);
				205	if (!member \|\| !parent)
				206	return -ENOENT;
				207
				208	list = kzalloc(sizeof(*list), GFP_ATOMIC);
				209	if (!list)
				210	return -ENOMEM;
				211
				212	list->group = parent;
				213	list->member = member;
				214	list_add_tail(&list->next_group, &member->groups);
				215	list_add_tail(&list->next_member, &parent->members);
				216
				217	return 0;
				218	}
				219
				220	/* must be called with qgroup_lock held */
				221	static int del_relation_rb(struct btrfs_fs_info *fs_info,
				222	u64 memberid, u64 parentid)
				223	{
				224	struct btrfs_qgroup *member;
				225	struct btrfs_qgroup *parent;
				226	struct btrfs_qgroup_list *list;
				227
				228	member = find_qgroup_rb(fs_info, memberid);
				229	parent = find_qgroup_rb(fs_info, parentid);
				230	if (!member \|\| !parent)
				231	return -ENOENT;
				232
				233	list_for_each_entry(list, &member->groups, next_group) {
				234	if (list->group == parent) {
				235	list_del(&list->next_group);
				236	list_del(&list->next_member);
				237	kfree(list);
				238	return 0;
				239	}
				240	}
				241	return -ENOENT;
				242	}
				243
				244	#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
				245	int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
				246	u64 rfer, u64 excl)
				247	{
				248	struct btrfs_qgroup *qgroup;
				249
				250	qgroup = find_qgroup_rb(fs_info, qgroupid);
				251	if (!qgroup)
				252	return -EINVAL;
				253	if (qgroup->rfer != rfer \|\| qgroup->excl != excl)
				254	return -EINVAL;
				255	return 0;
				256	}
				257	#endif
				258
				259	/*
				260	* The full config is read in one go, only called from open_ctree()
				261	* It doesn't use any locking, as at this point we're still single-threaded
				262	*/
				263	int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
				264	{
				265	struct btrfs_key key;
				266	struct btrfs_key found_key;
				267	struct btrfs_root *quota_root = fs_info->quota_root;
				268	struct btrfs_path *path = NULL;
				269	struct extent_buffer *l;
				270	int slot;
				271	int ret = 0;
				272	u64 flags = 0;
				273	u64 rescan_progress = 0;
				274
				275	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				276	return 0;
				277
				278	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
				279	if (!fs_info->qgroup_ulist) {
				280	ret = -ENOMEM;
				281	goto out;
				282	}
				283
				284	path = btrfs_alloc_path();
				285	if (!path) {
				286	ret = -ENOMEM;
				287	goto out;
				288	}
				289
				290	/* default this to quota off, in case no status key is found */
				291	fs_info->qgroup_flags = 0;
				292
				293	/*
				294	* pass 1: read status, all qgroup infos and limits
				295	*/
				296	key.objectid = 0;
				297	key.type = 0;
				298	key.offset = 0;
				299	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
				300	if (ret)
				301	goto out;
				302
				303	while (1) {
				304	struct btrfs_qgroup *qgroup;
				305
				306	slot = path->slots[0];
				307	l = path->nodes[0];
				308	btrfs_item_key_to_cpu(l, &found_key, slot);
				309
				310	if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
				311	struct btrfs_qgroup_status_item *ptr;
				312
				313	ptr = btrfs_item_ptr(l, slot,
				314	struct btrfs_qgroup_status_item);
				315
				316	if (btrfs_qgroup_status_version(l, ptr) !=
				317	BTRFS_QGROUP_STATUS_VERSION) {
				318	btrfs_err(fs_info,
				319	"old qgroup version, quota disabled");
				320	goto out;
				321	}
				322	if (btrfs_qgroup_status_generation(l, ptr) !=
				323	fs_info->generation) {
				324	flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				325	btrfs_err(fs_info,
				326	"qgroup generation mismatch, marked as inconsistent");
				327	}
				328	fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
				329	ptr);
				330	rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
				331	goto next1;
				332	}
				333
				334	if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
				335	found_key.type != BTRFS_QGROUP_LIMIT_KEY)
				336	goto next1;
				337
				338	qgroup = find_qgroup_rb(fs_info, found_key.offset);
				339	if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) \|\|
				340	(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
				341	btrfs_err(fs_info, "inconsistent qgroup config");
				342	flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				343	}
				344	if (!qgroup) {
				345	qgroup = add_qgroup_rb(fs_info, found_key.offset);
				346	if (IS_ERR(qgroup)) {
				347	ret = PTR_ERR(qgroup);
				348	goto out;
				349	}
				350	}
				351	switch (found_key.type) {
				352	case BTRFS_QGROUP_INFO_KEY: {
				353	struct btrfs_qgroup_info_item *ptr;
				354
				355	ptr = btrfs_item_ptr(l, slot,
				356	struct btrfs_qgroup_info_item);
				357	qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
				358	qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
				359	qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
				360	qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
				361	/* generation currently unused */
				362	break;
				363	}
				364	case BTRFS_QGROUP_LIMIT_KEY: {
				365	struct btrfs_qgroup_limit_item *ptr;
				366
				367	ptr = btrfs_item_ptr(l, slot,
				368	struct btrfs_qgroup_limit_item);
				369	qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
				370	qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
				371	qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
				372	qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
				373	qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
				374	break;
				375	}
				376	}
				377	next1:
				378	ret = btrfs_next_item(quota_root, path);
				379	if (ret < 0)
				380	goto out;
				381	if (ret)
				382	break;
				383	}
				384	btrfs_release_path(path);
				385
				386	/*
				387	* pass 2: read all qgroup relations
				388	*/
				389	key.objectid = 0;
				390	key.type = BTRFS_QGROUP_RELATION_KEY;
				391	key.offset = 0;
				392	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
				393	if (ret)
				394	goto out;
				395	while (1) {
				396	slot = path->slots[0];
				397	l = path->nodes[0];
				398	btrfs_item_key_to_cpu(l, &found_key, slot);
				399
				400	if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
				401	goto next2;
				402
				403	if (found_key.objectid > found_key.offset) {
				404	/* parent <- member, not needed to build config */
				405	/* FIXME should we omit the key completely? */
				406	goto next2;
				407	}
				408
				409	ret = add_relation_rb(fs_info, found_key.objectid,
				410	found_key.offset);
				411	if (ret == -ENOENT) {
				412	btrfs_warn(fs_info,
				413	"orphan qgroup relation 0x%llx->0x%llx",
				414	found_key.objectid, found_key.offset);
				415	ret = 0; /* ignore the error */
				416	}
				417	if (ret)
				418	goto out;
				419	next2:
				420	ret = btrfs_next_item(quota_root, path);
				421	if (ret < 0)
				422	goto out;
				423	if (ret)
				424	break;
				425	}
				426	out:
				427	fs_info->qgroup_flags \|= flags;
				428	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
				429	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
				430	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
				431	ret >= 0)
				432	ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
				433	btrfs_free_path(path);
				434
				435	if (ret < 0) {
				436	ulist_free(fs_info->qgroup_ulist);
				437	fs_info->qgroup_ulist = NULL;
				438	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				439	}
				440
				441	return ret < 0 ? ret : 0;
				442	}
				443
				444	/*
				445	* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
				446	* first two are in single-threaded paths.And for the third one, we have set
				447	* quota_root to be null with qgroup_lock held before, so it is safe to clean
				448	* up the in-memory structures without qgroup_lock held.
				449	*/
				450	void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
				451	{
				452	struct rb_node *n;
				453	struct btrfs_qgroup *qgroup;
				454
				455	while ((n = rb_first(&fs_info->qgroup_tree))) {
				456	qgroup = rb_entry(n, struct btrfs_qgroup, node);
				457	rb_erase(n, &fs_info->qgroup_tree);
				458	__del_qgroup_rb(qgroup);
				459	}
				460	/*
				461	* we call btrfs_free_qgroup_config() when umounting
				462	* filesystem and disabling quota, so we set qgroup_ulist
				463	* to be null here to avoid double free.
				464	*/
				465	ulist_free(fs_info->qgroup_ulist);
				466	fs_info->qgroup_ulist = NULL;
				467	}
				468
				469	static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
				470	struct btrfs_root *quota_root,
				471	u64 src, u64 dst)
				472	{
				473	int ret;
				474	struct btrfs_path *path;
				475	struct btrfs_key key;
				476
				477	path = btrfs_alloc_path();
				478	if (!path)
				479	return -ENOMEM;
				480
				481	key.objectid = src;
				482	key.type = BTRFS_QGROUP_RELATION_KEY;
				483	key.offset = dst;
				484
				485	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
				486
				487	btrfs_mark_buffer_dirty(path->nodes[0]);
				488
				489	btrfs_free_path(path);
				490	return ret;
				491	}
				492
				493	static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
				494	struct btrfs_root *quota_root,
				495	u64 src, u64 dst)
				496	{
				497	int ret;
				498	struct btrfs_path *path;
				499	struct btrfs_key key;
				500
				501	path = btrfs_alloc_path();
				502	if (!path)
				503	return -ENOMEM;
				504
				505	key.objectid = src;
				506	key.type = BTRFS_QGROUP_RELATION_KEY;
				507	key.offset = dst;
				508
				509	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
				510	if (ret < 0)
				511	goto out;
				512
				513	if (ret > 0) {
				514	ret = -ENOENT;
				515	goto out;
				516	}
				517
				518	ret = btrfs_del_item(trans, quota_root, path);
				519	out:
				520	btrfs_free_path(path);
				521	return ret;
				522	}
				523
				524	static int add_qgroup_item(struct btrfs_trans_handle *trans,
				525	struct btrfs_root *quota_root, u64 qgroupid)
				526	{
				527	int ret;
				528	struct btrfs_path *path;
				529	struct btrfs_qgroup_info_item *qgroup_info;
				530	struct btrfs_qgroup_limit_item *qgroup_limit;
				531	struct extent_buffer *leaf;
				532	struct btrfs_key key;
				533
				534	if (btrfs_is_testing(quota_root->fs_info))
				535	return 0;
				536
				537	path = btrfs_alloc_path();
				538	if (!path)
				539	return -ENOMEM;
				540
				541	key.objectid = 0;
				542	key.type = BTRFS_QGROUP_INFO_KEY;
				543	key.offset = qgroupid;
				544
				545	/*
				546	* Avoid a transaction abort by catching -EEXIST here. In that
				547	* case, we proceed by re-initializing the existing structure
				548	* on disk.
				549	*/
				550
				551	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				552	sizeof(*qgroup_info));
				553	if (ret && ret != -EEXIST)
				554	goto out;
				555
				556	leaf = path->nodes[0];
				557	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				558	struct btrfs_qgroup_info_item);
				559	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
				560	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
				561	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
				562	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
				563	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
				564
				565	btrfs_mark_buffer_dirty(leaf);
				566
				567	btrfs_release_path(path);
				568
				569	key.type = BTRFS_QGROUP_LIMIT_KEY;
				570	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				571	sizeof(*qgroup_limit));
				572	if (ret && ret != -EEXIST)
				573	goto out;
				574
				575	leaf = path->nodes[0];
				576	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				577	struct btrfs_qgroup_limit_item);
				578	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
				579	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
				580	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
				581	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
				582	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
				583
				584	btrfs_mark_buffer_dirty(leaf);
				585
				586	ret = 0;
				587	out:
				588	btrfs_free_path(path);
				589	return ret;
				590	}
				591
				592	static int del_qgroup_item(struct btrfs_trans_handle *trans,
				593	struct btrfs_root *quota_root, u64 qgroupid)
				594	{
				595	int ret;
				596	struct btrfs_path *path;
				597	struct btrfs_key key;
				598
				599	path = btrfs_alloc_path();
				600	if (!path)
				601	return -ENOMEM;
				602
				603	key.objectid = 0;
				604	key.type = BTRFS_QGROUP_INFO_KEY;
				605	key.offset = qgroupid;
				606	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
				607	if (ret < 0)
				608	goto out;
				609
				610	if (ret > 0) {
				611	ret = -ENOENT;
				612	goto out;
				613	}
				614
				615	ret = btrfs_del_item(trans, quota_root, path);
				616	if (ret)
				617	goto out;
				618
				619	btrfs_release_path(path);
				620
				621	key.type = BTRFS_QGROUP_LIMIT_KEY;
				622	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
				623	if (ret < 0)
				624	goto out;
				625
				626	if (ret > 0) {
				627	ret = -ENOENT;
				628	goto out;
				629	}
				630
				631	ret = btrfs_del_item(trans, quota_root, path);
				632
				633	out:
				634	btrfs_free_path(path);
				635	return ret;
				636	}
				637
				638	static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				639	struct btrfs_root *root,
				640	struct btrfs_qgroup *qgroup)
				641	{
				642	struct btrfs_path *path;
				643	struct btrfs_key key;
				644	struct extent_buffer *l;
				645	struct btrfs_qgroup_limit_item *qgroup_limit;
				646	int ret;
				647	int slot;
				648
				649	key.objectid = 0;
				650	key.type = BTRFS_QGROUP_LIMIT_KEY;
				651	key.offset = qgroup->qgroupid;
				652
				653	path = btrfs_alloc_path();
				654	if (!path)
				655	return -ENOMEM;
				656
				657	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
				658	if (ret > 0)
				659	ret = -ENOENT;
				660
				661	if (ret)
				662	goto out;
				663
				664	l = path->nodes[0];
				665	slot = path->slots[0];
				666	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
				667	btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
				668	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
				669	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
				670	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
				671	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
				672
				673	btrfs_mark_buffer_dirty(l);
				674
				675	out:
				676	btrfs_free_path(path);
				677	return ret;
				678	}
				679
				680	static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				681	struct btrfs_root *root,
				682	struct btrfs_qgroup *qgroup)
				683	{
				684	struct btrfs_path *path;
				685	struct btrfs_key key;
				686	struct extent_buffer *l;
				687	struct btrfs_qgroup_info_item *qgroup_info;
				688	int ret;
				689	int slot;
				690
				691	if (btrfs_is_testing(root->fs_info))
				692	return 0;
				693
				694	key.objectid = 0;
				695	key.type = BTRFS_QGROUP_INFO_KEY;
				696	key.offset = qgroup->qgroupid;
				697
				698	path = btrfs_alloc_path();
				699	if (!path)
				700	return -ENOMEM;
				701
				702	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
				703	if (ret > 0)
				704	ret = -ENOENT;
				705
				706	if (ret)
				707	goto out;
				708
				709	l = path->nodes[0];
				710	slot = path->slots[0];
				711	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
				712	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
				713	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
				714	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
				715	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
				716	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
				717
				718	btrfs_mark_buffer_dirty(l);
				719
				720	out:
				721	btrfs_free_path(path);
				722	return ret;
				723	}
				724
				725	static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
				726	{
				727	struct btrfs_fs_info *fs_info = trans->fs_info;
				728	struct btrfs_root *quota_root = fs_info->quota_root;
				729	struct btrfs_path *path;
				730	struct btrfs_key key;
				731	struct extent_buffer *l;
				732	struct btrfs_qgroup_status_item *ptr;
				733	int ret;
				734	int slot;
				735
				736	key.objectid = 0;
				737	key.type = BTRFS_QGROUP_STATUS_KEY;
				738	key.offset = 0;
				739
				740	path = btrfs_alloc_path();
				741	if (!path)
				742	return -ENOMEM;
				743
				744	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
				745	if (ret > 0)
				746	ret = -ENOENT;
				747
				748	if (ret)
				749	goto out;
				750
				751	l = path->nodes[0];
				752	slot = path->slots[0];
				753	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
				754	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
				755	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
				756	btrfs_set_qgroup_status_rescan(l, ptr,
				757	fs_info->qgroup_rescan_progress.objectid);
				758
				759	btrfs_mark_buffer_dirty(l);
				760
				761	out:
				762	btrfs_free_path(path);
				763	return ret;
				764	}
				765
				766	/*
				767	* called with qgroup_lock held
				768	*/
				769	static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				770	struct btrfs_root *root)
				771	{
				772	struct btrfs_path *path;
				773	struct btrfs_key key;
				774	struct extent_buffer *leaf = NULL;
				775	int ret;
				776	int nr = 0;
				777
				778	path = btrfs_alloc_path();
				779	if (!path)
				780	return -ENOMEM;
				781
				782	path->leave_spinning = 1;
				783
				784	key.objectid = 0;
				785	key.offset = 0;
				786	key.type = 0;
				787
				788	while (1) {
				789	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
				790	if (ret < 0)
				791	goto out;
				792	leaf = path->nodes[0];
				793	nr = btrfs_header_nritems(leaf);
				794	if (!nr)
				795	break;
				796	/*
				797	* delete the leaf one by one
				798	* since the whole tree is going
				799	* to be deleted.
				800	*/
				801	path->slots[0] = 0;
				802	ret = btrfs_del_items(trans, root, path, 0, nr);
				803	if (ret)
				804	goto out;
				805
				806	btrfs_release_path(path);
				807	}
				808	ret = 0;
				809	out:
				810	btrfs_free_path(path);
				811	return ret;
				812	}
				813
				814	int btrfs_quota_enable(struct btrfs_trans_handle *trans,
				815	struct btrfs_fs_info *fs_info)
				816	{
				817	struct btrfs_root *quota_root;
				818	struct btrfs_root *tree_root = fs_info->tree_root;
				819	struct btrfs_path *path = NULL;
				820	struct btrfs_qgroup_status_item *ptr;
				821	struct extent_buffer *leaf;
				822	struct btrfs_key key;
				823	struct btrfs_key found_key;
				824	struct btrfs_qgroup *qgroup = NULL;
				825	int ret = 0;
				826	int slot;
				827
				828	mutex_lock(&fs_info->qgroup_ioctl_lock);
				829	if (fs_info->quota_root) {
				830	set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags);
				831	goto out;
				832	}
				833
				834	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
				835	if (!fs_info->qgroup_ulist) {
				836	ret = -ENOMEM;
				837	goto out;
				838	}
				839
				840	/*
				841	* initially create the quota tree
				842	*/
				843	quota_root = btrfs_create_tree(trans, fs_info,
				844	BTRFS_QUOTA_TREE_OBJECTID);
				845	if (IS_ERR(quota_root)) {
				846	ret = PTR_ERR(quota_root);
				847	goto out;
				848	}
				849
				850	path = btrfs_alloc_path();
				851	if (!path) {
				852	ret = -ENOMEM;
				853	goto out_free_root;
				854	}
				855
				856	key.objectid = 0;
				857	key.type = BTRFS_QGROUP_STATUS_KEY;
				858	key.offset = 0;
				859
				860	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				861	sizeof(*ptr));
				862	if (ret)
				863	goto out_free_path;
				864
				865	leaf = path->nodes[0];
				866	ptr = btrfs_item_ptr(leaf, path->slots[0],
				867	struct btrfs_qgroup_status_item);
				868	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
				869	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
				870	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON \|
				871	BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				872	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
				873	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
				874
				875	btrfs_mark_buffer_dirty(leaf);
				876
				877	key.objectid = 0;
				878	key.type = BTRFS_ROOT_REF_KEY;
				879	key.offset = 0;
				880
				881	btrfs_release_path(path);
				882	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
				883	if (ret > 0)
				884	goto out_add_root;
				885	if (ret < 0)
				886	goto out_free_path;
				887
				888
				889	while (1) {
				890	slot = path->slots[0];
				891	leaf = path->nodes[0];
				892	btrfs_item_key_to_cpu(leaf, &found_key, slot);
				893
				894	if (found_key.type == BTRFS_ROOT_REF_KEY) {
				895	ret = add_qgroup_item(trans, quota_root,
				896	found_key.offset);
				897	if (ret)
				898	goto out_free_path;
				899
				900	qgroup = add_qgroup_rb(fs_info, found_key.offset);
				901	if (IS_ERR(qgroup)) {
				902	ret = PTR_ERR(qgroup);
				903	goto out_free_path;
				904	}
				905	}
				906	ret = btrfs_next_item(tree_root, path);
				907	if (ret < 0)
				908	goto out_free_path;
				909	if (ret)
				910	break;
				911	}
				912
				913	out_add_root:
				914	btrfs_release_path(path);
				915	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
				916	if (ret)
				917	goto out_free_path;
				918
				919	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
				920	if (IS_ERR(qgroup)) {
				921	ret = PTR_ERR(qgroup);
				922	goto out_free_path;
				923	}
				924	spin_lock(&fs_info->qgroup_lock);
				925	fs_info->quota_root = quota_root;
				926	set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags);
				927	spin_unlock(&fs_info->qgroup_lock);
				928	out_free_path:
				929	btrfs_free_path(path);
				930	out_free_root:
				931	if (ret) {
				932	free_extent_buffer(quota_root->node);
				933	free_extent_buffer(quota_root->commit_root);
				934	kfree(quota_root);
				935	}
				936	out:
				937	if (ret) {
				938	ulist_free(fs_info->qgroup_ulist);
				939	fs_info->qgroup_ulist = NULL;
				940	}
				941	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				942	return ret;
				943	}
				944
				945	int btrfs_quota_disable(struct btrfs_trans_handle *trans,
				946	struct btrfs_fs_info *fs_info)
				947	{
				948	struct btrfs_root *quota_root;
				949	int ret = 0;
				950
				951	mutex_lock(&fs_info->qgroup_ioctl_lock);
				952	if (!fs_info->quota_root)
				953	goto out;
				954	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
				955	btrfs_qgroup_wait_for_completion(fs_info, false);
				956	spin_lock(&fs_info->qgroup_lock);
				957	quota_root = fs_info->quota_root;
				958	fs_info->quota_root = NULL;
				959	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
				960	spin_unlock(&fs_info->qgroup_lock);
				961
				962	btrfs_free_qgroup_config(fs_info);
				963
				964	ret = btrfs_clean_quota_tree(trans, quota_root);
				965	if (ret)
				966	goto out;
				967
				968	ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
				969	if (ret)
				970	goto out;
				971
				972	list_del(&quota_root->dirty_list);
				973
				974	btrfs_tree_lock(quota_root->node);
				975	clean_tree_block(fs_info, quota_root->node);
				976	btrfs_tree_unlock(quota_root->node);
				977	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
				978
				979	free_extent_buffer(quota_root->node);
				980	free_extent_buffer(quota_root->commit_root);
				981	kfree(quota_root);
				982	out:
				983	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				984	return ret;
				985	}
				986
				987	static void qgroup_dirty(struct btrfs_fs_info *fs_info,
				988	struct btrfs_qgroup *qgroup)
				989	{
				990	if (list_empty(&qgroup->dirty))
				991	list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
				992	}
				993
				994	static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
				995	struct btrfs_qgroup *qgroup,
				996	u64 num_bytes)
				997	{
				998	#ifdef CONFIG_BTRFS_DEBUG
				999	WARN_ON(qgroup->reserved < num_bytes);
				1000	btrfs_debug(fs_info,
				1001	"qgroup %llu reserved space underflow, have: %llu, to free: %llu",
				1002	qgroup->qgroupid, qgroup->reserved, num_bytes);
				1003	#endif
				1004	qgroup->reserved = 0;
				1005	}
				1006	/*
				1007	* The easy accounting, if we are adding/removing the only ref for an extent
				1008	* then this qgroup and all of the parent qgroups get their reference and
				1009	* exclusive counts adjusted.
				1010	*
				1011	* Caller should hold fs_info->qgroup_lock.
				1012	*/
				1013	static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				1014	struct ulist *tmp, u64 ref_root,
				1015	u64 num_bytes, int sign)
				1016	{
				1017	struct btrfs_qgroup *qgroup;
				1018	struct btrfs_qgroup_list *glist;
				1019	struct ulist_node *unode;
				1020	struct ulist_iterator uiter;
				1021	int ret = 0;
				1022
				1023	qgroup = find_qgroup_rb(fs_info, ref_root);
				1024	if (!qgroup)
				1025	goto out;
				1026
				1027	qgroup->rfer += sign * num_bytes;
				1028	qgroup->rfer_cmpr += sign * num_bytes;
				1029
				1030	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
				1031	qgroup->excl += sign * num_bytes;
				1032	qgroup->excl_cmpr += sign * num_bytes;
				1033	if (sign > 0) {
				1034	trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
				1035	if (qgroup->reserved < num_bytes)
				1036	report_reserved_underflow(fs_info, qgroup, num_bytes);
				1037	else
				1038	qgroup->reserved -= num_bytes;
				1039	}
				1040
				1041	qgroup_dirty(fs_info, qgroup);
				1042
				1043	/* Get all of the parent groups that contain this qgroup */
				1044	list_for_each_entry(glist, &qgroup->groups, next_group) {
				1045	ret = ulist_add(tmp, glist->group->qgroupid,
				1046	qgroup_to_aux(glist->group), GFP_ATOMIC);
				1047	if (ret < 0)
				1048	goto out;
				1049	}
				1050
				1051	/* Iterate all of the parents and adjust their reference counts */
				1052	ULIST_ITER_INIT(&uiter);
				1053	while ((unode = ulist_next(tmp, &uiter))) {
				1054	qgroup = unode_aux_to_qgroup(unode);
				1055	qgroup->rfer += sign * num_bytes;
				1056	qgroup->rfer_cmpr += sign * num_bytes;
				1057	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
				1058	qgroup->excl += sign * num_bytes;
				1059	if (sign > 0) {
				1060	trace_qgroup_update_reserve(fs_info, qgroup,
				1061	-(s64)num_bytes);
				1062	if (qgroup->reserved < num_bytes)
				1063	report_reserved_underflow(fs_info, qgroup,
				1064	num_bytes);
				1065	else
				1066	qgroup->reserved -= num_bytes;
				1067	}
				1068	qgroup->excl_cmpr += sign * num_bytes;
				1069	qgroup_dirty(fs_info, qgroup);
				1070
				1071	/* Add any parents of the parents */
				1072	list_for_each_entry(glist, &qgroup->groups, next_group) {
				1073	ret = ulist_add(tmp, glist->group->qgroupid,
				1074	qgroup_to_aux(glist->group), GFP_ATOMIC);
				1075	if (ret < 0)
				1076	goto out;
				1077	}
				1078	}
				1079	ret = 0;
				1080	out:
				1081	return ret;
				1082	}
				1083
				1084
				1085	/*
				1086	* Quick path for updating qgroup with only excl refs.
				1087	*
				1088	* In that case, just update all parent will be enough.
				1089	* Or we needs to do a full rescan.
				1090	* Caller should also hold fs_info->qgroup_lock.
				1091	*
				1092	* Return 0 for quick update, return >0 for need to full rescan
				1093	* and mark INCONSISTENT flag.
				1094	* Return < 0 for other error.
				1095	*/
				1096	static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				1097	struct ulist *tmp, u64 src, u64 dst,
				1098	int sign)
				1099	{
				1100	struct btrfs_qgroup *qgroup;
				1101	int ret = 1;
				1102	int err = 0;
				1103
				1104	qgroup = find_qgroup_rb(fs_info, src);
				1105	if (!qgroup)
				1106	goto out;
				1107	if (qgroup->excl == qgroup->rfer) {
				1108	ret = 0;
				1109	err = __qgroup_excl_accounting(fs_info, tmp, dst,
				1110	qgroup->excl, sign);
				1111	if (err < 0) {
				1112	ret = err;
				1113	goto out;
				1114	}
				1115	}
				1116	out:
				1117	if (ret)
				1118	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				1119	return ret;
				1120	}
				1121
				1122	int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
				1123	struct btrfs_fs_info *fs_info, u64 src, u64 dst)
				1124	{
				1125	struct btrfs_root *quota_root;
				1126	struct btrfs_qgroup *parent;
				1127	struct btrfs_qgroup *member;
				1128	struct btrfs_qgroup_list *list;
				1129	struct ulist *tmp;
				1130	int ret = 0;
				1131
				1132	/* Check the level of src and dst first */
				1133	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
				1134	return -EINVAL;
				1135
				1136	tmp = ulist_alloc(GFP_KERNEL);
				1137	if (!tmp)
				1138	return -ENOMEM;
				1139
				1140	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1141	quota_root = fs_info->quota_root;
				1142	if (!quota_root) {
				1143	ret = -EINVAL;
				1144	goto out;
				1145	}
				1146	member = find_qgroup_rb(fs_info, src);
				1147	parent = find_qgroup_rb(fs_info, dst);
				1148	if (!member \|\| !parent) {
				1149	ret = -EINVAL;
				1150	goto out;
				1151	}
				1152
				1153	/* check if such qgroup relation exist firstly */
				1154	list_for_each_entry(list, &member->groups, next_group) {
				1155	if (list->group == parent) {
				1156	ret = -EEXIST;
				1157	goto out;
				1158	}
				1159	}
				1160
				1161	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
				1162	if (ret)
				1163	goto out;
				1164
				1165	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
				1166	if (ret) {
				1167	del_qgroup_relation_item(trans, quota_root, src, dst);
				1168	goto out;
				1169	}
				1170
				1171	spin_lock(&fs_info->qgroup_lock);
				1172	ret = add_relation_rb(fs_info, src, dst);
				1173	if (ret < 0) {
				1174	spin_unlock(&fs_info->qgroup_lock);
				1175	goto out;
				1176	}
				1177	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
				1178	spin_unlock(&fs_info->qgroup_lock);
				1179	out:
				1180	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1181	ulist_free(tmp);
				1182	return ret;
				1183	}
				1184
				1185	static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
				1186	struct btrfs_fs_info *fs_info, u64 src, u64 dst)
				1187	{
				1188	struct btrfs_root *quota_root;
				1189	struct btrfs_qgroup *parent;
				1190	struct btrfs_qgroup *member;
				1191	struct btrfs_qgroup_list *list;
				1192	struct ulist *tmp;
				1193	int ret = 0;
				1194	int err;
				1195
				1196	tmp = ulist_alloc(GFP_KERNEL);
				1197	if (!tmp)
				1198	return -ENOMEM;
				1199
				1200	quota_root = fs_info->quota_root;
				1201	if (!quota_root) {
				1202	ret = -EINVAL;
				1203	goto out;
				1204	}
				1205
				1206	member = find_qgroup_rb(fs_info, src);
				1207	parent = find_qgroup_rb(fs_info, dst);
				1208	if (!member \|\| !parent) {
				1209	ret = -EINVAL;
				1210	goto out;
				1211	}
				1212
				1213	/* check if such qgroup relation exist firstly */
				1214	list_for_each_entry(list, &member->groups, next_group) {
				1215	if (list->group == parent)
				1216	goto exist;
				1217	}
				1218	ret = -ENOENT;
				1219	goto out;
				1220	exist:
				1221	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
				1222	err = del_qgroup_relation_item(trans, quota_root, dst, src);
				1223	if (err && !ret)
				1224	ret = err;
				1225
				1226	spin_lock(&fs_info->qgroup_lock);
				1227	del_relation_rb(fs_info, src, dst);
				1228	ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
				1229	spin_unlock(&fs_info->qgroup_lock);
				1230	out:
				1231	ulist_free(tmp);
				1232	return ret;
				1233	}
				1234
				1235	int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
				1236	struct btrfs_fs_info *fs_info, u64 src, u64 dst)
				1237	{
				1238	int ret = 0;
				1239
				1240	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1241	ret = __del_qgroup_relation(trans, fs_info, src, dst);
				1242	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1243
				1244	return ret;
				1245	}
				1246
				1247	int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
				1248	struct btrfs_fs_info *fs_info, u64 qgroupid)
				1249	{
				1250	struct btrfs_root *quota_root;
				1251	struct btrfs_qgroup *qgroup;
				1252	int ret = 0;
				1253
				1254	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1255	quota_root = fs_info->quota_root;
				1256	if (!quota_root) {
				1257	ret = -EINVAL;
				1258	goto out;
				1259	}
				1260	qgroup = find_qgroup_rb(fs_info, qgroupid);
				1261	if (qgroup) {
				1262	ret = -EEXIST;
				1263	goto out;
				1264	}
				1265
				1266	ret = add_qgroup_item(trans, quota_root, qgroupid);
				1267	if (ret)
				1268	goto out;
				1269
				1270	spin_lock(&fs_info->qgroup_lock);
				1271	qgroup = add_qgroup_rb(fs_info, qgroupid);
				1272	spin_unlock(&fs_info->qgroup_lock);
				1273
				1274	if (IS_ERR(qgroup))
				1275	ret = PTR_ERR(qgroup);
				1276	out:
				1277	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1278	return ret;
				1279	}
				1280
				1281	int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
				1282	struct btrfs_fs_info *fs_info, u64 qgroupid)
				1283	{
				1284	struct btrfs_root *quota_root;
				1285	struct btrfs_qgroup *qgroup;
				1286	struct btrfs_qgroup_list *list;
				1287	int ret = 0;
				1288
				1289	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1290	quota_root = fs_info->quota_root;
				1291	if (!quota_root) {
				1292	ret = -EINVAL;
				1293	goto out;
				1294	}
				1295
				1296	qgroup = find_qgroup_rb(fs_info, qgroupid);
				1297	if (!qgroup) {
				1298	ret = -ENOENT;
				1299	goto out;
				1300	} else {
				1301	/* check if there are no children of this qgroup */
				1302	if (!list_empty(&qgroup->members)) {
				1303	ret = -EBUSY;
				1304	goto out;
				1305	}
				1306	}
				1307	ret = del_qgroup_item(trans, quota_root, qgroupid);
				1308	if (ret && ret != -ENOENT)
				1309	goto out;
				1310
				1311	while (!list_empty(&qgroup->groups)) {
				1312	list = list_first_entry(&qgroup->groups,
				1313	struct btrfs_qgroup_list, next_group);
				1314	ret = __del_qgroup_relation(trans, fs_info,
				1315	qgroupid,
				1316	list->group->qgroupid);
				1317	if (ret)
				1318	goto out;
				1319	}
				1320
				1321	spin_lock(&fs_info->qgroup_lock);
				1322	del_qgroup_rb(fs_info, qgroupid);
				1323	spin_unlock(&fs_info->qgroup_lock);
				1324	out:
				1325	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1326	return ret;
				1327	}
				1328
				1329	int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
				1330	struct btrfs_fs_info *fs_info, u64 qgroupid,
				1331	struct btrfs_qgroup_limit *limit)
				1332	{
				1333	struct btrfs_root *quota_root;
				1334	struct btrfs_qgroup *qgroup;
				1335	int ret = 0;
				1336	/* Sometimes we would want to clear the limit on this qgroup.
				1337	* To meet this requirement, we treat the -1 as a special value
				1338	* which tell kernel to clear the limit on this qgroup.
				1339	*/
				1340	const u64 CLEAR_VALUE = -1;
				1341
				1342	mutex_lock(&fs_info->qgroup_ioctl_lock);
				1343	quota_root = fs_info->quota_root;
				1344	if (!quota_root) {
				1345	ret = -EINVAL;
				1346	goto out;
				1347	}
				1348
				1349	qgroup = find_qgroup_rb(fs_info, qgroupid);
				1350	if (!qgroup) {
				1351	ret = -ENOENT;
				1352	goto out;
				1353	}
				1354
				1355	spin_lock(&fs_info->qgroup_lock);
				1356	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
				1357	if (limit->max_rfer == CLEAR_VALUE) {
				1358	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
				1359	limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
				1360	qgroup->max_rfer = 0;
				1361	} else {
				1362	qgroup->max_rfer = limit->max_rfer;
				1363	}
				1364	}
				1365	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
				1366	if (limit->max_excl == CLEAR_VALUE) {
				1367	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
				1368	limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
				1369	qgroup->max_excl = 0;
				1370	} else {
				1371	qgroup->max_excl = limit->max_excl;
				1372	}
				1373	}
				1374	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
				1375	if (limit->rsv_rfer == CLEAR_VALUE) {
				1376	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
				1377	limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
				1378	qgroup->rsv_rfer = 0;
				1379	} else {
				1380	qgroup->rsv_rfer = limit->rsv_rfer;
				1381	}
				1382	}
				1383	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
				1384	if (limit->rsv_excl == CLEAR_VALUE) {
				1385	qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
				1386	limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
				1387	qgroup->rsv_excl = 0;
				1388	} else {
				1389	qgroup->rsv_excl = limit->rsv_excl;
				1390	}
				1391	}
				1392	qgroup->lim_flags \|= limit->flags;
				1393
				1394	spin_unlock(&fs_info->qgroup_lock);
				1395
				1396	ret = update_qgroup_limit_item(trans, quota_root, qgroup);
				1397	if (ret) {
				1398	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				1399	btrfs_info(fs_info, "unable to update quota limit for %llu",
				1400	qgroupid);
				1401	}
				1402
				1403	out:
				1404	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				1405	return ret;
				1406	}
				1407
				1408	int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
				1409	struct btrfs_delayed_ref_root *delayed_refs,
				1410	struct btrfs_qgroup_extent_record *record)
				1411	{
				1412	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
				1413	struct rb_node *parent_node = NULL;
				1414	struct btrfs_qgroup_extent_record *entry;
				1415	u64 bytenr = record->bytenr;
				1416
				1417	assert_spin_locked(&delayed_refs->lock);
				1418	trace_btrfs_qgroup_trace_extent(fs_info, record);
				1419
				1420	while (*p) {
				1421	parent_node = *p;
				1422	entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
				1423	node);
				1424	if (bytenr < entry->bytenr)
				1425	p = &(*p)->rb_left;
				1426	else if (bytenr > entry->bytenr)
				1427	p = &(*p)->rb_right;
				1428	else
				1429	return 1;
				1430	}
				1431
				1432	rb_link_node(&record->node, parent_node, p);
				1433	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
				1434	return 0;
				1435	}
				1436
				1437	int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				1438	struct btrfs_qgroup_extent_record *qrecord)
				1439	{
				1440	struct ulist *old_root;
				1441	u64 bytenr = qrecord->bytenr;
				1442	int ret;
				1443
				1444	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
				1445	if (ret < 0)
				1446	return ret;
				1447
				1448	/*
				1449	* Here we don't need to get the lock of
				1450	* trans->transaction->delayed_refs, since inserted qrecord won't
				1451	* be deleted, only qrecord->node may be modified (new qrecord insert)
				1452	*
				1453	* So modifying qrecord->old_roots is safe here
				1454	*/
				1455	qrecord->old_roots = old_root;
				1456	return 0;
				1457	}
				1458
				1459	int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
				1460	struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
				1461	gfp_t gfp_flag)
				1462	{
				1463	struct btrfs_qgroup_extent_record *record;
				1464	struct btrfs_delayed_ref_root *delayed_refs;
				1465	int ret;
				1466
				1467	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
				1468	\|\| bytenr == 0 \|\| num_bytes == 0)
				1469	return 0;
				1470	if (WARN_ON(trans == NULL))
				1471	return -EINVAL;
				1472	record = kmalloc(sizeof(*record), gfp_flag);
				1473	if (!record)
				1474	return -ENOMEM;
				1475
				1476	delayed_refs = &trans->transaction->delayed_refs;
				1477	record->bytenr = bytenr;
				1478	record->num_bytes = num_bytes;
				1479	record->old_roots = NULL;
				1480
				1481	spin_lock(&delayed_refs->lock);
				1482	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
				1483	spin_unlock(&delayed_refs->lock);
				1484	if (ret > 0) {
				1485	kfree(record);
				1486	return 0;
				1487	}
				1488	return btrfs_qgroup_trace_extent_post(fs_info, record);
				1489	}
				1490
				1491	int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				1492	struct btrfs_fs_info *fs_info,
				1493	struct extent_buffer *eb)
				1494	{
				1495	int nr = btrfs_header_nritems(eb);
				1496	int i, extent_type, ret;
				1497	struct btrfs_key key;
				1498	struct btrfs_file_extent_item *fi;
				1499	u64 bytenr, num_bytes;
				1500
				1501	/* We can be called directly from walk_up_proc() */
				1502	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				1503	return 0;
				1504
				1505	for (i = 0; i < nr; i++) {
				1506	btrfs_item_key_to_cpu(eb, &key, i);
				1507
				1508	if (key.type != BTRFS_EXTENT_DATA_KEY)
				1509	continue;
				1510
				1511	fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
				1512	/* filter out non qgroup-accountable extents */
				1513	extent_type = btrfs_file_extent_type(eb, fi);
				1514
				1515	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
				1516	continue;
				1517
				1518	bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
				1519	if (!bytenr)
				1520	continue;
				1521
				1522	num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
				1523
				1524	ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
				1525	num_bytes, GFP_NOFS);
				1526	if (ret)
				1527	return ret;
				1528	}
				1529	cond_resched();
				1530	return 0;
				1531	}
				1532
				1533	/*
				1534	* Walk up the tree from the bottom, freeing leaves and any interior
				1535	* nodes which have had all slots visited. If a node (leaf or
				1536	* interior) is freed, the node above it will have it's slot
				1537	* incremented. The root node will never be freed.
				1538	*
				1539	* At the end of this function, we should have a path which has all
				1540	* slots incremented to the next position for a search. If we need to
				1541	* read a new node it will be NULL and the node above it will have the
				1542	* correct slot selected for a later read.
				1543	*
				1544	* If we increment the root nodes slot counter past the number of
				1545	* elements, 1 is returned to signal completion of the search.
				1546	*/
				1547	static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
				1548	{
				1549	int level = 0;
				1550	int nr, slot;
				1551	struct extent_buffer *eb;
				1552
				1553	if (root_level == 0)
				1554	return 1;
				1555
				1556	while (level <= root_level) {
				1557	eb = path->nodes[level];
				1558	nr = btrfs_header_nritems(eb);
				1559	path->slots[level]++;
				1560	slot = path->slots[level];
				1561	if (slot >= nr \|\| level == 0) {
				1562	/*
				1563	* Don't free the root - we will detect this
				1564	* condition after our loop and return a
				1565	* positive value for caller to stop walking the tree.
				1566	*/
				1567	if (level != root_level) {
				1568	btrfs_tree_unlock_rw(eb, path->locks[level]);
				1569	path->locks[level] = 0;
				1570
				1571	free_extent_buffer(eb);
				1572	path->nodes[level] = NULL;
				1573	path->slots[level] = 0;
				1574	}
				1575	} else {
				1576	/*
				1577	* We have a valid slot to walk back down
				1578	* from. Stop here so caller can process these
				1579	* new nodes.
				1580	*/
				1581	break;
				1582	}
				1583
				1584	level++;
				1585	}
				1586
				1587	eb = path->nodes[root_level];
				1588	if (path->slots[root_level] >= btrfs_header_nritems(eb))
				1589	return 1;
				1590
				1591	return 0;
				1592	}
				1593
				1594	int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
				1595	struct btrfs_root *root,
				1596	struct extent_buffer *root_eb,
				1597	u64 root_gen, int root_level)
				1598	{
				1599	struct btrfs_fs_info *fs_info = root->fs_info;
				1600	int ret = 0;
				1601	int level;
				1602	struct extent_buffer *eb = root_eb;
				1603	struct btrfs_path *path = NULL;
				1604
				1605	BUG_ON(root_level < 0 \|\| root_level >= BTRFS_MAX_LEVEL);
				1606	BUG_ON(root_eb == NULL);
				1607
				1608	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				1609	return 0;
				1610
				1611	if (!extent_buffer_uptodate(root_eb)) {
				1612	ret = btrfs_read_buffer(root_eb, root_gen);
				1613	if (ret)
				1614	goto out;
				1615	}
				1616
				1617	if (root_level == 0) {
				1618	ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
				1619	goto out;
				1620	}
				1621
				1622	path = btrfs_alloc_path();
				1623	if (!path)
				1624	return -ENOMEM;
				1625
				1626	/*
				1627	* Walk down the tree. Missing extent blocks are filled in as
				1628	* we go. Metadata is accounted every time we read a new
				1629	* extent block.
				1630	*
				1631	* When we reach a leaf, we account for file extent items in it,
				1632	* walk back up the tree (adjusting slot pointers as we go)
				1633	* and restart the search process.
				1634	*/
				1635	extent_buffer_get(root_eb); /* For path */
				1636	path->nodes[root_level] = root_eb;
				1637	path->slots[root_level] = 0;
				1638	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
				1639	walk_down:
				1640	level = root_level;
				1641	while (level >= 0) {
				1642	if (path->nodes[level] == NULL) {
				1643	int parent_slot;
				1644	u64 child_gen;
				1645	u64 child_bytenr;
				1646
				1647	/*
				1648	* We need to get child blockptr/gen from parent before
				1649	* we can read it.
				1650	*/
				1651	eb = path->nodes[level + 1];
				1652	parent_slot = path->slots[level + 1];
				1653	child_bytenr = btrfs_node_blockptr(eb, parent_slot);
				1654	child_gen = btrfs_node_ptr_generation(eb, parent_slot);
				1655
				1656	eb = read_tree_block(fs_info, child_bytenr, child_gen);
				1657	if (IS_ERR(eb)) {
				1658	ret = PTR_ERR(eb);
				1659	goto out;
				1660	} else if (!extent_buffer_uptodate(eb)) {
				1661	free_extent_buffer(eb);
				1662	ret = -EIO;
				1663	goto out;
				1664	}
				1665
				1666	path->nodes[level] = eb;
				1667	path->slots[level] = 0;
				1668
				1669	btrfs_tree_read_lock(eb);
				1670	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
				1671	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
				1672
				1673	ret = btrfs_qgroup_trace_extent(trans, fs_info,
				1674	child_bytenr,
				1675	fs_info->nodesize,
				1676	GFP_NOFS);
				1677	if (ret)
				1678	goto out;
				1679	}
				1680
				1681	if (level == 0) {
				1682	ret = btrfs_qgroup_trace_leaf_items(trans,fs_info,
				1683	path->nodes[level]);
				1684	if (ret)
				1685	goto out;
				1686
				1687	/* Nonzero return here means we completed our search */
				1688	ret = adjust_slots_upwards(path, root_level);
				1689	if (ret)
				1690	break;
				1691
				1692	/* Restart search with new slots */
				1693	goto walk_down;
				1694	}
				1695
				1696	level--;
				1697	}
				1698
				1699	ret = 0;
				1700	out:
				1701	btrfs_free_path(path);
				1702
				1703	return ret;
				1704	}
				1705
				1706	#define UPDATE_NEW 0
				1707	#define UPDATE_OLD 1
				1708	/*
				1709	* Walk all of the roots that points to the bytenr and adjust their refcnts.
				1710	*/
				1711	static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				1712	struct ulist roots, struct ulist tmp,
				1713	struct ulist *qgroups, u64 seq, int update_old)
				1714	{
				1715	struct ulist_node *unode;
				1716	struct ulist_iterator uiter;
				1717	struct ulist_node *tmp_unode;
				1718	struct ulist_iterator tmp_uiter;
				1719	struct btrfs_qgroup *qg;
				1720	int ret = 0;
				1721
				1722	if (!roots)
				1723	return 0;
				1724	ULIST_ITER_INIT(&uiter);
				1725	while ((unode = ulist_next(roots, &uiter))) {
				1726	qg = find_qgroup_rb(fs_info, unode->val);
				1727	if (!qg)
				1728	continue;
				1729
				1730	ulist_reinit(tmp);
				1731	ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				1732	GFP_ATOMIC);
				1733	if (ret < 0)
				1734	return ret;
				1735	ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
				1736	if (ret < 0)
				1737	return ret;
				1738	ULIST_ITER_INIT(&tmp_uiter);
				1739	while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
				1740	struct btrfs_qgroup_list *glist;
				1741
				1742	qg = unode_aux_to_qgroup(tmp_unode);
				1743	if (update_old)
				1744	btrfs_qgroup_update_old_refcnt(qg, seq, 1);
				1745	else
				1746	btrfs_qgroup_update_new_refcnt(qg, seq, 1);
				1747	list_for_each_entry(glist, &qg->groups, next_group) {
				1748	ret = ulist_add(qgroups, glist->group->qgroupid,
				1749	qgroup_to_aux(glist->group),
				1750	GFP_ATOMIC);
				1751	if (ret < 0)
				1752	return ret;
				1753	ret = ulist_add(tmp, glist->group->qgroupid,
				1754	qgroup_to_aux(glist->group),
				1755	GFP_ATOMIC);
				1756	if (ret < 0)
				1757	return ret;
				1758	}
				1759	}
				1760	}
				1761	return 0;
				1762	}
				1763
				1764	/*
				1765	* Update qgroup rfer/excl counters.
				1766	* Rfer update is easy, codes can explain themselves.
				1767	*
				1768	* Excl update is tricky, the update is split into 2 part.
				1769	* Part 1: Possible exclusive <-> sharing detect:
				1770	* \| A \| !A \|
				1771	* -------------------------------------
				1772	* B \| * \| - \|
				1773	* -------------------------------------
				1774	* !B \| + \| ** \|
				1775	* -------------------------------------
				1776	*
				1777	* Conditions:
				1778	* A: cur_old_roots < nr_old_roots (not exclusive before)
				1779	* !A: cur_old_roots == nr_old_roots (possible exclusive before)
				1780	* B: cur_new_roots < nr_new_roots (not exclusive now)
				1781	* !B: cur_new_roots == nr_new_roots (possible exclusive now)
				1782	*
				1783	* Results:
				1784	* +: Possible sharing -> exclusive -: Possible exclusive -> sharing
				1785	* : Definitely not changed. *: Possible unchanged.
				1786	*
				1787	* For !A and !B condition, the exception is cur_old/new_roots == 0 case.
				1788	*
				1789	* To make the logic clear, we first use condition A and B to split
				1790	* combination into 4 results.
				1791	*
				1792	* Then, for result "+" and "-", check old/new_roots == 0 case, as in them
				1793	* only on variant maybe 0.
				1794	*
				1795	* Lastly, check result **, since there are 2 variants maybe 0, split them
				1796	* again(2x2).
				1797	* But this time we don't need to consider other things, the codes and logic
				1798	* is easy to understand now.
				1799	*/
				1800	static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
				1801	struct ulist *qgroups,
				1802	u64 nr_old_roots,
				1803	u64 nr_new_roots,
				1804	u64 num_bytes, u64 seq)
				1805	{
				1806	struct ulist_node *unode;
				1807	struct ulist_iterator uiter;
				1808	struct btrfs_qgroup *qg;
				1809	u64 cur_new_count, cur_old_count;
				1810
				1811	ULIST_ITER_INIT(&uiter);
				1812	while ((unode = ulist_next(qgroups, &uiter))) {
				1813	bool dirty = false;
				1814
				1815	qg = unode_aux_to_qgroup(unode);
				1816	cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
				1817	cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
				1818
				1819	trace_qgroup_update_counters(fs_info, qg->qgroupid,
				1820	cur_old_count, cur_new_count);
				1821
				1822	/* Rfer update part */
				1823	if (cur_old_count == 0 && cur_new_count > 0) {
				1824	qg->rfer += num_bytes;
				1825	qg->rfer_cmpr += num_bytes;
				1826	dirty = true;
				1827	}
				1828	if (cur_old_count > 0 && cur_new_count == 0) {
				1829	qg->rfer -= num_bytes;
				1830	qg->rfer_cmpr -= num_bytes;
				1831	dirty = true;
				1832	}
				1833
				1834	/* Excl update part */
				1835	/* Exclusive/none -> shared case */
				1836	if (cur_old_count == nr_old_roots &&
				1837	cur_new_count < nr_new_roots) {
				1838	/* Exclusive -> shared */
				1839	if (cur_old_count != 0) {
				1840	qg->excl -= num_bytes;
				1841	qg->excl_cmpr -= num_bytes;
				1842	dirty = true;
				1843	}
				1844	}
				1845
				1846	/* Shared -> exclusive/none case */
				1847	if (cur_old_count < nr_old_roots &&
				1848	cur_new_count == nr_new_roots) {
				1849	/* Shared->exclusive */
				1850	if (cur_new_count != 0) {
				1851	qg->excl += num_bytes;
				1852	qg->excl_cmpr += num_bytes;
				1853	dirty = true;
				1854	}
				1855	}
				1856
				1857	/* Exclusive/none -> exclusive/none case */
				1858	if (cur_old_count == nr_old_roots &&
				1859	cur_new_count == nr_new_roots) {
				1860	if (cur_old_count == 0) {
				1861	/* None -> exclusive/none */
				1862
				1863	if (cur_new_count != 0) {
				1864	/* None -> exclusive */
				1865	qg->excl += num_bytes;
				1866	qg->excl_cmpr += num_bytes;
				1867	dirty = true;
				1868	}
				1869	/* None -> none, nothing changed */
				1870	} else {
				1871	/* Exclusive -> exclusive/none */
				1872
				1873	if (cur_new_count == 0) {
				1874	/* Exclusive -> none */
				1875	qg->excl -= num_bytes;
				1876	qg->excl_cmpr -= num_bytes;
				1877	dirty = true;
				1878	}
				1879	/* Exclusive -> exclusive, nothing changed */
				1880	}
				1881	}
				1882
				1883	if (dirty)
				1884	qgroup_dirty(fs_info, qg);
				1885	}
				1886	return 0;
				1887	}
				1888
				1889	/*
				1890	* Check if the @roots potentially is a list of fs tree roots
				1891	*
				1892	* Return 0 for definitely not a fs/subvol tree roots ulist
				1893	* Return 1 for possible fs/subvol tree roots in the list (considering an empty
				1894	* one as well)
				1895	*/
				1896	static int maybe_fs_roots(struct ulist *roots)
				1897	{
				1898	struct ulist_node *unode;
				1899	struct ulist_iterator uiter;
				1900
				1901	/* Empty one, still possible for fs roots */
				1902	if (!roots \|\| roots->nnodes == 0)
				1903	return 1;
				1904
				1905	ULIST_ITER_INIT(&uiter);
				1906	unode = ulist_next(roots, &uiter);
				1907	if (!unode)
				1908	return 1;
				1909
				1910	/*
				1911	* If it contains fs tree roots, then it must belong to fs/subvol
				1912	* trees.
				1913	* If it contains a non-fs tree, it won't be shared with fs/subvol trees.
				1914	*/
				1915	return is_fstree(unode->val);
				1916	}
				1917
				1918	int
				1919	btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
				1920	struct btrfs_fs_info *fs_info,
				1921	u64 bytenr, u64 num_bytes,
				1922	struct ulist old_roots, struct ulist new_roots)
				1923	{
				1924	struct ulist *qgroups = NULL;
				1925	struct ulist *tmp = NULL;
				1926	u64 seq;
				1927	u64 nr_new_roots = 0;
				1928	u64 nr_old_roots = 0;
				1929	int ret = 0;
				1930
				1931	/*
				1932	* If quotas get disabled meanwhile, the resouces need to be freed and
				1933	* we can't just exit here.
				1934	*/
				1935	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				1936	goto out_free;
				1937
				1938	if (new_roots) {
				1939	if (!maybe_fs_roots(new_roots))
				1940	goto out_free;
				1941	nr_new_roots = new_roots->nnodes;
				1942	}
				1943	if (old_roots) {
				1944	if (!maybe_fs_roots(old_roots))
				1945	goto out_free;
				1946	nr_old_roots = old_roots->nnodes;
				1947	}
				1948
				1949	/* Quick exit, either not fs tree roots, or won't affect any qgroup */
				1950	if (nr_old_roots == 0 && nr_new_roots == 0)
				1951	goto out_free;
				1952
				1953	BUG_ON(!fs_info->quota_root);
				1954
				1955	trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
				1956	nr_old_roots, nr_new_roots);
				1957
				1958	qgroups = ulist_alloc(GFP_NOFS);
				1959	if (!qgroups) {
				1960	ret = -ENOMEM;
				1961	goto out_free;
				1962	}
				1963	tmp = ulist_alloc(GFP_NOFS);
				1964	if (!tmp) {
				1965	ret = -ENOMEM;
				1966	goto out_free;
				1967	}
				1968
				1969	mutex_lock(&fs_info->qgroup_rescan_lock);
				1970	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
				1971	if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
				1972	mutex_unlock(&fs_info->qgroup_rescan_lock);
				1973	ret = 0;
				1974	goto out_free;
				1975	}
				1976	}
				1977	mutex_unlock(&fs_info->qgroup_rescan_lock);
				1978
				1979	spin_lock(&fs_info->qgroup_lock);
				1980	seq = fs_info->qgroup_seq;
				1981
				1982	/* Update old refcnts using old_roots */
				1983	ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
				1984	UPDATE_OLD);
				1985	if (ret < 0)
				1986	goto out;
				1987
				1988	/* Update new refcnts using new_roots */
				1989	ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
				1990	UPDATE_NEW);
				1991	if (ret < 0)
				1992	goto out;
				1993
				1994	qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
				1995	num_bytes, seq);
				1996
				1997	/*
				1998	* Bump qgroup_seq to avoid seq overlap
				1999	*/
				2000	fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
				2001	out:
				2002	spin_unlock(&fs_info->qgroup_lock);
				2003	out_free:
				2004	ulist_free(tmp);
				2005	ulist_free(qgroups);
				2006	ulist_free(old_roots);
				2007	ulist_free(new_roots);
				2008	return ret;
				2009	}
				2010
				2011	int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
				2012	struct btrfs_fs_info *fs_info)
				2013	{
				2014	struct btrfs_qgroup_extent_record *record;
				2015	struct btrfs_delayed_ref_root *delayed_refs;
				2016	struct ulist *new_roots = NULL;
				2017	struct rb_node *node;
				2018	u64 qgroup_to_skip;
				2019	int ret = 0;
				2020
				2021	delayed_refs = &trans->transaction->delayed_refs;
				2022	qgroup_to_skip = delayed_refs->qgroup_to_skip;
				2023	while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
				2024	record = rb_entry(node, struct btrfs_qgroup_extent_record,
				2025	node);
				2026
				2027	trace_btrfs_qgroup_account_extents(fs_info, record);
				2028
				2029	if (!ret) {
				2030	/*
				2031	* Old roots should be searched when inserting qgroup
				2032	* extent record
				2033	*/
				2034	if (WARN_ON(!record->old_roots)) {
				2035	/* Search commit root to find old_roots */
				2036	ret = btrfs_find_all_roots(NULL, fs_info,
				2037	record->bytenr, 0,
				2038	&record->old_roots);
				2039	if (ret < 0)
				2040	goto cleanup;
				2041	}
				2042
				2043	/*
				2044	* Use SEQ_LAST as time_seq to do special search, which
				2045	* doesn't lock tree or delayed_refs and search current
				2046	* root. It's safe inside commit_transaction().
				2047	*/
				2048	ret = btrfs_find_all_roots(trans, fs_info,
				2049	record->bytenr, SEQ_LAST, &new_roots);
				2050	if (ret < 0)
				2051	goto cleanup;
				2052	if (qgroup_to_skip) {
				2053	ulist_del(new_roots, qgroup_to_skip, 0);
				2054	ulist_del(record->old_roots, qgroup_to_skip,
				2055	0);
				2056	}
				2057	ret = btrfs_qgroup_account_extent(trans, fs_info,
				2058	record->bytenr, record->num_bytes,
				2059	record->old_roots, new_roots);
				2060	record->old_roots = NULL;
				2061	new_roots = NULL;
				2062	}
				2063	cleanup:
				2064	ulist_free(record->old_roots);
				2065	ulist_free(new_roots);
				2066	new_roots = NULL;
				2067	rb_erase(node, &delayed_refs->dirty_extent_root);
				2068	kfree(record);
				2069
				2070	}
				2071	return ret;
				2072	}
				2073
				2074	/*
				2075	* called from commit_transaction. Writes all changed qgroups to disk.
				2076	*/
				2077	int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
				2078	struct btrfs_fs_info *fs_info)
				2079	{
				2080	struct btrfs_root *quota_root = fs_info->quota_root;
				2081	int ret = 0;
				2082	int start_rescan_worker = 0;
				2083
				2084	if (!quota_root)
				2085	goto out;
				2086
				2087	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
				2088	test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
				2089	start_rescan_worker = 1;
				2090
				2091	if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
				2092	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
				2093
				2094	spin_lock(&fs_info->qgroup_lock);
				2095	while (!list_empty(&fs_info->dirty_qgroups)) {
				2096	struct btrfs_qgroup *qgroup;
				2097	qgroup = list_first_entry(&fs_info->dirty_qgroups,
				2098	struct btrfs_qgroup, dirty);
				2099	list_del_init(&qgroup->dirty);
				2100	spin_unlock(&fs_info->qgroup_lock);
				2101	ret = update_qgroup_info_item(trans, quota_root, qgroup);
				2102	if (ret)
				2103	fs_info->qgroup_flags \|=
				2104	BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2105	ret = update_qgroup_limit_item(trans, quota_root, qgroup);
				2106	if (ret)
				2107	fs_info->qgroup_flags \|=
				2108	BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2109	spin_lock(&fs_info->qgroup_lock);
				2110	}
				2111	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				2112	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_ON;
				2113	else
				2114	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
				2115	spin_unlock(&fs_info->qgroup_lock);
				2116
				2117	ret = update_qgroup_status_item(trans);
				2118	if (ret)
				2119	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2120
				2121	if (!ret && start_rescan_worker) {
				2122	ret = qgroup_rescan_init(fs_info, 0, 1);
				2123	if (!ret) {
				2124	qgroup_rescan_zero_tracking(fs_info);
				2125	btrfs_queue_work(fs_info->qgroup_rescan_workers,
				2126	&fs_info->qgroup_rescan_work);
				2127	}
				2128	ret = 0;
				2129	}
				2130
				2131	out:
				2132
				2133	return ret;
				2134	}
				2135
				2136	/*
				2137	* Copy the accounting information between qgroups. This is necessary
				2138	* when a snapshot or a subvolume is created. Throwing an error will
				2139	* cause a transaction abort so we take extra care here to only error
				2140	* when a readonly fs is a reasonable outcome.
				2141	*/
				2142	int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
				2143	struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
				2144	struct btrfs_qgroup_inherit *inherit)
				2145	{
				2146	int ret = 0;
				2147	int i;
				2148	u64 *i_qgroups;
				2149	struct btrfs_root *quota_root = fs_info->quota_root;
				2150	struct btrfs_qgroup *srcgroup;
				2151	struct btrfs_qgroup *dstgroup;
				2152	u32 level_size = 0;
				2153	u64 nums;
				2154
				2155	mutex_lock(&fs_info->qgroup_ioctl_lock);
				2156	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
				2157	goto out;
				2158
				2159	if (!quota_root) {
				2160	ret = -EINVAL;
				2161	goto out;
				2162	}
				2163
				2164	if (inherit) {
				2165	i_qgroups = (u64 *)(inherit + 1);
				2166	nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
				2167	2 * inherit->num_excl_copies;
				2168	for (i = 0; i < nums; ++i) {
				2169	srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
				2170
				2171	/*
				2172	* Zero out invalid groups so we can ignore
				2173	* them later.
				2174	*/
				2175	if (!srcgroup \|\|
				2176	((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
				2177	*i_qgroups = 0ULL;
				2178
				2179	++i_qgroups;
				2180	}
				2181	}
				2182
				2183	/*
				2184	* create a tracking group for the subvol itself
				2185	*/
				2186	ret = add_qgroup_item(trans, quota_root, objectid);
				2187	if (ret)
				2188	goto out;
				2189
				2190	if (srcid) {
				2191	struct btrfs_root *srcroot;
				2192	struct btrfs_key srckey;
				2193
				2194	srckey.objectid = srcid;
				2195	srckey.type = BTRFS_ROOT_ITEM_KEY;
				2196	srckey.offset = (u64)-1;
				2197	srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
				2198	if (IS_ERR(srcroot)) {
				2199	ret = PTR_ERR(srcroot);
				2200	goto out;
				2201	}
				2202
				2203	level_size = fs_info->nodesize;
				2204	}
				2205
				2206	/*
				2207	* add qgroup to all inherited groups
				2208	*/
				2209	if (inherit) {
				2210	i_qgroups = (u64 *)(inherit + 1);
				2211	for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
				2212	if (*i_qgroups == 0)
				2213	continue;
				2214	ret = add_qgroup_relation_item(trans, quota_root,
				2215	objectid, *i_qgroups);
				2216	if (ret && ret != -EEXIST)
				2217	goto out;
				2218	ret = add_qgroup_relation_item(trans, quota_root,
				2219	*i_qgroups, objectid);
				2220	if (ret && ret != -EEXIST)
				2221	goto out;
				2222	}
				2223	ret = 0;
				2224	}
				2225
				2226
				2227	spin_lock(&fs_info->qgroup_lock);
				2228
				2229	dstgroup = add_qgroup_rb(fs_info, objectid);
				2230	if (IS_ERR(dstgroup)) {
				2231	ret = PTR_ERR(dstgroup);
				2232	goto unlock;
				2233	}
				2234
				2235	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
				2236	dstgroup->lim_flags = inherit->lim.flags;
				2237	dstgroup->max_rfer = inherit->lim.max_rfer;
				2238	dstgroup->max_excl = inherit->lim.max_excl;
				2239	dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
				2240	dstgroup->rsv_excl = inherit->lim.rsv_excl;
				2241
				2242	ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
				2243	if (ret) {
				2244	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2245	btrfs_info(fs_info,
				2246	"unable to update quota limit for %llu",
				2247	dstgroup->qgroupid);
				2248	goto unlock;
				2249	}
				2250	}
				2251
				2252	if (srcid) {
				2253	srcgroup = find_qgroup_rb(fs_info, srcid);
				2254	if (!srcgroup)
				2255	goto unlock;
				2256
				2257	/*
				2258	* We call inherit after we clone the root in order to make sure
				2259	* our counts don't go crazy, so at this point the only
				2260	* difference between the two roots should be the root node.
				2261	*/
				2262	dstgroup->rfer = srcgroup->rfer;
				2263	dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
				2264	dstgroup->excl = level_size;
				2265	dstgroup->excl_cmpr = level_size;
				2266	srcgroup->excl = level_size;
				2267	srcgroup->excl_cmpr = level_size;
				2268
				2269	/* inherit the limit info */
				2270	dstgroup->lim_flags = srcgroup->lim_flags;
				2271	dstgroup->max_rfer = srcgroup->max_rfer;
				2272	dstgroup->max_excl = srcgroup->max_excl;
				2273	dstgroup->rsv_rfer = srcgroup->rsv_rfer;
				2274	dstgroup->rsv_excl = srcgroup->rsv_excl;
				2275
				2276	qgroup_dirty(fs_info, dstgroup);
				2277	qgroup_dirty(fs_info, srcgroup);
				2278	}
				2279
				2280	if (!inherit)
				2281	goto unlock;
				2282
				2283	i_qgroups = (u64 *)(inherit + 1);
				2284	for (i = 0; i < inherit->num_qgroups; ++i) {
				2285	if (*i_qgroups) {
				2286	ret = add_relation_rb(fs_info, objectid, *i_qgroups);
				2287	if (ret)
				2288	goto unlock;
				2289	}
				2290	++i_qgroups;
				2291	}
				2292
				2293	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
				2294	struct btrfs_qgroup *src;
				2295	struct btrfs_qgroup *dst;
				2296
				2297	if (!i_qgroups[0] \|\| !i_qgroups[1])
				2298	continue;
				2299
				2300	src = find_qgroup_rb(fs_info, i_qgroups[0]);
				2301	dst = find_qgroup_rb(fs_info, i_qgroups[1]);
				2302
				2303	if (!src \|\| !dst) {
				2304	ret = -EINVAL;
				2305	goto unlock;
				2306	}
				2307
				2308	dst->rfer = src->rfer - level_size;
				2309	dst->rfer_cmpr = src->rfer_cmpr - level_size;
				2310	}
				2311	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
				2312	struct btrfs_qgroup *src;
				2313	struct btrfs_qgroup *dst;
				2314
				2315	if (!i_qgroups[0] \|\| !i_qgroups[1])
				2316	continue;
				2317
				2318	src = find_qgroup_rb(fs_info, i_qgroups[0]);
				2319	dst = find_qgroup_rb(fs_info, i_qgroups[1]);
				2320
				2321	if (!src \|\| !dst) {
				2322	ret = -EINVAL;
				2323	goto unlock;
				2324	}
				2325
				2326	dst->excl = src->excl + level_size;
				2327	dst->excl_cmpr = src->excl_cmpr + level_size;
				2328	}
				2329
				2330	unlock:
				2331	spin_unlock(&fs_info->qgroup_lock);
				2332	out:
				2333	mutex_unlock(&fs_info->qgroup_ioctl_lock);
				2334	return ret;
				2335	}
				2336
				2337	static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
				2338	{
				2339	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
				2340	qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
				2341	return false;
				2342
				2343	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
				2344	qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
				2345	return false;
				2346
				2347	return true;
				2348	}
				2349
				2350	static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
				2351	{
				2352	struct btrfs_root *quota_root;
				2353	struct btrfs_qgroup *qgroup;
				2354	struct btrfs_fs_info *fs_info = root->fs_info;
				2355	u64 ref_root = root->root_key.objectid;
				2356	int ret = 0;
				2357	int retried = 0;
				2358	struct ulist_node *unode;
				2359	struct ulist_iterator uiter;
				2360
				2361	if (!is_fstree(ref_root))
				2362	return 0;
				2363
				2364	if (num_bytes == 0)
				2365	return 0;
				2366
				2367	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
				2368	capable(CAP_SYS_RESOURCE))
				2369	enforce = false;
				2370
				2371	retry:
				2372	spin_lock(&fs_info->qgroup_lock);
				2373	quota_root = fs_info->quota_root;
				2374	if (!quota_root)
				2375	goto out;
				2376
				2377	qgroup = find_qgroup_rb(fs_info, ref_root);
				2378	if (!qgroup)
				2379	goto out;
				2380
				2381	/*
				2382	* in a first step, we check all affected qgroups if any limits would
				2383	* be exceeded
				2384	*/
				2385	ulist_reinit(fs_info->qgroup_ulist);
				2386	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
				2387	(uintptr_t)qgroup, GFP_ATOMIC);
				2388	if (ret < 0)
				2389	goto out;
				2390	ULIST_ITER_INIT(&uiter);
				2391	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				2392	struct btrfs_qgroup *qg;
				2393	struct btrfs_qgroup_list *glist;
				2394
				2395	qg = unode_aux_to_qgroup(unode);
				2396
				2397	if (enforce && !qgroup_check_limits(qg, num_bytes)) {
				2398	/*
				2399	* Commit the tree and retry, since we may have
				2400	* deletions which would free up space.
				2401	*/
				2402	if (!retried && qg->reserved > 0) {
				2403	struct btrfs_trans_handle *trans;
				2404
				2405	spin_unlock(&fs_info->qgroup_lock);
				2406	ret = btrfs_start_delalloc_inodes(root, 0);
				2407	if (ret)
				2408	return ret;
				2409	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
				2410	trans = btrfs_join_transaction(root);
				2411	if (IS_ERR(trans))
				2412	return PTR_ERR(trans);
				2413	ret = btrfs_commit_transaction(trans);
				2414	if (ret)
				2415	return ret;
				2416	retried++;
				2417	goto retry;
				2418	}
				2419	ret = -EDQUOT;
				2420	goto out;
				2421	}
				2422
				2423	list_for_each_entry(glist, &qg->groups, next_group) {
				2424	ret = ulist_add(fs_info->qgroup_ulist,
				2425	glist->group->qgroupid,
				2426	(uintptr_t)glist->group, GFP_ATOMIC);
				2427	if (ret < 0)
				2428	goto out;
				2429	}
				2430	}
				2431	ret = 0;
				2432	/*
				2433	* no limits exceeded, now record the reservation into all qgroups
				2434	*/
				2435	ULIST_ITER_INIT(&uiter);
				2436	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				2437	struct btrfs_qgroup *qg;
				2438
				2439	qg = unode_aux_to_qgroup(unode);
				2440
				2441	trace_qgroup_update_reserve(fs_info, qg, num_bytes);
				2442	qg->reserved += num_bytes;
				2443	}
				2444
				2445	out:
				2446	spin_unlock(&fs_info->qgroup_lock);
				2447	return ret;
				2448	}
				2449
				2450	void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
				2451	u64 ref_root, u64 num_bytes)
				2452	{
				2453	struct btrfs_root *quota_root;
				2454	struct btrfs_qgroup *qgroup;
				2455	struct ulist_node *unode;
				2456	struct ulist_iterator uiter;
				2457	int ret = 0;
				2458
				2459	if (!is_fstree(ref_root))
				2460	return;
				2461
				2462	if (num_bytes == 0)
				2463	return;
				2464
				2465	spin_lock(&fs_info->qgroup_lock);
				2466
				2467	quota_root = fs_info->quota_root;
				2468	if (!quota_root)
				2469	goto out;
				2470
				2471	qgroup = find_qgroup_rb(fs_info, ref_root);
				2472	if (!qgroup)
				2473	goto out;
				2474
				2475	ulist_reinit(fs_info->qgroup_ulist);
				2476	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
				2477	(uintptr_t)qgroup, GFP_ATOMIC);
				2478	if (ret < 0)
				2479	goto out;
				2480	ULIST_ITER_INIT(&uiter);
				2481	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
				2482	struct btrfs_qgroup *qg;
				2483	struct btrfs_qgroup_list *glist;
				2484
				2485	qg = unode_aux_to_qgroup(unode);
				2486
				2487	trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
				2488	if (qg->reserved < num_bytes)
				2489	report_reserved_underflow(fs_info, qg, num_bytes);
				2490	else
				2491	qg->reserved -= num_bytes;
				2492
				2493	list_for_each_entry(glist, &qg->groups, next_group) {
				2494	ret = ulist_add(fs_info->qgroup_ulist,
				2495	glist->group->qgroupid,
				2496	(uintptr_t)glist->group, GFP_ATOMIC);
				2497	if (ret < 0)
				2498	goto out;
				2499	}
				2500	}
				2501
				2502	out:
				2503	spin_unlock(&fs_info->qgroup_lock);
				2504	}
				2505
				2506	/*
				2507	* Check if the leaf is the last leaf. Which means all node pointers
				2508	* are at their last position.
				2509	*/
				2510	static bool is_last_leaf(struct btrfs_path *path)
				2511	{
				2512	int i;
				2513
				2514	for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
				2515	if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
				2516	return false;
				2517	}
				2518	return true;
				2519	}
				2520
				2521	/*
				2522	* returns < 0 on error, 0 when more leafs are to be scanned.
				2523	* returns 1 when done.
				2524	*/
				2525	static int
				2526	qgroup_rescan_leaf(struct btrfs_fs_info fs_info, struct btrfs_path path,
				2527	struct btrfs_trans_handle *trans)
				2528	{
				2529	struct btrfs_key found;
				2530	struct extent_buffer *scratch_leaf = NULL;
				2531	struct ulist *roots = NULL;
				2532	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
				2533	u64 num_bytes;
				2534	bool done;
				2535	int slot;
				2536	int ret;
				2537
				2538	mutex_lock(&fs_info->qgroup_rescan_lock);
				2539	ret = btrfs_search_slot_for_read(fs_info->extent_root,
				2540	&fs_info->qgroup_rescan_progress,
				2541	path, 1, 0);
				2542
				2543	btrfs_debug(fs_info,
				2544	"current progress key (%llu %u %llu), search_slot ret %d",
				2545	fs_info->qgroup_rescan_progress.objectid,
				2546	fs_info->qgroup_rescan_progress.type,
				2547	fs_info->qgroup_rescan_progress.offset, ret);
				2548
				2549	if (ret) {
				2550	/*
				2551	* The rescan is about to end, we will not be scanning any
				2552	* further blocks. We cannot unset the RESCAN flag here, because
				2553	* we want to commit the transaction if everything went well.
				2554	* To make the live accounting work in this phase, we set our
				2555	* scan progress pointer such that every real extent objectid
				2556	* will be smaller.
				2557	*/
				2558	fs_info->qgroup_rescan_progress.objectid = (u64)-1;
				2559	btrfs_release_path(path);
				2560	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2561	return ret;
				2562	}
				2563	done = is_last_leaf(path);
				2564
				2565	btrfs_item_key_to_cpu(path->nodes[0], &found,
				2566	btrfs_header_nritems(path->nodes[0]) - 1);
				2567	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
				2568
				2569	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
				2570	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
				2571	if (!scratch_leaf) {
				2572	ret = -ENOMEM;
				2573	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2574	goto out;
				2575	}
				2576	extent_buffer_get(scratch_leaf);
				2577	btrfs_tree_read_lock(scratch_leaf);
				2578	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
				2579	slot = path->slots[0];
				2580	btrfs_release_path(path);
				2581	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2582
				2583	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
				2584	btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
				2585	if (found.type != BTRFS_EXTENT_ITEM_KEY &&
				2586	found.type != BTRFS_METADATA_ITEM_KEY)
				2587	continue;
				2588	if (found.type == BTRFS_METADATA_ITEM_KEY)
				2589	num_bytes = fs_info->nodesize;
				2590	else
				2591	num_bytes = found.offset;
				2592
				2593	ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
				2594	&roots);
				2595	if (ret < 0)
				2596	goto out;
				2597	/* For rescan, just pass old_roots as NULL */
				2598	ret = btrfs_qgroup_account_extent(trans, fs_info,
				2599	found.objectid, num_bytes, NULL, roots);
				2600	if (ret < 0)
				2601	goto out;
				2602	}
				2603	out:
				2604	if (scratch_leaf) {
				2605	btrfs_tree_read_unlock_blocking(scratch_leaf);
				2606	free_extent_buffer(scratch_leaf);
				2607	}
				2608	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
				2609
				2610	if (done && !ret) {
				2611	ret = 1;
				2612	fs_info->qgroup_rescan_progress.objectid = (u64)-1;
				2613	}
				2614	return ret;
				2615	}
				2616
				2617	static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
				2618	{
				2619	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
				2620	qgroup_rescan_work);
				2621	struct btrfs_path *path;
				2622	struct btrfs_trans_handle *trans = NULL;
				2623	int err = -ENOMEM;
				2624	int ret = 0;
				2625
				2626	path = btrfs_alloc_path();
				2627	if (!path)
				2628	goto out;
				2629
				2630	err = 0;
				2631	while (!err && !btrfs_fs_closing(fs_info)) {
				2632	trans = btrfs_start_transaction(fs_info->fs_root, 0);
				2633	if (IS_ERR(trans)) {
				2634	err = PTR_ERR(trans);
				2635	break;
				2636	}
				2637	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
				2638	err = -EINTR;
				2639	} else {
				2640	err = qgroup_rescan_leaf(fs_info, path, trans);
				2641	}
				2642	if (err > 0)
				2643	btrfs_commit_transaction(trans);
				2644	else
				2645	btrfs_end_transaction(trans);
				2646	}
				2647
				2648	out:
				2649	btrfs_free_path(path);
				2650
				2651	mutex_lock(&fs_info->qgroup_rescan_lock);
				2652	if (err > 0 &&
				2653	fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
				2654	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2655	} else if (err < 0) {
				2656	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				2657	}
				2658	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2659
				2660	/*
				2661	* only update status, since the previous part has already updated the
				2662	* qgroup info.
				2663	*/
				2664	trans = btrfs_start_transaction(fs_info->quota_root, 1);
				2665	if (IS_ERR(trans)) {
				2666	err = PTR_ERR(trans);
				2667	trans = NULL;
				2668	btrfs_err(fs_info,
				2669	"fail to start transaction for status update: %d",
				2670	err);
				2671	}
				2672
				2673	mutex_lock(&fs_info->qgroup_rescan_lock);
				2674	if (!btrfs_fs_closing(fs_info))
				2675	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2676	if (trans) {
				2677	ret = update_qgroup_status_item(trans);
				2678	if (ret < 0) {
				2679	err = ret;
				2680	btrfs_err(fs_info, "fail to update qgroup status: %d",
				2681	err);
				2682	}
				2683	}
				2684	fs_info->qgroup_rescan_running = false;
				2685	complete_all(&fs_info->qgroup_rescan_completion);
				2686	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2687
				2688	if (!trans)
				2689	return;
				2690
				2691	btrfs_end_transaction(trans);
				2692
				2693	if (btrfs_fs_closing(fs_info)) {
				2694	btrfs_info(fs_info, "qgroup scan paused");
				2695	} else if (err >= 0) {
				2696	btrfs_info(fs_info, "qgroup scan completed%s",
				2697	err > 0 ? " (inconsistency flag cleared)" : "");
				2698	} else {
				2699	btrfs_err(fs_info, "qgroup scan failed with %d", err);
				2700	}
				2701	}
				2702
				2703	/*
				2704	* Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
				2705	* memory required for the rescan context.
				2706	*/
				2707	static int
				2708	qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
				2709	int init_flags)
				2710	{
				2711	int ret = 0;
				2712
				2713	if (!init_flags &&
				2714	(!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) \|\|
				2715	!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
				2716	ret = -EINVAL;
				2717	goto err;
				2718	}
				2719
				2720	mutex_lock(&fs_info->qgroup_rescan_lock);
				2721	spin_lock(&fs_info->qgroup_lock);
				2722
				2723	if (init_flags) {
				2724	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
				2725	ret = -EINPROGRESS;
				2726	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
				2727	ret = -EINVAL;
				2728
				2729	if (ret) {
				2730	spin_unlock(&fs_info->qgroup_lock);
				2731	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2732	goto err;
				2733	}
				2734	fs_info->qgroup_flags \|= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2735	}
				2736
				2737	memset(&fs_info->qgroup_rescan_progress, 0,
				2738	sizeof(fs_info->qgroup_rescan_progress));
				2739	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
				2740	init_completion(&fs_info->qgroup_rescan_completion);
				2741	fs_info->qgroup_rescan_running = true;
				2742
				2743	spin_unlock(&fs_info->qgroup_lock);
				2744	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2745
				2746	memset(&fs_info->qgroup_rescan_work, 0,
				2747	sizeof(fs_info->qgroup_rescan_work));
				2748	btrfs_init_work(&fs_info->qgroup_rescan_work,
				2749	btrfs_qgroup_rescan_helper,
				2750	btrfs_qgroup_rescan_worker, NULL, NULL);
				2751
				2752	if (ret) {
				2753	err:
				2754	btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
				2755	return ret;
				2756	}
				2757
				2758	return 0;
				2759	}
				2760
				2761	static void
				2762	qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
				2763	{
				2764	struct rb_node *n;
				2765	struct btrfs_qgroup *qgroup;
				2766
				2767	spin_lock(&fs_info->qgroup_lock);
				2768	/* clear all current qgroup tracking information */
				2769	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
				2770	qgroup = rb_entry(n, struct btrfs_qgroup, node);
				2771	qgroup->rfer = 0;
				2772	qgroup->rfer_cmpr = 0;
				2773	qgroup->excl = 0;
				2774	qgroup->excl_cmpr = 0;
				2775	qgroup_dirty(fs_info, qgroup);
				2776	}
				2777	spin_unlock(&fs_info->qgroup_lock);
				2778	}
				2779
				2780	int
				2781	btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
				2782	{
				2783	int ret = 0;
				2784	struct btrfs_trans_handle *trans;
				2785
				2786	ret = qgroup_rescan_init(fs_info, 0, 1);
				2787	if (ret)
				2788	return ret;
				2789
				2790	/*
				2791	* We have set the rescan_progress to 0, which means no more
				2792	* delayed refs will be accounted by btrfs_qgroup_account_ref.
				2793	* However, btrfs_qgroup_account_ref may be right after its call
				2794	* to btrfs_find_all_roots, in which case it would still do the
				2795	* accounting.
				2796	* To solve this, we're committing the transaction, which will
				2797	* ensure we run all delayed refs and only after that, we are
				2798	* going to clear all tracking information for a clean start.
				2799	*/
				2800
				2801	trans = btrfs_join_transaction(fs_info->fs_root);
				2802	if (IS_ERR(trans)) {
				2803	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2804	return PTR_ERR(trans);
				2805	}
				2806	ret = btrfs_commit_transaction(trans);
				2807	if (ret) {
				2808	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
				2809	return ret;
				2810	}
				2811
				2812	qgroup_rescan_zero_tracking(fs_info);
				2813
				2814	btrfs_queue_work(fs_info->qgroup_rescan_workers,
				2815	&fs_info->qgroup_rescan_work);
				2816
				2817	return 0;
				2818	}
				2819
				2820	int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				2821	bool interruptible)
				2822	{
				2823	int running;
				2824	int ret = 0;
				2825
				2826	mutex_lock(&fs_info->qgroup_rescan_lock);
				2827	spin_lock(&fs_info->qgroup_lock);
				2828	running = fs_info->qgroup_rescan_running;
				2829	spin_unlock(&fs_info->qgroup_lock);
				2830	mutex_unlock(&fs_info->qgroup_rescan_lock);
				2831
				2832	if (!running)
				2833	return 0;
				2834
				2835	if (interruptible)
				2836	ret = wait_for_completion_interruptible(
				2837	&fs_info->qgroup_rescan_completion);
				2838	else
				2839	wait_for_completion(&fs_info->qgroup_rescan_completion);
				2840
				2841	return ret;
				2842	}
				2843
				2844	/*
				2845	* this is only called from open_ctree where we're still single threaded, thus
				2846	* locking is omitted here.
				2847	*/
				2848	void
				2849	btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
				2850	{
				2851	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
				2852	btrfs_queue_work(fs_info->qgroup_rescan_workers,
				2853	&fs_info->qgroup_rescan_work);
				2854	}
				2855
				2856	/*
				2857	* Reserve qgroup space for range [start, start + len).
				2858	*
				2859	* This function will either reserve space from related qgroups or doing
				2860	* nothing if the range is already reserved.
				2861	*
				2862	* Return 0 for successful reserve
				2863	* Return <0 for error (including -EQUOT)
				2864	*
				2865	* NOTE: this function may sleep for memory allocation.
				2866	* if btrfs_qgroup_reserve_data() is called multiple times with
				2867	* same @reserved, caller must ensure when error happens it's OK
				2868	* to free ALL reserved space.
				2869	*/
				2870	int btrfs_qgroup_reserve_data(struct inode *inode,
				2871	struct extent_changeset **reserved_ret, u64 start,
				2872	u64 len)
				2873	{
				2874	struct btrfs_root *root = BTRFS_I(inode)->root;
				2875	struct ulist_node *unode;
				2876	struct ulist_iterator uiter;
				2877	struct extent_changeset *reserved;
				2878	u64 orig_reserved;
				2879	u64 to_reserve;
				2880	int ret;
				2881
				2882	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) \|\|
				2883	!is_fstree(root->objectid) \|\| len == 0)
				2884	return 0;
				2885
				2886	/* @reserved parameter is mandatory for qgroup */
				2887	if (WARN_ON(!reserved_ret))
				2888	return -EINVAL;
				2889	if (!*reserved_ret) {
				2890	*reserved_ret = extent_changeset_alloc();
				2891	if (!*reserved_ret)
				2892	return -ENOMEM;
				2893	}
				2894	reserved = *reserved_ret;
				2895	/* Record already reserved space */
				2896	orig_reserved = reserved->bytes_changed;
				2897	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
				2898	start + len -1, EXTENT_QGROUP_RESERVED, reserved);
				2899
				2900	/* Newly reserved space */
				2901	to_reserve = reserved->bytes_changed - orig_reserved;
				2902	trace_btrfs_qgroup_reserve_data(inode, start, len,
				2903	to_reserve, QGROUP_RESERVE);
				2904	if (ret < 0)
				2905	goto cleanup;
				2906	ret = qgroup_reserve(root, to_reserve, true);
				2907	if (ret < 0)
				2908	goto cleanup;
				2909
				2910	return ret;
				2911
				2912	cleanup:
				2913	/* cleanup ALL already reserved ranges */
				2914	ULIST_ITER_INIT(&uiter);
				2915	while ((unode = ulist_next(&reserved->range_changed, &uiter)))
				2916	clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				2917	unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
				2918	GFP_NOFS);
				2919	extent_changeset_release(reserved);
				2920	return ret;
				2921	}
				2922
				2923	/* Free ranges specified by @reserved, normally in error path */
				2924	static int qgroup_free_reserved_data(struct inode *inode,
				2925	struct extent_changeset *reserved, u64 start, u64 len)
				2926	{
				2927	struct btrfs_root *root = BTRFS_I(inode)->root;
				2928	struct ulist_node *unode;
				2929	struct ulist_iterator uiter;
				2930	struct extent_changeset changeset;
				2931	int freed = 0;
				2932	int ret;
				2933
				2934	extent_changeset_init(&changeset);
				2935	len = round_up(start + len, root->fs_info->sectorsize);
				2936	start = round_down(start, root->fs_info->sectorsize);
				2937
				2938	ULIST_ITER_INIT(&uiter);
				2939	while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
				2940	u64 range_start = unode->val;
				2941	/* unode->aux is the inclusive end */
				2942	u64 range_len = unode->aux - range_start + 1;
				2943	u64 free_start;
				2944	u64 free_len;
				2945
				2946	extent_changeset_release(&changeset);
				2947
				2948	/* Only free range in range [start, start + len) */
				2949	if (range_start >= start + len \|\|
				2950	range_start + range_len <= start)
				2951	continue;
				2952	free_start = max(range_start, start);
				2953	free_len = min(start + len, range_start + range_len) -
				2954	free_start;
				2955	/*
				2956	* TODO: To also modify reserved->ranges_reserved to reflect
				2957	* the modification.
				2958	*
				2959	* However as long as we free qgroup reserved according to
				2960	* EXTENT_QGROUP_RESERVED, we won't double free.
				2961	* So not need to rush.
				2962	*/
				2963	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
				2964	free_start, free_start + free_len - 1,
				2965	EXTENT_QGROUP_RESERVED, &changeset);
				2966	if (ret < 0)
				2967	goto out;
				2968	freed += changeset.bytes_changed;
				2969	}
				2970	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
				2971	ret = freed;
				2972	out:
				2973	extent_changeset_release(&changeset);
				2974	return ret;
				2975	}
				2976
				2977	static int __btrfs_qgroup_release_data(struct inode *inode,
				2978	struct extent_changeset *reserved, u64 start, u64 len,
				2979	int free)
				2980	{
				2981	struct extent_changeset changeset;
				2982	int trace_op = QGROUP_RELEASE;
				2983	int ret;
				2984
				2985	if (!test_bit(BTRFS_FS_QUOTA_ENABLED,
				2986	&BTRFS_I(inode)->root->fs_info->flags))
				2987	return 0;
				2988
				2989	/* In release case, we shouldn't have @reserved */
				2990	WARN_ON(!free && reserved);
				2991	if (free && reserved)
				2992	return qgroup_free_reserved_data(inode, reserved, start, len);
				2993	extent_changeset_init(&changeset);
				2994	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
				2995	start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
				2996	if (ret < 0)
				2997	goto out;
				2998
				2999	if (free)
				3000	trace_op = QGROUP_FREE;
				3001	trace_btrfs_qgroup_release_data(inode, start, len,
				3002	changeset.bytes_changed, trace_op);
				3003	if (free)
				3004	btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				3005	BTRFS_I(inode)->root->objectid,
				3006	changeset.bytes_changed);
				3007	ret = changeset.bytes_changed;
				3008	out:
				3009	extent_changeset_release(&changeset);
				3010	return ret;
				3011	}
				3012
				3013	/*
				3014	* Free a reserved space range from io_tree and related qgroups
				3015	*
				3016	* Should be called when a range of pages get invalidated before reaching disk.
				3017	* Or for error cleanup case.
				3018	* if @reserved is given, only reserved range in [@start, @start + @len) will
				3019	* be freed.
				3020	*
				3021	* For data written to disk, use btrfs_qgroup_release_data().
				3022	*
				3023	* NOTE: This function may sleep for memory allocation.
				3024	*/
				3025	int btrfs_qgroup_free_data(struct inode *inode,
				3026	struct extent_changeset *reserved, u64 start, u64 len)
				3027	{
				3028	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
				3029	}
				3030
				3031	/*
				3032	* Release a reserved space range from io_tree only.
				3033	*
				3034	* Should be called when a range of pages get written to disk and corresponding
				3035	* FILE_EXTENT is inserted into corresponding root.
				3036	*
				3037	* Since new qgroup accounting framework will only update qgroup numbers at
				3038	* commit_transaction() time, its reserved space shouldn't be freed from
				3039	* related qgroups.
				3040	*
				3041	* But we should release the range from io_tree, to allow further write to be
				3042	* COWed.
				3043	*
				3044	* NOTE: This function may sleep for memory allocation.
				3045	*/
				3046	int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
				3047	{
				3048	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
				3049	}
				3050
				3051	int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				3052	bool enforce)
				3053	{
				3054	struct btrfs_fs_info *fs_info = root->fs_info;
				3055	int ret;
				3056
				3057	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3058	!is_fstree(root->objectid) \|\| num_bytes == 0)
				3059	return 0;
				3060
				3061	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
				3062	trace_qgroup_meta_reserve(root, (s64)num_bytes);
				3063	ret = qgroup_reserve(root, num_bytes, enforce);
				3064	if (ret < 0)
				3065	return ret;
				3066	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
				3067	return ret;
				3068	}
				3069
				3070	void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
				3071	{
				3072	struct btrfs_fs_info *fs_info = root->fs_info;
				3073	u64 reserved;
				3074
				3075	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3076	!is_fstree(root->objectid))
				3077	return;
				3078
				3079	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
				3080	if (reserved == 0)
				3081	return;
				3082	trace_qgroup_meta_reserve(root, -(s64)reserved);
				3083	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
				3084	}
				3085
				3086	void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
				3087	{
				3088	struct btrfs_fs_info *fs_info = root->fs_info;
				3089
				3090	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) \|\|
				3091	!is_fstree(root->objectid))
				3092	return;
				3093
				3094	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
				3095	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
				3096	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
				3097	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
				3098	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
				3099	}
				3100
				3101	/*
				3102	* Check qgroup reserved space leaking, normally at destroy inode
				3103	* time
				3104	*/
				3105	void btrfs_qgroup_check_reserved_leak(struct inode *inode)
				3106	{
				3107	struct extent_changeset changeset;
				3108	struct ulist_node *unode;
				3109	struct ulist_iterator iter;
				3110	int ret;
				3111
				3112	extent_changeset_init(&changeset);
				3113	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
				3114	EXTENT_QGROUP_RESERVED, &changeset);
				3115
				3116	WARN_ON(ret < 0);
				3117	if (WARN_ON(changeset.bytes_changed)) {
				3118	ULIST_ITER_INIT(&iter);
				3119	while ((unode = ulist_next(&changeset.range_changed, &iter))) {
				3120	btrfs_warn(BTRFS_I(inode)->root->fs_info,
				3121	"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
				3122	inode->i_ino, unode->val, unode->aux);
				3123	}
				3124	btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				3125	BTRFS_I(inode)->root->objectid,
				3126	changeset.bytes_changed);
				3127
				3128	}
				3129	extent_changeset_release(&changeset);
				3130	}