1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/pagemap.h>
20#include <linux/writeback.h>
21#include <linux/blkdev.h>
22#include <linux/sort.h>
23#include <linux/rcupdate.h>
24#include <linux/kthread.h>
25#include <linux/slab.h>
26#include <linux/ratelimit.h>
27#include "compat.h"
28#include "hash.h"
29#include "ctree.h"
30#include "disk-io.h"
31#include "print-tree.h"
32#include "transaction.h"
33#include "volumes.h"
34#include "locking.h"
35#include "free-space-cache.h"
36
37/*
38 * control flags for do_chunk_alloc's force field
39 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
40 * if we really need one.
41 *
42 * CHUNK_ALLOC_LIMITED means to only try to allocate one
43 * if we have very few chunks already allocated. This is
44 * used as part of the clustering code to help make sure
45 * we have a good pool of storage to cluster in, without
46 * filling the FS with empty chunks.
47 *
48 * CHUNK_ALLOC_FORCE means it must try to allocate one.
49 *
50 */
51enum {
52 CHUNK_ALLOC_NO_FORCE = 0,
53 CHUNK_ALLOC_LIMITED = 1,
54 CHUNK_ALLOC_FORCE = 2,
55};
56
57/*
58 * Control how reservations are dealt with.
59 *
60 * RESERVE_FREE - freeing a reservation.
61 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
62 * ENOSPC accounting
63 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
64 * bytes_may_use as the ENOSPC accounting is done elsewhere
65 */
66enum {
67 RESERVE_FREE = 0,
68 RESERVE_ALLOC = 1,
69 RESERVE_ALLOC_NO_ACCOUNT = 2,
70};
71
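/*
 * Illustrative sketch only (not part of the original file): a typical
 * caller pairs these modes with btrfs_update_reserved_bytes(), declared
 * below, along the lines of
 *
 *	btrfs_update_reserved_bytes(cache, num_bytes, RESERVE_ALLOC);
 *	...
 *	btrfs_update_reserved_bytes(cache, num_bytes, RESERVE_FREE);
 *
 * with RESERVE_ALLOC_NO_ACCOUNT passed instead when the caller has
 * already done the ENOSPC accounting itself.
 */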
72static int update_block_group(struct btrfs_trans_handle *trans,
73 struct btrfs_root *root,
74 u64 bytenr, u64 num_bytes, int alloc);
75static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
76 struct btrfs_root *root,
77 u64 bytenr, u64 num_bytes, u64 parent,
78 u64 root_objectid, u64 owner_objectid,
79 u64 owner_offset, int refs_to_drop,
80 struct btrfs_delayed_extent_op *extra_op);
81static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
82 struct extent_buffer *leaf,
83 struct btrfs_extent_item *ei);
84static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
85 struct btrfs_root *root,
86 u64 parent, u64 root_objectid,
87 u64 flags, u64 owner, u64 offset,
88 struct btrfs_key *ins, int ref_mod);
89static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
90 struct btrfs_root *root,
91 u64 parent, u64 root_objectid,
92 u64 flags, struct btrfs_disk_key *key,
93 int level, struct btrfs_key *ins);
94static int do_chunk_alloc(struct btrfs_trans_handle *trans,
95 struct btrfs_root *extent_root, u64 alloc_bytes,
96 u64 flags, int force);
97static int find_next_key(struct btrfs_path *path, int level,
98 struct btrfs_key *key);
99static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
100 int dump_block_groups);
101static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
102 u64 num_bytes, int reserve);
103
104static noinline int
105block_group_cache_done(struct btrfs_block_group_cache *cache)
106{
107 smp_mb();
108 return cache->cached == BTRFS_CACHE_FINISHED;
109}
110
111static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
112{
113 return (cache->flags & bits) == bits;
114}
115
116static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
117{
118 atomic_inc(&cache->count);
119}
120
121void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
122{
123 if (atomic_dec_and_test(&cache->count)) {
124 WARN_ON(cache->pinned > 0);
125 WARN_ON(cache->reserved > 0);
126 kfree(cache->free_space_ctl);
127 kfree(cache);
128 }
129}
130
131/*
132 * this adds the block group to the fs_info rb tree for the block group
133 * cache
134 */
135static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
136 struct btrfs_block_group_cache *block_group)
137{
138 struct rb_node **p;
139 struct rb_node *parent = NULL;
140 struct btrfs_block_group_cache *cache;
141
142 spin_lock(&info->block_group_cache_lock);
143 p = &info->block_group_cache_tree.rb_node;
144
145 while (*p) {
146 parent = *p;
147 cache = rb_entry(parent, struct btrfs_block_group_cache,
148 cache_node);
149 if (block_group->key.objectid < cache->key.objectid) {
150 p = &(*p)->rb_left;
151 } else if (block_group->key.objectid > cache->key.objectid) {
152 p = &(*p)->rb_right;
153 } else {
154 spin_unlock(&info->block_group_cache_lock);
155 return -EEXIST;
156 }
157 }
158
159 rb_link_node(&block_group->cache_node, parent, p);
160 rb_insert_color(&block_group->cache_node,
161 &info->block_group_cache_tree);
162 spin_unlock(&info->block_group_cache_lock);
163
164 return 0;
165}
166
167/*
168 * This will return the block group at or after bytenr if contains is 0, else
169 * it will return the block group that contains the bytenr
170 */
171static struct btrfs_block_group_cache *
172block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
173 int contains)
174{
175 struct btrfs_block_group_cache *cache, *ret = NULL;
176 struct rb_node *n;
177 u64 end, start;
178
179 spin_lock(&info->block_group_cache_lock);
180 n = info->block_group_cache_tree.rb_node;
181
182 while (n) {
183 cache = rb_entry(n, struct btrfs_block_group_cache,
184 cache_node);
185 end = cache->key.objectid + cache->key.offset - 1;
186 start = cache->key.objectid;
187
188 if (bytenr < start) {
189 if (!contains && (!ret || start < ret->key.objectid))
190 ret = cache;
191 n = n->rb_left;
192 } else if (bytenr > start) {
193 if (contains && bytenr <= end) {
194 ret = cache;
195 break;
196 }
197 n = n->rb_right;
198 } else {
199 ret = cache;
200 break;
201 }
202 }
203 if (ret)
204 btrfs_get_block_group(ret);
205 spin_unlock(&info->block_group_cache_lock);
206
207 return ret;
208}
209
210static int add_excluded_extent(struct btrfs_root *root,
211 u64 start, u64 num_bytes)
212{
213 u64 end = start + num_bytes - 1;
214 set_extent_bits(&root->fs_info->freed_extents[0],
215 start, end, EXTENT_UPTODATE, GFP_NOFS);
216 set_extent_bits(&root->fs_info->freed_extents[1],
217 start, end, EXTENT_UPTODATE, GFP_NOFS);
218 return 0;
219}
220
221static void free_excluded_extents(struct btrfs_root *root,
222 struct btrfs_block_group_cache *cache)
223{
224 u64 start, end;
225
226 start = cache->key.objectid;
227 end = start + cache->key.offset - 1;
228
229 clear_extent_bits(&root->fs_info->freed_extents[0],
230 start, end, EXTENT_UPTODATE, GFP_NOFS);
231 clear_extent_bits(&root->fs_info->freed_extents[1],
232 start, end, EXTENT_UPTODATE, GFP_NOFS);
233}
234
235static int exclude_super_stripes(struct btrfs_root *root,
236 struct btrfs_block_group_cache *cache)
237{
238 u64 bytenr;
239 u64 *logical;
240 int stripe_len;
241 int i, nr, ret;
242
243 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
244 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
245 cache->bytes_super += stripe_len;
246 ret = add_excluded_extent(root, cache->key.objectid,
247 stripe_len);
248 BUG_ON(ret); /* -ENOMEM */
249 }
250
251 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
252 bytenr = btrfs_sb_offset(i);
253 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
254 cache->key.objectid, bytenr,
255 0, &logical, &nr, &stripe_len);
256 BUG_ON(ret); /* -ENOMEM */
257
258 while (nr--) {
259 cache->bytes_super += stripe_len;
260 ret = add_excluded_extent(root, logical[nr],
261 stripe_len);
262 BUG_ON(ret); /* -ENOMEM */
263 }
264
265 kfree(logical);
266 }
267 return 0;
268}
269
270static struct btrfs_caching_control *
271get_caching_control(struct btrfs_block_group_cache *cache)
272{
273 struct btrfs_caching_control *ctl;
274
275 spin_lock(&cache->lock);
276 if (cache->cached != BTRFS_CACHE_STARTED) {
277 spin_unlock(&cache->lock);
278 return NULL;
279 }
280
281 /* We're loading it the fast way, so we don't have a caching_ctl. */
282 if (!cache->caching_ctl) {
283 spin_unlock(&cache->lock);
284 return NULL;
285 }
286
287 ctl = cache->caching_ctl;
288 atomic_inc(&ctl->count);
289 spin_unlock(&cache->lock);
290 return ctl;
291}
292
293static void put_caching_control(struct btrfs_caching_control *ctl)
294{
295 if (atomic_dec_and_test(&ctl->count))
296 kfree(ctl);
297}
298
299/*
300 * this is only called by cache_block_group; since we could have freed extents,
301 * we need to check the pinned_extents for any extents that can't be used yet
302 * since their free space will be released as soon as the transaction commits.
303 */
304static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
305 struct btrfs_fs_info *info, u64 start, u64 end)
306{
307 u64 extent_start, extent_end, size, total_added = 0;
308 int ret;
309
310 while (start < end) {
311 ret = find_first_extent_bit(info->pinned_extents, start,
312 &extent_start, &extent_end,
313 EXTENT_DIRTY | EXTENT_UPTODATE);
314 if (ret)
315 break;
316
317 if (extent_start <= start) {
318 start = extent_end + 1;
319 } else if (extent_start > start && extent_start < end) {
320 size = extent_start - start;
321 total_added += size;
322 ret = btrfs_add_free_space(block_group, start,
323 size);
324 BUG_ON(ret); /* -ENOMEM or logic error */
325 start = extent_end + 1;
326 } else {
327 break;
328 }
329 }
330
331 if (start < end) {
332 size = end - start;
333 total_added += size;
334 ret = btrfs_add_free_space(block_group, start, size);
335 BUG_ON(ret); /* -ENOMEM or logic error */
336 }
337
338 return total_added;
339}
340
341static noinline void caching_thread(struct btrfs_work *work)
342{
343 struct btrfs_block_group_cache *block_group;
344 struct btrfs_fs_info *fs_info;
345 struct btrfs_caching_control *caching_ctl;
346 struct btrfs_root *extent_root;
347 struct btrfs_path *path;
348 struct extent_buffer *leaf;
349 struct btrfs_key key;
350 u64 total_found = 0;
351 u64 last = 0;
352 u32 nritems;
353 int ret = 0;
354
355 caching_ctl = container_of(work, struct btrfs_caching_control, work);
356 block_group = caching_ctl->block_group;
357 fs_info = block_group->fs_info;
358 extent_root = fs_info->extent_root;
359
360 path = btrfs_alloc_path();
361 if (!path)
362 goto out;
363
364 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
365
366 /*
367 * We don't want to deadlock with somebody trying to allocate a new
368 * extent for the extent root while also trying to search the extent
369 * root to add free space. So we skip locking and search the commit
370 * root, since its read-only
371 */
372 path->skip_locking = 1;
373 path->search_commit_root = 1;
374 path->reada = 1;
375
376 key.objectid = last;
377 key.offset = 0;
378 key.type = BTRFS_EXTENT_ITEM_KEY;
379again:
380 mutex_lock(&caching_ctl->mutex);
381 /* need to make sure the commit_root doesn't disappear */
382 down_read(&fs_info->extent_commit_sem);
383
384 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
385 if (ret < 0)
386 goto err;
387
388 leaf = path->nodes[0];
389 nritems = btrfs_header_nritems(leaf);
390
391 while (1) {
392 if (btrfs_fs_closing(fs_info) > 1) {
393 last = (u64)-1;
394 break;
395 }
396
397 if (path->slots[0] < nritems) {
398 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
399 } else {
400 ret = find_next_key(path, 0, &key);
401 if (ret)
402 break;
403
404 if (need_resched() ||
405 btrfs_next_leaf(extent_root, path)) {
406 caching_ctl->progress = last;
407 btrfs_release_path(path);
408 up_read(&fs_info->extent_commit_sem);
409 mutex_unlock(&caching_ctl->mutex);
410 cond_resched();
411 goto again;
412 }
413 leaf = path->nodes[0];
414 nritems = btrfs_header_nritems(leaf);
415 continue;
416 }
417
418 if (key.objectid < block_group->key.objectid) {
419 path->slots[0]++;
420 continue;
421 }
422
423 if (key.objectid >= block_group->key.objectid +
424 block_group->key.offset)
425 break;
426
427 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
428 total_found += add_new_free_space(block_group,
429 fs_info, last,
430 key.objectid);
431 last = key.objectid + key.offset;
432
433 if (total_found > (1024 * 1024 * 2)) {
434 total_found = 0;
435 wake_up(&caching_ctl->wait);
436 }
437 }
438 path->slots[0]++;
439 }
440 ret = 0;
441
442 total_found += add_new_free_space(block_group, fs_info, last,
443 block_group->key.objectid +
444 block_group->key.offset);
445 caching_ctl->progress = (u64)-1;
446
447 spin_lock(&block_group->lock);
448 block_group->caching_ctl = NULL;
449 block_group->cached = BTRFS_CACHE_FINISHED;
450 spin_unlock(&block_group->lock);
451
452err:
453 btrfs_free_path(path);
454 up_read(&fs_info->extent_commit_sem);
455
456 free_excluded_extents(extent_root, block_group);
457
458 mutex_unlock(&caching_ctl->mutex);
459out:
460 wake_up(&caching_ctl->wait);
461
462 put_caching_control(caching_ctl);
463 btrfs_put_block_group(block_group);
464}
465
466static int cache_block_group(struct btrfs_block_group_cache *cache,
467 struct btrfs_trans_handle *trans,
468 struct btrfs_root *root,
469 int load_cache_only)
470{
471 DEFINE_WAIT(wait);
472 struct btrfs_fs_info *fs_info = cache->fs_info;
473 struct btrfs_caching_control *caching_ctl;
474 int ret = 0;
475
476 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
477 if (!caching_ctl)
478 return -ENOMEM;
479
480 INIT_LIST_HEAD(&caching_ctl->list);
481 mutex_init(&caching_ctl->mutex);
482 init_waitqueue_head(&caching_ctl->wait);
483 caching_ctl->block_group = cache;
484 caching_ctl->progress = cache->key.objectid;
485 atomic_set(&caching_ctl->count, 1);
486 caching_ctl->work.func = caching_thread;
487
488 spin_lock(&cache->lock);
489 /*
490 * This should be a rare occasion, but this could happen I think in the
491 * case where one thread starts to load the space cache info, and then
492 * some other thread starts a transaction commit which tries to do an
493 * allocation while the other thread is still loading the space cache
494 * info. The previous loop should have kept us from choosing this block
495 * group, but if we've moved to the state where we will wait on caching
496 * block groups we need to first check if we're doing a fast load here,
497 * so we can wait for it to finish; otherwise we could end up allocating
498 * from a block group whose cache gets evicted for one reason or
499 * another.
500 */
501 while (cache->cached == BTRFS_CACHE_FAST) {
502 struct btrfs_caching_control *ctl;
503
504 ctl = cache->caching_ctl;
505 atomic_inc(&ctl->count);
506 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
507 spin_unlock(&cache->lock);
508
509 schedule();
510
511 finish_wait(&ctl->wait, &wait);
512 put_caching_control(ctl);
513 spin_lock(&cache->lock);
514 }
515
516 if (cache->cached != BTRFS_CACHE_NO) {
517 spin_unlock(&cache->lock);
518 kfree(caching_ctl);
519 return 0;
520 }
521 WARN_ON(cache->caching_ctl);
522 cache->caching_ctl = caching_ctl;
523 cache->cached = BTRFS_CACHE_FAST;
524 spin_unlock(&cache->lock);
525
526 /*
527 * We can't do the read from on-disk cache during a commit since we need
528 * to have the normal tree locking. Also if we are currently trying to
529 * allocate blocks for the tree root we can't do the fast caching since
530 * we likely hold important locks.
531 */
532 if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
533 ret = load_free_space_cache(fs_info, cache);
534
535 spin_lock(&cache->lock);
536 if (ret == 1) {
537 cache->caching_ctl = NULL;
538 cache->cached = BTRFS_CACHE_FINISHED;
539 cache->last_byte_to_unpin = (u64)-1;
540 } else {
541 if (load_cache_only) {
542 cache->caching_ctl = NULL;
543 cache->cached = BTRFS_CACHE_NO;
544 } else {
545 cache->cached = BTRFS_CACHE_STARTED;
546 }
547 }
548 spin_unlock(&cache->lock);
549 wake_up(&caching_ctl->wait);
550 if (ret == 1) {
551 put_caching_control(caching_ctl);
552 free_excluded_extents(fs_info->extent_root, cache);
553 return 0;
554 }
555 } else {
556 /*
557 * We are not going to do the fast caching, set cached to the
558 * appropriate value and wakeup any waiters.
559 */
560 spin_lock(&cache->lock);
561 if (load_cache_only) {
562 cache->caching_ctl = NULL;
563 cache->cached = BTRFS_CACHE_NO;
564 } else {
565 cache->cached = BTRFS_CACHE_STARTED;
566 }
567 spin_unlock(&cache->lock);
568 wake_up(&caching_ctl->wait);
569 }
570
571 if (load_cache_only) {
572 put_caching_control(caching_ctl);
573 return 0;
574 }
575
576 down_write(&fs_info->extent_commit_sem);
577 atomic_inc(&caching_ctl->count);
578 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
579 up_write(&fs_info->extent_commit_sem);
580
581 btrfs_get_block_group(cache);
582
583 btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
584
585 return ret;
586}
587
588/*
589 * return the block group that starts at or after bytenr
590 */
591static struct btrfs_block_group_cache *
592btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
593{
594 struct btrfs_block_group_cache *cache;
595
596 cache = block_group_cache_tree_search(info, bytenr, 0);
597
598 return cache;
599}
600
601/*
602 * return the block group that contains the given bytenr
603 */
604struct btrfs_block_group_cache *btrfs_lookup_block_group(
605 struct btrfs_fs_info *info,
606 u64 bytenr)
607{
608 struct btrfs_block_group_cache *cache;
609
610 cache = block_group_cache_tree_search(info, bytenr, 1);
611
612 return cache;
613}
614
615static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
616 u64 flags)
617{
618 struct list_head *head = &info->space_info;
619 struct btrfs_space_info *found;
620
621 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
622
623 rcu_read_lock();
624 list_for_each_entry_rcu(found, head, list) {
625 if (found->flags & flags) {
626 rcu_read_unlock();
627 return found;
628 }
629 }
630 rcu_read_unlock();
631 return NULL;
632}
633
634/*
635 * after adding space to the filesystem, we need to clear the full flags
636 * on all the space infos.
637 */
638void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
639{
640 struct list_head *head = &info->space_info;
641 struct btrfs_space_info *found;
642
643 rcu_read_lock();
644 list_for_each_entry_rcu(found, head, list)
645 found->full = 0;
646 rcu_read_unlock();
647}
648
649static u64 div_factor(u64 num, int factor)
650{
651 if (factor == 10)
652 return num;
653 num *= factor;
654 do_div(num, 10);
655 return num;
656}
657
658static u64 div_factor_fine(u64 num, int factor)
659{
660 if (factor == 100)
661 return num;
662 num *= factor;
663 do_div(num, 100);
664 return num;
665}
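/*
 * Quick sanity check of the two helpers above (illustrative arithmetic):
 * div_factor(1024, 9) == 921 (roughly 90%, rounded down) and
 * div_factor_fine(1024, 75) == 768 (exactly 75%).
 */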
666
667u64 btrfs_find_block_group(struct btrfs_root *root,
668 u64 search_start, u64 search_hint, int owner)
669{
670 struct btrfs_block_group_cache *cache;
671 u64 used;
672 u64 last = max(search_hint, search_start);
673 u64 group_start = 0;
674 int full_search = 0;
675 int factor = 9;
676 int wrapped = 0;
677again:
678 while (1) {
679 cache = btrfs_lookup_first_block_group(root->fs_info, last);
680 if (!cache)
681 break;
682
683 spin_lock(&cache->lock);
684 last = cache->key.objectid + cache->key.offset;
685 used = btrfs_block_group_used(&cache->item);
686
687 if ((full_search || !cache->ro) &&
688 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
689 if (used + cache->pinned + cache->reserved <
690 div_factor(cache->key.offset, factor)) {
691 group_start = cache->key.objectid;
692 spin_unlock(&cache->lock);
693 btrfs_put_block_group(cache);
694 goto found;
695 }
696 }
697 spin_unlock(&cache->lock);
698 btrfs_put_block_group(cache);
699 cond_resched();
700 }
701 if (!wrapped) {
702 last = search_start;
703 wrapped = 1;
704 goto again;
705 }
706 if (!full_search && factor < 10) {
707 last = search_start;
708 full_search = 1;
709 factor = 10;
710 goto again;
711 }
712found:
713 return group_start;
714}
715
716/* simple helper to search for an existing extent at a given offset */
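/*
 * Sketch of the return convention, inherited from btrfs_search_slot():
 * 0 means an (objectid = start, EXTENT_ITEM, offset = len) item exists,
 * > 0 means it was not found, and < 0 is an error such as -ENOMEM.
 */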
717int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
718{
719 int ret;
720 struct btrfs_key key;
721 struct btrfs_path *path;
722
723 path = btrfs_alloc_path();
724 if (!path)
725 return -ENOMEM;
726
727 key.objectid = start;
728 key.offset = len;
729 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
730 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
731 0, 0);
732 btrfs_free_path(path);
733 return ret;
734}
735
736/*
737 * helper function to lookup reference count and flags of extent.
738 *
739 * The head node for a delayed ref is used to store the sum of all the
740 * reference count modifications queued up in the rbtree. The head
741 * node may also store the extent flags to set. This way you can check
742 * to see what the reference count and extent flags will be once all of
743 * the queued delayed refs have been processed.
744 */
745int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
746 struct btrfs_root *root, u64 bytenr,
747 u64 num_bytes, u64 *refs, u64 *flags)
748{
749 struct btrfs_delayed_ref_head *head;
750 struct btrfs_delayed_ref_root *delayed_refs;
751 struct btrfs_path *path;
752 struct btrfs_extent_item *ei;
753 struct extent_buffer *leaf;
754 struct btrfs_key key;
755 u32 item_size;
756 u64 num_refs;
757 u64 extent_flags;
758 int ret;
759
760 path = btrfs_alloc_path();
761 if (!path)
762 return -ENOMEM;
763
764 key.objectid = bytenr;
765 key.type = BTRFS_EXTENT_ITEM_KEY;
766 key.offset = num_bytes;
767 if (!trans) {
768 path->skip_locking = 1;
769 path->search_commit_root = 1;
770 }
771again:
772 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
773 &key, path, 0, 0);
774 if (ret < 0)
775 goto out_free;
776
777 if (ret == 0) {
778 leaf = path->nodes[0];
779 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
780 if (item_size >= sizeof(*ei)) {
781 ei = btrfs_item_ptr(leaf, path->slots[0],
782 struct btrfs_extent_item);
783 num_refs = btrfs_extent_refs(leaf, ei);
784 extent_flags = btrfs_extent_flags(leaf, ei);
785 } else {
786#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
787 struct btrfs_extent_item_v0 *ei0;
788 BUG_ON(item_size != sizeof(*ei0));
789 ei0 = btrfs_item_ptr(leaf, path->slots[0],
790 struct btrfs_extent_item_v0);
791 num_refs = btrfs_extent_refs_v0(leaf, ei0);
792 /* FIXME: this isn't correct for data */
793 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
794#else
795 BUG();
796#endif
797 }
798 BUG_ON(num_refs == 0);
799 } else {
800 num_refs = 0;
801 extent_flags = 0;
802 ret = 0;
803 }
804
805 if (!trans)
806 goto out;
807
808 delayed_refs = &trans->transaction->delayed_refs;
809 spin_lock(&delayed_refs->lock);
810 head = btrfs_find_delayed_ref_head(trans, bytenr);
811 if (head) {
812 if (!mutex_trylock(&head->mutex)) {
813 atomic_inc(&head->node.refs);
814 spin_unlock(&delayed_refs->lock);
815
816 btrfs_release_path(path);
817
818 /*
819 * Mutex was contended, block until it's released and try
820 * again
821 */
822 mutex_lock(&head->mutex);
823 mutex_unlock(&head->mutex);
824 btrfs_put_delayed_ref(&head->node);
825 goto again;
826 }
827 if (head->extent_op && head->extent_op->update_flags)
828 extent_flags |= head->extent_op->flags_to_set;
829 else
830 BUG_ON(num_refs == 0);
831
832 num_refs += head->node.ref_mod;
833 mutex_unlock(&head->mutex);
834 }
835 spin_unlock(&delayed_refs->lock);
836out:
837 WARN_ON(num_refs == 0);
838 if (refs)
839 *refs = num_refs;
840 if (flags)
841 *flags = extent_flags;
842out_free:
843 btrfs_free_path(path);
844 return ret;
845}
846
847/*
848 * Back reference rules. Back refs have three main goals:
849 *
850 * 1) differentiate between all holders of references to an extent so that
851 * when a reference is dropped we can make sure it was a valid reference
852 * before freeing the extent.
853 *
854 * 2) Provide enough information to quickly find the holders of an extent
855 * if we notice a given block is corrupted or bad.
856 *
857 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
858 * maintenance. This is actually the same as #2, but with a slightly
859 * different use case.
860 *
861 * There are two kinds of back refs. The implicit back refs is optimized
862 * for pointers in non-shared tree blocks. For a given pointer in a block,
863 * back refs of this kind provide information about the block's owner tree
864 * and the pointer's key. This information allows us to find the block by
865 * b-tree searching. The full back refs is for pointers in tree blocks not
866 * referenced by their owner trees. The location of tree block is recorded
867 * in the back refs. Actually the full back refs is generic, and can be
868 * used in all cases the implicit back refs is used. The major shortcoming
869 * of the full back refs is its overhead. Every time a tree block gets
870 * COWed, we have to update back refs entry for all pointers in it.
871 *
872 * For a newly allocated tree block, we use implicit back refs for
873 * pointers in it. This means most tree related operations only involve
874 * implicit back refs. For a tree block created in old transaction, the
875 * only way to drop a reference to it is COW it. So we can detect the
876 * event that tree block loses its owner tree's reference and do the
877 * back refs conversion.
878 *
879 * When a tree block is COW'd through a tree, there are four cases:
880 *
881 * The reference count of the block is one and the tree is the block's
882 * owner tree. Nothing to do in this case.
883 *
884 * The reference count of the block is one and the tree is not the
885 * block's owner tree. In this case, full back refs is used for pointers
886 * in the block. Remove these full back refs, add implicit back refs for
887 * every pointers in the new block.
888 *
889 * The reference count of the block is greater than one and the tree is
890 * the block's owner tree. In this case, implicit back refs is used for
891 * pointers in the block. Add full back refs for every pointer in the
892 * block, increase lower level extents' reference counts. The original
893 * implicit back refs are inherited by the new block.
894 *
895 * The reference count of the block is greater than one and the tree is
896 * not the block's owner tree. Add implicit back refs for every pointer in
897 * the new block, increase lower level extents' reference count.
898 *
899 * Back Reference Key composing:
900 *
901 * The key objectid corresponds to the first byte in the extent,
902 * The key type is used to differentiate between types of back refs.
903 * There are different meanings of the key offset for different types
904 * of back refs.
905 *
906 * File extents can be referenced by:
907 *
908 * - multiple snapshots, subvolumes, or different generations in one subvol
909 * - different files inside a single subvolume
910 * - different offsets inside a file (bookend extents in file.c)
911 *
912 * The extent ref structure for the implicit back refs has fields for:
913 *
914 * - Objectid of the subvolume root
915 * - objectid of the file holding the reference
916 * - original offset in the file
917 * - how many bookend extents
918 *
919 * The key offset for the implicit back refs is the hash of the first
920 * three fields.
921 *
922 * The extent ref structure for the full back refs has a field for:
923 *
924 * - number of pointers in the tree leaf
925 *
926 * The key offset for the full back refs is the first byte of
927 * the tree leaf.
928 *
929 * When a file extent is allocated, the implicit back refs is used
930 * and the fields are filled in:
931 *
932 * (root_key.objectid, inode objectid, offset in file, 1)
933 *
934 * When a file extent is removed by file truncation, we find the
935 * corresponding implicit back refs and check the following fields:
936 *
937 * (btrfs_header_owner(leaf), inode objectid, offset in file)
938 *
939 * Btree extents can be referenced by:
940 *
941 * - Different subvolumes
942 *
943 * Both the implicit back refs and the full back refs for tree blocks
944 * only consist of key. The key offset for the implicit back refs is
945 * objectid of block's owner tree. The key offset for the full back refs
946 * is the first byte of parent block.
947 *
948 * When implicit back refs is used, information about the lowest key and
949 * level of the tree block is required. This information is stored in
950 * the tree block info structure.
951 */
952
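/*
 * Worked example (illustrative, made-up values): a file extent starting
 * at byte 12582912, referenced by inode 257 at file offset 0 in the
 * subvolume tree 5, gets the implicit back ref key
 *
 *	(12582912, BTRFS_EXTENT_DATA_REF_KEY,
 *	 hash_extent_data_ref(5, 257, 0))
 *
 * whereas the same extent shared through a tree block at byte 29360128
 * would instead use the full back ref key
 *
 *	(12582912, BTRFS_SHARED_DATA_REF_KEY, 29360128)
 *
 * hash_extent_data_ref() is defined later in this file.
 */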
953#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
954static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
955 struct btrfs_root *root,
956 struct btrfs_path *path,
957 u64 owner, u32 extra_size)
958{
959 struct btrfs_extent_item *item;
960 struct btrfs_extent_item_v0 *ei0;
961 struct btrfs_extent_ref_v0 *ref0;
962 struct btrfs_tree_block_info *bi;
963 struct extent_buffer *leaf;
964 struct btrfs_key key;
965 struct btrfs_key found_key;
966 u32 new_size = sizeof(*item);
967 u64 refs;
968 int ret;
969
970 leaf = path->nodes[0];
971 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
972
973 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
974 ei0 = btrfs_item_ptr(leaf, path->slots[0],
975 struct btrfs_extent_item_v0);
976 refs = btrfs_extent_refs_v0(leaf, ei0);
977
978 if (owner == (u64)-1) {
979 while (1) {
980 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
981 ret = btrfs_next_leaf(root, path);
982 if (ret < 0)
983 return ret;
984 BUG_ON(ret > 0); /* Corruption */
985 leaf = path->nodes[0];
986 }
987 btrfs_item_key_to_cpu(leaf, &found_key,
988 path->slots[0]);
989 BUG_ON(key.objectid != found_key.objectid);
990 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
991 path->slots[0]++;
992 continue;
993 }
994 ref0 = btrfs_item_ptr(leaf, path->slots[0],
995 struct btrfs_extent_ref_v0);
996 owner = btrfs_ref_objectid_v0(leaf, ref0);
997 break;
998 }
999 }
1000 btrfs_release_path(path);
1001
1002 if (owner < BTRFS_FIRST_FREE_OBJECTID)
1003 new_size += sizeof(*bi);
1004
1005 new_size -= sizeof(*ei0);
1006 ret = btrfs_search_slot(trans, root, &key, path,
1007 new_size + extra_size, 1);
1008 if (ret < 0)
1009 return ret;
1010 BUG_ON(ret); /* Corruption */
1011
1012 btrfs_extend_item(trans, root, path, new_size);
1013
1014 leaf = path->nodes[0];
1015 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1016 btrfs_set_extent_refs(leaf, item, refs);
1017 /* FIXME: get real generation */
1018 btrfs_set_extent_generation(leaf, item, 0);
1019 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1020 btrfs_set_extent_flags(leaf, item,
1021 BTRFS_EXTENT_FLAG_TREE_BLOCK |
1022 BTRFS_BLOCK_FLAG_FULL_BACKREF);
1023 bi = (struct btrfs_tree_block_info *)(item + 1);
1024 /* FIXME: get first key of the block */
1025 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
1026 btrfs_set_tree_block_level(leaf, bi, (int)owner);
1027 } else {
1028 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
1029 }
1030 btrfs_mark_buffer_dirty(leaf);
1031 return 0;
1032}
1033#endif
1034
1035static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1036{
1037 u32 high_crc = ~(u32)0;
1038 u32 low_crc = ~(u32)0;
1039 __le64 lenum;
1040
1041 lenum = cpu_to_le64(root_objectid);
1042 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
1043 lenum = cpu_to_le64(owner);
1044 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
1045 lenum = cpu_to_le64(offset);
1046 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
1047
1048 return ((u64)high_crc << 31) ^ (u64)low_crc;
1049}
1050
1051static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1052 struct btrfs_extent_data_ref *ref)
1053{
1054 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1055 btrfs_extent_data_ref_objectid(leaf, ref),
1056 btrfs_extent_data_ref_offset(leaf, ref));
1057}
1058
1059static int match_extent_data_ref(struct extent_buffer *leaf,
1060 struct btrfs_extent_data_ref *ref,
1061 u64 root_objectid, u64 owner, u64 offset)
1062{
1063 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1064 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1065 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1066 return 0;
1067 return 1;
1068}
1069
1070static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
1071 struct btrfs_root *root,
1072 struct btrfs_path *path,
1073 u64 bytenr, u64 parent,
1074 u64 root_objectid,
1075 u64 owner, u64 offset)
1076{
1077 struct btrfs_key key;
1078 struct btrfs_extent_data_ref *ref;
1079 struct extent_buffer *leaf;
1080 u32 nritems;
1081 int ret;
1082 int recow;
1083 int err = -ENOENT;
1084
1085 key.objectid = bytenr;
1086 if (parent) {
1087 key.type = BTRFS_SHARED_DATA_REF_KEY;
1088 key.offset = parent;
1089 } else {
1090 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1091 key.offset = hash_extent_data_ref(root_objectid,
1092 owner, offset);
1093 }
1094again:
1095 recow = 0;
1096 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1097 if (ret < 0) {
1098 err = ret;
1099 goto fail;
1100 }
1101
1102 if (parent) {
1103 if (!ret)
1104 return 0;
1105#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1106 key.type = BTRFS_EXTENT_REF_V0_KEY;
1107 btrfs_release_path(path);
1108 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1109 if (ret < 0) {
1110 err = ret;
1111 goto fail;
1112 }
1113 if (!ret)
1114 return 0;
1115#endif
1116 goto fail;
1117 }
1118
1119 leaf = path->nodes[0];
1120 nritems = btrfs_header_nritems(leaf);
1121 while (1) {
1122 if (path->slots[0] >= nritems) {
1123 ret = btrfs_next_leaf(root, path);
1124 if (ret < 0)
1125 err = ret;
1126 if (ret)
1127 goto fail;
1128
1129 leaf = path->nodes[0];
1130 nritems = btrfs_header_nritems(leaf);
1131 recow = 1;
1132 }
1133
1134 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1135 if (key.objectid != bytenr ||
1136 key.type != BTRFS_EXTENT_DATA_REF_KEY)
1137 goto fail;
1138
1139 ref = btrfs_item_ptr(leaf, path->slots[0],
1140 struct btrfs_extent_data_ref);
1141
1142 if (match_extent_data_ref(leaf, ref, root_objectid,
1143 owner, offset)) {
1144 if (recow) {
1145 btrfs_release_path(path);
1146 goto again;
1147 }
1148 err = 0;
1149 break;
1150 }
1151 path->slots[0]++;
1152 }
1153fail:
1154 return err;
1155}
1156
1157static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1158 struct btrfs_root *root,
1159 struct btrfs_path *path,
1160 u64 bytenr, u64 parent,
1161 u64 root_objectid, u64 owner,
1162 u64 offset, int refs_to_add)
1163{
1164 struct btrfs_key key;
1165 struct extent_buffer *leaf;
1166 u32 size;
1167 u32 num_refs;
1168 int ret;
1169
1170 key.objectid = bytenr;
1171 if (parent) {
1172 key.type = BTRFS_SHARED_DATA_REF_KEY;
1173 key.offset = parent;
1174 size = sizeof(struct btrfs_shared_data_ref);
1175 } else {
1176 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1177 key.offset = hash_extent_data_ref(root_objectid,
1178 owner, offset);
1179 size = sizeof(struct btrfs_extent_data_ref);
1180 }
1181
1182 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
1183 if (ret && ret != -EEXIST)
1184 goto fail;
1185
1186 leaf = path->nodes[0];
1187 if (parent) {
1188 struct btrfs_shared_data_ref *ref;
1189 ref = btrfs_item_ptr(leaf, path->slots[0],
1190 struct btrfs_shared_data_ref);
1191 if (ret == 0) {
1192 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
1193 } else {
1194 num_refs = btrfs_shared_data_ref_count(leaf, ref);
1195 num_refs += refs_to_add;
1196 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
1197 }
1198 } else {
1199 struct btrfs_extent_data_ref *ref;
1200 while (ret == -EEXIST) {
1201 ref = btrfs_item_ptr(leaf, path->slots[0],
1202 struct btrfs_extent_data_ref);
1203 if (match_extent_data_ref(leaf, ref, root_objectid,
1204 owner, offset))
1205 break;
1206 btrfs_release_path(path);
1207 key.offset++;
1208 ret = btrfs_insert_empty_item(trans, root, path, &key,
1209 size);
1210 if (ret && ret != -EEXIST)
1211 goto fail;
1212
1213 leaf = path->nodes[0];
1214 }
1215 ref = btrfs_item_ptr(leaf, path->slots[0],
1216 struct btrfs_extent_data_ref);
1217 if (ret == 0) {
1218 btrfs_set_extent_data_ref_root(leaf, ref,
1219 root_objectid);
1220 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
1221 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
1222 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
1223 } else {
1224 num_refs = btrfs_extent_data_ref_count(leaf, ref);
1225 num_refs += refs_to_add;
1226 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
1227 }
1228 }
1229 btrfs_mark_buffer_dirty(leaf);
1230 ret = 0;
1231fail:
1232 btrfs_release_path(path);
1233 return ret;
1234}
1235
1236static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1237 struct btrfs_root *root,
1238 struct btrfs_path *path,
1239 int refs_to_drop)
1240{
1241 struct btrfs_key key;
1242 struct btrfs_extent_data_ref *ref1 = NULL;
1243 struct btrfs_shared_data_ref *ref2 = NULL;
1244 struct extent_buffer *leaf;
1245 u32 num_refs = 0;
1246 int ret = 0;
1247
1248 leaf = path->nodes[0];
1249 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1250
1251 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1252 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1253 struct btrfs_extent_data_ref);
1254 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1255 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1256 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1257 struct btrfs_shared_data_ref);
1258 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1259#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1260 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1261 struct btrfs_extent_ref_v0 *ref0;
1262 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1263 struct btrfs_extent_ref_v0);
1264 num_refs = btrfs_ref_count_v0(leaf, ref0);
1265#endif
1266 } else {
1267 BUG();
1268 }
1269
1270 BUG_ON(num_refs < refs_to_drop);
1271 num_refs -= refs_to_drop;
1272
1273 if (num_refs == 0) {
1274 ret = btrfs_del_item(trans, root, path);
1275 } else {
1276 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1277 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1278 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1279 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
1280#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1281 else {
1282 struct btrfs_extent_ref_v0 *ref0;
1283 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1284 struct btrfs_extent_ref_v0);
1285 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
1286 }
1287#endif
1288 btrfs_mark_buffer_dirty(leaf);
1289 }
1290 return ret;
1291}
1292
1293static noinline u32 extent_data_ref_count(struct btrfs_root *root,
1294 struct btrfs_path *path,
1295 struct btrfs_extent_inline_ref *iref)
1296{
1297 struct btrfs_key key;
1298 struct extent_buffer *leaf;
1299 struct btrfs_extent_data_ref *ref1;
1300 struct btrfs_shared_data_ref *ref2;
1301 u32 num_refs = 0;
1302
1303 leaf = path->nodes[0];
1304 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1305 if (iref) {
1306 if (btrfs_extent_inline_ref_type(leaf, iref) ==
1307 BTRFS_EXTENT_DATA_REF_KEY) {
1308 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
1309 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1310 } else {
1311 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
1312 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1313 }
1314 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1315 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1316 struct btrfs_extent_data_ref);
1317 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1318 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1319 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1320 struct btrfs_shared_data_ref);
1321 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1322#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1323 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1324 struct btrfs_extent_ref_v0 *ref0;
1325 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1326 struct btrfs_extent_ref_v0);
1327 num_refs = btrfs_ref_count_v0(leaf, ref0);
1328#endif
1329 } else {
1330 WARN_ON(1);
1331 }
1332 return num_refs;
1333}
1334
1335static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1336 struct btrfs_root *root,
1337 struct btrfs_path *path,
1338 u64 bytenr, u64 parent,
1339 u64 root_objectid)
1340{
1341 struct btrfs_key key;
1342 int ret;
1343
1344 key.objectid = bytenr;
1345 if (parent) {
1346 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1347 key.offset = parent;
1348 } else {
1349 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1350 key.offset = root_objectid;
1351 }
1352
1353 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1354 if (ret > 0)
1355 ret = -ENOENT;
1356#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1357 if (ret == -ENOENT && parent) {
1358 btrfs_release_path(path);
1359 key.type = BTRFS_EXTENT_REF_V0_KEY;
1360 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1361 if (ret > 0)
1362 ret = -ENOENT;
1363 }
1364#endif
1365 return ret;
1366}
1367
1368static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1369 struct btrfs_root *root,
1370 struct btrfs_path *path,
1371 u64 bytenr, u64 parent,
1372 u64 root_objectid)
1373{
1374 struct btrfs_key key;
1375 int ret;
1376
1377 key.objectid = bytenr;
1378 if (parent) {
1379 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1380 key.offset = parent;
1381 } else {
1382 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1383 key.offset = root_objectid;
1384 }
1385
1386 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1387 btrfs_release_path(path);
1388 return ret;
1389}
1390
1391static inline int extent_ref_type(u64 parent, u64 owner)
1392{
1393 int type;
1394 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1395 if (parent > 0)
1396 type = BTRFS_SHARED_BLOCK_REF_KEY;
1397 else
1398 type = BTRFS_TREE_BLOCK_REF_KEY;
1399 } else {
1400 if (parent > 0)
1401 type = BTRFS_SHARED_DATA_REF_KEY;
1402 else
1403 type = BTRFS_EXTENT_DATA_REF_KEY;
1404 }
1405 return type;
1406}
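/*
 * For example: a tree block owned by the fs tree (owner 5, which is
 * below BTRFS_FIRST_FREE_OBJECTID) with parent == 0 maps to
 * BTRFS_TREE_BLOCK_REF_KEY, while a data extent (owner >=
 * BTRFS_FIRST_FREE_OBJECTID) with a nonzero parent maps to
 * BTRFS_SHARED_DATA_REF_KEY.
 */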
1407
1408static int find_next_key(struct btrfs_path *path, int level,
1409 struct btrfs_key *key)
1410
1411{
1412 for (; level < BTRFS_MAX_LEVEL; level++) {
1413 if (!path->nodes[level])
1414 break;
1415 if (path->slots[level] + 1 >=
1416 btrfs_header_nritems(path->nodes[level]))
1417 continue;
1418 if (level == 0)
1419 btrfs_item_key_to_cpu(path->nodes[level], key,
1420 path->slots[level] + 1);
1421 else
1422 btrfs_node_key_to_cpu(path->nodes[level], key,
1423 path->slots[level] + 1);
1424 return 0;
1425 }
1426 return 1;
1427}
1428
1429/*
1430 * look for inline back ref. if back ref is found, *ref_ret is set
1431 * to the address of inline back ref, and 0 is returned.
1432 *
1433 * if back ref isn't found, *ref_ret is set to the address where it
1434 * should be inserted, and -ENOENT is returned.
1435 *
1436 * if insert is true and there are too many inline back refs, the path
1437 * points to the extent item, and -EAGAIN is returned.
1438 *
1439 * NOTE: inline back refs are ordered in the same way that back ref
1440 * items in the tree are ordered.
1441 */
1442static noinline_for_stack
1443int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1444 struct btrfs_root *root,
1445 struct btrfs_path *path,
1446 struct btrfs_extent_inline_ref **ref_ret,
1447 u64 bytenr, u64 num_bytes,
1448 u64 parent, u64 root_objectid,
1449 u64 owner, u64 offset, int insert)
1450{
1451 struct btrfs_key key;
1452 struct extent_buffer *leaf;
1453 struct btrfs_extent_item *ei;
1454 struct btrfs_extent_inline_ref *iref;
1455 u64 flags;
1456 u64 item_size;
1457 unsigned long ptr;
1458 unsigned long end;
1459 int extra_size;
1460 int type;
1461 int want;
1462 int ret;
1463 int err = 0;
1464
1465 key.objectid = bytenr;
1466 key.type = BTRFS_EXTENT_ITEM_KEY;
1467 key.offset = num_bytes;
1468
1469 want = extent_ref_type(parent, owner);
1470 if (insert) {
1471 extra_size = btrfs_extent_inline_ref_size(want);
1472 path->keep_locks = 1;
1473 } else
1474 extra_size = -1;
1475 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1476 if (ret < 0) {
1477 err = ret;
1478 goto out;
1479 }
1480 if (ret && !insert) {
1481 err = -ENOENT;
1482 goto out;
1483 }
1484 BUG_ON(ret); /* Corruption */
1485
1486 leaf = path->nodes[0];
1487 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1488#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1489 if (item_size < sizeof(*ei)) {
1490 if (!insert) {
1491 err = -ENOENT;
1492 goto out;
1493 }
1494 ret = convert_extent_item_v0(trans, root, path, owner,
1495 extra_size);
1496 if (ret < 0) {
1497 err = ret;
1498 goto out;
1499 }
1500 leaf = path->nodes[0];
1501 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1502 }
1503#endif
1504 BUG_ON(item_size < sizeof(*ei));
1505
1506 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1507 flags = btrfs_extent_flags(leaf, ei);
1508
1509 ptr = (unsigned long)(ei + 1);
1510 end = (unsigned long)ei + item_size;
1511
1512 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1513 ptr += sizeof(struct btrfs_tree_block_info);
1514 BUG_ON(ptr > end);
1515 } else {
1516 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1517 }
1518
1519 err = -ENOENT;
1520 while (1) {
1521 if (ptr >= end) {
1522 WARN_ON(ptr > end);
1523 break;
1524 }
1525 iref = (struct btrfs_extent_inline_ref *)ptr;
1526 type = btrfs_extent_inline_ref_type(leaf, iref);
1527 if (want < type)
1528 break;
1529 if (want > type) {
1530 ptr += btrfs_extent_inline_ref_size(type);
1531 continue;
1532 }
1533
1534 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1535 struct btrfs_extent_data_ref *dref;
1536 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1537 if (match_extent_data_ref(leaf, dref, root_objectid,
1538 owner, offset)) {
1539 err = 0;
1540 break;
1541 }
1542 if (hash_extent_data_ref_item(leaf, dref) <
1543 hash_extent_data_ref(root_objectid, owner, offset))
1544 break;
1545 } else {
1546 u64 ref_offset;
1547 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1548 if (parent > 0) {
1549 if (parent == ref_offset) {
1550 err = 0;
1551 break;
1552 }
1553 if (ref_offset < parent)
1554 break;
1555 } else {
1556 if (root_objectid == ref_offset) {
1557 err = 0;
1558 break;
1559 }
1560 if (ref_offset < root_objectid)
1561 break;
1562 }
1563 }
1564 ptr += btrfs_extent_inline_ref_size(type);
1565 }
1566 if (err == -ENOENT && insert) {
1567 if (item_size + extra_size >=
1568 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1569 err = -EAGAIN;
1570 goto out;
1571 }
1572 /*
1573 * To add a new inline back ref, we have to make sure
1574 * there is no corresponding back ref item.
1575 * For simplicity, we just do not add a new inline back
1576 * ref if there is any kind of item for this block.
1577 */
1578 if (find_next_key(path, 0, &key) == 0 &&
1579 key.objectid == bytenr &&
1580 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1581 err = -EAGAIN;
1582 goto out;
1583 }
1584 }
1585 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1586out:
1587 if (insert) {
1588 path->keep_locks = 0;
1589 btrfs_unlock_up_safe(path, 1);
1590 }
1591 return err;
1592}
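/*
 * Sketch of the calling pattern implied by the contract above (see
 * insert_inline_extent_backref() below for the real caller):
 *
 *	ret = lookup_inline_extent_backref(trans, root, path, &iref, ...);
 *	if (ret == 0)
 *		update the existing inline ref through iref;
 *	else if (ret == -ENOENT)
 *		insert a new inline ref at the returned location;
 *	else if (ret == -EAGAIN)
 *		fall back to a separate keyed back ref item;
 */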
1593
1594/*
1595 * helper to add a new inline back ref
1596 */
1597static noinline_for_stack
1598void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1599 struct btrfs_root *root,
1600 struct btrfs_path *path,
1601 struct btrfs_extent_inline_ref *iref,
1602 u64 parent, u64 root_objectid,
1603 u64 owner, u64 offset, int refs_to_add,
1604 struct btrfs_delayed_extent_op *extent_op)
1605{
1606 struct extent_buffer *leaf;
1607 struct btrfs_extent_item *ei;
1608 unsigned long ptr;
1609 unsigned long end;
1610 unsigned long item_offset;
1611 u64 refs;
1612 int size;
1613 int type;
1614
1615 leaf = path->nodes[0];
1616 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1617 item_offset = (unsigned long)iref - (unsigned long)ei;
1618
1619 type = extent_ref_type(parent, owner);
1620 size = btrfs_extent_inline_ref_size(type);
1621
1622 btrfs_extend_item(trans, root, path, size);
1623
1624 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1625 refs = btrfs_extent_refs(leaf, ei);
1626 refs += refs_to_add;
1627 btrfs_set_extent_refs(leaf, ei, refs);
1628 if (extent_op)
1629 __run_delayed_extent_op(extent_op, leaf, ei);
1630
1631 ptr = (unsigned long)ei + item_offset;
1632 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1633 if (ptr < end - size)
1634 memmove_extent_buffer(leaf, ptr + size, ptr,
1635 end - size - ptr);
1636
1637 iref = (struct btrfs_extent_inline_ref *)ptr;
1638 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1639 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1640 struct btrfs_extent_data_ref *dref;
1641 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1642 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1643 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1644 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1645 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1646 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1647 struct btrfs_shared_data_ref *sref;
1648 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1649 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1650 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1651 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1652 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1653 } else {
1654 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1655 }
1656 btrfs_mark_buffer_dirty(leaf);
1657}
1658
1659static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1660 struct btrfs_root *root,
1661 struct btrfs_path *path,
1662 struct btrfs_extent_inline_ref **ref_ret,
1663 u64 bytenr, u64 num_bytes, u64 parent,
1664 u64 root_objectid, u64 owner, u64 offset)
1665{
1666 int ret;
1667
1668 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1669 bytenr, num_bytes, parent,
1670 root_objectid, owner, offset, 0);
1671 if (ret != -ENOENT)
1672 return ret;
1673
1674 btrfs_release_path(path);
1675 *ref_ret = NULL;
1676
1677 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1678 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1679 root_objectid);
1680 } else {
1681 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1682 root_objectid, owner, offset);
1683 }
1684 return ret;
1685}
1686
1687/*
1688 * helper to update/remove an inline back ref
1689 */
1690static noinline_for_stack
1691void update_inline_extent_backref(struct btrfs_trans_handle *trans,
1692 struct btrfs_root *root,
1693 struct btrfs_path *path,
1694 struct btrfs_extent_inline_ref *iref,
1695 int refs_to_mod,
1696 struct btrfs_delayed_extent_op *extent_op)
1697{
1698 struct extent_buffer *leaf;
1699 struct btrfs_extent_item *ei;
1700 struct btrfs_extent_data_ref *dref = NULL;
1701 struct btrfs_shared_data_ref *sref = NULL;
1702 unsigned long ptr;
1703 unsigned long end;
1704 u32 item_size;
1705 int size;
1706 int type;
1707 u64 refs;
1708
1709 leaf = path->nodes[0];
1710 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1711 refs = btrfs_extent_refs(leaf, ei);
1712 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1713 refs += refs_to_mod;
1714 btrfs_set_extent_refs(leaf, ei, refs);
1715 if (extent_op)
1716 __run_delayed_extent_op(extent_op, leaf, ei);
1717
1718 type = btrfs_extent_inline_ref_type(leaf, iref);
1719
1720 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1721 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1722 refs = btrfs_extent_data_ref_count(leaf, dref);
1723 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1724 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1725 refs = btrfs_shared_data_ref_count(leaf, sref);
1726 } else {
1727 refs = 1;
1728 BUG_ON(refs_to_mod != -1);
1729 }
1730
1731 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1732 refs += refs_to_mod;
1733
1734 if (refs > 0) {
1735 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1736 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1737 else
1738 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1739 } else {
1740 size = btrfs_extent_inline_ref_size(type);
1741 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1742 ptr = (unsigned long)iref;
1743 end = (unsigned long)ei + item_size;
1744 if (ptr + size < end)
1745 memmove_extent_buffer(leaf, ptr, ptr + size,
1746 end - ptr - size);
1747 item_size -= size;
1748 btrfs_truncate_item(trans, root, path, item_size, 1);
1749 }
1750 btrfs_mark_buffer_dirty(leaf);
1751}
1752
1753static noinline_for_stack
1754int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1755 struct btrfs_root *root,
1756 struct btrfs_path *path,
1757 u64 bytenr, u64 num_bytes, u64 parent,
1758 u64 root_objectid, u64 owner,
1759 u64 offset, int refs_to_add,
1760 struct btrfs_delayed_extent_op *extent_op)
1761{
1762 struct btrfs_extent_inline_ref *iref;
1763 int ret;
1764
1765 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1766 bytenr, num_bytes, parent,
1767 root_objectid, owner, offset, 1);
1768 if (ret == 0) {
1769 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1770 update_inline_extent_backref(trans, root, path, iref,
1771 refs_to_add, extent_op);
1772 } else if (ret == -ENOENT) {
1773 setup_inline_extent_backref(trans, root, path, iref, parent,
1774 root_objectid, owner, offset,
1775 refs_to_add, extent_op);
1776 ret = 0;
1777 }
1778 return ret;
1779}
1780
1781static int insert_extent_backref(struct btrfs_trans_handle *trans,
1782 struct btrfs_root *root,
1783 struct btrfs_path *path,
1784 u64 bytenr, u64 parent, u64 root_objectid,
1785 u64 owner, u64 offset, int refs_to_add)
1786{
1787 int ret;
1788 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1789 BUG_ON(refs_to_add != 1);
1790 ret = insert_tree_block_ref(trans, root, path, bytenr,
1791 parent, root_objectid);
1792 } else {
1793 ret = insert_extent_data_ref(trans, root, path, bytenr,
1794 parent, root_objectid,
1795 owner, offset, refs_to_add);
1796 }
1797 return ret;
1798}
1799
1800static int remove_extent_backref(struct btrfs_trans_handle *trans,
1801 struct btrfs_root *root,
1802 struct btrfs_path *path,
1803 struct btrfs_extent_inline_ref *iref,
1804 int refs_to_drop, int is_data)
1805{
1806 int ret = 0;
1807
1808 BUG_ON(!is_data && refs_to_drop != 1);
1809 if (iref) {
1810 update_inline_extent_backref(trans, root, path, iref,
1811 -refs_to_drop, NULL);
1812 } else if (is_data) {
1813 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1814 } else {
1815 ret = btrfs_del_item(trans, root, path);
1816 }
1817 return ret;
1818}
1819
1820static int btrfs_issue_discard(struct block_device *bdev,
1821 u64 start, u64 len)
1822{
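	/* blkdev_issue_discard() takes 512-byte sectors, hence the >> 9 */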
1823 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
1824}
1825
1826static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1827 u64 num_bytes, u64 *actual_bytes)
1828{
1829 int ret;
1830 u64 discarded_bytes = 0;
1831 struct btrfs_bio *bbio = NULL;
1832
1833
1834 /* Tell the block device(s) that the sectors can be discarded */
1835 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
1836 bytenr, &num_bytes, &bbio, 0);
1837 /* Error condition is -ENOMEM */
1838 if (!ret) {
1839 struct btrfs_bio_stripe *stripe = bbio->stripes;
1840 int i;
1841
1842
1843 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1844 if (!stripe->dev->can_discard)
1845 continue;
1846
1847 ret = btrfs_issue_discard(stripe->dev->bdev,
1848 stripe->physical,
1849 stripe->length);
1850 if (!ret)
1851 discarded_bytes += stripe->length;
1852 else if (ret != -EOPNOTSUPP)
1853 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
1854
1855 /*
1856 * Just in case we get back EOPNOTSUPP for some reason,
1857 * just ignore the return value so we don't screw up
1858 * people calling discard_extent.
1859 */
1860 ret = 0;
1861 }
1862 kfree(bbio);
1863 }
1864
1865 if (actual_bytes)
1866 *actual_bytes = discarded_bytes;
1867
1868
1869 return ret;
1870}
1871
1872/* Can return -ENOMEM */
1873int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1874 struct btrfs_root *root,
1875 u64 bytenr, u64 num_bytes, u64 parent,
1876 u64 root_objectid, u64 owner, u64 offset, int for_cow)
1877{
1878 int ret;
1879 struct btrfs_fs_info *fs_info = root->fs_info;
1880
1881 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1882 root_objectid == BTRFS_TREE_LOG_OBJECTID);
1883
1884 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1885 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
1886 num_bytes,
1887 parent, root_objectid, (int)owner,
1888 BTRFS_ADD_DELAYED_REF, NULL, for_cow);
1889 } else {
1890 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
1891 num_bytes,
1892 parent, root_objectid, owner, offset,
1893 BTRFS_ADD_DELAYED_REF, NULL, for_cow);
1894 }
1895 return ret;
1896}
1897
1898static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1899 struct btrfs_root *root,
1900 u64 bytenr, u64 num_bytes,
1901 u64 parent, u64 root_objectid,
1902 u64 owner, u64 offset, int refs_to_add,
1903 struct btrfs_delayed_extent_op *extent_op)
1904{
1905 struct btrfs_path *path;
1906 struct extent_buffer *leaf;
1907 struct btrfs_extent_item *item;
1908 u64 refs;
1909 int ret;
1910 int err = 0;
1911
1912 path = btrfs_alloc_path();
1913 if (!path)
1914 return -ENOMEM;
1915
1916 path->reada = 1;
1917 path->leave_spinning = 1;
1918 /* this will set up the path even if it fails to insert the back ref */
1919 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
1920 path, bytenr, num_bytes, parent,
1921 root_objectid, owner, offset,
1922 refs_to_add, extent_op);
1923 if (ret == 0)
1924 goto out;
1925
1926 if (ret != -EAGAIN) {
1927 err = ret;
1928 goto out;
1929 }
1930
1931 leaf = path->nodes[0];
1932 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1933 refs = btrfs_extent_refs(leaf, item);
1934 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1935 if (extent_op)
1936 __run_delayed_extent_op(extent_op, leaf, item);
1937
1938 btrfs_mark_buffer_dirty(leaf);
1939 btrfs_release_path(path);
1940
1941 path->reada = 1;
1942 path->leave_spinning = 1;
1943
1944 /* now insert the actual backref */
1945 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1946 path, bytenr, parent, root_objectid,
1947 owner, offset, refs_to_add);
1948 if (ret)
1949 btrfs_abort_transaction(trans, root, ret);
1950out:
1951 btrfs_free_path(path);
1952 return err;
1953}
1954
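/*
 * Process one delayed ref for a data extent: insert the reserved extent
 * item for a newly written extent, or bump/drop the ref count on an
 * existing one, depending on node->action and insert_reserved.
 */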
1955static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1956 struct btrfs_root *root,
1957 struct btrfs_delayed_ref_node *node,
1958 struct btrfs_delayed_extent_op *extent_op,
1959 int insert_reserved)
1960{
1961 int ret = 0;
1962 struct btrfs_delayed_data_ref *ref;
1963 struct btrfs_key ins;
1964 u64 parent = 0;
1965 u64 ref_root = 0;
1966 u64 flags = 0;
1967
1968 ins.objectid = node->bytenr;
1969 ins.offset = node->num_bytes;
1970 ins.type = BTRFS_EXTENT_ITEM_KEY;
1971
1972 ref = btrfs_delayed_node_to_data_ref(node);
1973 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1974 parent = ref->parent;
1975 else
1976 ref_root = ref->root;
1977
1978 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1979 if (extent_op) {
1980 BUG_ON(extent_op->update_key);
1981 flags |= extent_op->flags_to_set;
1982 }
1983 ret = alloc_reserved_file_extent(trans, root,
1984 parent, ref_root, flags,
1985 ref->objectid, ref->offset,
1986 &ins, node->ref_mod);
1987 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1988 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1989 node->num_bytes, parent,
1990 ref_root, ref->objectid,
1991 ref->offset, node->ref_mod,
1992 extent_op);
1993 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1994 ret = __btrfs_free_extent(trans, root, node->bytenr,
1995 node->num_bytes, parent,
1996 ref_root, ref->objectid,
1997 ref->offset, node->ref_mod,
1998 extent_op);
1999 } else {
2000 BUG();
2001 }
2002 return ret;
2003}
2004
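/*
 * Apply the flag and/or tree block key updates recorded in @extent_op to
 * the extent item @ei inside @leaf.
 */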
2005static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2006 struct extent_buffer *leaf,
2007 struct btrfs_extent_item *ei)
2008{
2009 u64 flags = btrfs_extent_flags(leaf, ei);
2010 if (extent_op->update_flags) {
2011 flags |= extent_op->flags_to_set;
2012 btrfs_set_extent_flags(leaf, ei, flags);
2013 }
2014
2015 if (extent_op->update_key) {
2016 struct btrfs_tree_block_info *bi;
2017 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2018 bi = (struct btrfs_tree_block_info *)(ei + 1);
2019 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2020 }
2021}
2022
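/*
 * Look up the extent item described by @node in the extent tree and apply
 * the pending @extent_op to it, converting old v0 items first when the
 * compat code is built in.
 */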
2023static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2024 struct btrfs_root *root,
2025 struct btrfs_delayed_ref_node *node,
2026 struct btrfs_delayed_extent_op *extent_op)
2027{
2028 struct btrfs_key key;
2029 struct btrfs_path *path;
2030 struct btrfs_extent_item *ei;
2031 struct extent_buffer *leaf;
2032 u32 item_size;
2033 int ret;
2034 int err = 0;
2035
2036 if (trans->aborted)
2037 return 0;
2038
2039 path = btrfs_alloc_path();
2040 if (!path)
2041 return -ENOMEM;
2042
2043 key.objectid = node->bytenr;
2044 key.type = BTRFS_EXTENT_ITEM_KEY;
2045 key.offset = node->num_bytes;
2046
2047 path->reada = 1;
2048 path->leave_spinning = 1;
2049 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
2050 path, 0, 1);
2051 if (ret < 0) {
2052 err = ret;
2053 goto out;
2054 }
2055 if (ret > 0) {
2056 err = -EIO;
2057 goto out;
2058 }
2059
2060 leaf = path->nodes[0];
2061 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2062#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2063 if (item_size < sizeof(*ei)) {
2064 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
2065 path, (u64)-1, 0);
2066 if (ret < 0) {
2067 err = ret;
2068 goto out;
2069 }
2070 leaf = path->nodes[0];
2071 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2072 }
2073#endif
2074 BUG_ON(item_size < sizeof(*ei));
2075 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2076 __run_delayed_extent_op(extent_op, leaf, ei);
2077
2078 btrfs_mark_buffer_dirty(leaf);
2079out:
2080 btrfs_free_path(path);
2081 return err;
2082}
2083
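/*
 * Process one delayed ref for a tree block.  Same dispatch as the data
 * variant above, except tree blocks always carry a ref_mod of 1 and
 * reserved inserts must come with key/flag updates in @extent_op.
 */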
2084static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2085 struct btrfs_root *root,
2086 struct btrfs_delayed_ref_node *node,
2087 struct btrfs_delayed_extent_op *extent_op,
2088 int insert_reserved)
2089{
2090 int ret = 0;
2091 struct btrfs_delayed_tree_ref *ref;
2092 struct btrfs_key ins;
2093 u64 parent = 0;
2094 u64 ref_root = 0;
2095
2096 ins.objectid = node->bytenr;
2097 ins.offset = node->num_bytes;
2098 ins.type = BTRFS_EXTENT_ITEM_KEY;
2099
2100 ref = btrfs_delayed_node_to_tree_ref(node);
2101 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2102 parent = ref->parent;
2103 else
2104 ref_root = ref->root;
2105
2106 BUG_ON(node->ref_mod != 1);
2107 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2108 BUG_ON(!extent_op || !extent_op->update_flags ||
2109 !extent_op->update_key);
2110 ret = alloc_reserved_tree_block(trans, root,
2111 parent, ref_root,
2112 extent_op->flags_to_set,
2113 &extent_op->key,
2114 ref->level, &ins);
2115 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2116 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
2117 node->num_bytes, parent, ref_root,
2118 ref->level, 0, 1, extent_op);
2119 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2120 ret = __btrfs_free_extent(trans, root, node->bytenr,
2121 node->num_bytes, parent, ref_root,
2122 ref->level, 0, 1, extent_op);
2123 } else {
2124 BUG();
2125 }
2126 return ret;
2127}
2128
2129/* helper function to actually process a single delayed ref entry */
2130static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2131 struct btrfs_root *root,
2132 struct btrfs_delayed_ref_node *node,
2133 struct btrfs_delayed_extent_op *extent_op,
2134 int insert_reserved)
2135{
2136 int ret = 0;
2137
2138 if (trans->aborted)
2139 return 0;
2140
2141 if (btrfs_delayed_ref_is_head(node)) {
2142 struct btrfs_delayed_ref_head *head;
2143 /*
2144 * we've hit the end of the chain and we were supposed
2145 * to insert this extent into the tree. But, it got
2146 * deleted before we ever needed to insert it, so all
2147 * we have to do is clean up the accounting
2148 */
2149 BUG_ON(extent_op);
2150 head = btrfs_delayed_node_to_head(node);
2151 if (insert_reserved) {
2152 btrfs_pin_extent(root, node->bytenr,
2153 node->num_bytes, 1);
2154 if (head->is_data) {
2155 ret = btrfs_del_csums(trans, root,
2156 node->bytenr,
2157 node->num_bytes);
2158 }
2159 }
2160 mutex_unlock(&head->mutex);
2161 return ret;
2162 }
2163
2164 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2165 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2166 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2167 insert_reserved);
2168 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2169 node->type == BTRFS_SHARED_DATA_REF_KEY)
2170 ret = run_delayed_data_ref(trans, root, node, extent_op,
2171 insert_reserved);
2172 else
2173 BUG();
2174 return ret;
2175}
2176
2177static noinline struct btrfs_delayed_ref_node *
2178select_delayed_ref(struct btrfs_delayed_ref_head *head)
2179{
2180 struct rb_node *node;
2181 struct btrfs_delayed_ref_node *ref;
2182 int action = BTRFS_ADD_DELAYED_REF;
2183again:
2184 /*
2185 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
2186 * this prevents ref count from going down to zero when
2187 * there are still pending delayed refs.
2188 */
2189 node = rb_prev(&head->node.rb_node);
2190 while (1) {
2191 if (!node)
2192 break;
2193 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2194 rb_node);
2195 if (ref->bytenr != head->node.bytenr)
2196 break;
2197 if (ref->action == action)
2198 return ref;
2199 node = rb_prev(node);
2200 }
2201 if (action == BTRFS_ADD_DELAYED_REF) {
2202 action = BTRFS_DROP_DELAYED_REF;
2203 goto again;
2204 }
2205 return NULL;
2206}
2207
2208/*
2209 * Returns the number of refs processed on success (even if the transaction was
2210 * already aborted).  Returns -ENOMEM or -EIO on failure and will abort the transaction.
2211 */
2212static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2213 struct btrfs_root *root,
2214 struct list_head *cluster)
2215{
2216 struct btrfs_delayed_ref_root *delayed_refs;
2217 struct btrfs_delayed_ref_node *ref;
2218 struct btrfs_delayed_ref_head *locked_ref = NULL;
2219 struct btrfs_delayed_extent_op *extent_op;
2220 int ret;
2221 int count = 0;
2222 int must_insert_reserved = 0;
2223
2224 delayed_refs = &trans->transaction->delayed_refs;
2225 while (1) {
2226 if (!locked_ref) {
2227 /* pick a new head ref from the cluster list */
2228 if (list_empty(cluster))
2229 break;
2230
2231 locked_ref = list_entry(cluster->next,
2232 struct btrfs_delayed_ref_head, cluster);
2233
2234 /* grab the lock that says we are going to process
2235 * all the refs for this head */
2236 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2237
2238 /*
2239 * we may have dropped the spin lock to get the head
2240 * mutex lock, and that might have given someone else
2241 * time to free the head. If that's true, it has been
2242 * removed from our list and we can move on.
2243 */
2244 if (ret == -EAGAIN) {
2245 locked_ref = NULL;
2246 count++;
2247 continue;
2248 }
2249 }
2250
2251 /*
2252 * locked_ref is the head node, so we have to go one
2253 * node back for any delayed ref updates
2254 */
2255 ref = select_delayed_ref(locked_ref);
2256
2257 if (ref && ref->seq &&
2258 btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
2259 /*
2260 * there are still refs with lower seq numbers in the
2261 * process of being added. Don't run this ref yet.
2262 */
2263 list_del_init(&locked_ref->cluster);
2264 mutex_unlock(&locked_ref->mutex);
2265 locked_ref = NULL;
2266 delayed_refs->num_heads_ready++;
2267 spin_unlock(&delayed_refs->lock);
2268 cond_resched();
2269 spin_lock(&delayed_refs->lock);
2270 continue;
2271 }
2272
2273 /*
2274 * record the must insert reserved flag before we
2275 * drop the spin lock.
2276 */
2277 must_insert_reserved = locked_ref->must_insert_reserved;
2278 locked_ref->must_insert_reserved = 0;
2279
2280 extent_op = locked_ref->extent_op;
2281 locked_ref->extent_op = NULL;
2282
2283 if (!ref) {
2284 /* All delayed refs have been processed, go ahead
2285 * and send the head node to run_one_delayed_ref,
2286 * so that any accounting fixes can happen
2287 */
2288 ref = &locked_ref->node;
2289
2290 if (extent_op && must_insert_reserved) {
2291 kfree(extent_op);
2292 extent_op = NULL;
2293 }
2294
2295 if (extent_op) {
2296 spin_unlock(&delayed_refs->lock);
2297
2298 ret = run_delayed_extent_op(trans, root,
2299 ref, extent_op);
2300 kfree(extent_op);
2301
2302 if (ret) {
2303 printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
2304 spin_lock(&delayed_refs->lock);
2305 return ret;
2306 }
2307
2308 goto next;
2309 }
2310
2311 list_del_init(&locked_ref->cluster);
2312 locked_ref = NULL;
2313 }
2314
2315 ref->in_tree = 0;
2316 rb_erase(&ref->rb_node, &delayed_refs->root);
2317 delayed_refs->num_entries--;
2318 /*
2319 * we modified num_entries, but as we're currently running
2320 * delayed refs, skip
2321 * wake_up(&delayed_refs->seq_wait);
2322 * here.
2323 */
2324 spin_unlock(&delayed_refs->lock);
2325
2326 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2327 must_insert_reserved);
2328
2329 btrfs_put_delayed_ref(ref);
2330 kfree(extent_op);
2331 count++;
2332
2333 if (ret) {
2334 printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
2335 spin_lock(&delayed_refs->lock);
2336 return ret;
2337 }
2338
2339next:
2340 do_chunk_alloc(trans, root->fs_info->extent_root,
2341 2 * 1024 * 1024,
2342 btrfs_get_alloc_profile(root, 0),
2343 CHUNK_ALLOC_NO_FORCE);
2344 cond_resched();
2345 spin_lock(&delayed_refs->lock);
2346 }
2347 return count;
2348}
2349
2350
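/*
 * Drop delayed_refs->lock and sleep until either the number of delayed ref
 * entries changes or the head of the seq list changes, then retake the
 * lock.  This avoids busy waiting when every ref in the tree is currently
 * blocked behind a lower sequence number.
 */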
2351static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
2352 unsigned long num_refs)
2353{
2354 struct list_head *first_seq = delayed_refs->seq_head.next;
2355
2356 spin_unlock(&delayed_refs->lock);
2357 pr_debug("waiting for more refs (num %ld, first %p)\n",
2358 num_refs, first_seq);
2359 wait_event(delayed_refs->seq_wait,
2360 num_refs != delayed_refs->num_entries ||
2361 delayed_refs->seq_head.next != first_seq);
2362 pr_debug("done waiting for more refs (num %ld, first %p)\n",
2363 delayed_refs->num_entries, delayed_refs->seq_head.next);
2364 spin_lock(&delayed_refs->lock);
2365}
2366
2367/*
2368 * this starts processing the delayed reference count updates and
2369 * extent insertions we have queued up so far. count can be
2370 * 0, which means to process everything in the tree at the start
2371 * of the run (but not newly added entries), or it can be some target
2372 * number you'd like to process.
2373 *
2374 * Returns 0 on success or if called with an aborted transaction
2375 * Returns <0 on error and aborts the transaction
2376 */
2377int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2378 struct btrfs_root *root, unsigned long count)
2379{
2380 struct rb_node *node;
2381 struct btrfs_delayed_ref_root *delayed_refs;
2382 struct btrfs_delayed_ref_node *ref;
2383 struct list_head cluster;
2384 int ret;
2385 u64 delayed_start;
2386 int run_all = count == (unsigned long)-1;
2387 int run_most = 0;
2388 unsigned long num_refs = 0;
2389 int consider_waiting;
2390
2391 /* We'll clean this up in btrfs_cleanup_transaction */
2392 if (trans->aborted)
2393 return 0;
2394
2395 if (root == root->fs_info->extent_root)
2396 root = root->fs_info->tree_root;
2397
2398 do_chunk_alloc(trans, root->fs_info->extent_root,
2399 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
2400 CHUNK_ALLOC_NO_FORCE);
2401
2402 delayed_refs = &trans->transaction->delayed_refs;
2403 INIT_LIST_HEAD(&cluster);
2404again:
2405 consider_waiting = 0;
2406 spin_lock(&delayed_refs->lock);
2407 if (count == 0) {
2408 count = delayed_refs->num_entries * 2;
2409 run_most = 1;
2410 }
2411 while (1) {
2412 if (!(run_all || run_most) &&
2413 delayed_refs->num_heads_ready < 64)
2414 break;
2415
2416 /*
2417 * go find something we can process in the rbtree. We start at
2418 * the beginning of the tree, and then build a cluster
2419 * of refs to process starting at the first one we are able to
2420 * lock
2421 */
2422 delayed_start = delayed_refs->run_delayed_start;
2423 ret = btrfs_find_ref_cluster(trans, &cluster,
2424 delayed_refs->run_delayed_start);
2425 if (ret)
2426 break;
2427
2428 if (delayed_start >= delayed_refs->run_delayed_start) {
2429 if (consider_waiting == 0) {
2430 /*
2431 * btrfs_find_ref_cluster looped. Let's do one
2432 * more cycle. If we don't run any delayed refs
2433 * during that cycle (because they are all
2434 * blocked) and the number of refs doesn't
2435 * change, we avoid busy waiting.
2436 */
2437 consider_waiting = 1;
2438 num_refs = delayed_refs->num_entries;
2439 } else {
2440 wait_for_more_refs(delayed_refs, num_refs);
2441 /*
2442 * after waiting, things have changed. we
2443 * dropped the lock and someone else might have
2444 * run some refs, built new clusters and so on.
2445 * therefore, we restart staleness detection.
2446 */
2447 consider_waiting = 0;
2448 }
2449 }
2450
2451 ret = run_clustered_refs(trans, root, &cluster);
2452 if (ret < 0) {
2453 spin_unlock(&delayed_refs->lock);
2454 btrfs_abort_transaction(trans, root, ret);
2455 return ret;
2456 }
2457
2458 count -= min_t(unsigned long, ret, count);
2459
2460 if (count == 0)
2461 break;
2462
2463 if (ret || delayed_refs->run_delayed_start == 0) {
2464 /* refs were run, let's reset staleness detection */
2465 consider_waiting = 0;
2466 }
2467 }
2468
2469 if (run_all) {
2470 node = rb_first(&delayed_refs->root);
2471 if (!node)
2472 goto out;
2473 count = (unsigned long)-1;
2474
2475 while (node) {
2476 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2477 rb_node);
2478 if (btrfs_delayed_ref_is_head(ref)) {
2479 struct btrfs_delayed_ref_head *head;
2480
2481 head = btrfs_delayed_node_to_head(ref);
2482 atomic_inc(&ref->refs);
2483
2484 spin_unlock(&delayed_refs->lock);
2485 /*
2486 * Mutex was contended, block until it's
2487 * released and try again
2488 */
2489 mutex_lock(&head->mutex);
2490 mutex_unlock(&head->mutex);
2491
2492 btrfs_put_delayed_ref(ref);
2493 cond_resched();
2494 goto again;
2495 }
2496 node = rb_next(node);
2497 }
2498 spin_unlock(&delayed_refs->lock);
2499 schedule_timeout(1);
2500 goto again;
2501 }
2502out:
2503 spin_unlock(&delayed_refs->lock);
2504 return 0;
2505}
2506
2507int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2508 struct btrfs_root *root,
2509 u64 bytenr, u64 num_bytes, u64 flags,
2510 int is_data)
2511{
2512 struct btrfs_delayed_extent_op *extent_op;
2513 int ret;
2514
2515 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2516 if (!extent_op)
2517 return -ENOMEM;
2518
2519 extent_op->flags_to_set = flags;
2520 extent_op->update_flags = 1;
2521 extent_op->update_key = 0;
2522 extent_op->is_data = is_data ? 1 : 0;
2523
2524 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2525 num_bytes, extent_op);
2526 if (ret)
2527 kfree(extent_op);
2528 return ret;
2529}
2530
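/*
 * Check the delayed refs for references to @bytenr other than the data ref
 * held by @objectid/@offset in this root.  Returns 0 if our ref is the only
 * pending one, 1 if some other ref exists, -ENOENT if there are no pending
 * refs for this extent, and -EAGAIN if the head mutex was contended and
 * the caller should retry.
 */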
2531static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2532 struct btrfs_root *root,
2533 struct btrfs_path *path,
2534 u64 objectid, u64 offset, u64 bytenr)
2535{
2536 struct btrfs_delayed_ref_head *head;
2537 struct btrfs_delayed_ref_node *ref;
2538 struct btrfs_delayed_data_ref *data_ref;
2539 struct btrfs_delayed_ref_root *delayed_refs;
2540 struct rb_node *node;
2541 int ret = 0;
2542
2543 ret = -ENOENT;
2544 delayed_refs = &trans->transaction->delayed_refs;
2545 spin_lock(&delayed_refs->lock);
2546 head = btrfs_find_delayed_ref_head(trans, bytenr);
2547 if (!head)
2548 goto out;
2549
2550 if (!mutex_trylock(&head->mutex)) {
2551 atomic_inc(&head->node.refs);
2552 spin_unlock(&delayed_refs->lock);
2553
2554 btrfs_release_path(path);
2555
2556 /*
2557 * Mutex was contended, block until it's released and let
2558 * caller try again
2559 */
2560 mutex_lock(&head->mutex);
2561 mutex_unlock(&head->mutex);
2562 btrfs_put_delayed_ref(&head->node);
2563 return -EAGAIN;
2564 }
2565
2566 node = rb_prev(&head->node.rb_node);
2567 if (!node)
2568 goto out_unlock;
2569
2570 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2571
2572 if (ref->bytenr != bytenr)
2573 goto out_unlock;
2574
2575 ret = 1;
2576 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2577 goto out_unlock;
2578
2579 data_ref = btrfs_delayed_node_to_data_ref(ref);
2580
2581 node = rb_prev(node);
2582 if (node) {
2583 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2584 if (ref->bytenr == bytenr)
2585 goto out_unlock;
2586 }
2587
2588 if (data_ref->root != root->root_key.objectid ||
2589 data_ref->objectid != objectid || data_ref->offset != offset)
2590 goto out_unlock;
2591
2592 ret = 0;
2593out_unlock:
2594 mutex_unlock(&head->mutex);
2595out:
2596 spin_unlock(&delayed_refs->lock);
2597 return ret;
2598}
2599
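/*
 * Check the committed extent tree for references to @bytenr other than the
 * one held by @objectid/@offset in this root.  Returns 0 only when the
 * extent carries a single inline data backref that belongs to us and was
 * created after the last snapshot; returns 1 when the extent is (or may
 * be) shared, and -ENOENT when no matching extent item is found.
 */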
2600static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2601 struct btrfs_root *root,
2602 struct btrfs_path *path,
2603 u64 objectid, u64 offset, u64 bytenr)
2604{
2605 struct btrfs_root *extent_root = root->fs_info->extent_root;
2606 struct extent_buffer *leaf;
2607 struct btrfs_extent_data_ref *ref;
2608 struct btrfs_extent_inline_ref *iref;
2609 struct btrfs_extent_item *ei;
2610 struct btrfs_key key;
2611 u32 item_size;
2612 int ret;
2613
2614 key.objectid = bytenr;
2615 key.offset = (u64)-1;
2616 key.type = BTRFS_EXTENT_ITEM_KEY;
2617
2618 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2619 if (ret < 0)
2620 goto out;
2621 BUG_ON(ret == 0); /* Corruption */
2622
2623 ret = -ENOENT;
2624 if (path->slots[0] == 0)
2625 goto out;
2626
2627 path->slots[0]--;
2628 leaf = path->nodes[0];
2629 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2630
2631 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2632 goto out;
2633
2634 ret = 1;
2635 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2636#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2637 if (item_size < sizeof(*ei)) {
2638 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2639 goto out;
2640 }
2641#endif
2642 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2643
2644 if (item_size != sizeof(*ei) +
2645 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2646 goto out;
2647
2648 if (btrfs_extent_generation(leaf, ei) <=
2649 btrfs_root_last_snapshot(&root->root_item))
2650 goto out;
2651
2652 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2653 if (btrfs_extent_inline_ref_type(leaf, iref) !=
2654 BTRFS_EXTENT_DATA_REF_KEY)
2655 goto out;
2656
2657 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2658 if (btrfs_extent_refs(leaf, ei) !=
2659 btrfs_extent_data_ref_count(leaf, ref) ||
2660 btrfs_extent_data_ref_root(leaf, ref) !=
2661 root->root_key.objectid ||
2662 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2663 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2664 goto out;
2665
2666 ret = 0;
2667out:
2668 return ret;
2669}
2670
2671int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2672 struct btrfs_root *root,
2673 u64 objectid, u64 offset, u64 bytenr)
2674{
2675 struct btrfs_path *path;
2676 int ret;
2677 int ret2;
2678
2679 path = btrfs_alloc_path();
2680 if (!path)
2681 return -ENOENT;
2682
2683 do {
2684 ret = check_committed_ref(trans, root, path, objectid,
2685 offset, bytenr);
2686 if (ret && ret != -ENOENT)
2687 goto out;
2688
2689 ret2 = check_delayed_ref(trans, root, path, objectid,
2690 offset, bytenr);
2691 } while (ret2 == -EAGAIN);
2692
2693 if (ret2 && ret2 != -ENOENT) {
2694 ret = ret2;
2695 goto out;
2696 }
2697
2698 if (ret != -ENOENT || ret2 != -ENOENT)
2699 ret = 0;
2700out:
2701 btrfs_free_path(path);
2702 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2703 WARN_ON(ret > 0);
2704 return ret;
2705}
2706
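/*
 * Walk every pointer in @buf and add or drop one reference on each extent
 * it points to: regular (non-inline, non-hole) file extents for leaves,
 * child block pointers for nodes.  This is the workhorse behind
 * btrfs_inc_ref() and btrfs_dec_ref() below.
 */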
2707static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2708 struct btrfs_root *root,
2709 struct extent_buffer *buf,
2710 int full_backref, int inc, int for_cow)
2711{
2712 u64 bytenr;
2713 u64 num_bytes;
2714 u64 parent;
2715 u64 ref_root;
2716 u32 nritems;
2717 struct btrfs_key key;
2718 struct btrfs_file_extent_item *fi;
2719 int i;
2720 int level;
2721 int ret = 0;
2722 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
2723 u64, u64, u64, u64, u64, u64, int);
2724
2725 ref_root = btrfs_header_owner(buf);
2726 nritems = btrfs_header_nritems(buf);
2727 level = btrfs_header_level(buf);
2728
2729 if (!root->ref_cows && level == 0)
2730 return 0;
2731
2732 if (inc)
2733 process_func = btrfs_inc_extent_ref;
2734 else
2735 process_func = btrfs_free_extent;
2736
2737 if (full_backref)
2738 parent = buf->start;
2739 else
2740 parent = 0;
2741
2742 for (i = 0; i < nritems; i++) {
2743 if (level == 0) {
2744 btrfs_item_key_to_cpu(buf, &key, i);
2745 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2746 continue;
2747 fi = btrfs_item_ptr(buf, i,
2748 struct btrfs_file_extent_item);
2749 if (btrfs_file_extent_type(buf, fi) ==
2750 BTRFS_FILE_EXTENT_INLINE)
2751 continue;
2752 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2753 if (bytenr == 0)
2754 continue;
2755
2756 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2757 key.offset -= btrfs_file_extent_offset(buf, fi);
2758 ret = process_func(trans, root, bytenr, num_bytes,
2759 parent, ref_root, key.objectid,
2760 key.offset, for_cow);
2761 if (ret)
2762 goto fail;
2763 } else {
2764 bytenr = btrfs_node_blockptr(buf, i);
2765 num_bytes = btrfs_level_size(root, level - 1);
2766 ret = process_func(trans, root, bytenr, num_bytes,
2767 parent, ref_root, level - 1, 0,
2768 for_cow);
2769 if (ret)
2770 goto fail;
2771 }
2772 }
2773 return 0;
2774fail:
2775 return ret;
2776}
2777
2778int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2779 struct extent_buffer *buf, int full_backref, int for_cow)
2780{
2781 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
2782}
2783
2784int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2785 struct extent_buffer *buf, int full_backref, int for_cow)
2786{
2787 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
2788}
2789
2790static int write_one_cache_group(struct btrfs_trans_handle *trans,
2791 struct btrfs_root *root,
2792 struct btrfs_path *path,
2793 struct btrfs_block_group_cache *cache)
2794{
2795 int ret;
2796 struct btrfs_root *extent_root = root->fs_info->extent_root;
2797 unsigned long bi;
2798 struct extent_buffer *leaf;
2799
2800 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
2801 if (ret < 0)
2802 goto fail;
2803 BUG_ON(ret); /* Corruption */
2804
2805 leaf = path->nodes[0];
2806 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2807 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2808 btrfs_mark_buffer_dirty(leaf);
2809 btrfs_release_path(path);
2810fail:
2811 if (ret) {
2812 btrfs_abort_transaction(trans, root, ret);
2813 return ret;
2814 }
2815 return 0;
2816
2817}
2818
2819static struct btrfs_block_group_cache *
2820next_block_group(struct btrfs_root *root,
2821 struct btrfs_block_group_cache *cache)
2822{
2823 struct rb_node *node;
2824 spin_lock(&root->fs_info->block_group_cache_lock);
2825 node = rb_next(&cache->cache_node);
2826 btrfs_put_block_group(cache);
2827 if (node) {
2828 cache = rb_entry(node, struct btrfs_block_group_cache,
2829 cache_node);
2830 btrfs_get_block_group(cache);
2831 } else
2832 cache = NULL;
2833 spin_unlock(&root->fs_info->block_group_cache_lock);
2834 return cache;
2835}
2836
2837static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2838 struct btrfs_trans_handle *trans,
2839 struct btrfs_path *path)
2840{
2841 struct btrfs_root *root = block_group->fs_info->tree_root;
2842 struct inode *inode = NULL;
2843 u64 alloc_hint = 0;
2844 int dcs = BTRFS_DC_ERROR;
2845 int num_pages = 0;
2846 int retries = 0;
2847 int ret = 0;
2848
2849 /*
2850 * If this block group is smaller than 100 megs, don't bother caching
2851 * its free space.
2852 */
2853 if (block_group->key.offset < (100 * 1024 * 1024)) {
2854 spin_lock(&block_group->lock);
2855 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2856 spin_unlock(&block_group->lock);
2857 return 0;
2858 }
2859
2860again:
2861 inode = lookup_free_space_inode(root, block_group, path);
2862 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2863 ret = PTR_ERR(inode);
2864 btrfs_release_path(path);
2865 goto out;
2866 }
2867
2868 if (IS_ERR(inode)) {
2869 BUG_ON(retries);
2870 retries++;
2871
2872 if (block_group->ro)
2873 goto out_free;
2874
2875 ret = create_free_space_inode(root, trans, block_group, path);
2876 if (ret)
2877 goto out_free;
2878 goto again;
2879 }
2880
2881 /* We've already setup this transaction, go ahead and exit */
2882 if (block_group->cache_generation == trans->transid &&
2883 i_size_read(inode)) {
2884 dcs = BTRFS_DC_SETUP;
2885 goto out_put;
2886 }
2887
2888 /*
2889 * We want to set the generation to 0, that way if anything goes wrong
2890 * from here on out we know not to trust this cache when we load up next
2891 * time.
2892 */
2893 BTRFS_I(inode)->generation = 0;
2894 ret = btrfs_update_inode(trans, root, inode);
2895 WARN_ON(ret);
2896
2897 if (i_size_read(inode) > 0) {
2898 ret = btrfs_truncate_free_space_cache(root, trans, path,
2899 inode);
2900 if (ret)
2901 goto out_put;
2902 }
2903
2904 spin_lock(&block_group->lock);
2905 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2906 /* We're not cached, don't bother trying to write stuff out */
2907 dcs = BTRFS_DC_WRITTEN;
2908 spin_unlock(&block_group->lock);
2909 goto out_put;
2910 }
2911 spin_unlock(&block_group->lock);
2912
2913 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
2914 if (!num_pages)
2915 num_pages = 1;
2916
2917 /*
2918 * Just to make absolutely sure we have enough space, we're going to
2919 * preallocate 16 pages worth of space for each gigabyte of block group. In
2920 * practice we ought to use at most 8, but we need extra space so we can
2921 * add our header and have a terminator between the extents and the
2922 * bitmaps.
2923 */
2924 num_pages *= 16;
2925 num_pages *= PAGE_CACHE_SIZE;
2926
2927 ret = btrfs_check_data_free_space(inode, num_pages);
2928 if (ret)
2929 goto out_put;
2930
2931 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2932 num_pages, num_pages,
2933 &alloc_hint);
2934 if (!ret)
2935 dcs = BTRFS_DC_SETUP;
2936 btrfs_free_reserved_data_space(inode, num_pages);
2937
2938out_put:
2939 iput(inode);
2940out_free:
2941 btrfs_release_path(path);
2942out:
2943 spin_lock(&block_group->lock);
2944 if (!ret && dcs == BTRFS_DC_SETUP)
2945 block_group->cache_generation = trans->transid;
2946 block_group->disk_cache_state = dcs;
2947 spin_unlock(&block_group->lock);
2948
2949 return ret;
2950}
2951
2952int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2953 struct btrfs_root *root)
2954{
2955 struct btrfs_block_group_cache *cache;
2956 int err = 0;
2957 struct btrfs_path *path;
2958 u64 last = 0;
2959
2960 path = btrfs_alloc_path();
2961 if (!path)
2962 return -ENOMEM;
2963
2964again:
2965 while (1) {
2966 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2967 while (cache) {
2968 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2969 break;
2970 cache = next_block_group(root, cache);
2971 }
2972 if (!cache) {
2973 if (last == 0)
2974 break;
2975 last = 0;
2976 continue;
2977 }
2978 err = cache_save_setup(cache, trans, path);
2979 last = cache->key.objectid + cache->key.offset;
2980 btrfs_put_block_group(cache);
2981 }
2982
2983 while (1) {
2984 if (last == 0) {
2985 err = btrfs_run_delayed_refs(trans, root,
2986 (unsigned long)-1);
2987 if (err) /* File system offline */
2988 goto out;
2989 }
2990
2991 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2992 while (cache) {
2993 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
2994 btrfs_put_block_group(cache);
2995 goto again;
2996 }
2997
2998 if (cache->dirty)
2999 break;
3000 cache = next_block_group(root, cache);
3001 }
3002 if (!cache) {
3003 if (last == 0)
3004 break;
3005 last = 0;
3006 continue;
3007 }
3008
3009 if (cache->disk_cache_state == BTRFS_DC_SETUP)
3010 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
3011 cache->dirty = 0;
3012 last = cache->key.objectid + cache->key.offset;
3013
3014 err = write_one_cache_group(trans, root, path, cache);
3015 if (err) /* File system offline */
3016 goto out;
3017
3018 btrfs_put_block_group(cache);
3019 }
3020
3021 while (1) {
3022 /*
3023 * I don't think this is needed since we're just marking our
3024 * preallocated extent as written, but it can't hurt, just in
3025 * case.
3026 */
3027 if (last == 0) {
3028 err = btrfs_run_delayed_refs(trans, root,
3029 (unsigned long)-1);
3030 if (err) /* File system offline */
3031 goto out;
3032 }
3033
3034 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3035 while (cache) {
3036 /*
3037 * Really this shouldn't happen, but it could if we
3038 * couldn't write the entire preallocated extent and
3039 * splitting the extent resulted in a new block.
3040 */
3041 if (cache->dirty) {
3042 btrfs_put_block_group(cache);
3043 goto again;
3044 }
3045 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3046 break;
3047 cache = next_block_group(root, cache);
3048 }
3049 if (!cache) {
3050 if (last == 0)
3051 break;
3052 last = 0;
3053 continue;
3054 }
3055
3056 err = btrfs_write_out_cache(root, trans, cache, path);
3057
3058 /*
3059 * If we didn't have an error then the cache state is still
3060 * NEED_WRITE, so we can set it to WRITTEN.
3061 */
3062 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3063 cache->disk_cache_state = BTRFS_DC_WRITTEN;
3064 last = cache->key.objectid + cache->key.offset;
3065 btrfs_put_block_group(cache);
3066 }
3067out:
3068
3069 btrfs_free_path(path);
3070 return err;
3071}
3072
3073int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3074{
3075 struct btrfs_block_group_cache *block_group;
3076 int readonly = 0;
3077
3078 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3079 if (!block_group || block_group->ro)
3080 readonly = 1;
3081 if (block_group)
3082 btrfs_put_block_group(block_group);
3083 return readonly;
3084}
3085
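/*
 * Account @total_bytes and @bytes_used to the space_info matching @flags,
 * allocating a new space_info if this is the first block group of that
 * type.  Mirrored profiles (DUP/RAID1/RAID10) count double against the
 * on-disk totals.
 */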
3086static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3087 u64 total_bytes, u64 bytes_used,
3088 struct btrfs_space_info **space_info)
3089{
3090 struct btrfs_space_info *found;
3091 int i;
3092 int factor;
3093
3094 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3095 BTRFS_BLOCK_GROUP_RAID10))
3096 factor = 2;
3097 else
3098 factor = 1;
3099
3100 found = __find_space_info(info, flags);
3101 if (found) {
3102 spin_lock(&found->lock);
3103 found->total_bytes += total_bytes;
3104 found->disk_total += total_bytes * factor;
3105 found->bytes_used += bytes_used;
3106 found->disk_used += bytes_used * factor;
3107 found->full = 0;
3108 spin_unlock(&found->lock);
3109 *space_info = found;
3110 return 0;
3111 }
3112 found = kzalloc(sizeof(*found), GFP_NOFS);
3113 if (!found)
3114 return -ENOMEM;
3115
3116 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3117 INIT_LIST_HEAD(&found->block_groups[i]);
3118 init_rwsem(&found->groups_sem);
3119 spin_lock_init(&found->lock);
3120 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3121 found->total_bytes = total_bytes;
3122 found->disk_total = total_bytes * factor;
3123 found->bytes_used = bytes_used;
3124 found->disk_used = bytes_used * factor;
3125 found->bytes_pinned = 0;
3126 found->bytes_reserved = 0;
3127 found->bytes_readonly = 0;
3128 found->bytes_may_use = 0;
3129 found->full = 0;
3130 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3131 found->chunk_alloc = 0;
3132 found->flush = 0;
3133 init_waitqueue_head(&found->wait);
3134 *space_info = found;
3135 list_add_rcu(&found->list, &info->space_info);
3136 return 0;
3137}
3138
3139static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3140{
3141 u64 extra_flags = chunk_to_extended(flags) &
3142 BTRFS_EXTENDED_PROFILE_MASK;
3143
3144 if (flags & BTRFS_BLOCK_GROUP_DATA)
3145 fs_info->avail_data_alloc_bits |= extra_flags;
3146 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3147 fs_info->avail_metadata_alloc_bits |= extra_flags;
3148 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3149 fs_info->avail_system_alloc_bits |= extra_flags;
3150}
3151
3152/*
3153 * returns target flags in extended format or 0 if restripe for this
3154 * chunk_type is not in progress
3155 *
3156 * should be called with either volume_mutex or balance_lock held
3157 */
3158static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3159{
3160 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3161 u64 target = 0;
3162
3163 if (!bctl)
3164 return 0;
3165
3166 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3167 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3168 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3169 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3170 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3171 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3172 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3173 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3174 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3175 }
3176
3177 return target;
3178}
3179
3180/*
3181 * @flags: available profiles in extended format (see ctree.h)
3182 *
3183 * Returns reduced profile in chunk format. If profile changing is in
3184 * progress (either running or paused) picks the target profile (if it's
3185 * already available), otherwise falls back to plain reducing.
3186 */
3187u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3188{
3189 /*
3190 * we add in the count of missing devices because we want
3191 * to make sure that any RAID levels on a degraded FS
3192 * continue to be honored.
3193 */
3194 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3195 root->fs_info->fs_devices->missing_devices;
3196 u64 target;
3197
3198 /*
3199 * see if restripe for this chunk_type is in progress, if so
3200 * try to reduce to the target profile
3201 */
3202 spin_lock(&root->fs_info->balance_lock);
3203 target = get_restripe_target(root->fs_info, flags);
3204 if (target) {
3205 /* pick target profile only if it's already available */
3206 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3207 spin_unlock(&root->fs_info->balance_lock);
3208 return extended_to_chunk(target);
3209 }
3210 }
3211 spin_unlock(&root->fs_info->balance_lock);
3212
3213 if (num_devices == 1)
3214 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
3215 if (num_devices < 4)
3216 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
3217
3218 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
3219 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3220 BTRFS_BLOCK_GROUP_RAID10))) {
3221 flags &= ~BTRFS_BLOCK_GROUP_DUP;
3222 }
3223
3224 if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
3225 (flags & BTRFS_BLOCK_GROUP_RAID10)) {
3226 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
3227 }
3228
3229 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
3230 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
3231 (flags & BTRFS_BLOCK_GROUP_RAID10) |
3232 (flags & BTRFS_BLOCK_GROUP_DUP))) {
3233 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
3234 }
3235
3236 return extended_to_chunk(flags);
3237}
3238
3239static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
3240{
3241 if (flags & BTRFS_BLOCK_GROUP_DATA)
3242 flags |= root->fs_info->avail_data_alloc_bits;
3243 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3244 flags |= root->fs_info->avail_system_alloc_bits;
3245 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3246 flags |= root->fs_info->avail_metadata_alloc_bits;
3247
3248 return btrfs_reduce_alloc_profile(root, flags);
3249}
3250
3251u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3252{
3253 u64 flags;
3254
3255 if (data)
3256 flags = BTRFS_BLOCK_GROUP_DATA;
3257 else if (root == root->fs_info->chunk_root)
3258 flags = BTRFS_BLOCK_GROUP_SYSTEM;
3259 else
3260 flags = BTRFS_BLOCK_GROUP_METADATA;
3261
3262 return get_alloc_profile(root, flags);
3263}
3264
3265void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
3266{
3267 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
3268 BTRFS_BLOCK_GROUP_DATA);
3269}
3270
3271/*
3272 * This will check the space that the inode allocates from to make sure we have
3273 * enough space for bytes.
3274 */
3275int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3276{
3277 struct btrfs_space_info *data_sinfo;
3278 struct btrfs_root *root = BTRFS_I(inode)->root;
3279 u64 used;
3280 int ret = 0, committed = 0, alloc_chunk = 1;
3281
3282 /* make sure bytes are sectorsize aligned */
3283 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3284
3285 if (root == root->fs_info->tree_root ||
3286 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3287 alloc_chunk = 0;
3288 committed = 1;
3289 }
3290
3291 data_sinfo = BTRFS_I(inode)->space_info;
3292 if (!data_sinfo)
3293 goto alloc;
3294
3295again:
3296 /* make sure we have enough space to handle the data first */
3297 spin_lock(&data_sinfo->lock);
3298 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3299 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3300 data_sinfo->bytes_may_use;
3301
3302 if (used + bytes > data_sinfo->total_bytes) {
3303 struct btrfs_trans_handle *trans;
3304
3305 /*
3306 * if we don't have enough free bytes in this space then we need
3307 * to alloc a new chunk.
3308 */
3309 if (!data_sinfo->full && alloc_chunk) {
3310 u64 alloc_target;
3311
3312 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3313 spin_unlock(&data_sinfo->lock);
3314alloc:
3315 alloc_target = btrfs_get_alloc_profile(root, 1);
3316 trans = btrfs_join_transaction(root);
3317 if (IS_ERR(trans))
3318 return PTR_ERR(trans);
3319
3320 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3321 bytes + 2 * 1024 * 1024,
3322 alloc_target,
3323 CHUNK_ALLOC_NO_FORCE);
3324 btrfs_end_transaction(trans, root);
3325 if (ret < 0) {
3326 if (ret != -ENOSPC)
3327 return ret;
3328 else
3329 goto commit_trans;
3330 }
3331
3332 if (!data_sinfo) {
3333 btrfs_set_inode_space_info(root, inode);
3334 data_sinfo = BTRFS_I(inode)->space_info;
3335 }
3336 goto again;
3337 }
3338
3339 /*
3340 * If we have less pinned bytes than we want to allocate then
3341 * don't bother committing the transaction, it won't help us.
3342 */
3343 if (data_sinfo->bytes_pinned < bytes)
3344 committed = 1;
3345 spin_unlock(&data_sinfo->lock);
3346
3347 /* commit the current transaction and try again */
3348commit_trans:
3349 if (!committed &&
3350 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3351 committed = 1;
3352 trans = btrfs_join_transaction(root);
3353 if (IS_ERR(trans))
3354 return PTR_ERR(trans);
3355 ret = btrfs_commit_transaction(trans, root);
3356 if (ret)
3357 return ret;
3358 goto again;
3359 }
3360
3361 return -ENOSPC;
3362 }
3363 data_sinfo->bytes_may_use += bytes;
3364 trace_btrfs_space_reservation(root->fs_info, "space_info",
3365 data_sinfo->flags, bytes, 1);
3366 spin_unlock(&data_sinfo->lock);
3367
3368 return 0;
3369}
3370
3371/*
3372 * Called if we need to clear a data reservation for this inode.
3373 */
3374void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3375{
3376 struct btrfs_root *root = BTRFS_I(inode)->root;
3377 struct btrfs_space_info *data_sinfo;
3378
3379 /* make sure bytes are sectorsize aligned */
3380 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3381
3382 data_sinfo = BTRFS_I(inode)->space_info;
3383 spin_lock(&data_sinfo->lock);
3384 data_sinfo->bytes_may_use -= bytes;
3385 trace_btrfs_space_reservation(root->fs_info, "space_info",
3386 data_sinfo->flags, bytes, 0);
3387 spin_unlock(&data_sinfo->lock);
3388}
3389
3390static void force_metadata_allocation(struct btrfs_fs_info *info)
3391{
3392 struct list_head *head = &info->space_info;
3393 struct btrfs_space_info *found;
3394
3395 rcu_read_lock();
3396 list_for_each_entry_rcu(found, head, list) {
3397 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3398 found->force_alloc = CHUNK_ALLOC_FORCE;
3399 }
3400 rcu_read_unlock();
3401}
3402
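/*
 * Decide whether a new chunk should be allocated for @sinfo.
 * CHUNK_ALLOC_FORCE always allocates; CHUNK_ALLOC_LIMITED allocates once
 * free space drops below max(64M, 1% of the FS); otherwise we skip the
 * allocation while the space info is already bigger than the threshold
 * (256M or 2% of the FS, 32M for system chunks) and less than 80% of it
 * is used.
 */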
3403static int should_alloc_chunk(struct btrfs_root *root,
3404 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3405 int force)
3406{
3407 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3408 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3409 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3410 u64 thresh;
3411
3412 if (force == CHUNK_ALLOC_FORCE)
3413 return 1;
3414
3415 /*
3416 * We need to take into account the global rsv because for all intents
3417 * and purposes it's used space. Don't worry about locking the
3418 * global_rsv, it doesn't change except when the transaction commits.
3419 */
3420 num_allocated += global_rsv->size;
3421
3422 /*
3423 * in limited mode, we want to have some free space up to
3424 * about 1% of the FS size.
3425 */
3426 if (force == CHUNK_ALLOC_LIMITED) {
3427 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
3428 thresh = max_t(u64, 64 * 1024 * 1024,
3429 div_factor_fine(thresh, 1));
3430
3431 if (num_bytes - num_allocated < thresh)
3432 return 1;
3433 }
3434 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
3435
3436 /* 256MB or 2% of the FS */
3437 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
3438 /* system chunks need a much smaller threshold */
3439 if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM)
3440 thresh = 32 * 1024 * 1024;
3441
3442 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
3443 return 0;
3444 return 1;
3445}
3446
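/*
 * Worst-case metadata needed to update the device items and chunk tree
 * when a chunk of @type is allocated: one item per stripe (all rw devices
 * for RAID0/10, two for RAID1, one for DUP/single) plus the chunk item
 * itself.
 */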
3447static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
3448{
3449 u64 num_dev;
3450
3451 if (type & BTRFS_BLOCK_GROUP_RAID10 ||
3452 type & BTRFS_BLOCK_GROUP_RAID0)
3453 num_dev = root->fs_info->fs_devices->rw_devices;
3454 else if (type & BTRFS_BLOCK_GROUP_RAID1)
3455 num_dev = 2;
3456 else
3457 num_dev = 1; /* DUP or single */
3458
3459 /* metadata for updating devices and chunk tree */
3460 return btrfs_calc_trans_metadata_size(root, num_dev + 1);
3461}
3462
3463static void check_system_chunk(struct btrfs_trans_handle *trans,
3464 struct btrfs_root *root, u64 type)
3465{
3466 struct btrfs_space_info *info;
3467 u64 left;
3468 u64 thresh;
3469
3470 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3471 spin_lock(&info->lock);
3472 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
3473 info->bytes_reserved - info->bytes_readonly;
3474 spin_unlock(&info->lock);
3475
3476 thresh = get_system_chunk_thresh(root, type);
3477 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
3478 printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
3479 left, thresh, type);
3480 dump_space_info(info, 0, 0);
3481 }
3482
3483 if (left < thresh) {
3484 u64 flags;
3485
3486 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
3487 btrfs_alloc_chunk(trans, root, flags);
3488 }
3489}
3490
3491static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3492 struct btrfs_root *extent_root, u64 alloc_bytes,
3493 u64 flags, int force)
3494{
3495 struct btrfs_space_info *space_info;
3496 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3497 int wait_for_alloc = 0;
3498 int ret = 0;
3499
3500 space_info = __find_space_info(extent_root->fs_info, flags);
3501 if (!space_info) {
3502 ret = update_space_info(extent_root->fs_info, flags,
3503 0, 0, &space_info);
3504 BUG_ON(ret); /* -ENOMEM */
3505 }
3506 BUG_ON(!space_info); /* Logic error */
3507
3508again:
3509 spin_lock(&space_info->lock);
3510 if (force < space_info->force_alloc)
3511 force = space_info->force_alloc;
3512 if (space_info->full) {
3513 spin_unlock(&space_info->lock);
3514 return 0;
3515 }
3516
3517 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
3518 spin_unlock(&space_info->lock);
3519 return 0;
3520 } else if (space_info->chunk_alloc) {
3521 wait_for_alloc = 1;
3522 } else {
3523 space_info->chunk_alloc = 1;
3524 }
3525
3526 spin_unlock(&space_info->lock);
3527
3528 mutex_lock(&fs_info->chunk_mutex);
3529
3530 /*
3531 * The chunk_mutex is held throughout the entirety of a chunk
3532 * allocation, so once we've acquired the chunk_mutex we know that the
3533 * other guy is done and we need to recheck and see if we should
3534 * allocate.
3535 */
3536 if (wait_for_alloc) {
3537 mutex_unlock(&fs_info->chunk_mutex);
3538 wait_for_alloc = 0;
3539 goto again;
3540 }
3541
3542 /*
3543 * If we have mixed data/metadata chunks we want to make sure we keep
3544 * allocating mixed chunks instead of individual chunks.
3545 */
3546 if (btrfs_mixed_space_info(space_info))
3547 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3548
3549 /*
3550 * if we're doing a data chunk, go ahead and make sure that
3551 * we keep a reasonable number of metadata chunks allocated in the
3552 * FS as well.
3553 */
3554 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3555 fs_info->data_chunk_allocations++;
3556 if (!(fs_info->data_chunk_allocations %
3557 fs_info->metadata_ratio))
3558 force_metadata_allocation(fs_info);
3559 }
3560
3561 /*
3562 * Check if we have enough space in SYSTEM chunk because we may need
3563 * to update devices.
3564 */
3565 check_system_chunk(trans, extent_root, flags);
3566
3567 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3568 if (ret < 0 && ret != -ENOSPC)
3569 goto out;
3570
3571 spin_lock(&space_info->lock);
3572 if (ret)
3573 space_info->full = 1;
3574 else
3575 ret = 1;
3576
3577 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3578 space_info->chunk_alloc = 0;
3579 spin_unlock(&space_info->lock);
3580out:
3581 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3582 return ret;
3583}
3584
3585/*
3586 * shrink metadata reservation for delalloc
3587 */
3588static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
3589 bool wait_ordered)
3590{
3591 struct btrfs_block_rsv *block_rsv;
3592 struct btrfs_space_info *space_info;
3593 struct btrfs_trans_handle *trans;
3594 u64 reserved;
3595 u64 max_reclaim;
3596 u64 reclaimed = 0;
3597 long time_left;
3598 unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3599 int loops = 0;
3600 unsigned long progress;
3601
3602 trans = (struct btrfs_trans_handle *)current->journal_info;
3603 block_rsv = &root->fs_info->delalloc_block_rsv;
3604 space_info = block_rsv->space_info;
3605
3606 smp_mb();
3607 reserved = space_info->bytes_may_use;
3608 progress = space_info->reservation_progress;
3609
3610 if (reserved == 0)
3611 return 0;
3612
3613 smp_mb();
3614 if (root->fs_info->delalloc_bytes == 0) {
3615 if (trans)
3616 return 0;
3617 btrfs_wait_ordered_extents(root, 0, 0);
3618 return 0;
3619 }
3620
3621 max_reclaim = min(reserved, to_reclaim);
3622 nr_pages = max_t(unsigned long, nr_pages,
3623 max_reclaim >> PAGE_CACHE_SHIFT);
3624 while (loops < 1024) {
3625 /* have the flusher threads jump in and do some IO */
3626 smp_mb();
3627 nr_pages = min_t(unsigned long, nr_pages,
3628 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
3629 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
3630 WB_REASON_FS_FREE_SPACE);
3631
3632 spin_lock(&space_info->lock);
3633 if (reserved > space_info->bytes_may_use)
3634 reclaimed += reserved - space_info->bytes_may_use;
3635 reserved = space_info->bytes_may_use;
3636 spin_unlock(&space_info->lock);
3637
3638 loops++;
3639
3640 if (reserved == 0 || reclaimed >= max_reclaim)
3641 break;
3642
3643 if (trans && trans->transaction->blocked)
3644 return -EAGAIN;
3645
3646 if (wait_ordered && !trans) {
3647 btrfs_wait_ordered_extents(root, 0, 0);
3648 } else {
3649 time_left = schedule_timeout_interruptible(1);
3650
3651 /* We were interrupted, exit */
3652 if (time_left)
3653 break;
3654 }
3655
3656 /* we've kicked the IO a few times, if anything has been freed,
3657 * exit. There is no sense in looping here for a long time
3658 * when we really need to commit the transaction, or there are
3659 * just too many writers without enough free space
3660 */
3661
3662 if (loops > 3) {
3663 smp_mb();
3664 if (progress != space_info->reservation_progress)
3665 break;
3666 }
3667
3668 }
3669
3670 return reclaimed >= to_reclaim;
3671}
3672
3673/**
3674 * may_commit_transaction - possibly commit the transaction if it's ok to do so
3675 * @root - the root we're allocating for
3676 * @bytes - the number of bytes we want to reserve
3677 * @force - force the commit
3678 *
3679 * This will check to make sure that committing the transaction will actually
3680 * get us somewhere and then commit the transaction if it does. Otherwise it
3681 * will return -ENOSPC.
3682 */
3683static int may_commit_transaction(struct btrfs_root *root,
3684 struct btrfs_space_info *space_info,
3685 u64 bytes, int force)
3686{
3687 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
3688 struct btrfs_trans_handle *trans;
3689
3690 trans = (struct btrfs_trans_handle *)current->journal_info;
3691 if (trans)
3692 return -EAGAIN;
3693
3694 if (force)
3695 goto commit;
3696
3697 /* See if there is enough pinned space to make this reservation */
3698 spin_lock(&space_info->lock);
3699 if (space_info->bytes_pinned >= bytes) {
3700 spin_unlock(&space_info->lock);
3701 goto commit;
3702 }
3703 spin_unlock(&space_info->lock);
3704
3705 /*
3706 * See if there is some space in the delayed insertion reservation for
3707 * this reservation.
3708 */
3709 if (space_info != delayed_rsv->space_info)
3710 return -ENOSPC;
3711
3712 spin_lock(&space_info->lock);
3713 spin_lock(&delayed_rsv->lock);
3714 if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
3715 spin_unlock(&delayed_rsv->lock);
3716 spin_unlock(&space_info->lock);
3717 return -ENOSPC;
3718 }
3719 spin_unlock(&delayed_rsv->lock);
3720 spin_unlock(&space_info->lock);
3721
3722commit:
3723 trans = btrfs_join_transaction(root);
3724 if (IS_ERR(trans))
3725 return -ENOSPC;
3726
3727 return btrfs_commit_transaction(trans, root);
3728}
3729
3730/**
3731 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
3732 * @root - the root we're allocating for
3733 * @block_rsv - the block_rsv we're allocating for
3734 * @orig_bytes - the number of bytes we want
3735 * @flush - whether or not we can flush to make our reservation
3736 *
3737 * This will reserve orig_bytes number of bytes from the space info associated
3738 * with the block_rsv. If there is not enough space it will make an attempt to
3739 * flush out space to make room. It will do this by flushing delalloc if
3740 * possible or committing the transaction. If flush is 0 then no attempts to
3741 * regain reservations will be made and this will fail if there is not enough
3742 * space already.
3743 */
3744static int reserve_metadata_bytes(struct btrfs_root *root,
3745 struct btrfs_block_rsv *block_rsv,
3746 u64 orig_bytes, int flush)
3747{
3748 struct btrfs_space_info *space_info = block_rsv->space_info;
3749 u64 used;
3750 u64 num_bytes = orig_bytes;
3751 int retries = 0;
3752 int ret = 0;
3753 bool committed = false;
3754 bool flushing = false;
3755 bool wait_ordered = false;
3756
3757again:
3758 ret = 0;
3759 spin_lock(&space_info->lock);
3760 /*
3761 * We only want to wait if somebody other than us is flushing and we are
3762 * actually allowed to flush.
3763 */
3764 while (flush && !flushing && space_info->flush) {
3765 spin_unlock(&space_info->lock);
3766 /*
3767 * If we have a trans handle we can't wait because the flusher
3768 * may have to commit the transaction, which would mean we would
3769 * deadlock since we are waiting for the flusher to finish, but
3770 * hold the current transaction open.
3771 */
3772 if (current->journal_info)
3773 return -EAGAIN;
3774 ret = wait_event_killable(space_info->wait, !space_info->flush);
3775 /* Must have been killed, return */
3776 if (ret)
3777 return -EINTR;
3778
3779 spin_lock(&space_info->lock);
3780 }
3781
3782 ret = -ENOSPC;
3783 used = space_info->bytes_used + space_info->bytes_reserved +
3784 space_info->bytes_pinned + space_info->bytes_readonly +
3785 space_info->bytes_may_use;
3786
3787 /*
3788 * The idea here is that if we've not already over-reserved the block group
3789 * then we can go ahead and save our reservation first and then start
3790 * flushing if we need to. Otherwise if we've already overcommitted
3791 * let's start flushing stuff first and then come back and try to make
3792 * our reservation.
3793 */
3794 if (used <= space_info->total_bytes) {
3795 if (used + orig_bytes <= space_info->total_bytes) {
3796 space_info->bytes_may_use += orig_bytes;
3797 trace_btrfs_space_reservation(root->fs_info,
3798 "space_info", space_info->flags, orig_bytes, 1);
3799 ret = 0;
3800 } else {
3801 /*
3802 * Ok set num_bytes to orig_bytes since we aren't
3803 * overcommitted, this way we only try and reclaim what
3804 * we need.
3805 */
3806 num_bytes = orig_bytes;
3807 }
3808 } else {
3809 /*
3810 * Ok we're over committed, set num_bytes to the overcommitted
3811 * amount plus the amount of bytes that we need for this
3812 * reservation.
3813 */
3814 wait_ordered = true;
3815 num_bytes = used - space_info->total_bytes +
3816 (orig_bytes * (retries + 1));
3817 }
3818
3819 if (ret) {
3820 u64 profile = btrfs_get_alloc_profile(root, 0);
3821 u64 avail;
3822
3823 /*
3824 * If we have a lot of space that's pinned, don't bother doing
3825 * the overcommit dance yet and just commit the transaction.
3826 */
3827 avail = (space_info->total_bytes - space_info->bytes_used) * 8;
3828 do_div(avail, 10);
3829 if (space_info->bytes_pinned >= avail && flush && !committed) {
3830 space_info->flush = 1;
3831 flushing = true;
3832 spin_unlock(&space_info->lock);
3833 ret = may_commit_transaction(root, space_info,
3834 orig_bytes, 1);
3835 if (ret)
3836 goto out;
3837 committed = true;
3838 goto again;
3839 }
3840
3841 spin_lock(&root->fs_info->free_chunk_lock);
3842 avail = root->fs_info->free_chunk_space;
3843
3844 /*
3845 * If we have dup, raid1 or raid10 then only half of the free
3846 * space is actually usable.
3847 */
3848 if (profile & (BTRFS_BLOCK_GROUP_DUP |
3849 BTRFS_BLOCK_GROUP_RAID1 |
3850 BTRFS_BLOCK_GROUP_RAID10))
3851 avail >>= 1;
3852
3853 /*
3854 * If we aren't flushing don't let us overcommit too much, say
3855 * 1/8th of the space. If we can flush, let it overcommit up to
3856 * 1/2 of the space.
3857 */
3858 if (flush)
3859 avail >>= 3;
3860 else
3861 avail >>= 1;
3862 spin_unlock(&root->fs_info->free_chunk_lock);
3863
3864 if (used + num_bytes < space_info->total_bytes + avail) {
3865 space_info->bytes_may_use += orig_bytes;
3866 trace_btrfs_space_reservation(root->fs_info,
3867 "space_info", space_info->flags, orig_bytes, 1);
3868 ret = 0;
3869 } else {
3870 wait_ordered = true;
3871 }
3872 }
3873
3874 /*
3875 * Couldn't make our reservation, save our place so while we're trying
3876 * to reclaim space we can actually use it instead of somebody else
3877 * stealing it from us.
3878 */
3879 if (ret && flush) {
3880 flushing = true;
3881 space_info->flush = 1;
3882 }
3883
3884 spin_unlock(&space_info->lock);
3885
3886 if (!ret || !flush)
3887 goto out;
3888
3889 /*
3890 * We do synchronous shrinking since we don't actually unreserve
3891 * metadata until after the IO is completed.
3892 */
3893 ret = shrink_delalloc(root, num_bytes, wait_ordered);
3894 if (ret < 0)
3895 goto out;
3896
3897 ret = 0;
3898
3899 /*
3900 * So if we were overcommitted it's possible that somebody else flushed
3901 * out enough space and we simply didn't have enough space to reclaim,
3902 * so go back around and try again.
3903 */
3904 if (retries < 2) {
3905 wait_ordered = true;
3906 retries++;
3907 goto again;
3908 }
3909
3910 ret = -ENOSPC;
3911 if (committed)
3912 goto out;
3913
3914 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
3915 if (!ret) {
3916 committed = true;
3917 goto again;
3918 }
3919
3920out:
3921 if (flushing) {
3922 spin_lock(&space_info->lock);
3923 space_info->flush = 0;
3924 wake_up_all(&space_info->wait);
3925 spin_unlock(&space_info->lock);
3926 }
3927 return ret;
3928}
3929
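/*
 * Pick which block reservation to charge for a tree operation: the
 * transaction's rsv for COW-able roots and the csum root, falling back to
 * the root's own rsv and finally to the (always empty) empty_block_rsv.
 */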
3930static struct btrfs_block_rsv *get_block_rsv(
3931 const struct btrfs_trans_handle *trans,
3932 const struct btrfs_root *root)
3933{
3934 struct btrfs_block_rsv *block_rsv = NULL;
3935
3936 if (root->ref_cows || root == root->fs_info->csum_root)
3937 block_rsv = trans->block_rsv;
3938
3939 if (!block_rsv)
3940 block_rsv = root->block_rsv;
3941
3942 if (!block_rsv)
3943 block_rsv = &root->fs_info->empty_block_rsv;
3944
3945 return block_rsv;
3946}
3947
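/*
 * Consume num_bytes from an existing reservation.  Returns -ENOSPC without
 * touching the rsv if it doesn't have that many bytes reserved.
 */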
3948static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
3949 u64 num_bytes)
3950{
3951 int ret = -ENOSPC;
3952 spin_lock(&block_rsv->lock);
3953 if (block_rsv->reserved >= num_bytes) {
3954 block_rsv->reserved -= num_bytes;
3955 if (block_rsv->reserved < block_rsv->size)
3956 block_rsv->full = 0;
3957 ret = 0;
3958 }
3959 spin_unlock(&block_rsv->lock);
3960 return ret;
3961}
3962
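/*
 * Add freshly reserved bytes to a block reservation, optionally growing its
 * target size along with its reserved count.
 */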
3963static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3964 u64 num_bytes, int update_size)
3965{
3966 spin_lock(&block_rsv->lock);
3967 block_rsv->reserved += num_bytes;
3968 if (update_size)
3969 block_rsv->size += num_bytes;
3970 else if (block_rsv->reserved >= block_rsv->size)
3971 block_rsv->full = 1;
3972 spin_unlock(&block_rsv->lock);
3973}
3974
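/*
 * Shrink a block reservation's size by num_bytes ((u64)-1 releases it
 * entirely).  Any excess reserved space first tops up @dest, and whatever is
 * left over goes back to the space_info's bytes_may_use.
 */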
3975static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
3976 struct btrfs_block_rsv *block_rsv,
3977 struct btrfs_block_rsv *dest, u64 num_bytes)
3978{
3979 struct btrfs_space_info *space_info = block_rsv->space_info;
3980
3981 spin_lock(&block_rsv->lock);
3982 if (num_bytes == (u64)-1)
3983 num_bytes = block_rsv->size;
3984 block_rsv->size -= num_bytes;
3985 if (block_rsv->reserved >= block_rsv->size) {
3986 num_bytes = block_rsv->reserved - block_rsv->size;
3987 block_rsv->reserved = block_rsv->size;
3988 block_rsv->full = 1;
3989 } else {
3990 num_bytes = 0;
3991 }
3992 spin_unlock(&block_rsv->lock);
3993
3994 if (num_bytes > 0) {
3995 if (dest) {
3996 spin_lock(&dest->lock);
3997 if (!dest->full) {
3998 u64 bytes_to_add;
3999
4000 bytes_to_add = dest->size - dest->reserved;
4001 bytes_to_add = min(num_bytes, bytes_to_add);
4002 dest->reserved += bytes_to_add;
4003 if (dest->reserved >= dest->size)
4004 dest->full = 1;
4005 num_bytes -= bytes_to_add;
4006 }
4007 spin_unlock(&dest->lock);
4008 }
4009 if (num_bytes) {
4010 spin_lock(&space_info->lock);
4011 space_info->bytes_may_use -= num_bytes;
4012 trace_btrfs_space_reservation(fs_info, "space_info",
4013 space_info->flags, num_bytes, 0);
4014 space_info->reservation_progress++;
4015 spin_unlock(&space_info->lock);
4016 }
4017 }
4018}
4019
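/*
 * Move num_bytes of already-reserved space from one block reservation to
 * another.  Fails with -ENOSPC if the source doesn't have enough reserved.
 */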
4020static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
4021 struct btrfs_block_rsv *dst, u64 num_bytes)
4022{
4023 int ret;
4024
4025 ret = block_rsv_use_bytes(src, num_bytes);
4026 if (ret)
4027 return ret;
4028
4029 block_rsv_add_bytes(dst, num_bytes, 1);
4030 return 0;
4031}
4032
4033void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
4034{
4035 memset(rsv, 0, sizeof(*rsv));
4036 spin_lock_init(&rsv->lock);
4037}
4038
4039struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
4040{
4041 struct btrfs_block_rsv *block_rsv;
4042 struct btrfs_fs_info *fs_info = root->fs_info;
4043
4044 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
4045 if (!block_rsv)
4046 return NULL;
4047
4048 btrfs_init_block_rsv(block_rsv);
4049 block_rsv->space_info = __find_space_info(fs_info,
4050 BTRFS_BLOCK_GROUP_METADATA);
4051 return block_rsv;
4052}
4053
4054void btrfs_free_block_rsv(struct btrfs_root *root,
4055 struct btrfs_block_rsv *rsv)
4056{
4057 btrfs_block_rsv_release(root, rsv, (u64)-1);
4058 kfree(rsv);
4059}
4060
4061static inline int __block_rsv_add(struct btrfs_root *root,
4062 struct btrfs_block_rsv *block_rsv,
4063 u64 num_bytes, int flush)
4064{
4065 int ret;
4066
4067 if (num_bytes == 0)
4068 return 0;
4069
4070 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
4071 if (!ret) {
4072 block_rsv_add_bytes(block_rsv, num_bytes, 1);
4073 return 0;
4074 }
4075
4076 return ret;
4077}
4078
4079int btrfs_block_rsv_add(struct btrfs_root *root,
4080 struct btrfs_block_rsv *block_rsv,
4081 u64 num_bytes)
4082{
4083 return __block_rsv_add(root, block_rsv, num_bytes, 1);
4084}
4085
4086int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
4087 struct btrfs_block_rsv *block_rsv,
4088 u64 num_bytes)
4089{
4090 return __block_rsv_add(root, block_rsv, num_bytes, 0);
4091}
4092
4093int btrfs_block_rsv_check(struct btrfs_root *root,
4094 struct btrfs_block_rsv *block_rsv, int min_factor)
4095{
4096 u64 num_bytes = 0;
4097 int ret = -ENOSPC;
4098
4099 if (!block_rsv)
4100 return 0;
4101
4102 spin_lock(&block_rsv->lock);
4103 num_bytes = div_factor(block_rsv->size, min_factor);
4104 if (block_rsv->reserved >= num_bytes)
4105 ret = 0;
4106 spin_unlock(&block_rsv->lock);
4107
4108 return ret;
4109}
4110
4111static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
4112 struct btrfs_block_rsv *block_rsv,
4113 u64 min_reserved, int flush)
4114{
4115 u64 num_bytes = 0;
4116 int ret = -ENOSPC;
4117
4118 if (!block_rsv)
4119 return 0;
4120
4121 spin_lock(&block_rsv->lock);
4122 num_bytes = min_reserved;
4123 if (block_rsv->reserved >= num_bytes)
4124 ret = 0;
4125 else
4126 num_bytes -= block_rsv->reserved;
4127 spin_unlock(&block_rsv->lock);
4128
4129 if (!ret)
4130 return 0;
4131
4132 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
4133 if (!ret) {
4134 block_rsv_add_bytes(block_rsv, num_bytes, 0);
4135 return 0;
4136 }
4137
4138 return ret;
4139}
4140
4141int btrfs_block_rsv_refill(struct btrfs_root *root,
4142 struct btrfs_block_rsv *block_rsv,
4143 u64 min_reserved)
4144{
4145 return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
4146}
4147
4148int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
4149 struct btrfs_block_rsv *block_rsv,
4150 u64 min_reserved)
4151{
4152 return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
4153}
4154
4155int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
4156 struct btrfs_block_rsv *dst_rsv,
4157 u64 num_bytes)
4158{
4159 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4160}
4161
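/*
 * Release num_bytes from a block reservation.  Any excess spills into the
 * global reservation when it shares the same space_info and isn't full.
 */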
4162void btrfs_block_rsv_release(struct btrfs_root *root,
4163 struct btrfs_block_rsv *block_rsv,
4164 u64 num_bytes)
4165{
4166 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4167 if (global_rsv->full || global_rsv == block_rsv ||
4168 block_rsv->space_info != global_rsv->space_info)
4169 global_rsv = NULL;
4170 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
4171 num_bytes);
4172}
4173
4174/*
4175 * helper to calculate size of global block reservation.
4176 * the desired value is sum of space used by extent tree,
4177 * checksum tree and root tree
4178 */
4179static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
4180{
4181 struct btrfs_space_info *sinfo;
4182 u64 num_bytes;
4183 u64 meta_used;
4184 u64 data_used;
4185 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
4186
4187 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
4188 spin_lock(&sinfo->lock);
4189 data_used = sinfo->bytes_used;
4190 spin_unlock(&sinfo->lock);
4191
4192 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4193 spin_lock(&sinfo->lock);
4194 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
4195 data_used = 0;
4196 meta_used = sinfo->bytes_used;
4197 spin_unlock(&sinfo->lock);
4198
4199 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
4200 csum_size * 2;
4201 num_bytes += div64_u64(data_used + meta_used, 50);
4202
4203 if (num_bytes * 3 > meta_used)
4204 num_bytes = div64_u64(meta_used, 3);
4205
4206 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
4207}
4208
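/*
 * Recalculate the global block reservation's target size (capped at 512M)
 * and top up or trim its reserved bytes against what the metadata
 * space_info can spare.
 */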
4209static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4210{
4211 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
4212 struct btrfs_space_info *sinfo = block_rsv->space_info;
4213 u64 num_bytes;
4214
4215 num_bytes = calc_global_metadata_size(fs_info);
4216
4217 spin_lock(&sinfo->lock);
4218 spin_lock(&block_rsv->lock);
4219
4220 block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
4221
4222 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
4223 sinfo->bytes_reserved + sinfo->bytes_readonly +
4224 sinfo->bytes_may_use;
4225
4226 if (sinfo->total_bytes > num_bytes) {
4227 num_bytes = sinfo->total_bytes - num_bytes;
4228 block_rsv->reserved += num_bytes;
4229 sinfo->bytes_may_use += num_bytes;
4230 trace_btrfs_space_reservation(fs_info, "space_info",
4231 sinfo->flags, num_bytes, 1);
4232 }
4233
4234 if (block_rsv->reserved >= block_rsv->size) {
4235 num_bytes = block_rsv->reserved - block_rsv->size;
4236 sinfo->bytes_may_use -= num_bytes;
4237 trace_btrfs_space_reservation(fs_info, "space_info",
4238 sinfo->flags, num_bytes, 0);
4239 sinfo->reservation_progress++;
4240 block_rsv->reserved = block_rsv->size;
4241 block_rsv->full = 1;
4242 }
4243
4244 spin_unlock(&block_rsv->lock);
4245 spin_unlock(&sinfo->lock);
4246}
4247
4248static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
4249{
4250 struct btrfs_space_info *space_info;
4251
4252 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4253 fs_info->chunk_block_rsv.space_info = space_info;
4254
4255 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4256 fs_info->global_block_rsv.space_info = space_info;
4257 fs_info->delalloc_block_rsv.space_info = space_info;
4258 fs_info->trans_block_rsv.space_info = space_info;
4259 fs_info->empty_block_rsv.space_info = space_info;
4260 fs_info->delayed_block_rsv.space_info = space_info;
4261
4262 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
4263 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
4264 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
4265 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
4266 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
4267
4268 update_global_block_rsv(fs_info);
4269}
4270
4271static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
4272{
4273 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
4274 (u64)-1);
4275 WARN_ON(fs_info->delalloc_block_rsv.size > 0);
4276 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
4277 WARN_ON(fs_info->trans_block_rsv.size > 0);
4278 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
4279 WARN_ON(fs_info->chunk_block_rsv.size > 0);
4280 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
4281 WARN_ON(fs_info->delayed_block_rsv.size > 0);
4282 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
4283}
4284
4285void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
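/* Give back any metadata space still reserved by a transaction handle. */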
4286 struct btrfs_root *root)
4287{
4288 if (!trans->bytes_reserved)
4289 return;
4290
4291 trace_btrfs_space_reservation(root->fs_info, "transaction",
4292 trans->transid, trans->bytes_reserved, 0);
4293 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
4294 trans->bytes_reserved = 0;
4295}
4296
4297/* Can only return 0 or -ENOSPC */
4298int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4299 struct inode *inode)
4300{
4301 struct btrfs_root *root = BTRFS_I(inode)->root;
4302 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
4303 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
4304
4305 /*
4306 * We need to hold space in order to delete our orphan item once we've
4307 * added it, so this takes the reservation so we can release it later
4308 * when we are truly done with the orphan item.
4309 */
4310 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4311 trace_btrfs_space_reservation(root->fs_info, "orphan",
4312 btrfs_ino(inode), num_bytes, 1);
4313 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4314}
4315
4316void btrfs_orphan_release_metadata(struct inode *inode)
4317{
4318 struct btrfs_root *root = BTRFS_I(inode)->root;
4319 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4320 trace_btrfs_space_reservation(root->fs_info, "orphan",
4321 btrfs_ino(inode), num_bytes, 0);
4322 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4323}
4324
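/*
 * Reserve the metadata a pending snapshot will need by migrating it out of
 * the source root's reservation.  Can only fail with -ENOSPC.
 */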
4325int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4326 struct btrfs_pending_snapshot *pending)
4327{
4328 struct btrfs_root *root = pending->root;
4329 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
4330 struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
4331 /*
4332 * two for root back/forward refs, two for directory entries
4333 * and one for root of the snapshot.
4334 */
4335 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
4336 dst_rsv->space_info = src_rsv->space_info;
4337 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4338}
4339
4340/**
4341 * drop_outstanding_extent - drop an outstanding extent
4342 * @inode: the inode we're dropping the extent for
4343 *
4344 * This is called when we are freeing up an outstanding extent, either called
4345 * after an error or after an extent is written. This will return the number of
4346 * reserved extents that need to be freed. This must be called with
4347 * BTRFS_I(inode)->lock held.
4348 */
4349static unsigned drop_outstanding_extent(struct inode *inode)
4350{
4351 unsigned drop_inode_space = 0;
4352 unsigned dropped_extents = 0;
4353
4354 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
4355 BTRFS_I(inode)->outstanding_extents--;
4356
4357 if (BTRFS_I(inode)->outstanding_extents == 0 &&
4358 BTRFS_I(inode)->delalloc_meta_reserved) {
4359 drop_inode_space = 1;
4360 BTRFS_I(inode)->delalloc_meta_reserved = 0;
4361 }
4362
4363 /*
4364 * If we have at least as many outstanding extents as we have
4365 * reserved then we need to leave the reserved extents count alone.
4366 */
4367 if (BTRFS_I(inode)->outstanding_extents >=
4368 BTRFS_I(inode)->reserved_extents)
4369 return drop_inode_space;
4370
4371 dropped_extents = BTRFS_I(inode)->reserved_extents -
4372 BTRFS_I(inode)->outstanding_extents;
4373 BTRFS_I(inode)->reserved_extents -= dropped_extents;
4374 return dropped_extents + drop_inode_space;
4375}
4376
4377/**
4378 * calc_csum_metadata_size - return the amount of metadata space that must be
4379 * reserved/freed for the given bytes.
4380 * @inode: the inode we're manipulating
4381 * @num_bytes: the number of bytes in question
4382 * @reserve: 1 if we are reserving space, 0 if we are freeing space
4383 *
4384 * This adjusts the number of csum_bytes in the inode and then returns the
4385 * correct amount of metadata that must either be reserved or freed. We
4386 * calculate how many checksums we can fit into one leaf and then divide the
4387 * number of bytes that will need to be checksummed by this value to figure out
4388 * how many checksums will be required. If we are adding bytes then the number
4389 * may go up and we will return the number of additional bytes that must be
4390 * reserved. If it is going down we will return the number of bytes that must
4391 * be freed.
4392 *
4393 * This must be called with BTRFS_I(inode)->lock held.
4394 */
4395static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
4396 int reserve)
4397{
4398 struct btrfs_root *root = BTRFS_I(inode)->root;
4399 u64 csum_size;
4400 int num_csums_per_leaf;
4401 int num_csums;
4402 int old_csums;
4403
4404 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
4405 BTRFS_I(inode)->csum_bytes == 0)
4406 return 0;
4407
4408 old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
4409 if (reserve)
4410 BTRFS_I(inode)->csum_bytes += num_bytes;
4411 else
4412 BTRFS_I(inode)->csum_bytes -= num_bytes;
4413 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
4414 num_csums_per_leaf = (int)div64_u64(csum_size,
4415 sizeof(struct btrfs_csum_item) +
4416 sizeof(struct btrfs_disk_key));
4417 num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
4418 num_csums = num_csums + num_csums_per_leaf - 1;
4419 num_csums = num_csums / num_csums_per_leaf;
4420
4421 old_csums = old_csums + num_csums_per_leaf - 1;
4422 old_csums = old_csums / num_csums_per_leaf;
4423
4424 /* No change, no need to reserve more */
4425 if (old_csums == num_csums)
4426 return 0;
4427
4428 if (reserve)
4429 return btrfs_calc_trans_metadata_size(root,
4430 num_csums - old_csums);
4431
4432 return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
4433}
4434
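/*
 * Reserve metadata space for num_bytes of delalloc in an inode: account for
 * the new outstanding extent, the csum items it will need and, on the first
 * reservation, an extra item for the inode update, then charge it all to the
 * delalloc block reservation.  May flush to make room unless this is the
 * free space cache inode.
 */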
4435int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4436{
4437 struct btrfs_root *root = BTRFS_I(inode)->root;
4438 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4439 u64 to_reserve = 0;
4440 u64 csum_bytes;
4441 unsigned nr_extents = 0;
4442 int extra_reserve = 0;
4443 int flush = 1;
4444 int ret;
4445
4446 /* Need to be holding the i_mutex here if this isn't the free space cache inode */
4447 if (btrfs_is_free_space_inode(root, inode))
4448 flush = 0;
4449
4450 if (flush && btrfs_transaction_in_commit(root->fs_info))
4451 schedule_timeout(1);
4452
4453 mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
4454 num_bytes = ALIGN(num_bytes, root->sectorsize);
4455
4456 spin_lock(&BTRFS_I(inode)->lock);
4457 BTRFS_I(inode)->outstanding_extents++;
4458
4459 if (BTRFS_I(inode)->outstanding_extents >
4460 BTRFS_I(inode)->reserved_extents)
4461 nr_extents = BTRFS_I(inode)->outstanding_extents -
4462 BTRFS_I(inode)->reserved_extents;
4463
4464 /*
4465 * Add an item to reserve for updating the inode when we complete the
4466 * delalloc io.
4467 */
4468 if (!BTRFS_I(inode)->delalloc_meta_reserved) {
4469 nr_extents++;
4470 extra_reserve = 1;
4471 }
4472
4473 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4474 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4475 csum_bytes = BTRFS_I(inode)->csum_bytes;
4476 spin_unlock(&BTRFS_I(inode)->lock);
4477
4478 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
4479 if (ret) {
4480 u64 to_free = 0;
4481 unsigned dropped;
4482
4483 spin_lock(&BTRFS_I(inode)->lock);
4484 dropped = drop_outstanding_extent(inode);
4485 /*
4486 * If the inode's csum_bytes is the same as the original
4487 * csum_bytes then we know we haven't raced with any free()ers
4488 * so we can just reduce our inode's csum bytes and carry on.
4489 */
4490 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
4491 calc_csum_metadata_size(inode, num_bytes, 0);
4492 } else {
4493 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
4494 u64 bytes;
4495
4496 /*
4497 * This is tricky, but first we need to figure out how much we
4498 * freed from any free-ers that occurred during this
4499 * reservation, so we reset ->csum_bytes to the csum_bytes
4500 * before we dropped our lock, and then call the free for the
4501 * number of bytes that were freed while we were trying our
4502 * reservation.
4503 */
4504 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
4505 BTRFS_I(inode)->csum_bytes = csum_bytes;
4506 to_free = calc_csum_metadata_size(inode, bytes, 0);
4507
4508
4509 /*
4510 * Now we need to see how much we would have freed had we not
4511 * been making this reservation and our ->csum_bytes were not
4512 * artificially inflated.
4513 */
4514 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
4515 bytes = csum_bytes - orig_csum_bytes;
4516 bytes = calc_csum_metadata_size(inode, bytes, 0);
4517
4518 /*
4519 * Now reset ->csum_bytes to what it should be. If bytes is
4520 * more than to_free then we would have freed more space had we
4521 * not had an artificially high ->csum_bytes, so we need to free
4522 * the remainder. If bytes is the same or less then we don't
4523 * need to do anything, the other free-ers did the correct
4524 * thing.
4525 */
4526 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
4527 if (bytes > to_free)
4528 to_free = bytes - to_free;
4529 else
4530 to_free = 0;
4531 }
4532 spin_unlock(&BTRFS_I(inode)->lock);
4533 if (dropped)
4534 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4535
4536 if (to_free) {
4537 btrfs_block_rsv_release(root, block_rsv, to_free);
4538 trace_btrfs_space_reservation(root->fs_info,
4539 "delalloc",
4540 btrfs_ino(inode),
4541 to_free, 0);
4542 }
4543 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4544 return ret;
4545 }
4546
4547 spin_lock(&BTRFS_I(inode)->lock);
4548 if (extra_reserve) {
4549 BTRFS_I(inode)->delalloc_meta_reserved = 1;
4550 nr_extents--;
4551 }
4552 BTRFS_I(inode)->reserved_extents += nr_extents;
4553 spin_unlock(&BTRFS_I(inode)->lock);
4554 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4555
4556 if (to_reserve)
4557 trace_btrfs_space_reservation(root->fs_info,"delalloc",
4558 btrfs_ino(inode), to_reserve, 1);
4559 block_rsv_add_bytes(block_rsv, to_reserve, 1);
4560
4561 return 0;
4562}
4563
4564/**
4565 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
4566 * @inode: the inode to release the reservation for
4567 * @num_bytes: the number of bytes we're releasing
4568 *
4569 * This will release the metadata reservation for an inode. This can be called
4570 * once we complete IO for a given set of bytes to release their metadata
4571 * reservations.
4572 */
4573void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4574{
4575 struct btrfs_root *root = BTRFS_I(inode)->root;
4576 u64 to_free = 0;
4577 unsigned dropped;
4578
4579 num_bytes = ALIGN(num_bytes, root->sectorsize);
4580 spin_lock(&BTRFS_I(inode)->lock);
4581 dropped = drop_outstanding_extent(inode);
4582
4583 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4584 spin_unlock(&BTRFS_I(inode)->lock);
4585 if (dropped > 0)
4586 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4587
4588 trace_btrfs_space_reservation(root->fs_info, "delalloc",
4589 btrfs_ino(inode), to_free, 0);
4590 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4591 to_free);
4592}
4593
4594/**
4595 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
4596 * @inode: inode we're writing to
4597 * @num_bytes: the number of bytes we want to allocate
4598 *
4599 * This will do the following things
4600 *
4601 * o reserve space in the data space info for num_bytes
4602 * o reserve space in the metadata space info based on number of outstanding
4603 * extents and how much csums will be needed
4604 * o add to the inode's ->delalloc_bytes
4605 * o add it to the fs_info's delalloc inodes list.
4606 *
4607 * This will return 0 for success and -ENOSPC if there is no space left.
4608 */
4609int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
4610{
4611 int ret;
4612
4613 ret = btrfs_check_data_free_space(inode, num_bytes);
4614 if (ret)
4615 return ret;
4616
4617 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
4618 if (ret) {
4619 btrfs_free_reserved_data_space(inode, num_bytes);
4620 return ret;
4621 }
4622
4623 return 0;
4624}
4625
4626/**
4627 * btrfs_delalloc_release_space - release data and metadata space for delalloc
4628 * @inode: inode we're releasing space for
4629 * @num_bytes: the number of bytes we want to free up
4630 *
4631 * This must be matched with a call to btrfs_delalloc_reserve_space. This is
4632 * called in the case that we don't need the metadata AND data reservations
4633 * anymore, for example if there is an error or we insert an inline extent.
4634 *
4635 * This function will release the metadata space that was not used and will
4636 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
4637 * list if there are no delalloc bytes left.
4638 */
4639void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
4640{
4641 btrfs_delalloc_release_metadata(inode, num_bytes);
4642 btrfs_free_reserved_data_space(inode, num_bytes);
4643}
4644
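/*
 * Update the usage accounting when an extent is allocated or freed: adjust
 * the superblock's bytes_used and, for every block group the range covers,
 * move the space from reserved to used (on alloc) or from used to pinned
 * (on free).
 */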
4645static int update_block_group(struct btrfs_trans_handle *trans,
4646 struct btrfs_root *root,
4647 u64 bytenr, u64 num_bytes, int alloc)
4648{
4649 struct btrfs_block_group_cache *cache = NULL;
4650 struct btrfs_fs_info *info = root->fs_info;
4651 u64 total = num_bytes;
4652 u64 old_val;
4653 u64 byte_in_group;
4654 int factor;
4655
4656 /* block accounting for super block */
4657 spin_lock(&info->delalloc_lock);
4658 old_val = btrfs_super_bytes_used(info->super_copy);
4659 if (alloc)
4660 old_val += num_bytes;
4661 else
4662 old_val -= num_bytes;
4663 btrfs_set_super_bytes_used(info->super_copy, old_val);
4664 spin_unlock(&info->delalloc_lock);
4665
4666 while (total) {
4667 cache = btrfs_lookup_block_group(info, bytenr);
4668 if (!cache)
4669 return -ENOENT;
4670 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
4671 BTRFS_BLOCK_GROUP_RAID1 |
4672 BTRFS_BLOCK_GROUP_RAID10))
4673 factor = 2;
4674 else
4675 factor = 1;
4676 /*
4677 * If this block group has free space cache written out, we
4678 * need to make sure to load it if we are removing space. This
4679 * is because we need the unpinning stage to actually add the
4680 * space back to the block group, otherwise we will leak space.
4681 */
4682 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4683 cache_block_group(cache, trans, NULL, 1);
4684
4685 byte_in_group = bytenr - cache->key.objectid;
4686 WARN_ON(byte_in_group > cache->key.offset);
4687
4688 spin_lock(&cache->space_info->lock);
4689 spin_lock(&cache->lock);
4690
4691 if (btrfs_test_opt(root, SPACE_CACHE) &&
4692 cache->disk_cache_state < BTRFS_DC_CLEAR)
4693 cache->disk_cache_state = BTRFS_DC_CLEAR;
4694
4695 cache->dirty = 1;
4696 old_val = btrfs_block_group_used(&cache->item);
4697 num_bytes = min(total, cache->key.offset - byte_in_group);
4698 if (alloc) {
4699 old_val += num_bytes;
4700 btrfs_set_block_group_used(&cache->item, old_val);
4701 cache->reserved -= num_bytes;
4702 cache->space_info->bytes_reserved -= num_bytes;
4703 cache->space_info->bytes_used += num_bytes;
4704 cache->space_info->disk_used += num_bytes * factor;
4705 spin_unlock(&cache->lock);
4706 spin_unlock(&cache->space_info->lock);
4707 } else {
4708 old_val -= num_bytes;
4709 btrfs_set_block_group_used(&cache->item, old_val);
4710 cache->pinned += num_bytes;
4711 cache->space_info->bytes_pinned += num_bytes;
4712 cache->space_info->bytes_used -= num_bytes;
4713 cache->space_info->disk_used -= num_bytes * factor;
4714 spin_unlock(&cache->lock);
4715 spin_unlock(&cache->space_info->lock);
4716
4717 set_extent_dirty(info->pinned_extents,
4718 bytenr, bytenr + num_bytes - 1,
4719 GFP_NOFS | __GFP_NOFAIL);
4720 }
4721 btrfs_put_block_group(cache);
4722 total -= num_bytes;
4723 bytenr += num_bytes;
4724 }
4725 return 0;
4726}
4727
4728static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
4729{
4730 struct btrfs_block_group_cache *cache;
4731 u64 bytenr;
4732
4733 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
4734 if (!cache)
4735 return 0;
4736
4737 bytenr = cache->key.objectid;
4738 btrfs_put_block_group(cache);
4739
4740 return bytenr;
4741}
4742
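/*
 * Pin an extent inside a block group: move the bytes into the pinned
 * counters (dropping them from reserved if requested) and mark the range
 * dirty in the pinned_extents tree so it gets unpinned at commit time.
 */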
4743static int pin_down_extent(struct btrfs_root *root,
4744 struct btrfs_block_group_cache *cache,
4745 u64 bytenr, u64 num_bytes, int reserved)
4746{
4747 spin_lock(&cache->space_info->lock);
4748 spin_lock(&cache->lock);
4749 cache->pinned += num_bytes;
4750 cache->space_info->bytes_pinned += num_bytes;
4751 if (reserved) {
4752 cache->reserved -= num_bytes;
4753 cache->space_info->bytes_reserved -= num_bytes;
4754 }
4755 spin_unlock(&cache->lock);
4756 spin_unlock(&cache->space_info->lock);
4757
4758 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
4759 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
4760 return 0;
4761}
4762
4763/*
4764 * this function must be called within a transaction
4765 */
4766int btrfs_pin_extent(struct btrfs_root *root,
4767 u64 bytenr, u64 num_bytes, int reserved)
4768{
4769 struct btrfs_block_group_cache *cache;
4770
4771 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4772 BUG_ON(!cache); /* Logic error */
4773
4774 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
4775
4776 btrfs_put_block_group(cache);
4777 return 0;
4778}
4779
4780/*
4781 * this function must be called within a transaction
4782 */
4783int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
4784 struct btrfs_root *root,
4785 u64 bytenr, u64 num_bytes)
4786{
4787 struct btrfs_block_group_cache *cache;
4788
4789 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4790 BUG_ON(!cache); /* Logic error */
4791
4792 /*
4793 * pull in the free space cache (if any) so that our pin
4794 * removes the free space from the cache. We have load_only set
4795 * to one because the slow code to read in the free extents does check
4796 * the pinned extents.
4797 */
4798 cache_block_group(cache, trans, root, 1);
4799
4800 pin_down_extent(root, cache, bytenr, num_bytes, 0);
4801
4802 /* remove us from the free space cache (if we're there at all) */
4803 btrfs_remove_free_space(cache, bytenr, num_bytes);
4804 btrfs_put_block_group(cache);
4805 return 0;
4806}
4807
4808/**
4809 * btrfs_update_reserved_bytes - update the block_group and space info counters
4810 * @cache: The cache we are manipulating
4811 * @num_bytes: The number of bytes in question
4812 * @reserve: One of the reservation enums
4813 *
4814 * This is called by the allocator when it reserves space, or by somebody who is
4815 * freeing space that was never actually used on disk. For example if you
4816 * reserve some space for a new leaf in transaction A and before transaction A
4817 * commits you free that leaf, you call this with reserve set to 0 in order to
4818 * clear the reservation.
4819 *
4820 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
4821 * ENOSPC accounting. For data we handle the reservation through clearing the
4822 * delalloc bits in the io_tree. We have to do this since we could end up
4823 * allocating less disk space for the amount of data we have reserved in the
4824 * case of compression.
4825 *
4826 * If this is a reservation and the block group has become read only we cannot
4827 * make the reservation and return -EAGAIN, otherwise this function always
4828 * succeeds.
4829 */
4830static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4831 u64 num_bytes, int reserve)
4832{
4833 struct btrfs_space_info *space_info = cache->space_info;
4834 int ret = 0;
4835
4836 spin_lock(&space_info->lock);
4837 spin_lock(&cache->lock);
4838 if (reserve != RESERVE_FREE) {
4839 if (cache->ro) {
4840 ret = -EAGAIN;
4841 } else {
4842 cache->reserved += num_bytes;
4843 space_info->bytes_reserved += num_bytes;
4844 if (reserve == RESERVE_ALLOC) {
4845 trace_btrfs_space_reservation(cache->fs_info,
4846 "space_info", space_info->flags,
4847 num_bytes, 0);
4848 space_info->bytes_may_use -= num_bytes;
4849 }
4850 }
4851 } else {
4852 if (cache->ro)
4853 space_info->bytes_readonly += num_bytes;
4854 cache->reserved -= num_bytes;
4855 space_info->bytes_reserved -= num_bytes;
4856 space_info->reservation_progress++;
4857 }
4858 spin_unlock(&cache->lock);
4859 spin_unlock(&space_info->lock);
4860 return ret;
4861}
4862
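/*
 * Prepare for unpinning at commit time: settle last_byte_to_unpin for every
 * caching block group and swap which freed_extents tree collects new pins.
 */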
4863void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
4864 struct btrfs_root *root)
4865{
4866 struct btrfs_fs_info *fs_info = root->fs_info;
4867 struct btrfs_caching_control *next;
4868 struct btrfs_caching_control *caching_ctl;
4869 struct btrfs_block_group_cache *cache;
4870
4871 down_write(&fs_info->extent_commit_sem);
4872
4873 list_for_each_entry_safe(caching_ctl, next,
4874 &fs_info->caching_block_groups, list) {
4875 cache = caching_ctl->block_group;
4876 if (block_group_cache_done(cache)) {
4877 cache->last_byte_to_unpin = (u64)-1;
4878 list_del_init(&caching_ctl->list);
4879 put_caching_control(caching_ctl);
4880 } else {
4881 cache->last_byte_to_unpin = caching_ctl->progress;
4882 }
4883 }
4884
4885 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
4886 fs_info->pinned_extents = &fs_info->freed_extents[1];
4887 else
4888 fs_info->pinned_extents = &fs_info->freed_extents[0];
4889
4890 up_write(&fs_info->extent_commit_sem);
4891
4892 update_global_block_rsv(fs_info);
4893}
4894
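/*
 * Unpin the range [start, end]: drop the bytes from the pinned counters of
 * each block group covered and, when return_free_space is set, give the
 * portion below last_byte_to_unpin back to the free space cache.
 */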
4895static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
4896 const bool return_free_space)
4897{
4898 struct btrfs_fs_info *fs_info = root->fs_info;
4899 struct btrfs_block_group_cache *cache = NULL;
4900 u64 len;
4901
4902 while (start <= end) {
4903 if (!cache ||
4904 start >= cache->key.objectid + cache->key.offset) {
4905 if (cache)
4906 btrfs_put_block_group(cache);
4907 cache = btrfs_lookup_block_group(fs_info, start);
4908 BUG_ON(!cache); /* Logic error */
4909 }
4910
4911 len = cache->key.objectid + cache->key.offset - start;
4912 len = min(len, end + 1 - start);
4913
4914 if (start < cache->last_byte_to_unpin) {
4915 len = min(len, cache->last_byte_to_unpin - start);
4916 if (return_free_space)
4917 btrfs_add_free_space(cache, start, len);
4918 }
4919
4920 start += len;
4921
4922 spin_lock(&cache->space_info->lock);
4923 spin_lock(&cache->lock);
4924 cache->pinned -= len;
4925 cache->space_info->bytes_pinned -= len;
4926 if (cache->ro)
4927 cache->space_info->bytes_readonly += len;
4928 spin_unlock(&cache->lock);
4929 spin_unlock(&cache->space_info->lock);
4930 }
4931
4932 if (cache)
4933 btrfs_put_block_group(cache);
4934 return 0;
4935}
4936
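/*
 * Walk the extents freed during this transaction, discarding them when
 * mounted with -o discard and returning them to the free space caches.
 */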
4937int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4938 struct btrfs_root *root)
4939{
4940 struct btrfs_fs_info *fs_info = root->fs_info;
4941 struct extent_io_tree *unpin;
4942 u64 start;
4943 u64 end;
4944 int ret;
4945
4946 if (trans->aborted)
4947 return 0;
4948
4949 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
4950 unpin = &fs_info->freed_extents[1];
4951 else
4952 unpin = &fs_info->freed_extents[0];
4953
4954 while (1) {
4955 ret = find_first_extent_bit(unpin, 0, &start, &end,
4956 EXTENT_DIRTY);
4957 if (ret)
4958 break;
4959
4960 if (btrfs_test_opt(root, DISCARD))
4961 ret = btrfs_discard_extent(root, start,
4962 end + 1 - start, NULL);
4963
4964 clear_extent_dirty(unpin, start, end, GFP_NOFS);
4965 unpin_extent_range(root, start, end, true);
4966 cond_resched();
4967 }
4968
4969 return 0;
4970}
4971
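/*
 * Drop refs_to_drop references from the extent item for this range.  When
 * the reference count hits zero the extent item (and any inline backref) is
 * deleted, csums are removed for data extents and the block group usage is
 * updated.
 */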
4972static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4973 struct btrfs_root *root,
4974 u64 bytenr, u64 num_bytes, u64 parent,
4975 u64 root_objectid, u64 owner_objectid,
4976 u64 owner_offset, int refs_to_drop,
4977 struct btrfs_delayed_extent_op *extent_op)
4978{
4979 struct btrfs_key key;
4980 struct btrfs_path *path;
4981 struct btrfs_fs_info *info = root->fs_info;
4982 struct btrfs_root *extent_root = info->extent_root;
4983 struct extent_buffer *leaf;
4984 struct btrfs_extent_item *ei;
4985 struct btrfs_extent_inline_ref *iref;
4986 int ret;
4987 int is_data;
4988 int extent_slot = 0;
4989 int found_extent = 0;
4990 int num_to_del = 1;
4991 u32 item_size;
4992 u64 refs;
4993
4994 path = btrfs_alloc_path();
4995 if (!path)
4996 return -ENOMEM;
4997
4998 path->reada = 1;
4999 path->leave_spinning = 1;
5000
5001 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
5002 BUG_ON(!is_data && refs_to_drop != 1);
5003
5004 ret = lookup_extent_backref(trans, extent_root, path, &iref,
5005 bytenr, num_bytes, parent,
5006 root_objectid, owner_objectid,
5007 owner_offset);
5008 if (ret == 0) {
5009 extent_slot = path->slots[0];
5010 while (extent_slot >= 0) {
5011 btrfs_item_key_to_cpu(path->nodes[0], &key,
5012 extent_slot);
5013 if (key.objectid != bytenr)
5014 break;
5015 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
5016 key.offset == num_bytes) {
5017 found_extent = 1;
5018 break;
5019 }
5020 if (path->slots[0] - extent_slot > 5)
5021 break;
5022 extent_slot--;
5023 }
5024#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5025 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
5026 if (found_extent && item_size < sizeof(*ei))
5027 found_extent = 0;
5028#endif
5029 if (!found_extent) {
5030 BUG_ON(iref);
5031 ret = remove_extent_backref(trans, extent_root, path,
5032 NULL, refs_to_drop,
5033 is_data);
5034 if (ret)
5035 goto abort;
5036 btrfs_release_path(path);
5037 path->leave_spinning = 1;
5038
5039 key.objectid = bytenr;
5040 key.type = BTRFS_EXTENT_ITEM_KEY;
5041 key.offset = num_bytes;
5042
5043 ret = btrfs_search_slot(trans, extent_root,
5044 &key, path, -1, 1);
5045 if (ret) {
5046 printk(KERN_ERR "umm, got %d back from search"
5047 ", was looking for %llu\n", ret,
5048 (unsigned long long)bytenr);
5049 if (ret > 0)
5050 btrfs_print_leaf(extent_root,
5051 path->nodes[0]);
5052 }
5053 if (ret < 0)
5054 goto abort;
5055 extent_slot = path->slots[0];
5056 }
5057 } else if (ret == -ENOENT) {
5058 btrfs_print_leaf(extent_root, path->nodes[0]);
5059 WARN_ON(1);
5060 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
5061 "parent %llu root %llu owner %llu offset %llu\n",
5062 (unsigned long long)bytenr,
5063 (unsigned long long)parent,
5064 (unsigned long long)root_objectid,
5065 (unsigned long long)owner_objectid,
5066 (unsigned long long)owner_offset);
5067 } else {
5068 goto abort;
5069 }
5070
5071 leaf = path->nodes[0];
5072 item_size = btrfs_item_size_nr(leaf, extent_slot);
5073#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5074 if (item_size < sizeof(*ei)) {
5075 BUG_ON(found_extent || extent_slot != path->slots[0]);
5076 ret = convert_extent_item_v0(trans, extent_root, path,
5077 owner_objectid, 0);
5078 if (ret < 0)
5079 goto abort;
5080
5081 btrfs_release_path(path);
5082 path->leave_spinning = 1;
5083
5084 key.objectid = bytenr;
5085 key.type = BTRFS_EXTENT_ITEM_KEY;
5086 key.offset = num_bytes;
5087
5088 ret = btrfs_search_slot(trans, extent_root, &key, path,
5089 -1, 1);
5090 if (ret) {
5091 printk(KERN_ERR "umm, got %d back from search"
5092 ", was looking for %llu\n", ret,
5093 (unsigned long long)bytenr);
5094 btrfs_print_leaf(extent_root, path->nodes[0]);
5095 }
5096 if (ret < 0)
5097 goto abort;
5098 extent_slot = path->slots[0];
5099 leaf = path->nodes[0];
5100 item_size = btrfs_item_size_nr(leaf, extent_slot);
5101 }
5102#endif
5103 BUG_ON(item_size < sizeof(*ei));
5104 ei = btrfs_item_ptr(leaf, extent_slot,
5105 struct btrfs_extent_item);
5106 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
5107 struct btrfs_tree_block_info *bi;
5108 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
5109 bi = (struct btrfs_tree_block_info *)(ei + 1);
5110 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
5111 }
5112
5113 refs = btrfs_extent_refs(leaf, ei);
5114 BUG_ON(refs < refs_to_drop);
5115 refs -= refs_to_drop;
5116
5117 if (refs > 0) {
5118 if (extent_op)
5119 __run_delayed_extent_op(extent_op, leaf, ei);
5120 /*
5121 * In the case of an inline back ref, the reference count will
5122 * be updated by remove_extent_backref
5123 */
5124 if (iref) {
5125 BUG_ON(!found_extent);
5126 } else {
5127 btrfs_set_extent_refs(leaf, ei, refs);
5128 btrfs_mark_buffer_dirty(leaf);
5129 }
5130 if (found_extent) {
5131 ret = remove_extent_backref(trans, extent_root, path,
5132 iref, refs_to_drop,
5133 is_data);
5134 if (ret)
5135 goto abort;
5136 }
5137 } else {
5138 if (found_extent) {
5139 BUG_ON(is_data && refs_to_drop !=
5140 extent_data_ref_count(root, path, iref));
5141 if (iref) {
5142 BUG_ON(path->slots[0] != extent_slot);
5143 } else {
5144 BUG_ON(path->slots[0] != extent_slot + 1);
5145 path->slots[0] = extent_slot;
5146 num_to_del = 2;
5147 }
5148 }
5149
5150 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
5151 num_to_del);
5152 if (ret)
5153 goto abort;
5154 btrfs_release_path(path);
5155
5156 if (is_data) {
5157 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
5158 if (ret)
5159 goto abort;
5160 }
5161
5162 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
5163 if (ret)
5164 goto abort;
5165 }
5166out:
5167 btrfs_free_path(path);
5168 return ret;
5169
5170abort:
5171 btrfs_abort_transaction(trans, extent_root, ret);
5172 goto out;
5173}
5174
5175/*
5176 * when we free a block, it is possible (and likely) that we free the last
5177 * delayed ref for that extent as well. This searches the delayed ref tree for
5178 * a given extent, and if there are no other delayed refs to be processed, it
5179 * removes it from the tree.
5180 */
5181static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5182 struct btrfs_root *root, u64 bytenr)
5183{
5184 struct btrfs_delayed_ref_head *head;
5185 struct btrfs_delayed_ref_root *delayed_refs;
5186 struct btrfs_delayed_ref_node *ref;
5187 struct rb_node *node;
5188 int ret = 0;
5189
5190 delayed_refs = &trans->transaction->delayed_refs;
5191 spin_lock(&delayed_refs->lock);
5192 head = btrfs_find_delayed_ref_head(trans, bytenr);
5193 if (!head)
5194 goto out;
5195
5196 node = rb_prev(&head->node.rb_node);
5197 if (!node)
5198 goto out;
5199
5200 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
5201
5202 /* there are still entries for this ref, we can't drop it */
5203 if (ref->bytenr == bytenr)
5204 goto out;
5205
5206 if (head->extent_op) {
5207 if (!head->must_insert_reserved)
5208 goto out;
5209 kfree(head->extent_op);
5210 head->extent_op = NULL;
5211 }
5212
5213 /*
5214 * waiting for the lock here would deadlock. If someone else has it
5215 * locked they are already in the process of dropping it anyway
5216 */
5217 if (!mutex_trylock(&head->mutex))
5218 goto out;
5219
5220 /*
5221 * at this point we have a head with no other entries. Go
5222 * ahead and process it.
5223 */
5224 head->node.in_tree = 0;
5225 rb_erase(&head->node.rb_node, &delayed_refs->root);
5226
5227 delayed_refs->num_entries--;
5228 if (waitqueue_active(&delayed_refs->seq_wait))
5229 wake_up(&delayed_refs->seq_wait);
5230
5231 /*
5232 * we don't take a ref on the node because we're removing it from the
5233 * tree, so we just steal the ref the tree was holding.
5234 */
5235 delayed_refs->num_heads--;
5236 if (list_empty(&head->cluster))
5237 delayed_refs->num_heads_ready--;
5238
5239 list_del_init(&head->cluster);
5240 spin_unlock(&delayed_refs->lock);
5241
5242 BUG_ON(head->extent_op);
5243 if (head->must_insert_reserved)
5244 ret = 1;
5245
5246 mutex_unlock(&head->mutex);
5247 btrfs_put_delayed_ref(&head->node);
5248 return ret;
5249out:
5250 spin_unlock(&delayed_refs->lock);
5251 return 0;
5252}
5253
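/*
 * Free a tree block: queue a delayed ref drop for non-log trees and, when
 * this was the last reference to a block allocated in this transaction,
 * either pin it (if it already hit disk) or hand it straight back to the
 * free space cache.
 */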
5254void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
5255 struct btrfs_root *root,
5256 struct extent_buffer *buf,
5257 u64 parent, int last_ref, int for_cow)
5258{
5259 struct btrfs_block_group_cache *cache = NULL;
5260 int ret;
5261
5262 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
5263 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
5264 buf->start, buf->len,
5265 parent, root->root_key.objectid,
5266 btrfs_header_level(buf),
5267 BTRFS_DROP_DELAYED_REF, NULL, for_cow);
5268 BUG_ON(ret); /* -ENOMEM */
5269 }
5270
5271 if (!last_ref)
5272 return;
5273
5274 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
5275
5276 if (btrfs_header_generation(buf) == trans->transid) {
5277 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
5278 ret = check_ref_cleanup(trans, root, buf->start);
5279 if (!ret)
5280 goto out;
5281 }
5282
5283 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
5284 pin_down_extent(root, cache, buf->start, buf->len, 1);
5285 goto out;
5286 }
5287
5288 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
5289
5290 btrfs_add_free_space(cache, buf->start, buf->len);
5291 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
5292 }
5293out:
5294 /*
5295 * Deleting the buffer, clear the corrupt flag since it doesn't matter
5296 * anymore.
5297 */
5298 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
5299 btrfs_put_block_group(cache);
5300}
5301
5302/* Can return -ENOMEM */
5303int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5304 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
5305 u64 owner, u64 offset, int for_cow)
5306{
5307 int ret;
5308 struct btrfs_fs_info *fs_info = root->fs_info;
5309
5310 /*
5311 * tree log blocks never actually go into the extent allocation
5312 * tree, just update pinning info and exit early.
5313 */
5314 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
5315 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
5316 /* unlocks the pinned mutex */
5317 btrfs_pin_extent(root, bytenr, num_bytes, 1);
5318 ret = 0;
5319 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
5320 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
5321 num_bytes,
5322 parent, root_objectid, (int)owner,
5323 BTRFS_DROP_DELAYED_REF, NULL, for_cow);
5324 } else {
5325 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
5326 num_bytes,
5327 parent, root_objectid, owner,
5328 offset, BTRFS_DROP_DELAYED_REF,
5329 NULL, for_cow);
5330 }
5331 return ret;
5332}
5333
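/* round val up to a stripe size boundary */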
5334static u64 stripe_align(struct btrfs_root *root, u64 val)
5335{
5336 u64 mask = ((u64)root->stripesize - 1);
5337 u64 ret = (val + mask) & ~mask;
5338 return ret;
5339}
5340
5341/*
5342 * when we wait for progress in the block group caching, it's because
5343 * our allocation attempt failed at least once. So, we must sleep
5344 * and let some progress happen before we try again.
5345 *
5346 * This function will sleep at least once waiting for new free space to
5347 * show up, and then it will check the block group free space numbers
5348 * for our min num_bytes. Another option is to have it go ahead
5349 * and look in the rbtree for a free extent of a given size, but this
5350 * is a good start.
5351 */
5352static noinline int
5353wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
5354 u64 num_bytes)
5355{
5356 struct btrfs_caching_control *caching_ctl;
5357 DEFINE_WAIT(wait);
5358
5359 caching_ctl = get_caching_control(cache);
5360 if (!caching_ctl)
5361 return 0;
5362
5363 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
5364 (cache->free_space_ctl->free_space >= num_bytes));
5365
5366 put_caching_control(caching_ctl);
5367 return 0;
5368}
5369
5370static noinline int
5371wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
5372{
5373 struct btrfs_caching_control *caching_ctl;
5374 DEFINE_WAIT(wait);
5375
5376 caching_ctl = get_caching_control(cache);
5377 if (!caching_ctl)
5378 return 0;
5379
5380 wait_event(caching_ctl->wait, block_group_cache_done(cache));
5381
5382 put_caching_control(caching_ctl);
5383 return 0;
5384}
5385
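/*
 * Map a block group's RAID flags onto the index of the per-space_info
 * block group list that holds that profile.
 */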
5386static int __get_block_group_index(u64 flags)
5387{
5388 int index;
5389
5390 if (flags & BTRFS_BLOCK_GROUP_RAID10)
5391 index = 0;
5392 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
5393 index = 1;
5394 else if (flags & BTRFS_BLOCK_GROUP_DUP)
5395 index = 2;
5396 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
5397 index = 3;
5398 else
5399 index = 4;
5400
5401 return index;
5402}
5403
5404static int get_block_group_index(struct btrfs_block_group_cache *cache)
5405{
5406 return __get_block_group_index(cache->flags);
5407}
5408
5409enum btrfs_loop_type {
5410 LOOP_CACHING_NOWAIT = 0,
5411 LOOP_CACHING_WAIT = 1,
5412 LOOP_ALLOC_CHUNK = 2,
5413 LOOP_NO_EMPTY_SIZE = 3,
5414};
5415
5416/*
5417 * walks the btree of allocated extents and finds a hole of a given size.
5418 * The key ins is changed to record the hole:
5419 * ins->objectid == block start
5420 * ins->flags = BTRFS_EXTENT_ITEM_KEY
5421 * ins->offset == number of blocks
5422 * Any available blocks before search_start are skipped.
5423 */
5424static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5425 struct btrfs_root *orig_root,
5426 u64 num_bytes, u64 empty_size,
5427 u64 hint_byte, struct btrfs_key *ins,
5428 u64 data)
5429{
5430 int ret = 0;
5431 struct btrfs_root *root = orig_root->fs_info->extent_root;
5432 struct btrfs_free_cluster *last_ptr = NULL;
5433 struct btrfs_block_group_cache *block_group = NULL;
5434 struct btrfs_block_group_cache *used_block_group;
5435 u64 search_start = 0;
5436 int empty_cluster = 2 * 1024 * 1024;
5437 int allowed_chunk_alloc = 0;
5438 int done_chunk_alloc = 0;
5439 struct btrfs_space_info *space_info;
5440 int loop = 0;
5441 int index = 0;
5442 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5443 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5444 bool found_uncached_bg = false;
5445 bool failed_cluster_refill = false;
5446 bool failed_alloc = false;
5447 bool use_cluster = true;
5448 bool have_caching_bg = false;
5449
5450 WARN_ON(num_bytes < root->sectorsize);
5451 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
5452 ins->objectid = 0;
5453 ins->offset = 0;
5454
5455 trace_find_free_extent(orig_root, num_bytes, empty_size, data);
5456
5457 space_info = __find_space_info(root->fs_info, data);
5458 if (!space_info) {
5459 printk(KERN_ERR "No space info for %llu\n", data);
5460 return -ENOSPC;
5461 }
5462
5463 /*
5464 * If the space info is for both data and metadata it means we have a
5465 * small filesystem and we can't use the clustering stuff.
5466 */
5467 if (btrfs_mixed_space_info(space_info))
5468 use_cluster = false;
5469
5470 if (orig_root->ref_cows || empty_size)
5471 allowed_chunk_alloc = 1;
5472
5473 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
5474 last_ptr = &root->fs_info->meta_alloc_cluster;
5475 if (!btrfs_test_opt(root, SSD))
5476 empty_cluster = 64 * 1024;
5477 }
5478
5479 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
5480 btrfs_test_opt(root, SSD)) {
5481 last_ptr = &root->fs_info->data_alloc_cluster;
5482 }
5483
5484 if (last_ptr) {
5485 spin_lock(&last_ptr->lock);
5486 if (last_ptr->block_group)
5487 hint_byte = last_ptr->window_start;
5488 spin_unlock(&last_ptr->lock);
5489 }
5490
5491 search_start = max(search_start, first_logical_byte(root, 0));
5492 search_start = max(search_start, hint_byte);
5493
5494 if (!last_ptr)
5495 empty_cluster = 0;
5496
5497 if (search_start == hint_byte) {
5498 block_group = btrfs_lookup_block_group(root->fs_info,
5499 search_start);
5500 used_block_group = block_group;
5501 /*
5502 * we don't want to use the block group if it doesn't match our
5503 * allocation bits, or if it's not cached.
5504 *
5505 * However if we are re-searching with an ideal block group
5506 * picked out then we don't care that the block group is cached.
5507 */
5508 if (block_group && block_group_bits(block_group, data) &&
5509 block_group->cached != BTRFS_CACHE_NO) {
5510 down_read(&space_info->groups_sem);
5511 if (list_empty(&block_group->list) ||
5512 block_group->ro) {
5513 /*
5514 * someone is removing this block group,
5515 * we can't jump into the have_block_group
5516 * target because our list pointers are not
5517 * valid
5518 */
5519 btrfs_put_block_group(block_group);
5520 up_read(&space_info->groups_sem);
5521 } else {
5522 index = get_block_group_index(block_group);
5523 goto have_block_group;
5524 }
5525 } else if (block_group) {
5526 btrfs_put_block_group(block_group);
5527 }
5528 }
5529search:
5530 have_caching_bg = false;
5531 down_read(&space_info->groups_sem);
5532 list_for_each_entry(block_group, &space_info->block_groups[index],
5533 list) {
5534 u64 offset;
5535 int cached;
5536
5537 used_block_group = block_group;
5538 btrfs_get_block_group(block_group);
5539 search_start = block_group->key.objectid;
5540
5541 /*
5542 * this can happen if we end up cycling through all the
5543 * raid types, but we want to make sure we only allocate
5544 * for the proper type.
5545 */
5546 if (!block_group_bits(block_group, data)) {
5547 u64 extra = BTRFS_BLOCK_GROUP_DUP |
5548 BTRFS_BLOCK_GROUP_RAID1 |
5549 BTRFS_BLOCK_GROUP_RAID10;
5550
5551 /*
5552 * if they asked for extra copies and this block group
5553 * doesn't provide them, bail. This does allow us to
5554 * fill raid0 from raid1.
5555 */
5556 if ((data & extra) && !(block_group->flags & extra))
5557 goto loop;
5558 }
5559
5560have_block_group:
5561 cached = block_group_cache_done(block_group);
5562 if (unlikely(!cached)) {
5563 found_uncached_bg = true;
5564 ret = cache_block_group(block_group, trans,
5565 orig_root, 0);
5566 BUG_ON(ret < 0);
5567 ret = 0;
5568 }
5569
5570 if (unlikely(block_group->ro))
5571 goto loop;
5572
5573 /*
5574 * Ok we want to try and use the cluster allocator, so
5575 * let's look there
5576 */
5577 if (last_ptr) {
5578 /*
5579 * the refill lock keeps out other
5580 * people trying to start a new cluster
5581 */
5582 spin_lock(&last_ptr->refill_lock);
5583 used_block_group = last_ptr->block_group;
5584 if (used_block_group != block_group &&
5585 (!used_block_group ||
5586 used_block_group->ro ||
5587 !block_group_bits(used_block_group, data))) {
5588 used_block_group = block_group;
5589 goto refill_cluster;
5590 }
5591
5592 if (used_block_group != block_group)
5593 btrfs_get_block_group(used_block_group);
5594
5595 offset = btrfs_alloc_from_cluster(used_block_group,
5596 last_ptr, num_bytes, used_block_group->key.objectid);
5597 if (offset) {
5598 /* we have a block, we're done */
5599 spin_unlock(&last_ptr->refill_lock);
5600 trace_btrfs_reserve_extent_cluster(root,
5601 block_group, search_start, num_bytes);
5602 goto checks;
5603 }
5604
5605 WARN_ON(last_ptr->block_group != used_block_group);
5606 if (used_block_group != block_group) {
5607 btrfs_put_block_group(used_block_group);
5608 used_block_group = block_group;
5609 }
5610refill_cluster:
5611 BUG_ON(used_block_group != block_group);
5612 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
5613 * set up a new cluster, so let's just skip it
5614 * and let the allocator find whatever block
5615 * it can find. If we reach this point, we
5616 * will have tried the cluster allocator
5617 * plenty of times and not have found
5618 * anything, so we are likely way too
5619 * fragmented for the clustering stuff to find
5620 * anything.
5621 *
5622 * However, if the cluster is taken from the
5623 * current block group, release the cluster
5624 * first, so that we stand a better chance of
5625 * succeeding in the unclustered
5626 * allocation. */
5627 if (loop >= LOOP_NO_EMPTY_SIZE &&
5628 last_ptr->block_group != block_group) {
5629 spin_unlock(&last_ptr->refill_lock);
5630 goto unclustered_alloc;
5631 }
5632
5633 /*
5634 * this cluster didn't work out, free it and
5635 * start over
5636 */
5637 btrfs_return_cluster_to_free_space(NULL, last_ptr);
5638
5639 if (loop >= LOOP_NO_EMPTY_SIZE) {
5640 spin_unlock(&last_ptr->refill_lock);
5641 goto unclustered_alloc;
5642 }
5643
5644 /* allocate a cluster in this block group */
5645 ret = btrfs_find_space_cluster(trans, root,
5646 block_group, last_ptr,
5647 search_start, num_bytes,
5648 empty_cluster + empty_size);
5649 if (ret == 0) {
5650 /*
5651 * now pull our allocation out of this
5652 * cluster
5653 */
5654 offset = btrfs_alloc_from_cluster(block_group,
5655 last_ptr, num_bytes,
5656 search_start);
5657 if (offset) {
5658 /* we found one, proceed */
5659 spin_unlock(&last_ptr->refill_lock);
5660 trace_btrfs_reserve_extent_cluster(root,
5661 block_group, search_start,
5662 num_bytes);
5663 goto checks;
5664 }
5665 } else if (!cached && loop > LOOP_CACHING_NOWAIT
5666 && !failed_cluster_refill) {
5667 spin_unlock(&last_ptr->refill_lock);
5668
5669 failed_cluster_refill = true;
5670 wait_block_group_cache_progress(block_group,
5671 num_bytes + empty_cluster + empty_size);
5672 goto have_block_group;
5673 }
5674
5675 /*
5676 * at this point we either didn't find a cluster
5677 * or we weren't able to allocate a block from our
5678 * cluster. Free the cluster we've been trying
5679 * to use, and go to the next block group
5680 */
5681 btrfs_return_cluster_to_free_space(NULL, last_ptr);
5682 spin_unlock(&last_ptr->refill_lock);
5683 goto loop;
5684 }
5685
5686unclustered_alloc:
5687 spin_lock(&block_group->free_space_ctl->tree_lock);
5688 if (cached &&
5689 block_group->free_space_ctl->free_space <
5690 num_bytes + empty_cluster + empty_size) {
5691 spin_unlock(&block_group->free_space_ctl->tree_lock);
5692 goto loop;
5693 }
5694 spin_unlock(&block_group->free_space_ctl->tree_lock);
5695
5696 offset = btrfs_find_space_for_alloc(block_group, search_start,
5697 num_bytes, empty_size);
5698 /*
5699 * If we didn't find a chunk, and we haven't failed on this
5700 * block group before, and this block group is in the middle of
5701 * caching and we are ok with waiting, then go ahead and wait
5702 * for progress to be made, and set failed_alloc to true.
5703 *
5704 * If failed_alloc is true then we've already waited on this
5705 * block group once and should move on to the next block group.
5706 */
5707 if (!offset && !failed_alloc && !cached &&
5708 loop > LOOP_CACHING_NOWAIT) {
5709 wait_block_group_cache_progress(block_group,
5710 num_bytes + empty_size);
5711 failed_alloc = true;
5712 goto have_block_group;
5713 } else if (!offset) {
5714 if (!cached)
5715 have_caching_bg = true;
5716 goto loop;
5717 }
5718checks:
5719 search_start = stripe_align(root, offset);
5720
5721 /* move on to the next group */
5722 if (search_start + num_bytes >
5723 used_block_group->key.objectid + used_block_group->key.offset) {
5724 btrfs_add_free_space(used_block_group, offset, num_bytes);
5725 goto loop;
5726 }
5727
5728 if (offset < search_start)
5729 btrfs_add_free_space(used_block_group, offset,
5730 search_start - offset);
5731 BUG_ON(offset > search_start);
5732
5733 ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
5734 alloc_type);
5735 if (ret == -EAGAIN) {
5736 btrfs_add_free_space(used_block_group, offset, num_bytes);
5737 goto loop;
5738 }
5739
5740 /* we are all good, let's return */
5741 ins->objectid = search_start;
5742 ins->offset = num_bytes;
5743
5744 trace_btrfs_reserve_extent(orig_root, block_group,
5745 search_start, num_bytes);
5746 if (offset < search_start)
5747 btrfs_add_free_space(used_block_group, offset,
5748 search_start - offset);
5749 BUG_ON(offset > search_start);
5750 if (used_block_group != block_group)
5751 btrfs_put_block_group(used_block_group);
5752 btrfs_put_block_group(block_group);
5753 break;
5754loop:
5755 failed_cluster_refill = false;
5756 failed_alloc = false;
5757 BUG_ON(index != get_block_group_index(block_group));
5758 if (used_block_group != block_group)
5759 btrfs_put_block_group(used_block_group);
5760 btrfs_put_block_group(block_group);
5761 }
5762 up_read(&space_info->groups_sem);
5763
5764 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
5765 goto search;
5766
5767 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
5768 goto search;
5769
5770 /*
5771 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
5772 * caching kthreads as we move along
5773 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
5774 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
5775 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5776 * again
5777 */
5778 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5779 index = 0;
5780 loop++;
5781 if (loop == LOOP_ALLOC_CHUNK) {
5782 if (allowed_chunk_alloc) {
5783 ret = do_chunk_alloc(trans, root, num_bytes +
5784 2 * 1024 * 1024, data,
5785 CHUNK_ALLOC_LIMITED);
5786 if (ret < 0) {
5787 btrfs_abort_transaction(trans,
5788 root, ret);
5789 goto out;
5790 }
5791 allowed_chunk_alloc = 0;
5792 if (ret == 1)
5793 done_chunk_alloc = 1;
5794 } else if (!done_chunk_alloc &&
5795 space_info->force_alloc ==
5796 CHUNK_ALLOC_NO_FORCE) {
5797 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5798 }
5799
5800 /*
5801 * We didn't allocate a chunk, go ahead and drop the
5802 * empty size and loop again.
5803 */
5804 if (!done_chunk_alloc)
5805 loop = LOOP_NO_EMPTY_SIZE;
5806 }
5807
5808 if (loop == LOOP_NO_EMPTY_SIZE) {
5809 empty_size = 0;
5810 empty_cluster = 0;
5811 }
5812
5813 goto search;
5814 } else if (!ins->objectid) {
5815 ret = -ENOSPC;
5816 } else if (ins->objectid) {
5817 ret = 0;
5818 }
5819out:
5820
5821 return ret;
5822}
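/*
 * Illustrative sketch, not part of the original file: the staged
 * fallback that find_free_extent() implements above can be read as a
 * simple escalation loop over the LOOP_* levels.  try_alloc_pass() is
 * a hypothetical stand-in for one full scan of the block groups at a
 * given strictness level.
 */
static int find_free_extent_escalation_sketch(void)
{
	int loop;

	for (loop = LOOP_CACHING_NOWAIT; loop <= LOOP_NO_EMPTY_SIZE; loop++) {
		/*
		 * each pass relaxes a constraint: first search only
		 * cached groups, then wait for caching, then force a
		 * chunk allocation, and finally retry with empty_size
		 * and empty_cluster dropped to 0.
		 */
		if (try_alloc_pass(loop) == 0)	/* hypothetical helper */
			return 0;
	}
	return -ENOSPC;
}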
5823
5824static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
5825 int dump_block_groups)
5826{
5827 struct btrfs_block_group_cache *cache;
5828 int index = 0;
5829
5830 spin_lock(&info->lock);
5831 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
5832 (unsigned long long)info->flags,
5833 (unsigned long long)(info->total_bytes - info->bytes_used -
5834 info->bytes_pinned - info->bytes_reserved -
5835 info->bytes_readonly),
5836 (info->full) ? "" : "not ");
5837 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
5838 "reserved=%llu, may_use=%llu, readonly=%llu\n",
5839 (unsigned long long)info->total_bytes,
5840 (unsigned long long)info->bytes_used,
5841 (unsigned long long)info->bytes_pinned,
5842 (unsigned long long)info->bytes_reserved,
5843 (unsigned long long)info->bytes_may_use,
5844 (unsigned long long)info->bytes_readonly);
5845 spin_unlock(&info->lock);
5846
5847 if (!dump_block_groups)
5848 return;
5849
5850 down_read(&info->groups_sem);
5851again:
5852 list_for_each_entry(cache, &info->block_groups[index], list) {
5853 spin_lock(&cache->lock);
5854 printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
5855 "%llu pinned %llu reserved\n",
5856 (unsigned long long)cache->key.objectid,
5857 (unsigned long long)cache->key.offset,
5858 (unsigned long long)btrfs_block_group_used(&cache->item),
5859 (unsigned long long)cache->pinned,
5860 (unsigned long long)cache->reserved);
5861 btrfs_dump_free_space(cache, bytes);
5862 spin_unlock(&cache->lock);
5863 }
5864 if (++index < BTRFS_NR_RAID_TYPES)
5865 goto again;
5866 up_read(&info->groups_sem);
5867}
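/*
 * Sketch, not in the original source: the "free" figure printed by
 * dump_space_info() above is derived from the other counters rather
 * than tracked directly.  A minimal helper, assuming the same
 * btrfs_space_info fields and that info->lock is held by the caller:
 */
static u64 space_info_bytes_free_sketch(struct btrfs_space_info *info)
{
	return info->total_bytes - info->bytes_used - info->bytes_pinned -
	       info->bytes_reserved - info->bytes_readonly;
}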
5868
5869int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
5870 struct btrfs_root *root,
5871 u64 num_bytes, u64 min_alloc_size,
5872 u64 empty_size, u64 hint_byte,
5873 struct btrfs_key *ins, u64 data)
5874{
5875 bool final_tried = false;
5876 int ret;
5877
5878 data = btrfs_get_alloc_profile(root, data);
5879again:
5880 /*
5881 * the only place that sets empty_size is btrfs_realloc_node, which
5882 * is not called recursively on allocations
5883 */
5884 if (empty_size || root->ref_cows) {
5885 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5886 num_bytes + 2 * 1024 * 1024, data,
5887 CHUNK_ALLOC_NO_FORCE);
5888 if (ret < 0 && ret != -ENOSPC) {
5889 btrfs_abort_transaction(trans, root, ret);
5890 return ret;
5891 }
5892 }
5893
5894 WARN_ON(num_bytes < root->sectorsize);
5895 ret = find_free_extent(trans, root, num_bytes, empty_size,
5896 hint_byte, ins, data);
5897
5898 if (ret == -ENOSPC) {
5899 if (!final_tried) {
5900 num_bytes = num_bytes >> 1;
5901 num_bytes = num_bytes & ~(root->sectorsize - 1);
5902 num_bytes = max(num_bytes, min_alloc_size);
5903 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5904 num_bytes, data, CHUNK_ALLOC_FORCE);
5905 if (ret < 0 && ret != -ENOSPC) {
5906 btrfs_abort_transaction(trans, root, ret);
5907 return ret;
5908 }
5909 if (num_bytes == min_alloc_size)
5910 final_tried = true;
5911 goto again;
5912 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
5913 struct btrfs_space_info *sinfo;
5914
5915 sinfo = __find_space_info(root->fs_info, data);
5916 printk(KERN_ERR "btrfs allocation failed flags %llu, "
5917 "wanted %llu\n", (unsigned long long)data,
5918 (unsigned long long)num_bytes);
5919 if (sinfo)
5920 dump_space_info(sinfo, num_bytes, 1);
5921 }
5922 }
5923
5924 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
5925
5926 return ret;
5927}
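/*
 * Sketch, not in the original source: the -ENOSPC retry above shrinks
 * the request before trying again.  Each step halves num_bytes, rounds
 * it down to a sector boundary, and clamps it to min_alloc_size; once
 * the request reaches min_alloc_size the next failure is final.
 */
static u64 shrink_alloc_request_sketch(u64 num_bytes, u64 min_alloc_size,
				       u32 sectorsize)
{
	num_bytes >>= 1;			/* halve the request */
	num_bytes &= ~((u64)sectorsize - 1);	/* sector-align downward */
	return max(num_bytes, min_alloc_size);	/* never below the floor */
}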
5928
5929static int __btrfs_free_reserved_extent(struct btrfs_root *root,
5930 u64 start, u64 len, int pin)
5931{
5932 struct btrfs_block_group_cache *cache;
5933 int ret = 0;
5934
5935 cache = btrfs_lookup_block_group(root->fs_info, start);
5936 if (!cache) {
5937 printk(KERN_ERR "Unable to find block group for %llu\n",
5938 (unsigned long long)start);
5939 return -ENOSPC;
5940 }
5941
5942 if (pin)
5943 pin_down_extent(root, cache, start, len, 1);
5944 else {
5945 if (btrfs_test_opt(root, DISCARD))
5946 ret = btrfs_discard_extent(root, start, len, NULL);
5947 btrfs_add_free_space(cache, start, len);
5948 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
5949 }
5950 btrfs_put_block_group(cache);
5951
5952 trace_btrfs_reserved_extent_free(root, start, len);
5953
5954 return ret;
5955}
5956
5957int btrfs_free_reserved_extent(struct btrfs_root *root,
5958 u64 start, u64 len)
5959{
5960 return __btrfs_free_reserved_extent(root, start, len, 0);
5961}
5962
5963int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
5964 u64 start, u64 len)
5965{
5966 return __btrfs_free_reserved_extent(root, start, len, 1);
5967}
5968
5969static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5970 struct btrfs_root *root,
5971 u64 parent, u64 root_objectid,
5972 u64 flags, u64 owner, u64 offset,
5973 struct btrfs_key *ins, int ref_mod)
5974{
5975 int ret;
5976 struct btrfs_fs_info *fs_info = root->fs_info;
5977 struct btrfs_extent_item *extent_item;
5978 struct btrfs_extent_inline_ref *iref;
5979 struct btrfs_path *path;
5980 struct extent_buffer *leaf;
5981 int type;
5982 u32 size;
5983
5984 if (parent > 0)
5985 type = BTRFS_SHARED_DATA_REF_KEY;
5986 else
5987 type = BTRFS_EXTENT_DATA_REF_KEY;
5988
5989 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
5990
5991 path = btrfs_alloc_path();
5992 if (!path)
5993 return -ENOMEM;
5994
5995 path->leave_spinning = 1;
5996 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5997 ins, size);
5998 if (ret) {
5999 btrfs_free_path(path);
6000 return ret;
6001 }
6002
6003 leaf = path->nodes[0];
6004 extent_item = btrfs_item_ptr(leaf, path->slots[0],
6005 struct btrfs_extent_item);
6006 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
6007 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
6008 btrfs_set_extent_flags(leaf, extent_item,
6009 flags | BTRFS_EXTENT_FLAG_DATA);
6010
6011 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
6012 btrfs_set_extent_inline_ref_type(leaf, iref, type);
6013 if (parent > 0) {
6014 struct btrfs_shared_data_ref *ref;
6015 ref = (struct btrfs_shared_data_ref *)(iref + 1);
6016 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
6017 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
6018 } else {
6019 struct btrfs_extent_data_ref *ref;
6020 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
6021 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
6022 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
6023 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
6024 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
6025 }
6026
6027 btrfs_mark_buffer_dirty(path->nodes[0]);
6028 btrfs_free_path(path);
6029
6030 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
6031 if (ret) { /* -ENOENT, logic error */
6032 printk(KERN_ERR "btrfs update block group failed for %llu "
6033 "%llu\n", (unsigned long long)ins->objectid,
6034 (unsigned long long)ins->offset);
6035 BUG();
6036 }
6037 return ret;
6038}
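/*
 * Layout sketch, not in the original source: the item inserted above
 * is a btrfs_extent_item immediately followed by a single inline
 * backref, which is why its size is sizeof(*extent_item) plus
 * btrfs_extent_inline_ref_size(type).  For the two data-ref cases:
 *
 *   parent > 0 (shared):
 *     [btrfs_extent_item][iref type=SHARED_DATA_REF, offset=parent]
 *                        [btrfs_shared_data_ref: count]
 *
 *   parent == 0 (keyed):
 *     [btrfs_extent_item][iref type=EXTENT_DATA_REF]
 *                        [btrfs_extent_data_ref: root, objectid,
 *                         offset, count]
 *
 * in the keyed case the data ref body starts at iref->offset, so only
 * the one-byte type field precedes it.
 */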
6039
6040static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6041 struct btrfs_root *root,
6042 u64 parent, u64 root_objectid,
6043 u64 flags, struct btrfs_disk_key *key,
6044 int level, struct btrfs_key *ins)
6045{
6046 int ret;
6047 struct btrfs_fs_info *fs_info = root->fs_info;
6048 struct btrfs_extent_item *extent_item;
6049 struct btrfs_tree_block_info *block_info;
6050 struct btrfs_extent_inline_ref *iref;
6051 struct btrfs_path *path;
6052 struct extent_buffer *leaf;
6053 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
6054
6055 path = btrfs_alloc_path();
6056 if (!path)
6057 return -ENOMEM;
6058
6059 path->leave_spinning = 1;
6060 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
6061 ins, size);
6062 if (ret) {
6063 btrfs_free_path(path);
6064 return ret;
6065 }
6066
6067 leaf = path->nodes[0];
6068 extent_item = btrfs_item_ptr(leaf, path->slots[0],
6069 struct btrfs_extent_item);
6070 btrfs_set_extent_refs(leaf, extent_item, 1);
6071 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
6072 btrfs_set_extent_flags(leaf, extent_item,
6073 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6074 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6075
6076 btrfs_set_tree_block_key(leaf, block_info, key);
6077 btrfs_set_tree_block_level(leaf, block_info, level);
6078
6079 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
6080 if (parent > 0) {
6081 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
6082 btrfs_set_extent_inline_ref_type(leaf, iref,
6083 BTRFS_SHARED_BLOCK_REF_KEY);
6084 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
6085 } else {
6086 btrfs_set_extent_inline_ref_type(leaf, iref,
6087 BTRFS_TREE_BLOCK_REF_KEY);
6088 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
6089 }
6090
6091 btrfs_mark_buffer_dirty(leaf);
6092 btrfs_free_path(path);
6093
6094 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
6095 if (ret) { /* -ENOENT, logic error */
6096 printk(KERN_ERR "btrfs update block group failed for %llu "
6097 "%llu\n", (unsigned long long)ins->objectid,
6098 (unsigned long long)ins->offset);
6099 BUG();
6100 }
6101 return ret;
6102}
6103
6104int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6105 struct btrfs_root *root,
6106 u64 root_objectid, u64 owner,
6107 u64 offset, struct btrfs_key *ins)
6108{
6109 int ret;
6110
6111 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
6112
6113 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
6114 ins->offset, 0,
6115 root_objectid, owner, offset,
6116 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
6117 return ret;
6118}
6119
6120/*
6121 * this is used by the tree logging recovery code. It records that
6122 * an extent has been allocated and makes sure to clear the free
6123 * space cache bits as well
6124 */
6125int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6126 struct btrfs_root *root,
6127 u64 root_objectid, u64 owner, u64 offset,
6128 struct btrfs_key *ins)
6129{
6130 int ret;
6131 struct btrfs_block_group_cache *block_group;
6132 struct btrfs_caching_control *caching_ctl;
6133 u64 start = ins->objectid;
6134 u64 num_bytes = ins->offset;
6135
6136 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
6137 cache_block_group(block_group, trans, NULL, 0);
6138 caching_ctl = get_caching_control(block_group);
6139
6140 if (!caching_ctl) {
6141 BUG_ON(!block_group_cache_done(block_group));
6142 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6143 BUG_ON(ret); /* -ENOMEM */
6144 } else {
6145 mutex_lock(&caching_ctl->mutex);
6146
6147 if (start >= caching_ctl->progress) {
6148 ret = add_excluded_extent(root, start, num_bytes);
6149 BUG_ON(ret); /* -ENOMEM */
6150 } else if (start + num_bytes <= caching_ctl->progress) {
6151 ret = btrfs_remove_free_space(block_group,
6152 start, num_bytes);
6153 BUG_ON(ret); /* -ENOMEM */
6154 } else {
6155 num_bytes = caching_ctl->progress - start;
6156 ret = btrfs_remove_free_space(block_group,
6157 start, num_bytes);
6158 BUG_ON(ret); /* -ENOMEM */
6159
6160 start = caching_ctl->progress;
6161 num_bytes = ins->objectid + ins->offset -
6162 caching_ctl->progress;
6163 ret = add_excluded_extent(root, start, num_bytes);
6164 BUG_ON(ret); /* -ENOMEM */
6165 }
6166
6167 mutex_unlock(&caching_ctl->mutex);
6168 put_caching_control(caching_ctl);
6169 }
6170
6171 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
6172 RESERVE_ALLOC_NO_ACCOUNT);
6173 BUG_ON(ret); /* logic error */
6174 btrfs_put_block_group(block_group);
6175 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
6176 0, owner, offset, ins, 1);
6177 return ret;
6178}
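/*
 * Sketch, not in the original source: the caching_ctl branch above
 * splits the logged extent around caching_ctl->progress.  Bytes below
 * progress were already loaded into the free space cache and must be
 * removed from it; bytes at or beyond progress have not been scanned
 * yet and are excluded so the caching thread will skip them:
 *
 *   [start, progress)        -> btrfs_remove_free_space()
 *   [progress, start + len)  -> add_excluded_extent()
 *
 * the final else handles an extent straddling the boundary by doing
 * both halves.
 */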
6179
6180struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
6181 struct btrfs_root *root,
6182 u64 bytenr, u32 blocksize,
6183 int level)
6184{
6185 struct extent_buffer *buf;
6186
6187 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
6188 if (!buf)
6189 return ERR_PTR(-ENOMEM);
6190 btrfs_set_header_generation(buf, trans->transid);
6191 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
6192 btrfs_tree_lock(buf);
6193 clean_tree_block(trans, root, buf);
6194 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
6195
6196 btrfs_set_lock_blocking(buf);
6197 btrfs_set_buffer_uptodate(buf);
6198
6199 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
6200 /*
6201 * we allow two log transactions at a time, use different
6202 * EXTENT bits to differentiate dirty pages.
6203 */
6204 if (root->log_transid % 2 == 0)
6205 set_extent_dirty(&root->dirty_log_pages, buf->start,
6206 buf->start + buf->len - 1, GFP_NOFS);
6207 else
6208 set_extent_new(&root->dirty_log_pages, buf->start,
6209 buf->start + buf->len - 1, GFP_NOFS);
6210 } else {
6211 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
6212 buf->start + buf->len - 1, GFP_NOFS);
6213 }
6214 trans->blocks_used++;
6215 /* this returns a buffer locked for blocking */
6216 return buf;
6217}
6218
6219static struct btrfs_block_rsv *
6220use_block_rsv(struct btrfs_trans_handle *trans,
6221 struct btrfs_root *root, u32 blocksize)
6222{
6223 struct btrfs_block_rsv *block_rsv;
6224 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
6225 int ret;
6226
6227 block_rsv = get_block_rsv(trans, root);
6228
6229 if (block_rsv->size == 0) {
6230 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
6231 /*
6232 * If we couldn't reserve metadata bytes, try to use some from
6233 * the global reserve.
6234 */
6235 if (ret && block_rsv != global_rsv) {
6236 ret = block_rsv_use_bytes(global_rsv, blocksize);
6237 if (!ret)
6238 return global_rsv;
6239 return ERR_PTR(ret);
6240 } else if (ret) {
6241 return ERR_PTR(ret);
6242 }
6243 return block_rsv;
6244 }
6245
6246 ret = block_rsv_use_bytes(block_rsv, blocksize);
6247 if (!ret)
6248 return block_rsv;
6249 if (ret) {
6250 static DEFINE_RATELIMIT_STATE(_rs,
6251 DEFAULT_RATELIMIT_INTERVAL,
6252 /*DEFAULT_RATELIMIT_BURST*/ 2);
6253 if (__ratelimit(&_rs)) {
6254 printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
6255 WARN_ON(1);
6256 }
6257 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
6258 if (!ret) {
6259 return block_rsv;
6260 } else if (ret && block_rsv != global_rsv) {
6261 ret = block_rsv_use_bytes(global_rsv, blocksize);
6262 if (!ret)
6263 return global_rsv;
6264 }
6265 }
6266
6267 return ERR_PTR(-ENOSPC);
6268}
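/*
 * Sketch, not in the original source: the fallback order implemented
 * by use_block_rsv() above, with the retry and the ratelimited warning
 * stripped out.  The helpers are the ones used above; only the
 * simplified control flow is new.
 */
static struct btrfs_block_rsv *
use_block_rsv_fallback_sketch(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_block_rsv *rsv = get_block_rsv(trans, root);
	struct btrfs_block_rsv *global = &root->fs_info->global_block_rsv;

	if (!block_rsv_use_bytes(rsv, blocksize))
		return rsv;		/* the reserve already had room */
	if (!reserve_metadata_bytes(root, rsv, blocksize, 0))
		return rsv;		/* refilled from the space_info */
	if (rsv != global && !block_rsv_use_bytes(global, blocksize))
		return global;		/* last resort: global reserve */
	return ERR_PTR(-ENOSPC);
}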
6269
6270static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
6271 struct btrfs_block_rsv *block_rsv, u32 blocksize)
6272{
6273 block_rsv_add_bytes(block_rsv, blocksize, 0);
6274 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
6275}
6276
6277/*
6278 * finds a free extent and does all the dirty work required for allocation.
6279 * returns the key for the extent through ins, and a tree buffer for
6280 * the first block of the extent through buf.
6281 *
6282 * returns the tree buffer or NULL.
6283 */
6284struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6285 struct btrfs_root *root, u32 blocksize,
6286 u64 parent, u64 root_objectid,
6287 struct btrfs_disk_key *key, int level,
6288 u64 hint, u64 empty_size, int for_cow)
6289{
6290 struct btrfs_key ins;
6291 struct btrfs_block_rsv *block_rsv;
6292 struct extent_buffer *buf;
6293 u64 flags = 0;
6294 int ret;
6295
6296
6297 block_rsv = use_block_rsv(trans, root, blocksize);
6298 if (IS_ERR(block_rsv))
6299 return ERR_CAST(block_rsv);
6300
6301 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
6302 empty_size, hint, &ins, 0);
6303 if (ret) {
6304 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
6305 return ERR_PTR(ret);
6306 }
6307
6308 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
6309 blocksize, level);
6310 BUG_ON(IS_ERR(buf)); /* -ENOMEM */
6311
6312 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
6313 if (parent == 0)
6314 parent = ins.objectid;
6315 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6316 } else
6317 BUG_ON(parent > 0);
6318
6319 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
6320 struct btrfs_delayed_extent_op *extent_op;
6321 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
6322 BUG_ON(!extent_op); /* -ENOMEM */
6323 if (key)
6324 memcpy(&extent_op->key, key, sizeof(extent_op->key));
6325 else
6326 memset(&extent_op->key, 0, sizeof(extent_op->key));
6327 extent_op->flags_to_set = flags;
6328 extent_op->update_key = 1;
6329 extent_op->update_flags = 1;
6330 extent_op->is_data = 0;
6331
6332 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
6333 ins.objectid,
6334 ins.offset, parent, root_objectid,
6335 level, BTRFS_ADD_DELAYED_EXTENT,
6336 extent_op, for_cow);
6337 BUG_ON(ret); /* -ENOMEM */
6338 }
6339 return buf;
6340}
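/*
 * Usage sketch, not in the original source: a minimal call to
 * btrfs_alloc_free_block() patterned on a COW of an existing buffer.
 * parent is 0 because a nonzero parent (full backref) is only legal
 * for the reloc tree, as the BUG_ON above enforces; key may be NULL,
 * in which case the delayed extent op records a zeroed key.  Real COW
 * callers pass the block's first key instead.
 */
static struct extent_buffer *
cow_alloc_sketch(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		 struct extent_buffer *buf, int level)
{
	return btrfs_alloc_free_block(trans, root, buf->len,
				      0 /* parent */,
				      root->root_key.objectid,
				      NULL /* key */, level,
				      buf->start /* hint: stay nearby */,
				      0 /* empty_size */, 0 /* for_cow */);
}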
6341
6342struct walk_control {
6343 u64 refs[BTRFS_MAX_LEVEL];
6344 u64 flags[BTRFS_MAX_LEVEL];
6345 struct btrfs_key update_progress;
6346 int stage;
6347 int level;
6348 int shared_level;
6349 int update_ref;
6350 int keep_locks;
6351 int reada_slot;
6352 int reada_count;
6353 int for_reloc;
6354};
6355
6356#define DROP_REFERENCE 1
6357#define UPDATE_BACKREF 2
6358
6359static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
6360 struct btrfs_root *root,
6361 struct walk_control *wc,
6362 struct btrfs_path *path)
6363{
6364 u64 bytenr;
6365 u64 generation;
6366 u64 refs;
6367 u64 flags;
6368 u32 nritems;
6369 u32 blocksize;
6370 struct btrfs_key key;
6371 struct extent_buffer *eb;
6372 int ret;
6373 int slot;
6374 int nread = 0;
6375
6376 if (path->slots[wc->level] < wc->reada_slot) {
6377 wc->reada_count = wc->reada_count * 2 / 3;
6378 wc->reada_count = max(wc->reada_count, 2);
6379 } else {
6380 wc->reada_count = wc->reada_count * 3 / 2;
6381 wc->reada_count = min_t(int, wc->reada_count,
6382 BTRFS_NODEPTRS_PER_BLOCK(root));
6383 }
6384
6385 eb = path->nodes[wc->level];
6386 nritems = btrfs_header_nritems(eb);
6387 blocksize = btrfs_level_size(root, wc->level - 1);
6388
6389 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
6390 if (nread >= wc->reada_count)
6391 break;
6392
6393 cond_resched();
6394 bytenr = btrfs_node_blockptr(eb, slot);
6395 generation = btrfs_node_ptr_generation(eb, slot);
6396
6397 if (slot == path->slots[wc->level])
6398 goto reada;
6399
6400 if (wc->stage == UPDATE_BACKREF &&
6401 generation <= root->root_key.offset)
6402 continue;
6403
6404 /* We don't lock the tree block, it's OK to be racy here */
6405 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6406 &refs, &flags);
6407 /* We don't care about errors in readahead. */
6408 if (ret < 0)
6409 continue;
6410 BUG_ON(refs == 0);
6411
6412 if (wc->stage == DROP_REFERENCE) {
6413 if (refs == 1)
6414 goto reada;
6415
6416 if (wc->level == 1 &&
6417 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
6418 continue;
6419 if (!wc->update_ref ||
6420 generation <= root->root_key.offset)
6421 continue;
6422 btrfs_node_key_to_cpu(eb, &key, slot);
6423 ret = btrfs_comp_cpu_keys(&key,
6424 &wc->update_progress);
6425 if (ret < 0)
6426 continue;
6427 } else {
6428 if (wc->level == 1 &&
6429 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
6430 continue;
6431 }
6432reada:
6433 ret = readahead_tree_block(root, bytenr, blocksize,
6434 generation);
6435 if (ret)
6436 break;
6437 nread++;
6438 }
6439 wc->reada_slot = slot;
6440}
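/*
 * Sketch, not in the original source: the adaptive window sizing at
 * the top of reada_walk_down().  If the walk is still behind the
 * previous readahead cursor the last window was big enough, so it
 * shrinks by a third; once the walk passes the cursor it grows by
 * half, clamped to the number of pointers per block:
 */
static int resize_reada_window_sketch(int reada_count, bool behind, int limit)
{
	if (behind)
		return max(reada_count * 2 / 3, 2);
	return min(reada_count * 3 / 2, limit);
}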
6441
6442/*
6443 * helper to process a tree block while walking down the tree.
6444 *
6445 * when wc->stage == UPDATE_BACKREF, this function updates
6446 * back refs for pointers in the block.
6447 *
6448 * NOTE: return value 1 means we should stop walking down.
6449 */
6450static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6451 struct btrfs_root *root,
6452 struct btrfs_path *path,
6453 struct walk_control *wc, int lookup_info)
6454{
6455 int level = wc->level;
6456 struct extent_buffer *eb = path->nodes[level];
6457 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
6458 int ret;
6459
6460 if (wc->stage == UPDATE_BACKREF &&
6461 btrfs_header_owner(eb) != root->root_key.objectid)
6462 return 1;
6463
6464 /*
6465 * when the reference count of a tree block is 1, it won't increase
6466 * again. once the full backref flag is set, we never clear it.
6467 */
6468 if (lookup_info &&
6469 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
6470 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
6471 BUG_ON(!path->locks[level]);
6472 ret = btrfs_lookup_extent_info(trans, root,
6473 eb->start, eb->len,
6474 &wc->refs[level],
6475 &wc->flags[level]);
6476 BUG_ON(ret == -ENOMEM);
6477 if (ret)
6478 return ret;
6479 BUG_ON(wc->refs[level] == 0);
6480 }
6481
6482 if (wc->stage == DROP_REFERENCE) {
6483 if (wc->refs[level] > 1)
6484 return 1;
6485
6486 if (path->locks[level] && !wc->keep_locks) {
6487 btrfs_tree_unlock_rw(eb, path->locks[level]);
6488 path->locks[level] = 0;
6489 }
6490 return 0;
6491 }
6492
6493 /* wc->stage == UPDATE_BACKREF */
6494 if (!(wc->flags[level] & flag)) {
6495 BUG_ON(!path->locks[level]);
6496 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
6497 BUG_ON(ret); /* -ENOMEM */
6498 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
6499 BUG_ON(ret); /* -ENOMEM */
6500 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
6501 eb->len, flag, 0);
6502 BUG_ON(ret); /* -ENOMEM */
6503 wc->flags[level] |= flag;
6504 }
6505
6506 /*
6507 * the block is shared by multiple trees, so it's not good to
6508 * keep the tree lock
6509 */
6510 if (path->locks[level] && level > 0) {
6511 btrfs_tree_unlock_rw(eb, path->locks[level]);
6512 path->locks[level] = 0;
6513 }
6514 return 0;
6515}
6516
6517/*
6518 * helper to process a tree block pointer.
6519 *
6520 * when wc->stage == DROP_REFERENCE, this function checks
6521 * reference count of the block pointed to. if the block
6522 * is shared and we need to update back refs for the subtree
6523 * rooted at the block, this function changes wc->stage to
6524 * UPDATE_BACKREF. if the block is shared and there is no
6525 * need to update back refs, this function drops the reference
6526 * to the block.
6527 *
6528 * NOTE: return value 1 means we should stop walking down.
6529 */
6530static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6531 struct btrfs_root *root,
6532 struct btrfs_path *path,
6533 struct walk_control *wc, int *lookup_info)
6534{
6535 u64 bytenr;
6536 u64 generation;
6537 u64 parent;
6538 u32 blocksize;
6539 struct btrfs_key key;
6540 struct extent_buffer *next;
6541 int level = wc->level;
6542 int reada = 0;
6543 int ret = 0;
6544
6545 generation = btrfs_node_ptr_generation(path->nodes[level],
6546 path->slots[level]);
6547 /*
6548 * if the lower level block was created before the snapshot
6549 * was created, we know there is no need to update back refs
6550 * for the subtree
6551 */
6552 if (wc->stage == UPDATE_BACKREF &&
6553 generation <= root->root_key.offset) {
6554 *lookup_info = 1;
6555 return 1;
6556 }
6557
6558 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
6559 blocksize = btrfs_level_size(root, level - 1);
6560
6561 next = btrfs_find_tree_block(root, bytenr, blocksize);
6562 if (!next) {
6563 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
6564 if (!next)
6565 return -ENOMEM;
6566 reada = 1;
6567 }
6568 btrfs_tree_lock(next);
6569 btrfs_set_lock_blocking(next);
6570
6571 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6572 &wc->refs[level - 1],
6573 &wc->flags[level - 1]);
6574 if (ret < 0) {
6575 btrfs_tree_unlock(next);
6576 return ret;
6577 }
6578
6579 BUG_ON(wc->refs[level - 1] == 0);
6580 *lookup_info = 0;
6581
6582 if (wc->stage == DROP_REFERENCE) {
6583 if (wc->refs[level - 1] > 1) {
6584 if (level == 1 &&
6585 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
6586 goto skip;
6587
6588 if (!wc->update_ref ||
6589 generation <= root->root_key.offset)
6590 goto skip;
6591
6592 btrfs_node_key_to_cpu(path->nodes[level], &key,
6593 path->slots[level]);
6594 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
6595 if (ret < 0)
6596 goto skip;
6597
6598 wc->stage = UPDATE_BACKREF;
6599 wc->shared_level = level - 1;
6600 }
6601 } else {
6602 if (level == 1 &&
6603 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
6604 goto skip;
6605 }
6606
6607 if (!btrfs_buffer_uptodate(next, generation, 0)) {
6608 btrfs_tree_unlock(next);
6609 free_extent_buffer(next);
6610 next = NULL;
6611 *lookup_info = 1;
6612 }
6613
6614 if (!next) {
6615 if (reada && level == 1)
6616 reada_walk_down(trans, root, wc, path);
6617 next = read_tree_block(root, bytenr, blocksize, generation);
6618 if (!next)
6619 return -EIO;
6620 btrfs_tree_lock(next);
6621 btrfs_set_lock_blocking(next);
6622 }
6623
6624 level--;
6625 BUG_ON(level != btrfs_header_level(next));
6626 path->nodes[level] = next;
6627 path->slots[level] = 0;
6628 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6629 wc->level = level;
6630 if (wc->level == 1)
6631 wc->reada_slot = 0;
6632 return 0;
6633skip:
6634 wc->refs[level - 1] = 0;
6635 wc->flags[level - 1] = 0;
6636 if (wc->stage == DROP_REFERENCE) {
6637 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6638 parent = path->nodes[level]->start;
6639 } else {
6640 BUG_ON(root->root_key.objectid !=
6641 btrfs_header_owner(path->nodes[level]));
6642 parent = 0;
6643 }
6644
6645 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
6646 root->root_key.objectid, level - 1, 0, 0);
6647 BUG_ON(ret); /* -ENOMEM */
6648 }
6649 btrfs_tree_unlock(next);
6650 free_extent_buffer(next);
6651 *lookup_info = 1;
6652 return 1;
6653}
6654
6655/*
6656 * helper to process a tree block while walking up the tree.
6657 *
6658 * when wc->stage == DROP_REFERENCE, this function drops
6659 * reference count on the block.
6660 *
6661 * when wc->stage == UPDATE_BACKREF, this function changes
6662 * wc->stage back to DROP_REFERENCE if we changed wc->stage
6663 * to UPDATE_BACKREF previously while processing the block.
6664 *
6665 * NOTE: return value 1 means we should stop walking up.
6666 */
6667static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6668 struct btrfs_root *root,
6669 struct btrfs_path *path,
6670 struct walk_control *wc)
6671{
6672 int ret;
6673 int level = wc->level;
6674 struct extent_buffer *eb = path->nodes[level];
6675 u64 parent = 0;
6676
6677 if (wc->stage == UPDATE_BACKREF) {
6678 BUG_ON(wc->shared_level < level);
6679 if (level < wc->shared_level)
6680 goto out;
6681
6682 ret = find_next_key(path, level + 1, &wc->update_progress);
6683 if (ret > 0)
6684 wc->update_ref = 0;
6685
6686 wc->stage = DROP_REFERENCE;
6687 wc->shared_level = -1;
6688 path->slots[level] = 0;
6689
6690 /*
6691 * check reference count again if the block isn't locked.
6692 * we should start walking down the tree again if reference
6693 * count is one.
6694 */
6695 if (!path->locks[level]) {
6696 BUG_ON(level == 0);
6697 btrfs_tree_lock(eb);
6698 btrfs_set_lock_blocking(eb);
6699 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6700
6701 ret = btrfs_lookup_extent_info(trans, root,
6702 eb->start, eb->len,
6703 &wc->refs[level],
6704 &wc->flags[level]);
6705 if (ret < 0) {
6706 btrfs_tree_unlock_rw(eb, path->locks[level]);
6707 return ret;
6708 }
6709 BUG_ON(wc->refs[level] == 0);
6710 if (wc->refs[level] == 1) {
6711 btrfs_tree_unlock_rw(eb, path->locks[level]);
6712 return 1;
6713 }
6714 }
6715 }
6716
6717 /* wc->stage == DROP_REFERENCE */
6718 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
6719
6720 if (wc->refs[level] == 1) {
6721 if (level == 0) {
6722 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6723 ret = btrfs_dec_ref(trans, root, eb, 1,
6724 wc->for_reloc);
6725 else
6726 ret = btrfs_dec_ref(trans, root, eb, 0,
6727 wc->for_reloc);
6728 BUG_ON(ret); /* -ENOMEM */
6729 }
6730 /* make block locked assertion in clean_tree_block happy */
6731 if (!path->locks[level] &&
6732 btrfs_header_generation(eb) == trans->transid) {
6733 btrfs_tree_lock(eb);
6734 btrfs_set_lock_blocking(eb);
6735 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6736 }
6737 clean_tree_block(trans, root, eb);
6738 }
6739
6740 if (eb == root->node) {
6741 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6742 parent = eb->start;
6743 else
6744 BUG_ON(root->root_key.objectid !=
6745 btrfs_header_owner(eb));
6746 } else {
6747 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6748 parent = path->nodes[level + 1]->start;
6749 else
6750 BUG_ON(root->root_key.objectid !=
6751 btrfs_header_owner(path->nodes[level + 1]));
6752 }
6753
6754 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
6755out:
6756 wc->refs[level] = 0;
6757 wc->flags[level] = 0;
6758 return 0;
6759}
6760
6761static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
6762 struct btrfs_root *root,
6763 struct btrfs_path *path,
6764 struct walk_control *wc)
6765{
6766 int level = wc->level;
6767 int lookup_info = 1;
6768 int ret;
6769
6770 while (level >= 0) {
6771 ret = walk_down_proc(trans, root, path, wc, lookup_info);
6772 if (ret > 0)
6773 break;
6774
6775 if (level == 0)
6776 break;
6777
6778 if (path->slots[level] >=
6779 btrfs_header_nritems(path->nodes[level]))
6780 break;
6781
6782 ret = do_walk_down(trans, root, path, wc, &lookup_info);
6783 if (ret > 0) {
6784 path->slots[level]++;
6785 continue;
6786 } else if (ret < 0)
6787 return ret;
6788 level = wc->level;
6789 }
6790 return 0;
6791}
6792
6793static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6794 struct btrfs_root *root,
6795 struct btrfs_path *path,
6796 struct walk_control *wc, int max_level)
6797{
6798 int level = wc->level;
6799 int ret;
6800
6801 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
6802 while (level < max_level && path->nodes[level]) {
6803 wc->level = level;
6804 if (path->slots[level] + 1 <
6805 btrfs_header_nritems(path->nodes[level])) {
6806 path->slots[level]++;
6807 return 0;
6808 } else {
6809 ret = walk_up_proc(trans, root, path, wc);
6810 if (ret > 0)
6811 return 0;
6812
6813 if (path->locks[level]) {
6814 btrfs_tree_unlock_rw(path->nodes[level],
6815 path->locks[level]);
6816 path->locks[level] = 0;
6817 }
6818 free_extent_buffer(path->nodes[level]);
6819 path->nodes[level] = NULL;
6820 level++;
6821 }
6822 }
6823 return 1;
6824}
6825
6826/*
6827 * drop a subvolume tree.
6828 *
6829 * this function traverses the tree freeing any blocks that are only
6830 * referenced by the tree.
6831 *
6832 * when a shared tree block is found, this function decreases its
6833 * reference count by one. if update_ref is true, this function
6834 * also makes sure backrefs for the shared block and all lower level
6835 * blocks are properly updated.
6836 */
6837int btrfs_drop_snapshot(struct btrfs_root *root,
6838 struct btrfs_block_rsv *block_rsv, int update_ref,
6839 int for_reloc)
6840{
6841 struct btrfs_path *path;
6842 struct btrfs_trans_handle *trans;
6843 struct btrfs_root *tree_root = root->fs_info->tree_root;
6844 struct btrfs_root_item *root_item = &root->root_item;
6845 struct walk_control *wc;
6846 struct btrfs_key key;
6847 int err = 0;
6848 int ret;
6849 int level;
6850 bool root_dropped = false;
6851
6852 path = btrfs_alloc_path();
6853 if (!path) {
6854 err = -ENOMEM;
6855 goto out;
6856 }
6857
6858 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6859 if (!wc) {
6860 btrfs_free_path(path);
6861 err = -ENOMEM;
6862 goto out;
6863 }
6864
6865 trans = btrfs_start_transaction(tree_root, 0);
6866 if (IS_ERR(trans)) {
6867 err = PTR_ERR(trans);
6868 goto out_free;
6869 }
6870
6871 if (block_rsv)
6872 trans->block_rsv = block_rsv;
6873
6874 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6875 level = btrfs_header_level(root->node);
6876 path->nodes[level] = btrfs_lock_root_node(root);
6877 btrfs_set_lock_blocking(path->nodes[level]);
6878 path->slots[level] = 0;
6879 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6880 memset(&wc->update_progress, 0,
6881 sizeof(wc->update_progress));
6882 } else {
6883 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6884 memcpy(&wc->update_progress, &key,
6885 sizeof(wc->update_progress));
6886
6887 level = root_item->drop_level;
6888 BUG_ON(level == 0);
6889 path->lowest_level = level;
6890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6891 path->lowest_level = 0;
6892 if (ret < 0) {
6893 err = ret;
6894 goto out_end_trans;
6895 }
6896 WARN_ON(ret > 0);
6897
6898 /*
6899 * unlock our path, this is safe because only this
6900 * function is allowed to delete this snapshot
6901 */
6902 btrfs_unlock_up_safe(path, 0);
6903
6904 level = btrfs_header_level(root->node);
6905 while (1) {
6906 btrfs_tree_lock(path->nodes[level]);
6907 btrfs_set_lock_blocking(path->nodes[level]);
6908 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6909
6910 ret = btrfs_lookup_extent_info(trans, root,
6911 path->nodes[level]->start,
6912 path->nodes[level]->len,
6913 &wc->refs[level],
6914 &wc->flags[level]);
6915 if (ret < 0) {
6916 err = ret;
6917 goto out_end_trans;
6918 }
6919 BUG_ON(wc->refs[level] == 0);
6920
6921 if (level == root_item->drop_level)
6922 break;
6923
6924 btrfs_tree_unlock(path->nodes[level]);
6925 path->locks[level] = 0;
6926 WARN_ON(wc->refs[level] != 1);
6927 level--;
6928 }
6929 }
6930
6931 wc->level = level;
6932 wc->shared_level = -1;
6933 wc->stage = DROP_REFERENCE;
6934 wc->update_ref = update_ref;
6935 wc->keep_locks = 0;
6936 wc->for_reloc = for_reloc;
6937 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
6938
6939 while (1) {
6940 ret = walk_down_tree(trans, root, path, wc);
6941 if (ret < 0) {
6942 err = ret;
6943 break;
6944 }
6945
6946 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
6947 if (ret < 0) {
6948 err = ret;
6949 break;
6950 }
6951
6952 if (ret > 0) {
6953 BUG_ON(wc->stage != DROP_REFERENCE);
6954 break;
6955 }
6956
6957 if (wc->stage == DROP_REFERENCE) {
6958 level = wc->level;
6959 btrfs_node_key(path->nodes[level],
6960 &root_item->drop_progress,
6961 path->slots[level]);
6962 root_item->drop_level = level;
6963 }
6964
6965 BUG_ON(wc->level == 0);
6966 if (btrfs_should_end_transaction(trans, tree_root)) {
6967 ret = btrfs_update_root(trans, tree_root,
6968 &root->root_key,
6969 root_item);
6970 if (ret) {
6971 btrfs_abort_transaction(trans, tree_root, ret);
6972 err = ret;
6973 goto out_end_trans;
6974 }
6975
6976 btrfs_end_transaction_throttle(trans, tree_root);
6977 trans = btrfs_start_transaction(tree_root, 0);
6978 if (IS_ERR(trans)) {
6979 err = PTR_ERR(trans);
6980 goto out_free;
6981 }
6982 if (block_rsv)
6983 trans->block_rsv = block_rsv;
6984 }
6985 }
6986 btrfs_release_path(path);
6987 if (err)
6988 goto out_end_trans;
6989
6990 ret = btrfs_del_root(trans, tree_root, &root->root_key);
6991 if (ret) {
6992 btrfs_abort_transaction(trans, tree_root, ret);
6993 goto out_end_trans;
6994 }
6995
6996 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6997 ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
6998 NULL, NULL);
6999 if (ret < 0) {
7000 btrfs_abort_transaction(trans, tree_root, ret);
7001 err = ret;
7002 goto out_end_trans;
7003 } else if (ret > 0) {
7004 /* if we fail to delete the orphan item this time
7005 * around, it'll get picked up the next time.
7006 *
7007 * The most common failure here is just -ENOENT.
7008 */
7009 btrfs_del_orphan_item(trans, tree_root,
7010 root->root_key.objectid);
7011 }
7012 }
7013
7014 if (root->in_radix) {
7015 btrfs_free_fs_root(tree_root->fs_info, root);
7016 } else {
7017 free_extent_buffer(root->node);
7018 free_extent_buffer(root->commit_root);
7019 kfree(root);
7020 }
7021 root_dropped = true;
7022out_end_trans:
7023 btrfs_end_transaction_throttle(trans, tree_root);
7024out_free:
7025 kfree(wc);
7026 btrfs_free_path(path);
7027out:
7028 /*
7029 * So if we need to stop dropping the snapshot for whatever reason, we
7030 * need to make sure to add it back to the dead root list so that we
7031 * keep trying to do the work later. This also cleans up roots if we
7032 * don't have it in the radix (like when we recover after a power fail
7033 * or unmount) so we don't leak memory.
7034 */
7035 if (root_dropped == false)
7036 btrfs_add_dead_root(root);
7037 if (err && err != -EAGAIN)
7038 btrfs_std_error(root->fs_info, err);
7039 return err;
7040}
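/*
 * Usage sketch, not in the original source: dropping a dead subvolume
 * root the way the snapshot-cleaner path does.  update_ref is 0
 * because no other tree needs its backrefs preserved, and for_reloc
 * is 0 outside of balance.
 */
static void drop_dead_root_sketch(struct btrfs_root *root)
{
	int err = btrfs_drop_snapshot(root, NULL /* block_rsv */,
				      0 /* update_ref */, 0 /* for_reloc */);

	if (err && err != -EAGAIN)
		printk(KERN_ERR "btrfs: snapshot drop failed: %d\n", err);
}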
7041
7042/*
7043 * drop subtree rooted at tree block 'node'.
7044 *
7045 * NOTE: this function will unlock and release tree block 'node'
7046 * NOTE: this function will unlock and release tree block 'node'.
7047 * only used by the relocation code
7048int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
7049 struct btrfs_root *root,
7050 struct extent_buffer *node,
7051 struct extent_buffer *parent)
7052{
7053 struct btrfs_path *path;
7054 struct walk_control *wc;
7055 int level;
7056 int parent_level;
7057 int ret = 0;
7058 int wret;
7059
7060 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7061
7062 path = btrfs_alloc_path();
7063 if (!path)
7064 return -ENOMEM;
7065
7066 wc = kzalloc(sizeof(*wc), GFP_NOFS);
7067 if (!wc) {
7068 btrfs_free_path(path);
7069 return -ENOMEM;
7070 }
7071
7072 btrfs_assert_tree_locked(parent);
7073 parent_level = btrfs_header_level(parent);
7074 extent_buffer_get(parent);
7075 path->nodes[parent_level] = parent;
7076 path->slots[parent_level] = btrfs_header_nritems(parent);
7077
7078 btrfs_assert_tree_locked(node);
7079 level = btrfs_header_level(node);
7080 path->nodes[level] = node;
7081 path->slots[level] = 0;
7082 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7083
7084 wc->refs[parent_level] = 1;
7085 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
7086 wc->level = level;
7087 wc->shared_level = -1;
7088 wc->stage = DROP_REFERENCE;
7089 wc->update_ref = 0;
7090 wc->keep_locks = 1;
7091 wc->for_reloc = 1;
7092 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
7093
7094 while (1) {
7095 wret = walk_down_tree(trans, root, path, wc);
7096 if (wret < 0) {
7097 ret = wret;
7098 break;
7099 }
7100
7101 wret = walk_up_tree(trans, root, path, wc, parent_level);
7102 if (wret < 0)
7103 ret = wret;
7104 if (wret != 0)
7105 break;
7106 }
7107
7108 kfree(wc);
7109 btrfs_free_path(path);
7110 return ret;
7111}
7112
7113static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7114{
7115 u64 num_devices;
7116 u64 stripped;
7117
7118 /*
7119 * if restripe for this chunk_type is on, pick the target profile and
7120 * return, otherwise do the usual balance
7121 */
7122 stripped = get_restripe_target(root->fs_info, flags);
7123 if (stripped)
7124 return extended_to_chunk(stripped);
7125
7126 /*
7127 * we add in the count of missing devices because we want
7128 * to make sure that any RAID levels on a degraded FS
7129 * continue to be honored.
7130 */
7131 num_devices = root->fs_info->fs_devices->rw_devices +
7132 root->fs_info->fs_devices->missing_devices;
7133
7134 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7135 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7136
7137 if (num_devices == 1) {
7138 stripped |= BTRFS_BLOCK_GROUP_DUP;
7139 stripped = flags & ~stripped;
7140
7141 /* turn raid0 into single device chunks */
7142 if (flags & BTRFS_BLOCK_GROUP_RAID0)
7143 return stripped;
7144
7145 /* turn mirroring into duplication */
7146 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
7147 BTRFS_BLOCK_GROUP_RAID10))
7148 return stripped | BTRFS_BLOCK_GROUP_DUP;
7149 } else {
7150 /* they already had raid on here, just return */
7151 if (flags & stripped)
7152 return flags;
7153
7154 stripped |= BTRFS_BLOCK_GROUP_DUP;
7155 stripped = flags & ~stripped;
7156
7157 /* switch duplicated blocks with raid1 */
7158 if (flags & BTRFS_BLOCK_GROUP_DUP)
7159 return stripped | BTRFS_BLOCK_GROUP_RAID1;
7160
7161 /* this is drive concat, leave it alone */
7162 }
7163
7164 return flags;
7165}
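/*
 * Sketch, not in the original source: the conversions performed by
 * update_block_group_flags() above, tabulated.  A restripe target, if
 * set, short-circuits everything; otherwise num_devices counts rw
 * plus missing devices so a degraded filesystem keeps its redundancy
 * class:
 *
 *   num_devices == 1:  raid0          -> single
 *                      raid1, raid10  -> dup
 *   num_devices  > 1:  dup            -> raid1
 *                      raid0, raid1, raid10, single -> unchanged
 */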
7166
7167static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
7168{
7169 struct btrfs_space_info *sinfo = cache->space_info;
7170 u64 num_bytes;
7171 u64 min_allocable_bytes;
7172 int ret = -ENOSPC;
7173
7174
7175 /*
7176 * We need some metadata space and system metadata space for
7177 * allocating chunks in some corner cases, so leave some headroom
7178 * unless we are forced to set the group readonly.
7179 */
7180 if ((sinfo->flags &
7181 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
7182 !force)
7183 min_allocable_bytes = 1 * 1024 * 1024;
7184 else
7185 min_allocable_bytes = 0;
7186
7187 spin_lock(&sinfo->lock);
7188 spin_lock(&cache->lock);
7189
7190 if (cache->ro) {
7191 ret = 0;
7192 goto out;
7193 }
7194
7195 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
7196 cache->bytes_super - btrfs_block_group_used(&cache->item);
7197
7198 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
7199 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
7200 min_allocable_bytes <= sinfo->total_bytes) {
7201 sinfo->bytes_readonly += num_bytes;
7202 cache->ro = 1;
7203 ret = 0;
7204 }
7205out:
7206 spin_unlock(&cache->lock);
7207 spin_unlock(&sinfo->lock);
7208 return ret;
7209}
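/*
 * Sketch, not in the original source: the admission test above in one
 * expression.  A block group may go read-only only if moving its
 * unused bytes into bytes_readonly still leaves min_allocable_bytes
 * of headroom in the space_info; sinfo->lock is assumed held:
 */
static bool can_set_ro_sketch(struct btrfs_space_info *sinfo, u64 unused,
			      u64 min_allocable_bytes)
{
	return sinfo->bytes_used + sinfo->bytes_reserved +
	       sinfo->bytes_pinned + sinfo->bytes_may_use +
	       sinfo->bytes_readonly + unused +
	       min_allocable_bytes <= sinfo->total_bytes;
}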
7210
7211int btrfs_set_block_group_ro(struct btrfs_root *root,
7212 struct btrfs_block_group_cache *cache)
7213
7214{
7215 struct btrfs_trans_handle *trans;
7216 u64 alloc_flags;
7217 int ret;
7218
7219 BUG_ON(cache->ro);
7220
7221 trans = btrfs_join_transaction(root);
7222 if (IS_ERR(trans))
7223 return PTR_ERR(trans);
7224
7225 alloc_flags = update_block_group_flags(root, cache->flags);
7226 if (alloc_flags != cache->flags) {
7227 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
7228 CHUNK_ALLOC_FORCE);
7229 if (ret < 0)
7230 goto out;
7231 }
7232
7233 ret = set_block_group_ro(cache, 0);
7234 if (!ret)
7235 goto out;
7236 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
7237 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
7238 CHUNK_ALLOC_FORCE);
7239 if (ret < 0)
7240 goto out;
7241 ret = set_block_group_ro(cache, 0);
7242out:
7243 btrfs_end_transaction(trans, root);
7244 return ret;
7245}
7246
7247int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
7248 struct btrfs_root *root, u64 type)
7249{
7250 u64 alloc_flags = get_alloc_profile(root, type);
7251 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
7252 CHUNK_ALLOC_FORCE);
7253}
7254
7255/*
7256 * helper to account the unused space of all the readonly block groups in the
7257 * list. takes mirrors into account.
7258 */
7259static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
7260{
7261 struct btrfs_block_group_cache *block_group;
7262 u64 free_bytes = 0;
7263 int factor;
7264
7265 list_for_each_entry(block_group, groups_list, list) {
7266 spin_lock(&block_group->lock);
7267
7268 if (!block_group->ro) {
7269 spin_unlock(&block_group->lock);
7270 continue;
7271 }
7272
7273 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
7274 BTRFS_BLOCK_GROUP_RAID10 |
7275 BTRFS_BLOCK_GROUP_DUP))
7276 factor = 2;
7277 else
7278 factor = 1;
7279
7280 free_bytes += (block_group->key.offset -
7281 btrfs_block_group_used(&block_group->item)) *
7282 factor;
7283
7284 spin_unlock(&block_group->lock);
7285 }
7286
7287 return free_bytes;
7288}
7289
7290/*
7291 * helper to account the unused space of all the readonly block groups in the
7292 * space_info. takes mirrors into account.
7293 */
7294u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
7295{
7296 int i;
7297 u64 free_bytes = 0;
7298
7299 spin_lock(&sinfo->lock);
7300
7301 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
7302 if (!list_empty(&sinfo->block_groups[i]))
7303 free_bytes += __btrfs_get_ro_block_group_free_space(
7304 &sinfo->block_groups[i]);
7305
7306 spin_unlock(&sinfo->lock);
7307
7308 return free_bytes;
7309}
7310
7311void btrfs_set_block_group_rw(struct btrfs_root *root,
7312 struct btrfs_block_group_cache *cache)
7313{
7314 struct btrfs_space_info *sinfo = cache->space_info;
7315 u64 num_bytes;
7316
7317 BUG_ON(!cache->ro);
7318
7319 spin_lock(&sinfo->lock);
7320 spin_lock(&cache->lock);
7321 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
7322 cache->bytes_super - btrfs_block_group_used(&cache->item);
7323 sinfo->bytes_readonly -= num_bytes;
7324 cache->ro = 0;
7325 spin_unlock(&cache->lock);
7326 spin_unlock(&sinfo->lock);
7327}
7328
7329/*
7330 * checks to see if it's even possible to relocate this block group.
7331 *
7332 * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
7333 * ok to go ahead and try.
7334 */
7335int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7336{
7337 struct btrfs_block_group_cache *block_group;
7338 struct btrfs_space_info *space_info;
7339 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
7340 struct btrfs_device *device;
7341 u64 min_free;
7342 u64 dev_min = 1;
7343 u64 dev_nr = 0;
7344 u64 target;
7345 int index;
7346 int full = 0;
7347 int ret = 0;
7348
7349 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
7350
7351 /* odd, couldn't find the block group, leave it alone */
7352 if (!block_group)
7353 return -1;
7354
7355 min_free = btrfs_block_group_used(&block_group->item);
7356
7357 /* no bytes used, we're good */
7358 if (!min_free)
7359 goto out;
7360
7361 space_info = block_group->space_info;
7362 spin_lock(&space_info->lock);
7363
7364 full = space_info->full;
7365
7366 /*
7367 * if this is the last block group we have in this space, we can't
7368 * relocate it unless we're able to allocate a new chunk below.
7369 *
7370 * Otherwise, we need to make sure we have room in the space to handle
7371 * all of the extents from this block group. If we can, we're good
7372 */
7373 if ((space_info->total_bytes != block_group->key.offset) &&
7374 (space_info->bytes_used + space_info->bytes_reserved +
7375 space_info->bytes_pinned + space_info->bytes_readonly +
7376 min_free < space_info->total_bytes)) {
7377 spin_unlock(&space_info->lock);
7378 goto out;
7379 }
7380 spin_unlock(&space_info->lock);
7381
7382 /*
7383 * ok we don't have enough space, but maybe we have free space on our
7384 * devices to allocate new chunks for relocation, so loop through our
7385 * alloc devices and guess if we have enough space. if this block
7386 * group is going to be restriped, run checks against the target
7387 * profile instead of the current one.
7388 */
7389 ret = -1;
7390
7391 /*
7392 * index:
7393 * 0: raid10
7394 * 1: raid1
7395 * 2: dup
7396 * 3: raid0
7397 * 4: single
7398 */
7399 target = get_restripe_target(root->fs_info, block_group->flags);
7400 if (target) {
7401 index = __get_block_group_index(extended_to_chunk(target));
7402 } else {
7403 /*
7404 * this is just a balance, so if we were marked as full
7405 * we know there is no space for a new chunk
7406 */
7407 if (full)
7408 goto out;
7409
7410 index = get_block_group_index(block_group);
7411 }
7412
7413 if (index == 0) {
7414 dev_min = 4;
7415 /* Divide by 2 */
7416 min_free >>= 1;
7417 } else if (index == 1) {
7418 dev_min = 2;
7419 } else if (index == 2) {
7420 /* Multiply by 2 */
7421 min_free <<= 1;
7422 } else if (index == 3) {
7423 dev_min = fs_devices->rw_devices;
7424 do_div(min_free, dev_min);
7425 }
7426
7427 mutex_lock(&root->fs_info->chunk_mutex);
7428 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
7429 u64 dev_offset;
7430
7431 /*
7432 * check to make sure we can actually find a chunk with enough
7433 * space to fit our block group in.
7434 */
7435 if (device->total_bytes > device->bytes_used + min_free) {
7436 ret = find_free_dev_extent(device, min_free,
7437 &dev_offset, NULL);
7438 if (!ret)
7439 dev_nr++;
7440
7441 if (dev_nr >= dev_min)
7442 break;
7443
7444 ret = -1;
7445 }
7446 }
7447 mutex_unlock(&root->fs_info->chunk_mutex);
7448out:
7449 btrfs_put_block_group(block_group);
7450 return ret;
7451}
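/*
 * Sketch, not in the original source: the per-profile requirements
 * computed above before probing devices.  index follows the table in
 * btrfs_can_relocate() (0 raid10, 1 raid1, 2 dup, 3 raid0, 4 single):
 */
static void relocate_requirements_sketch(int index, u64 rw_devices,
					 u64 *min_free, u64 *dev_min)
{
	*dev_min = 1;
	if (index == 0) {		/* raid10: 4 devs, data split in half */
		*dev_min = 4;
		*min_free >>= 1;
	} else if (index == 1) {	/* raid1: two mirrored devices */
		*dev_min = 2;
	} else if (index == 2) {	/* dup: both copies on one device */
		*min_free <<= 1;
	} else if (index == 3) {	/* raid0: striped across all rw devs */
		*dev_min = rw_devices;
		do_div(*min_free, *dev_min);
	}
}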
7452
7453static int find_first_block_group(struct btrfs_root *root,
7454 struct btrfs_path *path, struct btrfs_key *key)
7455{
7456 int ret = 0;
7457 struct btrfs_key found_key;
7458 struct extent_buffer *leaf;
7459 int slot;
7460
7461 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
7462 if (ret < 0)
7463 goto out;
7464
7465 while (1) {
7466 slot = path->slots[0];
7467 leaf = path->nodes[0];
7468 if (slot >= btrfs_header_nritems(leaf)) {
7469 ret = btrfs_next_leaf(root, path);
7470 if (ret == 0)
7471 continue;
7472 if (ret < 0)
7473 goto out;
7474 break;
7475 }
7476 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7477
7478 if (found_key.objectid >= key->objectid &&
7479 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7480 ret = 0;
7481 goto out;
7482 }
7483 path->slots[0]++;
7484 }
7485out:
7486 return ret;
7487}
7488
7489void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
7490{
7491 struct btrfs_block_group_cache *block_group;
7492 u64 last = 0;
7493
7494 while (1) {
7495 struct inode *inode;
7496
7497 block_group = btrfs_lookup_first_block_group(info, last);
7498 while (block_group) {
7499 spin_lock(&block_group->lock);
7500 if (block_group->iref)
7501 break;
7502 spin_unlock(&block_group->lock);
7503 block_group = next_block_group(info->tree_root,
7504 block_group);
7505 }
7506 if (!block_group) {
7507 if (last == 0)
7508 break;
7509 last = 0;
7510 continue;
7511 }
7512
7513 inode = block_group->inode;
7514 block_group->iref = 0;
7515 block_group->inode = NULL;
7516 spin_unlock(&block_group->lock);
7517 iput(inode);
7518 last = block_group->key.objectid + block_group->key.offset;
7519 btrfs_put_block_group(block_group);
7520 }
7521}
7522
7523int btrfs_free_block_groups(struct btrfs_fs_info *info)
7524{
7525 struct btrfs_block_group_cache *block_group;
7526 struct btrfs_space_info *space_info;
7527 struct btrfs_caching_control *caching_ctl;
7528 struct rb_node *n;
7529
7530 down_write(&info->extent_commit_sem);
7531 while (!list_empty(&info->caching_block_groups)) {
7532 caching_ctl = list_entry(info->caching_block_groups.next,
7533 struct btrfs_caching_control, list);
7534 list_del(&caching_ctl->list);
7535 put_caching_control(caching_ctl);
7536 }
7537 up_write(&info->extent_commit_sem);
7538
7539 spin_lock(&info->block_group_cache_lock);
7540 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
7541 block_group = rb_entry(n, struct btrfs_block_group_cache,
7542 cache_node);
7543 rb_erase(&block_group->cache_node,
7544 &info->block_group_cache_tree);
7545 spin_unlock(&info->block_group_cache_lock);
7546
7547 down_write(&block_group->space_info->groups_sem);
7548 list_del(&block_group->list);
7549 up_write(&block_group->space_info->groups_sem);
7550
7551 if (block_group->cached == BTRFS_CACHE_STARTED)
7552 wait_block_group_cache_done(block_group);
7553
7554 /*
7555 * We haven't cached this block group, which means we could
7556 * possibly have excluded extents on this block group.
7557 */
7558 if (block_group->cached == BTRFS_CACHE_NO)
7559 free_excluded_extents(info->extent_root, block_group);
7560
7561 btrfs_remove_free_space_cache(block_group);
7562 btrfs_put_block_group(block_group);
7563
7564 spin_lock(&info->block_group_cache_lock);
7565 }
7566 spin_unlock(&info->block_group_cache_lock);
7567
7568 /* now that all the block groups are freed, go through and
7569 * free all the space_info structs. This is only called during
7570 * the final stages of unmount, and so we know nobody is
7571 * using them. We call synchronize_rcu() once before we start,
7572 * just to be on the safe side.
7573 */
7574 synchronize_rcu();
7575
7576 release_global_block_rsv(info);
7577
7578 while (!list_empty(&info->space_info)) {
7579 space_info = list_entry(info->space_info.next,
7580 struct btrfs_space_info,
7581 list);
7582 if (space_info->bytes_pinned > 0 ||
7583 space_info->bytes_reserved > 0 ||
7584 space_info->bytes_may_use > 0) {
7585 WARN_ON(1);
7586 dump_space_info(space_info, 0, 0);
7587 }
7588 list_del(&space_info->list);
7589 kfree(space_info);
7590 }
7591 return 0;
7592}
7593
7594static void __link_block_group(struct btrfs_space_info *space_info,
7595 struct btrfs_block_group_cache *cache)
7596{
7597 int index = get_block_group_index(cache);
7598
7599 down_write(&space_info->groups_sem);
7600 list_add_tail(&cache->list, &space_info->block_groups[index]);
7601 up_write(&space_info->groups_sem);
7602}
7603
7604int btrfs_read_block_groups(struct btrfs_root *root)
7605{
7606 struct btrfs_path *path;
7607 int ret;
7608 struct btrfs_block_group_cache *cache;
7609 struct btrfs_fs_info *info = root->fs_info;
7610 struct btrfs_space_info *space_info;
7611 struct btrfs_key key;
7612 struct btrfs_key found_key;
7613 struct extent_buffer *leaf;
7614 int need_clear = 0;
7615 u64 cache_gen;
7616
7617 root = info->extent_root;
7618 key.objectid = 0;
7619 key.offset = 0;
7620 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
7621 path = btrfs_alloc_path();
7622 if (!path)
7623 return -ENOMEM;
7624 path->reada = 1;
7625
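	/*
	 * If the on-disk free space cache was written by a different
	 * generation than the current super block, it is stale and has to
	 * be rebuilt; mounting with CLEAR_CACHE forces the same thing.
	 */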
7626 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
7627 if (btrfs_test_opt(root, SPACE_CACHE) &&
7628 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
7629 need_clear = 1;
7630 if (btrfs_test_opt(root, CLEAR_CACHE))
7631 need_clear = 1;
7632
7633 while (1) {
7634 ret = find_first_block_group(root, path, &key);
7635 if (ret > 0)
7636 break;
7637 if (ret != 0)
7638 goto error;
7639 leaf = path->nodes[0];
7640 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7641 cache = kzalloc(sizeof(*cache), GFP_NOFS);
7642 if (!cache) {
7643 ret = -ENOMEM;
7644 goto error;
7645 }
7646 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7647 GFP_NOFS);
7648 if (!cache->free_space_ctl) {
7649 kfree(cache);
7650 ret = -ENOMEM;
7651 goto error;
7652 }
7653
7654 atomic_set(&cache->count, 1);
7655 spin_lock_init(&cache->lock);
7656 cache->fs_info = info;
7657 INIT_LIST_HEAD(&cache->list);
7658 INIT_LIST_HEAD(&cache->cluster_list);
7659
7660 if (need_clear)
7661 cache->disk_cache_state = BTRFS_DC_CLEAR;
7662
7663 read_extent_buffer(leaf, &cache->item,
7664 btrfs_item_ptr_offset(leaf, path->slots[0]),
7665 sizeof(cache->item));
7666 memcpy(&cache->key, &found_key, sizeof(found_key));
7667
7668 key.objectid = found_key.objectid + found_key.offset;
7669 btrfs_release_path(path);
7670 cache->flags = btrfs_block_group_flags(&cache->item);
7671 cache->sectorsize = root->sectorsize;
7672
7673 btrfs_init_free_space_ctl(cache);
7674
7675 /*
7676 * We need to exclude the super stripes now so that the space
7677 * info has super bytes accounted for, otherwise we'll think
7678 * we have more space than we actually do.
7679 */
7680 exclude_super_stripes(root, cache);
7681
7682 /*
7683		 * Check for two cases: either we are full, and therefore
7684		 * don't need to bother with the caching work since we won't
7685		 * find any space, or we are empty, and we can just add all
7686		 * the space in and be done with it. This saves us a lot of
7687		 * time, particularly in the full case.
7688 */
7689 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7690 cache->last_byte_to_unpin = (u64)-1;
7691 cache->cached = BTRFS_CACHE_FINISHED;
7692 free_excluded_extents(root, cache);
7693 } else if (btrfs_block_group_used(&cache->item) == 0) {
7694 cache->last_byte_to_unpin = (u64)-1;
7695 cache->cached = BTRFS_CACHE_FINISHED;
7696 add_new_free_space(cache, root->fs_info,
7697 found_key.objectid,
7698 found_key.objectid +
7699 found_key.offset);
7700 free_excluded_extents(root, cache);
7701 }
7702
7703 ret = update_space_info(info, cache->flags, found_key.offset,
7704 btrfs_block_group_used(&cache->item),
7705 &space_info);
7706 BUG_ON(ret); /* -ENOMEM */
7707 cache->space_info = space_info;
7708 spin_lock(&cache->space_info->lock);
7709 cache->space_info->bytes_readonly += cache->bytes_super;
7710 spin_unlock(&cache->space_info->lock);
7711
7712 __link_block_group(space_info, cache);
7713
7714 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7715 BUG_ON(ret); /* Logic error */
7716
7717 set_avail_alloc_bits(root->fs_info, cache->flags);
7718 if (btrfs_chunk_readonly(root, cache->key.objectid))
7719 set_block_group_ro(cache, 1);
7720 }
7721
7722 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
7723 if (!(get_alloc_profile(root, space_info->flags) &
7724 (BTRFS_BLOCK_GROUP_RAID10 |
7725 BTRFS_BLOCK_GROUP_RAID1 |
7726 BTRFS_BLOCK_GROUP_DUP)))
7727 continue;
7728 /*
7729		 * Avoid allocating from un-mirrored block groups if there are
7730		 * mirrored block groups.
7731 */
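		/* Index 3 is RAID0 and index 4 is single; see get_block_group_index(). */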
7732 list_for_each_entry(cache, &space_info->block_groups[3], list)
7733 set_block_group_ro(cache, 1);
7734 list_for_each_entry(cache, &space_info->block_groups[4], list)
7735 set_block_group_ro(cache, 1);
7736 }
7737
7738 init_global_block_rsv(info);
7739 ret = 0;
7740error:
7741 btrfs_free_path(path);
7742 return ret;
7743}
7744
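/*
 * Create the in-memory cache entry and the on-disk block group item for a
 * newly allocated chunk.  The group starts out fully cached: its free
 * space is the chunk range minus the bytes already in use and any super
 * block stripes.
 */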
7745int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7746 struct btrfs_root *root, u64 bytes_used,
7747 u64 type, u64 chunk_objectid, u64 chunk_offset,
7748 u64 size)
7749{
7750 int ret;
7751 struct btrfs_root *extent_root;
7752 struct btrfs_block_group_cache *cache;
7753
7754 extent_root = root->fs_info->extent_root;
7755
7756 root->fs_info->last_trans_log_full_commit = trans->transid;
7757
7758 cache = kzalloc(sizeof(*cache), GFP_NOFS);
7759 if (!cache)
7760 return -ENOMEM;
7761 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7762 GFP_NOFS);
7763 if (!cache->free_space_ctl) {
7764 kfree(cache);
7765 return -ENOMEM;
7766 }
7767
7768 cache->key.objectid = chunk_offset;
7769 cache->key.offset = size;
7770 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
7771 cache->sectorsize = root->sectorsize;
7772 cache->fs_info = root->fs_info;
7773
7774 atomic_set(&cache->count, 1);
7775 spin_lock_init(&cache->lock);
7776 INIT_LIST_HEAD(&cache->list);
7777 INIT_LIST_HEAD(&cache->cluster_list);
7778
7779 btrfs_init_free_space_ctl(cache);
7780
7781 btrfs_set_block_group_used(&cache->item, bytes_used);
7782 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
7783 cache->flags = type;
7784 btrfs_set_block_group_flags(&cache->item, type);
7785
7786 cache->last_byte_to_unpin = (u64)-1;
7787 cache->cached = BTRFS_CACHE_FINISHED;
7788 exclude_super_stripes(root, cache);
7789
7790 add_new_free_space(cache, root->fs_info, chunk_offset,
7791 chunk_offset + size);
7792
7793 free_excluded_extents(root, cache);
7794
7795 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7796 &cache->space_info);
7797 BUG_ON(ret); /* -ENOMEM */
7798 update_global_block_rsv(root->fs_info);
7799
7800 spin_lock(&cache->space_info->lock);
7801 cache->space_info->bytes_readonly += cache->bytes_super;
7802 spin_unlock(&cache->space_info->lock);
7803
7804 __link_block_group(cache->space_info, cache);
7805
7806 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7807 BUG_ON(ret); /* Logic error */
7808
7809 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
7810 sizeof(cache->item));
7811 if (ret) {
7812 btrfs_abort_transaction(trans, extent_root, ret);
7813 return ret;
7814 }
7815
7816 set_avail_alloc_bits(extent_root->fs_info, type);
7817
7818 return 0;
7819}
7820
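/*
 * Clear the extended profile bits for @flags from the per-type
 * avail_*_alloc_bits masks.  Called when the last block group using a
 * given raid profile is removed.
 */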
7821static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
7822{
7823 u64 extra_flags = chunk_to_extended(flags) &
7824 BTRFS_EXTENDED_PROFILE_MASK;
7825
7826 if (flags & BTRFS_BLOCK_GROUP_DATA)
7827 fs_info->avail_data_alloc_bits &= ~extra_flags;
7828 if (flags & BTRFS_BLOCK_GROUP_METADATA)
7829 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
7830 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
7831 fs_info->avail_system_alloc_bits &= ~extra_flags;
7832}
7833
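/*
 * Remove a (read-only) block group: drop its free space cache inode,
 * pull it out of any allocation clusters and its space_info lists, then
 * delete the free space cache item and the block group item from disk.
 */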
7834int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7835 struct btrfs_root *root, u64 group_start)
7836{
7837 struct btrfs_path *path;
7838 struct btrfs_block_group_cache *block_group;
7839 struct btrfs_free_cluster *cluster;
7840 struct btrfs_root *tree_root = root->fs_info->tree_root;
7841 struct btrfs_key key;
7842 struct inode *inode;
7843 int ret;
7844 int index;
7845 int factor;
7846
7847 root = root->fs_info->extent_root;
7848
7849 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
7850 BUG_ON(!block_group);
7851 BUG_ON(!block_group->ro);
7852
7853 /*
7854 * Free the reserved super bytes from this block group before
7855	 * removing it.
7856 */
7857 free_excluded_extents(root, block_group);
7858
7859 memcpy(&key, &block_group->key, sizeof(key));
7860 index = get_block_group_index(block_group);
7861 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
7862 BTRFS_BLOCK_GROUP_RAID1 |
7863 BTRFS_BLOCK_GROUP_RAID10))
7864 factor = 2;
7865 else
7866 factor = 1;
7867
7868 /* make sure this block group isn't part of an allocation cluster */
7869 cluster = &root->fs_info->data_alloc_cluster;
7870 spin_lock(&cluster->refill_lock);
7871 btrfs_return_cluster_to_free_space(block_group, cluster);
7872 spin_unlock(&cluster->refill_lock);
7873
7874 /*
7875 * make sure this block group isn't part of a metadata
7876 * allocation cluster
7877 */
7878 cluster = &root->fs_info->meta_alloc_cluster;
7879 spin_lock(&cluster->refill_lock);
7880 btrfs_return_cluster_to_free_space(block_group, cluster);
7881 spin_unlock(&cluster->refill_lock);
7882
7883 path = btrfs_alloc_path();
7884 if (!path) {
7885 ret = -ENOMEM;
7886 goto out;
7887 }
7888
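	/*
	 * Make the free space cache inode go away: put it on the orphan
	 * list and drop its link count so its items are reclaimed, then
	 * drop both the block group's reference and our lookup reference.
	 */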
7889 inode = lookup_free_space_inode(tree_root, block_group, path);
7890 if (!IS_ERR(inode)) {
7891 ret = btrfs_orphan_add(trans, inode);
7892 if (ret) {
7893 btrfs_add_delayed_iput(inode);
7894 goto out;
7895 }
7896 clear_nlink(inode);
7897		/* One for the block group's ref */
7898 spin_lock(&block_group->lock);
7899 if (block_group->iref) {
7900 block_group->iref = 0;
7901 block_group->inode = NULL;
7902 spin_unlock(&block_group->lock);
7903 iput(inode);
7904 } else {
7905 spin_unlock(&block_group->lock);
7906 }
7907 /* One for our lookup ref */
7908 btrfs_add_delayed_iput(inode);
7909 }
7910
7911 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
7912 key.offset = block_group->key.objectid;
7913 key.type = 0;
7914
7915 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
7916 if (ret < 0)
7917 goto out;
7918 if (ret > 0)
7919 btrfs_release_path(path);
7920 if (ret == 0) {
7921 ret = btrfs_del_item(trans, tree_root, path);
7922 if (ret)
7923 goto out;
7924 btrfs_release_path(path);
7925 }
7926
7927 spin_lock(&root->fs_info->block_group_cache_lock);
7928 rb_erase(&block_group->cache_node,
7929 &root->fs_info->block_group_cache_tree);
7930 spin_unlock(&root->fs_info->block_group_cache_lock);
7931
7932 down_write(&block_group->space_info->groups_sem);
7933 /*
7934 * we must use list_del_init so people can check to see if they
7935	 * We must use list_del_init so callers can check whether they
7936	 * are still on the list after taking the semaphore.
7937 list_del_init(&block_group->list);
7938 if (list_empty(&block_group->space_info->block_groups[index]))
7939 clear_avail_alloc_bits(root->fs_info, block_group->flags);
7940 up_write(&block_group->space_info->groups_sem);
7941
7942 if (block_group->cached == BTRFS_CACHE_STARTED)
7943 wait_block_group_cache_done(block_group);
7944
7945 btrfs_remove_free_space_cache(block_group);
7946
7947 spin_lock(&block_group->space_info->lock);
7948 block_group->space_info->total_bytes -= block_group->key.offset;
7949 block_group->space_info->bytes_readonly -= block_group->key.offset;
7950 block_group->space_info->disk_total -= block_group->key.offset * factor;
7951 spin_unlock(&block_group->space_info->lock);
7952
7953 memcpy(&key, &block_group->key, sizeof(key));
7954
7955 btrfs_clear_space_info_full(root->fs_info);
7956
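	/* Once for the lookup ref taken above, once for the cache tree's ref. */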
7957 btrfs_put_block_group(block_group);
7958 btrfs_put_block_group(block_group);
7959
7960 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7961 if (ret > 0)
7962 ret = -EIO;
7963 if (ret < 0)
7964 goto out;
7965
7966 ret = btrfs_del_item(trans, root, path);
7967out:
7968 btrfs_free_path(path);
7969 return ret;
7970}
7971
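/*
 * Create the base space_info structures: system, plus either separate
 * metadata and data infos or a single mixed metadata+data info when the
 * MIXED_GROUPS incompat feature is set.
 */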
7972int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
7973{
7974 struct btrfs_space_info *space_info;
7975 struct btrfs_super_block *disk_super;
7976 u64 features;
7977 u64 flags;
7978 int mixed = 0;
7979 int ret;
7980
7981 disk_super = fs_info->super_copy;
7982 if (!btrfs_super_root(disk_super))
7983 return 1;
7984
7985 features = btrfs_super_incompat_flags(disk_super);
7986 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
7987 mixed = 1;
7988
7989 flags = BTRFS_BLOCK_GROUP_SYSTEM;
7990 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7991 if (ret)
7992 goto out;
7993
7994 if (mixed) {
7995 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
7996 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7997 } else {
7998 flags = BTRFS_BLOCK_GROUP_METADATA;
7999 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8000 if (ret)
8001 goto out;
8002
8003 flags = BTRFS_BLOCK_GROUP_DATA;
8004 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8005 }
8006out:
8007 return ret;
8008}
8009
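/*
 * Error-path wrappers around unpin_extent_range() and btrfs_discard_extent(),
 * used by the cleanup code when the filesystem has to be torn down outside
 * of a normal transaction commit.
 */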
8010int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
8011{
8012 return unpin_extent_range(root, start, end, false);
8013}
8014
8015int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
8016 u64 num_bytes, u64 *actual_bytes)
8017{
8018 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
8019}
8020
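/*
 * Back end for the FITRIM ioctl: walk every block group overlapping the
 * requested range, make sure its free space is cached, and discard the
 * free extents that are at least range->minlen bytes long.  The total
 * number of bytes trimmed is returned in range->len.
 */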
8021int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
8022{
8023 struct btrfs_fs_info *fs_info = root->fs_info;
8024 struct btrfs_block_group_cache *cache = NULL;
8025 u64 group_trimmed;
8026 u64 start;
8027 u64 end;
8028 u64 trimmed = 0;
8029 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
8030 int ret = 0;
8031
8032 /*
8033	 * Try to trim all FS space; the first block group may start at a non-zero offset.
8034 */
8035 if (range->len == total_bytes)
8036 cache = btrfs_lookup_first_block_group(fs_info, range->start);
8037 else
8038 cache = btrfs_lookup_block_group(fs_info, range->start);
8039
8040 while (cache) {
8041 if (cache->key.objectid >= (range->start + range->len)) {
8042 btrfs_put_block_group(cache);
8043 break;
8044 }
8045
8046 start = max(range->start, cache->key.objectid);
8047 end = min(range->start + range->len,
8048 cache->key.objectid + cache->key.offset);
8049
8050 if (end - start >= range->minlen) {
8051 if (!block_group_cache_done(cache)) {
8052 ret = cache_block_group(cache, NULL, root, 0);
8053 if (!ret)
8054 wait_block_group_cache_done(cache);
8055 }
8056 ret = btrfs_trim_block_group(cache,
8057 &group_trimmed,
8058 start,
8059 end,
8060 range->minlen);
8061
8062 trimmed += group_trimmed;
8063 if (ret) {
8064 btrfs_put_block_group(cache);
8065 break;
8066 }
8067 }
8068
8069 cache = next_block_group(fs_info->tree_root, cache);
8070 }
8071
8072 range->len = trimmed;
8073 return ret;
8074}