/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"

#define BTRFS_ROOT_TRANS_TAG 0

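/*
 * drop a reference on a transaction; free it once the last reference
 * is gone and it is no longer on the global transaction list
 */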
void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
		BUG_ON(!list_empty(&transaction->list));
		WARN_ON(transaction->delayed_refs.root.rb_node);
		WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}

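/*
 * point the commit root at the current root node, dropping our
 * reference on the old commit root
 */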
static noinline void switch_commit_root(struct btrfs_root *root)
{
	free_extent_buffer(root->commit_root);
	root->commit_root = btrfs_root_node(root);
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root, int nofail)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&root->fs_info->trans_lock);
loop:
	/* The file system has been taken offline. No new transactions. */
	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		spin_unlock(&root->fs_info->trans_lock);
		return -EROFS;
	}

	if (root->fs_info->trans_no_join) {
		if (!nofail) {
			spin_unlock(&root->fs_info->trans_lock);
			return -EBUSY;
		}
	}

	cur_trans = root->fs_info->running_transaction;
	if (cur_trans) {
		if (cur_trans->aborted) {
			spin_unlock(&root->fs_info->trans_lock);
			return cur_trans->aborted;
		}
		atomic_inc(&cur_trans->use_count);
		atomic_inc(&cur_trans->num_writers);
		cur_trans->num_joined++;
		spin_unlock(&root->fs_info->trans_lock);
		return 0;
	}
	spin_unlock(&root->fs_info->trans_lock);

	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;

	spin_lock(&root->fs_info->trans_lock);
	if (root->fs_info->running_transaction) {
		/*
		 * someone started a transaction after we unlocked.  Make
		 * sure to redo the trans_no_join checks above
		 */
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		cur_trans = root->fs_info->running_transaction;
		goto loop;
	}

	atomic_set(&cur_trans->num_writers, 1);
	cur_trans->num_joined = 0;
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
	cur_trans->in_commit = 0;
	cur_trans->blocked = 0;
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
	cur_trans->commit_done = 0;
	cur_trans->start_time = get_seconds();

	cur_trans->delayed_refs.root = RB_ROOT;
	cur_trans->delayed_refs.num_entries = 0;
	cur_trans->delayed_refs.num_heads_ready = 0;
	cur_trans->delayed_refs.num_heads = 0;
	cur_trans->delayed_refs.flushing = 0;
	cur_trans->delayed_refs.run_delayed_start = 0;
	cur_trans->delayed_refs.seq = 1;
	init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
	spin_lock_init(&cur_trans->commit_lock);
	spin_lock_init(&cur_trans->delayed_refs.lock);
	INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
	list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
	extent_io_tree_init(&cur_trans->dirty_pages,
			    root->fs_info->btree_inode->i_mapping);
	root->fs_info->generation++;
	cur_trans->transid = root->fs_info->generation;
	root->fs_info->running_transaction = cur_trans;
	cur_trans->aborted = 0;
	spin_unlock(&root->fs_info->trans_lock);

	return 0;
}

/*
 * this does all the record keeping required to make sure that a
 * reference counted root is properly recorded in a given transaction.
 * This is required to make sure the old root from before we joined the
 * transaction is deleted when the transaction commits
 */
static int record_root_in_trans(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	if (root->ref_cows && root->last_trans < trans->transid) {
		WARN_ON(root == root->fs_info->extent_root);
		WARN_ON(root->commit_root != root->node);

		/*
		 * see below for in_trans_setup usage rules
		 * we have the reloc mutex held now, so there
		 * is only one writer in this function
		 */
		root->in_trans_setup = 1;

		/* make sure readers find in_trans_setup before
		 * they find our root->last_trans update
		 */
		smp_wmb();

		spin_lock(&root->fs_info->fs_roots_radix_lock);
		if (root->last_trans == trans->transid) {
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
		root->last_trans = trans->transid;

		/* this is pretty tricky.  We don't want to
		 * take the relocation lock in btrfs_record_root_in_trans
		 * unless we're really doing the first setup for this root in
		 * this transaction.
		 *
		 * Normally we'd use root->last_trans as a flag to decide
		 * if we want to take the expensive mutex.
		 *
		 * But, we have to set root->last_trans before we
		 * init the relocation root, otherwise, we trip over warnings
		 * in ctree.c.  The solution used here is to flag ourselves
		 * with root->in_trans_setup.  When this is 1, we're still
		 * fixing up the reloc trees and everyone must wait.
		 *
		 * When this is zero, they can trust root->last_trans and fly
		 * through btrfs_record_root_in_trans without having to take
		 * the lock.  smp_wmb() makes sure that all the writes above
		 * are done before we pop in the zero below
		 */
		btrfs_init_reloc_root(trans, root);
		smp_wmb();
		root->in_trans_setup = 0;
	}
	return 0;
}

int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	if (!root->ref_cows)
		return 0;

	/*
	 * see record_root_in_trans for comments about in_trans_setup usage
	 * and barriers
	 */
	smp_rmb();
	if (root->last_trans == trans->transid &&
	    !root->in_trans_setup)
		return 0;

	mutex_lock(&root->fs_info->reloc_mutex);
	record_root_in_trans(trans, root);
	mutex_unlock(&root->fs_info->reloc_mutex);

	return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the
 * current transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&root->fs_info->trans_lock);
	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		atomic_inc(&cur_trans->use_count);
		spin_unlock(&root->fs_info->trans_lock);

		wait_event(root->fs_info->transaction_wait,
			   !cur_trans->blocked);
		put_transaction(cur_trans);
	} else {
		spin_unlock(&root->fs_info->trans_lock);
	}
}

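/*
 * transaction start types: TRANS_START is used by
 * btrfs_start_transaction, TRANS_JOIN by btrfs_join_transaction,
 * TRANS_USERSPACE by btrfs_start_ioctl_transaction and
 * TRANS_JOIN_NOLOCK by btrfs_join_transaction_nolock (see the wrappers
 * below start_transaction)
 */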
enum btrfs_trans_type {
	TRANS_START,
	TRANS_JOIN,
	TRANS_USERSPACE,
	TRANS_JOIN_NOLOCK,
};

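/*
 * decide whether a transaction start of the given type should wait for
 * a blocked commit to finish before joining
 */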
static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type == TRANS_START &&
	    !atomic_read(&root->fs_info->open_ioctl_trans))
		return 1;

	return 0;
}

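/*
 * reserve space for num_items worth of metadata changes (unless we are
 * nesting into an existing handle), then join or create the running
 * transaction and fill in the handle
 */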
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
						    u64 num_items, int type)
{
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
	u64 num_bytes = 0;
	int ret;

	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
		return ERR_PTR(-EROFS);

	if (current->journal_info) {
		WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
		h = current->journal_info;
		h->use_count++;
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}

	/*
	 * Do the reservation before we join the transaction so we can do all
	 * the appropriate flushing if need be.
	 */
	if (num_items > 0 && root != root->fs_info->chunk_root) {
		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
		ret = btrfs_block_rsv_add(root,
					  &root->fs_info->trans_block_rsv,
					  num_bytes);
		if (ret)
			return ERR_PTR(ret);
	}
again:
	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	if (!h)
		return ERR_PTR(-ENOMEM);

	if (may_wait_transaction(root, type))
		wait_current_trans(root);

	do {
		ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
		if (ret == -EBUSY)
			wait_current_trans(root);
	} while (ret == -EBUSY);

	if (ret < 0) {
		kmem_cache_free(btrfs_trans_handle_cachep, h);
		return ERR_PTR(ret);
	}

	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
	h->blocks_used = 0;
	h->bytes_reserved = 0;
	h->delayed_ref_updates = 0;
	h->use_count = 1;
	h->block_rsv = NULL;
	h->orig_rsv = NULL;
	h->aborted = 0;

	smp_mb();
	if (cur_trans->blocked && may_wait_transaction(root, type)) {
		btrfs_commit_transaction(h, root);
		goto again;
	}

	if (num_bytes) {
		trace_btrfs_space_reservation(root->fs_info, "transaction",
					      h->transid, num_bytes, 1);
		h->block_rsv = &root->fs_info->trans_block_rsv;
		h->bytes_reserved = num_bytes;
	}

got_it:
	btrfs_record_root_in_trans(h, root);

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
	return h;
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_items)
{
	return start_transaction(root, num_items, TRANS_START);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_JOIN);
}

struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_USERSPACE);
}

/* wait for a transaction commit to be fully complete */
static noinline void wait_for_commit(struct btrfs_root *root,
				     struct btrfs_transaction *commit)
{
	wait_event(commit->commit_wait, commit->commit_done);
}

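/*
 * wait for the commit of a specific transid to complete.  transid == 0
 * means wait for the newest transaction that is already committing, or
 * return immediately if nothing is committing
 */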
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret;

	ret = 0;
	if (transid) {
		if (transid <= root->fs_info->last_trans_committed)
			goto out;

		/* find specified transaction */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
			if (t->transid > transid)
				break;
		}
		spin_unlock(&root->fs_info->trans_lock);
		ret = -EINVAL;
		if (!cur_trans)
			goto out;  /* bad transid */
	} else {
		/* find newest transaction that is committing | committed */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->in_commit) {
				if (t->commit_done)
					break;
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);
		if (!cur_trans)
			goto out;  /* nothing committing|committed */
	}

	wait_for_commit(root, cur_trans);

	put_transaction(cur_trans);
	ret = 0;
out:
	return ret;
}

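/*
 * throttle new transaction starters while a commit is blocked, unless
 * a userspace (ioctl) transaction is open
 */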
void btrfs_throttle(struct btrfs_root *root)
{
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
		wait_current_trans(root);
}

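/*
 * returns 1 when the global block reserve looks too depleted to keep
 * this transaction going (see btrfs_block_rsv_check for the exact
 * threshold semantics)
 */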
static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	int ret;

	ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
	return ret ? 1 : 0;
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_block_rsv *rsv = trans->block_rsv;
	int updates;
	int err;

	smp_mb();
	if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
		return 1;

	/*
	 * We need to do this in case we're deleting csums so the global block
	 * rsv gets used instead of the csum block rsv.
	 */
	trans->block_rsv = NULL;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
	if (updates) {
		err = btrfs_run_delayed_refs(trans, root, updates);
		if (err) /* a negative error code also evaluates true */
			return err;
	}

	trans->block_rsv = rsv;

	return should_end_transaction(trans, root);
}

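/*
 * common helper behind the btrfs_end_transaction variants.  With
 * throttle set we may end up committing the transaction or running
 * delayed iputs on the way out; with lock cleared (the nolock variant)
 * we skip the blocked/commit checks entirely.
 */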
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, int throttle,
				   int lock)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_fs_info *info = root->fs_info;
	int count = 0;
	int err = 0;

	if (--trans->use_count) {
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
	while (count < 2) {
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
	}

	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
	    should_end_transaction(trans, root)) {
		trans->transaction->blocked = 1;
		smp_wmb();
	}

	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
		if (throttle) {
			/*
			 * We may race with somebody else here and end up
			 * having to call end_transaction on ourselves again,
			 * so inc our use_count.
			 */
			trans->use_count++;
			return btrfs_commit_transaction(trans, root);
		} else {
			wake_up_process(info->transaction_kthread);
		}
	}

	WARN_ON(cur_trans != info->running_transaction);
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);

	smp_mb();
	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	if (throttle)
		btrfs_run_delayed_iputs(root);

	if (trans->aborted ||
	    root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		err = -EIO;
	}

	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);
	return err;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0, 1);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1, 1);
}

int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0, 0);
}

int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are sent to disk but does not wait on them
 */
int btrfs_write_marked_extents(struct btrfs_root *root,
			       struct extent_io_tree *dirty_pages, int mark)
{
	int err = 0;
	int werr = 0;
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	u64 start = 0;
	u64 end;

	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
				      mark)) {
		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
				   mark, GFP_NOFS);
		err = filemap_fdatawrite_range(mapping, start, end);
		if (err)
			werr = err;
		cond_resched();
		start = end + 1;
	}
	if (err)
		werr = err;
	return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.  We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
			      struct extent_io_tree *dirty_pages, int mark)
{
	int err = 0;
	int werr = 0;
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	u64 start = 0;
	u64 end;

	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
				      EXTENT_NEED_WAIT)) {
		clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT,
				  GFP_NOFS);
		err = filemap_fdatawait_range(mapping, start, end);
		if (err)
			werr = err;
		cond_resched();
		start = end + 1;
	}
	if (err)
		werr = err;
	return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages,
					int mark)
{
	int ret;
	int ret2;

	ret = btrfs_write_marked_extents(root, dirty_pages, mark);
	ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);

	if (ret)
		return ret;
	if (ret2)
		return ret2;
	return 0;
}

int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					&trans->transaction->dirty_pages,
					EXTENT_DIRTY);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	u64 old_root_used;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	old_root_used = btrfs_root_used(&root->root_item);
	btrfs_write_dirty_block_groups(trans, root);

	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start &&
		    old_root_used == btrfs_root_used(&root->root_item))
			break;

		btrfs_set_root_node(&root->root_item, root->node);
		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		if (ret)
			return ret;

		old_root_used = btrfs_root_used(&root->root_item);
		ret = btrfs_write_dirty_block_groups(trans, root);
		if (ret)
			return ret;
	}

	if (root != root->fs_info->extent_root)
		switch_commit_root(root);

	return 0;
}

/*
 * update all the cowonly tree roots on disk
 *
 * The error handling in this function may not be obvious.  Any of the
 * failures will cause the file system to go offline.  We still need
 * to clean up the delayed refs.
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret)
		return ret;

	eb = btrfs_lock_root_node(fs_info->tree_root);
	ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
			      0, &eb);
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	if (ret)
		return ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret)
		return ret;

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);

		ret = update_cowonly_root(trans, root);
		if (ret)
			return ret;
	}

	down_write(&fs_info->extent_commit_sem);
	switch_commit_root(fs_info->extent_root);
	up_write(&fs_info->extent_commit_sem);

	return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This adds the
 * given root to the list of dead roots that need to be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root)
{
	spin_lock(&root->fs_info->trans_lock);
	list_add(&root->root_list, &root->fs_info->dead_roots);
	spin_unlock(&root->fs_info->trans_lock);
	return 0;
}

/*
 * update all the fs tree roots on disk.  These are the subvolume roots
 * touched in this transaction, tagged in the radix tree with
 * BTRFS_ROOT_TRANS_TAG
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *gang[8];
	struct btrfs_fs_info *fs_info = root->fs_info;
	int i;
	int ret;
	int err = 0;

	spin_lock(&fs_info->fs_roots_radix_lock);
	while (1) {
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
					(unsigned long)root->root_key.objectid,
					BTRFS_ROOT_TRANS_TAG);
			spin_unlock(&fs_info->fs_roots_radix_lock);

			btrfs_free_log(trans, root);
			btrfs_update_reloc_root(trans, root);
			btrfs_orphan_commit_root(trans, root);

			btrfs_save_ino_cache(root, trans);

			/* see comments in should_cow_block() */
			root->force_cow = 0;
			smp_wmb();

			if (root->commit_root != root->node) {
				mutex_lock(&root->fs_commit_mutex);
				switch_commit_root(root);
				btrfs_unpin_free_ino(root);
				mutex_unlock(&root->fs_commit_mutex);

				btrfs_set_root_node(&root->root_item,
						    root->node);
			}

			err = btrfs_update_root(trans, fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			spin_lock(&fs_info->fs_roots_radix_lock);
			if (err)
				break;
		}
	}
	spin_unlock(&fs_info->fs_roots_radix_lock);
	return err;
}

/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_trans_handle *trans;
	int ret;
	unsigned long nr;

	if (xchg(&root->defrag_running, 1))
		return 0;

	while (1) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		ret = btrfs_defrag_leaves(trans, root, cacheonly);

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct btrfs_root *parent_root;
	struct btrfs_block_rsv *rsv;
	struct inode *parent_inode;
	struct dentry *parent;
	struct dentry *dentry;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	int ret;
	u64 to_reserve = 0;
	u64 index = 0;
	u64 objectid;
	u64 root_flags;

	rsv = trans->block_rsv;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		ret = pending->error = -ENOMEM;
		goto fail;
	}

	ret = btrfs_find_free_objectid(tree_root, &objectid);
	if (ret) {
		pending->error = ret;
		goto fail;
	}

	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);

	if (to_reserve > 0) {
		ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
						  to_reserve);
		if (ret) {
			pending->error = ret;
			goto fail;
		}
	}

	key.objectid = objectid;
	key.offset = (u64)-1;
	key.type = BTRFS_ROOT_ITEM_KEY;

	trans->block_rsv = &pending->block_rsv;

	dentry = pending->dentry;
	parent = dget_parent(dentry);
	parent_inode = parent->d_inode;
	parent_root = BTRFS_I(parent_inode)->root;
	record_root_in_trans(trans, parent_root);

	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(parent_inode, &index);
	BUG_ON(ret); /* -ENOMEM */
	ret = btrfs_insert_dir_item(trans, parent_root,
				    dentry->d_name.name, dentry->d_name.len,
				    parent_inode, &key,
				    BTRFS_FT_DIR, index);
	if (ret == -EEXIST) {
		pending->error = -EEXIST;
		dput(parent);
		goto fail;
	} else if (ret) {
		goto abort_trans_dput;
	}

	btrfs_i_size_write(parent_inode, parent_inode->i_size +
					 dentry->d_name.len * 2);
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	if (ret)
		goto abort_trans_dput;

	/*
	 * pull in the delayed directory update
	 * and the delayed inode item
	 * otherwise we corrupt the FS during
	 * snapshot creation
	 */
	ret = btrfs_run_delayed_items(trans, root);
	if (ret) { /* Transaction aborted */
		dput(parent);
		goto fail;
	}

	record_root_in_trans(trans, root);
	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
	btrfs_check_and_init_root_item(new_root_item);

	root_flags = btrfs_root_flags(new_root_item);
	if (pending->readonly)
		root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
	else
		root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
	btrfs_set_root_flags(new_root_item, root_flags);

	old = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
	if (ret) {
		btrfs_tree_unlock(old);
		free_extent_buffer(old);
		goto abort_trans_dput;
	}

	btrfs_set_lock_blocking(old);

	ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
	/* clean up in any case */
	btrfs_tree_unlock(old);
	free_extent_buffer(old);
	if (ret)
		goto abort_trans_dput;

	/* see comments in should_cow_block() */
	root->force_cow = 1;
	smp_wmb();

	btrfs_set_root_node(new_root_item, tmp);
	/* record when the snapshot was created in key.offset */
	key.offset = trans->transid;
	ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	if (ret)
		goto abort_trans_dput;

	/*
	 * insert root back/forward references
	 */
	ret = btrfs_add_root_ref(trans, tree_root, objectid,
				 parent_root->root_key.objectid,
				 btrfs_ino(parent_inode), index,
				 dentry->d_name.name, dentry->d_name.len);
	dput(parent);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
	if (IS_ERR(pending->snap)) {
		ret = PTR_ERR(pending->snap);
		goto abort_trans;
	}

	ret = btrfs_reloc_post_snapshot(trans, pending);
	if (ret)
		goto abort_trans;
	ret = 0;
fail:
	kfree(new_root_item);
	trans->block_rsv = rsv;
	btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
	return ret;

abort_trans_dput:
	dput(parent);
abort_trans:
	btrfs_abort_transaction(trans, root, ret);
	goto fail;
}

/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;

	list_for_each_entry(pending, head, list)
		create_pending_snapshot(trans, fs_info, pending);
	return 0;
}

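/*
 * copy the current chunk root and tree root pointers into the in-memory
 * super block copy that will be written out at commit time
 */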
static void update_super_roots(struct btrfs_root *root)
{
	struct btrfs_root_item *root_item;
	struct btrfs_super_block *super;

	super = root->fs_info->super_copy;

	root_item = &root->fs_info->chunk_root->root_item;
	super->chunk_root = root_item->bytenr;
	super->chunk_root_generation = root_item->generation;
	super->chunk_root_level = root_item->level;

	root_item = &root->fs_info->tree_root->root_item;
	super->root = root_item->bytenr;
	super->generation = root_item->generation;
	super->root_level = root_item->level;
	if (btrfs_test_opt(root, SPACE_CACHE))
		super->cache_generation = root_item->generation;
}

int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
{
	int ret = 0;

	spin_lock(&info->trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->in_commit;
	spin_unlock(&info->trans_lock);
	return ret;
}

int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
	int ret = 0;

	spin_lock(&info->trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->blocked;
	spin_unlock(&info->trans_lock);
	return ret;
}

/*
 * wait for the current transaction commit to start and block subsequent
 * transaction joins
 */
static void wait_current_trans_commit_start(struct btrfs_root *root,
					    struct btrfs_transaction *trans)
{
	wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
}

/*
 * wait for the current transaction to start and then become unblocked.
 * caller holds ref.
 */
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
					 struct btrfs_transaction *trans)
{
	wait_event(root->fs_info->transaction_wait,
		   trans->commit_done || (trans->in_commit && !trans->blocked));
}

/*
 * commit transactions asynchronously.  once btrfs_commit_transaction_async
 * returns, any subsequent transaction will not be allowed to join.
 */
struct btrfs_async_commit {
	struct btrfs_trans_handle *newtrans;
	struct btrfs_root *root;
	struct delayed_work work;
};

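/* worker function that runs the actual commit for an async commit request */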
static void do_async_commit(struct work_struct *work)
{
	struct btrfs_async_commit *ac =
		container_of(work, struct btrfs_async_commit, work.work);

	btrfs_commit_transaction(ac->newtrans, ac->root);
	kfree(ac);
}

int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   int wait_for_unblock)
{
	struct btrfs_async_commit *ac;
	struct btrfs_transaction *cur_trans;

	ac = kmalloc(sizeof(*ac), GFP_NOFS);
	if (!ac)
		return -ENOMEM;

	INIT_DELAYED_WORK(&ac->work, do_async_commit);
	ac->root = root;
	ac->newtrans = btrfs_join_transaction(root);
	if (IS_ERR(ac->newtrans)) {
		int err = PTR_ERR(ac->newtrans);
		kfree(ac);
		return err;
	}

	/* take transaction reference */
	cur_trans = trans->transaction;
	atomic_inc(&cur_trans->use_count);

	btrfs_end_transaction(trans, root);
	schedule_delayed_work(&ac->work, 0);

	/* wait for transaction to start and unblock */
	if (wait_for_unblock)
		wait_current_trans_commit_start_and_unblock(root, cur_trans);
	else
		wait_current_trans_commit_start(root, cur_trans);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	put_transaction(cur_trans);
	return 0;
}

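/*
 * error path for a failed commit: pull the transaction off the global
 * list and tear it down.  The two put_transaction calls drop the
 * reference held by this handle and the extra reference taken at join
 * time to keep the transaction alive until commit.
 */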
static void cleanup_transaction(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;

	WARN_ON(trans->use_count > 1);

	spin_lock(&root->fs_info->trans_lock);
	list_del_init(&cur_trans->list);
	spin_unlock(&root->fs_info->trans_lock);

	btrfs_cleanup_one_transaction(trans->transaction, root);

	put_transaction(cur_trans);
	put_transaction(cur_trans);

	trace_btrfs_transaction_commit(root);

	btrfs_scrub_continue(root);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	kmem_cache_free(btrfs_trans_handle_cachep, trans);
}

/*
 * btrfs_transaction state sequence:
 * in_commit = 0, blocked = 0  (initial)
 * in_commit = 1, blocked = 1
 * blocked = 0
 * commit_done = 1
 */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_transaction *prev_trans = NULL;
	DEFINE_WAIT(wait);
	int ret = -EIO;
	int should_grow = 0;
	unsigned long now = get_seconds();
	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

	btrfs_run_ordered_operations(root, 0);

	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;

	if (cur_trans->aborted)
		goto cleanup_transaction;

	/* make a pass through all the delayed refs we have so far.
	 * any running procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (ret)
		goto cleanup_transaction;

	cur_trans = trans->transaction;

	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;

	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (ret)
		goto cleanup_transaction;

	spin_lock(&cur_trans->commit_lock);
	if (cur_trans->in_commit) {
		spin_unlock(&cur_trans->commit_lock);
		atomic_inc(&cur_trans->use_count);
		ret = btrfs_end_transaction(trans, root);

		wait_for_commit(root, cur_trans);

		put_transaction(cur_trans);

		return ret;
	}

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	spin_unlock(&cur_trans->commit_lock);
	wake_up(&root->fs_info->transaction_blocked_wait);

	spin_lock(&root->fs_info->trans_lock);
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			atomic_inc(&prev_trans->use_count);
			spin_unlock(&root->fs_info->trans_lock);

			wait_for_commit(root, prev_trans);

			put_transaction(prev_trans);
		} else {
			spin_unlock(&root->fs_info->trans_lock);
		}
	} else {
		spin_unlock(&root->fs_info->trans_lock);
	}

	if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
		should_grow = 1;

	do {
		int snap_pending = 0;

		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);

		if (flush_on_commit || snap_pending) {
			btrfs_start_delalloc_inodes(root, 1);
			btrfs_wait_ordered_extents(root, 0, 1);
		}

		ret = btrfs_run_delayed_items(trans, root);
		if (ret)
			goto cleanup_transaction;

		/*
		 * rename doesn't use btrfs_join_transaction, so once we
		 * set the transaction to blocked above, we aren't going
		 * to get any new ordered operations.  We can safely run
		 * it here and know for sure that nothing new will be
		 * added to the list
		 */
		btrfs_run_ordered_operations(root, 1);

		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (atomic_read(&cur_trans->num_writers) > 1)
			schedule_timeout(MAX_SCHEDULE_TIMEOUT);
		else if (should_grow)
			schedule_timeout(1);

		finish_wait(&cur_trans->writer_wait, &wait);
	} while (atomic_read(&cur_trans->num_writers) > 1 ||
		 (should_grow && cur_trans->num_joined != joined));

	/*
	 * Ok now we need to make sure to block out any other joins while we
	 * commit the transaction.  We could have started a join before
	 * setting no_join so make sure to wait for num_writers to == 1 again.
	 */
	spin_lock(&root->fs_info->trans_lock);
	root->fs_info->trans_no_join = 1;
	spin_unlock(&root->fs_info->trans_lock);
	wait_event(cur_trans->writer_wait,
		   atomic_read(&cur_trans->num_writers) == 1);

	/*
	 * the reloc mutex makes sure that we stop
	 * the balancing code from coming in and moving
	 * extents around in the middle of the commit
	 */
	mutex_lock(&root->fs_info->reloc_mutex);

	ret = btrfs_run_delayed_items(trans, root);
	if (ret) {
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	ret = create_pending_snapshots(trans, root->fs_info);
	if (ret) {
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret) {
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	/*
	 * make sure none of the code above managed to slip in a
	 * delayed item
	 */
	btrfs_assert_delayed_root_empty(root);

	WARN_ON(cur_trans != trans->transaction);

	btrfs_scrub_pause(root);
	/* commit_fs_roots and commit_cowonly_roots are responsible for
	 * getting the various roots consistent with each other.  Every
	 * pointer in the tree of tree roots has to point to the most up
	 * to date root for every subvolume and other tree.  So, we have
	 * to keep the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);

	ret = commit_fs_roots(trans, root);
	if (ret) {
		mutex_unlock(&root->fs_info->tree_log_mutex);
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	/* commit_fs_roots gets rid of all the tree log roots, so it is
	 * now safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	ret = commit_cowonly_roots(trans, root);
	if (ret) {
		mutex_unlock(&root->fs_info->tree_log_mutex);
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	btrfs_prepare_extent_commit(trans, root);

	cur_trans = root->fs_info->running_transaction;

	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
			    root->fs_info->tree_root->node);
	switch_commit_root(root->fs_info->tree_root);

	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
			    root->fs_info->chunk_root->node);
	switch_commit_root(root->fs_info->chunk_root);

	update_super_roots(root);

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	}

	memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
	       sizeof(*root->fs_info->super_copy));

	trans->transaction->blocked = 0;
	spin_lock(&root->fs_info->trans_lock);
	root->fs_info->running_transaction = NULL;
	root->fs_info->trans_no_join = 0;
	spin_unlock(&root->fs_info->trans_lock);
	mutex_unlock(&root->fs_info->reloc_mutex);

	wake_up(&root->fs_info->transaction_wait);

	ret = btrfs_write_and_wait_transaction(trans, root);
	if (ret) {
		btrfs_error(root->fs_info, ret,
			    "Error while writing out transaction.");
		mutex_unlock(&root->fs_info->tree_log_mutex);
		goto cleanup_transaction;
	}

	ret = write_ctree_super(trans, root, 0);
	if (ret) {
		mutex_unlock(&root->fs_info->tree_log_mutex);
		goto cleanup_transaction;
	}

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root);

	cur_trans->commit_done = 1;

	root->fs_info->last_trans_committed = cur_trans->transid;

	wake_up(&cur_trans->commit_wait);

	spin_lock(&root->fs_info->trans_lock);
	list_del_init(&cur_trans->list);
	spin_unlock(&root->fs_info->trans_lock);

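	/* one ref for this handle, one for the extra ref taken at join time */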
	put_transaction(cur_trans);
	put_transaction(cur_trans);

	trace_btrfs_transaction_commit(root);

	btrfs_scrub_continue(root);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (current != root->fs_info->transaction_kthread)
		btrfs_run_delayed_iputs(root);

	return ret;

cleanup_transaction:
	btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
	/* WARN_ON(1); */
	if (current->journal_info == trans)
		current->journal_info = NULL;
	cleanup_transaction(trans, root);

	return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for
 * deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	LIST_HEAD(list);
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&fs_info->trans_lock);
	list_splice_init(&fs_info->dead_roots, &list);
	spin_unlock(&fs_info->trans_lock);

	while (!list_empty(&list)) {
		int ret;

		root = list_entry(list.next, struct btrfs_root, root_list);
		list_del(&root->root_list);

		btrfs_kill_all_delayed_nodes(root);

		if (btrfs_header_backref_rev(root->node) <
		    BTRFS_MIXED_BACKREF_REV)
			ret = btrfs_drop_snapshot(root, NULL, 0, 0);
		else
			ret = btrfs_drop_snapshot(root, NULL, 1, 0);
		BUG_ON(ret < 0);
	}
	return 0;
}