| rjw | 1f88458 | 2022-01-06 17:20:42 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2007 Oracle.  All rights reserved. | 
|  | 3 | * | 
|  | 4 | * This program is free software; you can redistribute it and/or | 
|  | 5 | * modify it under the terms of the GNU General Public | 
|  | 6 | * License v2 as published by the Free Software Foundation. | 
|  | 7 | * | 
|  | 8 | * This program is distributed in the hope that it will be useful, | 
|  | 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 11 | * General Public License for more details. | 
|  | 12 | * | 
|  | 13 | * You should have received a copy of the GNU General Public | 
|  | 14 | * License along with this program; if not, write to the | 
|  | 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 
|  | 16 | * Boston, MA 02111-1307, USA. | 
|  | 17 | */ | 
|  | 18 |  | 
|  | 19 | #include <linux/fs.h> | 
|  | 20 | #include <linux/slab.h> | 
|  | 21 | #include <linux/sched.h> | 
|  | 22 | #include <linux/writeback.h> | 
|  | 23 | #include <linux/pagemap.h> | 
|  | 24 | #include <linux/blkdev.h> | 
|  | 25 | #include <linux/uuid.h> | 
|  | 26 | #include "ctree.h" | 
|  | 27 | #include "disk-io.h" | 
|  | 28 | #include "transaction.h" | 
|  | 29 | #include "locking.h" | 
|  | 30 | #include "tree-log.h" | 
|  | 31 | #include "inode-map.h" | 
|  | 32 | #include "volumes.h" | 
|  | 33 | #include "dev-replace.h" | 
|  | 34 | #include "qgroup.h" | 
|  | 35 |  | 
|  | 36 | #define BTRFS_ROOT_TRANS_TAG 0 | 
|  | 37 |  | 
|  | 38 | static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { | 
|  | 39 | [TRANS_STATE_RUNNING]		= 0U, | 
|  | 40 | [TRANS_STATE_BLOCKED]		= (__TRANS_USERSPACE | | 
|  | 41 | __TRANS_START), | 
|  | 42 | [TRANS_STATE_COMMIT_START]	= (__TRANS_USERSPACE | | 
|  | 43 | __TRANS_START | | 
|  | 44 | __TRANS_ATTACH), | 
|  | 45 | [TRANS_STATE_COMMIT_DOING]	= (__TRANS_USERSPACE | | 
|  | 46 | __TRANS_START | | 
|  | 47 | __TRANS_ATTACH | | 
|  | 48 | __TRANS_JOIN), | 
|  | 49 | [TRANS_STATE_UNBLOCKED]		= (__TRANS_USERSPACE | | 
|  | 50 | __TRANS_START | | 
|  | 51 | __TRANS_ATTACH | | 
|  | 52 | __TRANS_JOIN | | 
|  | 53 | __TRANS_JOIN_NOLOCK), | 
|  | 54 | [TRANS_STATE_COMPLETED]		= (__TRANS_USERSPACE | | 
|  | 55 | __TRANS_START | | 
|  | 56 | __TRANS_ATTACH | | 
|  | 57 | __TRANS_JOIN | | 
|  | 58 | __TRANS_JOIN_NOLOCK), | 
|  | 59 | }; | 
|  | 60 |  | 
|  | 61 | void btrfs_put_transaction(struct btrfs_transaction *transaction) | 
|  | 62 | { | 
|  | 63 | WARN_ON(refcount_read(&transaction->use_count) == 0); | 
|  | 64 | if (refcount_dec_and_test(&transaction->use_count)) { | 
|  | 65 | BUG_ON(!list_empty(&transaction->list)); | 
|  | 66 | WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); | 
|  | 67 | if (transaction->delayed_refs.pending_csums) | 
|  | 68 | btrfs_err(transaction->fs_info, | 
|  | 69 | "pending csums is %llu", | 
|  | 70 | transaction->delayed_refs.pending_csums); | 
|  | 71 | while (!list_empty(&transaction->pending_chunks)) { | 
|  | 72 | struct extent_map *em; | 
|  | 73 |  | 
|  | 74 | em = list_first_entry(&transaction->pending_chunks, | 
|  | 75 | struct extent_map, list); | 
|  | 76 | list_del_init(&em->list); | 
|  | 77 | free_extent_map(em); | 
|  | 78 | } | 
|  | 79 | /* | 
|  | 80 | * If any block groups are found in ->deleted_bgs then it's | 
|  | 81 | * because the transaction was aborted and a commit did not | 
|  | 82 | * happen (things failed before writing the new superblock | 
|  | 83 | * and calling btrfs_finish_extent_commit()), so we can not | 
|  | 84 | * discard the physical locations of the block groups. | 
|  | 85 | */ | 
|  | 86 | while (!list_empty(&transaction->deleted_bgs)) { | 
|  | 87 | struct btrfs_block_group_cache *cache; | 
|  | 88 |  | 
|  | 89 | cache = list_first_entry(&transaction->deleted_bgs, | 
|  | 90 | struct btrfs_block_group_cache, | 
|  | 91 | bg_list); | 
|  | 92 | list_del_init(&cache->bg_list); | 
|  | 93 | btrfs_put_block_group_trimming(cache); | 
|  | 94 | btrfs_put_block_group(cache); | 
|  | 95 | } | 
|  | 96 | kfree(transaction); | 
|  | 97 | } | 
|  | 98 | } | 
|  | 99 |  | 
|  | 100 | static void clear_btree_io_tree(struct extent_io_tree *tree) | 
|  | 101 | { | 
|  | 102 | spin_lock(&tree->lock); | 
|  | 103 | /* | 
|  | 104 | * Do a single barrier for the waitqueue_active check here, the state | 
|  | 105 | * of the waitqueue should not change once clear_btree_io_tree is | 
|  | 106 | * called. | 
|  | 107 | */ | 
|  | 108 | smp_mb(); | 
|  | 109 | while (!RB_EMPTY_ROOT(&tree->state)) { | 
|  | 110 | struct rb_node *node; | 
|  | 111 | struct extent_state *state; | 
|  | 112 |  | 
|  | 113 | node = rb_first(&tree->state); | 
|  | 114 | state = rb_entry(node, struct extent_state, rb_node); | 
|  | 115 | rb_erase(&state->rb_node, &tree->state); | 
|  | 116 | RB_CLEAR_NODE(&state->rb_node); | 
|  | 117 | /* | 
|  | 118 | * btree io trees aren't supposed to have tasks waiting for | 
|  | 119 | * changes in the flags of extent states ever. | 
|  | 120 | */ | 
|  | 121 | ASSERT(!waitqueue_active(&state->wq)); | 
|  | 122 | free_extent_state(state); | 
|  | 123 |  | 
|  | 124 | cond_resched_lock(&tree->lock); | 
|  | 125 | } | 
|  | 126 | spin_unlock(&tree->lock); | 
|  | 127 | } | 
|  | 128 |  | 
|  | 129 | static noinline void switch_commit_roots(struct btrfs_transaction *trans, | 
|  | 130 | struct btrfs_fs_info *fs_info) | 
|  | 131 | { | 
|  | 132 | struct btrfs_root *root, *tmp; | 
|  | 133 |  | 
|  | 134 | down_write(&fs_info->commit_root_sem); | 
|  | 135 | list_for_each_entry_safe(root, tmp, &trans->switch_commits, | 
|  | 136 | dirty_list) { | 
|  | 137 | list_del_init(&root->dirty_list); | 
|  | 138 | free_extent_buffer(root->commit_root); | 
|  | 139 | root->commit_root = btrfs_root_node(root); | 
|  | 140 | if (is_fstree(root->objectid)) | 
|  | 141 | btrfs_unpin_free_ino(root); | 
|  | 142 | clear_btree_io_tree(&root->dirty_log_pages); | 
|  | 143 | } | 
|  | 144 |  | 
|  | 145 | /* We can free old roots now. */ | 
|  | 146 | spin_lock(&trans->dropped_roots_lock); | 
|  | 147 | while (!list_empty(&trans->dropped_roots)) { | 
|  | 148 | root = list_first_entry(&trans->dropped_roots, | 
|  | 149 | struct btrfs_root, root_list); | 
|  | 150 | list_del_init(&root->root_list); | 
|  | 151 | spin_unlock(&trans->dropped_roots_lock); | 
|  | 152 | btrfs_drop_and_free_fs_root(fs_info, root); | 
|  | 153 | spin_lock(&trans->dropped_roots_lock); | 
|  | 154 | } | 
|  | 155 | spin_unlock(&trans->dropped_roots_lock); | 
|  | 156 | up_write(&fs_info->commit_root_sem); | 
|  | 157 | } | 
|  | 158 |  | 
|  | 159 | static inline void extwriter_counter_inc(struct btrfs_transaction *trans, | 
|  | 160 | unsigned int type) | 
|  | 161 | { | 
|  | 162 | if (type & TRANS_EXTWRITERS) | 
|  | 163 | atomic_inc(&trans->num_extwriters); | 
|  | 164 | } | 
|  | 165 |  | 
|  | 166 | static inline void extwriter_counter_dec(struct btrfs_transaction *trans, | 
|  | 167 | unsigned int type) | 
|  | 168 | { | 
|  | 169 | if (type & TRANS_EXTWRITERS) | 
|  | 170 | atomic_dec(&trans->num_extwriters); | 
|  | 171 | } | 
|  | 172 |  | 
|  | 173 | static inline void extwriter_counter_init(struct btrfs_transaction *trans, | 
|  | 174 | unsigned int type) | 
|  | 175 | { | 
|  | 176 | atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0)); | 
|  | 177 | } | 
|  | 178 |  | 
|  | 179 | static inline int extwriter_counter_read(struct btrfs_transaction *trans) | 
|  | 180 | { | 
|  | 181 | return atomic_read(&trans->num_extwriters); | 
|  | 182 | } | 
|  | 183 |  | 
|  | 184 | /* | 
|  | 185 | * either allocate a new transaction or hop into the existing one | 
|  | 186 | */ | 
|  | 187 | static noinline int join_transaction(struct btrfs_fs_info *fs_info, | 
|  | 188 | unsigned int type) | 
|  | 189 | { | 
|  | 190 | struct btrfs_transaction *cur_trans; | 
|  | 191 |  | 
|  | 192 | spin_lock(&fs_info->trans_lock); | 
|  | 193 | loop: | 
|  | 194 | /* The file system has been taken offline. No new transactions. */ | 
|  | 195 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { | 
|  | 196 | spin_unlock(&fs_info->trans_lock); | 
|  | 197 | return -EROFS; | 
|  | 198 | } | 
|  | 199 |  | 
|  | 200 | cur_trans = fs_info->running_transaction; | 
|  | 201 | if (cur_trans) { | 
|  | 202 | if (cur_trans->aborted) { | 
|  | 203 | spin_unlock(&fs_info->trans_lock); | 
|  | 204 | return cur_trans->aborted; | 
|  | 205 | } | 
|  | 206 | if (btrfs_blocked_trans_types[cur_trans->state] & type) { | 
|  | 207 | spin_unlock(&fs_info->trans_lock); | 
|  | 208 | return -EBUSY; | 
|  | 209 | } | 
|  | 210 | refcount_inc(&cur_trans->use_count); | 
|  | 211 | atomic_inc(&cur_trans->num_writers); | 
|  | 212 | extwriter_counter_inc(cur_trans, type); | 
|  | 213 | spin_unlock(&fs_info->trans_lock); | 
|  | 214 | return 0; | 
|  | 215 | } | 
|  | 216 | spin_unlock(&fs_info->trans_lock); | 
|  | 217 |  | 
|  | 218 | /* | 
|  | 219 | * If we are ATTACH, we just want to catch the current transaction, | 
|  | 220 | * and commit it. If there is no transaction, just return ENOENT. | 
|  | 221 | */ | 
|  | 222 | if (type == TRANS_ATTACH) | 
|  | 223 | return -ENOENT; | 
|  | 224 |  | 
|  | 225 | /* | 
|  | 226 | * JOIN_NOLOCK only happens during the transaction commit, so | 
|  | 227 | * it is impossible that ->running_transaction is NULL | 
|  | 228 | */ | 
|  | 229 | BUG_ON(type == TRANS_JOIN_NOLOCK); | 
|  | 230 |  | 
|  | 231 | cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); | 
|  | 232 | if (!cur_trans) | 
|  | 233 | return -ENOMEM; | 
|  | 234 |  | 
|  | 235 | spin_lock(&fs_info->trans_lock); | 
|  | 236 | if (fs_info->running_transaction) { | 
|  | 237 | /* | 
|  | 238 | * someone started a transaction after we unlocked.  Make sure | 
|  | 239 | * to redo the checks above | 
|  | 240 | */ | 
|  | 241 | kfree(cur_trans); | 
|  | 242 | goto loop; | 
|  | 243 | } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { | 
|  | 244 | spin_unlock(&fs_info->trans_lock); | 
|  | 245 | kfree(cur_trans); | 
|  | 246 | return -EROFS; | 
|  | 247 | } | 
|  | 248 |  | 
|  | 249 | cur_trans->fs_info = fs_info; | 
|  | 250 | atomic_set(&cur_trans->num_writers, 1); | 
|  | 251 | extwriter_counter_init(cur_trans, type); | 
|  | 252 | init_waitqueue_head(&cur_trans->writer_wait); | 
|  | 253 | init_waitqueue_head(&cur_trans->commit_wait); | 
|  | 254 | init_waitqueue_head(&cur_trans->pending_wait); | 
|  | 255 | cur_trans->state = TRANS_STATE_RUNNING; | 
|  | 256 | /* | 
|  | 257 | * One for this trans handle, one so it will live on until we | 
|  | 258 | * commit the transaction. | 
|  | 259 | */ | 
|  | 260 | refcount_set(&cur_trans->use_count, 2); | 
|  | 261 | atomic_set(&cur_trans->pending_ordered, 0); | 
|  | 262 | cur_trans->flags = 0; | 
|  | 263 | cur_trans->start_time = get_seconds(); | 
|  | 264 |  | 
|  | 265 | memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); | 
|  | 266 |  | 
|  | 267 | cur_trans->delayed_refs.href_root = RB_ROOT; | 
|  | 268 | cur_trans->delayed_refs.dirty_extent_root = RB_ROOT; | 
|  | 269 | atomic_set(&cur_trans->delayed_refs.num_entries, 0); | 
|  | 270 |  | 
|  | 271 | /* | 
|  | 272 | * although the tree mod log is per file system and not per transaction, | 
|  | 273 | * the log must never go across transaction boundaries. | 
|  | 274 | */ | 
|  | 275 | smp_mb(); | 
|  | 276 | if (!list_empty(&fs_info->tree_mod_seq_list)) | 
|  | 277 | WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when creating a fresh transaction\n"); | 
|  | 278 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) | 
|  | 279 | WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when creating a fresh transaction\n"); | 
|  | 280 | atomic64_set(&fs_info->tree_mod_seq, 0); | 
|  | 281 |  | 
|  | 282 | spin_lock_init(&cur_trans->delayed_refs.lock); | 
|  | 283 |  | 
|  | 284 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 
|  | 285 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | 
|  | 286 | INIT_LIST_HEAD(&cur_trans->switch_commits); | 
|  | 287 | INIT_LIST_HEAD(&cur_trans->dirty_bgs); | 
|  | 288 | INIT_LIST_HEAD(&cur_trans->io_bgs); | 
|  | 289 | INIT_LIST_HEAD(&cur_trans->dropped_roots); | 
|  | 290 | mutex_init(&cur_trans->cache_write_mutex); | 
|  | 291 | cur_trans->num_dirty_bgs = 0; | 
|  | 292 | spin_lock_init(&cur_trans->dirty_bgs_lock); | 
|  | 293 | INIT_LIST_HEAD(&cur_trans->deleted_bgs); | 
|  | 294 | spin_lock_init(&cur_trans->dropped_roots_lock); | 
|  | 295 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 
|  | 296 | extent_io_tree_init(&cur_trans->dirty_pages, | 
|  | 297 | fs_info->btree_inode); | 
|  | 298 | fs_info->generation++; | 
|  | 299 | cur_trans->transid = fs_info->generation; | 
|  | 300 | fs_info->running_transaction = cur_trans; | 
|  | 301 | cur_trans->aborted = 0; | 
|  | 302 | spin_unlock(&fs_info->trans_lock); | 
|  | 303 |  | 
|  | 304 | return 0; | 
|  | 305 | } | 
|  | 306 |  | 
|  | 307 | /* | 
|  | 308 | * this does all the record keeping required to make sure that a reference | 
|  | 309 | * counted root is properly recorded in a given transaction.  This is required | 
|  | 310 | * to make sure the old root from before we joined the transaction is deleted | 
|  | 311 | * when the transaction commits | 
|  | 312 | */ | 
|  | 313 | static int record_root_in_trans(struct btrfs_trans_handle *trans, | 
|  | 314 | struct btrfs_root *root, | 
|  | 315 | int force) | 
|  | 316 | { | 
|  | 317 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 318 |  | 
|  | 319 | if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) && | 
|  | 320 | root->last_trans < trans->transid) || force) { | 
|  | 321 | WARN_ON(root == fs_info->extent_root); | 
|  | 322 | WARN_ON(!force && root->commit_root != root->node); | 
|  | 323 |  | 
|  | 324 | /* | 
|  | 325 | * see below for IN_TRANS_SETUP usage rules | 
|  | 326 | * we have the reloc mutex held now, so there | 
|  | 327 | * is only one writer in this function | 
|  | 328 | */ | 
|  | 329 | set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); | 
|  | 330 |  | 
|  | 331 | /* make sure readers find IN_TRANS_SETUP before | 
|  | 332 | * they find our root->last_trans update | 
|  | 333 | */ | 
|  | 334 | smp_wmb(); | 
|  | 335 |  | 
|  | 336 | spin_lock(&fs_info->fs_roots_radix_lock); | 
|  | 337 | if (root->last_trans == trans->transid && !force) { | 
|  | 338 | spin_unlock(&fs_info->fs_roots_radix_lock); | 
|  | 339 | return 0; | 
|  | 340 | } | 
|  | 341 | radix_tree_tag_set(&fs_info->fs_roots_radix, | 
|  | 342 | (unsigned long)root->root_key.objectid, | 
|  | 343 | BTRFS_ROOT_TRANS_TAG); | 
|  | 344 | spin_unlock(&fs_info->fs_roots_radix_lock); | 
|  | 345 | root->last_trans = trans->transid; | 
|  | 346 |  | 
|  | 347 | /* this is pretty tricky.  We don't want to | 
|  | 348 | * take the relocation lock in btrfs_record_root_in_trans | 
|  | 349 | * unless we're really doing the first setup for this root in | 
|  | 350 | * this transaction. | 
|  | 351 | * | 
|  | 352 | * Normally we'd use root->last_trans as a flag to decide | 
|  | 353 | * if we want to take the expensive mutex. | 
|  | 354 | * | 
|  | 355 | * But, we have to set root->last_trans before we | 
|  | 356 | * init the relocation root, otherwise, we trip over warnings | 
|  | 357 | * in ctree.c.  The solution used here is to flag ourselves | 
|  | 358 | * with root IN_TRANS_SETUP.  When this is 1, we're still | 
|  | 359 | * fixing up the reloc trees and everyone must wait. | 
|  | 360 | * | 
|  | 361 | * When this is zero, they can trust root->last_trans and fly | 
|  | 362 | * through btrfs_record_root_in_trans without having to take the | 
|  | 363 | * lock.  smp_wmb() makes sure that all the writes above are | 
|  | 364 | * done before we pop in the zero below | 
|  | 365 | */ | 
|  | 366 | btrfs_init_reloc_root(trans, root); | 
|  | 367 | smp_mb__before_atomic(); | 
|  | 368 | clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); | 
|  | 369 | } | 
|  | 370 | return 0; | 
|  | 371 | } | 
|  | 372 |  | 
|  | 373 |  | 
|  | 374 | void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, | 
|  | 375 | struct btrfs_root *root) | 
|  | 376 | { | 
|  | 377 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 378 | struct btrfs_transaction *cur_trans = trans->transaction; | 
|  | 379 |  | 
|  | 380 | /* Add ourselves to the transaction dropped list */ | 
|  | 381 | spin_lock(&cur_trans->dropped_roots_lock); | 
|  | 382 | list_add_tail(&root->root_list, &cur_trans->dropped_roots); | 
|  | 383 | spin_unlock(&cur_trans->dropped_roots_lock); | 
|  | 384 |  | 
|  | 385 | /* Make sure we don't try to update the root at commit time */ | 
|  | 386 | spin_lock(&fs_info->fs_roots_radix_lock); | 
|  | 387 | radix_tree_tag_clear(&fs_info->fs_roots_radix, | 
|  | 388 | (unsigned long)root->root_key.objectid, | 
|  | 389 | BTRFS_ROOT_TRANS_TAG); | 
|  | 390 | spin_unlock(&fs_info->fs_roots_radix_lock); | 
|  | 391 | } | 
|  | 392 |  | 
|  | 393 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 
|  | 394 | struct btrfs_root *root) | 
|  | 395 | { | 
|  | 396 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 397 |  | 
|  | 398 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) | 
|  | 399 | return 0; | 
|  | 400 |  | 
|  | 401 | /* | 
|  | 402 | * see record_root_in_trans for comments about IN_TRANS_SETUP usage | 
|  | 403 | * and barriers | 
|  | 404 | */ | 
|  | 405 | smp_rmb(); | 
|  | 406 | if (root->last_trans == trans->transid && | 
|  | 407 | !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state)) | 
|  | 408 | return 0; | 
|  | 409 |  | 
|  | 410 | mutex_lock(&fs_info->reloc_mutex); | 
|  | 411 | record_root_in_trans(trans, root, 0); | 
|  | 412 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 413 |  | 
|  | 414 | return 0; | 
|  | 415 | } | 
|  | 416 |  | 
|  | 417 | static inline int is_transaction_blocked(struct btrfs_transaction *trans) | 
|  | 418 | { | 
|  | 419 | return (trans->state >= TRANS_STATE_BLOCKED && | 
|  | 420 | trans->state < TRANS_STATE_UNBLOCKED && | 
|  | 421 | !trans->aborted); | 
|  | 422 | } | 
|  | 423 |  | 
|  | 424 | /* wait for commit against the current transaction to become unblocked | 
|  | 425 | * when this is done, it is safe to start a new transaction, but the current | 
|  | 426 | * transaction might not be fully on disk. | 
|  | 427 | */ | 
|  | 428 | static void wait_current_trans(struct btrfs_fs_info *fs_info) | 
|  | 429 | { | 
|  | 430 | struct btrfs_transaction *cur_trans; | 
|  | 431 |  | 
|  | 432 | spin_lock(&fs_info->trans_lock); | 
|  | 433 | cur_trans = fs_info->running_transaction; | 
|  | 434 | if (cur_trans && is_transaction_blocked(cur_trans)) { | 
|  | 435 | refcount_inc(&cur_trans->use_count); | 
|  | 436 | spin_unlock(&fs_info->trans_lock); | 
|  | 437 |  | 
|  | 438 | wait_event(fs_info->transaction_wait, | 
|  | 439 | cur_trans->state >= TRANS_STATE_UNBLOCKED || | 
|  | 440 | cur_trans->aborted); | 
|  | 441 | btrfs_put_transaction(cur_trans); | 
|  | 442 | } else { | 
|  | 443 | spin_unlock(&fs_info->trans_lock); | 
|  | 444 | } | 
|  | 445 | } | 
|  | 446 |  | 
|  | 447 | static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type) | 
|  | 448 | { | 
|  | 449 | if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) | 
|  | 450 | return 0; | 
|  | 451 |  | 
|  | 452 | if (type == TRANS_USERSPACE) | 
|  | 453 | return 1; | 
|  | 454 |  | 
|  | 455 | if (type == TRANS_START && | 
|  | 456 | !atomic_read(&fs_info->open_ioctl_trans)) | 
|  | 457 | return 1; | 
|  | 458 |  | 
|  | 459 | return 0; | 
|  | 460 | } | 
|  | 461 |  | 
|  | 462 | static inline bool need_reserve_reloc_root(struct btrfs_root *root) | 
|  | 463 | { | 
|  | 464 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 465 |  | 
|  | 466 | if (!fs_info->reloc_ctl || | 
|  | 467 | !test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | 
|  | 468 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || | 
|  | 469 | root->reloc_root) | 
|  | 470 | return false; | 
|  | 471 |  | 
|  | 472 | return true; | 
|  | 473 | } | 
|  | 474 |  | 
|  | 475 | static struct btrfs_trans_handle * | 
|  | 476 | start_transaction(struct btrfs_root *root, unsigned int num_items, | 
|  | 477 | unsigned int type, enum btrfs_reserve_flush_enum flush, | 
|  | 478 | bool enforce_qgroups) | 
|  | 479 | { | 
|  | 480 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 481 |  | 
|  | 482 | struct btrfs_trans_handle *h; | 
|  | 483 | struct btrfs_transaction *cur_trans; | 
|  | 484 | u64 num_bytes = 0; | 
|  | 485 | u64 qgroup_reserved = 0; | 
|  | 486 | bool reloc_reserved = false; | 
|  | 487 | int ret; | 
|  | 488 |  | 
|  | 489 | /* Send isn't supposed to start transactions. */ | 
|  | 490 | ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB); | 
|  | 491 |  | 
|  | 492 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) | 
|  | 493 | return ERR_PTR(-EROFS); | 
|  | 494 |  | 
|  | 495 | if (current->journal_info) { | 
|  | 496 | WARN_ON(type & TRANS_EXTWRITERS); | 
|  | 497 | h = current->journal_info; | 
|  | 498 | h->use_count++; | 
|  | 499 | WARN_ON(h->use_count > 2); | 
|  | 500 | h->orig_rsv = h->block_rsv; | 
|  | 501 | h->block_rsv = NULL; | 
|  | 502 | goto got_it; | 
|  | 503 | } | 
|  | 504 |  | 
|  | 505 | /* | 
|  | 506 | * Do the reservation before we join the transaction so we can do all | 
|  | 507 | * the appropriate flushing if need be. | 
|  | 508 | */ | 
|  | 509 | if (num_items && root != fs_info->chunk_root) { | 
|  | 510 | qgroup_reserved = num_items * fs_info->nodesize; | 
|  | 511 | ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved, | 
|  | 512 | enforce_qgroups); | 
|  | 513 | if (ret) | 
|  | 514 | return ERR_PTR(ret); | 
|  | 515 |  | 
|  | 516 | num_bytes = btrfs_calc_trans_metadata_size(fs_info, num_items); | 
|  | 517 | /* | 
|  | 518 | * Do the reservation for the relocation root creation | 
|  | 519 | */ | 
|  | 520 | if (need_reserve_reloc_root(root)) { | 
|  | 521 | num_bytes += fs_info->nodesize; | 
|  | 522 | reloc_reserved = true; | 
|  | 523 | } | 
|  | 524 |  | 
|  | 525 | ret = btrfs_block_rsv_add(root, &fs_info->trans_block_rsv, | 
|  | 526 | num_bytes, flush); | 
|  | 527 | if (ret) | 
|  | 528 | goto reserve_fail; | 
|  | 529 | } | 
|  | 530 | again: | 
|  | 531 | h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS); | 
|  | 532 | if (!h) { | 
|  | 533 | ret = -ENOMEM; | 
|  | 534 | goto alloc_fail; | 
|  | 535 | } | 
|  | 536 |  | 
|  | 537 | /* | 
|  | 538 | * If we are JOIN_NOLOCK we're already committing a transaction and | 
|  | 539 | * waiting on this guy, so we don't need to do the sb_start_intwrite | 
|  | 540 | * because we're already holding a ref.  We need this because we could | 
|  | 541 | * have raced in and did an fsync() on a file which can kick a commit | 
|  | 542 | * and then we deadlock with somebody doing a freeze. | 
|  | 543 | * | 
|  | 544 | * If we are ATTACH, it means we just want to catch the current | 
|  | 545 | * transaction and commit it, so we needn't do sb_start_intwrite(). | 
|  | 546 | */ | 
|  | 547 | if (type & __TRANS_FREEZABLE) | 
|  | 548 | sb_start_intwrite(fs_info->sb); | 
|  | 549 |  | 
|  | 550 | if (may_wait_transaction(fs_info, type)) | 
|  | 551 | wait_current_trans(fs_info); | 
|  | 552 |  | 
|  | 553 | do { | 
|  | 554 | ret = join_transaction(fs_info, type); | 
|  | 555 | if (ret == -EBUSY) { | 
|  | 556 | wait_current_trans(fs_info); | 
|  | 557 | if (unlikely(type == TRANS_ATTACH)) | 
|  | 558 | ret = -ENOENT; | 
|  | 559 | } | 
|  | 560 | } while (ret == -EBUSY); | 
|  | 561 |  | 
|  | 562 | if (ret < 0) | 
|  | 563 | goto join_fail; | 
|  | 564 |  | 
|  | 565 | cur_trans = fs_info->running_transaction; | 
|  | 566 |  | 
|  | 567 | h->transid = cur_trans->transid; | 
|  | 568 | h->transaction = cur_trans; | 
|  | 569 | h->root = root; | 
|  | 570 | h->use_count = 1; | 
|  | 571 | h->fs_info = root->fs_info; | 
|  | 572 |  | 
|  | 573 | h->type = type; | 
|  | 574 | h->can_flush_pending_bgs = true; | 
|  | 575 | INIT_LIST_HEAD(&h->new_bgs); | 
|  | 576 |  | 
|  | 577 | smp_mb(); | 
|  | 578 | if (cur_trans->state >= TRANS_STATE_BLOCKED && | 
|  | 579 | may_wait_transaction(fs_info, type)) { | 
|  | 580 | current->journal_info = h; | 
|  | 581 | btrfs_commit_transaction(h); | 
|  | 582 | goto again; | 
|  | 583 | } | 
|  | 584 |  | 
|  | 585 | if (num_bytes) { | 
|  | 586 | trace_btrfs_space_reservation(fs_info, "transaction", | 
|  | 587 | h->transid, num_bytes, 1); | 
|  | 588 | h->block_rsv = &fs_info->trans_block_rsv; | 
|  | 589 | h->bytes_reserved = num_bytes; | 
|  | 590 | h->reloc_reserved = reloc_reserved; | 
|  | 591 | } | 
|  | 592 |  | 
|  | 593 | got_it: | 
|  | 594 | btrfs_record_root_in_trans(h, root); | 
|  | 595 |  | 
|  | 596 | if (!current->journal_info && type != TRANS_USERSPACE) | 
|  | 597 | current->journal_info = h; | 
|  | 598 | return h; | 
|  | 599 |  | 
|  | 600 | join_fail: | 
|  | 601 | if (type & __TRANS_FREEZABLE) | 
|  | 602 | sb_end_intwrite(fs_info->sb); | 
|  | 603 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 
|  | 604 | alloc_fail: | 
|  | 605 | if (num_bytes) | 
|  | 606 | btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, | 
|  | 607 | num_bytes); | 
|  | 608 | reserve_fail: | 
|  | 609 | btrfs_qgroup_free_meta(root, qgroup_reserved); | 
|  | 610 | return ERR_PTR(ret); | 
|  | 611 | } | 
|  | 612 |  | 
|  | 613 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 
|  | 614 | unsigned int num_items) | 
|  | 615 | { | 
|  | 616 | return start_transaction(root, num_items, TRANS_START, | 
|  | 617 | BTRFS_RESERVE_FLUSH_ALL, true); | 
|  | 618 | } | 
|  | 619 |  | 
|  | 620 | struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( | 
|  | 621 | struct btrfs_root *root, | 
|  | 622 | unsigned int num_items, | 
|  | 623 | int min_factor) | 
|  | 624 | { | 
|  | 625 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 626 | struct btrfs_trans_handle *trans; | 
|  | 627 | u64 num_bytes; | 
|  | 628 | int ret; | 
|  | 629 |  | 
|  | 630 | /* | 
|  | 631 | * We have two callers: unlink and block group removal.  The | 
|  | 632 | * former should succeed even if we will temporarily exceed | 
|  | 633 | * quota and the latter operates on the extent root so | 
|  | 634 | * qgroup enforcement is ignored anyway. | 
|  | 635 | */ | 
|  | 636 | trans = start_transaction(root, num_items, TRANS_START, | 
|  | 637 | BTRFS_RESERVE_FLUSH_ALL, false); | 
|  | 638 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 
|  | 639 | return trans; | 
|  | 640 |  | 
|  | 641 | trans = btrfs_start_transaction(root, 0); | 
|  | 642 | if (IS_ERR(trans)) | 
|  | 643 | return trans; | 
|  | 644 |  | 
|  | 645 | num_bytes = btrfs_calc_trans_metadata_size(fs_info, num_items); | 
|  | 646 | ret = btrfs_cond_migrate_bytes(fs_info, &fs_info->trans_block_rsv, | 
|  | 647 | num_bytes, min_factor); | 
|  | 648 | if (ret) { | 
|  | 649 | btrfs_end_transaction(trans); | 
|  | 650 | return ERR_PTR(ret); | 
|  | 651 | } | 
|  | 652 |  | 
|  | 653 | trans->block_rsv = &fs_info->trans_block_rsv; | 
|  | 654 | trans->bytes_reserved = num_bytes; | 
|  | 655 | trace_btrfs_space_reservation(fs_info, "transaction", | 
|  | 656 | trans->transid, num_bytes, 1); | 
|  | 657 |  | 
|  | 658 | return trans; | 
|  | 659 | } | 
|  | 660 |  | 
|  | 661 | struct btrfs_trans_handle *btrfs_start_transaction_lflush( | 
|  | 662 | struct btrfs_root *root, | 
|  | 663 | unsigned int num_items) | 
|  | 664 | { | 
|  | 665 | return start_transaction(root, num_items, TRANS_START, | 
|  | 666 | BTRFS_RESERVE_FLUSH_LIMIT, true); | 
|  | 667 | } | 
|  | 668 |  | 
|  | 669 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) | 
|  | 670 | { | 
|  | 671 | return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH, | 
|  | 672 | true); | 
|  | 673 | } | 
|  | 674 |  | 
|  | 675 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) | 
|  | 676 | { | 
|  | 677 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK, | 
|  | 678 | BTRFS_RESERVE_NO_FLUSH, true); | 
|  | 679 | } | 
|  | 680 |  | 
|  | 681 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) | 
|  | 682 | { | 
|  | 683 | return start_transaction(root, 0, TRANS_USERSPACE, | 
|  | 684 | BTRFS_RESERVE_NO_FLUSH, true); | 
|  | 685 | } | 
|  | 686 |  | 
|  | 687 | /* | 
|  | 688 | * btrfs_attach_transaction() - catch the running transaction | 
|  | 689 | * | 
|  | 690 | * It is used when we want to commit the current the transaction, but | 
|  | 691 | * don't want to start a new one. | 
|  | 692 | * | 
|  | 693 | * Note: If this function return -ENOENT, it just means there is no | 
|  | 694 | * running transaction. But it is possible that the inactive transaction | 
|  | 695 | * is still in the memory, not fully on disk. If you hope there is no | 
|  | 696 | * inactive transaction in the fs when -ENOENT is returned, you should | 
|  | 697 | * invoke | 
|  | 698 | *     btrfs_attach_transaction_barrier() | 
|  | 699 | */ | 
|  | 700 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | 
|  | 701 | { | 
|  | 702 | return start_transaction(root, 0, TRANS_ATTACH, | 
|  | 703 | BTRFS_RESERVE_NO_FLUSH, true); | 
|  | 704 | } | 
|  | 705 |  | 
|  | 706 | /* | 
|  | 707 | * btrfs_attach_transaction_barrier() - catch the running transaction | 
|  | 708 | * | 
|  | 709 | * It is similar to the above function, the differentia is this one | 
|  | 710 | * will wait for all the inactive transactions until they fully | 
|  | 711 | * complete. | 
|  | 712 | */ | 
|  | 713 | struct btrfs_trans_handle * | 
|  | 714 | btrfs_attach_transaction_barrier(struct btrfs_root *root) | 
|  | 715 | { | 
|  | 716 | struct btrfs_trans_handle *trans; | 
|  | 717 |  | 
|  | 718 | trans = start_transaction(root, 0, TRANS_ATTACH, | 
|  | 719 | BTRFS_RESERVE_NO_FLUSH, true); | 
|  | 720 | if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) | 
|  | 721 | btrfs_wait_for_commit(root->fs_info, 0); | 
|  | 722 |  | 
|  | 723 | return trans; | 
|  | 724 | } | 
|  | 725 |  | 
|  | 726 | /* wait for a transaction commit to be fully complete */ | 
|  | 727 | static noinline void wait_for_commit(struct btrfs_transaction *commit) | 
|  | 728 | { | 
|  | 729 | wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED); | 
|  | 730 | } | 
|  | 731 |  | 
|  | 732 | int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) | 
|  | 733 | { | 
|  | 734 | struct btrfs_transaction *cur_trans = NULL, *t; | 
|  | 735 | int ret = 0; | 
|  | 736 |  | 
|  | 737 | if (transid) { | 
|  | 738 | if (transid <= fs_info->last_trans_committed) | 
|  | 739 | goto out; | 
|  | 740 |  | 
|  | 741 | /* find specified transaction */ | 
|  | 742 | spin_lock(&fs_info->trans_lock); | 
|  | 743 | list_for_each_entry(t, &fs_info->trans_list, list) { | 
|  | 744 | if (t->transid == transid) { | 
|  | 745 | cur_trans = t; | 
|  | 746 | refcount_inc(&cur_trans->use_count); | 
|  | 747 | ret = 0; | 
|  | 748 | break; | 
|  | 749 | } | 
|  | 750 | if (t->transid > transid) { | 
|  | 751 | ret = 0; | 
|  | 752 | break; | 
|  | 753 | } | 
|  | 754 | } | 
|  | 755 | spin_unlock(&fs_info->trans_lock); | 
|  | 756 |  | 
|  | 757 | /* | 
|  | 758 | * The specified transaction doesn't exist, or we | 
|  | 759 | * raced with btrfs_commit_transaction | 
|  | 760 | */ | 
|  | 761 | if (!cur_trans) { | 
|  | 762 | if (transid > fs_info->last_trans_committed) | 
|  | 763 | ret = -EINVAL; | 
|  | 764 | goto out; | 
|  | 765 | } | 
|  | 766 | } else { | 
|  | 767 | /* find newest transaction that is committing | committed */ | 
|  | 768 | spin_lock(&fs_info->trans_lock); | 
|  | 769 | list_for_each_entry_reverse(t, &fs_info->trans_list, | 
|  | 770 | list) { | 
|  | 771 | if (t->state >= TRANS_STATE_COMMIT_START) { | 
|  | 772 | if (t->state == TRANS_STATE_COMPLETED) | 
|  | 773 | break; | 
|  | 774 | cur_trans = t; | 
|  | 775 | refcount_inc(&cur_trans->use_count); | 
|  | 776 | break; | 
|  | 777 | } | 
|  | 778 | } | 
|  | 779 | spin_unlock(&fs_info->trans_lock); | 
|  | 780 | if (!cur_trans) | 
|  | 781 | goto out;  /* nothing committing|committed */ | 
|  | 782 | } | 
|  | 783 |  | 
|  | 784 | wait_for_commit(cur_trans); | 
|  | 785 | btrfs_put_transaction(cur_trans); | 
|  | 786 | out: | 
|  | 787 | return ret; | 
|  | 788 | } | 
|  | 789 |  | 
|  | 790 | void btrfs_throttle(struct btrfs_fs_info *fs_info) | 
|  | 791 | { | 
|  | 792 | if (!atomic_read(&fs_info->open_ioctl_trans)) | 
|  | 793 | wait_current_trans(fs_info); | 
|  | 794 | } | 
|  | 795 |  | 
|  | 796 | static int should_end_transaction(struct btrfs_trans_handle *trans) | 
|  | 797 | { | 
|  | 798 | struct btrfs_fs_info *fs_info = trans->fs_info; | 
|  | 799 |  | 
|  | 800 | if (fs_info->global_block_rsv.space_info->full && | 
|  | 801 | btrfs_check_space_for_delayed_refs(trans, fs_info)) | 
|  | 802 | return 1; | 
|  | 803 |  | 
|  | 804 | return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5); | 
|  | 805 | } | 
|  | 806 |  | 
|  | 807 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans) | 
|  | 808 | { | 
|  | 809 | struct btrfs_transaction *cur_trans = trans->transaction; | 
|  | 810 | struct btrfs_fs_info *fs_info = trans->fs_info; | 
|  | 811 | int updates; | 
|  | 812 | int err; | 
|  | 813 |  | 
|  | 814 | smp_mb(); | 
|  | 815 | if (cur_trans->state >= TRANS_STATE_BLOCKED || | 
|  | 816 | cur_trans->delayed_refs.flushing) | 
|  | 817 | return 1; | 
|  | 818 |  | 
|  | 819 | updates = trans->delayed_ref_updates; | 
|  | 820 | trans->delayed_ref_updates = 0; | 
|  | 821 | if (updates) { | 
|  | 822 | err = btrfs_run_delayed_refs(trans, fs_info, updates * 2); | 
|  | 823 | if (err) /* Error code will also eval true */ | 
|  | 824 | return err; | 
|  | 825 | } | 
|  | 826 |  | 
|  | 827 | return should_end_transaction(trans); | 
|  | 828 | } | 
|  | 829 |  | 
|  | 830 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 
|  | 831 | int throttle) | 
|  | 832 | { | 
|  | 833 | struct btrfs_fs_info *info = trans->fs_info; | 
|  | 834 | struct btrfs_transaction *cur_trans = trans->transaction; | 
|  | 835 | u64 transid = trans->transid; | 
|  | 836 | unsigned long cur = trans->delayed_ref_updates; | 
|  | 837 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 
|  | 838 | int err = 0; | 
|  | 839 | int must_run_delayed_refs = 0; | 
|  | 840 |  | 
|  | 841 | if (trans->use_count > 1) { | 
|  | 842 | trans->use_count--; | 
|  | 843 | trans->block_rsv = trans->orig_rsv; | 
|  | 844 | return 0; | 
|  | 845 | } | 
|  | 846 |  | 
|  | 847 | btrfs_trans_release_metadata(trans, info); | 
|  | 848 | trans->block_rsv = NULL; | 
|  | 849 |  | 
|  | 850 | if (!list_empty(&trans->new_bgs)) | 
|  | 851 | btrfs_create_pending_block_groups(trans, info); | 
|  | 852 |  | 
|  | 853 | trans->delayed_ref_updates = 0; | 
|  | 854 | if (!trans->sync) { | 
|  | 855 | must_run_delayed_refs = | 
|  | 856 | btrfs_should_throttle_delayed_refs(trans, info); | 
|  | 857 | cur = max_t(unsigned long, cur, 32); | 
|  | 858 |  | 
|  | 859 | /* | 
|  | 860 | * don't make the caller wait if they are from a NOLOCK | 
|  | 861 | * or ATTACH transaction, it will deadlock with commit | 
|  | 862 | */ | 
|  | 863 | if (must_run_delayed_refs == 1 && | 
|  | 864 | (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH))) | 
|  | 865 | must_run_delayed_refs = 2; | 
|  | 866 | } | 
|  | 867 |  | 
|  | 868 | btrfs_trans_release_metadata(trans, info); | 
|  | 869 | trans->block_rsv = NULL; | 
|  | 870 |  | 
|  | 871 | if (!list_empty(&trans->new_bgs)) | 
|  | 872 | btrfs_create_pending_block_groups(trans, info); | 
|  | 873 |  | 
|  | 874 | btrfs_trans_release_chunk_metadata(trans); | 
|  | 875 |  | 
|  | 876 | if (lock && !atomic_read(&info->open_ioctl_trans) && | 
|  | 877 | should_end_transaction(trans) && | 
|  | 878 | READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { | 
|  | 879 | spin_lock(&info->trans_lock); | 
|  | 880 | if (cur_trans->state == TRANS_STATE_RUNNING) | 
|  | 881 | cur_trans->state = TRANS_STATE_BLOCKED; | 
|  | 882 | spin_unlock(&info->trans_lock); | 
|  | 883 | } | 
|  | 884 |  | 
|  | 885 | if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 
|  | 886 | if (throttle) | 
|  | 887 | return btrfs_commit_transaction(trans); | 
|  | 888 | else | 
|  | 889 | wake_up_process(info->transaction_kthread); | 
|  | 890 | } | 
|  | 891 |  | 
|  | 892 | if (trans->type & __TRANS_FREEZABLE) | 
|  | 893 | sb_end_intwrite(info->sb); | 
|  | 894 |  | 
|  | 895 | WARN_ON(cur_trans != info->running_transaction); | 
|  | 896 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); | 
|  | 897 | atomic_dec(&cur_trans->num_writers); | 
|  | 898 | extwriter_counter_dec(cur_trans, trans->type); | 
|  | 899 |  | 
|  | 900 | /* | 
|  | 901 | * Make sure counter is updated before we wake up waiters. | 
|  | 902 | */ | 
|  | 903 | smp_mb(); | 
|  | 904 | if (waitqueue_active(&cur_trans->writer_wait)) | 
|  | 905 | wake_up(&cur_trans->writer_wait); | 
|  | 906 | btrfs_put_transaction(cur_trans); | 
|  | 907 |  | 
|  | 908 | if (current->journal_info == trans) | 
|  | 909 | current->journal_info = NULL; | 
|  | 910 |  | 
|  | 911 | if (throttle) | 
|  | 912 | btrfs_run_delayed_iputs(info); | 
|  | 913 |  | 
|  | 914 | if (trans->aborted || | 
|  | 915 | test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) { | 
|  | 916 | wake_up_process(info->transaction_kthread); | 
|  | 917 | err = -EIO; | 
|  | 918 | } | 
|  | 919 |  | 
|  | 920 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 
|  | 921 | if (must_run_delayed_refs) { | 
|  | 922 | btrfs_async_run_delayed_refs(info, cur, transid, | 
|  | 923 | must_run_delayed_refs == 1); | 
|  | 924 | } | 
|  | 925 | return err; | 
|  | 926 | } | 
|  | 927 |  | 
/* End a transaction handle without throttling the caller. */
int btrfs_end_transaction(struct btrfs_trans_handle *trans)
{
	const int throttle = 0;

	return __btrfs_end_transaction(trans, throttle);
}
|  | 932 |  | 
/* End a transaction handle with throttling enabled. */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans)
{
	const int throttle = 1;

	return __btrfs_end_transaction(trans, throttle);
}
|  | 937 |  | 
|  | 938 | /* | 
|  | 939 | * when btree blocks are allocated, they have some corresponding bits set for | 
|  | 940 | * them in one of two extent_io trees.  This is used to make sure all of | 
|  | 941 | * those extents are sent to disk but does not wait on them | 
|  | 942 | */ | 
|  | 943 | int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info, | 
|  | 944 | struct extent_io_tree *dirty_pages, int mark) | 
|  | 945 | { | 
|  | 946 | int err = 0; | 
|  | 947 | int werr = 0; | 
|  | 948 | struct address_space *mapping = fs_info->btree_inode->i_mapping; | 
|  | 949 | struct extent_state *cached_state = NULL; | 
|  | 950 | u64 start = 0; | 
|  | 951 | u64 end; | 
|  | 952 |  | 
|  | 953 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 
|  | 954 | mark, &cached_state)) { | 
|  | 955 | bool wait_writeback = false; | 
|  | 956 |  | 
|  | 957 | err = convert_extent_bit(dirty_pages, start, end, | 
|  | 958 | EXTENT_NEED_WAIT, | 
|  | 959 | mark, &cached_state); | 
|  | 960 | /* | 
|  | 961 | * convert_extent_bit can return -ENOMEM, which is most of the | 
|  | 962 | * time a temporary error. So when it happens, ignore the error | 
|  | 963 | * and wait for writeback of this range to finish - because we | 
|  | 964 | * failed to set the bit EXTENT_NEED_WAIT for the range, a call | 
|  | 965 | * to __btrfs_wait_marked_extents() would not know that | 
|  | 966 | * writeback for this range started and therefore wouldn't | 
|  | 967 | * wait for it to finish - we don't want to commit a | 
|  | 968 | * superblock that points to btree nodes/leafs for which | 
|  | 969 | * writeback hasn't finished yet (and without errors). | 
|  | 970 | * We cleanup any entries left in the io tree when committing | 
|  | 971 | * the transaction (through clear_btree_io_tree()). | 
|  | 972 | */ | 
|  | 973 | if (err == -ENOMEM) { | 
|  | 974 | err = 0; | 
|  | 975 | wait_writeback = true; | 
|  | 976 | } | 
|  | 977 | if (!err) | 
|  | 978 | err = filemap_fdatawrite_range(mapping, start, end); | 
|  | 979 | if (err) | 
|  | 980 | werr = err; | 
|  | 981 | else if (wait_writeback) | 
|  | 982 | werr = filemap_fdatawait_range(mapping, start, end); | 
|  | 983 | free_extent_state(cached_state); | 
|  | 984 | cached_state = NULL; | 
|  | 985 | cond_resched(); | 
|  | 986 | start = end + 1; | 
|  | 987 | } | 
|  | 988 | return werr; | 
|  | 989 | } | 
|  | 990 |  | 
|  | 991 | /* | 
|  | 992 | * when btree blocks are allocated, they have some corresponding bits set for | 
|  | 993 | * them in one of two extent_io trees.  This is used to make sure all of | 
|  | 994 | * those extents are on disk for transaction or log commit.  We wait | 
|  | 995 | * on all the pages and clear them from the dirty pages state tree | 
|  | 996 | */ | 
|  | 997 | static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info, | 
|  | 998 | struct extent_io_tree *dirty_pages) | 
|  | 999 | { | 
|  | 1000 | int err = 0; | 
|  | 1001 | int werr = 0; | 
|  | 1002 | struct address_space *mapping = fs_info->btree_inode->i_mapping; | 
|  | 1003 | struct extent_state *cached_state = NULL; | 
|  | 1004 | u64 start = 0; | 
|  | 1005 | u64 end; | 
|  | 1006 |  | 
|  | 1007 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 
|  | 1008 | EXTENT_NEED_WAIT, &cached_state)) { | 
|  | 1009 | /* | 
|  | 1010 | * Ignore -ENOMEM errors returned by clear_extent_bit(). | 
|  | 1011 | * When committing the transaction, we'll remove any entries | 
|  | 1012 | * left in the io tree. For a log commit, we don't remove them | 
|  | 1013 | * after committing the log because the tree can be accessed | 
|  | 1014 | * concurrently - we do it only at transaction commit time when | 
|  | 1015 | * it's safe to do it (through clear_btree_io_tree()). | 
|  | 1016 | */ | 
|  | 1017 | err = clear_extent_bit(dirty_pages, start, end, | 
|  | 1018 | EXTENT_NEED_WAIT, | 
|  | 1019 | 0, 0, &cached_state, GFP_NOFS); | 
|  | 1020 | if (err == -ENOMEM) | 
|  | 1021 | err = 0; | 
|  | 1022 | if (!err) | 
|  | 1023 | err = filemap_fdatawait_range(mapping, start, end); | 
|  | 1024 | if (err) | 
|  | 1025 | werr = err; | 
|  | 1026 | free_extent_state(cached_state); | 
|  | 1027 | cached_state = NULL; | 
|  | 1028 | cond_resched(); | 
|  | 1029 | start = end + 1; | 
|  | 1030 | } | 
|  | 1031 | if (err) | 
|  | 1032 | werr = err; | 
|  | 1033 | return werr; | 
|  | 1034 | } | 
|  | 1035 |  | 
|  | 1036 | int btrfs_wait_extents(struct btrfs_fs_info *fs_info, | 
|  | 1037 | struct extent_io_tree *dirty_pages) | 
|  | 1038 | { | 
|  | 1039 | bool errors = false; | 
|  | 1040 | int err; | 
|  | 1041 |  | 
|  | 1042 | err = __btrfs_wait_marked_extents(fs_info, dirty_pages); | 
|  | 1043 | if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags)) | 
|  | 1044 | errors = true; | 
|  | 1045 |  | 
|  | 1046 | if (errors && !err) | 
|  | 1047 | err = -EIO; | 
|  | 1048 | return err; | 
|  | 1049 | } | 
|  | 1050 |  | 
|  | 1051 | int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark) | 
|  | 1052 | { | 
|  | 1053 | struct btrfs_fs_info *fs_info = log_root->fs_info; | 
|  | 1054 | struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages; | 
|  | 1055 | bool errors = false; | 
|  | 1056 | int err; | 
|  | 1057 |  | 
|  | 1058 | ASSERT(log_root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); | 
|  | 1059 |  | 
|  | 1060 | err = __btrfs_wait_marked_extents(fs_info, dirty_pages); | 
|  | 1061 | if ((mark & EXTENT_DIRTY) && | 
|  | 1062 | test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags)) | 
|  | 1063 | errors = true; | 
|  | 1064 |  | 
|  | 1065 | if ((mark & EXTENT_NEW) && | 
|  | 1066 | test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags)) | 
|  | 1067 | errors = true; | 
|  | 1068 |  | 
|  | 1069 | if (errors && !err) | 
|  | 1070 | err = -EIO; | 
|  | 1071 | return err; | 
|  | 1072 | } | 
|  | 1073 |  | 
|  | 1074 | /* | 
|  | 1075 | * when btree blocks are allocated, they have some corresponding bits set for | 
|  | 1076 | * them in one of two extent_io trees.  This is used to make sure all of | 
|  | 1077 | * those extents are on disk for transaction or log commit | 
|  | 1078 | */ | 
|  | 1079 | static int btrfs_write_and_wait_marked_extents(struct btrfs_fs_info *fs_info, | 
|  | 1080 | struct extent_io_tree *dirty_pages, int mark) | 
|  | 1081 | { | 
|  | 1082 | int ret; | 
|  | 1083 | int ret2; | 
|  | 1084 | struct blk_plug plug; | 
|  | 1085 |  | 
|  | 1086 | blk_start_plug(&plug); | 
|  | 1087 | ret = btrfs_write_marked_extents(fs_info, dirty_pages, mark); | 
|  | 1088 | blk_finish_plug(&plug); | 
|  | 1089 | ret2 = btrfs_wait_extents(fs_info, dirty_pages); | 
|  | 1090 |  | 
|  | 1091 | if (ret) | 
|  | 1092 | return ret; | 
|  | 1093 | if (ret2) | 
|  | 1094 | return ret2; | 
|  | 1095 | return 0; | 
|  | 1096 | } | 
|  | 1097 |  | 
|  | 1098 | static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 
|  | 1099 | struct btrfs_fs_info *fs_info) | 
|  | 1100 | { | 
|  | 1101 | int ret; | 
|  | 1102 |  | 
|  | 1103 | ret = btrfs_write_and_wait_marked_extents(fs_info, | 
|  | 1104 | &trans->transaction->dirty_pages, | 
|  | 1105 | EXTENT_DIRTY); | 
|  | 1106 | clear_btree_io_tree(&trans->transaction->dirty_pages); | 
|  | 1107 |  | 
|  | 1108 | return ret; | 
|  | 1109 | } | 
|  | 1110 |  | 
|  | 1111 | /* | 
|  | 1112 | * this is used to update the root pointer in the tree of tree roots. | 
|  | 1113 | * | 
|  | 1114 | * But, in the case of the extent allocation tree, updating the root | 
|  | 1115 | * pointer may allocate blocks which may change the root of the extent | 
|  | 1116 | * allocation tree. | 
|  | 1117 | * | 
|  | 1118 | * So, this loops and repeats and makes sure the cowonly root didn't | 
|  | 1119 | * change while the root pointer was being updated in the metadata. | 
|  | 1120 | */ | 
|  | 1121 | static int update_cowonly_root(struct btrfs_trans_handle *trans, | 
|  | 1122 | struct btrfs_root *root) | 
|  | 1123 | { | 
|  | 1124 | int ret; | 
|  | 1125 | u64 old_root_bytenr; | 
|  | 1126 | u64 old_root_used; | 
|  | 1127 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 1128 | struct btrfs_root *tree_root = fs_info->tree_root; | 
|  | 1129 |  | 
|  | 1130 | old_root_used = btrfs_root_used(&root->root_item); | 
|  | 1131 |  | 
|  | 1132 | while (1) { | 
|  | 1133 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 
|  | 1134 | if (old_root_bytenr == root->node->start && | 
|  | 1135 | old_root_used == btrfs_root_used(&root->root_item)) | 
|  | 1136 | break; | 
|  | 1137 |  | 
|  | 1138 | btrfs_set_root_node(&root->root_item, root->node); | 
|  | 1139 | ret = btrfs_update_root(trans, tree_root, | 
|  | 1140 | &root->root_key, | 
|  | 1141 | &root->root_item); | 
|  | 1142 | if (ret) | 
|  | 1143 | return ret; | 
|  | 1144 |  | 
|  | 1145 | old_root_used = btrfs_root_used(&root->root_item); | 
|  | 1146 | } | 
|  | 1147 |  | 
|  | 1148 | return 0; | 
|  | 1149 | } | 
|  | 1150 |  | 
|  | 1151 | /* | 
|  | 1152 | * update all the cowonly tree roots on disk | 
|  | 1153 | * | 
|  | 1154 | * The error handling in this function may not be obvious. Any of the | 
|  | 1155 | * failures will cause the file system to go offline. We still need | 
|  | 1156 | * to clean up the delayed refs. | 
|  | 1157 | */ | 
|  | 1158 | static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | 
|  | 1159 | struct btrfs_fs_info *fs_info) | 
|  | 1160 | { | 
|  | 1161 | struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; | 
|  | 1162 | struct list_head *io_bgs = &trans->transaction->io_bgs; | 
|  | 1163 | struct list_head *next; | 
|  | 1164 | struct extent_buffer *eb; | 
|  | 1165 | int ret; | 
|  | 1166 |  | 
|  | 1167 | eb = btrfs_lock_root_node(fs_info->tree_root); | 
|  | 1168 | ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, | 
|  | 1169 | 0, &eb); | 
|  | 1170 | btrfs_tree_unlock(eb); | 
|  | 1171 | free_extent_buffer(eb); | 
|  | 1172 |  | 
|  | 1173 | if (ret) | 
|  | 1174 | return ret; | 
|  | 1175 |  | 
|  | 1176 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 1177 | if (ret) | 
|  | 1178 | return ret; | 
|  | 1179 |  | 
|  | 1180 | ret = btrfs_run_dev_stats(trans, fs_info); | 
|  | 1181 | if (ret) | 
|  | 1182 | return ret; | 
|  | 1183 | ret = btrfs_run_dev_replace(trans, fs_info); | 
|  | 1184 | if (ret) | 
|  | 1185 | return ret; | 
|  | 1186 | ret = btrfs_run_qgroups(trans, fs_info); | 
|  | 1187 | if (ret) | 
|  | 1188 | return ret; | 
|  | 1189 |  | 
|  | 1190 | ret = btrfs_setup_space_cache(trans, fs_info); | 
|  | 1191 | if (ret) | 
|  | 1192 | return ret; | 
|  | 1193 |  | 
|  | 1194 | /* run_qgroups might have added some more refs */ | 
|  | 1195 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 1196 | if (ret) | 
|  | 1197 | return ret; | 
|  | 1198 | again: | 
|  | 1199 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 
|  | 1200 | struct btrfs_root *root; | 
|  | 1201 | next = fs_info->dirty_cowonly_roots.next; | 
|  | 1202 | list_del_init(next); | 
|  | 1203 | root = list_entry(next, struct btrfs_root, dirty_list); | 
|  | 1204 | clear_bit(BTRFS_ROOT_DIRTY, &root->state); | 
|  | 1205 |  | 
|  | 1206 | if (root != fs_info->extent_root) | 
|  | 1207 | list_add_tail(&root->dirty_list, | 
|  | 1208 | &trans->transaction->switch_commits); | 
|  | 1209 | ret = update_cowonly_root(trans, root); | 
|  | 1210 | if (ret) | 
|  | 1211 | return ret; | 
|  | 1212 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 1213 | if (ret) | 
|  | 1214 | return ret; | 
|  | 1215 | } | 
|  | 1216 |  | 
|  | 1217 | while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) { | 
|  | 1218 | ret = btrfs_write_dirty_block_groups(trans, fs_info); | 
|  | 1219 | if (ret) | 
|  | 1220 | return ret; | 
|  | 1221 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 1222 | if (ret) | 
|  | 1223 | return ret; | 
|  | 1224 | } | 
|  | 1225 |  | 
|  | 1226 | if (!list_empty(&fs_info->dirty_cowonly_roots)) | 
|  | 1227 | goto again; | 
|  | 1228 |  | 
|  | 1229 | list_add_tail(&fs_info->extent_root->dirty_list, | 
|  | 1230 | &trans->transaction->switch_commits); | 
|  | 1231 | btrfs_after_dev_replace_commit(fs_info); | 
|  | 1232 |  | 
|  | 1233 | return 0; | 
|  | 1234 | } | 
|  | 1235 |  | 
|  | 1236 | /* | 
|  | 1237 | * dead roots are old snapshots that need to be deleted.  This allocates | 
|  | 1238 | * a dirty root struct and adds it into the list of dead roots that need to | 
|  | 1239 | * be deleted | 
|  | 1240 | */ | 
|  | 1241 | void btrfs_add_dead_root(struct btrfs_root *root) | 
|  | 1242 | { | 
|  | 1243 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 1244 |  | 
|  | 1245 | spin_lock(&fs_info->trans_lock); | 
|  | 1246 | if (list_empty(&root->root_list)) | 
|  | 1247 | list_add_tail(&root->root_list, &fs_info->dead_roots); | 
|  | 1248 | spin_unlock(&fs_info->trans_lock); | 
|  | 1249 | } | 
|  | 1250 |  | 
|  | 1251 | /* | 
|  | 1252 | * update all the cowonly tree roots on disk | 
|  | 1253 | */ | 
|  | 1254 | static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | 
|  | 1255 | struct btrfs_fs_info *fs_info) | 
|  | 1256 | { | 
|  | 1257 | struct btrfs_root *gang[8]; | 
|  | 1258 | int i; | 
|  | 1259 | int ret; | 
|  | 1260 | int err = 0; | 
|  | 1261 |  | 
|  | 1262 | spin_lock(&fs_info->fs_roots_radix_lock); | 
|  | 1263 | while (1) { | 
|  | 1264 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, | 
|  | 1265 | (void **)gang, 0, | 
|  | 1266 | ARRAY_SIZE(gang), | 
|  | 1267 | BTRFS_ROOT_TRANS_TAG); | 
|  | 1268 | if (ret == 0) | 
|  | 1269 | break; | 
|  | 1270 | for (i = 0; i < ret; i++) { | 
|  | 1271 | struct btrfs_root *root = gang[i]; | 
|  | 1272 | radix_tree_tag_clear(&fs_info->fs_roots_radix, | 
|  | 1273 | (unsigned long)root->root_key.objectid, | 
|  | 1274 | BTRFS_ROOT_TRANS_TAG); | 
|  | 1275 | spin_unlock(&fs_info->fs_roots_radix_lock); | 
|  | 1276 |  | 
|  | 1277 | btrfs_free_log(trans, root); | 
|  | 1278 | btrfs_update_reloc_root(trans, root); | 
|  | 1279 | btrfs_orphan_commit_root(trans, root); | 
|  | 1280 |  | 
|  | 1281 | btrfs_save_ino_cache(root, trans); | 
|  | 1282 |  | 
|  | 1283 | /* see comments in should_cow_block() */ | 
|  | 1284 | clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); | 
|  | 1285 | smp_mb__after_atomic(); | 
|  | 1286 |  | 
|  | 1287 | if (root->commit_root != root->node) { | 
|  | 1288 | list_add_tail(&root->dirty_list, | 
|  | 1289 | &trans->transaction->switch_commits); | 
|  | 1290 | btrfs_set_root_node(&root->root_item, | 
|  | 1291 | root->node); | 
|  | 1292 | } | 
|  | 1293 |  | 
|  | 1294 | err = btrfs_update_root(trans, fs_info->tree_root, | 
|  | 1295 | &root->root_key, | 
|  | 1296 | &root->root_item); | 
|  | 1297 | spin_lock(&fs_info->fs_roots_radix_lock); | 
|  | 1298 | if (err) | 
|  | 1299 | break; | 
|  | 1300 | btrfs_qgroup_free_meta_all(root); | 
|  | 1301 | } | 
|  | 1302 | } | 
|  | 1303 | spin_unlock(&fs_info->fs_roots_radix_lock); | 
|  | 1304 | return err; | 
|  | 1305 | } | 
|  | 1306 |  | 
|  | 1307 | /* | 
|  | 1308 | * defrag a given btree. | 
|  | 1309 | * Every leaf in the btree is read and defragged. | 
|  | 1310 | */ | 
|  | 1311 | int btrfs_defrag_root(struct btrfs_root *root) | 
|  | 1312 | { | 
|  | 1313 | struct btrfs_fs_info *info = root->fs_info; | 
|  | 1314 | struct btrfs_trans_handle *trans; | 
|  | 1315 | int ret; | 
|  | 1316 |  | 
|  | 1317 | if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state)) | 
|  | 1318 | return 0; | 
|  | 1319 |  | 
|  | 1320 | while (1) { | 
|  | 1321 | trans = btrfs_start_transaction(root, 0); | 
|  | 1322 | if (IS_ERR(trans)) | 
|  | 1323 | return PTR_ERR(trans); | 
|  | 1324 |  | 
|  | 1325 | ret = btrfs_defrag_leaves(trans, root); | 
|  | 1326 |  | 
|  | 1327 | btrfs_end_transaction(trans); | 
|  | 1328 | btrfs_btree_balance_dirty(info); | 
|  | 1329 | cond_resched(); | 
|  | 1330 |  | 
|  | 1331 | if (btrfs_fs_closing(info) || ret != -EAGAIN) | 
|  | 1332 | break; | 
|  | 1333 |  | 
|  | 1334 | if (btrfs_defrag_cancelled(info)) { | 
|  | 1335 | btrfs_debug(info, "defrag_root cancelled"); | 
|  | 1336 | ret = -EAGAIN; | 
|  | 1337 | break; | 
|  | 1338 | } | 
|  | 1339 | } | 
|  | 1340 | clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state); | 
|  | 1341 | return ret; | 
|  | 1342 | } | 
|  | 1343 |  | 
|  | 1344 | /* | 
|  | 1345 | * Do all special snapshot related qgroup dirty hack. | 
|  | 1346 | * | 
|  | 1347 | * Will do all needed qgroup inherit and dirty hack like switch commit | 
|  | 1348 | * roots inside one transaction and write all btree into disk, to make | 
|  | 1349 | * qgroup works. | 
|  | 1350 | */ | 
|  | 1351 | static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, | 
|  | 1352 | struct btrfs_root *src, | 
|  | 1353 | struct btrfs_root *parent, | 
|  | 1354 | struct btrfs_qgroup_inherit *inherit, | 
|  | 1355 | u64 dst_objectid) | 
|  | 1356 | { | 
|  | 1357 | struct btrfs_fs_info *fs_info = src->fs_info; | 
|  | 1358 | int ret; | 
|  | 1359 |  | 
|  | 1360 | /* | 
|  | 1361 | * Save some performance in the case that qgroups are not | 
|  | 1362 | * enabled. If this check races with the ioctl, rescan will | 
|  | 1363 | * kick in anyway. | 
|  | 1364 | */ | 
|  | 1365 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) | 
|  | 1366 | return 0; | 
|  | 1367 |  | 
|  | 1368 | /* | 
|  | 1369 | * Ensure dirty @src will be commited.  Or, after comming | 
|  | 1370 | * commit_fs_roots() and switch_commit_roots(), any dirty but not | 
|  | 1371 | * recorded root will never be updated again, causing an outdated root | 
|  | 1372 | * item. | 
|  | 1373 | */ | 
|  | 1374 | record_root_in_trans(trans, src, 1); | 
|  | 1375 |  | 
|  | 1376 | /* | 
|  | 1377 | * We are going to commit transaction, see btrfs_commit_transaction() | 
|  | 1378 | * comment for reason locking tree_log_mutex | 
|  | 1379 | */ | 
|  | 1380 | mutex_lock(&fs_info->tree_log_mutex); | 
|  | 1381 |  | 
|  | 1382 | ret = commit_fs_roots(trans, fs_info); | 
|  | 1383 | if (ret) | 
|  | 1384 | goto out; | 
|  | 1385 | ret = btrfs_qgroup_account_extents(trans, fs_info); | 
|  | 1386 | if (ret < 0) | 
|  | 1387 | goto out; | 
|  | 1388 |  | 
|  | 1389 | /* Now qgroup are all updated, we can inherit it to new qgroups */ | 
|  | 1390 | ret = btrfs_qgroup_inherit(trans, fs_info, | 
|  | 1391 | src->root_key.objectid, dst_objectid, | 
|  | 1392 | inherit); | 
|  | 1393 | if (ret < 0) | 
|  | 1394 | goto out; | 
|  | 1395 |  | 
|  | 1396 | /* | 
|  | 1397 | * Now we do a simplified commit transaction, which will: | 
|  | 1398 | * 1) commit all subvolume and extent tree | 
|  | 1399 | *    To ensure all subvolume and extent tree have a valid | 
|  | 1400 | *    commit_root to accounting later insert_dir_item() | 
|  | 1401 | * 2) write all btree blocks onto disk | 
|  | 1402 | *    This is to make sure later btree modification will be cowed | 
|  | 1403 | *    Or commit_root can be populated and cause wrong qgroup numbers | 
|  | 1404 | * In this simplified commit, we don't really care about other trees | 
|  | 1405 | * like chunk and root tree, as they won't affect qgroup. | 
|  | 1406 | * And we don't write super to avoid half committed status. | 
|  | 1407 | */ | 
|  | 1408 | ret = commit_cowonly_roots(trans, fs_info); | 
|  | 1409 | if (ret) | 
|  | 1410 | goto out; | 
|  | 1411 | switch_commit_roots(trans->transaction, fs_info); | 
|  | 1412 | ret = btrfs_write_and_wait_transaction(trans, fs_info); | 
|  | 1413 | if (ret) | 
|  | 1414 | btrfs_handle_fs_error(fs_info, ret, | 
|  | 1415 | "Error while writing out transaction for qgroup"); | 
|  | 1416 |  | 
|  | 1417 | out: | 
|  | 1418 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 1419 |  | 
|  | 1420 | /* | 
|  | 1421 | * Force parent root to be updated, as we recorded it before so its | 
|  | 1422 | * last_trans == cur_transid. | 
|  | 1423 | * Or it won't be committed again onto disk after later | 
|  | 1424 | * insert_dir_item() | 
|  | 1425 | */ | 
|  | 1426 | if (!ret) | 
|  | 1427 | record_root_in_trans(trans, parent, 1); | 
|  | 1428 | return ret; | 
|  | 1429 | } | 
|  | 1430 |  | 
|  | 1431 | /* | 
|  | 1432 | * new snapshots need to be created at a very specific time in the | 
|  | 1433 | * transaction commit.  This does the actual creation. | 
|  | 1434 | * | 
|  | 1435 | * Note: | 
|  | 1436 | * If the error which may affect the commitment of the current transaction | 
|  | 1437 | * happens, we should return the error number. If the error which just affect | 
|  | 1438 | * the creation of the pending snapshots, just return 0. | 
|  | 1439 | */ | 
|  | 1440 | static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | 
|  | 1441 | struct btrfs_fs_info *fs_info, | 
|  | 1442 | struct btrfs_pending_snapshot *pending) | 
|  | 1443 | { | 
|  | 1444 | struct btrfs_key key; | 
|  | 1445 | struct btrfs_root_item *new_root_item; | 
|  | 1446 | struct btrfs_root *tree_root = fs_info->tree_root; | 
|  | 1447 | struct btrfs_root *root = pending->root; | 
|  | 1448 | struct btrfs_root *parent_root; | 
|  | 1449 | struct btrfs_block_rsv *rsv; | 
|  | 1450 | struct inode *parent_inode; | 
|  | 1451 | struct btrfs_path *path; | 
|  | 1452 | struct btrfs_dir_item *dir_item; | 
|  | 1453 | struct dentry *dentry; | 
|  | 1454 | struct extent_buffer *tmp; | 
|  | 1455 | struct extent_buffer *old; | 
|  | 1456 | struct timespec cur_time; | 
|  | 1457 | int ret = 0; | 
|  | 1458 | u64 to_reserve = 0; | 
|  | 1459 | u64 index = 0; | 
|  | 1460 | u64 objectid; | 
|  | 1461 | u64 root_flags; | 
|  | 1462 | uuid_le new_uuid; | 
|  | 1463 |  | 
|  | 1464 | ASSERT(pending->path); | 
|  | 1465 | path = pending->path; | 
|  | 1466 |  | 
|  | 1467 | ASSERT(pending->root_item); | 
|  | 1468 | new_root_item = pending->root_item; | 
|  | 1469 |  | 
|  | 1470 | pending->error = btrfs_find_free_objectid(tree_root, &objectid); | 
|  | 1471 | if (pending->error) | 
|  | 1472 | goto no_free_objectid; | 
|  | 1473 |  | 
|  | 1474 | /* | 
|  | 1475 | * Make qgroup to skip current new snapshot's qgroupid, as it is | 
|  | 1476 | * accounted by later btrfs_qgroup_inherit(). | 
|  | 1477 | */ | 
|  | 1478 | btrfs_set_skip_qgroup(trans, objectid); | 
|  | 1479 |  | 
|  | 1480 | btrfs_reloc_pre_snapshot(pending, &to_reserve); | 
|  | 1481 |  | 
|  | 1482 | if (to_reserve > 0) { | 
|  | 1483 | pending->error = btrfs_block_rsv_add(root, | 
|  | 1484 | &pending->block_rsv, | 
|  | 1485 | to_reserve, | 
|  | 1486 | BTRFS_RESERVE_NO_FLUSH); | 
|  | 1487 | if (pending->error) | 
|  | 1488 | goto clear_skip_qgroup; | 
|  | 1489 | } | 
|  | 1490 |  | 
|  | 1491 | key.objectid = objectid; | 
|  | 1492 | key.offset = (u64)-1; | 
|  | 1493 | key.type = BTRFS_ROOT_ITEM_KEY; | 
|  | 1494 |  | 
|  | 1495 | rsv = trans->block_rsv; | 
|  | 1496 | trans->block_rsv = &pending->block_rsv; | 
|  | 1497 | trans->bytes_reserved = trans->block_rsv->reserved; | 
|  | 1498 | trace_btrfs_space_reservation(fs_info, "transaction", | 
|  | 1499 | trans->transid, | 
|  | 1500 | trans->bytes_reserved, 1); | 
|  | 1501 | dentry = pending->dentry; | 
|  | 1502 | parent_inode = pending->dir; | 
|  | 1503 | parent_root = BTRFS_I(parent_inode)->root; | 
|  | 1504 | record_root_in_trans(trans, parent_root, 0); | 
|  | 1505 |  | 
|  | 1506 | cur_time = current_time(parent_inode); | 
|  | 1507 |  | 
|  | 1508 | /* | 
|  | 1509 | * insert the directory item | 
|  | 1510 | */ | 
|  | 1511 | ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index); | 
|  | 1512 | BUG_ON(ret); /* -ENOMEM */ | 
|  | 1513 |  | 
|  | 1514 | /* check if there is a file/dir which has the same name. */ | 
|  | 1515 | dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, | 
|  | 1516 | btrfs_ino(BTRFS_I(parent_inode)), | 
|  | 1517 | dentry->d_name.name, | 
|  | 1518 | dentry->d_name.len, 0); | 
|  | 1519 | if (dir_item != NULL && !IS_ERR(dir_item)) { | 
|  | 1520 | pending->error = -EEXIST; | 
|  | 1521 | goto dir_item_existed; | 
|  | 1522 | } else if (IS_ERR(dir_item)) { | 
|  | 1523 | ret = PTR_ERR(dir_item); | 
|  | 1524 | btrfs_abort_transaction(trans, ret); | 
|  | 1525 | goto fail; | 
|  | 1526 | } | 
|  | 1527 | btrfs_release_path(path); | 
|  | 1528 |  | 
|  | 1529 | /* | 
|  | 1530 | * pull in the delayed directory update | 
|  | 1531 | * and the delayed inode item | 
|  | 1532 | * otherwise we corrupt the FS during | 
|  | 1533 | * snapshot | 
|  | 1534 | */ | 
|  | 1535 | ret = btrfs_run_delayed_items(trans, fs_info); | 
|  | 1536 | if (ret) {	/* Transaction aborted */ | 
|  | 1537 | btrfs_abort_transaction(trans, ret); | 
|  | 1538 | goto fail; | 
|  | 1539 | } | 
|  | 1540 |  | 
|  | 1541 | record_root_in_trans(trans, root, 0); | 
|  | 1542 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 
|  | 1543 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 
|  | 1544 | btrfs_check_and_init_root_item(new_root_item); | 
|  | 1545 |  | 
|  | 1546 | root_flags = btrfs_root_flags(new_root_item); | 
|  | 1547 | if (pending->readonly) | 
|  | 1548 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | 
|  | 1549 | else | 
|  | 1550 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | 
|  | 1551 | btrfs_set_root_flags(new_root_item, root_flags); | 
|  | 1552 |  | 
|  | 1553 | btrfs_set_root_generation_v2(new_root_item, | 
|  | 1554 | trans->transid); | 
|  | 1555 | uuid_le_gen(&new_uuid); | 
|  | 1556 | memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); | 
|  | 1557 | memcpy(new_root_item->parent_uuid, root->root_item.uuid, | 
|  | 1558 | BTRFS_UUID_SIZE); | 
|  | 1559 | if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) { | 
|  | 1560 | memset(new_root_item->received_uuid, 0, | 
|  | 1561 | sizeof(new_root_item->received_uuid)); | 
|  | 1562 | memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); | 
|  | 1563 | memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); | 
|  | 1564 | btrfs_set_root_stransid(new_root_item, 0); | 
|  | 1565 | btrfs_set_root_rtransid(new_root_item, 0); | 
|  | 1566 | } | 
|  | 1567 | btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec); | 
|  | 1568 | btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec); | 
|  | 1569 | btrfs_set_root_otransid(new_root_item, trans->transid); | 
|  | 1570 |  | 
|  | 1571 | old = btrfs_lock_root_node(root); | 
|  | 1572 | ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); | 
|  | 1573 | if (ret) { | 
|  | 1574 | btrfs_tree_unlock(old); | 
|  | 1575 | free_extent_buffer(old); | 
|  | 1576 | btrfs_abort_transaction(trans, ret); | 
|  | 1577 | goto fail; | 
|  | 1578 | } | 
|  | 1579 |  | 
|  | 1580 | btrfs_set_lock_blocking(old); | 
|  | 1581 |  | 
|  | 1582 | ret = btrfs_copy_root(trans, root, old, &tmp, objectid); | 
|  | 1583 | /* clean up in any case */ | 
|  | 1584 | btrfs_tree_unlock(old); | 
|  | 1585 | free_extent_buffer(old); | 
|  | 1586 | if (ret) { | 
|  | 1587 | btrfs_abort_transaction(trans, ret); | 
|  | 1588 | goto fail; | 
|  | 1589 | } | 
|  | 1590 | /* see comments in should_cow_block() */ | 
|  | 1591 | set_bit(BTRFS_ROOT_FORCE_COW, &root->state); | 
|  | 1592 | smp_wmb(); | 
|  | 1593 |  | 
|  | 1594 | btrfs_set_root_node(new_root_item, tmp); | 
|  | 1595 | /* record when the snapshot was created in key.offset */ | 
|  | 1596 | key.offset = trans->transid; | 
|  | 1597 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); | 
|  | 1598 | btrfs_tree_unlock(tmp); | 
|  | 1599 | free_extent_buffer(tmp); | 
|  | 1600 | if (ret) { | 
|  | 1601 | btrfs_abort_transaction(trans, ret); | 
|  | 1602 | goto fail; | 
|  | 1603 | } | 
|  | 1604 |  | 
|  | 1605 | /* | 
|  | 1606 | * insert root back/forward references | 
|  | 1607 | */ | 
|  | 1608 | ret = btrfs_add_root_ref(trans, fs_info, objectid, | 
|  | 1609 | parent_root->root_key.objectid, | 
|  | 1610 | btrfs_ino(BTRFS_I(parent_inode)), index, | 
|  | 1611 | dentry->d_name.name, dentry->d_name.len); | 
|  | 1612 | if (ret) { | 
|  | 1613 | btrfs_abort_transaction(trans, ret); | 
|  | 1614 | goto fail; | 
|  | 1615 | } | 
|  | 1616 |  | 
|  | 1617 | key.offset = (u64)-1; | 
|  | 1618 | pending->snap = btrfs_read_fs_root_no_name(fs_info, &key); | 
|  | 1619 | if (IS_ERR(pending->snap)) { | 
|  | 1620 | ret = PTR_ERR(pending->snap); | 
|  | 1621 | btrfs_abort_transaction(trans, ret); | 
|  | 1622 | goto fail; | 
|  | 1623 | } | 
|  | 1624 |  | 
|  | 1625 | ret = btrfs_reloc_post_snapshot(trans, pending); | 
|  | 1626 | if (ret) { | 
|  | 1627 | btrfs_abort_transaction(trans, ret); | 
|  | 1628 | goto fail; | 
|  | 1629 | } | 
|  | 1630 |  | 
|  | 1631 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 1632 | if (ret) { | 
|  | 1633 | btrfs_abort_transaction(trans, ret); | 
|  | 1634 | goto fail; | 
|  | 1635 | } | 
|  | 1636 |  | 
|  | 1637 | /* | 
|  | 1638 | * Do special qgroup accounting for snapshot, as we do some qgroup | 
|  | 1639 | * snapshot hack to do fast snapshot. | 
|  | 1640 | * To co-operate with that hack, we do hack again. | 
|  | 1641 | * Or snapshot will be greatly slowed down by a subtree qgroup rescan | 
|  | 1642 | */ | 
|  | 1643 | ret = qgroup_account_snapshot(trans, root, parent_root, | 
|  | 1644 | pending->inherit, objectid); | 
|  | 1645 | if (ret < 0) | 
|  | 1646 | goto fail; | 
|  | 1647 |  | 
|  | 1648 | ret = btrfs_insert_dir_item(trans, parent_root, | 
|  | 1649 | dentry->d_name.name, dentry->d_name.len, | 
|  | 1650 | BTRFS_I(parent_inode), &key, | 
|  | 1651 | BTRFS_FT_DIR, index); | 
|  | 1652 | /* We have checked the name at the beginning, so it is impossible. */ | 
|  | 1653 | BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); | 
|  | 1654 | if (ret) { | 
|  | 1655 | btrfs_abort_transaction(trans, ret); | 
|  | 1656 | goto fail; | 
|  | 1657 | } | 
|  | 1658 |  | 
|  | 1659 | btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + | 
|  | 1660 | dentry->d_name.len * 2); | 
|  | 1661 | parent_inode->i_mtime = parent_inode->i_ctime = | 
|  | 1662 | current_time(parent_inode); | 
|  | 1663 | ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); | 
|  | 1664 | if (ret) { | 
|  | 1665 | btrfs_abort_transaction(trans, ret); | 
|  | 1666 | goto fail; | 
|  | 1667 | } | 
|  | 1668 | ret = btrfs_uuid_tree_add(trans, fs_info, new_uuid.b, | 
|  | 1669 | BTRFS_UUID_KEY_SUBVOL, objectid); | 
|  | 1670 | if (ret) { | 
|  | 1671 | btrfs_abort_transaction(trans, ret); | 
|  | 1672 | goto fail; | 
|  | 1673 | } | 
|  | 1674 | if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) { | 
|  | 1675 | ret = btrfs_uuid_tree_add(trans, fs_info, | 
|  | 1676 | new_root_item->received_uuid, | 
|  | 1677 | BTRFS_UUID_KEY_RECEIVED_SUBVOL, | 
|  | 1678 | objectid); | 
|  | 1679 | if (ret && ret != -EEXIST) { | 
|  | 1680 | btrfs_abort_transaction(trans, ret); | 
|  | 1681 | goto fail; | 
|  | 1682 | } | 
|  | 1683 | } | 
|  | 1684 |  | 
|  | 1685 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 1686 | if (ret) { | 
|  | 1687 | btrfs_abort_transaction(trans, ret); | 
|  | 1688 | goto fail; | 
|  | 1689 | } | 
|  | 1690 |  | 
|  | 1691 | fail: | 
|  | 1692 | pending->error = ret; | 
|  | 1693 | dir_item_existed: | 
|  | 1694 | trans->block_rsv = rsv; | 
|  | 1695 | trans->bytes_reserved = 0; | 
|  | 1696 | clear_skip_qgroup: | 
|  | 1697 | btrfs_clear_skip_qgroup(trans); | 
|  | 1698 | no_free_objectid: | 
|  | 1699 | kfree(new_root_item); | 
|  | 1700 | pending->root_item = NULL; | 
|  | 1701 | btrfs_free_path(path); | 
|  | 1702 | pending->path = NULL; | 
|  | 1703 |  | 
|  | 1704 | return ret; | 
|  | 1705 | } | 
|  | 1706 |  | 
|  | 1707 | /* | 
|  | 1708 | * create all the snapshots we've scheduled for creation | 
|  | 1709 | */ | 
|  | 1710 | static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | 
|  | 1711 | struct btrfs_fs_info *fs_info) | 
|  | 1712 | { | 
|  | 1713 | struct btrfs_pending_snapshot *pending, *next; | 
|  | 1714 | struct list_head *head = &trans->transaction->pending_snapshots; | 
|  | 1715 | int ret = 0; | 
|  | 1716 |  | 
|  | 1717 | list_for_each_entry_safe(pending, next, head, list) { | 
|  | 1718 | list_del(&pending->list); | 
|  | 1719 | ret = create_pending_snapshot(trans, fs_info, pending); | 
|  | 1720 | if (ret) | 
|  | 1721 | break; | 
|  | 1722 | } | 
|  | 1723 | return ret; | 
|  | 1724 | } | 
|  | 1725 |  | 
|  | 1726 | static void update_super_roots(struct btrfs_fs_info *fs_info) | 
|  | 1727 | { | 
|  | 1728 | struct btrfs_root_item *root_item; | 
|  | 1729 | struct btrfs_super_block *super; | 
|  | 1730 |  | 
|  | 1731 | super = fs_info->super_copy; | 
|  | 1732 |  | 
|  | 1733 | root_item = &fs_info->chunk_root->root_item; | 
|  | 1734 | super->chunk_root = root_item->bytenr; | 
|  | 1735 | super->chunk_root_generation = root_item->generation; | 
|  | 1736 | super->chunk_root_level = root_item->level; | 
|  | 1737 |  | 
|  | 1738 | root_item = &fs_info->tree_root->root_item; | 
|  | 1739 | super->root = root_item->bytenr; | 
|  | 1740 | super->generation = root_item->generation; | 
|  | 1741 | super->root_level = root_item->level; | 
|  | 1742 | if (btrfs_test_opt(fs_info, SPACE_CACHE)) | 
|  | 1743 | super->cache_generation = root_item->generation; | 
|  | 1744 | if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags)) | 
|  | 1745 | super->uuid_tree_generation = root_item->generation; | 
|  | 1746 | } | 
|  | 1747 |  | 
|  | 1748 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 
|  | 1749 | { | 
|  | 1750 | struct btrfs_transaction *trans; | 
|  | 1751 | int ret = 0; | 
|  | 1752 |  | 
|  | 1753 | spin_lock(&info->trans_lock); | 
|  | 1754 | trans = info->running_transaction; | 
|  | 1755 | if (trans) | 
|  | 1756 | ret = (trans->state >= TRANS_STATE_COMMIT_START); | 
|  | 1757 | spin_unlock(&info->trans_lock); | 
|  | 1758 | return ret; | 
|  | 1759 | } | 
|  | 1760 |  | 
|  | 1761 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 
|  | 1762 | { | 
|  | 1763 | struct btrfs_transaction *trans; | 
|  | 1764 | int ret = 0; | 
|  | 1765 |  | 
|  | 1766 | spin_lock(&info->trans_lock); | 
|  | 1767 | trans = info->running_transaction; | 
|  | 1768 | if (trans) | 
|  | 1769 | ret = is_transaction_blocked(trans); | 
|  | 1770 | spin_unlock(&info->trans_lock); | 
|  | 1771 | return ret; | 
|  | 1772 | } | 
|  | 1773 |  | 
|  | 1774 | /* | 
|  | 1775 | * wait for the current transaction commit to start and block subsequent | 
|  | 1776 | * transaction joins | 
|  | 1777 | */ | 
|  | 1778 | static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info, | 
|  | 1779 | struct btrfs_transaction *trans) | 
|  | 1780 | { | 
|  | 1781 | wait_event(fs_info->transaction_blocked_wait, | 
|  | 1782 | trans->state >= TRANS_STATE_COMMIT_START || trans->aborted); | 
|  | 1783 | } | 
|  | 1784 |  | 
|  | 1785 | /* | 
|  | 1786 | * wait for the current transaction to start and then become unblocked. | 
|  | 1787 | * caller holds ref. | 
|  | 1788 | */ | 
|  | 1789 | static void wait_current_trans_commit_start_and_unblock( | 
|  | 1790 | struct btrfs_fs_info *fs_info, | 
|  | 1791 | struct btrfs_transaction *trans) | 
|  | 1792 | { | 
|  | 1793 | wait_event(fs_info->transaction_wait, | 
|  | 1794 | trans->state >= TRANS_STATE_UNBLOCKED || trans->aborted); | 
|  | 1795 | } | 
|  | 1796 |  | 
|  | 1797 | /* | 
|  | 1798 | * commit transactions asynchronously. once btrfs_commit_transaction_async | 
|  | 1799 | * returns, any subsequent transaction will not be allowed to join. | 
|  | 1800 | */ | 
|  | 1801 | struct btrfs_async_commit { | 
|  | 1802 | struct btrfs_trans_handle *newtrans; | 
|  | 1803 | struct work_struct work; | 
|  | 1804 | }; | 
|  | 1805 |  | 
|  | 1806 | static void do_async_commit(struct work_struct *work) | 
|  | 1807 | { | 
|  | 1808 | struct btrfs_async_commit *ac = | 
|  | 1809 | container_of(work, struct btrfs_async_commit, work); | 
|  | 1810 |  | 
|  | 1811 | /* | 
|  | 1812 | * We've got freeze protection passed with the transaction. | 
|  | 1813 | * Tell lockdep about it. | 
|  | 1814 | */ | 
|  | 1815 | if (ac->newtrans->type & __TRANS_FREEZABLE) | 
|  | 1816 | __sb_writers_acquired(ac->newtrans->fs_info->sb, SB_FREEZE_FS); | 
|  | 1817 |  | 
|  | 1818 | current->journal_info = ac->newtrans; | 
|  | 1819 |  | 
|  | 1820 | btrfs_commit_transaction(ac->newtrans); | 
|  | 1821 | kfree(ac); | 
|  | 1822 | } | 
|  | 1823 |  | 
|  | 1824 | int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | 
|  | 1825 | int wait_for_unblock) | 
|  | 1826 | { | 
|  | 1827 | struct btrfs_fs_info *fs_info = trans->fs_info; | 
|  | 1828 | struct btrfs_async_commit *ac; | 
|  | 1829 | struct btrfs_transaction *cur_trans; | 
|  | 1830 |  | 
|  | 1831 | ac = kmalloc(sizeof(*ac), GFP_NOFS); | 
|  | 1832 | if (!ac) | 
|  | 1833 | return -ENOMEM; | 
|  | 1834 |  | 
|  | 1835 | INIT_WORK(&ac->work, do_async_commit); | 
|  | 1836 | ac->newtrans = btrfs_join_transaction(trans->root); | 
|  | 1837 | if (IS_ERR(ac->newtrans)) { | 
|  | 1838 | int err = PTR_ERR(ac->newtrans); | 
|  | 1839 | kfree(ac); | 
|  | 1840 | return err; | 
|  | 1841 | } | 
|  | 1842 |  | 
|  | 1843 | /* take transaction reference */ | 
|  | 1844 | cur_trans = trans->transaction; | 
|  | 1845 | refcount_inc(&cur_trans->use_count); | 
|  | 1846 |  | 
|  | 1847 | btrfs_end_transaction(trans); | 
|  | 1848 |  | 
|  | 1849 | /* | 
|  | 1850 | * Tell lockdep we've released the freeze rwsem, since the | 
|  | 1851 | * async commit thread will be the one to unlock it. | 
|  | 1852 | */ | 
|  | 1853 | if (ac->newtrans->type & __TRANS_FREEZABLE) | 
|  | 1854 | __sb_writers_release(fs_info->sb, SB_FREEZE_FS); | 
|  | 1855 |  | 
|  | 1856 | schedule_work(&ac->work); | 
|  | 1857 |  | 
|  | 1858 | /* wait for transaction to start and unblock */ | 
|  | 1859 | if (wait_for_unblock) | 
|  | 1860 | wait_current_trans_commit_start_and_unblock(fs_info, cur_trans); | 
|  | 1861 | else | 
|  | 1862 | wait_current_trans_commit_start(fs_info, cur_trans); | 
|  | 1863 |  | 
|  | 1864 | if (current->journal_info == trans) | 
|  | 1865 | current->journal_info = NULL; | 
|  | 1866 |  | 
|  | 1867 | btrfs_put_transaction(cur_trans); | 
|  | 1868 | return 0; | 
|  | 1869 | } | 
|  | 1870 |  | 
|  | 1871 |  | 
|  | 1872 | static void cleanup_transaction(struct btrfs_trans_handle *trans, | 
|  | 1873 | struct btrfs_root *root, int err) | 
|  | 1874 | { | 
|  | 1875 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 1876 | struct btrfs_transaction *cur_trans = trans->transaction; | 
|  | 1877 | DEFINE_WAIT(wait); | 
|  | 1878 |  | 
|  | 1879 | WARN_ON(trans->use_count > 1); | 
|  | 1880 |  | 
|  | 1881 | btrfs_abort_transaction(trans, err); | 
|  | 1882 |  | 
|  | 1883 | spin_lock(&fs_info->trans_lock); | 
|  | 1884 |  | 
|  | 1885 | /* | 
|  | 1886 | * If the transaction is removed from the list, it means this | 
|  | 1887 | * transaction has been committed successfully, so it is impossible | 
|  | 1888 | * to call the cleanup function. | 
|  | 1889 | */ | 
|  | 1890 | BUG_ON(list_empty(&cur_trans->list)); | 
|  | 1891 |  | 
|  | 1892 | list_del_init(&cur_trans->list); | 
|  | 1893 | if (cur_trans == fs_info->running_transaction) { | 
|  | 1894 | cur_trans->state = TRANS_STATE_COMMIT_DOING; | 
|  | 1895 | spin_unlock(&fs_info->trans_lock); | 
|  | 1896 | wait_event(cur_trans->writer_wait, | 
|  | 1897 | atomic_read(&cur_trans->num_writers) == 1); | 
|  | 1898 |  | 
|  | 1899 | spin_lock(&fs_info->trans_lock); | 
|  | 1900 | } | 
|  | 1901 | spin_unlock(&fs_info->trans_lock); | 
|  | 1902 |  | 
|  | 1903 | btrfs_cleanup_one_transaction(trans->transaction, fs_info); | 
|  | 1904 |  | 
|  | 1905 | spin_lock(&fs_info->trans_lock); | 
|  | 1906 | if (cur_trans == fs_info->running_transaction) | 
|  | 1907 | fs_info->running_transaction = NULL; | 
|  | 1908 | spin_unlock(&fs_info->trans_lock); | 
|  | 1909 |  | 
|  | 1910 | if (trans->type & __TRANS_FREEZABLE) | 
|  | 1911 | sb_end_intwrite(fs_info->sb); | 
|  | 1912 | btrfs_put_transaction(cur_trans); | 
|  | 1913 | btrfs_put_transaction(cur_trans); | 
|  | 1914 |  | 
|  | 1915 | trace_btrfs_transaction_commit(root); | 
|  | 1916 |  | 
|  | 1917 | if (current->journal_info == trans) | 
|  | 1918 | current->journal_info = NULL; | 
|  | 1919 | btrfs_scrub_cancel(fs_info); | 
|  | 1920 |  | 
|  | 1921 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 
|  | 1922 | } | 
|  | 1923 |  | 
|  | 1924 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 
|  | 1925 | { | 
|  | 1926 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 
|  | 1927 | return btrfs_start_delalloc_roots(fs_info, 1, -1); | 
|  | 1928 | return 0; | 
|  | 1929 | } | 
|  | 1930 |  | 
|  | 1931 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | 
|  | 1932 | { | 
|  | 1933 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 
|  | 1934 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); | 
|  | 1935 | } | 
|  | 1936 |  | 
|  | 1937 | static inline void | 
|  | 1938 | btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans) | 
|  | 1939 | { | 
|  | 1940 | wait_event(cur_trans->pending_wait, | 
|  | 1941 | atomic_read(&cur_trans->pending_ordered) == 0); | 
|  | 1942 | } | 
|  | 1943 |  | 
|  | 1944 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | 
|  | 1945 | { | 
|  | 1946 | struct btrfs_fs_info *fs_info = trans->fs_info; | 
|  | 1947 | struct btrfs_transaction *cur_trans = trans->transaction; | 
|  | 1948 | struct btrfs_transaction *prev_trans = NULL; | 
|  | 1949 | int ret; | 
|  | 1950 |  | 
|  | 1951 | /* | 
|  | 1952 | * Some places just start a transaction to commit it.  We need to make | 
|  | 1953 | * sure that if this commit fails that the abort code actually marks the | 
|  | 1954 | * transaction as failed, so set trans->dirty to make the abort code do | 
|  | 1955 | * the right thing. | 
|  | 1956 | */ | 
|  | 1957 | trans->dirty = true; | 
|  | 1958 |  | 
|  | 1959 | /* Stop the commit early if ->aborted is set */ | 
|  | 1960 | if (unlikely(READ_ONCE(cur_trans->aborted))) { | 
|  | 1961 | ret = cur_trans->aborted; | 
|  | 1962 | btrfs_end_transaction(trans); | 
|  | 1963 | return ret; | 
|  | 1964 | } | 
|  | 1965 |  | 
|  | 1966 | btrfs_trans_release_metadata(trans, fs_info); | 
|  | 1967 | trans->block_rsv = NULL; | 
|  | 1968 |  | 
|  | 1969 | /* make a pass through all the delayed refs we have so far | 
|  | 1970 | * any runnings procs may add more while we are here | 
|  | 1971 | */ | 
|  | 1972 | ret = btrfs_run_delayed_refs(trans, fs_info, 0); | 
|  | 1973 | if (ret) { | 
|  | 1974 | btrfs_end_transaction(trans); | 
|  | 1975 | return ret; | 
|  | 1976 | } | 
|  | 1977 |  | 
|  | 1978 | cur_trans = trans->transaction; | 
|  | 1979 |  | 
|  | 1980 | /* | 
|  | 1981 | * set the flushing flag so procs in this transaction have to | 
|  | 1982 | * start sending their work down. | 
|  | 1983 | */ | 
|  | 1984 | cur_trans->delayed_refs.flushing = 1; | 
|  | 1985 | smp_wmb(); | 
|  | 1986 |  | 
|  | 1987 | if (!list_empty(&trans->new_bgs)) | 
|  | 1988 | btrfs_create_pending_block_groups(trans, fs_info); | 
|  | 1989 |  | 
|  | 1990 | ret = btrfs_run_delayed_refs(trans, fs_info, 0); | 
|  | 1991 | if (ret) { | 
|  | 1992 | btrfs_end_transaction(trans); | 
|  | 1993 | return ret; | 
|  | 1994 | } | 
|  | 1995 |  | 
|  | 1996 | if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) { | 
|  | 1997 | int run_it = 0; | 
|  | 1998 |  | 
|  | 1999 | /* this mutex is also taken before trying to set | 
|  | 2000 | * block groups readonly.  We need to make sure | 
|  | 2001 | * that nobody has set a block group readonly | 
|  | 2002 | * after a extents from that block group have been | 
|  | 2003 | * allocated for cache files.  btrfs_set_block_group_ro | 
|  | 2004 | * will wait for the transaction to commit if it | 
|  | 2005 | * finds BTRFS_TRANS_DIRTY_BG_RUN set. | 
|  | 2006 | * | 
|  | 2007 | * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure | 
|  | 2008 | * only one process starts all the block group IO.  It wouldn't | 
|  | 2009 | * hurt to have more than one go through, but there's no | 
|  | 2010 | * real advantage to it either. | 
|  | 2011 | */ | 
|  | 2012 | mutex_lock(&fs_info->ro_block_group_mutex); | 
|  | 2013 | if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN, | 
|  | 2014 | &cur_trans->flags)) | 
|  | 2015 | run_it = 1; | 
|  | 2016 | mutex_unlock(&fs_info->ro_block_group_mutex); | 
|  | 2017 |  | 
|  | 2018 | if (run_it) | 
|  | 2019 | ret = btrfs_start_dirty_block_groups(trans, fs_info); | 
|  | 2020 | } | 
|  | 2021 | if (ret) { | 
|  | 2022 | btrfs_end_transaction(trans); | 
|  | 2023 | return ret; | 
|  | 2024 | } | 
|  | 2025 |  | 
|  | 2026 | spin_lock(&fs_info->trans_lock); | 
|  | 2027 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { | 
|  | 2028 | spin_unlock(&fs_info->trans_lock); | 
|  | 2029 | refcount_inc(&cur_trans->use_count); | 
|  | 2030 | ret = btrfs_end_transaction(trans); | 
|  | 2031 |  | 
|  | 2032 | wait_for_commit(cur_trans); | 
|  | 2033 |  | 
|  | 2034 | if (unlikely(cur_trans->aborted)) | 
|  | 2035 | ret = cur_trans->aborted; | 
|  | 2036 |  | 
|  | 2037 | btrfs_put_transaction(cur_trans); | 
|  | 2038 |  | 
|  | 2039 | return ret; | 
|  | 2040 | } | 
|  | 2041 |  | 
|  | 2042 | cur_trans->state = TRANS_STATE_COMMIT_START; | 
|  | 2043 | wake_up(&fs_info->transaction_blocked_wait); | 
|  | 2044 |  | 
|  | 2045 | if (cur_trans->list.prev != &fs_info->trans_list) { | 
|  | 2046 | prev_trans = list_entry(cur_trans->list.prev, | 
|  | 2047 | struct btrfs_transaction, list); | 
|  | 2048 | if (prev_trans->state != TRANS_STATE_COMPLETED) { | 
|  | 2049 | refcount_inc(&prev_trans->use_count); | 
|  | 2050 | spin_unlock(&fs_info->trans_lock); | 
|  | 2051 |  | 
|  | 2052 | wait_for_commit(prev_trans); | 
|  | 2053 | ret = prev_trans->aborted; | 
|  | 2054 |  | 
|  | 2055 | btrfs_put_transaction(prev_trans); | 
|  | 2056 | if (ret) | 
|  | 2057 | goto cleanup_transaction; | 
|  | 2058 | } else { | 
|  | 2059 | spin_unlock(&fs_info->trans_lock); | 
|  | 2060 | } | 
|  | 2061 | } else { | 
|  | 2062 | spin_unlock(&fs_info->trans_lock); | 
|  | 2063 | /* | 
|  | 2064 | * The previous transaction was aborted and was already removed | 
|  | 2065 | * from the list of transactions at fs_info->trans_list. So we | 
|  | 2066 | * abort to prevent writing a new superblock that reflects a | 
|  | 2067 | * corrupt state (pointing to trees with unwritten nodes/leafs). | 
|  | 2068 | */ | 
|  | 2069 | if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) { | 
|  | 2070 | ret = -EROFS; | 
|  | 2071 | goto cleanup_transaction; | 
|  | 2072 | } | 
|  | 2073 | } | 
|  | 2074 |  | 
|  | 2075 | extwriter_counter_dec(cur_trans, trans->type); | 
|  | 2076 |  | 
|  | 2077 | ret = btrfs_start_delalloc_flush(fs_info); | 
|  | 2078 | if (ret) | 
|  | 2079 | goto cleanup_transaction; | 
|  | 2080 |  | 
|  | 2081 | ret = btrfs_run_delayed_items(trans, fs_info); | 
|  | 2082 | if (ret) | 
|  | 2083 | goto cleanup_transaction; | 
|  | 2084 |  | 
|  | 2085 | wait_event(cur_trans->writer_wait, | 
|  | 2086 | extwriter_counter_read(cur_trans) == 0); | 
|  | 2087 |  | 
|  | 2088 | /* some pending stuffs might be added after the previous flush. */ | 
|  | 2089 | ret = btrfs_run_delayed_items(trans, fs_info); | 
|  | 2090 | if (ret) | 
|  | 2091 | goto cleanup_transaction; | 
|  | 2092 |  | 
|  | 2093 | btrfs_wait_delalloc_flush(fs_info); | 
|  | 2094 |  | 
|  | 2095 | btrfs_wait_pending_ordered(cur_trans); | 
|  | 2096 |  | 
|  | 2097 | btrfs_scrub_pause(fs_info); | 
|  | 2098 | /* | 
|  | 2099 | * Ok now we need to make sure to block out any other joins while we | 
|  | 2100 | * commit the transaction.  We could have started a join before setting | 
|  | 2101 | * COMMIT_DOING so make sure to wait for num_writers to == 1 again. | 
|  | 2102 | */ | 
|  | 2103 | spin_lock(&fs_info->trans_lock); | 
|  | 2104 | cur_trans->state = TRANS_STATE_COMMIT_DOING; | 
|  | 2105 | spin_unlock(&fs_info->trans_lock); | 
|  | 2106 | wait_event(cur_trans->writer_wait, | 
|  | 2107 | atomic_read(&cur_trans->num_writers) == 1); | 
|  | 2108 |  | 
|  | 2109 | /* ->aborted might be set after the previous check, so check it */ | 
|  | 2110 | if (unlikely(READ_ONCE(cur_trans->aborted))) { | 
|  | 2111 | ret = cur_trans->aborted; | 
|  | 2112 | goto scrub_continue; | 
|  | 2113 | } | 
|  | 2114 | /* | 
|  | 2115 | * the reloc mutex makes sure that we stop | 
|  | 2116 | * the balancing code from coming in and moving | 
|  | 2117 | * extents around in the middle of the commit | 
|  | 2118 | */ | 
|  | 2119 | mutex_lock(&fs_info->reloc_mutex); | 
|  | 2120 |  | 
|  | 2121 | /* | 
|  | 2122 | * We needn't worry about the delayed items because we will | 
|  | 2123 | * deal with them in create_pending_snapshot(), which is the | 
|  | 2124 | * core function of the snapshot creation. | 
|  | 2125 | */ | 
|  | 2126 | ret = create_pending_snapshots(trans, fs_info); | 
|  | 2127 | if (ret) { | 
|  | 2128 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2129 | goto scrub_continue; | 
|  | 2130 | } | 
|  | 2131 |  | 
|  | 2132 | /* | 
|  | 2133 | * We insert the dir indexes of the snapshots and update the inode | 
|  | 2134 | * of the snapshots' parents after the snapshot creation, so there | 
|  | 2135 | * are some delayed items which are not dealt with. Now deal with | 
|  | 2136 | * them. | 
|  | 2137 | * | 
|  | 2138 | * We needn't worry that this operation will corrupt the snapshots, | 
|  | 2139 | * because all the tree which are snapshoted will be forced to COW | 
|  | 2140 | * the nodes and leaves. | 
|  | 2141 | */ | 
|  | 2142 | ret = btrfs_run_delayed_items(trans, fs_info); | 
|  | 2143 | if (ret) { | 
|  | 2144 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2145 | goto scrub_continue; | 
|  | 2146 | } | 
|  | 2147 |  | 
|  | 2148 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 2149 | if (ret) { | 
|  | 2150 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2151 | goto scrub_continue; | 
|  | 2152 | } | 
|  | 2153 |  | 
|  | 2154 | /* | 
|  | 2155 | * make sure none of the code above managed to slip in a | 
|  | 2156 | * delayed item | 
|  | 2157 | */ | 
|  | 2158 | btrfs_assert_delayed_root_empty(fs_info); | 
|  | 2159 |  | 
|  | 2160 | WARN_ON(cur_trans != trans->transaction); | 
|  | 2161 |  | 
|  | 2162 | /* btrfs_commit_tree_roots is responsible for getting the | 
|  | 2163 | * various roots consistent with each other.  Every pointer | 
|  | 2164 | * in the tree of tree roots has to point to the most up to date | 
|  | 2165 | * root for every subvolume and other tree.  So, we have to keep | 
|  | 2166 | * the tree logging code from jumping in and changing any | 
|  | 2167 | * of the trees. | 
|  | 2168 | * | 
|  | 2169 | * At this point in the commit, there can't be any tree-log | 
|  | 2170 | * writers, but a little lower down we drop the trans mutex | 
|  | 2171 | * and let new people in.  By holding the tree_log_mutex | 
|  | 2172 | * from now until after the super is written, we avoid races | 
|  | 2173 | * with the tree-log code. | 
|  | 2174 | */ | 
|  | 2175 | mutex_lock(&fs_info->tree_log_mutex); | 
|  | 2176 |  | 
|  | 2177 | ret = commit_fs_roots(trans, fs_info); | 
|  | 2178 | if (ret) { | 
|  | 2179 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2180 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2181 | goto scrub_continue; | 
|  | 2182 | } | 
|  | 2183 |  | 
|  | 2184 | /* | 
|  | 2185 | * Since the transaction is done, we can apply the pending changes | 
|  | 2186 | * before the next transaction. | 
|  | 2187 | */ | 
|  | 2188 | btrfs_apply_pending_changes(fs_info); | 
|  | 2189 |  | 
|  | 2190 | /* commit_fs_roots gets rid of all the tree log roots, it is now | 
|  | 2191 | * safe to free the root of tree log roots | 
|  | 2192 | */ | 
|  | 2193 | btrfs_free_log_root_tree(trans, fs_info); | 
|  | 2194 |  | 
|  | 2195 | /* | 
|  | 2196 | * commit_fs_roots() can call btrfs_save_ino_cache(), which generates | 
|  | 2197 | * new delayed refs. Must handle them or qgroup can be wrong. | 
|  | 2198 | */ | 
|  | 2199 | ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); | 
|  | 2200 | if (ret) { | 
|  | 2201 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2202 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2203 | goto scrub_continue; | 
|  | 2204 | } | 
|  | 2205 |  | 
|  | 2206 | /* | 
|  | 2207 | * Since fs roots are all committed, we can get a quite accurate | 
|  | 2208 | * new_roots. So let's do quota accounting. | 
|  | 2209 | */ | 
|  | 2210 | ret = btrfs_qgroup_account_extents(trans, fs_info); | 
|  | 2211 | if (ret < 0) { | 
|  | 2212 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2213 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2214 | goto scrub_continue; | 
|  | 2215 | } | 
|  | 2216 |  | 
|  | 2217 | ret = commit_cowonly_roots(trans, fs_info); | 
|  | 2218 | if (ret) { | 
|  | 2219 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2220 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2221 | goto scrub_continue; | 
|  | 2222 | } | 
|  | 2223 |  | 
|  | 2224 | /* | 
|  | 2225 | * The tasks which save the space cache and inode cache may also | 
|  | 2226 | * update ->aborted, check it. | 
|  | 2227 | */ | 
|  | 2228 | if (unlikely(READ_ONCE(cur_trans->aborted))) { | 
|  | 2229 | ret = cur_trans->aborted; | 
|  | 2230 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2231 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2232 | goto scrub_continue; | 
|  | 2233 | } | 
|  | 2234 |  | 
|  | 2235 | btrfs_prepare_extent_commit(fs_info); | 
|  | 2236 |  | 
|  | 2237 | cur_trans = fs_info->running_transaction; | 
|  | 2238 |  | 
|  | 2239 | btrfs_set_root_node(&fs_info->tree_root->root_item, | 
|  | 2240 | fs_info->tree_root->node); | 
|  | 2241 | list_add_tail(&fs_info->tree_root->dirty_list, | 
|  | 2242 | &cur_trans->switch_commits); | 
|  | 2243 |  | 
|  | 2244 | btrfs_set_root_node(&fs_info->chunk_root->root_item, | 
|  | 2245 | fs_info->chunk_root->node); | 
|  | 2246 | list_add_tail(&fs_info->chunk_root->dirty_list, | 
|  | 2247 | &cur_trans->switch_commits); | 
|  | 2248 |  | 
|  | 2249 | switch_commit_roots(cur_trans, fs_info); | 
|  | 2250 |  | 
|  | 2251 | ASSERT(list_empty(&cur_trans->dirty_bgs)); | 
|  | 2252 | ASSERT(list_empty(&cur_trans->io_bgs)); | 
|  | 2253 | update_super_roots(fs_info); | 
|  | 2254 |  | 
|  | 2255 | btrfs_set_super_log_root(fs_info->super_copy, 0); | 
|  | 2256 | btrfs_set_super_log_root_level(fs_info->super_copy, 0); | 
|  | 2257 | memcpy(fs_info->super_for_commit, fs_info->super_copy, | 
|  | 2258 | sizeof(*fs_info->super_copy)); | 
|  | 2259 |  | 
|  | 2260 | btrfs_update_commit_device_size(fs_info); | 
|  | 2261 | btrfs_update_commit_device_bytes_used(fs_info, cur_trans); | 
|  | 2262 |  | 
|  | 2263 | clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags); | 
|  | 2264 | clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags); | 
|  | 2265 |  | 
|  | 2266 | btrfs_trans_release_chunk_metadata(trans); | 
|  | 2267 |  | 
|  | 2268 | spin_lock(&fs_info->trans_lock); | 
|  | 2269 | cur_trans->state = TRANS_STATE_UNBLOCKED; | 
|  | 2270 | fs_info->running_transaction = NULL; | 
|  | 2271 | spin_unlock(&fs_info->trans_lock); | 
|  | 2272 | mutex_unlock(&fs_info->reloc_mutex); | 
|  | 2273 |  | 
|  | 2274 | wake_up(&fs_info->transaction_wait); | 
|  | 2275 |  | 
|  | 2276 | ret = btrfs_write_and_wait_transaction(trans, fs_info); | 
|  | 2277 | if (ret) { | 
|  | 2278 | btrfs_handle_fs_error(fs_info, ret, | 
|  | 2279 | "Error while writing out transaction"); | 
|  | 2280 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2281 | goto scrub_continue; | 
|  | 2282 | } | 
|  | 2283 |  | 
|  | 2284 | ret = write_all_supers(fs_info, 0); | 
|  | 2285 | if (ret) { | 
|  | 2286 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2287 | goto scrub_continue; | 
|  | 2288 | } | 
|  | 2289 |  | 
|  | 2290 | /* | 
|  | 2291 | * the super is written, we can safely allow the tree-loggers | 
|  | 2292 | * to go about their business | 
|  | 2293 | */ | 
|  | 2294 | mutex_unlock(&fs_info->tree_log_mutex); | 
|  | 2295 |  | 
|  | 2296 | btrfs_finish_extent_commit(trans, fs_info); | 
|  | 2297 |  | 
|  | 2298 | if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags)) | 
|  | 2299 | btrfs_clear_space_info_full(fs_info); | 
|  | 2300 |  | 
|  | 2301 | fs_info->last_trans_committed = cur_trans->transid; | 
|  | 2302 | /* | 
|  | 2303 | * We needn't acquire the lock here because there is no other task | 
|  | 2304 | * which can change it. | 
|  | 2305 | */ | 
|  | 2306 | cur_trans->state = TRANS_STATE_COMPLETED; | 
|  | 2307 | wake_up(&cur_trans->commit_wait); | 
|  | 2308 |  | 
|  | 2309 | spin_lock(&fs_info->trans_lock); | 
|  | 2310 | list_del_init(&cur_trans->list); | 
|  | 2311 | spin_unlock(&fs_info->trans_lock); | 
|  | 2312 |  | 
|  | 2313 | btrfs_put_transaction(cur_trans); | 
|  | 2314 | btrfs_put_transaction(cur_trans); | 
|  | 2315 |  | 
|  | 2316 | if (trans->type & __TRANS_FREEZABLE) | 
|  | 2317 | sb_end_intwrite(fs_info->sb); | 
|  | 2318 |  | 
|  | 2319 | trace_btrfs_transaction_commit(trans->root); | 
|  | 2320 |  | 
|  | 2321 | btrfs_scrub_continue(fs_info); | 
|  | 2322 |  | 
|  | 2323 | if (current->journal_info == trans) | 
|  | 2324 | current->journal_info = NULL; | 
|  | 2325 |  | 
|  | 2326 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 
|  | 2327 |  | 
|  | 2328 | return ret; | 
|  | 2329 |  | 
|  | 2330 | scrub_continue: | 
|  | 2331 | btrfs_scrub_continue(fs_info); | 
|  | 2332 | cleanup_transaction: | 
|  | 2333 | btrfs_trans_release_metadata(trans, fs_info); | 
|  | 2334 | btrfs_trans_release_chunk_metadata(trans); | 
|  | 2335 | trans->block_rsv = NULL; | 
|  | 2336 | btrfs_warn(fs_info, "Skipping commit of aborted transaction."); | 
|  | 2337 | if (current->journal_info == trans) | 
|  | 2338 | current->journal_info = NULL; | 
|  | 2339 | cleanup_transaction(trans, trans->root, ret); | 
|  | 2340 |  | 
|  | 2341 | return ret; | 
|  | 2342 | } | 
|  | 2343 |  | 
/*
 * return 0 if there are no dead_roots at the time of call, or if dropping
 *          the snapshot failed (the error code is not passed to the caller)
 *        1 if a dead root was dropped successfully, call me again
 *
 * A return value of 1 does not guarantee that more snapshots are left to
 * delete, and if a new one comes in during processing we may still return 0.
 * We don't mind, because btrfs_commit_super will poke the cleaner thread and
 * it will process it a few seconds later.
 */
|  | 2354 | int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | 
|  | 2355 | { | 
|  | 2356 | int ret; | 
|  | 2357 | struct btrfs_fs_info *fs_info = root->fs_info; | 
|  | 2358 |  | 
|  | 2359 | spin_lock(&fs_info->trans_lock); | 
|  | 2360 | if (list_empty(&fs_info->dead_roots)) { | 
|  | 2361 | spin_unlock(&fs_info->trans_lock); | 
|  | 2362 | return 0; | 
|  | 2363 | } | 
|  | 2364 | root = list_first_entry(&fs_info->dead_roots, | 
|  | 2365 | struct btrfs_root, root_list); | 
|  | 2366 | list_del_init(&root->root_list); | 
|  | 2367 | spin_unlock(&fs_info->trans_lock); | 
|  | 2368 |  | 
|  | 2369 | btrfs_debug(fs_info, "cleaner removing %llu", root->objectid); | 
|  | 2370 |  | 
|  | 2371 | btrfs_kill_all_delayed_nodes(root); | 
|  | 2372 |  | 
|  | 2373 | if (btrfs_header_backref_rev(root->node) < | 
|  | 2374 | BTRFS_MIXED_BACKREF_REV) | 
|  | 2375 | ret = btrfs_drop_snapshot(root, NULL, 0, 0); | 
|  | 2376 | else | 
|  | 2377 | ret = btrfs_drop_snapshot(root, NULL, 1, 0); | 
|  | 2378 |  | 
|  | 2379 | return (ret < 0) ? 0 : 1; | 
|  | 2380 | } | 
|  | 2381 |  | 
|  | 2382 | void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) | 
|  | 2383 | { | 
|  | 2384 | unsigned long prev; | 
|  | 2385 | unsigned long bit; | 
|  | 2386 |  | 
|  | 2387 | prev = xchg(&fs_info->pending_changes, 0); | 
|  | 2388 | if (!prev) | 
|  | 2389 | return; | 
|  | 2390 |  | 
|  | 2391 | bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE; | 
|  | 2392 | if (prev & bit) | 
|  | 2393 | btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE); | 
|  | 2394 | prev &= ~bit; | 
|  | 2395 |  | 
|  | 2396 | bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE; | 
|  | 2397 | if (prev & bit) | 
|  | 2398 | btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE); | 
|  | 2399 | prev &= ~bit; | 
|  | 2400 |  | 
|  | 2401 | bit = 1 << BTRFS_PENDING_COMMIT; | 
|  | 2402 | if (prev & bit) | 
|  | 2403 | btrfs_debug(fs_info, "pending commit done"); | 
|  | 2404 | prev &= ~bit; | 
|  | 2405 |  | 
|  | 2406 | if (prev) | 
|  | 2407 | btrfs_warn(fs_info, | 
|  | 2408 | "unknown pending changes left 0x%lx, ignoring", prev); | 
|  | 2409 | } |