/*
 * Copyright (C) 2009 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * delayed back reference update tracking. For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing. This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */
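
/*
 * A rough sketch of how the pieces below fit together, based on the
 * fields used throughout this file (see delayed-ref.h for the actual
 * definitions):
 *
 *   delayed_refs->href_root      rb tree of heads, keyed by bytenr
 *     btrfs_delayed_ref_head     one per extent bytenr
 *       ->ref_list               all delayed ref nodes for this extent
 *       ->ref_add_list           only the BTRFS_ADD_DELAYED_REF nodes
 *       ->mutex                  held while the head is being run
 *       ->lock                   protects the per-head ref lists
 */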

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
			  struct btrfs_delayed_tree_ref *ref1, int type)
{
	if (type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
			  struct btrfs_delayed_data_ref *ref1)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}
/*
 * insert a new delayed ref head into the head ref rbtree.  Returns the
 * existing entry if one is already present for the same bytenr, or NULL
 * if the new node was inserted.
 */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->node.bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->node.bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->node.bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	return NULL;
}
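
/*
 * A minimal sketch of the caller pattern htree_insert() expects; this is
 * essentially what add_delayed_ref_head() below does when it hits a
 * duplicate bytenr:
 *
 *	existing = htree_insert(&delayed_refs->href_root,
 *				&head_ref->href_node);
 *	if (existing) {
 *		update_existing_head_ref(delayed_refs, &existing->node,
 *					 ref, old_ref_mod);
 *		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 *	}
 */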

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_head *
find_ref_head(struct rb_root *root, u64 bytenr,
	      int return_bigger)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->node.bytenr)
			n = n->rb_left;
		else if (bytenr > entry->node.bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->node.bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				n = rb_first(root);
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
			return entry;
		}
		return entry;
	}
	return NULL;
}
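
/*
 * Illustrative example (hypothetical bytenrs): with heads at 4096, 8192
 * and 12288 in the tree,
 *
 *	find_ref_head(root, 8192, 0)   returns the 8192 head
 *	find_ref_head(root, 9000, 0)   returns NULL
 *	find_ref_head(root, 9000, 1)   returns the 12288 head
 *	find_ref_head(root, 13000, 1)  wraps around to the 4096 head
 *
 * The wrap-around is what lets btrfs_select_ref_head() below resume
 * scanning from run_delayed_start without missing heads at lower
 * bytenrs.
 */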

int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	assert_spin_locked(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->node.refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (!head->node.in_tree) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref(&head->node);
	return 0;
}
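
/*
 * A hedged sketch of the retry pattern a caller of the function above
 * would use; the real consumer is the delayed ref runner in
 * extent-tree.c, which simply picks another head when the lock attempt
 * races with the head being run and removed from the tree:
 *
 *	head = btrfs_select_ref_head(trans);
 *	ret = btrfs_delayed_ref_lock(trans, head);
 *	if (ret == -EAGAIN)
 *		continue;	(select a different head and retry)
 */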

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	if (btrfs_delayed_ref_is_head(ref)) {
		head = btrfs_delayed_node_to_head(ref);
		rb_erase(&head->href_node, &delayed_refs->href_root);
	} else {
		assert_spin_locked(&head->lock);
		list_del(&ref->list);
		if (!list_empty(&ref->add_list))
			list_del(&ref->add_list);
	}
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
}

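/*
 * Try to merge @ref with the other refs on @head's ref_list.  Refs that
 * compare equal (same type and same root/parent/objectid/offset tuple)
 * are folded together by adding or subtracting their ref_mod counts;
 * refs whose seq is still visible to a tree mod log user (seq >= @seq)
 * are skipped.  Returns true if @ref itself was consumed (dropped or
 * swapped) during the merge.
 */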
static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	bool done = false;

	next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
				list);
	while (!done && &next->list != &head->ref_list) {
		int mod;
		struct btrfs_delayed_ref_node *next2;

		next2 = list_next_entry(next, list);

		if (next == ref)
			goto next;

		if (seq && next->seq >= seq)
			goto next;

		if (next->type != ref->type)
			goto next;

		if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
		     ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
		    comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
				   btrfs_delayed_node_to_tree_ref(next),
				   ref->type))
			goto next;
		if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
		     ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
		    comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
				   btrfs_delayed_node_to_data_ref(next)))
			goto next;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
next:
		next = next2;
	}

	return done;
}
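
/*
 * Worked example (hypothetical numbers): an ADD ref with ref_mod 1 and
 * a DROP ref with ref_mod 1 for the same tree block and root cancel
 * out.  The actions differ and ref->ref_mod (1) is not less than
 * next->ref_mod (1), so mod = -1, next is dropped, ref->ref_mod becomes
 * 0 and ref is dropped too.  An ADD with ref_mod 2 against a DROP with
 * ref_mod 1 instead survives as an ADD with ref_mod 1.
 */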

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_node *ref;
	u64 seq = 0;

	assert_spin_locked(&head->lock);

	if (list_empty(&head->ref_list))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	read_unlock(&fs_info->tree_mod_log_lock);

	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
			       list);
	while (&ref->list != &head->ref_list) {
		if (seq && ref->seq >= seq)
			goto next;

		if (merge_ref(trans, delayed_refs, head, ref, seq)) {
			if (list_empty(&head->ref_list))
				break;
			ref = list_first_entry(&head->ref_list,
					       struct btrfs_delayed_ref_node,
					       list);
			continue;
		}
next:
		ref = list_next_entry(ref, list);
	}
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_root *delayed_refs,
			    u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq,
				delayed_refs);
			ret = 1;
		}
	}

	read_unlock(&fs_info->tree_mod_log_lock);
	return ret;
}

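/*
 * Pick the next delayed ref head to run.  Scanning starts at
 * run_delayed_start and wraps around to the start of the tree once, so
 * heads are visited roughly round-robin by bytenr, and heads that are
 * already being processed are skipped.  The chosen head is marked
 * processing and run_delayed_start is advanced past it; NULL is
 * returned once every head has been visited or is busy.  There is no
 * internal locking here, so the caller is expected to hold
 * delayed_refs->lock.
 */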
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	u64 start;
	bool loop = false;

	delayed_refs = &trans->transaction->delayed_refs;

again:
	start = delayed_refs->run_delayed_start;
	head = find_ref_head(&delayed_refs->href_root, start, 1);
	if (!head && !loop) {
		delayed_refs->run_delayed_start = 0;
		start = 0;
		loop = true;
		head = find_ref_head(&delayed_refs->href_root, start, 1);
		if (!head)
			return NULL;
	} else if (!head && loop) {
		return NULL;
	}

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (loop)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			start = 0;
			loop = true;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->node.bytenr +
		head->node.num_bytes;
	return head;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int
add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_root *root,
			   struct btrfs_delayed_ref_head *href,
			   struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	/* Check whether we can merge the tail node with ref */
	if (list_empty(&href->ref_list))
		goto add_tail;
	exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
			   list);
	/* No need to compare bytenr or is_head */
	if (exist->type != ref->type || exist->seq != ref->seq)
		goto add_tail;

	if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
	     exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
	    comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
			   btrfs_delayed_node_to_tree_ref(ref),
			   ref->type))
		goto add_tail;
	if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
	     exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
	    comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
			   btrfs_delayed_node_to_data_ref(ref)))
		goto add_tail;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove existing tail if its ref_mod is zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;

add_tail:
	list_add_tail(&ref->list, &href->ref_list);
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	spin_unlock(&href->lock);
	return ret;
}
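
/*
 * Worked example for the action-change branch above (hypothetical
 * numbers): if the tail is an ADD with ref_mod 1 and the incoming ref
 * is a DROP with ref_mod 2, the tail's action flips to DROP, mod = -1
 * cancels the old ADD count, exist->ref_mod is set to 2 and then
 * adjusted by mod, leaving a DROP with ref_mod 1.  The entry is also
 * removed from ref_add_list since it no longer represents an addition.
 */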

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
			 struct btrfs_delayed_ref_node *existing,
			 struct btrfs_delayed_ref_node *update,
			 int *old_ref_mod_ret)
{
	struct btrfs_delayed_ref_head *existing_ref;
	struct btrfs_delayed_ref_head *ref;
	int old_ref_mod;

	existing_ref = btrfs_delayed_node_to_head(existing);
	ref = btrfs_delayed_node_to_head(update);
	BUG_ON(existing_ref->is_data != ref->is_data);

	spin_lock(&existing_ref->lock);
	if (ref->must_insert_reserved) {
		/*
		 * if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing_ref->must_insert_reserved = ref->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (ref->extent_op) {
		if (!existing_ref->extent_op) {
			existing_ref->extent_op = ref->extent_op;
		} else {
			if (ref->extent_op->update_key) {
				memcpy(&existing_ref->extent_op->key,
				       &ref->extent_op->key,
				       sizeof(ref->extent_op->key));
				existing_ref->extent_op->update_key = true;
			}
			if (ref->extent_op->update_flags) {
				existing_ref->extent_op->flags_to_set |=
					ref->extent_op->flags_to_set;
				existing_ref->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(ref->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation,
	 * only need the lock for this case because we could be processing it
	 * currently, for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing_ref->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing_ref->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing_ref->is_data) {
		if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
			delayed_refs->pending_csums -= existing->num_bytes;
		if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
			delayed_refs->pending_csums += existing->num_bytes;
	}
	spin_unlock(&existing_ref->lock);
}
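
/*
 * Illustrative pending_csums transition (hypothetical numbers): a data
 * head with total_ref_mod 1 that receives a DROP update with ref_mod -2
 * goes from +1 to -1, i.e. the extent is now expected to be freed, so
 * its num_bytes are added to pending_csums.  A later ADD taking it back
 * to a non-negative total subtracts them again.
 */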

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_node *ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
		     int action, int is_data, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_head *head_ref = NULL;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;
	int qrecord_inserted = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * the head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
	 * the reserved accounting when the extent is finally added, or
	 * if a later modification deletes the delayed ref without ever
	 * inserting the extent into the extent allocation tree.
	 * ref->must_insert_reserved is the flag used to record
	 * that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = count_mod;
	ref->type = 0;
	ref->action = 0;
	ref->is_head = 1;
	ref->in_tree = 1;
	ref->seq = 0;

	head_ref = btrfs_delayed_node_to_head(ref);
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	INIT_LIST_HEAD(&head_ref->ref_list);
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	head_ref->qgroup_reserved = 0;
	head_ref->qgroup_ref_root = 0;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (ref_root && reserved) {
			head_ref->qgroup_ref_root = ref_root;
			head_ref->qgroup_reserved = reserved;
		}

		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;

		if (btrfs_qgroup_trace_extent_nolock(fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	trace_add_delayed_ref_head(fs_info, ref, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		WARN_ON(ref_root && reserved && existing->qgroup_ref_root
			&& existing->qgroup_reserved);
		update_existing_head_ref(delayed_refs, &existing->node, ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (is_data && count_mod < 0)
			delayed_refs->pending_csums += num_bytes;
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;
	return head_ref;
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void
add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
		     u64 num_bytes, u64 parent, u64 ref_root, int level,
		     int action)
{
	struct btrfs_delayed_tree_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;
	int ret;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);
	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	INIT_LIST_HEAD(&ref->list);
	INIT_LIST_HEAD(&ref->add_list);

	full_ref = btrfs_delayed_node_to_tree_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
	full_ref->level = level;

	trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);

	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);

	/*
	 * XXX: memory should be freed at the same level it was allocated,
	 * but this pattern appears elsewhere too.  Follow it for now and
	 * clean it up later.
	 */
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void
add_delayed_data_ref(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
		     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
		     u64 offset, int action)
{
	struct btrfs_delayed_data_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;
	int ret;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	/* first set the basic ref node struct up */
	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	INIT_LIST_HEAD(&ref->list);
	INIT_LIST_HEAD(&ref->add_list);

	full_ref = btrfs_delayed_node_to_data_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref->type = BTRFS_EXTENT_DATA_REF_KEY;

	full_ref->objectid = owner;
	full_ref->offset = offset;

	trace_add_delayed_data_ref(fs_info, ref, full_ref, action);

	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);

	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}

/*
 * add a delayed tree ref. This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		goto free_ref;

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record)
			goto free_head_ref;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
					bytenr, num_bytes, 0, 0, action, 0,
					&qrecord_inserted, old_ref_mod,
					new_ref_mod);

	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action);
	spin_unlock(&delayed_refs->lock);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;

free_head_ref:
	kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
free_ref:
	kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	return -ENOMEM;
}
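
/*
 * A minimal, hedged usage sketch: a caller taking one more reference on
 * a level-0 tree block owned by root 5 and shared via the parent block
 * at parent_bytenr (all values hypothetical) would do roughly:
 *
 *	ret = btrfs_add_delayed_tree_ref(fs_info, trans, eb->start,
 *					 eb->len, parent_bytenr,
 *					 5, 0, BTRFS_ADD_DELAYED_REF,
 *					 NULL, &old_mod, &new_mod);
 *
 * The ref is only queued here; it is applied to the extent tree later,
 * when the transaction runs its delayed refs.
 */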

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, u64 reserved, int action,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;

	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
					bytenr, num_bytes, ref_root, reserved,
					action, 1, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
			     num_bytes, parent, ref_root, owner, offset,
			     action);
	spin_unlock(&delayed_refs->lock);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}
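
/*
 * Equivalent hedged sketch for data: dropping one reference that root 5
 * holds on a file extent (inode 257, file offset 0; all values
 * hypothetical):
 *
 *	ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes,
 *					 0, 5, 257, 0, 0,
 *					 BTRFS_DROP_DELAYED_REF,
 *					 &old_mod, &new_mod);
 */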

int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
			     extent_op->is_data, NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			    u64 bytenr)
{
	return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}
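
/*
 * Hedged usage sketch, following the locking rule stated in the comment
 * above:
 *
 *	spin_lock(&delayed_refs->lock);
 *	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 *	if (head)
 *		... inspect or lock the head ...
 *	spin_unlock(&delayed_refs->lock);
 */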

void btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}