| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 
|  | 3 | */ | 
|  | 4 |  | 
|  | 5 | #include <linux/time.h> | 
|  | 6 | #include <linux/slab.h> | 
|  | 7 | #include <linux/string.h> | 
|  | 8 | #include "reiserfs.h" | 
|  | 9 | #include <linux/buffer_head.h> | 
|  | 10 |  | 
|  | 11 | /* | 
|  | 12 | * To make any changes in the tree we find a node that contains item | 
|  | 13 | * to be changed/deleted or position in the node we insert a new item | 
|  | 14 | * to. We call this node S. To do balancing we need to decide what we | 
|  | 15 | * will shift to left/right neighbor, or to a new node, where new item | 
|  | 16 | * will be etc. To make this analysis simpler we build virtual | 
|  | 17 | * node. Virtual node is an array of items, that will replace items of | 
|  | 18 | * node S. (For instance if we are going to delete an item, virtual | 
|  | 19 | * node does not contain it). Virtual node keeps information about | 
|  | 20 | * item sizes and types, mergeability of first and last items, sizes | 
|  | 21 | * of all entries in directory item. We use this array of items when | 
|  | 22 | * calculating what we can shift to neighbors and how many nodes we | 
 * have to have if we do not do any shifting, if we shift to left/right
|  | 24 | * neighbor or to both. | 
|  | 25 | */ | 
|  | 26 |  | 
|  | 27 | /* | 
|  | 28 | * Takes item number in virtual node, returns number of item | 
|  | 29 | * that it has in source buffer | 
|  | 30 | */ | 
|  | 31 | static inline int old_item_num(int new_num, int affected_item_num, int mode) | 
|  | 32 | { | 
|  | 33 | if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) | 
|  | 34 | return new_num; | 
|  | 35 |  | 
|  | 36 | if (mode == M_INSERT) { | 
|  | 37 |  | 
|  | 38 | RFALSE(new_num == 0, | 
|  | 39 | "vs-8005: for INSERT mode and item number of inserted item"); | 
|  | 40 |  | 
|  | 41 | return new_num - 1; | 
|  | 42 | } | 
|  | 43 |  | 
|  | 44 | RFALSE(mode != M_DELETE, | 
|  | 45 | "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", | 
|  | 46 | mode); | 
|  | 47 | /* delete mode */ | 
|  | 48 | return new_num + 1; | 
|  | 49 | } | 
|  | 50 |  | 
/*
 * Build the virtual node for level h of the balance: an in-memory array
 * of item descriptors describing what S[h] will look like after the
 * pending operation (vn->vn_mode) is applied.  For internal nodes (h > 0)
 * only the prospective size and item count are computed; for the leaf
 * level every surviving item gets a virtual_item entry, and the left/right
 * mergeability of the boundary items is recorded.
 */
static void create_virtual_node(struct tree_balance *tb, int h)
{
	struct item_head *ih;
	struct virtual_node *vn = tb->tb_vn;
	int new_num;
	struct buffer_head *Sh;	/* this comes from tb->S[h] */

	Sh = PATH_H_PBUFFER(tb->tb_path, h);

	/* size of changed node: current used space plus pending growth/shrink */
	vn->vn_size =
	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];

	/* for internal nodes the array of virtual items is not created */
	if (h) {
		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
		return;
	}

	/*
	 * number of items in virtual node: an insert adds one item,
	 * a delete removes one
	 */
	vn->vn_nr_item =
	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
	    ((vn->vn_mode == M_DELETE) ? 1 : 0);

	/* first virtual item lives right after the virtual_node header */
	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);

	/* first item in the node */
	ih = item_head(Sh, 0);

	/* define the mergeability for 0-th item (if it is not being deleted) */
	if (op_is_left_mergeable(&ih->ih_key, Sh->b_size)
	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;

	/*
	 * go through all items that remain in the virtual
	 * node (except for the new (inserted) one)
	 */
	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
		int j;
		struct virtual_item *vi = vn->vn_vi + new_num;
		int is_affected =
		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);

		/* the slot for an inserted item is filled in afterwards */
		if (is_affected && vn->vn_mode == M_INSERT)
			continue;

		/* get item number in source node */
		j = old_item_num(new_num, vn->vn_affected_item_num,
				 vn->vn_mode);

		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
		vi->vi_ih = ih + j;
		vi->vi_item = ih_item_body(Sh, ih + j);
		vi->vi_uarea = vn->vn_free_ptr;

		/*
		 * FIXME: there is no check that item operation did not
		 * consume too much memory
		 */
		vn->vn_free_ptr +=
		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
			reiserfs_panic(tb->tb_sb, "vs-8030",
				       "virtual node space consumed");

		if (!is_affected)
			/* this is not being changed */
			continue;

		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
			/* pasted/cut bytes change the affected item's length */
			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
			/* pointer to data which is going to be pasted */
			vi->vi_new_data = vn->vn_data;
		}
	}

	/* virtual inserted item is not defined yet */
	if (vn->vn_mode == M_INSERT) {
		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;

		RFALSE(vn->vn_ins_ih == NULL,
		       "vs-8040: item header of inserted item is not specified");
		vi->vi_item_len = tb->insert_size[0];
		vi->vi_ih = vn->vn_ins_ih;
		vi->vi_item = vn->vn_data;
		vi->vi_uarea = vn->vn_free_ptr;

		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
			     tb->insert_size[0]);
	}

	/*
	 * set right merge flag: we take the right delimiting key and
	 * check whether it is a mergeable item
	 */
	if (tb->CFR[0]) {
		struct reiserfs_key *key;

		key = internal_key(tb->CFR[0], tb->rkey[0]);
		if (op_is_left_mergeable(key, Sh->b_size)
		    && (vn->vn_mode != M_DELETE
			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
			    VI_TYPE_RIGHT_MERGEABLE;

#ifdef CONFIG_REISERFS_CHECK
		if (op_is_left_mergeable(key, Sh->b_size) &&
		    !(vn->vn_mode != M_DELETE
		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
			/*
			 * we delete last item and it could be merged
			 * with right neighbor's first item
			 */
			if (!
			    (B_NR_ITEMS(Sh) == 1
			     && is_direntry_le_ih(item_head(Sh, 0))
			     && ih_entry_count(item_head(Sh, 0)) == 1)) {
				/*
				 * node contains more than 1 item, or item
				 * is not directory item, or this item
				 * contains more than 1 entry
				 */
				print_block(Sh, 0, -1, -1);
				reiserfs_panic(tb->tb_sb, "vs-8045",
					       "rdkey %k, affected item==%d "
					       "(mode==%c) Must be %c",
					       key, vn->vn_affected_item_num,
					       vn->vn_mode, M_DELETE);
			}
		}
#endif

	}
}
|  | 189 |  | 
|  | 190 | /* | 
|  | 191 | * Using virtual node check, how many items can be | 
|  | 192 | * shifted to left neighbor | 
|  | 193 | */ | 
static void check_left(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);

	/* internal level: each shifted item costs a key plus a disk child */
	if (h > 0) {
		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->lnum[h] = 0;
		tb->lbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8055: parent does not exist or invalid");

	vi = vn->vn_vi;
	/*
	 * if the first item merges with L[0]'s last item, its item head
	 * is not copied, so IH_SIZE less space is needed
	 */
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into L[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8055: invalid mode or balance condition failed");

		tb->lnum[0] = vn->vn_nr_item;
		tb->lbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* first item may be merged with last item in left neighbor */
	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
		d_size = -((int)IH_SIZE), ih_size = 0;

	tb->lnum[0] = 0;
	/* d_size/ih_size are reset to whole-item cost after the first pass */
	for (i = 0; i < vn->vn_nr_item;
	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->lnum[0]++;
			continue;
		}

		/* the item cannot be shifted entirely, try to split it */
		/*
		 * check whether L[0] can hold ih and at least one byte
		 * of the item body
		 */

		/* cannot shift even a part of the current item */
		if (cur_free <= ih_size) {
			tb->lbytes = -1;
			return;
		}
		cur_free -= ih_size;

		/* how many units of the item fit into the remaining space */
		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
		if (tb->lbytes != -1)
			/* count partially shifted item */
			tb->lnum[0]++;

		break;
	}

	return;
}
|  | 275 |  | 
|  | 276 | /* | 
|  | 277 | * Using virtual node check, how many items can be | 
|  | 278 | * shifted to right neighbor | 
|  | 279 | */ | 
static void check_right(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");

	/* internal level: each shifted item costs a key plus a disk child */
	if (h > 0) {
		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->rnum[h] = 0;
		tb->rbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8075: parent does not exist or invalid");

	vi = vn->vn_vi + vn->vn_nr_item - 1;
	/*
	 * if the last item merges with R[0]'s first item, its item head
	 * is not copied, so IH_SIZE less space is needed
	 */
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into R[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8080: invalid mode or balance condition failed");

		tb->rnum[h] = vn->vn_nr_item;
		tb->rbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* last item may be merged with first item in right neighbor */
	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
		d_size = -(int)IH_SIZE, ih_size = 0;

	tb->rnum[0] = 0;
	/* walk from the last item towards the first */
	for (i = vn->vn_nr_item - 1; i >= 0;
	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->rnum[0]++;
			continue;
		}

		/*
		 * check whether R[0] can hold ih and at least one
		 * byte of the item body
		 */

		/* cannot shift even a part of the current item */
		if (cur_free <= ih_size) {
			tb->rbytes = -1;
			return;
		}

		/*
		 * R[0] can hold the header of the item and at least
		 * one byte of its body
		 */
		cur_free -= ih_size;	/* cur_free is still > 0 */

		/* how many units of the item fit into the remaining space */
		tb->rbytes = op_check_right(vi, cur_free);
		if (tb->rbytes != -1)
			/* count partially shifted item */
			tb->rnum[0]++;

		break;
	}

	return;
}
|  | 365 |  | 
|  | 366 | /* | 
|  | 367 | * from - number of items, which are shifted to left neighbor entirely | 
|  | 368 | * to - number of item, which are shifted to right neighbor entirely | 
|  | 369 | * from_bytes - number of bytes of boundary item (or directory entries) | 
|  | 370 | *              which are shifted to left neighbor | 
|  | 371 | * to_bytes - number of bytes of boundary item (or directory entries) | 
|  | 372 | *            which are shifted to right neighbor | 
|  | 373 | */ | 
|  | 374 | static int get_num_ver(int mode, struct tree_balance *tb, int h, | 
|  | 375 | int from, int from_bytes, | 
|  | 376 | int to, int to_bytes, short *snum012, int flow) | 
|  | 377 | { | 
|  | 378 | int i; | 
|  | 379 | int cur_free; | 
|  | 380 | int units; | 
|  | 381 | struct virtual_node *vn = tb->tb_vn; | 
|  | 382 | int total_node_size, max_node_size, current_item_size; | 
|  | 383 | int needed_nodes; | 
|  | 384 |  | 
|  | 385 | /* position of item we start filling node from */ | 
|  | 386 | int start_item; | 
|  | 387 |  | 
|  | 388 | /* position of item we finish filling node by */ | 
|  | 389 | int end_item; | 
|  | 390 |  | 
|  | 391 | /* | 
|  | 392 | * number of first bytes (entries for directory) of start_item-th item | 
|  | 393 | * we do not include into node that is being filled | 
|  | 394 | */ | 
|  | 395 | int start_bytes; | 
|  | 396 |  | 
|  | 397 | /* | 
|  | 398 | * number of last bytes (entries for directory) of end_item-th item | 
|  | 399 | * we do node include into node that is being filled | 
|  | 400 | */ | 
|  | 401 | int end_bytes; | 
|  | 402 |  | 
|  | 403 | /* | 
|  | 404 | * these are positions in virtual item of items, that are split | 
|  | 405 | * between S[0] and S1new and S1new and S2new | 
|  | 406 | */ | 
|  | 407 | int split_item_positions[2]; | 
|  | 408 |  | 
|  | 409 | split_item_positions[0] = -1; | 
|  | 410 | split_item_positions[1] = -1; | 
|  | 411 |  | 
|  | 412 | /* | 
|  | 413 | * We only create additional nodes if we are in insert or paste mode | 
|  | 414 | * or we are in replace mode at the internal level. If h is 0 and | 
|  | 415 | * the mode is M_REPLACE then in fix_nodes we change the mode to | 
|  | 416 | * paste or insert before we get here in the code. | 
|  | 417 | */ | 
|  | 418 | RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), | 
|  | 419 | "vs-8100: insert_size < 0 in overflow"); | 
|  | 420 |  | 
|  | 421 | max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); | 
|  | 422 |  | 
|  | 423 | /* | 
|  | 424 | * snum012 [0-2] - number of items, that lay | 
|  | 425 | * to S[0], first new node and second new node | 
|  | 426 | */ | 
|  | 427 | snum012[3] = -1;	/* s1bytes */ | 
|  | 428 | snum012[4] = -1;	/* s2bytes */ | 
|  | 429 |  | 
|  | 430 | /* internal level */ | 
|  | 431 | if (h > 0) { | 
|  | 432 | i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); | 
|  | 433 | if (i == max_node_size) | 
|  | 434 | return 1; | 
|  | 435 | return (i / max_node_size + 1); | 
|  | 436 | } | 
|  | 437 |  | 
|  | 438 | /* leaf level */ | 
|  | 439 | needed_nodes = 1; | 
|  | 440 | total_node_size = 0; | 
|  | 441 | cur_free = max_node_size; | 
|  | 442 |  | 
|  | 443 | /* start from 'from'-th item */ | 
|  | 444 | start_item = from; | 
|  | 445 | /* skip its first 'start_bytes' units */ | 
|  | 446 | start_bytes = ((from_bytes != -1) ? from_bytes : 0); | 
|  | 447 |  | 
|  | 448 | /* last included item is the 'end_item'-th one */ | 
|  | 449 | end_item = vn->vn_nr_item - to - 1; | 
|  | 450 | /* do not count last 'end_bytes' units of 'end_item'-th item */ | 
|  | 451 | end_bytes = (to_bytes != -1) ? to_bytes : 0; | 
|  | 452 |  | 
|  | 453 | /* | 
|  | 454 | * go through all item beginning from the start_item-th item | 
|  | 455 | * and ending by the end_item-th item. Do not count first | 
|  | 456 | * 'start_bytes' units of 'start_item'-th item and last | 
|  | 457 | * 'end_bytes' of 'end_item'-th item | 
|  | 458 | */ | 
|  | 459 | for (i = start_item; i <= end_item; i++) { | 
|  | 460 | struct virtual_item *vi = vn->vn_vi + i; | 
|  | 461 | int skip_from_end = ((i == end_item) ? end_bytes : 0); | 
|  | 462 |  | 
|  | 463 | RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed"); | 
|  | 464 |  | 
|  | 465 | /* get size of current item */ | 
|  | 466 | current_item_size = vi->vi_item_len; | 
|  | 467 |  | 
|  | 468 | /* | 
|  | 469 | * do not take in calculation head part (from_bytes) | 
|  | 470 | * of from-th item | 
|  | 471 | */ | 
|  | 472 | current_item_size -= | 
|  | 473 | op_part_size(vi, 0 /*from start */ , start_bytes); | 
|  | 474 |  | 
|  | 475 | /* do not take in calculation tail part of last item */ | 
|  | 476 | current_item_size -= | 
|  | 477 | op_part_size(vi, 1 /*from end */ , skip_from_end); | 
|  | 478 |  | 
|  | 479 | /* if item fits into current node entierly */ | 
|  | 480 | if (total_node_size + current_item_size <= max_node_size) { | 
|  | 481 | snum012[needed_nodes - 1]++; | 
|  | 482 | total_node_size += current_item_size; | 
|  | 483 | start_bytes = 0; | 
|  | 484 | continue; | 
|  | 485 | } | 
|  | 486 |  | 
|  | 487 | /* | 
|  | 488 | * virtual item length is longer, than max size of item in | 
|  | 489 | * a node. It is impossible for direct item | 
|  | 490 | */ | 
|  | 491 | if (current_item_size > max_node_size) { | 
|  | 492 | RFALSE(is_direct_le_ih(vi->vi_ih), | 
|  | 493 | "vs-8110: " | 
|  | 494 | "direct item length is %d. It can not be longer than %d", | 
|  | 495 | current_item_size, max_node_size); | 
|  | 496 | /* we will try to split it */ | 
|  | 497 | flow = 1; | 
|  | 498 | } | 
|  | 499 |  | 
|  | 500 | /* as we do not split items, take new node and continue */ | 
|  | 501 | if (!flow) { | 
|  | 502 | needed_nodes++; | 
|  | 503 | i--; | 
|  | 504 | total_node_size = 0; | 
|  | 505 | continue; | 
|  | 506 | } | 
|  | 507 |  | 
|  | 508 | /* | 
|  | 509 | * calculate number of item units which fit into node being | 
|  | 510 | * filled | 
|  | 511 | */ | 
|  | 512 | { | 
|  | 513 | int free_space; | 
|  | 514 |  | 
|  | 515 | free_space = max_node_size - total_node_size - IH_SIZE; | 
|  | 516 | units = | 
|  | 517 | op_check_left(vi, free_space, start_bytes, | 
|  | 518 | skip_from_end); | 
|  | 519 | /* | 
|  | 520 | * nothing fits into current node, take new | 
|  | 521 | * node and continue | 
|  | 522 | */ | 
|  | 523 | if (units == -1) { | 
|  | 524 | needed_nodes++, i--, total_node_size = 0; | 
|  | 525 | continue; | 
|  | 526 | } | 
|  | 527 | } | 
|  | 528 |  | 
|  | 529 | /* something fits into the current node */ | 
|  | 530 | start_bytes += units; | 
|  | 531 | snum012[needed_nodes - 1 + 3] = units; | 
|  | 532 |  | 
|  | 533 | if (needed_nodes > 2) | 
|  | 534 | reiserfs_warning(tb->tb_sb, "vs-8111", | 
|  | 535 | "split_item_position is out of range"); | 
|  | 536 | snum012[needed_nodes - 1]++; | 
|  | 537 | split_item_positions[needed_nodes - 1] = i; | 
|  | 538 | needed_nodes++; | 
|  | 539 | /* continue from the same item with start_bytes != -1 */ | 
|  | 540 | start_item = i; | 
|  | 541 | i--; | 
|  | 542 | total_node_size = 0; | 
|  | 543 | } | 
|  | 544 |  | 
|  | 545 | /* | 
|  | 546 | * sum012[4] (if it is not -1) contains number of units of which | 
|  | 547 | * are to be in S1new, snum012[3] - to be in S0. They are supposed | 
|  | 548 | * to be S1bytes and S2bytes correspondingly, so recalculate | 
|  | 549 | */ | 
|  | 550 | if (snum012[4] > 0) { | 
|  | 551 | int split_item_num; | 
|  | 552 | int bytes_to_r, bytes_to_l; | 
|  | 553 | int bytes_to_S1new; | 
|  | 554 |  | 
|  | 555 | split_item_num = split_item_positions[1]; | 
|  | 556 | bytes_to_l = | 
|  | 557 | ((from == split_item_num | 
|  | 558 | && from_bytes != -1) ? from_bytes : 0); | 
|  | 559 | bytes_to_r = | 
|  | 560 | ((end_item == split_item_num | 
|  | 561 | && end_bytes != -1) ? end_bytes : 0); | 
|  | 562 | bytes_to_S1new = | 
|  | 563 | ((split_item_positions[0] == | 
|  | 564 | split_item_positions[1]) ? snum012[3] : 0); | 
|  | 565 |  | 
|  | 566 | /* s2bytes */ | 
|  | 567 | snum012[4] = | 
|  | 568 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - | 
|  | 569 | bytes_to_r - bytes_to_l - bytes_to_S1new; | 
|  | 570 |  | 
|  | 571 | if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && | 
|  | 572 | vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) | 
|  | 573 | reiserfs_warning(tb->tb_sb, "vs-8115", | 
|  | 574 | "not directory or indirect item"); | 
|  | 575 | } | 
|  | 576 |  | 
|  | 577 | /* now we know S2bytes, calculate S1bytes */ | 
|  | 578 | if (snum012[3] > 0) { | 
|  | 579 | int split_item_num; | 
|  | 580 | int bytes_to_r, bytes_to_l; | 
|  | 581 | int bytes_to_S2new; | 
|  | 582 |  | 
|  | 583 | split_item_num = split_item_positions[0]; | 
|  | 584 | bytes_to_l = | 
|  | 585 | ((from == split_item_num | 
|  | 586 | && from_bytes != -1) ? from_bytes : 0); | 
|  | 587 | bytes_to_r = | 
|  | 588 | ((end_item == split_item_num | 
|  | 589 | && end_bytes != -1) ? end_bytes : 0); | 
|  | 590 | bytes_to_S2new = | 
|  | 591 | ((split_item_positions[0] == split_item_positions[1] | 
|  | 592 | && snum012[4] != -1) ? snum012[4] : 0); | 
|  | 593 |  | 
|  | 594 | /* s1bytes */ | 
|  | 595 | snum012[3] = | 
|  | 596 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - | 
|  | 597 | bytes_to_r - bytes_to_l - bytes_to_S2new; | 
|  | 598 | } | 
|  | 599 |  | 
|  | 600 | return needed_nodes; | 
|  | 601 | } | 
|  | 602 |  | 
|  | 603 |  | 
|  | 604 | /* | 
|  | 605 | * Set parameters for balancing. | 
|  | 606 | * Performs write of results of analysis of balancing into structure tb, | 
|  | 607 | * where it will later be used by the functions that actually do the balancing. | 
|  | 608 | * Parameters: | 
|  | 609 | *	tb	tree_balance structure; | 
|  | 610 | *	h	current level of the node; | 
|  | 611 | *	lnum	number of items from S[h] that must be shifted to L[h]; | 
|  | 612 | *	rnum	number of items from S[h] that must be shifted to R[h]; | 
|  | 613 | *	blk_num	number of blocks that S[h] will be splitted into; | 
|  | 614 | *	s012	number of items that fall into splitted nodes. | 
 *	lbytes	number of bytes which flow to the left neighbor from the
 *              item that is not shifted entirely
 *	rbytes	number of bytes which flow to the right neighbor from the
 *              item that is not shifted entirely
|  | 619 | *	s1bytes	number of bytes which flow to the first  new node when | 
|  | 620 | *              S[0] splits (this number is contained in s012 array) | 
|  | 621 | */ | 
|  | 622 |  | 
|  | 623 | static void set_parameters(struct tree_balance *tb, int h, int lnum, | 
|  | 624 | int rnum, int blk_num, short *s012, int lb, int rb) | 
|  | 625 | { | 
|  | 626 |  | 
|  | 627 | tb->lnum[h] = lnum; | 
|  | 628 | tb->rnum[h] = rnum; | 
|  | 629 | tb->blknum[h] = blk_num; | 
|  | 630 |  | 
|  | 631 | /* only for leaf level */ | 
|  | 632 | if (h == 0) { | 
|  | 633 | if (s012 != NULL) { | 
|  | 634 | tb->s0num = *s012++; | 
|  | 635 | tb->snum[0] = *s012++; | 
|  | 636 | tb->snum[1] = *s012++; | 
|  | 637 | tb->sbytes[0] = *s012++; | 
|  | 638 | tb->sbytes[1] = *s012; | 
|  | 639 | } | 
|  | 640 | tb->lbytes = lb; | 
|  | 641 | tb->rbytes = rb; | 
|  | 642 | } | 
|  | 643 | PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum); | 
|  | 644 | PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum); | 
|  | 645 |  | 
|  | 646 | PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb); | 
|  | 647 | PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); | 
|  | 648 | } | 
|  | 649 |  | 
|  | 650 | /* | 
|  | 651 | * check if node disappears if we shift tb->lnum[0] items to left | 
|  | 652 | * neighbor and tb->rnum[0] to the right one. | 
|  | 653 | */ | 
|  | 654 | static int is_leaf_removable(struct tree_balance *tb) | 
|  | 655 | { | 
|  | 656 | struct virtual_node *vn = tb->tb_vn; | 
|  | 657 | int to_left, to_right; | 
|  | 658 | int size; | 
|  | 659 | int remain_items; | 
|  | 660 |  | 
|  | 661 | /* | 
|  | 662 | * number of items that will be shifted to left (right) neighbor | 
|  | 663 | * entirely | 
|  | 664 | */ | 
|  | 665 | to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); | 
|  | 666 | to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); | 
|  | 667 | remain_items = vn->vn_nr_item; | 
|  | 668 |  | 
|  | 669 | /* how many items remain in S[0] after shiftings to neighbors */ | 
|  | 670 | remain_items -= (to_left + to_right); | 
|  | 671 |  | 
|  | 672 | /* all content of node can be shifted to neighbors */ | 
|  | 673 | if (remain_items < 1) { | 
|  | 674 | set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, | 
|  | 675 | NULL, -1, -1); | 
|  | 676 | return 1; | 
|  | 677 | } | 
|  | 678 |  | 
|  | 679 | /* S[0] is not removable */ | 
|  | 680 | if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) | 
|  | 681 | return 0; | 
|  | 682 |  | 
|  | 683 | /* check whether we can divide 1 remaining item between neighbors */ | 
|  | 684 |  | 
|  | 685 | /* get size of remaining item (in item units) */ | 
|  | 686 | size = op_unit_num(&vn->vn_vi[to_left]); | 
|  | 687 |  | 
|  | 688 | if (tb->lbytes + tb->rbytes >= size) { | 
|  | 689 | set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, | 
|  | 690 | tb->lbytes, -1); | 
|  | 691 | return 1; | 
|  | 692 | } | 
|  | 693 |  | 
|  | 694 | return 0; | 
|  | 695 | } | 
|  | 696 |  | 
|  | 697 | /* check whether L, S, R can be joined in one node */ | 
/*
 * Return 1 (and record the decision in tb) if the contents of L[0],
 * S[0] and R[0] can all be joined into a single node, 0 otherwise.
 * lfree/rfree are the free-space counts of the left/right neighbors.
 */
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
{
	struct virtual_node *vn = tb->tb_vn;
	int ih_size;	/* item-head bytes saved by merging boundary items */
	struct buffer_head *S0;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);

	ih_size = 0;
	if (vn->vn_nr_item) {
		/* each mergeable boundary item saves one item head */
		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
			ih_size += IH_SIZE;

		if (vn->vn_vi[vn->vn_nr_item - 1].
		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
			ih_size += IH_SIZE;
	} else {
		/* there was only one item and it will be deleted */
		struct item_head *ih;

		RFALSE(B_NR_ITEMS(S0) != 1,
		       "vs-8125: item number must be 1: it is %d",
		       B_NR_ITEMS(S0));

		ih = item_head(S0, 0);
		/*
		 * NOTE: the inner is_direntry_le_ih() check below is the
		 * body of this condition — the long comment sits between
		 * the 'if' and its statement.
		 */
		if (tb->CFR[0]
		    && !comp_short_le_keys(&ih->ih_key,
					   internal_key(tb->CFR[0],
							tb->rkey[0])))
			/*
			 * Directory must be in correct state here: that is
			 * somewhere at the left side should exist first
			 * directory item. But the item being deleted can
			 * not be that first one because its right neighbor
			 * is item of the same directory. (But first item
			 * always gets deleted in last turn). So, neighbors
			 * of deleted item can be merged, so we can save
			 * ih_size
			 */
			if (is_direntry_le_ih(ih)) {
				ih_size = IH_SIZE;

				/*
				 * we might check that left neighbor exists
				 * and is of the same directory
				 */
				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
				       "vs-8130: first directory item can not be removed until directory is not empty");
			}

	}

	/* joined contents must fit into the neighbors' free space */
	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
		PROC_INFO_INC(tb->tb_sb, leaves_removable);
		return 1;
	}
	return 0;

}
|  | 758 |  | 
/* when we do not split item, lnum and rnum are numbers of entire items */

/*
 * Record the decision to shift 'lpar' items to the left neighbor.
 * For internal levels (h != 0) the shift count is balanced against the
 * node's capacity; at the leaf level the LEFT_SHIFT_FLOW set allows a
 * partial (byte-wise) shift of the boundary item, otherwise only whole
 * items are moved.  Expects h, Sh, tb, vn, lpar, lnver, lset and
 * snum012 to be in scope at the expansion site.
 */
#define SET_PAR_SHIFT_LEFT \
if (h)\
{\
int to_l;\
\
to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
(MAX_NR_KEY(Sh) + 1 - lpar);\
\
set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
}\
else \
{\
if (lset==LEFT_SHIFT_FLOW)\
set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
tb->lbytes, -1);\
else\
set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
-1, -1);\
}
|  | 779 |  | 
/*
 * Record the decision to shift 'rpar' items to the right neighbor —
 * mirror of SET_PAR_SHIFT_LEFT.  Expects h, Sh, tb, vn, rpar, rnver,
 * rset and snum012 to be in scope at the expansion site.
 */
#define SET_PAR_SHIFT_RIGHT \
if (h)\
{\
int to_r;\
\
to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
\
set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
}\
else \
{\
if (rset==RIGHT_SHIFT_FLOW)\
set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
-1, tb->rbytes);\
else\
set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
-1, -1);\
}
|  | 798 |  | 
|  | 799 | static void free_buffers_in_tb(struct tree_balance *tb) | 
|  | 800 | { | 
|  | 801 | int i; | 
|  | 802 |  | 
|  | 803 | pathrelse(tb->tb_path); | 
|  | 804 |  | 
|  | 805 | for (i = 0; i < MAX_HEIGHT; i++) { | 
|  | 806 | brelse(tb->L[i]); | 
|  | 807 | brelse(tb->R[i]); | 
|  | 808 | brelse(tb->FL[i]); | 
|  | 809 | brelse(tb->FR[i]); | 
|  | 810 | brelse(tb->CFL[i]); | 
|  | 811 | brelse(tb->CFR[i]); | 
|  | 812 |  | 
|  | 813 | tb->L[i] = NULL; | 
|  | 814 | tb->R[i] = NULL; | 
|  | 815 | tb->FL[i] = NULL; | 
|  | 816 | tb->FR[i] = NULL; | 
|  | 817 | tb->CFL[i] = NULL; | 
|  | 818 | tb->CFR[i] = NULL; | 
|  | 819 | } | 
|  | 820 | } | 
|  | 821 |  | 
|  | 822 | /* | 
|  | 823 | * Get new buffers for storing new nodes that are created while balancing. | 
|  | 824 | * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked; | 
|  | 825 | *	        CARRY_ON - schedule didn't occur while the function worked; | 
|  | 826 | *	        NO_DISK_SPACE - no disk space. | 
|  | 827 | */ | 
|  | 828 | /* The function is NOT SCHEDULE-SAFE! */ | 
static int get_empty_nodes(struct tree_balance *tb, int h)
{
	struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
	int counter, number_of_freeblk;
	int  amount_needed;	/* number of needed empty blocks */
	int  retval = CARRY_ON;
	struct super_block *sb = tb->tb_sb;

	/*
	 * number_of_freeblk is the number of empty blocks which have been
	 * acquired for use by the balancing algorithm minus the number of
	 * empty blocks used in the previous levels of the analysis,
	 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
	 * occurs after empty blocks are acquired, and the balancing analysis
	 * is then restarted, amount_needed is the number needed by this
	 * level (h) of the balancing analysis.
	 *
	 * Note that for systems with many processes writing, it would be
	 * more layout optimal to calculate the total number needed by all
	 * levels and then to run reiserfs_new_blocks to get all of them at
	 * once.
	 */

	/*
	 * Initiate number_of_freeblk to the amount acquired prior to the
	 * restart of the analysis or 0 if not restarted, then subtract the
	 * amount needed by all of the levels of the tree below h.
	 */
	/* blknum includes S[h], so we subtract 1 in this calculation */
	for (counter = 0, number_of_freeblk = tb->cur_blknum;
	     counter < h; counter++)
		number_of_freeblk -=
		    (tb->blknum[counter]) ? (tb->blknum[counter] -
					     1) : 0;

	/* Allocate missing empty blocks. */
	/* if Sh == 0  then we are getting a new root */
	amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
	/*
	 * Amount_needed = the amount that we need more than the
	 * amount that we have.
	 */
	if (amount_needed > number_of_freeblk)
		amount_needed -= number_of_freeblk;
	else	/* If we have enough already then there is nothing to do. */
		return CARRY_ON;

	/*
	 * No need to check quota - is not allocated for blocks used
	 * for formatted nodes
	 */
	if (reiserfs_new_form_blocknrs(tb, blocknrs,
				       amount_needed) == NO_DISK_SPACE)
		return NO_DISK_SPACE;

	/* for each blocknumber we just got, get a buffer and stick it on FEB */
	for (blocknr = blocknrs, counter = 0;
	     counter < amount_needed; blocknr++, counter++) {

		RFALSE(!*blocknr,
		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");

		/*
		 * NOTE(review): sb_getblk() result is used unchecked below;
		 * presumably it cannot fail here — confirm against the
		 * kernel version in use.
		 */
		new_bh = sb_getblk(sb, *blocknr);
		RFALSE(buffer_dirty(new_bh) ||
		       buffer_journaled(new_bh) ||
		       buffer_journal_dirty(new_bh),
		       "PAP-8140: journaled or dirty buffer %b for the new block",
		       new_bh);

		/* Put empty buffers into the array. */
		RFALSE(tb->FEB[tb->cur_blknum],
		       "PAP-8141: busy slot for new buffer");

		set_buffer_journal_new(new_bh);
		/* hand the buffer to the next free FEB slot */
		tb->FEB[tb->cur_blknum++] = new_bh;
	}

	/* a schedule may have let the tree change under us: redo the search */
	if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
		retval = REPEAT_SEARCH;

	return retval;
}
|  | 912 |  | 
|  | 913 | /* | 
|  | 914 | * Get free space of the left neighbor, which is stored in the parent | 
|  | 915 | * node of the left neighbor. | 
|  | 916 | */ | 
|  | 917 | static int get_lfree(struct tree_balance *tb, int h) | 
|  | 918 | { | 
|  | 919 | struct buffer_head *l, *f; | 
|  | 920 | int order; | 
|  | 921 |  | 
|  | 922 | if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || | 
|  | 923 | (l = tb->FL[h]) == NULL) | 
|  | 924 | return 0; | 
|  | 925 |  | 
|  | 926 | if (f == l) | 
|  | 927 | order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1; | 
|  | 928 | else { | 
|  | 929 | order = B_NR_ITEMS(l); | 
|  | 930 | f = l; | 
|  | 931 | } | 
|  | 932 |  | 
|  | 933 | return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); | 
|  | 934 | } | 
|  | 935 |  | 
|  | 936 | /* | 
|  | 937 | * Get free space of the right neighbor, | 
|  | 938 | * which is stored in the parent node of the right neighbor. | 
|  | 939 | */ | 
|  | 940 | static int get_rfree(struct tree_balance *tb, int h) | 
|  | 941 | { | 
|  | 942 | struct buffer_head *r, *f; | 
|  | 943 | int order; | 
|  | 944 |  | 
|  | 945 | if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || | 
|  | 946 | (r = tb->FR[h]) == NULL) | 
|  | 947 | return 0; | 
|  | 948 |  | 
|  | 949 | if (f == r) | 
|  | 950 | order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1; | 
|  | 951 | else { | 
|  | 952 | order = 0; | 
|  | 953 | f = r; | 
|  | 954 | } | 
|  | 955 |  | 
|  | 956 | return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); | 
|  | 957 |  | 
|  | 958 | } | 
|  | 959 |  | 
|  | 960 | /* Check whether left neighbor is in memory. */ | 
|  | 961 | static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) | 
|  | 962 | { | 
|  | 963 | struct buffer_head *father, *left; | 
|  | 964 | struct super_block *sb = tb->tb_sb; | 
|  | 965 | b_blocknr_t left_neighbor_blocknr; | 
|  | 966 | int left_neighbor_position; | 
|  | 967 |  | 
|  | 968 | /* Father of the left neighbor does not exist. */ | 
|  | 969 | if (!tb->FL[h]) | 
|  | 970 | return 0; | 
|  | 971 |  | 
|  | 972 | /* Calculate father of the node to be balanced. */ | 
|  | 973 | father = PATH_H_PBUFFER(tb->tb_path, h + 1); | 
|  | 974 |  | 
|  | 975 | RFALSE(!father || | 
|  | 976 | !B_IS_IN_TREE(father) || | 
|  | 977 | !B_IS_IN_TREE(tb->FL[h]) || | 
|  | 978 | !buffer_uptodate(father) || | 
|  | 979 | !buffer_uptodate(tb->FL[h]), | 
|  | 980 | "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", | 
|  | 981 | father, tb->FL[h]); | 
|  | 982 |  | 
|  | 983 | /* | 
|  | 984 | * Get position of the pointer to the left neighbor | 
|  | 985 | * into the left father. | 
|  | 986 | */ | 
|  | 987 | left_neighbor_position = (father == tb->FL[h]) ? | 
|  | 988 | tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); | 
|  | 989 | /* Get left neighbor block number. */ | 
|  | 990 | left_neighbor_blocknr = | 
|  | 991 | B_N_CHILD_NUM(tb->FL[h], left_neighbor_position); | 
|  | 992 | /* Look for the left neighbor in the cache. */ | 
|  | 993 | if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) { | 
|  | 994 |  | 
|  | 995 | RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), | 
|  | 996 | "vs-8170: left neighbor (%b %z) is not in the tree", | 
|  | 997 | left, left); | 
|  | 998 | put_bh(left); | 
|  | 999 | return 1; | 
|  | 1000 | } | 
|  | 1001 |  | 
|  | 1002 | return 0; | 
|  | 1003 | } | 
|  | 1004 |  | 
|  | 1005 | #define LEFT_PARENTS  'l' | 
|  | 1006 | #define RIGHT_PARENTS 'r' | 
|  | 1007 |  | 
|  | 1008 | static void decrement_key(struct cpu_key *key) | 
|  | 1009 | { | 
|  | 1010 | /* call item specific function for this key */ | 
|  | 1011 | item_ops[cpu_key_k_type(key)]->decrement_key(key); | 
|  | 1012 | } | 
|  | 1013 |  | 
|  | 1014 | /* | 
|  | 1015 | * Calculate far left/right parent of the left/right neighbor of the | 
|  | 1016 | * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor | 
|  | 1017 | * of the parent F[h]. | 
|  | 1018 | * Calculate left/right common parent of the current node and L[h]/R[h]. | 
|  | 1019 | * Calculate left/right delimiting key position. | 
|  | 1020 | * Returns:	PATH_INCORRECT    - path in the tree is not correct | 
|  | 1021 | *		SCHEDULE_OCCURRED - schedule occurred while the function worked | 
|  | 1022 | *	        CARRY_ON          - schedule didn't occur while the function | 
|  | 1023 | *				    worked | 
|  | 1024 | */ | 
static int get_far_parent(struct tree_balance *tb,
			  int h,
			  struct buffer_head **pfather,
			  struct buffer_head **pcom_father, char c_lr_par)
{
	struct buffer_head *parent;
	INITIALIZE_PATH(s_path_to_neighbor_father);
	struct treepath *path = tb->tb_path;
	struct cpu_key s_lr_father_key;
	int counter,
	    position = INT_MAX,
	    first_last_position = 0,
	    path_offset = PATH_H_PATH_OFFSET(path, h);

	/*
	 * Starting from F[h] go upwards in the tree, and look for the common
	 * ancestor of F[h], and its neighbor l/r, that should be obtained.
	 */

	counter = path_offset;

	RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-8180: invalid path length");

	/*
	 * Walk the path upwards, validating each parent/child link on
	 * the way: any stale link means the tree changed while we
	 * slept, and the search must be repeated.
	 */
	for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
		/*
		 * Check whether parent of the current buffer in the path
		 * is really parent in the tree.
		 */
		if (!B_IS_IN_TREE
		    (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
			return REPEAT_SEARCH;

		/* Check whether position in the parent is correct. */
		if ((position =
		     PATH_OFFSET_POSITION(path,
					  counter - 1)) >
		    B_NR_ITEMS(parent))
			return REPEAT_SEARCH;

		/*
		 * Check whether parent at the path really points
		 * to the child.
		 */
		if (B_N_CHILD_NUM(parent, position) !=
		    PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
			return REPEAT_SEARCH;

		/*
		 * Return delimiting key if position in the parent is not
		 * equal to first/last one.
		 * (first_last_position stays 0 for LEFT_PARENTS; for
		 * RIGHT_PARENTS the "edge" position is the last item.)
		 */
		if (c_lr_par == RIGHT_PARENTS)
			first_last_position = B_NR_ITEMS(parent);
		if (position != first_last_position) {
			/* found the common ancestor: take a reference */
			*pcom_father = parent;
			get_bh(*pcom_father);
			/*(*pcom_father = parent)->b_count++; */
			break;
		}
	}

	/* if we are in the root of the tree, then there is no common father */
	if (counter == FIRST_PATH_ELEMENT_OFFSET) {
		/*
		 * Check whether first buffer in the path is the
		 * root of the tree.
		 */
		if (PATH_OFFSET_PBUFFER
		    (tb->tb_path,
		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
		    SB_ROOT_BLOCK(tb->tb_sb)) {
			*pfather = *pcom_father = NULL;
			return CARRY_ON;
		}
		/* path does not start at the root: it is stale */
		return REPEAT_SEARCH;
	}

	RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
	       "PAP-8185: (%b %z) level too small",
	       *pcom_father, *pcom_father);

	/* Check whether the common parent is locked. */

	if (buffer_locked(*pcom_father)) {

		/* Release the write lock while the buffer is busy */
		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
		__wait_on_buffer(*pcom_father);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		/* we slept: the tree may have changed under us */
		if (FILESYSTEM_CHANGED_TB(tb)) {
			brelse(*pcom_father);
			return REPEAT_SEARCH;
		}
	}

	/*
	 * So, we got common parent of the current node and its
	 * left/right neighbor.  Now we are getting the parent of the
	 * left/right neighbor.
	 */

	/*
	 * Form key to get parent of the left/right neighbor.
	 * As a side effect this records the delimiting key position
	 * in tb->lkey[h-1] / tb->rkey[h-1].
	 */
	le_key2cpu_key(&s_lr_father_key,
		       internal_key(*pcom_father,
				    (c_lr_par ==
				     LEFT_PARENTS) ? (tb->lkey[h - 1] =
						      position -
						      1) : (tb->rkey[h -
								     1] =
							    position)));

	/* for the left side, search for the key just before the delimiter */
	if (c_lr_par == LEFT_PARENTS)
		decrement_key(&s_lr_father_key);

	if (search_by_key
	    (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
	     h + 1) == IO_ERROR)
		/* path is released */
		return IO_ERROR;

	if (FILESYSTEM_CHANGED_TB(tb)) {
		pathrelse(&s_path_to_neighbor_father);
		brelse(*pcom_father);
		return REPEAT_SEARCH;
	}

	*pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

	RFALSE(B_LEVEL(*pfather) != h + 1,
	       "PAP-8190: (%b %z) level too small", *pfather, *pfather);
	RFALSE(s_path_to_neighbor_father.path_length <
	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");

	/*
	 * Shorten the path before releasing it so that pathrelse()
	 * does not drop the reference on the bottom buffer, which is
	 * returned to the caller through *pfather.
	 */
	s_path_to_neighbor_father.path_length--;
	pathrelse(&s_path_to_neighbor_father);
	return CARRY_ON;
}
|  | 1163 |  | 
|  | 1164 | /* | 
|  | 1165 | * Get parents of neighbors of node in the path(S[path_offset]) and | 
|  | 1166 | * common parents of S[path_offset] and L[path_offset]/R[path_offset]: | 
|  | 1167 | * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset], | 
|  | 1168 | * CFR[path_offset]. | 
|  | 1169 | * Calculate numbers of left and right delimiting keys position: | 
|  | 1170 | * lkey[path_offset], rkey[path_offset]. | 
|  | 1171 | * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked | 
|  | 1172 | *	        CARRY_ON - schedule didn't occur while the function worked | 
|  | 1173 | */ | 
static int get_parents(struct tree_balance *tb, int h)
{
	struct treepath *path = tb->tb_path;
	int position,
	    ret,
	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
	struct buffer_head *curf, *curcf;

	/* Current node is the root of the tree or will be root of the tree */
	if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
		/*
		 * The root can not have parents.
		 * Release nodes which previously were obtained as
		 * parents of the current node neighbors.
		 */
		brelse(tb->FL[h]);
		brelse(tb->CFL[h]);
		brelse(tb->FR[h]);
		brelse(tb->CFR[h]);
		tb->FL[h]  = NULL;
		tb->CFL[h] = NULL;
		tb->FR[h]  = NULL;
		tb->CFR[h] = NULL;
		return CARRY_ON;
	}

	/* Get parent FL[path_offset] of L[path_offset]. */
	position = PATH_OFFSET_POSITION(path, path_offset - 1);
	if (position) {
		/*
		 * Current node is not the first child of its parent.
		 * FL[h] and CFL[h] are then the same buffer (our own
		 * parent), so two references are taken on it below —
		 * one for each slot it will be stored in.
		 */
		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		get_bh(curf);
		get_bh(curf);
		tb->lkey[h] = position - 1;
	} else {
		/*
		 * Calculate current parent of L[path_offset], which is the
		 * left neighbor of the current node.  Calculate current
		 * common parent of L[path_offset] and the current node.
		 * Note that CFL[path_offset] not equal FL[path_offset] and
		 * CFL[path_offset] not equal F[path_offset].
		 * Calculate lkey[path_offset].
		 */
		if ((ret = get_far_parent(tb, h + 1, &curf,
					  &curcf,
					  LEFT_PARENTS)) != CARRY_ON)
			return ret;
	}

	/* swap in the new parents, dropping references to the old ones */
	brelse(tb->FL[h]);
	tb->FL[h] = curf;	/* New initialization of FL[h]. */
	brelse(tb->CFL[h]);
	tb->CFL[h] = curcf;	/* New initialization of CFL[h]. */

	RFALSE((curf && !B_IS_IN_TREE(curf)) ||
	       (curcf && !B_IS_IN_TREE(curcf)),
	       "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);

	/* Get parent FR[h] of R[h]. */

	/* Current node is the last child of F[h]. FR[h] != F[h]. */
	if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
		/*
		 * Calculate current parent of R[h], which is the right
		 * neighbor of F[h].  Calculate current common parent of
		 * R[h] and current node. Note that CFR[h] not equal
		 * FR[path_offset] and CFR[h] not equal F[h].
		 */
		if ((ret =
		     get_far_parent(tb, h + 1, &curf, &curcf,
				    RIGHT_PARENTS)) != CARRY_ON)
			return ret;
	} else {
		/*
		 * Current node is not the last child of its parent F[h].
		 * As on the left side, FR[h] and CFR[h] alias the same
		 * buffer here, hence the two get_bh() calls.
		 */
		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		get_bh(curf);
		get_bh(curf);
		tb->rkey[h] = position;
	}

	brelse(tb->FR[h]);
	/* New initialization of FR[path_offset]. */
	tb->FR[h] = curf;

	brelse(tb->CFR[h]);
	/* New initialization of CFR[path_offset]. */
	tb->CFR[h] = curcf;

	RFALSE((curf && !B_IS_IN_TREE(curf)) ||
	       (curcf && !B_IS_IN_TREE(curcf)),
	       "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);

	return CARRY_ON;
}
|  | 1270 |  | 
|  | 1271 | /* | 
|  | 1272 | * it is possible to remove node as result of shiftings to | 
|  | 1273 | * neighbors even when we insert or paste item. | 
|  | 1274 | */ | 
static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
				      struct tree_balance *tb, int h)
{
	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	int levbytes = tb->insert_size[h];
	struct item_head *ih;
	struct reiserfs_key *r_key = NULL;

	ih = item_head(Sh, 0);
	/* right delimiting key, if a right common parent exists */
	if (tb->CFR[h])
		r_key = internal_key(tb->CFR[h], tb->rkey[h]);

	/*
	 * Node cannot be removed if its contents plus the insertion do
	 * not fit into the combined free space of L, S and R.  At the
	 * leaf level an IH_SIZE is saved for each border item that is
	 * mergeable with its neighbor; on internal levels one KEY_SIZE
	 * is added instead.
	 */
	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
	    /* shifting may merge items which might save space */
	    -
	    ((!h
	      && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0)
	    -
	    ((!h && r_key
	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
	    + ((h) ? KEY_SIZE : 0)) {
		/* node can not be removed */
		if (sfree >= levbytes) {
			/* new item fits into node S[h] without any shifting */
			if (!h)
				tb->s0num =
				    B_NR_ITEMS(Sh) +
				    ((mode == M_INSERT) ? 1 : 0);
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
	}
	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
	return !NO_BALANCING_NEEDED;
}
|  | 1310 |  | 
|  | 1311 | /* | 
|  | 1312 | * Check whether current node S[h] is balanced when increasing its size by | 
|  | 1313 | * Inserting or Pasting. | 
|  | 1314 | * Calculate parameters for balancing for current level h. | 
|  | 1315 | * Parameters: | 
|  | 1316 | *	tb	tree_balance structure; | 
|  | 1317 | *	h	current level of the node; | 
|  | 1318 | *	inum	item number in S[h]; | 
|  | 1319 | *	mode	i - insert, p - paste; | 
|  | 1320 | * Returns:	1 - schedule occurred; | 
|  | 1321 | *	        0 - balancing for higher levels needed; | 
|  | 1322 | *	       -1 - no balancing for higher levels needed; | 
|  | 1323 | *	       -2 - no disk space. | 
|  | 1324 | */ | 
|  | 1325 | /* ip means Inserting or Pasting */ | 
|  | 1326 | static int ip_check_balance(struct tree_balance *tb, int h) | 
|  | 1327 | { | 
|  | 1328 | struct virtual_node *vn = tb->tb_vn; | 
|  | 1329 | /* | 
|  | 1330 | * Number of bytes that must be inserted into (value is negative | 
|  | 1331 | * if bytes are deleted) buffer which contains node being balanced. | 
|  | 1332 | * The mnemonic is that the attempted change in node space used | 
|  | 1333 | * level is levbytes bytes. | 
|  | 1334 | */ | 
|  | 1335 | int levbytes; | 
|  | 1336 | int ret; | 
|  | 1337 |  | 
|  | 1338 | int lfree, sfree, rfree /* free space in L, S and R */ ; | 
|  | 1339 |  | 
|  | 1340 | /* | 
|  | 1341 | * nver is short for number of vertixes, and lnver is the number if | 
|  | 1342 | * we shift to the left, rnver is the number if we shift to the | 
|  | 1343 | * right, and lrnver is the number if we shift in both directions. | 
|  | 1344 | * The goal is to minimize first the number of vertixes, and second, | 
|  | 1345 | * the number of vertixes whose contents are changed by shifting, | 
|  | 1346 | * and third the number of uncached vertixes whose contents are | 
|  | 1347 | * changed by shifting and must be read from disk. | 
|  | 1348 | */ | 
|  | 1349 | int nver, lnver, rnver, lrnver; | 
|  | 1350 |  | 
|  | 1351 | /* | 
|  | 1352 | * used at leaf level only, S0 = S[0] is the node being balanced, | 
|  | 1353 | * sInum [ I = 0,1,2 ] is the number of items that will | 
|  | 1354 | * remain in node SI after balancing.  S1 and S2 are new | 
|  | 1355 | * nodes that might be created. | 
|  | 1356 | */ | 
|  | 1357 |  | 
|  | 1358 | /* | 
|  | 1359 | * we perform 8 calls to get_num_ver().  For each call we | 
|  | 1360 | * calculate five parameters.  where 4th parameter is s1bytes | 
|  | 1361 | * and 5th - s2bytes | 
|  | 1362 | * | 
|  | 1363 | * s0num, s1num, s2num for 8 cases | 
|  | 1364 | * 0,1 - do not shift and do not shift but bottle | 
|  | 1365 | * 2   - shift only whole item to left | 
|  | 1366 | * 3   - shift to left and bottle as much as possible | 
|  | 1367 | * 4,5 - shift to right (whole items and as much as possible | 
|  | 1368 | * 6,7 - shift to both directions (whole items and as much as possible) | 
|  | 1369 | */ | 
|  | 1370 | short snum012[40] = { 0, }; | 
|  | 1371 |  | 
|  | 1372 | /* Sh is the node whose balance is currently being checked */ | 
|  | 1373 | struct buffer_head *Sh; | 
|  | 1374 |  | 
|  | 1375 | Sh = PATH_H_PBUFFER(tb->tb_path, h); | 
|  | 1376 | levbytes = tb->insert_size[h]; | 
|  | 1377 |  | 
|  | 1378 | /* Calculate balance parameters for creating new root. */ | 
|  | 1379 | if (!Sh) { | 
|  | 1380 | if (!h) | 
|  | 1381 | reiserfs_panic(tb->tb_sb, "vs-8210", | 
|  | 1382 | "S[0] can not be 0"); | 
|  | 1383 | switch (ret = get_empty_nodes(tb, h)) { | 
|  | 1384 | /* no balancing for higher levels needed */ | 
|  | 1385 | case CARRY_ON: | 
|  | 1386 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 
|  | 1387 | return NO_BALANCING_NEEDED; | 
|  | 1388 |  | 
|  | 1389 | case NO_DISK_SPACE: | 
|  | 1390 | case REPEAT_SEARCH: | 
|  | 1391 | return ret; | 
|  | 1392 | default: | 
|  | 1393 | reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " | 
|  | 1394 | "return value of get_empty_nodes"); | 
|  | 1395 | } | 
|  | 1396 | } | 
|  | 1397 |  | 
|  | 1398 | /* get parents of S[h] neighbors. */ | 
|  | 1399 | ret = get_parents(tb, h); | 
|  | 1400 | if (ret != CARRY_ON) | 
|  | 1401 | return ret; | 
|  | 1402 |  | 
|  | 1403 | sfree = B_FREE_SPACE(Sh); | 
|  | 1404 |  | 
|  | 1405 | /* get free space of neighbors */ | 
|  | 1406 | rfree = get_rfree(tb, h); | 
|  | 1407 | lfree = get_lfree(tb, h); | 
|  | 1408 |  | 
|  | 1409 | /* and new item fits into node S[h] without any shifting */ | 
|  | 1410 | if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == | 
|  | 1411 | NO_BALANCING_NEEDED) | 
|  | 1412 | return NO_BALANCING_NEEDED; | 
|  | 1413 |  | 
|  | 1414 | create_virtual_node(tb, h); | 
|  | 1415 |  | 
|  | 1416 | /* | 
|  | 1417 | * determine maximal number of items we can shift to the left | 
|  | 1418 | * neighbor (in tb structure) and the maximal number of bytes | 
|  | 1419 | * that can flow to the left neighbor from the left most liquid | 
|  | 1420 | * item that cannot be shifted from S[0] entirely (returned value) | 
|  | 1421 | */ | 
|  | 1422 | check_left(tb, h, lfree); | 
|  | 1423 |  | 
|  | 1424 | /* | 
|  | 1425 | * determine maximal number of items we can shift to the right | 
|  | 1426 | * neighbor (in tb structure) and the maximal number of bytes | 
|  | 1427 | * that can flow to the right neighbor from the right most liquid | 
|  | 1428 | * item that cannot be shifted from S[0] entirely (returned value) | 
|  | 1429 | */ | 
|  | 1430 | check_right(tb, h, rfree); | 
|  | 1431 |  | 
|  | 1432 | /* | 
|  | 1433 | * all contents of internal node S[h] can be moved into its | 
|  | 1434 | * neighbors, S[h] will be removed after balancing | 
|  | 1435 | */ | 
|  | 1436 | if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { | 
|  | 1437 | int to_r; | 
|  | 1438 |  | 
|  | 1439 | /* | 
|  | 1440 | * Since we are working on internal nodes, and our internal | 
|  | 1441 | * nodes have fixed size entries, then we can balance by the | 
|  | 1442 | * number of items rather than the space they consume.  In this | 
|  | 1443 | * routine we set the left node equal to the right node, | 
|  | 1444 | * allowing a difference of less than or equal to 1 child | 
|  | 1445 | * pointer. | 
|  | 1446 | */ | 
|  | 1447 | to_r = | 
|  | 1448 | ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + | 
|  | 1449 | vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - | 
|  | 1450 | tb->rnum[h]); | 
|  | 1451 | set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, | 
|  | 1452 | -1, -1); | 
|  | 1453 | return CARRY_ON; | 
|  | 1454 | } | 
|  | 1455 |  | 
|  | 1456 | /* | 
|  | 1457 | * this checks balance condition, that any two neighboring nodes | 
|  | 1458 | * can not fit in one node | 
|  | 1459 | */ | 
|  | 1460 | RFALSE(h && | 
|  | 1461 | (tb->lnum[h] >= vn->vn_nr_item + 1 || | 
|  | 1462 | tb->rnum[h] >= vn->vn_nr_item + 1), | 
|  | 1463 | "vs-8220: tree is not balanced on internal level"); | 
|  | 1464 | RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || | 
|  | 1465 | (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), | 
|  | 1466 | "vs-8225: tree is not balanced on leaf level"); | 
|  | 1467 |  | 
|  | 1468 | /* | 
|  | 1469 | * all contents of S[0] can be moved into its neighbors | 
|  | 1470 | * S[0] will be removed after balancing. | 
|  | 1471 | */ | 
|  | 1472 | if (!h && is_leaf_removable(tb)) | 
|  | 1473 | return CARRY_ON; | 
|  | 1474 |  | 
|  | 1475 | /* | 
|  | 1476 | * why do we perform this check here rather than earlier?? | 
|  | 1477 | * Answer: we can win 1 node in some cases above. Moreover we | 
|  | 1478 | * checked it above, when we checked, that S[0] is not removable | 
|  | 1479 | * in principle | 
|  | 1480 | */ | 
|  | 1481 |  | 
|  | 1482 | /* new item fits into node S[h] without any shifting */ | 
|  | 1483 | if (sfree >= levbytes) { | 
|  | 1484 | if (!h) | 
|  | 1485 | tb->s0num = vn->vn_nr_item; | 
|  | 1486 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 
|  | 1487 | return NO_BALANCING_NEEDED; | 
|  | 1488 | } | 
|  | 1489 |  | 
|  | 1490 | { | 
|  | 1491 | int lpar, rpar, nset, lset, rset, lrset; | 
|  | 1492 | /* regular overflowing of the node */ | 
|  | 1493 |  | 
|  | 1494 | /* | 
|  | 1495 | * get_num_ver works in 2 modes (FLOW & NO_FLOW) | 
|  | 1496 | * lpar, rpar - number of items we can shift to left/right | 
|  | 1497 | *              neighbor (including splitting item) | 
|  | 1498 | * nset, lset, rset, lrset - shows, whether flowing items | 
|  | 1499 | *                           give better packing | 
|  | 1500 | */ | 
|  | 1501 | #define FLOW 1 | 
|  | 1502 | #define NO_FLOW 0		/* do not any splitting */ | 
|  | 1503 |  | 
|  | 1504 | /* we choose one of the following */ | 
|  | 1505 | #define NOTHING_SHIFT_NO_FLOW	0 | 
|  | 1506 | #define NOTHING_SHIFT_FLOW	5 | 
|  | 1507 | #define LEFT_SHIFT_NO_FLOW	10 | 
|  | 1508 | #define LEFT_SHIFT_FLOW		15 | 
|  | 1509 | #define RIGHT_SHIFT_NO_FLOW	20 | 
|  | 1510 | #define RIGHT_SHIFT_FLOW	25 | 
|  | 1511 | #define LR_SHIFT_NO_FLOW	30 | 
|  | 1512 | #define LR_SHIFT_FLOW		35 | 
|  | 1513 |  | 
|  | 1514 | lpar = tb->lnum[h]; | 
|  | 1515 | rpar = tb->rnum[h]; | 
|  | 1516 |  | 
|  | 1517 | /* | 
|  | 1518 | * calculate number of blocks S[h] must be split into when | 
|  | 1519 | * nothing is shifted to the neighbors, as well as number of | 
|  | 1520 | * items in each part of the split node (s012 numbers), | 
|  | 1521 | * and number of bytes (s1bytes) of the shared drop which | 
|  | 1522 | * flow to S1 if any | 
|  | 1523 | */ | 
|  | 1524 | nset = NOTHING_SHIFT_NO_FLOW; | 
|  | 1525 | nver = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1526 | 0, -1, h ? vn->vn_nr_item : 0, -1, | 
|  | 1527 | snum012, NO_FLOW); | 
|  | 1528 |  | 
|  | 1529 | if (!h) { | 
|  | 1530 | int nver1; | 
|  | 1531 |  | 
|  | 1532 | /* | 
|  | 1533 | * note, that in this case we try to bottle | 
|  | 1534 | * between S[0] and S1 (S1 - the first new node) | 
|  | 1535 | */ | 
|  | 1536 | nver1 = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1537 | 0, -1, 0, -1, | 
|  | 1538 | snum012 + NOTHING_SHIFT_FLOW, FLOW); | 
|  | 1539 | if (nver > nver1) | 
|  | 1540 | nset = NOTHING_SHIFT_FLOW, nver = nver1; | 
|  | 1541 | } | 
|  | 1542 |  | 
|  | 1543 | /* | 
|  | 1544 | * calculate number of blocks S[h] must be split into when | 
|  | 1545 | * l_shift_num first items and l_shift_bytes of the right | 
|  | 1546 | * most liquid item to be shifted are shifted to the left | 
|  | 1547 | * neighbor, as well as number of items in each part of the | 
|  | 1548 | * splitted node (s012 numbers), and number of bytes | 
|  | 1549 | * (s1bytes) of the shared drop which flow to S1 if any | 
|  | 1550 | */ | 
|  | 1551 | lset = LEFT_SHIFT_NO_FLOW; | 
|  | 1552 | lnver = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1553 | lpar - ((h || tb->lbytes == -1) ? 0 : 1), | 
|  | 1554 | -1, h ? vn->vn_nr_item : 0, -1, | 
|  | 1555 | snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); | 
|  | 1556 | if (!h) { | 
|  | 1557 | int lnver1; | 
|  | 1558 |  | 
|  | 1559 | lnver1 = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1560 | lpar - | 
|  | 1561 | ((tb->lbytes != -1) ? 1 : 0), | 
|  | 1562 | tb->lbytes, 0, -1, | 
|  | 1563 | snum012 + LEFT_SHIFT_FLOW, FLOW); | 
|  | 1564 | if (lnver > lnver1) | 
|  | 1565 | lset = LEFT_SHIFT_FLOW, lnver = lnver1; | 
|  | 1566 | } | 
|  | 1567 |  | 
|  | 1568 | /* | 
|  | 1569 | * calculate number of blocks S[h] must be split into when | 
|  | 1570 | * r_shift_num first items and r_shift_bytes of the left most | 
|  | 1571 | * liquid item to be shifted are shifted to the right neighbor, | 
|  | 1572 | * as well as number of items in each part of the splitted | 
|  | 1573 | * node (s012 numbers), and number of bytes (s1bytes) of the | 
|  | 1574 | * shared drop which flow to S1 if any | 
|  | 1575 | */ | 
|  | 1576 | rset = RIGHT_SHIFT_NO_FLOW; | 
|  | 1577 | rnver = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1578 | 0, -1, | 
|  | 1579 | h ? (vn->vn_nr_item - rpar) : (rpar - | 
|  | 1580 | ((tb-> | 
|  | 1581 | rbytes != | 
|  | 1582 | -1) ? 1 : | 
|  | 1583 | 0)), -1, | 
|  | 1584 | snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); | 
|  | 1585 | if (!h) { | 
|  | 1586 | int rnver1; | 
|  | 1587 |  | 
|  | 1588 | rnver1 = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1589 | 0, -1, | 
|  | 1590 | (rpar - | 
|  | 1591 | ((tb->rbytes != -1) ? 1 : 0)), | 
|  | 1592 | tb->rbytes, | 
|  | 1593 | snum012 + RIGHT_SHIFT_FLOW, FLOW); | 
|  | 1594 |  | 
|  | 1595 | if (rnver > rnver1) | 
|  | 1596 | rset = RIGHT_SHIFT_FLOW, rnver = rnver1; | 
|  | 1597 | } | 
|  | 1598 |  | 
|  | 1599 | /* | 
|  | 1600 | * calculate number of blocks S[h] must be split into when | 
|  | 1601 | * items are shifted in both directions, as well as number | 
|  | 1602 | * of items in each part of the splitted node (s012 numbers), | 
|  | 1603 | * and number of bytes (s1bytes) of the shared drop which | 
|  | 1604 | * flow to S1 if any | 
|  | 1605 | */ | 
|  | 1606 | lrset = LR_SHIFT_NO_FLOW; | 
|  | 1607 | lrnver = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1608 | lpar - ((h || tb->lbytes == -1) ? 0 : 1), | 
|  | 1609 | -1, | 
|  | 1610 | h ? (vn->vn_nr_item - rpar) : (rpar - | 
|  | 1611 | ((tb-> | 
|  | 1612 | rbytes != | 
|  | 1613 | -1) ? 1 : | 
|  | 1614 | 0)), -1, | 
|  | 1615 | snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); | 
|  | 1616 | if (!h) { | 
|  | 1617 | int lrnver1; | 
|  | 1618 |  | 
|  | 1619 | lrnver1 = get_num_ver(vn->vn_mode, tb, h, | 
|  | 1620 | lpar - | 
|  | 1621 | ((tb->lbytes != -1) ? 1 : 0), | 
|  | 1622 | tb->lbytes, | 
|  | 1623 | (rpar - | 
|  | 1624 | ((tb->rbytes != -1) ? 1 : 0)), | 
|  | 1625 | tb->rbytes, | 
|  | 1626 | snum012 + LR_SHIFT_FLOW, FLOW); | 
|  | 1627 | if (lrnver > lrnver1) | 
|  | 1628 | lrset = LR_SHIFT_FLOW, lrnver = lrnver1; | 
|  | 1629 | } | 
|  | 1630 |  | 
|  | 1631 | /* | 
|  | 1632 | * Our general shifting strategy is: | 
|  | 1633 | * 1) to minimized number of new nodes; | 
|  | 1634 | * 2) to minimized number of neighbors involved in shifting; | 
|  | 1635 | * 3) to minimized number of disk reads; | 
|  | 1636 | */ | 
|  | 1637 |  | 
|  | 1638 | /* we can win TWO or ONE nodes by shifting in both directions */ | 
|  | 1639 | if (lrnver < lnver && lrnver < rnver) { | 
|  | 1640 | RFALSE(h && | 
|  | 1641 | (tb->lnum[h] != 1 || | 
|  | 1642 | tb->rnum[h] != 1 || | 
|  | 1643 | lrnver != 1 || rnver != 2 || lnver != 2 | 
|  | 1644 | || h != 1), "vs-8230: bad h"); | 
|  | 1645 | if (lrset == LR_SHIFT_FLOW) | 
|  | 1646 | set_parameters(tb, h, tb->lnum[h], tb->rnum[h], | 
|  | 1647 | lrnver, snum012 + lrset, | 
|  | 1648 | tb->lbytes, tb->rbytes); | 
|  | 1649 | else | 
|  | 1650 | set_parameters(tb, h, | 
|  | 1651 | tb->lnum[h] - | 
|  | 1652 | ((tb->lbytes == -1) ? 0 : 1), | 
|  | 1653 | tb->rnum[h] - | 
|  | 1654 | ((tb->rbytes == -1) ? 0 : 1), | 
|  | 1655 | lrnver, snum012 + lrset, -1, -1); | 
|  | 1656 |  | 
|  | 1657 | return CARRY_ON; | 
|  | 1658 | } | 
|  | 1659 |  | 
|  | 1660 | /* | 
|  | 1661 | * if shifting doesn't lead to better packing | 
|  | 1662 | * then don't shift | 
|  | 1663 | */ | 
|  | 1664 | if (nver == lrnver) { | 
|  | 1665 | set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, | 
|  | 1666 | -1); | 
|  | 1667 | return CARRY_ON; | 
|  | 1668 | } | 
|  | 1669 |  | 
|  | 1670 | /* | 
|  | 1671 | * now we know that for better packing shifting in only one | 
|  | 1672 | * direction either to the left or to the right is required | 
|  | 1673 | */ | 
|  | 1674 |  | 
|  | 1675 | /* | 
|  | 1676 | * if shifting to the left is better than | 
|  | 1677 | * shifting to the right | 
|  | 1678 | */ | 
|  | 1679 | if (lnver < rnver) { | 
|  | 1680 | SET_PAR_SHIFT_LEFT; | 
|  | 1681 | return CARRY_ON; | 
|  | 1682 | } | 
|  | 1683 |  | 
|  | 1684 | /* | 
|  | 1685 | * if shifting to the right is better than | 
|  | 1686 | * shifting to the left | 
|  | 1687 | */ | 
|  | 1688 | if (lnver > rnver) { | 
|  | 1689 | SET_PAR_SHIFT_RIGHT; | 
|  | 1690 | return CARRY_ON; | 
|  | 1691 | } | 
|  | 1692 |  | 
|  | 1693 | /* | 
|  | 1694 | * now shifting in either direction gives the same number | 
|  | 1695 | * of nodes and we can make use of the cached neighbors | 
|  | 1696 | */ | 
|  | 1697 | if (is_left_neighbor_in_cache(tb, h)) { | 
|  | 1698 | SET_PAR_SHIFT_LEFT; | 
|  | 1699 | return CARRY_ON; | 
|  | 1700 | } | 
|  | 1701 |  | 
|  | 1702 | /* | 
|  | 1703 | * shift to the right independently on whether the | 
|  | 1704 | * right neighbor in cache or not | 
|  | 1705 | */ | 
|  | 1706 | SET_PAR_SHIFT_RIGHT; | 
|  | 1707 | return CARRY_ON; | 
|  | 1708 | } | 
|  | 1709 | } | 
|  | 1710 |  | 
|  | 1711 | /* | 
|  | 1712 | * Check whether current node S[h] is balanced when Decreasing its size by | 
|  | 1713 | * Deleting or Cutting for INTERNAL node of S+tree. | 
|  | 1714 | * Calculate parameters for balancing for current level h. | 
|  | 1715 | * Parameters: | 
|  | 1716 | *	tb	tree_balance structure; | 
|  | 1717 | *	h	current level of the node; | 
|  | 1718 | *	inum	item number in S[h]; | 
|  | 1719 | *	mode	i - insert, p - paste; | 
|  | 1720 | * Returns:	1 - schedule occurred; | 
|  | 1721 | *	        0 - balancing for higher levels needed; | 
|  | 1722 | *	       -1 - no balancing for higher levels needed; | 
|  | 1723 | *	       -2 - no disk space. | 
|  | 1724 | * | 
|  | 1725 | * Note: Items of internal nodes have fixed size, so the balance condition for | 
|  | 1726 | * the internal part of S+tree is as for the B-trees. | 
|  | 1727 | */ | 
static int dc_check_balance_internal(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;

	/*
	 * Sh is the node whose balance is currently being checked,
	 * and Fh is its father.
	 */
	struct buffer_head *Sh, *Fh;
	int maxsize, ret;
	int lfree, rfree /* free space in L and R */ ;

	Sh = PATH_H_PBUFFER(tb->tb_path, h);
	Fh = PATH_H_PPARENT(tb->tb_path, h);

	/*
	 * NOTE(review): maxsize is computed but never read in this
	 * function — looks like leftover from an older version.
	 */
	maxsize = MAX_CHILD_SIZE(Sh);

	/*
	 * using tb->insert_size[h], which is negative in this case,
	 * create_virtual_node calculates:
	 * new_nr_item = number of items node would have if operation is
	 * performed without balancing (new_nr_item);
	 */
	create_virtual_node(tb, h);

	if (!Fh) {		/* S[h] is the root. */
		/* no balancing for higher levels needed */
		if (vn->vn_nr_item > 0) {
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
		/*
		 * new_nr_item == 0.
		 * Current root will be deleted resulting in
		 * decrementing the tree height.
		 */
		set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	/* may schedule or hit I/O errors while reading parents */
	if ((ret = get_parents(tb, h)) != CARRY_ON)
		return ret;

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	/*
	 * determine maximal number of items we can fit into neighbors;
	 * results land in tb->lnum[h] / tb->rnum[h]
	 */
	check_left(tb, h, lfree);
	check_right(tb, h, rfree);

	/*
	 * Balance condition for the internal node is valid.
	 * In this case we balance only if it leads to better packing.
	 */
	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {
		/*
		 * Here we join S[h] with one of its neighbors,
		 * which is impossible with greater values of new_nr_item.
		 */
		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {
			/* All contents of S[h] can be moved to L[h]. */
			if (tb->lnum[h] >= vn->vn_nr_item + 1) {
				int n;
				int order_L;

				/*
				 * order_L: position in the grandparent FL[h]
				 * of the pointer to L[h]; when S[h] is the
				 * first child, L[h] is the last child of FL[h]
				 */
				order_L =
				    ((n =
				      PATH_H_B_ITEM_ORDER(tb->tb_path,
							  h)) ==
				     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
				/*
				 * n = number of pointers (and keys) in L[h],
				 * derived from its on-disk child size
				 */
				n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
				    (DC_SIZE + KEY_SIZE);
				/* shift everything left: -n - 1 == whole L[h] plus delimiter */
				set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
					       -1);
				return CARRY_ON;
			}

			/* All contents of S[h] can be moved to R[h]. */
			if (tb->rnum[h] >= vn->vn_nr_item + 1) {
				int n;
				int order_R;

				/*
				 * order_R: position in FR[h] of the pointer
				 * to R[h]; when S[h] is the last child, R[h]
				 * is the first child of FR[h]
				 */
				order_R =
				    ((n =
				      PATH_H_B_ITEM_ORDER(tb->tb_path,
							  h)) ==
				     B_NR_ITEMS(Fh)) ? 0 : n + 1;
				n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
				    (DC_SIZE + KEY_SIZE);
				set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
					       -1);
				return CARRY_ON;
			}
		}

		/*
		 * All contents of S[h] can be moved to the neighbors
		 * (L[h] & R[h]).
		 */
		if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
			int to_r;

			/*
			 * split S[h]'s contents so L[h] and R[h] end up
			 * roughly equally full; to_r = portion going right
			 */
			to_r =
			    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
			     tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
			    (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
			set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
				       0, NULL, -1, -1);
			return CARRY_ON;
		}

		/* Balancing does not lead to better packing. */
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	/*
	 * Current node contain insufficient number of items.
	 * Balancing is required.
	 */
	/* Check whether we can merge S[h] with left neighbor. */
	if (tb->lnum[h] >= vn->vn_nr_item + 1)
		/*
		 * prefer the left neighbor when it is cached, when the
		 * right neighbor cannot absorb S[h], or when there is
		 * no right neighbor at all
		 */
		if (is_left_neighbor_in_cache(tb, h)
		    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
			int n;
			int order_L;

			order_L =
			    ((n =
			      PATH_H_B_ITEM_ORDER(tb->tb_path,
						  h)) ==
			     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
			n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
								      KEY_SIZE);
			set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
			return CARRY_ON;
		}

	/* Check whether we can merge S[h] with right neighbor. */
	if (tb->rnum[h] >= vn->vn_nr_item + 1) {
		int n;
		int order_R;

		order_R =
		    ((n =
		      PATH_H_B_ITEM_ORDER(tb->tb_path,
					  h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
		n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
							      KEY_SIZE);
		set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
	if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
		int to_r;

		to_r =
		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
						tb->rnum[h]);
		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
			       -1, -1);
		return CARRY_ON;
	}

	/* For internal nodes try to borrow item from a neighbor */
	RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");

	/* Borrow one or two items from caching neighbor */
	if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
		int from_l;

		/* borrow enough from L[h] to even out the two nodes */
		from_l =
		    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
		     1) / 2 - (vn->vn_nr_item + 1);
		set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
		return CARRY_ON;
	}

	/* otherwise borrow from the right neighbor */
	set_parameters(tb, h, 0,
		       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
			  1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
	return CARRY_ON;
}
|  | 1914 |  | 
|  | 1915 | /* | 
|  | 1916 | * Check whether current node S[h] is balanced when Decreasing its size by | 
|  | 1917 | * Deleting or Truncating for LEAF node of S+tree. | 
|  | 1918 | * Calculate parameters for balancing for current level h. | 
|  | 1919 | * Parameters: | 
|  | 1920 | *	tb	tree_balance structure; | 
|  | 1921 | *	h	current level of the node; | 
|  | 1922 | *	inum	item number in S[h]; | 
|  | 1923 | *	mode	i - insert, p - paste; | 
|  | 1924 | * Returns:	1 - schedule occurred; | 
|  | 1925 | *	        0 - balancing for higher levels needed; | 
|  | 1926 | *	       -1 - no balancing for higher levels needed; | 
|  | 1927 | *	       -2 - no disk space. | 
|  | 1928 | */ | 
static int dc_check_balance_leaf(struct tree_balance *tb, int h)
{
	/* NOTE(review): callers appear to reach here only with h == 0;
	 * the function mixes h and literal 0 below — confirm. */
	struct virtual_node *vn = tb->tb_vn;

	/*
	 * Number of bytes that must be deleted from
	 * (value is negative if bytes are deleted) buffer which
	 * contains node being balanced.  The mnemonic is that the
	 * attempted change in node space used level is levbytes bytes.
	 */
	int levbytes;

	/* the maximal item size */
	int maxsize, ret;

	/*
	 * S0 is the node whose balance is currently being checked,
	 * and F0 is its father.
	 */
	struct buffer_head *S0, *F0;
	int lfree, rfree /* free space in L and R */ ;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);
	F0 = PATH_H_PPARENT(tb->tb_path, 0);

	levbytes = tb->insert_size[h];

	maxsize = MAX_CHILD_SIZE(S0);	/* maximal possible size of an item */

	if (!F0) {		/* S[0] is the root now. */

		RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
		       "vs-8240: attempt to create empty buffer tree");

		/* the root leaf keeps some content: nothing to do */
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	if ((ret = get_parents(tb, h)) != CARRY_ON)
		return ret;

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	create_virtual_node(tb, h);

	/* if 3 leaves can be merge to one, set parameters and return */
	if (are_leaves_removable(tb, lfree, rfree))
		return CARRY_ON;

	/*
	 * determine maximal number of items we can shift to the left/right
	 * neighbor and the maximal number of bytes that can flow to the
	 * left/right neighbor from the left/right most liquid item that
	 * cannot be shifted from S[0] entirely
	 */
	check_left(tb, h, lfree);
	check_right(tb, h, rfree);

	/*
	 * check whether we can merge S with left neighbor
	 * (lbytes == -1 means the boundary item moves whole, no split)
	 */
	if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
		if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) ||	/* S can not be merged with R */
		    !tb->FR[h]) {

			RFALSE(!tb->FL[h],
			       "vs-8245: dc_check_balance_leaf: FL[h] must exist");

			/* set parameter to merge S[0] with its left neighbor */
			set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
			return CARRY_ON;
		}

	/* check whether we can merge S[0] with right neighbor. */
	if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
		set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	/*
	 * All contents of S[0] can be moved to the neighbors (L[0] & R[0]).
	 * Set parameters and return
	 */
	if (is_leaf_removable(tb))
		return CARRY_ON;

	/* Balancing is not required. */
	tb->s0num = vn->vn_nr_item;
	set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
	return NO_BALANCING_NEEDED;
}
|  | 2020 |  | 
|  | 2021 | /* | 
|  | 2022 | * Check whether current node S[h] is balanced when Decreasing its size by | 
|  | 2023 | * Deleting or Cutting. | 
|  | 2024 | * Calculate parameters for balancing for current level h. | 
|  | 2025 | * Parameters: | 
|  | 2026 | *	tb	tree_balance structure; | 
|  | 2027 | *	h	current level of the node; | 
|  | 2028 | *	inum	item number in S[h]; | 
|  | 2029 | *	mode	d - delete, c - cut. | 
|  | 2030 | * Returns:	1 - schedule occurred; | 
|  | 2031 | *	        0 - balancing for higher levels needed; | 
|  | 2032 | *	       -1 - no balancing for higher levels needed; | 
|  | 2033 | *	       -2 - no disk space. | 
|  | 2034 | */ | 
|  | 2035 | static int dc_check_balance(struct tree_balance *tb, int h) | 
|  | 2036 | { | 
|  | 2037 | RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), | 
|  | 2038 | "vs-8250: S is not initialized"); | 
|  | 2039 |  | 
|  | 2040 | if (h) | 
|  | 2041 | return dc_check_balance_internal(tb, h); | 
|  | 2042 | else | 
|  | 2043 | return dc_check_balance_leaf(tb, h); | 
|  | 2044 | } | 
|  | 2045 |  | 
|  | 2046 | /* | 
|  | 2047 | * Check whether current node S[h] is balanced. | 
|  | 2048 | * Calculate parameters for balancing for current level h. | 
|  | 2049 | * Parameters: | 
|  | 2050 | * | 
|  | 2051 | *	tb	tree_balance structure: | 
|  | 2052 | * | 
|  | 2053 | *              tb is a large structure that must be read about in the header | 
|  | 2054 | *		file at the same time as this procedure if the reader is | 
|  | 2055 | *		to successfully understand this procedure | 
|  | 2056 | * | 
|  | 2057 | *	h	current level of the node; | 
|  | 2058 | *	inum	item number in S[h]; | 
|  | 2059 | *	mode	i - insert, p - paste, d - delete, c - cut. | 
|  | 2060 | * Returns:	1 - schedule occurred; | 
|  | 2061 | *	        0 - balancing for higher levels needed; | 
|  | 2062 | *	       -1 - no balancing for higher levels needed; | 
|  | 2063 | *	       -2 - no disk space. | 
|  | 2064 | */ | 
|  | 2065 | static int check_balance(int mode, | 
|  | 2066 | struct tree_balance *tb, | 
|  | 2067 | int h, | 
|  | 2068 | int inum, | 
|  | 2069 | int pos_in_item, | 
|  | 2070 | struct item_head *ins_ih, const void *data) | 
|  | 2071 | { | 
|  | 2072 | struct virtual_node *vn; | 
|  | 2073 |  | 
|  | 2074 | vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); | 
|  | 2075 | vn->vn_free_ptr = (char *)(tb->tb_vn + 1); | 
|  | 2076 | vn->vn_mode = mode; | 
|  | 2077 | vn->vn_affected_item_num = inum; | 
|  | 2078 | vn->vn_pos_in_item = pos_in_item; | 
|  | 2079 | vn->vn_ins_ih = ins_ih; | 
|  | 2080 | vn->vn_data = data; | 
|  | 2081 |  | 
|  | 2082 | RFALSE(mode == M_INSERT && !vn->vn_ins_ih, | 
|  | 2083 | "vs-8255: ins_ih can not be 0 in insert mode"); | 
|  | 2084 |  | 
|  | 2085 | /* Calculate balance parameters when size of node is increasing. */ | 
|  | 2086 | if (tb->insert_size[h] > 0) | 
|  | 2087 | return ip_check_balance(tb, h); | 
|  | 2088 |  | 
|  | 2089 | /* Calculate balance parameters when  size of node is decreasing. */ | 
|  | 2090 | return dc_check_balance(tb, h); | 
|  | 2091 | } | 
|  | 2092 |  | 
|  | 2093 | /* Check whether parent at the path is the really parent of the current node.*/ | 
|  | 2094 | static int get_direct_parent(struct tree_balance *tb, int h) | 
|  | 2095 | { | 
|  | 2096 | struct buffer_head *bh; | 
|  | 2097 | struct treepath *path = tb->tb_path; | 
|  | 2098 | int position, | 
|  | 2099 | path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); | 
|  | 2100 |  | 
|  | 2101 | /* We are in the root or in the new root. */ | 
|  | 2102 | if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { | 
|  | 2103 |  | 
|  | 2104 | RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, | 
|  | 2105 | "PAP-8260: invalid offset in the path"); | 
|  | 2106 |  | 
|  | 2107 | if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> | 
|  | 2108 | b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { | 
|  | 2109 | /* Root is not changed. */ | 
|  | 2110 | PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL; | 
|  | 2111 | PATH_OFFSET_POSITION(path, path_offset - 1) = 0; | 
|  | 2112 | return CARRY_ON; | 
|  | 2113 | } | 
|  | 2114 | /* Root is changed and we must recalculate the path. */ | 
|  | 2115 | return REPEAT_SEARCH; | 
|  | 2116 | } | 
|  | 2117 |  | 
|  | 2118 | /* Parent in the path is not in the tree. */ | 
|  | 2119 | if (!B_IS_IN_TREE | 
|  | 2120 | (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) | 
|  | 2121 | return REPEAT_SEARCH; | 
|  | 2122 |  | 
|  | 2123 | if ((position = | 
|  | 2124 | PATH_OFFSET_POSITION(path, | 
|  | 2125 | path_offset - 1)) > B_NR_ITEMS(bh)) | 
|  | 2126 | return REPEAT_SEARCH; | 
|  | 2127 |  | 
|  | 2128 | /* Parent in the path is not parent of the current node in the tree. */ | 
|  | 2129 | if (B_N_CHILD_NUM(bh, position) != | 
|  | 2130 | PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) | 
|  | 2131 | return REPEAT_SEARCH; | 
|  | 2132 |  | 
|  | 2133 | if (buffer_locked(bh)) { | 
|  | 2134 | int depth = reiserfs_write_unlock_nested(tb->tb_sb); | 
|  | 2135 | __wait_on_buffer(bh); | 
|  | 2136 | reiserfs_write_lock_nested(tb->tb_sb, depth); | 
|  | 2137 | if (FILESYSTEM_CHANGED_TB(tb)) | 
|  | 2138 | return REPEAT_SEARCH; | 
|  | 2139 | } | 
|  | 2140 |  | 
|  | 2141 | /* | 
|  | 2142 | * Parent in the path is unlocked and really parent | 
|  | 2143 | * of the current node. | 
|  | 2144 | */ | 
|  | 2145 | return CARRY_ON; | 
|  | 2146 | } | 
|  | 2147 |  | 
|  | 2148 | /* | 
|  | 2149 | * Using lnum[h] and rnum[h] we should determine what neighbors | 
|  | 2150 | * of S[h] we | 
|  | 2151 | * need in order to balance S[h], and get them if necessary. | 
|  | 2152 | * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked; | 
|  | 2153 | *	        CARRY_ON - schedule didn't occur while the function worked; | 
|  | 2154 | */ | 
static int get_neighbors(struct tree_balance *tb, int h)
{
	int child_position,
	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
	unsigned long son_number;
	struct super_block *sb = tb->tb_sb;
	struct buffer_head *bh;
	int depth;

	PROC_INFO_INC(sb, get_neighbors[h]);

	if (tb->lnum[h]) {
		/* We need left neighbor to balance S[h]. */
		PROC_INFO_INC(sb, need_l_neighbor[h]);
		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);

		RFALSE(bh == tb->FL[h] &&
		       !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
		       "PAP-8270: invalid position in the parent");

		/*
		 * position in FL[h] of the pointer to L[h]; when the parent
		 * of S[h] is not FL[h], L[h] is FL[h]'s last child
		 */
		child_position =
		    (bh ==
		     tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
							   FL[h]);
		son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
		/* drop the write lock around the (possibly blocking) read */
		depth = reiserfs_write_unlock_nested(tb->tb_sb);
		bh = sb_bread(sb, son_number);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		if (!bh)
			return IO_ERROR;
		/* the tree may have changed while we slept in sb_bread */
		if (FILESYSTEM_CHANGED_TB(tb)) {
			brelse(bh);
			PROC_INFO_INC(sb, get_neighbors_restart[h]);
			return REPEAT_SEARCH;
		}

		RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
		       child_position > B_NR_ITEMS(tb->FL[h]) ||
		       B_N_CHILD_NUM(tb->FL[h], child_position) !=
		       bh->b_blocknr, "PAP-8275: invalid parent");
		RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
		RFALSE(!h &&
		       B_FREE_SPACE(bh) !=
		       MAX_CHILD_SIZE(bh) -
		       dc_size(B_N_CHILD(tb->FL[0], child_position)),
		       "PAP-8290: invalid child size of left neighbor");

		/* release the previously cached left neighbor, keep the new one */
		brelse(tb->L[h]);
		tb->L[h] = bh;
	}

	/* We need right neighbor to balance S[path_offset]. */
	if (tb->rnum[h]) {
		PROC_INFO_INC(sb, need_r_neighbor[h]);
		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);

		RFALSE(bh == tb->FR[h] &&
		       PATH_OFFSET_POSITION(tb->tb_path,
					    path_offset) >=
		       B_NR_ITEMS(bh),
		       "PAP-8295: invalid position in the parent");

		/*
		 * position in FR[h] of the pointer to R[h]; when the parent
		 * of S[h] is not FR[h], R[h] is FR[h]'s first child
		 */
		child_position =
		    (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
		son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
		/* drop the write lock around the (possibly blocking) read */
		depth = reiserfs_write_unlock_nested(tb->tb_sb);
		bh = sb_bread(sb, son_number);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		if (!bh)
			return IO_ERROR;
		if (FILESYSTEM_CHANGED_TB(tb)) {
			brelse(bh);
			PROC_INFO_INC(sb, get_neighbors_restart[h]);
			return REPEAT_SEARCH;
		}
		brelse(tb->R[h]);
		tb->R[h] = bh;

		RFALSE(!h
		       && B_FREE_SPACE(bh) !=
		       MAX_CHILD_SIZE(bh) -
		       dc_size(B_N_CHILD(tb->FR[0], child_position)),
		       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
		       B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
		       dc_size(B_N_CHILD(tb->FR[0], child_position)));

	}
	return CARRY_ON;
}
|  | 2244 |  | 
/*
 * Worst-case number of bytes needed for the virtual-node scratch buffer
 * at the given block size.  NOTE(review): bh is unused here — presumably
 * kept for interface symmetry; confirm before removing.
 */
static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
{
	int max_num_of_items;
	int max_num_of_entries;
	unsigned long blocksize = sb->s_blocksize;

/* shortest possible directory entry name */
#define MIN_NAME_LEN 1

	/* most items a formatted node of this block size can hold */
	max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
	/* most entries a single directory item can hold */
	max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
	    (DEH_SIZE + MIN_NAME_LEN);

	/*
	 * The buffer holds the virtual_node header followed by either an
	 * array of virtual_items, or one virtual_item plus a
	 * direntry_uarea with one __u16 entry slot per directory entry
	 * (direntry_uarea already contains one slot, hence the "- 1").
	 * Take whichever layout is larger.
	 */
	return sizeof(struct virtual_node) +
	    max(max_num_of_items * sizeof(struct virtual_item),
		sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
		(max_num_of_entries - 1) * sizeof(__u16));
}
|  | 2262 |  | 
|  | 2263 | /* | 
|  | 2264 | * maybe we should fail balancing we are going to perform when kmalloc | 
|  | 2265 | * fails several times. But now it will loop until kmalloc gets | 
|  | 2266 | * required memory | 
|  | 2267 | */ | 
/*
 * Ensure tb->vn_buf is large enough for the virtual node of the leaf at
 * the end of the path.  Returns CARRY_ON when the buffer is ready and
 * the tree is unchanged, REPEAT_SEARCH when the caller must restart.
 */
static int get_mem_for_virtual_node(struct tree_balance *tb)
{
	int check_fs = 0;
	int size;
	char *buf;

	size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));

	/* we have to allocate more memory for virtual node */
	if (size > tb->vn_buf_size) {
		if (tb->vn_buf) {
			/* free memory allocated before */
			kfree(tb->vn_buf);
			/* this is not needed if kfree is atomic */
			check_fs = 1;
		}

		/* virtual node requires now more memory */
		tb->vn_buf_size = size;

		/*
		 * get memory for virtual item; try a non-blocking
		 * allocation first so we keep our collected resources
		 */
		buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
		if (!buf) {
			/*
			 * getting memory with GFP_KERNEL priority may involve
			 * balancing now (due to indirect_to_direct conversion
			 * on dcache shrinking). So, release path and collected
			 * resources here
			 */
			free_buffers_in_tb(tb);
			/* blocking retry; on failure vn_buf ends up NULL */
			buf = kmalloc(size, GFP_NOFS);
			if (!buf) {
				tb->vn_buf_size = 0;
			}
			tb->vn_buf = buf;
			schedule();
			/* we slept and dropped resources: must re-search */
			return REPEAT_SEARCH;
		}

		tb->vn_buf = buf;
	}

	/* the kfree above may have scheduled; recheck the tree generation */
	if (check_fs && FILESYSTEM_CHANGED_TB(tb))
		return REPEAT_SEARCH;

	return CARRY_ON;
}
|  | 2315 |  | 
|  | 2316 | #ifdef CONFIG_REISERFS_CHECK | 
/*
 * Debug build (CONFIG_REISERFS_CHECK) sanity checks for a buffer taking
 * part in balancing; panics on the first inconsistency found.
 * @sb:    super block the buffer must belong to
 * @bh:    buffer to check; NULL is silently ignored
 * @descr: short tag naming the buffer's role (e.g. "S", "L", "FL")
 * @level: tree level, used only in the panic messages
 */
static void tb_buffer_sanity_check(struct super_block *sb,
				   struct buffer_head *bh,
				   const char *descr, int level)
{
	if (bh) {
		if (atomic_read(&(bh->b_count)) <= 0)

			reiserfs_panic(sb, "jmacd-1", "negative or zero "
				       "reference counter for buffer %s[%d] "
				       "(%b)", descr, level, bh);

		if (!buffer_uptodate(bh))
			reiserfs_panic(sb, "jmacd-2", "buffer is not up "
				       "to date %s[%d] (%b)",
				       descr, level, bh);

		if (!B_IS_IN_TREE(bh))
			reiserfs_panic(sb, "jmacd-3", "buffer is not "
				       "in tree %s[%d] (%b)",
				       descr, level, bh);

		if (bh->b_bdev != sb->s_bdev)
			reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
				       "device %s[%d] (%b)",
				       descr, level, bh);

		if (bh->b_size != sb->s_blocksize)
			reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
				       "blocksize %s[%d] (%b)",
				       descr, level, bh);

		if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
			reiserfs_panic(sb, "jmacd-6", "buffer block "
				       "number too high %s[%d] (%b)",
				       descr, level, bh);
	}
}
|  | 2354 | #else | 
static void tb_buffer_sanity_check(struct super_block *sb,
				   struct buffer_head *bh,
				   const char *descr, int level)
{
	/* no-op when CONFIG_REISERFS_CHECK is disabled */
}
|  | 2360 | #endif | 
|  | 2361 |  | 
/*
 * Make @bh safe to modify by preparing it for the journal.
 * NOTE(review): the return value is reiserfs_prepare_for_journal()'s;
 * callers here treat a zero result as "buffer still busy, must wait" —
 * confirm against that function's contract.
 */
static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
{
	return reiserfs_prepare_for_journal(s, bh, 0);
}
|  | 2366 |  | 
/*
 * Wait until every buffer gathered for the pending balance (search-path
 * buffers, left/right neighbors with their parents, and the FEB list) is
 * unlocked and prepared for the journal.  Whenever a locked buffer is
 * found we drop the write lock, sleep on the buffer, retake the lock and
 * restart the scan.  Returns CARRY_ON when everything is ready, or
 * REPEAT_SEARCH if the filesystem changed while we slept.
 */
static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
{
	struct buffer_head *locked;
#ifdef CONFIG_REISERFS_CHECK
	int repeat_counter = 0;
#endif
	int i;

	do {

		locked = NULL;

		/* pass 1: buffers on the search path, leaf upwards */
		for (i = tb->tb_path->path_length;
		     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
			if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
				/*
				 * if I understand correctly, we can only
				 * be sure the last buffer in the path is
				 * in the tree --clm
				 */
#ifdef CONFIG_REISERFS_CHECK
				if (PATH_PLAST_BUFFER(tb->tb_path) ==
				    PATH_OFFSET_PBUFFER(tb->tb_path, i))
					tb_buffer_sanity_check(tb->tb_sb,
							       PATH_OFFSET_PBUFFER
							       (tb->tb_path,
								i), "S",
							       tb->tb_path->
							       path_length - i);
#endif
				if (!clear_all_dirty_bits(tb->tb_sb,
							  PATH_OFFSET_PBUFFER
							  (tb->tb_path,
							   i))) {
					/* journal could not take it yet */
					locked =
					    PATH_OFFSET_PBUFFER(tb->tb_path,
								i);
				}
			}
		}

		/*
		 * pass 2: per affected level, the neighbors (L/R), their
		 * parents (FL/FR) and common parents (CFL/CFR) that the
		 * balance plan says will be touched
		 */
		for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
		     i++) {

			if (tb->lnum[i]) {

				if (tb->L[i]) {
					tb_buffer_sanity_check(tb->tb_sb,
							       tb->L[i],
							       "L", i);
					if (!clear_all_dirty_bits
					    (tb->tb_sb, tb->L[i]))
						locked = tb->L[i];
				}

				if (!locked && tb->FL[i]) {
					tb_buffer_sanity_check(tb->tb_sb,
							       tb->FL[i],
							       "FL", i);
					if (!clear_all_dirty_bits
					    (tb->tb_sb, tb->FL[i]))
						locked = tb->FL[i];
				}

				if (!locked && tb->CFL[i]) {
					tb_buffer_sanity_check(tb->tb_sb,
							       tb->CFL[i],
							       "CFL", i);
					if (!clear_all_dirty_bits
					    (tb->tb_sb, tb->CFL[i]))
						locked = tb->CFL[i];
				}

			}

			if (!locked && (tb->rnum[i])) {

				if (tb->R[i]) {
					tb_buffer_sanity_check(tb->tb_sb,
							       tb->R[i],
							       "R", i);
					if (!clear_all_dirty_bits
					    (tb->tb_sb, tb->R[i]))
						locked = tb->R[i];
				}

				if (!locked && tb->FR[i]) {
					tb_buffer_sanity_check(tb->tb_sb,
							       tb->FR[i],
							       "FR", i);
					if (!clear_all_dirty_bits
					    (tb->tb_sb, tb->FR[i]))
						locked = tb->FR[i];
				}

				if (!locked && tb->CFR[i]) {
					tb_buffer_sanity_check(tb->tb_sb,
							       tb->CFR[i],
							       "CFR", i);
					if (!clear_all_dirty_bits
					    (tb->tb_sb, tb->CFR[i]))
						locked = tb->CFR[i];
				}
			}
		}

		/*
		 * as far as I can tell, this is not required.  The FEB list
		 * seems to be full of newly allocated nodes, which will
		 * never be locked, dirty, or anything else.
		 * To be safe, I'm putting in the checks and waits in.
		 * For the moment, they are needed to keep the code in
		 * journal.c from complaining about the buffer.
		 * That code is inside CONFIG_REISERFS_CHECK as well.  --clm
		 */
		for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
			if (tb->FEB[i]) {
				if (!clear_all_dirty_bits
				    (tb->tb_sb, tb->FEB[i]))
					locked = tb->FEB[i];
			}
		}

		if (locked) {
			int depth;
#ifdef CONFIG_REISERFS_CHECK
			repeat_counter++;
			if ((repeat_counter % 10000) == 0) {
				reiserfs_warning(tb->tb_sb, "reiserfs-8200",
						 "too many iterations waiting "
						 "for buffer to unlock "
						 "(%b)", locked);

				/* Don't loop forever.  Try to recover from possible error. */

				return (FILESYSTEM_CHANGED_TB(tb)) ?
				    REPEAT_SEARCH : CARRY_ON;
			}
#endif
			/* sleep without the write lock, then revalidate */
			depth = reiserfs_write_unlock_nested(tb->tb_sb);
			__wait_on_buffer(locked);
			reiserfs_write_lock_nested(tb->tb_sb, depth);
			if (FILESYSTEM_CHANGED_TB(tb))
				return REPEAT_SEARCH;
		}

	} while (locked);

	return CARRY_ON;
}
|  | 2517 |  | 
|  | 2518 | /* | 
|  | 2519 | * Prepare for balancing, that is | 
|  | 2520 | *	get all necessary parents, and neighbors; | 
|  | 2521 | *	analyze what and where should be moved; | 
|  | 2522 | *	get sufficient number of new nodes; | 
|  | 2523 | * Balancing will start only after all resources will be collected at a time. | 
|  | 2524 | * | 
|  | 2525 | * When ported to SMP kernels, only at the last moment after all needed nodes | 
|  | 2526 | * are collected in cache, will the resources be locked using the usual | 
|  | 2527 | * textbook ordered lock acquisition algorithms.  Note that ensuring that | 
|  | 2528 | * this code neither write locks what it does not need to write lock nor locks | 
|  | 2529 | * out of order will be a pain in the butt that could have been avoided. | 
|  | 2530 | * Grumble grumble. -Hans | 
|  | 2531 | * | 
|  | 2532 | * fix is meant in the sense of render unchanging | 
|  | 2533 | * | 
|  | 2534 | * Latency might be improved by first gathering a list of what buffers | 
|  | 2535 | * are needed and then getting as many of them in parallel as possible? -Hans | 
|  | 2536 | * | 
|  | 2537 | * Parameters: | 
|  | 2538 | *	op_mode	i - insert, d - delete, c - cut (truncate), p - paste (append) | 
|  | 2539 | *	tb	tree_balance structure; | 
|  | 2540 | *	inum	item number in S[h]; | 
 *      pos_in_item - position within the affected item (taken from
 *		tb->tb_path->pos_in_item)
|  | 2542 | *      ins_ih	item head of item being inserted | 
|  | 2543 | *	data	inserted item or data to be pasted | 
|  | 2544 | * Returns:	1 - schedule occurred while the function worked; | 
|  | 2545 | *	        0 - schedule didn't occur while the function worked; | 
|  | 2546 | *             -1 - if no_disk_space | 
|  | 2547 | */ | 
|  | 2548 |  | 
int fix_nodes(int op_mode, struct tree_balance *tb,
	      struct item_head *ins_ih, const void *data)
{
	int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
	int pos_in_item;

	/*
	 * we set wait_tb_buffers_run once buffers have been journal-prepared,
	 * so that the failure path below knows it must restore the dirty
	 * bits cleared during wait_tb_buffers_until_unlocked
	 */
	int wait_tb_buffers_run = 0;
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);

	/* statistics counter: number of fix_nodes calls on this sb */
	++REISERFS_SB(tb->tb_sb)->s_fix_nodes;

	pos_in_item = tb->tb_path->pos_in_item;

	/* remember the generation so we can detect concurrent tree changes */
	tb->fs_gen = get_generation(tb->tb_sb);

	/*
	 * we prepare and log the super here so it will already be in the
	 * transaction when do_balance needs to change it.
	 * This way do_balance won't have to schedule when trying to prepare
	 * the super for logging
	 */
	reiserfs_prepare_for_journal(tb->tb_sb,
				     SB_BUFFER_WITH_SB(tb->tb_sb), 1);
	journal_mark_dirty(tb->transaction_handle,
			   SB_BUFFER_WITH_SB(tb->tb_sb));
	if (FILESYSTEM_CHANGED_TB(tb))
		return REPEAT_SEARCH;

	/*
	 * S[0] may be locked, which is possible e.g. during an
	 * indirect_to_direct conversion; wait for it without the write lock
	 */
	if (buffer_locked(tbS0)) {
		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
		__wait_on_buffer(tbS0);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		if (FILESYSTEM_CHANGED_TB(tb))
			return REPEAT_SEARCH;
	}
#ifdef CONFIG_REISERFS_CHECK
	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
		print_cur_tb("fix_nodes");
		reiserfs_panic(tb->tb_sb, "PAP-8305",
			       "there is pending do_balance");
	}

	if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
		reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
			       "not uptodate at the beginning of fix_nodes "
			       "or not in tree (mode %c)",
			       tbS0, tbS0, op_mode);

	/* Check parameters. */
	switch (op_mode) {
	case M_INSERT:
		/* insert may target one past the last item (append) */
		if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
			reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
				       "item number %d (in S0 - %d) in case "
				       "of insert", item_num,
				       B_NR_ITEMS(tbS0));
		break;
	case M_PASTE:
	case M_DELETE:
	case M_CUT:
		/* these modes must address an existing item */
		if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
			print_block(tbS0, 0, -1, -1);
			reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
				       "item number(%d); mode = %c "
				       "insert_size = %d",
				       item_num, op_mode,
				       tb->insert_size[0]);
		}
		break;
	default:
		reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
			       "of operation");
	}
#endif

	if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
		/* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */
		return REPEAT_SEARCH;

	/* Starting from the leaf level; for all levels h of the tree. */
	for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
		ret = get_direct_parent(tb, h);
		if (ret != CARRY_ON)
			goto repeat;

		ret = check_balance(op_mode, tb, h, item_num,
				    pos_in_item, ins_ih, data);
		if (ret != CARRY_ON) {
			if (ret == NO_BALANCING_NEEDED) {
				/* No balancing for higher levels needed. */
				ret = get_neighbors(tb, h);
				if (ret != CARRY_ON)
					goto repeat;
				if (h != MAX_HEIGHT - 1)
					tb->insert_size[h + 1] = 0;
				/*
				 * ok, analysis and resource gathering
				 * are complete
				 */
				break;
			}
			goto repeat;
		}

		ret = get_neighbors(tb, h);
		if (ret != CARRY_ON)
			goto repeat;

		/*
		 * No disk space, or schedule occurred and analysis may be
		 * invalid and needs to be redone.
		 */
		ret = get_empty_nodes(tb, h);
		if (ret != CARRY_ON)
			goto repeat;

		/*
		 * We have a positive insert size but no nodes exist on this
		 * level, this means that we are creating a new root.
		 */
		if (!PATH_H_PBUFFER(tb->tb_path, h)) {

			RFALSE(tb->blknum[h] != 1,
			       "PAP-8350: creating new empty root");

			if (h < MAX_HEIGHT - 1)
				tb->insert_size[h + 1] = 0;
		} else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
			/*
			 * The tree needs to be grown, so this node S[h]
			 * which is the root node is split into two nodes,
			 * and a new node (S[h+1]) will be created to
			 * become the root node.
			 */
			if (tb->blknum[h] > 1) {

				RFALSE(h == MAX_HEIGHT - 1,
				       "PAP-8355: attempt to create too high of a tree");

				/*
				 * new root gets one key/pointer pair per
				 * extra node, plus one more pointer
				 */
				tb->insert_size[h + 1] =
				    (DC_SIZE +
				     KEY_SIZE) * (tb->blknum[h] - 1) +
				    DC_SIZE;
			} else if (h < MAX_HEIGHT - 1)
				tb->insert_size[h + 1] = 0;
		} else
			tb->insert_size[h + 1] =
			    (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
	}

	ret = wait_tb_buffers_until_unlocked(tb);
	if (ret == CARRY_ON) {
		if (FILESYSTEM_CHANGED_TB(tb)) {
			/* buffers were prepared; restore them on repeat */
			wait_tb_buffers_run = 1;
			ret = REPEAT_SEARCH;
			goto repeat;
		} else {
			return CARRY_ON;
		}
	} else {
		wait_tb_buffers_run = 1;
		goto repeat;
	}

repeat:
	/*
	 * fix_nodes was unable to perform its calculation due to
	 * filesystem got changed under us, lack of free disk space or i/o
	 * failure. If the first is the case - the search will be
	 * repeated. For now - free all resources acquired so far except
	 * for the new allocated nodes
	 */
	{
		int i;

		/* Release path buffers. */
		if (wait_tb_buffers_run) {
			pathrelse_and_restore(tb->tb_sb, tb->tb_path);
		} else {
			pathrelse(tb->tb_path);
		}
		/* brelse all resources collected for balancing */
		for (i = 0; i < MAX_HEIGHT; i++) {
			if (wait_tb_buffers_run) {
				reiserfs_restore_prepared_buffer(tb->tb_sb,
								 tb->L[i]);
				reiserfs_restore_prepared_buffer(tb->tb_sb,
								 tb->R[i]);
				reiserfs_restore_prepared_buffer(tb->tb_sb,
								 tb->FL[i]);
				reiserfs_restore_prepared_buffer(tb->tb_sb,
								 tb->FR[i]);
				reiserfs_restore_prepared_buffer(tb->tb_sb,
								 tb->
								 CFL[i]);
				reiserfs_restore_prepared_buffer(tb->tb_sb,
								 tb->
								 CFR[i]);
			}

			brelse(tb->L[i]);
			brelse(tb->R[i]);
			brelse(tb->FL[i]);
			brelse(tb->FR[i]);
			brelse(tb->CFL[i]);
			brelse(tb->CFR[i]);

			tb->L[i] = NULL;
			tb->R[i] = NULL;
			tb->FL[i] = NULL;
			tb->FR[i] = NULL;
			tb->CFL[i] = NULL;
			tb->CFR[i] = NULL;
		}

		if (wait_tb_buffers_run) {
			for (i = 0; i < MAX_FEB_SIZE; i++) {
				if (tb->FEB[i])
					reiserfs_restore_prepared_buffer
					    (tb->tb_sb, tb->FEB[i]);
			}
		}
		return ret;
	}

}
|  | 2780 |  | 
|  | 2781 | void unfix_nodes(struct tree_balance *tb) | 
|  | 2782 | { | 
|  | 2783 | int i; | 
|  | 2784 |  | 
|  | 2785 | /* Release path buffers. */ | 
|  | 2786 | pathrelse_and_restore(tb->tb_sb, tb->tb_path); | 
|  | 2787 |  | 
|  | 2788 | /* brelse all resources collected for balancing */ | 
|  | 2789 | for (i = 0; i < MAX_HEIGHT; i++) { | 
|  | 2790 | reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); | 
|  | 2791 | reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); | 
|  | 2792 | reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); | 
|  | 2793 | reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); | 
|  | 2794 | reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); | 
|  | 2795 | reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); | 
|  | 2796 |  | 
|  | 2797 | brelse(tb->L[i]); | 
|  | 2798 | brelse(tb->R[i]); | 
|  | 2799 | brelse(tb->FL[i]); | 
|  | 2800 | brelse(tb->FR[i]); | 
|  | 2801 | brelse(tb->CFL[i]); | 
|  | 2802 | brelse(tb->CFR[i]); | 
|  | 2803 | } | 
|  | 2804 |  | 
|  | 2805 | /* deal with list of allocated (used and unused) nodes */ | 
|  | 2806 | for (i = 0; i < MAX_FEB_SIZE; i++) { | 
|  | 2807 | if (tb->FEB[i]) { | 
|  | 2808 | b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; | 
|  | 2809 | /* | 
|  | 2810 | * de-allocated block which was not used by | 
|  | 2811 | * balancing and bforget about buffer for it | 
|  | 2812 | */ | 
|  | 2813 | brelse(tb->FEB[i]); | 
|  | 2814 | reiserfs_free_block(tb->transaction_handle, NULL, | 
|  | 2815 | blocknr, 0); | 
|  | 2816 | } | 
|  | 2817 | if (tb->used[i]) { | 
|  | 2818 | /* release used as new nodes including a new root */ | 
|  | 2819 | brelse(tb->used[i]); | 
|  | 2820 | } | 
|  | 2821 | } | 
|  | 2822 |  | 
|  | 2823 | kfree(tb->vn_buf); | 
|  | 2824 |  | 
|  | 2825 | } |