[T106][ZXW-22]7520V3SCV2.01.01.02P42U09_VEC_V0.8_AP_VEC origin source commit

Change-Id: Ic6e05d89ecd62fc34f82b23dcf306c93764aec4b
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/Kconfig b/ap/os/linux/linux-3.4.x/fs/reiserfs/Kconfig
new file mode 100644
index 0000000..7cd4666
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/Kconfig
@@ -0,0 +1,88 @@
+config REISERFS_FS
+	tristate "Reiserfs support"
+	select CRC32
+	help
+	  Stores not just filenames but the files themselves in a balanced
+	  tree.  Uses journalling.
+
+	  Balanced trees are more efficient than traditional file system
+	  architectural foundations.
+
+	  In general, ReiserFS is as fast as ext2, but is very efficient with
+	  large directories and small files.  Additional patches are needed
+	  for NFS and quotas; please see
+	  <https://reiser4.wiki.kernel.org/index.php/Main_Page> for links.
+
+	  It is more easily extended to have features currently found in
+	  database and keyword search systems than block allocation based file
+	  systems are.  The next version will be so extended, and will support
+	  plugins consistent with our motto ``It takes more than a license to
+	  make source code open.''
+
+	  Read <https://reiser4.wiki.kernel.org/index.php/Main_Page> 
+	  to learn more about reiserfs.
+
+	  Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com.
+
+	  If you like it, you can pay us to add new features to it that you
+	  need, buy a support contract, or pay us to port it to another OS.
+
+config REISERFS_CHECK
+	bool "Enable reiserfs debug mode"
+	depends on REISERFS_FS
+	help
+	  If you set this to Y, then ReiserFS will perform every check it can
+	  possibly imagine of its internal consistency throughout its
+	  operation.  It will also go substantially slower.  More than once we
+	  have forgotten that this was on, and then gone despondent over the
+	  latest benchmarks.:-) Use of this option allows our team to go all
+	  out in checking for consistency when debugging without fear of its
+	  effect on end users.  If you are on the verge of sending in a bug
+	  report, say Y and you might get a useful error message.  Almost
+	  everyone should say N.
+
+config REISERFS_PROC_INFO
+	bool "Stats in /proc/fs/reiserfs"
+	depends on REISERFS_FS && PROC_FS
+	help
+	  Create under /proc/fs/reiserfs a hierarchy of files, displaying
+	  various ReiserFS statistics and internal data at the expense of
+	  making your kernel or module slightly larger (+8 KB). This also
+	  increases the amount of kernel memory required for each mount.
+	  Almost everyone but ReiserFS developers and people fine-tuning
+	  reiserfs or tracing problems should say N.
+
+config REISERFS_FS_XATTR
+	bool "ReiserFS extended attributes"
+	depends on REISERFS_FS
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config REISERFS_FS_POSIX_ACL
+	bool "ReiserFS POSIX Access Control Lists"
+	depends on REISERFS_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
+
+config REISERFS_FS_SECURITY
+	bool "ReiserFS Security Labels"
+	depends on REISERFS_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ReiserFS filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/Makefile b/ap/os/linux/linux-3.4.x/fs/reiserfs/Makefile
new file mode 100644
index 0000000..3c3b001
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/Makefile
@@ -0,0 +1,38 @@
+#
+# Makefile for the linux reiser-filesystem routines.
+#
+
+obj-$(CONFIG_REISERFS_FS) += reiserfs.o
+
+reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
+		 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
+		 hashes.o tail_conversion.o journal.o resize.o \
+		 item_ops.o ioctl.o xattr.o lock.o
+
+ifeq ($(CONFIG_REISERFS_PROC_INFO),y)
+reiserfs-objs += procfs.o
+endif
+
+ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
+reiserfs-objs += xattr_user.o xattr_trusted.o
+endif
+
+ifeq ($(CONFIG_REISERFS_FS_SECURITY),y)
+reiserfs-objs += xattr_security.o
+endif
+
+ifeq ($(CONFIG_REISERFS_FS_POSIX_ACL),y)
+reiserfs-objs += xattr_acl.o
+endif
+
+# gcc -O2 (the kernel default) is overaggressive on ppc32 when many inline
+# functions are used.  This causes the compiler to advance the stack
+# pointer out of the available stack space, corrupting kernel space,
+# and causing a panic. Since this behavior only affects ppc32, this ifeq
+# will work around it. If any other architecture displays this behavior,
+# add it here.
+ccflags-$(CONFIG_PPC32) := $(call cc-ifversion, -lt, 0400, -O1)
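+# (A note on the line above: cc-ifversion expands to the -O1 flag only
+# when the gcc version is below 4.0, so newer compilers keep the
+# default -O2.)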
+
+TAGS:
+	etags *.c
+
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/README b/ap/os/linux/linux-3.4.x/fs/reiserfs/README
new file mode 100644
index 0000000..e2f7a26
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/README
@@ -0,0 +1,161 @@
+[LICENSING]
+
+ReiserFS is hereby licensed under the GNU General
+Public License version 2.
+
+Source code files that contain the phrase "licensing governed by
+reiserfs/README" are "governed files" throughout this file.  Governed
+files are licensed under the GPL.  The portions of them owned by Hans
+Reiser, or authorized to be licensed by him, have been in the past,
+and likely will be in the future, licensed to other parties under
+other licenses.  If you add your code to governed files, and don't
+want it to be owned by Hans Reiser, put your copyright label on that
+code so the poor blight and his customers can keep things straight.
+All portions of governed files not labeled otherwise are owned by Hans
+Reiser, and by adding your code to it, widely distributing it to
+others or sending us a patch, and leaving the sentence in stating that
+licensing is governed by the statement in this file, you accept this.
+It will be a kindness if you identify whether Hans Reiser is allowed
+to license code labeled as owned by you on your behalf other than
+under the GPL, because he wants to know if it is okay to do so and put
+a check in the mail to you (for non-trivial improvements) when he
+makes his next sale.  He makes no guarantees as to the amount if any,
+though he feels motivated to motivate contributors, and you can surely
+discuss this with him before or after contributing.  You have the
+right to decline to allow him to license your code contribution other
+than under the GPL.
+
+Further licensing options are available for commercial and/or other
+interests directly from Hans Reiser: hans@reiser.to.  If you interpret
+the GPL as not allowing those additional licensing options, you read
+it wrongly, and Richard Stallman agrees with me, when carefully read
+you can see that those restrictions on additional terms do not apply
+to the owner of the copyright, and my interpretation of this shall
+govern for this license.
+
+Finally, nothing in this license shall be interpreted to allow you to
+fail to fairly credit me, or to remove my credits, without my
+permission, unless you are an end user not redistributing to others.
+If you have doubts about how to properly do that, or about what is
+fair, ask.  (Last I spoke with him Richard was contemplating how best
+to address the fair crediting issue in the next GPL version.)
+
+[END LICENSING]
+
+Reiserfs is a file system based on balanced tree algorithms, which is
+described at https://reiser4.wiki.kernel.org/index.php/Main_Page 
+
+Stop reading here.  Go there, then return.
+
+Send bug reports to yura@namesys.botik.ru.
+
+mkreiserfs and other utilities are in reiserfs/utils, or wherever your
+Linux provider put them.  There is some disagreement about how useful
+it is for users to get their fsck and mkreiserfs out of sync with the
+version of reiserfs that is in their kernel, with many important
+distributors wanting them out of sync.:-) Please try to remember to
+recompile and reinstall fsck and mkreiserfs with every update of
+reiserfs; this is a common source of confusion.  Note that some of the
+utilities cannot be compiled without accessing the balancing code
+which is in the kernel code, and relocating the utilities may require
+you to specify where that code can be found.
+
+Yes, if you update your reiserfs kernel module you do have to
+recompile your kernel, most of the time.  The errors you get will be
+quite cryptic if you forget to do so.
+
+Real users, as opposed to folks who want to hack and then understand
+what went wrong, will want REISERFS_CHECK off.
+
+Hideous Commercial Pitch: Spread your development costs across other OS
+vendors.  Select from the best in the world, not the best in your
+building, by buying from third party OS component suppliers.  Leverage
+the software component development power of the internet.  Be the most
+aggressive in taking advantage of the commercial possibilities of
+decentralized internet development, and add value through your branded
+integration that you sell as an operating system.  Let your competitors
+be the ones to compete against the entire internet by themselves.  Be
+hip, get with the new economic trend, before your competitors do.  Send
+email to hans@reiser.to.
+
+To understand the code, after reading the website, start reading the
+code by reading reiserfs_fs.h first.
+
+Hans Reiser was the project initiator, primary architect, source of all
+funding for the first 5.5 years, and one of the programmers.  He owns
+the copyright.
+
+Vladimir Saveljev was one of the programmers, and he worked long hours
+writing the cleanest code.  He always made the effort to be the best he
+could be, and to make his code the best that it could be.  What resulted
+was quite remarkable. I don't think that money can ever motivate someone
+to work the way he did; he is one of the most selfless men I know.
+
+Yura helps with benchmarking, coding hashes, and block pre-allocation
+code.
+
+Anatoly Pinchuk is a former member of our team who worked closely with
+Vladimir throughout the project's development.  He wrote a quite
+substantial portion of the total code.  He realized that there was a
+space problem with packing tails of files for files larger than a node
+that start on a node aligned boundary (there are reasons to want to node
+align files), and he invented and implemented indirect items and
+unformatted nodes as the solution.
+
+Konstantin Shvachko, with the help of the Russian version of a VC,
+tried to put me in a position where I was forced into giving control
+of the project to him.  (Fortunately, as the person paying the money
+for all salaries from my dayjob I owned all copyrights, and you can't
+really force takeovers of sole proprietorships.)  This was something
+curious, because he never really understood the value of our project,
+why we should do what we do, or why innovation was possible in
+general, but he was sure that he ought to be controlling it.  Every
+innovation had to be forced past him while he was with us.  He added
+two years to the time required to complete reiserfs, and was a net
+loss for me.  Mikhail Gilula was a brilliant innovator who also left
+in a destructive way that erased the value of his contributions, and
+that he was shown much generosity just makes it more painful.
+
+Grigory Zaigralin was an extremely effective system administrator for
+our group.
+
+Igor Krasheninnikov was wonderful at hardware procurement, repair, and
+network installation.
+
+Jeremy Fitzhardinge wrote the teahash.c code, and he gives credit to a
+textbook he got the algorithm from in the code.  Note that his analysis
+of how we could use the hashing code in making 32 bit NFS cookies work
+was probably more important than the actual algorithm.  Colin Plumb also
+contributed to it.
+
+Chris Mason dived right into our code, and in just a few months produced
+the journaling code that dramatically increased the value of ReiserFS.
+He is just an amazing programmer.
+
+Igor Zagorovsky is writing much of the new item handler and extent code
+for our next major release.
+
+Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the
+resizer, and is hard at work on implementing allocate on flush.  SGI
+implemented allocate on flush before us for XFS, and generously took
+the time to convince me we should do it also.  They are great people,
+and a great company.
+
+Yuri Shevchuk and Nikita Danilov are doing squid cache optimization.
+
+Vitaly Fertman is doing fsck.
+
+Jeff Mahoney, of SuSE, contributed a few cleanup fixes, most notably
+the endian safe patches which allow ReiserFS to run on any platform
+supported by the Linux kernel.
+
+SuSE, IntegratedLinux.com, Ecila, MP3.com, bigstorage.com, and the
+Alpha PC Company made it possible for me to not have a day job
+anymore, and to dramatically increase our staffing.  Ecila funded
+hypertext feature development, MP3.com funded journaling, SuSE funded
+core development, IntegratedLinux.com funded squid web cache
+appliances, bigstorage.com funded HSM, and the alpha PC company funded
+the alpha port.  Many of these tasks were helped by sponsors other
+than the ones just named.  SuSE has helped in much more than just
+funding....
+
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/acl.h b/ap/os/linux/linux-3.4.x/fs/reiserfs/acl.h
new file mode 100644
index 0000000..f096b80
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/acl.h
@@ -0,0 +1,76 @@
+#include <linux/init.h>
+#include <linux/posix_acl.h>
+
+#define REISERFS_ACL_VERSION	0x0001
+
+typedef struct {
+	__le16 e_tag;
+	__le16 e_perm;
+	__le32 e_id;
+} reiserfs_acl_entry;
+
+typedef struct {
+	__le16 e_tag;
+	__le16 e_perm;
+} reiserfs_acl_entry_short;
+
+typedef struct {
+	__le32 a_version;
+} reiserfs_acl_header;
+
+static inline size_t reiserfs_acl_size(int count)
+{
+	if (count <= 4) {
+		return sizeof(reiserfs_acl_header) +
+		    count * sizeof(reiserfs_acl_entry_short);
+	} else {
+		return sizeof(reiserfs_acl_header) +
+		    4 * sizeof(reiserfs_acl_entry_short) +
+		    (count - 4) * sizeof(reiserfs_acl_entry);
+	}
+}
+
+static inline int reiserfs_acl_count(size_t size)
+{
+	ssize_t s;
+	size -= sizeof(reiserfs_acl_header);
+	s = size - 4 * sizeof(reiserfs_acl_entry_short);
+	if (s < 0) {
+		if (size % sizeof(reiserfs_acl_entry_short))
+			return -1;
+		return size / sizeof(reiserfs_acl_entry_short);
+	} else {
+		if (s % sizeof(reiserfs_acl_entry))
+			return -1;
+		return s / sizeof(reiserfs_acl_entry) + 4;
+	}
+}
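+
+/*
+ * Illustrative round trip through the two helpers above: with the
+ * on-disk layouts shown (4-byte header, 4-byte short entries, 8-byte
+ * full entries), an ACL of 6 entries occupies
+ *
+ *	4 + 4 * 4 + (6 - 4) * 8 == 36 bytes,
+ *
+ * and reiserfs_acl_count(36) recovers 6, so the helpers are inverses
+ * for any valid on-disk size.
+ */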
+
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
+struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
+int reiserfs_acl_chmod(struct inode *inode);
+int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+				 struct inode *dir, struct dentry *dentry,
+				 struct inode *inode);
+int reiserfs_cache_default_acl(struct inode *dir);
+extern const struct xattr_handler reiserfs_posix_acl_default_handler;
+extern const struct xattr_handler reiserfs_posix_acl_access_handler;
+
+#else
+
+#define reiserfs_cache_default_acl(inode) 0
+#define reiserfs_get_acl NULL
+
+static inline int reiserfs_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int
+reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+			     const struct inode *dir, struct dentry *dentry,
+			     struct inode *inode)
+{
+	return 0;
+}
+#endif
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/bitmap.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/bitmap.c
new file mode 100644
index 0000000..4c0c7d1
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/bitmap.c
@@ -0,0 +1,1382 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+/* Reiserfs block (de)allocator, bitmap-based. */
+
+#include <linux/time.h>
+#include "reiserfs.h"
+#include <linux/errno.h>
+#include <linux/buffer_head.h>
+#include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/quotaops.h>
+#include <linux/seq_file.h>
+
+#define PREALLOCATION_SIZE 9
+
+/* different reiserfs block allocator options */
+
+#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
+
+#define  _ALLOC_concentrating_formatted_nodes 0
+#define  _ALLOC_displacing_large_files 1
+#define  _ALLOC_displacing_new_packing_localities 2
+#define  _ALLOC_old_hashed_relocation 3
+#define  _ALLOC_new_hashed_relocation 4
+#define  _ALLOC_skip_busy 5
+#define  _ALLOC_displace_based_on_dirid 6
+#define  _ALLOC_hashed_formatted_nodes 7
+#define  _ALLOC_old_way 8
+#define  _ALLOC_hundredth_slices 9
+#define  _ALLOC_dirid_groups 10
+#define  _ALLOC_oid_groups 11
+#define  _ALLOC_packing_groups 12
+
+#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
+#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
+#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
+
+#define SET_OPTION(optname) \
+   do { \
+	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
+	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
+    } while(0)
+#define TEST_OPTION(optname, s) \
+    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
+
+static inline void get_bit_address(struct super_block *s,
+				   b_blocknr_t block,
+				   unsigned int *bmap_nr,
+				   unsigned int *offset)
+{
+	/* The block is in the bitmap block whose number equals the block
+	 * number divided by the number of bits in a block. */
+	*bmap_nr = block >> (s->s_blocksize_bits + 3);
+	/* Within that bitmap block it is located at bit offset *offset. */
+	*offset = block & ((s->s_blocksize << 3) - 1);
+}
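+
+/*
+ * Worked example (illustrative, assuming 4 KiB blocks, i.e.
+ * s_blocksize_bits == 12): each bitmap block then covers
+ * 4096 << 3 == 32768 blocks, so for block 40000
+ *
+ *	*bmap_nr = 40000 >> 15 == 1;
+ *	*offset  = 40000 & 32767 == 7232;
+ *
+ * i.e. block 40000 is described by bit 7232 of bitmap block 1.
+ */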
+
+int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
+{
+	unsigned int bmap, offset;
+	unsigned int bmap_count = reiserfs_bmap_count(s);
+
+	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
+		reiserfs_error(s, "vs-4010",
+			       "block number is out of range %lu (%u)",
+			       block, SB_BLOCK_COUNT(s));
+		return 0;
+	}
+
+	get_bit_address(s, block, &bmap, &offset);
+
+	/* Old format filesystem? Unlikely, but the bitmaps are all up front so
+	 * we need to account for it. */
+	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
+			      &(REISERFS_SB(s)->s_properties)))) {
+		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
+		if (block >= bmap1 &&
+		    block <= bmap1 + bmap_count) {
+			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
+				       "can't be freed or reused",
+				       block, bmap_count);
+			return 0;
+		}
+	} else {
+		if (offset == 0) {
+			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
+				       "can't be freed or reused",
+				       block, bmap_count);
+			return 0;
+		}
+	}
+
+	if (bmap >= bmap_count) {
+		reiserfs_error(s, "vs-4030", "bitmap for requested block "
+			       "is out of range: block=%lu, bitmap_nr=%u",
+			       block, bmap);
+		return 0;
+	}
+
+	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
+		reiserfs_error(s, "vs-4050", "this is root block (%u), "
+			       "it must be busy", SB_ROOT_BLOCK(s));
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Searches in journal structures for a given block number (bmap, off). If the
+   block is found in the reiserfs journal, it suggests the next free block
+   candidate to test. */
+static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
+				      int off, int *next)
+{
+	b_blocknr_t tmp;
+
+	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
+		if (tmp) {	/* hint supplied */
+			*next = tmp;
+			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
+		} else {
+			(*next) = off + 1;	/* inc offset to avoid looping. */
+			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
+		}
+		PROC_INFO_INC(s, scan_bitmap.retry);
+		return 1;
+	}
+	return 0;
+}
+
+/* Searches for a window of zero bits with the given minimum and maximum
+ * lengths within one bitmap block. */
+static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
+			     unsigned int bmap_n, int *beg, int boundary,
+			     int min, int max, int unfm)
+{
+	struct super_block *s = th->t_super;
+	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
+	struct buffer_head *bh;
+	int end, next;
+	int org = *beg;
+
+	BUG_ON(!th->t_trans_id);
+
+	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
+	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
+	PROC_INFO_INC(s, scan_bitmap.bmap);
+/* this is unclear and lacks comments, explain how journal bitmaps
+   work here for the reader.  Convey a sense of the design here. What
+   is a window? */
+/* - I mean `a window of zero bits' as in description of this function - Zam. */
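+/* Concretely: in a bitmap whose first bits are 1 1 0 0 0 1 ..., the run of
+   zeros at bits 2..4 is a "window" of length 3. The loop below searches for
+   such a run of at least `min' and at most `max' zero bits and then tries
+   to set those bits, re-checking that no one else claimed them meanwhile. */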
+
+	if (!bi) {
+		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
+			       "for bitmap %d", bmap_n);
+		return 0;
+	}
+
+	bh = reiserfs_read_bitmap_block(s, bmap_n);
+	if (bh == NULL)
+		return 0;
+
+	while (1) {
+	      cont:
+		if (bi->free_count < min) {
+			brelse(bh);
+			return 0;	// No free blocks in this bitmap
+		}
+
+		/* search for a first zero bit -- beginning of a window */
+		*beg = reiserfs_find_next_zero_le_bit
+		    ((unsigned long *)(bh->b_data), boundary, *beg);
+
+		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
+						 * cannot contain a zero window of minimum size */
+			brelse(bh);
+			return 0;
+		}
+
+		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
+			continue;
+		/* first zero bit found; we check next bits */
+		for (end = *beg + 1;; end++) {
+			if (end >= *beg + max || end >= boundary
+			    || reiserfs_test_le_bit(end, bh->b_data)) {
+				next = end;
+				break;
+			}
+			/* finding the other end of zero bit window requires looking into journal structures (in
+			 * case of searching for free blocks for unformatted nodes) */
+			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
+				break;
+		}
+
+		/* now (*beg) points to beginning of zero bits window,
+		 * (end) points to one bit after the window end */
+		if (end - *beg >= min) {	/* it seems we have found window of proper size */
+			int i;
+			reiserfs_prepare_for_journal(s, bh, 1);
+			/* try to mark all the blocks as used, checking that they are still free */
+			for (i = *beg; i < end; i++) {
+				/* It seems that we should not check in journal again. */
+				if (reiserfs_test_and_set_le_bit
+				    (i, bh->b_data)) {
+					/* bit was set by another process
+					 * while we slept in prepare_for_journal() */
+					PROC_INFO_INC(s, scan_bitmap.stolen);
+					if (i >= *beg + min) {	/* we can continue with a smaller set of allocated blocks,
+								 * if the length of this set is greater than or equal to `min' */
+						end = i;
+						break;
+					}
+					/* otherwise we clear all bits that were set ... */
+					while (--i >= *beg)
+						reiserfs_clear_le_bit
+						    (i, bh->b_data);
+					reiserfs_restore_prepared_buffer(s, bh);
+					*beg = org;
+					/* ... and search again in current block from beginning */
+					goto cont;
+				}
+			}
+			bi->free_count -= (end - *beg);
+			journal_mark_dirty(th, s, bh);
+			brelse(bh);
+
+			/* free block count calculation */
+			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
+						     1);
+			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
+			journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+
+			return end - (*beg);
+		} else {
+			*beg = next;
+		}
+	}
+}
+
+static int bmap_hash_id(struct super_block *s, u32 id)
+{
+	char *hash_in = NULL;
+	unsigned long hash;
+	unsigned bm;
+
+	if (id <= 2) {
+		bm = 1;
+	} else {
+		hash_in = (char *)(&id);
+		hash = keyed_hash(hash_in, 4);
+		bm = hash % reiserfs_bmap_count(s);
+		if (!bm)
+			bm = 1;
+	}
+	/* this can only be true when SB_BMAP_NR = 1 */
+	if (bm >= reiserfs_bmap_count(s))
+		bm = 0;
+	return bm;
+}
+
+/*
+ * hashes the id and then returns > 0 if the block group for the
+ * corresponding hash is full
+ */
+static inline int block_group_used(struct super_block *s, u32 id)
+{
+	int bm = bmap_hash_id(s, id);
+	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
+
+	/* If we don't have cached information on this bitmap block, we're
+	 * going to have to load it later anyway. Loading it here allows us
+	 * to make a better decision. This favors long-term performance gain
+	 * with a better on-disk layout vs. a short term gain of skipping the
+	 * read and potentially having a bad placement. */
+	if (info->free_count == UINT_MAX) {
+		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
+		brelse(bh);
+	}
+
+	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
+		return 0;
+	}
+	return 1;
+}
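+
+/*
+ * Illustration (assuming 4 KiB blocks): one bitmap block covers
+ * 4096 << 3 == 32768 blocks, so the group is treated as full once its
+ * cached free_count drops to 32768 * 60 / 100 == 19660 bits or fewer.
+ */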
+
+/*
+ * the packing is returned in disk byte order
+ */
+__le32 reiserfs_choose_packing(struct inode * dir)
+{
+	__le32 packing;
+	if (TEST_OPTION(packing_groups, dir->i_sb)) {
+		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
+		/*
+		 * some versions of reiserfsck expect packing locality 1 to be
+		 * special
+		 */
+		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
+			packing = INODE_PKEY(dir)->k_objectid;
+		else
+			packing = INODE_PKEY(dir)->k_dir_id;
+	} else
+		packing = INODE_PKEY(dir)->k_objectid;
+	return packing;
+}
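+
+/*
+ * Reading of the policy above: with packing_groups set, a new object
+ * packs into the locality of dir's own parent (k_dir_id) while the block
+ * group hashed from that id still has room; once block_group_used()
+ * reports it full (or for the special locality 1), dir's objectid is
+ * used instead, so the subtree starts a block group of its own.
+ */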
+
+/* Tries to find a contiguous window of zero bits (of the given size) in the
+ * given region of the bitmap and place new blocks there. Returns the number
+ * of allocated blocks. */
+static int scan_bitmap(struct reiserfs_transaction_handle *th,
+		       b_blocknr_t * start, b_blocknr_t finish,
+		       int min, int max, int unfm, sector_t file_block)
+{
+	int nr_allocated = 0;
+	struct super_block *s = th->t_super;
+	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
+	 * - Hans, it is not a block number - Zam. */
+
+	unsigned int bm, off;
+	unsigned int end_bm, end_off;
+	unsigned int off_max = s->s_blocksize << 3;
+
+	BUG_ON(!th->t_trans_id);
+
+	PROC_INFO_INC(s, scan_bitmap.call);
+	if (SB_FREE_BLOCKS(s) <= 0)
+		return 0;	// No point in looking for more free blocks
+
+	get_bit_address(s, *start, &bm, &off);
+	get_bit_address(s, finish, &end_bm, &end_off);
+	if (bm > reiserfs_bmap_count(s))
+		return 0;
+	if (end_bm > reiserfs_bmap_count(s))
+		end_bm = reiserfs_bmap_count(s);
+
+	/* When the bitmap is more than 10% free, anyone can allocate.
+	 * When it's less than 10% free, only files that already use the
+	 * bitmap are allowed. Once we pass 80% full, this restriction
+	 * is lifted.
+	 *
+	 * We do this so that files that grow later still have space close to
+	 * their original allocation. This improves locality, and presumably
+	 * performance as a result.
+	 *
+	 * This is only an allocation policy and does not make up for getting a
+	 * bad hint. Decent hinting must be implemented for this to work well.
+	 */
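+	/*
+	 * As implemented below: the skip_busy pass runs only while the
+	 * filesystem as a whole is more than 5% free (SB_BLOCK_COUNT / 20),
+	 * and within that pass a bitmap block is scanned only if more than
+	 * 10% of its bits are free, or if the hint already points inside it
+	 * (off != 0) and this is not the first block of an unformatted file.
+	 */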
+	if (TEST_OPTION(skip_busy, s)
+	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
+		for (; bm < end_bm; bm++, off = 0) {
+			if ((off && (!unfm || (file_block != 0)))
+			    || SB_AP_BITMAP(s)[bm].free_count >
+			    (s->s_blocksize << 3) / 10)
+				nr_allocated =
+				    scan_bitmap_block(th, bm, &off, off_max,
+						      min, max, unfm);
+			if (nr_allocated)
+				goto ret;
+		}
+		/* we know from above that start is a reasonable number */
+		get_bit_address(s, *start, &bm, &off);
+	}
+
+	for (; bm < end_bm; bm++, off = 0) {
+		nr_allocated =
+		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
+		if (nr_allocated)
+			goto ret;
+	}
+
+	nr_allocated =
+	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
+
+      ret:
+	*start = bm * off_max + off;
+	return nr_allocated;
+
+}
+
+static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
+				 struct inode *inode, b_blocknr_t block,
+				 int for_unformatted)
+{
+	struct super_block *s = th->t_super;
+	struct reiserfs_super_block *rs;
+	struct buffer_head *sbh, *bmbh;
+	struct reiserfs_bitmap_info *apbi;
+	unsigned int nr, offset;
+
+	BUG_ON(!th->t_trans_id);
+
+	PROC_INFO_INC(s, free_block);
+
+	rs = SB_DISK_SUPER_BLOCK(s);
+	sbh = SB_BUFFER_WITH_SB(s);
+	apbi = SB_AP_BITMAP(s);
+
+	get_bit_address(s, block, &nr, &offset);
+
+	if (nr >= reiserfs_bmap_count(s)) {
+		reiserfs_error(s, "vs-4075", "block %lu is out of range",
+			       block);
+		return;
+	}
+
+	bmbh = reiserfs_read_bitmap_block(s, nr);
+	if (!bmbh)
+		return;
+
+	reiserfs_prepare_for_journal(s, bmbh, 1);
+
+	/* clear bit for the given block in bit map */
+	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
+		reiserfs_error(s, "vs-4080",
+			       "block %lu: bit already cleared", block);
+	}
+	apbi[nr].free_count++;
+	journal_mark_dirty(th, s, bmbh);
+	brelse(bmbh);
+
+	reiserfs_prepare_for_journal(s, sbh, 1);
+	/* update super block */
+	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
+
+	journal_mark_dirty(th, s, sbh);
+	if (for_unformatted)
+		dquot_free_block_nodirty(inode, 1);
+}
+
+void reiserfs_free_block(struct reiserfs_transaction_handle *th,
+			 struct inode *inode, b_blocknr_t block,
+			 int for_unformatted)
+{
+	struct super_block *s = th->t_super;
+	BUG_ON(!th->t_trans_id);
+
+	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
+	if (!is_reusable(s, block, 1))
+		return;
+
+	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
+		reiserfs_error(th->t_super, "bitmap-4072",
+			       "Trying to free block outside file system "
+			       "boundaries (%lu > %lu)",
+			       block, sb_block_count(REISERFS_SB(s)->s_rs));
+		return;
+	}
+	/* mark it before we clear it, just in case */
+	journal_mark_freed(th, s, block);
+	_reiserfs_free_block(th, inode, block, for_unformatted);
+}
+
+/* preallocated blocks don't need to be run through journal_mark_freed */
+static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
+					 struct inode *inode, b_blocknr_t block)
+{
+	BUG_ON(!th->t_trans_id);
+	RFALSE(!th->t_super,
+	       "vs-4060: trying to free block on nonexistent device");
+	if (!is_reusable(th->t_super, block, 1))
+		return;
+	_reiserfs_free_block(th, inode, block, 1);
+}
+
+static void __discard_prealloc(struct reiserfs_transaction_handle *th,
+			       struct reiserfs_inode_info *ei)
+{
+	unsigned long save = ei->i_prealloc_block;
+	int dirty = 0;
+	struct inode *inode = &ei->vfs_inode;
+	BUG_ON(!th->t_trans_id);
+#ifdef CONFIG_REISERFS_CHECK
+	if (ei->i_prealloc_count < 0)
+		reiserfs_error(th->t_super, "zam-4001",
+			       "inode has negative prealloc blocks count.");
+#endif
+	while (ei->i_prealloc_count > 0) {
+		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
+		ei->i_prealloc_block++;
+		ei->i_prealloc_count--;
+		dirty = 1;
+	}
+	if (dirty)
+		reiserfs_update_sd(th, inode);
+	ei->i_prealloc_block = save;
+	list_del_init(&(ei->i_prealloc_list));
+}
+
+/* FIXME: this should be an inline function */
+void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
+			       struct inode *inode)
+{
+	struct reiserfs_inode_info *ei = REISERFS_I(inode);
+	BUG_ON(!th->t_trans_id);
+	if (ei->i_prealloc_count)
+		__discard_prealloc(th, ei);
+}
+
+void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
+{
+	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
+
+	BUG_ON(!th->t_trans_id);
+
+	while (!list_empty(plist)) {
+		struct reiserfs_inode_info *ei;
+		ei = list_entry(plist->next, struct reiserfs_inode_info,
+				i_prealloc_list);
+#ifdef CONFIG_REISERFS_CHECK
+		if (!ei->i_prealloc_count) {
+			reiserfs_error(th->t_super, "zam-4001",
+				       "inode is in prealloc list but has "
+				       "no preallocated blocks.");
+		}
+#endif
+		__discard_prealloc(th, ei);
+	}
+}
+
+void reiserfs_init_alloc_options(struct super_block *s)
+{
+	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
+	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
+	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
+}
+
+/* block allocator related options are parsed here */
+int reiserfs_parse_alloc_options(struct super_block *s, char *options)
+{
+	char *this_char, *value;
+
+	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */
+
+	while ((this_char = strsep(&options, ":")) != NULL) {
+		if ((value = strchr(this_char, '=')) != NULL)
+			*value++ = 0;
+
+		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
+			int temp;
+			SET_OPTION(concentrating_formatted_nodes);
+			temp = (value
+				&& *value) ? simple_strtoul(value, &value,
+							    0) : 10;
+			if (temp <= 0 || temp > 100) {
+				REISERFS_SB(s)->s_alloc_options.border = 10;
+			} else {
+				REISERFS_SB(s)->s_alloc_options.border =
+				    100 / temp;
+			}
+			continue;
+		}
+		if (!strcmp(this_char, "displacing_large_files")) {
+			SET_OPTION(displacing_large_files);
+			REISERFS_SB(s)->s_alloc_options.large_file_size =
+			    (value
+			     && *value) ? simple_strtoul(value, &value, 0) : 16;
+			continue;
+		}
+		if (!strcmp(this_char, "displacing_new_packing_localities")) {
+			SET_OPTION(displacing_new_packing_localities);
+			continue;
+		}
+
+		if (!strcmp(this_char, "old_hashed_relocation")) {
+			SET_OPTION(old_hashed_relocation);
+			continue;
+		}
+
+		if (!strcmp(this_char, "new_hashed_relocation")) {
+			SET_OPTION(new_hashed_relocation);
+			continue;
+		}
+
+		if (!strcmp(this_char, "dirid_groups")) {
+			SET_OPTION(dirid_groups);
+			continue;
+		}
+		if (!strcmp(this_char, "oid_groups")) {
+			SET_OPTION(oid_groups);
+			continue;
+		}
+		if (!strcmp(this_char, "packing_groups")) {
+			SET_OPTION(packing_groups);
+			continue;
+		}
+		if (!strcmp(this_char, "hashed_formatted_nodes")) {
+			SET_OPTION(hashed_formatted_nodes);
+			continue;
+		}
+
+		if (!strcmp(this_char, "skip_busy")) {
+			SET_OPTION(skip_busy);
+			continue;
+		}
+
+		if (!strcmp(this_char, "hundredth_slices")) {
+			SET_OPTION(hundredth_slices);
+			continue;
+		}
+
+		if (!strcmp(this_char, "old_way")) {
+			SET_OPTION(old_way);
+			continue;
+		}
+
+		if (!strcmp(this_char, "displace_based_on_dirid")) {
+			SET_OPTION(displace_based_on_dirid);
+			continue;
+		}
+
+		if (!strcmp(this_char, "preallocmin")) {
+			REISERFS_SB(s)->s_alloc_options.preallocmin =
+			    (value
+			     && *value) ? simple_strtoul(value, &value, 0) : 4;
+			continue;
+		}
+
+		if (!strcmp(this_char, "preallocsize")) {
+			REISERFS_SB(s)->s_alloc_options.preallocsize =
+			    (value
+			     && *value) ? simple_strtoul(value, &value,
+							 0) :
+			    PREALLOCATION_SIZE;
+			continue;
+		}
+
+		reiserfs_warning(s, "zam-4001", "unknown option - %s",
+				 this_char);
+		return 1;
+	}
+
+	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
+	return 0;
+}
+
+static void print_sep(struct seq_file *seq, int *first)
+{
+	if (!*first)
+		seq_puts(seq, ":");
+	else
+		*first = 0;
+}
+
+void show_alloc_options(struct seq_file *seq, struct super_block *s)
+{
+	int first = 1;
+
+	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
+		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
+		return;
+
+	seq_puts(seq, ",alloc=");
+
+	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
+		print_sep(seq, &first);
+		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
+			seq_printf(seq, "concentrating_formatted_nodes=%d",
+				100 / REISERFS_SB(s)->s_alloc_options.border);
+		} else
+			seq_puts(seq, "concentrating_formatted_nodes");
+	}
+	if (TEST_OPTION(displacing_large_files, s)) {
+		print_sep(seq, &first);
+		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
+			seq_printf(seq, "displacing_large_files=%lu",
+			    REISERFS_SB(s)->s_alloc_options.large_file_size);
+		} else
+			seq_puts(seq, "displacing_large_files");
+	}
+	if (TEST_OPTION(displacing_new_packing_localities, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "displacing_new_packing_localities");
+	}
+	if (TEST_OPTION(old_hashed_relocation, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "old_hashed_relocation");
+	}
+	if (TEST_OPTION(new_hashed_relocation, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "new_hashed_relocation");
+	}
+	if (TEST_OPTION(dirid_groups, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "dirid_groups");
+	}
+	if (TEST_OPTION(oid_groups, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "oid_groups");
+	}
+	if (TEST_OPTION(packing_groups, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "packing_groups");
+	}
+	if (TEST_OPTION(hashed_formatted_nodes, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "hashed_formatted_nodes");
+	}
+	if (TEST_OPTION(skip_busy, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "skip_busy");
+	}
+	if (TEST_OPTION(hundredth_slices, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "hundredth_slices");
+	}
+	if (TEST_OPTION(old_way, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "old_way");
+	}
+	if (TEST_OPTION(displace_based_on_dirid, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "displace_based_on_dirid");
+	}
+	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
+		print_sep(seq, &first);
+		seq_printf(seq, "preallocmin=%d",
+				REISERFS_SB(s)->s_alloc_options.preallocmin);
+	}
+	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
+		print_sep(seq, &first);
+		seq_printf(seq, "preallocsize=%d",
+				REISERFS_SB(s)->s_alloc_options.preallocsize);
+	}
+}
+
+static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
+{
+	char *hash_in;
+	if (hint->formatted_node) {
+		hash_in = (char *)&hint->key.k_dir_id;
+	} else {
+		if (!hint->inode) {
+			//hint->search_start = hint->beg;
+			hash_in = (char *)&hint->key.k_dir_id;
+		} else
+		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
+			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
+		else
+			hash_in =
+			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
+	}
+
+	hint->search_start =
+	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
+}
+
+/*
+ * Relocation based on dirid: files are hashed into a given bitmap block
+ * group by their directory id. Formatted nodes are unaffected; a
+ * separate policy covers them.
+ */
+static void dirid_groups(reiserfs_blocknr_hint_t * hint)
+{
+	unsigned long hash;
+	__u32 dirid = 0;
+	int bm = 0;
+	struct super_block *sb = hint->th->t_super;
+	if (hint->inode)
+		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
+	else if (hint->formatted_node)
+		dirid = hint->key.k_dir_id;
+
+	if (dirid) {
+		bm = bmap_hash_id(sb, dirid);
+		hash = bm * (sb->s_blocksize << 3);
+		/* give a portion of the block group to metadata */
+		if (hint->inode)
+			hash += sb->s_blocksize / 2;
+		hint->search_start = hash;
+	}
+}
+
+/*
+ * Relocation based on oid: files are hashed into a given bitmap block
+ * group by their object id. Formatted nodes are unaffected; a
+ * separate policy covers them.
+ */
+static void oid_groups(reiserfs_blocknr_hint_t * hint)
+{
+	if (hint->inode) {
+		unsigned long hash;
+		__u32 oid;
+		__u32 dirid;
+		int bm;
+
+		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
+
+		/* keep the root dir and its first set of subdirs close to
+		 * the start of the disk
+		 */
+		if (dirid <= 2)
+			hash = (hint->inode->i_sb->s_blocksize << 3);
+		else {
+			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
+			bm = bmap_hash_id(hint->inode->i_sb, oid);
+			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
+		}
+		hint->search_start = hash;
+	}
+}
+
+/* returns 1 if it finds an indirect item and gets valid hint info
+ * from it, otherwise 0
+ */
+static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
+{
+	struct treepath *path;
+	struct buffer_head *bh;
+	struct item_head *ih;
+	int pos_in_item;
+	__le32 *item;
+	int ret = 0;
+
+	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
+				 * structure supplied; then we rely on supplied search_start */
+		return 0;
+
+	path = hint->path;
+	bh = get_last_bh(path);
+	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
+	ih = get_ih(path);
+	pos_in_item = path->pos_in_item;
+	item = get_item(path);
+
+	hint->search_start = bh->b_blocknr;
+
+	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
+		/* for indirect item: go to left and look for the first non-hole entry
+		   in the indirect item */
+		if (pos_in_item == I_UNFM_NUM(ih))
+			pos_in_item--;
+//          pos_in_item = I_UNFM_NUM (ih) - 1;
+		while (pos_in_item >= 0) {
+			int t = get_block_num(item, pos_in_item);
+			if (t) {
+				hint->search_start = t;
+				ret = 1;
+				break;
+			}
+			pos_in_item--;
+		}
+	}
+
+	/* does result value fit into specified region? */
+	return ret;
+}
+
+/* If this is a formatted node, try to put it in the first part of the
+   device, the size of which is specified as a percentage with a mount
+   option; otherwise try to put it at the end of the device.  This is not
+   to say it is good code to do so, but the effect should be measured.  */
+static inline void set_border_in_hint(struct super_block *s,
+				      reiserfs_blocknr_hint_t * hint)
+{
+	b_blocknr_t border =
+	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
+
+	if (hint->formatted_node)
+		hint->end = border - 1;
+	else
+		hint->beg = border;
+}
+
+static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
+{
+	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
+		hint->search_start =
+		    hint->beg +
+		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
+			       4) % (hint->end - hint->beg);
+	else
+		hint->search_start =
+		    hint->beg +
+		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
+			       4) % (hint->end - hint->beg);
+}
+
+static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
+{
+	char *hash_in;
+
+	if (!hint->inode)
+		hash_in = (char *)&hint->key.k_dir_id;
+	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
+		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
+	else
+		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
+
+	hint->search_start =
+	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
+}
+
+static inline int
+this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
+						   hint)
+{
+	return hint->block ==
+	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
+}
+
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
+{
+	struct in_core_key *key = &hint->key;
+
+	hint->th->displace_new_blocks = 0;
+	hint->search_start =
+	    hint->beg + keyed_hash((char *)(&key->k_objectid),
+				   4) % (hint->end - hint->beg);
+}
+#endif
+
+static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
+{
+	b_blocknr_t border;
+	u32 hash_in;
+
+	if (hint->formatted_node || hint->inode == NULL) {
+		return 0;
+	}
+
+	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
+	border =
+	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
+					 4) % (hint->end - hint->beg - 1);
+	if (border > hint->search_start)
+		hint->search_start = border;
+
+	return 1;
+}
+
+static inline int old_way(reiserfs_blocknr_hint_t * hint)
+{
+	b_blocknr_t border;
+
+	if (hint->formatted_node || hint->inode == NULL) {
+		return 0;
+	}
+
+	border =
+	    hint->beg +
+	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
+							      hint->beg);
+	if (border > hint->search_start)
+		hint->search_start = border;
+
+	return 1;
+}
+
+static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
+{
+	struct in_core_key *key = &hint->key;
+	b_blocknr_t slice_start;
+
+	slice_start =
+	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
+	if (slice_start > hint->search_start
+	    || slice_start + (hint->end / 100) <= hint->search_start) {
+		hint->search_start = slice_start;
+	}
+}
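+
+/*
+ * Illustration (assuming hint->end == 1000000): each slice is
+ * 1000000 / 100 == 10000 blocks; a key hashing to 37 selects the slice
+ * starting at block 370000, and search_start is moved there only if it
+ * does not already fall inside that slice.
+ */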
+
+static void determine_search_start(reiserfs_blocknr_hint_t * hint,
+				   int amount_needed)
+{
+	struct super_block *s = hint->th->t_super;
+	int unfm_hint;
+
+	hint->beg = 0;
+	hint->end = SB_BLOCK_COUNT(s) - 1;
+
+	/* This is former border algorithm. Now with tunable border offset */
+	if (concentrating_formatted_nodes(s))
+		set_border_in_hint(s, hint);
+
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	/* whenever we create a new directory, we displace it.  At first we will
+	   hash for location, later we might look for a moderately empty place for
+	   it */
+	if (displacing_new_packing_localities(s)
+	    && hint->th->displace_new_blocks) {
+		displace_new_packing_locality(hint);
+
+		/* we do not continue determine_search_start,
+		 * if new packing locality is being displaced */
+		return;
+	}
+#endif
+
+	/* all persons should feel encouraged to add more special cases here and
+	 * test them */
+
+	if (displacing_large_files(s) && !hint->formatted_node
+	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
+		displace_large_file(hint);
+		return;
+	}
+
+	/* if none of our special cases is relevant, use the left neighbor in the
+	   tree order of the new node we are allocating for */
+	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
+		hash_formatted_node(hint);
+		return;
+	}
+
+	unfm_hint = get_left_neighbor(hint);
+
+	/* Mimic the old block allocator behaviour: if the VFS allowed
+	   preallocation, new blocks are displaced based on directory ID.
+	   Also, if the suggested search_start is less than the last
+	   preallocated block, we start searching from it, assuming that
+	   HDD data flow is faster in the forward direction */
+	if (TEST_OPTION(old_way, s)) {
+		if (!hint->formatted_node) {
+			if (!reiserfs_hashed_relocation(s))
+				old_way(hint);
+			else if (!reiserfs_no_unhashed_relocation(s))
+				old_hashed_relocation(hint);
+
+			if (hint->inode
+			    && hint->search_start <
+			    REISERFS_I(hint->inode)->i_prealloc_block)
+				hint->search_start =
+				    REISERFS_I(hint->inode)->i_prealloc_block;
+		}
+		return;
+	}
+
+	/* This is an approach proposed by Hans */
+	if (TEST_OPTION(hundredth_slices, s)
+	    && !(displacing_large_files(s) && !hint->formatted_node)) {
+		hundredth_slices(hint);
+		return;
+	}
+
+	/* old_hashed_relocation only works on unformatted */
+	if (!unfm_hint && !hint->formatted_node &&
+	    TEST_OPTION(old_hashed_relocation, s)) {
+		old_hashed_relocation(hint);
+	}
+	/* new_hashed_relocation works with both formatted/unformatted nodes */
+	if ((!unfm_hint || hint->formatted_node) &&
+	    TEST_OPTION(new_hashed_relocation, s)) {
+		new_hashed_relocation(hint);
+	}
+	/* dirid grouping works only on unformatted nodes */
+	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
+		dirid_groups(hint);
+	}
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
+		dirid_groups(hint);
+	}
+#endif
+
+	/* oid grouping works only on unformatted nodes */
+	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
+		oid_groups(hint);
+	}
+	return;
+}
+
+static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
+{
+	/* make minimum size a mount option and benchmark both ways */
+	/* we preallocate blocks only for regular files of at least a specific size */
+	/* benchmark preallocating always and see what happens */
+
+	hint->prealloc_size = 0;
+
+	if (!hint->formatted_node && hint->preallocate) {
+		if (S_ISREG(hint->inode->i_mode)
+		    && hint->inode->i_size >=
+		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
+		    preallocmin * hint->inode->i_sb->s_blocksize)
+			hint->prealloc_size =
+			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
+			    preallocsize - 1;
+	}
+	return CARRY_ON;
+}
+
+/* XXX I know it could be merged with upper-level function;
+   but maybe the resulting function would be too complex. */
+static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
+						 b_blocknr_t * new_blocknrs,
+						 b_blocknr_t start,
+						 b_blocknr_t finish, int min,
+						 int amount_needed,
+						 int prealloc_size)
+{
+	int rest = amount_needed;
+	int nr_allocated;
+
+	while (rest > 0 && start <= finish) {
+		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
+					   rest + prealloc_size,
+					   !hint->formatted_node, hint->block);
+
+		if (nr_allocated == 0)	/* no new blocks allocated, return */
+			break;
+
+		/* fill free_blocknrs array first */
+		while (rest > 0 && nr_allocated > 0) {
+			*new_blocknrs++ = start++;
+			rest--;
+			nr_allocated--;
+		}
+
+		/* do we have something left to fill the prealloc. array with, too? */
+		if (nr_allocated > 0) {
+			/* this means prealloc_size was greater than 0, so we do preallocation */
+			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
+				 &SB_JOURNAL(hint->th->t_super)->
+				 j_prealloc_list);
+			REISERFS_I(hint->inode)->i_prealloc_block = start;
+			REISERFS_I(hint->inode)->i_prealloc_count =
+			    nr_allocated;
+			break;
+		}
+	}
+
+	return (amount_needed - rest);
+}
+
+static inline int blocknrs_and_prealloc_arrays_from_search_start
+    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
+     int amount_needed) {
+	struct super_block *s = hint->th->t_super;
+	b_blocknr_t start = hint->search_start;
+	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
+	int passno = 0;
+	int nr_allocated = 0;
+
+	determine_prealloc_size(hint);
+	if (!hint->formatted_node) {
+		int quota_ret;
+#ifdef REISERQUOTA_DEBUG
+		reiserfs_debug(s, REISERFS_DEBUG_CODE,
+			       "reiserquota: allocating %d blocks id=%u",
+			       amount_needed, hint->inode->i_uid);
+#endif
+		quota_ret =
+		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
+		if (quota_ret)	/* Quota exceeded? */
+			return QUOTA_EXCEEDED;
+		if (hint->preallocate && hint->prealloc_size) {
+#ifdef REISERQUOTA_DEBUG
+			reiserfs_debug(s, REISERFS_DEBUG_CODE,
+				       "reiserquota: allocating (prealloc) %d blocks id=%u",
+				       hint->prealloc_size, hint->inode->i_uid);
+#endif
+			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
+							 hint->prealloc_size);
+			if (quota_ret)
+				hint->preallocate = hint->prealloc_size = 0;
+		}
+		/* for unformatted nodes, force large allocations */
+	}
+
+	do {
+		switch (passno++) {
+		case 0:	/* Search from hint->search_start to end of disk */
+			start = hint->search_start;
+			finish = SB_BLOCK_COUNT(s) - 1;
+			break;
+		case 1:	/* Search from hint->beg to hint->search_start */
+			start = hint->beg;
+			finish = hint->search_start;
+			break;
+		case 2:	/* Last chance: Search from 0 to hint->beg */
+			start = 0;
+			finish = hint->beg;
+			break;
+		default:	/* We've tried searching everywhere, not enough space */
+			/* Free the blocks */
+			if (!hint->formatted_node) {
+#ifdef REISERQUOTA_DEBUG
+				reiserfs_debug(s, REISERFS_DEBUG_CODE,
+					       "reiserquota: freeing (nospace) %d blocks id=%u",
+					       amount_needed +
+					       hint->prealloc_size -
+					       nr_allocated,
+					       hint->inode->i_uid);
+#endif
+				/* Free not allocated blocks */
+				dquot_free_block_nodirty(hint->inode,
+					amount_needed + hint->prealloc_size -
+					nr_allocated);
+			}
+			while (nr_allocated--)
+				reiserfs_free_block(hint->th, hint->inode,
+						    new_blocknrs[nr_allocated],
+						    !hint->formatted_node);
+
+			return NO_DISK_SPACE;
+		}
+	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
+								 new_blocknrs +
+								 nr_allocated,
+								 start, finish,
+								 1,
+								 amount_needed -
+								 nr_allocated,
+								 hint->
+								 prealloc_size))
+		 < amount_needed);
+	if (!hint->formatted_node &&
+	    amount_needed + hint->prealloc_size >
+	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
+		/* Some of preallocation blocks were not allocated */
+#ifdef REISERQUOTA_DEBUG
+		reiserfs_debug(s, REISERFS_DEBUG_CODE,
+			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
+			       amount_needed + hint->prealloc_size -
+			       nr_allocated -
+			       REISERFS_I(hint->inode)->i_prealloc_count,
+			       hint->inode->i_uid);
+#endif
+		dquot_free_block_nodirty(hint->inode, amount_needed +
+					 hint->prealloc_size - nr_allocated -
+					 REISERFS_I(hint->inode)->
+					 i_prealloc_count);
+	}
+
+	return CARRY_ON;
+}
+
+/* grab new blocknrs from preallocated list */
+/* return amount still needed after using them */
+static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
+					      b_blocknr_t * new_blocknrs,
+					      int amount_needed)
+{
+	struct inode *inode = hint->inode;
+
+	if (REISERFS_I(inode)->i_prealloc_count > 0) {
+		while (amount_needed) {
+
+			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
+			REISERFS_I(inode)->i_prealloc_count--;
+
+			amount_needed--;
+
+			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
+				list_del(&REISERFS_I(inode)->i_prealloc_list);
+				break;
+			}
+		}
+	}
+	/* return amount still needed after using preallocated blocks */
+	return amount_needed;
+}
+
+int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us	/* Amount of blocks we have
+																	   already reserved */ )
+{
+	int initial_amount_needed = amount_needed;
+	int ret;
+	struct super_block *s = hint->th->t_super;
+
+	/* Check if there is enough space, taking into account reserved space */
+	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
+	    amount_needed - reserved_by_us)
+		return NO_DISK_SPACE;
+	/* should this be if !hint->inode &&  hint->preallocate? */
+	/* do you mean hint->formatted_node can be removed ? - Zam */
+	/* hint->formatted_node cannot be removed because we try to access
+	   inode information here, and there is often no inode associated with
+	   metadata allocations - green */
+
+	if (!hint->formatted_node && hint->preallocate) {
+		amount_needed = use_preallocated_list_if_available
+		    (hint, new_blocknrs, amount_needed);
+		if (amount_needed == 0)	/* all blocknrs we need we got from
+					   prealloc. list */
+			return CARRY_ON;
+		new_blocknrs += (initial_amount_needed - amount_needed);
+	}
+
+	/* find search start and save it in hint structure */
+	determine_search_start(hint, amount_needed);
+	if (hint->search_start >= SB_BLOCK_COUNT(s))
+		hint->search_start = SB_BLOCK_COUNT(s) - 1;
+
+	/* allocation itself; fill new_blocknrs and preallocation arrays */
+	ret = blocknrs_and_prealloc_arrays_from_search_start
+	    (hint, new_blocknrs, amount_needed);
+
+	/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
+	 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
+	 * variant) */
+
+	if (ret != CARRY_ON) {
+		while (amount_needed++ < initial_amount_needed) {
+			reiserfs_free_block(hint->th, hint->inode,
+					    *(--new_blocknrs), 1);
+		}
+	}
+	return ret;
+}
+
+void reiserfs_cache_bitmap_metadata(struct super_block *sb,
+                                    struct buffer_head *bh,
+                                    struct reiserfs_bitmap_info *info)
+{
+	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
+
+	/* The first bit must ALWAYS be 1 */
+	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
+		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
+			       "corrupted: first bit must be 1", bh->b_blocknr);
+
+	info->free_count = 0;
+
+	while (--cur >= (unsigned long *)bh->b_data) {
+		/* 0 and ~0 are special, we can optimize for them */
+		if (*cur == 0)
+			info->free_count += BITS_PER_LONG;
+		else if (*cur != ~0L)	/* A mix, investigate */
+			info->free_count += BITS_PER_LONG - hweight_long(*cur);
+	}
+}
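+
+/*
+ * Example for the word-wise count above: on a 64-bit machine a word of
+ * 0x00000000ffffffffUL has hweight_long() == 32 and so contributes
+ * 64 - 32 == 32 free bits; all-zero words add a full BITS_PER_LONG and
+ * all-ones words add nothing.
+ */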
+
+struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
+                                               unsigned int bitmap)
+{
+	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
+	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
+	struct buffer_head *bh;
+
+	/* Way old format filesystems had the bitmaps packed up front.
+	 * I doubt there are any of these left, but just in case... */
+	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
+	                      &(REISERFS_SB(sb)->s_properties))))
+		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
+	else if (bitmap == 0)
+		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
+
+	reiserfs_write_unlock(sb);
+	bh = sb_bread(sb, block);
+	reiserfs_write_lock(sb);
+	if (bh == NULL)
+		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
+		                 "reading failed", __func__, block);
+	else {
+		if (buffer_locked(bh)) {
+			PROC_INFO_INC(sb, scan_bitmap.wait);
+			reiserfs_write_unlock(sb);
+			__wait_on_buffer(bh);
+			reiserfs_write_lock(sb);
+		}
+		BUG_ON(!buffer_uptodate(bh));
+		BUG_ON(atomic_read(&bh->b_count) == 0);
+
+		if (info->free_count == UINT_MAX)
+			reiserfs_cache_bitmap_metadata(sb, bh, info);
+	}
+
+	return bh;
+}
+
+int reiserfs_init_bitmap_cache(struct super_block *sb)
+{
+	struct reiserfs_bitmap_info *bitmap;
+	unsigned int bmap_nr = reiserfs_bmap_count(sb);
+
+	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
+	if (bitmap == NULL)
+		return -ENOMEM;
+
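+	/* The 0xff fill leaves every free_count at ~0 (UINT_MAX), which
+	 * reiserfs_read_bitmap_block() treats as "metadata not cached yet" */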
+	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
+
+	SB_AP_BITMAP(sb) = bitmap;
+
+	return 0;
+}
+
+void reiserfs_free_bitmap_cache(struct super_block *sb)
+{
+	if (SB_AP_BITMAP(sb)) {
+		vfree(SB_AP_BITMAP(sb));
+		SB_AP_BITMAP(sb) = NULL;
+	}
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/dir.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/dir.c
new file mode 100644
index 0000000..7c431ed
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/dir.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include "reiserfs.h"
+#include <linux/stat.h>
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+
+extern const struct reiserfs_key MIN_KEY;
+
+static int reiserfs_readdir(struct file *, void *, filldir_t);
+static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+			      int datasync);
+
+const struct file_operations reiserfs_dir_operations = {
+	.llseek = generic_file_llseek,
+	.read = generic_read_dir,
+	.readdir = reiserfs_readdir,
+	.fsync = reiserfs_dir_fsync,
+	.unlocked_ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = reiserfs_compat_ioctl,
+#endif
+};
+
+static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+			      int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	int err;
+
+	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (err)
+		return err;
+
+	mutex_lock(&inode->i_mutex);
+	reiserfs_write_lock(inode->i_sb);
+	err = reiserfs_commit_for_inode(inode);
+	reiserfs_write_unlock(inode->i_sb);
+	mutex_unlock(&inode->i_mutex);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+#define store_ih(where,what) copy_item_head (where, what)
+
+static inline bool is_privroot_deh(struct dentry *dir,
+				   struct reiserfs_de_head *deh)
+{
+	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
+	return (dir == dir->d_parent && privroot->d_inode &&
+	        deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
+}
+
+int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
+			   filldir_t filldir, loff_t *pos)
+{
+	struct inode *inode = dentry->d_inode;
+	struct cpu_key pos_key;	/* key of current position in the directory (key of directory entry) */
+	INITIALIZE_PATH(path_to_entry);
+	struct buffer_head *bh;
+	int item_num, entry_num;
+	const struct reiserfs_key *rkey;
+	struct item_head *ih, tmp_ih;
+	int search_res;
+	char *local_buf;
+	loff_t next_pos;
+	char small_buf[32];	/* avoid kmalloc if we can */
+	struct reiserfs_dir_entry de;
+	int ret = 0;
+
+	reiserfs_write_lock(inode->i_sb);
+
+	reiserfs_check_lock_depth(inode->i_sb, "readdir");
+
+	/* form a key to search for the next directory entry using the f_pos
+	   field of the file structure; an f_pos of 0 means we start from the
+	   "." entry (DOT_OFFSET) */
+	make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
+	next_pos = cpu_key_k_offset(&pos_key);
+
+	path_to_entry.reada = PATH_READA;
+	while (1) {
+	      research:
+		/* search for the directory item containing the entry with
+		   the specified key */
+		search_res =
+		    search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
+					&de);
+		if (search_res == IO_ERROR) {
+			// FIXME: we could just skip the part of the
+			// directory which could not be read
+			ret = -EIO;
+			goto out;
+		}
+		entry_num = de.de_entry_num;
+		bh = de.de_bh;
+		item_num = de.de_item_num;
+		ih = de.de_ih;
+		store_ih(&tmp_ih, ih);
+
+		/* we must have found an item, that is, an item of this directory */
+		RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
+		       "vs-9000: found item %h does not match to dir we readdir %K",
+		       ih, &pos_key);
+		RFALSE(item_num > B_NR_ITEMS(bh) - 1,
+		       "vs-9005 item_num == %d, item amount == %d",
+		       item_num, B_NR_ITEMS(bh));
+
+		/* and the entry number must not exceed the number of entries in the item */
+		RFALSE(I_ENTRY_COUNT(ih) < entry_num,
+		       "vs-9010: entry number is too big %d (%d)",
+		       entry_num, I_ENTRY_COUNT(ih));
+
+		if (search_res == POSITION_FOUND
+		    || entry_num < I_ENTRY_COUNT(ih)) {
+			/* go through all entries in the directory item,
+			   beginning from the entry that has been found */
+			struct reiserfs_de_head *deh =
+			    B_I_DEH(bh, ih) + entry_num;
+
+			for (; entry_num < I_ENTRY_COUNT(ih);
+			     entry_num++, deh++) {
+				int d_reclen;
+				char *d_name;
+				off_t d_off;
+				ino_t d_ino;
+				loff_t cur_pos = deh_offset(deh);
+
+				if (!de_visible(deh))
+					/* it is a hidden entry */
+					continue;
+				d_reclen = entry_length(bh, ih, entry_num);
+				d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
+
+				if (d_reclen <= 0 ||
+				    d_name + d_reclen > bh->b_data + bh->b_size) {
+					/* The entry data is corrupted;
+					 * we'd better stop here */
+					pathrelse(&path_to_entry);
+					ret = -EIO;
+					goto out;
+				}
+
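+				/* a trailing NUL means the on-disk entry is
+				 * padded out; in that case take the real
+				 * name length from strlen() */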
+				if (!d_name[d_reclen - 1])
+					d_reclen = strlen(d_name);
+
+				if (d_reclen >
+				    REISERFS_MAX_NAME(inode->i_sb->
+						      s_blocksize)) {
+					/* too big to send back to VFS */
+					continue;
+				}
+
+				/* Ignore the .reiserfs_priv entry */
+				if (is_privroot_deh(dentry, deh))
+					continue;
+
+				d_off = deh_offset(deh);
+				*pos = d_off;
+				d_ino = deh_objectid(deh);
+				if (d_reclen <= 32) {
+					local_buf = small_buf;
+				} else {
+					local_buf = kmalloc(d_reclen,
+							    GFP_NOFS);
+					if (!local_buf) {
+						pathrelse(&path_to_entry);
+						ret = -ENOMEM;
+						goto out;
+					}
+					if (item_moved(&tmp_ih, &path_to_entry)) {
+						kfree(local_buf);
+						goto research;
+					}
+				}
+				// Note that we copy the name to user space
+				// via a temporary buffer (local_buf) because
+				// filldir may block if the user-space buffer
+				// is swapped out; meanwhile the entry could
+				// move somewhere else
+				memcpy(local_buf, d_name, d_reclen);
+
+				/*
+				 * Since filldir might sleep, we can release
+				 * the write lock here for other waiters
+				 */
+				reiserfs_write_unlock(inode->i_sb);
+				if (filldir
+				    (dirent, local_buf, d_reclen, d_off, d_ino,
+				     DT_UNKNOWN) < 0) {
+					reiserfs_write_lock(inode->i_sb);
+					if (local_buf != small_buf) {
+						kfree(local_buf);
+					}
+					goto end;
+				}
+				reiserfs_write_lock(inode->i_sb);
+				if (local_buf != small_buf) {
+					kfree(local_buf);
+				}
+
+				/* deh_offset(deh) may be invalid now. */
+				next_pos = cur_pos + 1;
+
+				if (item_moved(&tmp_ih, &path_to_entry)) {
+					goto research;
+				}
+			}	/* for */
+		}
+
+		if (item_num != B_NR_ITEMS(bh) - 1)
+			// end of directory has been reached
+			goto end;
+
+		/* the item we went through is the last item of the node. Use
+		   the right delimiting key to check whether this is the end
+		   of the directory */
+		rkey = get_rkey(&path_to_entry, inode->i_sb);
+		if (!comp_le_keys(rkey, &MIN_KEY)) {
+			/* set pos_key to the smallest key greater than the
+			   key of the last entry in the item */
+			set_cpu_key_k_offset(&pos_key, next_pos);
+			continue;
+		}
+
+		if (COMP_SHORT_KEYS(rkey, &pos_key)) {
+			// end of directory has been reached
+			goto end;
+		}
+
+		/* directory continues in the right neighboring block */
+		set_cpu_key_k_offset(&pos_key,
+				     le_key_k_offset(KEY_FORMAT_3_5, rkey));
+
+	}			/* while */
+
+end:
+	*pos = next_pos;
+	pathrelse(&path_to_entry);
+	reiserfs_check_path(&path_to_entry);
+out:
+	reiserfs_write_unlock(inode->i_sb);
+	return ret;
+}
+
+static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos);
+}
+
+/* compose directory item containing "." and ".." entries (entries are
+   not aligned to 4 byte boundary) */
+/* the last four params are LE */
+void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
+			    __le32 par_dirid, __le32 par_objid)
+{
+	struct reiserfs_de_head *deh;
+
+	memset(body, 0, EMPTY_DIR_SIZE_V1);
+	deh = (struct reiserfs_de_head *)body;
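+	/* Layout of the EMPTY_DIR_SIZE_V1 bytes: the two entry headers come
+	 * first and the names are packed at the tail.  deh_location points
+	 * back from the end of the item, so ".." sits just before the
+	 * final "." */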
+
+	/* direntry header of "." */
+	put_deh_offset(&(deh[0]), DOT_OFFSET);
+	/* these two are from make_le_item_head, and are LE */
+	deh[0].deh_dir_id = dirid;
+	deh[0].deh_objectid = objid;
+	deh[0].deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen("."));
+	mark_de_visible(&(deh[0]));
+
+	/* direntry header of ".." */
+	put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
+	/* key of ".." for the root directory */
+	/* these two are from the inode, and are LE */
+	deh[1].deh_dir_id = par_dirid;
+	deh[1].deh_objectid = par_objid;
+	deh[1].deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen(".."));
+	mark_de_visible(&(deh[1]));
+
+	/* copy ".." and "." */
+	memcpy(body + deh_location(&(deh[0])), ".", 1);
+	memcpy(body + deh_location(&(deh[1])), "..", 2);
+}
+
+/* compose directory item containing "." and ".." entries */
+void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
+			 __le32 par_dirid, __le32 par_objid)
+{
+	struct reiserfs_de_head *deh;
+
+	memset(body, 0, EMPTY_DIR_SIZE);
+	deh = (struct reiserfs_de_head *)body;
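+	/* same layout as the v1 item, except that each name slot is padded
+	 * out to a 4-byte boundary via ROUND_UP() */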
+
+	/* direntry header of "." */
+	put_deh_offset(&(deh[0]), DOT_OFFSET);
+	/* these two are from make_le_item_head, and are LE */
+	deh[0].deh_dir_id = dirid;
+	deh[0].deh_objectid = objid;
+	deh[0].deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
+	mark_de_visible(&(deh[0]));
+
+	/* direntry header of ".." */
+	put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
+	/* key of ".." for the root directory */
+	/* these two are from the inode, and are LE */
+	deh[1].deh_dir_id = par_dirid;
+	deh[1].deh_objectid = par_objid;
+	deh[1].deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(&(deh[1]),
+			 deh_location(&(deh[0])) - ROUND_UP(strlen("..")));
+	mark_de_visible(&(deh[1]));
+
+	/* copy ".." and "." */
+	memcpy(body + deh_location(&(deh[0])), ".", 1);
+	memcpy(body + deh_location(&(deh[1])), "..", 2);
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/do_balan.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/do_balan.c
new file mode 100644
index 0000000..2b7882b
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/do_balan.c
@@ -0,0 +1,2074 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+/* Now we have all the buffers that must be used in balancing the tree.
+ * Further calculations cannot cause schedule(), and thus the buffer tree
+ * will be stable until the balancing is finished.  Balance the tree
+ * according to the analysis made before, using the buffers obtained from
+ * all of the above. */
+
+/**
+ ** balance_leaf_when_delete
+ ** balance_leaf
+ ** do_balance
+ **
+ **/
+
+#include <asm/uaccess.h>
+#include <linux/time.h>
+#include "reiserfs.h"
+#include <linux/buffer_head.h>
+#include <linux/kernel.h>
+
+static inline void buffer_info_init_left(struct tree_balance *tb,
+                                         struct buffer_info *bi)
+{
+	bi->tb          = tb;
+	bi->bi_bh       = tb->L[0];
+	bi->bi_parent   = tb->FL[0];
+	bi->bi_position = get_left_neighbor_position(tb, 0);
+}
+
+static inline void buffer_info_init_right(struct tree_balance *tb,
+                                          struct buffer_info *bi)
+{
+	bi->tb          = tb;
+	bi->bi_bh       = tb->R[0];
+	bi->bi_parent   = tb->FR[0];
+	bi->bi_position = get_right_neighbor_position(tb, 0);
+}
+
+static inline void buffer_info_init_tbS0(struct tree_balance *tb,
+                                         struct buffer_info *bi)
+{
+	bi->tb          = tb;
+	bi->bi_bh       = PATH_PLAST_BUFFER(tb->tb_path);
+	bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
+	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
+}
+
+static inline void buffer_info_init_bh(struct tree_balance *tb,
+                                       struct buffer_info *bi,
+                                       struct buffer_head *bh)
+{
+	bi->tb          = tb;
+	bi->bi_bh       = bh;
+	bi->bi_parent   = NULL;
+	bi->bi_position = 0;
+}
+
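+/* With journalling always on, "mark dirty" just logs the buffer in the
+ * running transaction; the flag argument is unused. */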
+inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
+				       struct buffer_head *bh, int flag)
+{
+	journal_mark_dirty(tb->transaction_handle,
+			   tb->transaction_handle->t_super, bh);
+}
+
+#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
+#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
+
+/* summary:
+ if deleting something ( tb->insert_size[0] < 0 )
+   return(balance_leaf_when_delete()); (flag d handled here)
+ else
+   if lnum is larger than 0 we put items into the left node
+   if rnum is larger than 0 we put items into the right node
+   if snum1 is larger than 0 we put items into the new node s1
+   if snum2 is larger than 0 we put items into the new node s2
+Note that all the *num* values count new items being created.
+
+It would be easier to read balance_leaf() if each of these summary
+lines was a separate procedure rather than being inlined.  I think
+that there are many passages here and in balance_leaf_when_delete() in
+which two calls to one procedure can replace two passages, and it
+might save cache space and reduce software maintenance costs to do so.
+
+Vladimir made the perceptive comment that we should offload most of
+the decision making in this function into fix_nodes/check_balance, and
+then create some sort of structure in tb that says what actions should
+be performed by do_balance.
+
+-Hans */
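+/* For example (an illustrative case, not taken from the analysis code):
+ * lnum = 2, rnum = 0, blknum = 1 would mean the two leftmost items of S[0]
+ * are shifted into L[0], nothing goes right, and no new nodes are needed. */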
+
+/* Balance leaf node in case of delete or cut: insert_size[0] < 0
+ *
+ * lnum, rnum can have values >= -1
+ *	-1 means that the neighbor must be joined with S
+ *	 0 means that nothing should be done with the neighbor
+ *	>0 means to shift the specified number of items to the neighbor, entirely or in part
+ */
+static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int item_pos = PATH_LAST_POSITION(tb->tb_path);
+	int pos_in_item = tb->tb_path->pos_in_item;
+	struct buffer_info bi;
+	int n;
+	struct item_head *ih;
+
+	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
+	       "vs-12000: level: wrong FR %z", tb->FR[0]);
+	RFALSE(tb->blknum[0] > 1,
+	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
+	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
+	       "PAP-12010: tree can not be empty");
+
+	ih = B_N_PITEM_HEAD(tbS0, item_pos);
+	buffer_info_init_tbS0(tb, &bi);
+
+	/* Delete or truncate the item */
+
+	switch (flag) {
+	case M_DELETE:		/* delete item in S[0] */
+
+		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
+		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
+		       -tb->insert_size[0], ih);
+
+		leaf_delete_items(&bi, 0, item_pos, 1, -1);
+
+		if (!item_pos && tb->CFL[0]) {
+			if (B_NR_ITEMS(tbS0)) {
+				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
+					    0);
+			} else {
+				if (!PATH_H_POSITION(tb->tb_path, 1))
+					replace_key(tb, tb->CFL[0], tb->lkey[0],
+						    PATH_H_PPARENT(tb->tb_path,
+								   0), 0);
+			}
+		}
+
+		RFALSE(!item_pos && !tb->CFL[0],
+		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
+		       tb->L[0]);
+
+		break;
+
+	case M_CUT:{		/* cut item in S[0] */
+			if (is_direntry_le_ih(ih)) {
+
+				/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
+				/* when we cut a directory, tb->insert_size[0] means the number of entries to be cut (always 1) */
+				tb->insert_size[0] = -1;
+				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
+						     -tb->insert_size[0]);
+
+				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
+				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
+				       tb->CFL[0]);
+
+				if (!item_pos && !pos_in_item && tb->CFL[0]) {
+					replace_key(tb, tb->CFL[0], tb->lkey[0],
+						    tbS0, 0);
+				}
+			} else {
+				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
+						     -tb->insert_size[0]);
+
+				RFALSE(!ih_item_len(ih),
+				       "PAP-12035: cut must leave non-zero dynamic length of item");
+			}
+			break;
+		}
+
+	default:
+		print_cur_tb("12040");
+		reiserfs_panic(tb->tb_sb, "PAP-12040",
+			       "unexpected mode: %s(%d)",
+			       (flag ==
+				M_PASTE) ? "PASTE" : ((flag ==
+						       M_INSERT) ? "INSERT" :
+						      "UNKNOWN"), flag);
+	}
+
+	/* the rule is that no shifting occurs unless it allows a node to be freed */
+	n = B_NR_ITEMS(tbS0);
+	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
+		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
+			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
+				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
+					/* all contents of all the 3 buffers will be in L[0] */
+					if (PATH_H_POSITION(tb->tb_path, 1) == 0
+					    && 1 < B_NR_ITEMS(tb->FR[0]))
+						replace_key(tb, tb->CFL[0],
+							    tb->lkey[0],
+							    tb->FR[0], 1);
+
+					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
+							-1, NULL);
+					leaf_move_items(LEAF_FROM_R_TO_L, tb,
+							B_NR_ITEMS(tb->R[0]),
+							-1, NULL);
+
+					reiserfs_invalidate_buffer(tb, tbS0);
+					reiserfs_invalidate_buffer(tb,
+								   tb->R[0]);
+
+					return 0;
+				}
+				/* all contents of all the 3 buffers will be in R[0] */
+				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
+						NULL);
+				leaf_move_items(LEAF_FROM_L_TO_R, tb,
+						B_NR_ITEMS(tb->L[0]), -1, NULL);
+
+				/* right_delimiting_key is correct in R[0] */
+				replace_key(tb, tb->CFR[0], tb->rkey[0],
+					    tb->R[0], 0);
+
+				reiserfs_invalidate_buffer(tb, tbS0);
+				reiserfs_invalidate_buffer(tb, tb->L[0]);
+
+				return -1;
+			}
+
+			RFALSE(tb->rnum[0] != 0,
+			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
+			/* all contents of L[0] and S[0] will be in L[0] */
+			leaf_shift_left(tb, n, -1);
+
+			reiserfs_invalidate_buffer(tb, tbS0);
+
+			return 0;
+		}
+		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
+
+		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
+		       (tb->lnum[0] + tb->rnum[0] > n + 1),
+		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
+		       tb->rnum[0], tb->lnum[0], n);
+		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
+		       (tb->lbytes != -1 || tb->rbytes != -1),
+		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
+		       tb->rbytes, tb->lbytes);
+		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
+		       (tb->lbytes < 1 || tb->rbytes != -1),
+		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
+		       tb->rbytes, tb->lbytes);
+
+		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+
+		reiserfs_invalidate_buffer(tb, tbS0);
+
+		return 0;
+	}
+
+	if (tb->rnum[0] == -1) {
+		/* all contents of R[0] and S[0] will be in R[0] */
+		leaf_shift_right(tb, n, -1);
+		reiserfs_invalidate_buffer(tb, tbS0);
+		return 0;
+	}
+
+	RFALSE(tb->rnum[0],
+	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
+	return 0;
+}
+
+static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (in little-endian format) */
+			const char *body,	/* body  of inserted item or bytes to paste */
+			int flag,	/* i - insert, d - delete, c - cut, p - paste
+					   (see comment to do_balance) */
+			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
+							   must be inserted into the next higher level.  This insertion
+							   consists of a key or two keys and their corresponding
+							   pointers */
+			struct buffer_head **insert_ptr	/* inserted node-ptrs for the next level */
+    )
+{
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int item_pos = PATH_LAST_POSITION(tb->tb_path);	/*  index into the array of item headers in S[0]
+							   of the affected item */
+	struct buffer_info bi;
+	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
+	int snum[2];		/* number of items that will be placed
+				   into S_new (includes partially shifted
+				   items) */
+	int sbytes[2];		/* if an item is partially shifted into S_new then
+				   if it is a directory item
+				   it is the number of entries from the item that are shifted into S_new
+				   else
+				   it is the number of bytes from the item that are shifted into S_new
+				 */
+	int n, i;
+	int ret_val;
+	int pos_in_item;
+	int zeros_num;
+
+	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
+
+	/* Make balance in case insert_size[0] < 0 */
+	if (tb->insert_size[0] < 0)
+		return balance_leaf_when_delete(tb, flag);
+
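+	/* a NULL body on insert means the new item carries no data yet
+	 * (e.g. a zero-filled region); paste that many zero bytes instead */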
+	zeros_num = 0;
+	if (flag == M_INSERT && !body)
+		zeros_num = ih_item_len(ih);
+
+	pos_in_item = tb->tb_path->pos_in_item;
+	/* for indirect item pos_in_item is measured in unformatted node
+	   pointers. Recalculate to bytes */
+	if (flag != M_INSERT
+	    && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
+		pos_in_item *= UNFM_P_SIZE;
+
+	if (tb->lnum[0] > 0) {
+		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
+		if (item_pos < tb->lnum[0]) {
+			/* new item or part of it falls into L[0]; shift it too */
+			n = B_NR_ITEMS(tb->L[0]);
+
+			switch (flag) {
+			case M_INSERT:	/* insert item into L[0] */
+
+				if (item_pos == tb->lnum[0] - 1
+				    && tb->lbytes != -1) {
+					/* part of new item falls into L[0] */
+					int new_item_len;
+					int version;
+
+					ret_val =
+					    leaf_shift_left(tb, tb->lnum[0] - 1,
+							    -1);
+
+					/* Calculate item length to insert to S[0] */
+					new_item_len =
+					    ih_item_len(ih) - tb->lbytes;
+					/* Calculate and check item length to insert to L[0] */
+					put_ih_item_len(ih,
+							ih_item_len(ih) -
+							new_item_len);
+
+					RFALSE(ih_item_len(ih) <= 0,
+					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
+					       ih_item_len(ih));
+
+					/* Insert new item into L[0] */
+					buffer_info_init_left(tb, &bi);
+					leaf_insert_into_buf(&bi,
+							     n + item_pos -
+							     ret_val, ih, body,
+							     zeros_num >
+							     ih_item_len(ih) ?
+							     ih_item_len(ih) :
+							     zeros_num);
+
+					version = ih_version(ih);
+
+					/* Calculate key component, item length and body to insert into S[0] */
+					set_le_ih_k_offset(ih,
+							   le_ih_k_offset(ih) +
+							   (tb->
+							    lbytes <<
+							    (is_indirect_le_ih
+							     (ih) ? tb->tb_sb->
+							     s_blocksize_bits -
+							     UNFM_P_SHIFT :
+							     0)));
+
+					put_ih_item_len(ih, new_item_len);
+					if (tb->lbytes > zeros_num) {
+						body +=
+						    (tb->lbytes - zeros_num);
+						zeros_num = 0;
+					} else
+						zeros_num -= tb->lbytes;
+
+					RFALSE(ih_item_len(ih) <= 0,
+					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
+					       ih_item_len(ih));
+				} else {
+					/* new item in whole falls into L[0] */
+					/* Shift lnum[0]-1 items to L[0] */
+					ret_val =
+					    leaf_shift_left(tb, tb->lnum[0] - 1,
+							    tb->lbytes);
+					/* Insert new item into L[0] */
+					buffer_info_init_left(tb, &bi);
+					leaf_insert_into_buf(&bi,
+							     n + item_pos -
+							     ret_val, ih, body,
+							     zeros_num);
+					tb->insert_size[0] = 0;
+					zeros_num = 0;
+				}
+				break;
+
+			case M_PASTE:	/* append item in L[0] */
+
+				if (item_pos == tb->lnum[0] - 1
+				    && tb->lbytes != -1) {
+					/* we must shift the part of the appended item */
+					if (is_direntry_le_ih
+					    (B_N_PITEM_HEAD(tbS0, item_pos))) {
+
+						RFALSE(zeros_num,
+						       "PAP-12090: invalid parameter in case of a directory");
+						/* directory item */
+						if (tb->lbytes > pos_in_item) {
+							/* new directory entry falls into L[0] */
+							struct item_head
+							    *pasted;
+							int l_pos_in_item =
+							    pos_in_item;
+
+							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
+							ret_val =
+							    leaf_shift_left(tb,
+									    tb->
+									    lnum
+									    [0],
+									    tb->
+									    lbytes
+									    -
+									    1);
+							if (ret_val
+							    && !item_pos) {
+								pasted =
+								    B_N_PITEM_HEAD
+								    (tb->L[0],
+								     B_NR_ITEMS
+								     (tb->
+								      L[0]) -
+								     1);
+								l_pos_in_item +=
+								    I_ENTRY_COUNT
+								    (pasted) -
+								    (tb->
+								     lbytes -
+								     1);
+							}
+
+							/* Append given directory entry to directory item */
+							buffer_info_init_left(tb, &bi);
+							leaf_paste_in_buffer
+							    (&bi,
+							     n + item_pos -
+							     ret_val,
+							     l_pos_in_item,
+							     tb->insert_size[0],
+							     body, zeros_num);
+
+							/* the previous call prepared space for pasting the new entry; the following call pastes it */
+
+							/* when we have merged a directory item, pos_in_item has been adjusted too */
+
+							/* paste the new directory entry. 1 is the number of entries */
+							leaf_paste_entries(&bi,
+									   n +
+									   item_pos
+									   -
+									   ret_val,
+									   l_pos_in_item,
+									   1,
+									   (struct
+									    reiserfs_de_head
+									    *)
+									   body,
+									   body
+									   +
+									   DEH_SIZE,
+									   tb->
+									   insert_size
+									   [0]
+							    );
+							tb->insert_size[0] = 0;
+						} else {
+							/* new directory item doesn't fall into L[0] */
+							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
+							leaf_shift_left(tb,
+									tb->
+									lnum[0],
+									tb->
+									lbytes);
+						}
+						/* Calculate new position to append in item body */
+						pos_in_item -= tb->lbytes;
+					} else {
+						/* regular object */
+						RFALSE(tb->lbytes <= 0,
+						       "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
+						       tb->lbytes);
+						RFALSE(pos_in_item !=
+						       ih_item_len
+						       (B_N_PITEM_HEAD
+							(tbS0, item_pos)),
+						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
+						       ih_item_len
+						       (B_N_PITEM_HEAD
+							(tbS0, item_pos)),
+						       pos_in_item);
+
+						if (tb->lbytes >= pos_in_item) {
+							/* appended item will be in L[0] in whole */
+							int l_n;
+
+							/* this many bytes must be appended to the last item of L[0] */
+							l_n =
+							    tb->lbytes -
+							    pos_in_item;
+
+							/* Calculate new insert_size[0] */
+							tb->insert_size[0] -=
+							    l_n;
+
+							RFALSE(tb->
+							       insert_size[0] <=
+							       0,
+							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
+							       tb->
+							       insert_size[0]);
+							ret_val =
+							    leaf_shift_left(tb,
+									    tb->
+									    lnum
+									    [0],
+									    ih_item_len
+									    (B_N_PITEM_HEAD
+									     (tbS0,
+									      item_pos)));
+							/* Append to body of item in L[0] */
+							buffer_info_init_left(tb, &bi);
+							leaf_paste_in_buffer
+							    (&bi,
+							     n + item_pos -
+							     ret_val,
+							     ih_item_len
+							     (B_N_PITEM_HEAD
+							      (tb->L[0],
+							       n + item_pos -
+							       ret_val)), l_n,
+							     body,
+							     zeros_num >
+							     l_n ? l_n :
+							     zeros_num);
+							/* the 0-th item in S0 can only be of DIRECT type when l_n != 0 */
+							{
+								int version;
+								int temp_l =
+								    l_n;
+
+								RFALSE
+								    (ih_item_len
+								     (B_N_PITEM_HEAD
+								      (tbS0,
+								       0)),
+								     "PAP-12106: item length must be 0");
+								RFALSE
+								    (comp_short_le_keys
+								     (B_N_PKEY
+								      (tbS0, 0),
+								      B_N_PKEY
+								      (tb->L[0],
+								       n +
+								       item_pos
+								       -
+								       ret_val)),
+								     "PAP-12107: items must be of the same file");
+								if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
+									temp_l =
+									    l_n
+									    <<
+									    (tb->
+									     tb_sb->
+									     s_blocksize_bits
+									     -
+									     UNFM_P_SHIFT);
+								}
+								/* update key of first item in S0 */
+								version =
+								    ih_version
+								    (B_N_PITEM_HEAD
+								     (tbS0, 0));
+								set_le_key_k_offset
+								    (version,
+								     B_N_PKEY
+								     (tbS0, 0),
+								     le_key_k_offset
+								     (version,
+								      B_N_PKEY
+								      (tbS0,
+								       0)) +
+								     temp_l);
+								/* update left delimiting key */
+								set_le_key_k_offset
+								    (version,
+								     B_N_PDELIM_KEY
+								     (tb->
+								      CFL[0],
+								      tb->
+								      lkey[0]),
+								     le_key_k_offset
+								     (version,
+								      B_N_PDELIM_KEY
+								      (tb->
+								       CFL[0],
+								       tb->
+								       lkey[0]))
+								     + temp_l);
+							}
+
+							/* Calculate new body, position in item and insert_size[0] */
+							if (l_n > zeros_num) {
+								body +=
+								    (l_n -
+								     zeros_num);
+								zeros_num = 0;
+							} else
+								zeros_num -=
+								    l_n;
+							pos_in_item = 0;
+
+							RFALSE
+							    (comp_short_le_keys
+							     (B_N_PKEY(tbS0, 0),
+							      B_N_PKEY(tb->L[0],
+								       B_NR_ITEMS
+								       (tb->
+									L[0]) -
+								       1))
+							     ||
+							     !op_is_left_mergeable
+							     (B_N_PKEY(tbS0, 0),
+							      tbS0->b_size)
+							     ||
+							     !op_is_left_mergeable
+							     (B_N_PDELIM_KEY
+							      (tb->CFL[0],
+							       tb->lkey[0]),
+							      tbS0->b_size),
+							     "PAP-12120: item must be merge-able with left neighboring item");
+						} else {	/* only part of the appended item will be in L[0] */
+
+							/* Calculate position in item for append in S[0] */
+							pos_in_item -=
+							    tb->lbytes;
+
+							RFALSE(pos_in_item <= 0,
+							       "PAP-12125: no place for paste. pos_in_item=%d",
+							       pos_in_item);
+
+							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
+							leaf_shift_left(tb,
+									tb->
+									lnum[0],
+									tb->
+									lbytes);
+						}
+					}
+				} else {	/* appended item will be in L[0] in whole */
+
+					struct item_head *pasted;
+
+					if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) {	/* if we paste into the first item of S[0] and it is left mergeable */
+						/* then increment pos_in_item by the size of the last item in L[0] */
+						pasted =
+						    B_N_PITEM_HEAD(tb->L[0],
+								   n - 1);
+						if (is_direntry_le_ih(pasted))
+							pos_in_item +=
+							    ih_entry_count
+							    (pasted);
+						else
+							pos_in_item +=
+							    ih_item_len(pasted);
+					}
+
+					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
+					ret_val =
+					    leaf_shift_left(tb, tb->lnum[0],
+							    tb->lbytes);
+					/* Append to body of item in L[0] */
+					buffer_info_init_left(tb, &bi);
+					leaf_paste_in_buffer(&bi,
+							     n + item_pos -
+							     ret_val,
+							     pos_in_item,
+							     tb->insert_size[0],
+							     body, zeros_num);
+
+					/* if appended item is directory, paste entry */
+					pasted =
+					    B_N_PITEM_HEAD(tb->L[0],
+							   n + item_pos -
+							   ret_val);
+					if (is_direntry_le_ih(pasted))
+						leaf_paste_entries(&bi,
+								   n +
+								   item_pos -
+								   ret_val,
+								   pos_in_item,
+								   1,
+								   (struct
+								    reiserfs_de_head
+								    *)body,
+								   body +
+								   DEH_SIZE,
+								   tb->
+								   insert_size
+								   [0]
+						    );
+					/* if appended item is indirect item, put unformatted node into un list */
+					if (is_indirect_le_ih(pasted))
+						set_ih_free_space(pasted, 0);
+					tb->insert_size[0] = 0;
+					zeros_num = 0;
+				}
+				break;
+			default:	/* cases d and t */
+				reiserfs_panic(tb->tb_sb, "PAP-12130",
+					       "lnum > 0: unexpected mode: "
+					       " %s(%d)",
+					       (flag ==
+						M_DELETE) ? "DELETE" : ((flag ==
+									 M_CUT)
+									? "CUT"
+									:
+									"UNKNOWN"),
+					       flag);
+			}
+		} else {
+			/* new item doesn't fall into L[0] */
+			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+		}
+	}
+
+	/* tb->lnum[0] > 0 */
+	/* Calculate new item position */
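+	/* every whole item shifted to L[0] lowered item_pos by one; when
+	 * lbytes != -1 the boundary item was only split, so its remainder
+	 * still occupies a slot in S[0] and the shift counts one item less */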
+	item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
+
+	if (tb->rnum[0] > 0) {
+		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
+		n = B_NR_ITEMS(tbS0);
+		switch (flag) {
+
+		case M_INSERT:	/* insert item */
+			if (n - tb->rnum[0] < item_pos) {	/* new item or its part falls to R[0] */
+				if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
+					loff_t old_key_comp, old_len,
+					    r_zeros_number;
+					const char *r_body;
+					int version;
+					loff_t offset;
+
+					leaf_shift_right(tb, tb->rnum[0] - 1,
+							 -1);
+
+					version = ih_version(ih);
+					/* Remember key component and item length */
+					old_key_comp = le_ih_k_offset(ih);
+					old_len = ih_item_len(ih);
+
+					/* Calculate key component and item length to insert into R[0] */
+					offset =
+					    le_ih_k_offset(ih) +
+					    ((old_len -
+					      tb->
+					      rbytes) << (is_indirect_le_ih(ih)
+							  ? tb->tb_sb->
+							  s_blocksize_bits -
+							  UNFM_P_SHIFT : 0));
+					set_le_ih_k_offset(ih, offset);
+					put_ih_item_len(ih, tb->rbytes);
+					/* Insert part of the item into R[0] */
+					buffer_info_init_right(tb, &bi);
+					if ((old_len - tb->rbytes) > zeros_num) {
+						r_zeros_number = 0;
+						r_body =
+						    body + (old_len -
+							    tb->rbytes) -
+						    zeros_num;
+					} else {
+						r_body = body;
+						r_zeros_number =
+						    zeros_num - (old_len -
+								 tb->rbytes);
+						zeros_num -= r_zeros_number;
+					}
+
+					leaf_insert_into_buf(&bi, 0, ih, r_body,
+							     r_zeros_number);
+
+					/* Replace right delimiting key by first key in R[0] */
+					replace_key(tb, tb->CFR[0], tb->rkey[0],
+						    tb->R[0], 0);
+
+					/* Calculate key component and item length to insert into S[0] */
+					set_le_ih_k_offset(ih, old_key_comp);
+					put_ih_item_len(ih,
+							old_len - tb->rbytes);
+
+					tb->insert_size[0] -= tb->rbytes;
+
+				} else {	/* whole new item falls into R[0] */
+
+					/* Shift rnum[0]-1 items to R[0] */
+					ret_val =
+					    leaf_shift_right(tb,
+							     tb->rnum[0] - 1,
+							     tb->rbytes);
+					/* Insert new item into R[0] */
+					buffer_info_init_right(tb, &bi);
+					leaf_insert_into_buf(&bi,
+							     item_pos - n +
+							     tb->rnum[0] - 1,
+							     ih, body,
+							     zeros_num);
+
+					if (item_pos - n + tb->rnum[0] - 1 == 0) {
+						replace_key(tb, tb->CFR[0],
+							    tb->rkey[0],
+							    tb->R[0], 0);
+
+					}
+					zeros_num = tb->insert_size[0] = 0;
+				}
+			} else {	/* new item or part of it doesn't fall into R[0] */
+
+				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+			}
+			break;
+
+		case M_PASTE:	/* append item */
+
+			if (n - tb->rnum[0] <= item_pos) {	/* pasted item or part of it falls to R[0] */
+				if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
+					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {	/* we append to directory item */
+						int entry_count;
+
+						RFALSE(zeros_num,
+						       "PAP-12145: invalid parameter in case of a directory");
+						entry_count =
+						    I_ENTRY_COUNT(B_N_PITEM_HEAD
+								  (tbS0,
+								   item_pos));
+						if (entry_count - tb->rbytes <
+						    pos_in_item)
+							/* new directory entry falls into R[0] */
+						{
+							int paste_entry_position;
+
+							RFALSE(tb->rbytes - 1 >=
+							       entry_count
+							       || !tb->
+							       insert_size[0],
+							       "PAP-12150: not enough entries to shift to R[0]: rbytes=%d, entry_count=%d",
+							       tb->rbytes,
+							       entry_count);
+							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
+							leaf_shift_right(tb,
+									 tb->
+									 rnum
+									 [0],
+									 tb->
+									 rbytes
+									 - 1);
+							/* Paste given directory entry to directory item */
+							paste_entry_position =
+							    pos_in_item -
+							    entry_count +
+							    tb->rbytes - 1;
+							buffer_info_init_right(tb, &bi);
+							leaf_paste_in_buffer
+							    (&bi, 0,
+							     paste_entry_position,
+							     tb->insert_size[0],
+							     body, zeros_num);
+							/* paste entry */
+							leaf_paste_entries(&bi,
+									   0,
+									   paste_entry_position,
+									   1,
+									   (struct
+									    reiserfs_de_head
+									    *)
+									   body,
+									   body
+									   +
+									   DEH_SIZE,
+									   tb->
+									   insert_size
+									   [0]
+							    );
+
+							if (paste_entry_position
+							    == 0) {
+								/* change delimiting keys */
+								replace_key(tb,
+									    tb->
+									    CFR
+									    [0],
+									    tb->
+									    rkey
+									    [0],
+									    tb->
+									    R
+									    [0],
+									    0);
+							}
+
+							tb->insert_size[0] = 0;
+							pos_in_item++;
+						} else {	/* new directory entry doesn't fall into R[0] */
+
+							leaf_shift_right(tb,
+									 tb->
+									 rnum
+									 [0],
+									 tb->
+									 rbytes);
+						}
+					} else {	/* regular object */
+
+						int n_shift, n_rem,
+						    r_zeros_number;
+						const char *r_body;
+
+						/* Calculate number of bytes which must be shifted from appended item */
+						if ((n_shift =
+						     tb->rbytes -
+						     tb->insert_size[0]) < 0)
+							n_shift = 0;
+
+						RFALSE(pos_in_item !=
+						       ih_item_len
+						       (B_N_PITEM_HEAD
+							(tbS0, item_pos)),
+						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
+						       pos_in_item,
+						       ih_item_len
+						       (B_N_PITEM_HEAD
+							(tbS0, item_pos)));
+
+						leaf_shift_right(tb,
+								 tb->rnum[0],
+								 n_shift);
+						/* Calculate number of bytes which must remain in body after appending to R[0] */
+						if ((n_rem =
+						     tb->insert_size[0] -
+						     tb->rbytes) < 0)
+							n_rem = 0;
+
+						{
+							int version;
+							unsigned long temp_rem =
+							    n_rem;
+
+							version =
+							    ih_version
+							    (B_N_PITEM_HEAD
+							     (tb->R[0], 0));
+							if (is_indirect_le_key
+							    (version,
+							     B_N_PKEY(tb->R[0],
+								      0))) {
+								temp_rem =
+								    n_rem <<
+								    (tb->tb_sb->
+								     s_blocksize_bits
+								     -
+								     UNFM_P_SHIFT);
+							}
+							set_le_key_k_offset
+							    (version,
+							     B_N_PKEY(tb->R[0],
+								      0),
+							     le_key_k_offset
+							     (version,
+							      B_N_PKEY(tb->R[0],
+								       0)) +
+							     temp_rem);
+							set_le_key_k_offset
+							    (version,
+							     B_N_PDELIM_KEY(tb->
+									    CFR
+									    [0],
+									    tb->
+									    rkey
+									    [0]),
+							     le_key_k_offset
+							     (version,
+							      B_N_PDELIM_KEY
+							      (tb->CFR[0],
+							       tb->rkey[0])) +
+							     temp_rem);
+						}
+/*		  k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
+		  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
+						do_balance_mark_internal_dirty
+						    (tb, tb->CFR[0], 0);
+
+						/* Append part of body into R[0] */
+						buffer_info_init_right(tb, &bi);
+						if (n_rem > zeros_num) {
+							r_zeros_number = 0;
+							r_body =
+							    body + n_rem -
+							    zeros_num;
+						} else {
+							r_body = body;
+							r_zeros_number =
+							    zeros_num - n_rem;
+							zeros_num -=
+							    r_zeros_number;
+						}
+
+						leaf_paste_in_buffer(&bi, 0,
+								     n_shift,
+								     tb->
+								     insert_size
+								     [0] -
+								     n_rem,
+								     r_body,
+								     r_zeros_number);
+
+						if (is_indirect_le_ih
+						    (B_N_PITEM_HEAD
+						     (tb->R[0], 0))) {
+#if 0
+							RFALSE(n_rem,
+							       "PAP-12160: paste more than one unformatted node pointer");
+#endif
+							set_ih_free_space
+							    (B_N_PITEM_HEAD
+							     (tb->R[0], 0), 0);
+						}
+						tb->insert_size[0] = n_rem;
+						if (!n_rem)
+							pos_in_item++;
+					}
+				} else {	/* pasted item in whole falls into R[0] */
+
+					struct item_head *pasted;
+
+					ret_val =
+					    leaf_shift_right(tb, tb->rnum[0],
+							     tb->rbytes);
+					/* append item in R[0] */
+					if (pos_in_item >= 0) {
+						buffer_info_init_right(tb, &bi);
+						leaf_paste_in_buffer(&bi,
+								     item_pos -
+								     n +
+								     tb->
+								     rnum[0],
+								     pos_in_item,
+								     tb->
+								     insert_size
+								     [0], body,
+								     zeros_num);
+					}
+
+					/* paste new entry, if item is directory item */
+					pasted =
+					    B_N_PITEM_HEAD(tb->R[0],
+							   item_pos - n +
+							   tb->rnum[0]);
+					if (is_direntry_le_ih(pasted)
+					    && pos_in_item >= 0) {
+						leaf_paste_entries(&bi,
+								   item_pos -
+								   n +
+								   tb->rnum[0],
+								   pos_in_item,
+								   1,
+								   (struct
+								    reiserfs_de_head
+								    *)body,
+								   body +
+								   DEH_SIZE,
+								   tb->
+								   insert_size
+								   [0]
+						    );
+						if (!pos_in_item) {
+
+							RFALSE(item_pos - n +
+							       tb->rnum[0],
+							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");
+
+							/* update delimiting keys */
+							replace_key(tb,
+								    tb->CFR[0],
+								    tb->rkey[0],
+								    tb->R[0],
+								    0);
+						}
+					}
+
+					if (is_indirect_le_ih(pasted))
+						set_ih_free_space(pasted, 0);
+					zeros_num = tb->insert_size[0] = 0;
+				}
+			} else {	/* new item doesn't fall into R[0] */
+
+				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
+			}
+			break;
+		default:	/* cases d and t */
+			reiserfs_panic(tb->tb_sb, "PAP-12175",
+				       "rnum > 0: unexpected mode: %s(%d)",
+				       (flag ==
+					M_DELETE) ? "DELETE" : ((flag ==
+								 M_CUT) ? "CUT"
+								: "UNKNOWN"),
+				       flag);
+		}
+
+	}
+
+	/* tb->rnum[0] > 0 */
+	RFALSE(tb->blknum[0] > 3,
+	       "PAP-12180: blknum can not be %d. It must be <= 3",
+	       tb->blknum[0]);
+	RFALSE(tb->blknum[0] < 0,
+	       "PAP-12185: blknum can not be %d. It must be >= 0",
+	       tb->blknum[0]);
+
+	/* if, while adding to a node, we discover that it is possible to
+	   split it in two, merging the left part into the left neighbor and
+	   the right part into the right neighbor, the node is eliminated */
+	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */
+
+		RFALSE(!tb->lnum[0] || !tb->rnum[0],
+		       "PAP-12190: lnum and rnum must not be zero");
+		/* if insertion was done before the 0-th position in R[0], the
+		   right delimiting key of tb->L[0] and the left delimiting
+		   key are not set correctly */
+		if (tb->CFL[0]) {
+			if (!tb->CFR[0])
+				reiserfs_panic(tb->tb_sb, "vs-12195",
+					       "CFR not initialized");
+			copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
+				 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
+			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
+		}
+
+		reiserfs_invalidate_buffer(tb, tbS0);
+		return 0;
+	}
+
+	/* Fill new nodes that appear in place of S[0] */
+
+	/* I am told that this copying is because we need an array to enable
+	   the looping code. -Hans */
+	snum[0] = tb->s1num, snum[1] = tb->s2num;
+	sbytes[0] = tb->s1bytes;
+	sbytes[1] = tb->s2bytes;
+	for (i = tb->blknum[0] - 2; i >= 0; i--) {
+
+		RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
+		       snum[i]);
+
+		/* here we shift from S to S_new nodes */
+
+		S_new[i] = get_FEB(tb);
+
+		/* initialize block type and tree level */
+		set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
+
+		n = B_NR_ITEMS(tbS0);
+
+		switch (flag) {
+		case M_INSERT:	/* insert item */
+
+			if (n - snum[i] < item_pos) {	/* new item or part of it falls into the first new node S_new[i] */
+				if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
+					int old_key_comp, old_len,
+					    r_zeros_number;
+					const char *r_body;
+					int version;
+
+					/* Move snum[i]-1 items from S[0] to S_new[i] */
+					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+							snum[i] - 1, -1,
+							S_new[i]);
+					/* Remember key component and item length */
+					version = ih_version(ih);
+					old_key_comp = le_ih_k_offset(ih);
+					old_len = ih_item_len(ih);
+
+					/* Calculate key component and item length to insert into S_new[i] */
+					set_le_ih_k_offset(ih,
+							   le_ih_k_offset(ih) +
+							   ((old_len -
+							     sbytes[i]) <<
+							    (is_indirect_le_ih
+							     (ih) ? tb->tb_sb->
+							     s_blocksize_bits -
+							     UNFM_P_SHIFT :
+							     0)));
+
+					put_ih_item_len(ih, sbytes[i]);
+
+					/* Insert part of the item into S_new[i] before 0-th item */
+					buffer_info_init_bh(tb, &bi, S_new[i]);
+
+					if ((old_len - sbytes[i]) > zeros_num) {
+						r_zeros_number = 0;
+						r_body =
+						    body + (old_len -
+							    sbytes[i]) -
+						    zeros_num;
+					} else {
+						r_body = body;
+						r_zeros_number =
+						    zeros_num - (old_len -
+								 sbytes[i]);
+						zeros_num -= r_zeros_number;
+					}
+
+					leaf_insert_into_buf(&bi, 0, ih, r_body,
+							     r_zeros_number);
+
+					/* Calculate key component and item length to insert into S[i] */
+					set_le_ih_k_offset(ih, old_key_comp);
+					put_ih_item_len(ih,
+							old_len - sbytes[i]);
+					tb->insert_size[0] -= sbytes[i];
+				} else {	/* whole new item falls into S_new[i] */
+
+					/* Shift snum[i] - 1 items to S_new[i] (sbytes[i] of the split item) */
+					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+							snum[i] - 1, sbytes[i],
+							S_new[i]);
+
+					/* Insert new item into S_new[i] */
+					buffer_info_init_bh(tb, &bi, S_new[i]);
+					leaf_insert_into_buf(&bi,
+							     item_pos - n +
+							     snum[i] - 1, ih,
+							     body, zeros_num);
+
+					zeros_num = tb->insert_size[0] = 0;
+				}
+			}
+
+			else {	/* new item or part of it does not fall into S_new[i] */
+
+				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+						snum[i], sbytes[i], S_new[i]);
+			}
+			break;
+
+		case M_PASTE:	/* append item */
+
+			if (n - snum[i] <= item_pos) {	/* pasted item or part of it falls into S_new[i] */
+				if (item_pos == n - snum[i] && sbytes[i] != -1) {	/* we must shift part of the appended item */
+					struct item_head *aux_ih;
+
+					RFALSE(ih, "PAP-12210: ih must be 0");
+
+					aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
+					if (is_direntry_le_ih(aux_ih)) {
+						/* we append to directory item */
+
+						int entry_count;
+
+						entry_count =
+						    ih_entry_count(aux_ih);
+
+						if (entry_count - sbytes[i] <
+						    pos_in_item
+						    && pos_in_item <=
+						    entry_count) {
+							/* new directory entry falls into S_new[i] */
+
+							RFALSE(!tb->
+							       insert_size[0],
+							       "PAP-12215: insert_size is already 0");
+							RFALSE(sbytes[i] - 1 >=
+							       entry_count,
+							       "PAP-12220: there are not that many entries (%d), only %d",
+							       sbytes[i] - 1,
+							       entry_count);
+
+							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
+							leaf_move_items
+							    (LEAF_FROM_S_TO_SNEW,
+							     tb, snum[i],
+							     sbytes[i] - 1,
+							     S_new[i]);
+							/* Paste given directory entry to directory item */
+							buffer_info_init_bh(tb, &bi, S_new[i]);
+							leaf_paste_in_buffer
+							    (&bi, 0,
+							     pos_in_item -
+							     entry_count +
+							     sbytes[i] - 1,
+							     tb->insert_size[0],
+							     body, zeros_num);
+							/* paste new directory entry */
+							leaf_paste_entries(&bi,
+									   0,
+									   pos_in_item
+									   -
+									   entry_count
+									   +
+									   sbytes
+									   [i] -
+									   1, 1,
+									   (struct
+									    reiserfs_de_head
+									    *)
+									   body,
+									   body
+									   +
+									   DEH_SIZE,
+									   tb->
+									   insert_size
+									   [0]
+							    );
+							tb->insert_size[0] = 0;
+							pos_in_item++;
+						} else {	/* new directory entry doesn't fall into S_new[i] */
+							leaf_move_items
+							    (LEAF_FROM_S_TO_SNEW,
+							     tb, snum[i],
+							     sbytes[i],
+							     S_new[i]);
+						}
+					} else {	/* regular object */
+
+						int n_shift, n_rem,
+						    r_zeros_number;
+						const char *r_body;
+
+						RFALSE(pos_in_item !=
+						       ih_item_len
+						       (B_N_PITEM_HEAD
+							(tbS0, item_pos))
+						       || tb->insert_size[0] <=
+						       0,
+						       "PAP-12225: item too short or insert_size <= 0");
+
+						/* Calculate number of bytes which must be shifted from appended item */
+						n_shift =
+						    sbytes[i] -
+						    tb->insert_size[0];
+						if (n_shift < 0)
+							n_shift = 0;
+						leaf_move_items
+						    (LEAF_FROM_S_TO_SNEW, tb,
+						     snum[i], n_shift,
+						     S_new[i]);
+
+						/* Calculate number of bytes which must remain in body after append to S_new[i] */
+						n_rem =
+						    tb->insert_size[0] -
+						    sbytes[i];
+						if (n_rem < 0)
+							n_rem = 0;
+						/* Append part of body into S_new[0] */
+						buffer_info_init_bh(tb, &bi, S_new[i]);
+						if (n_rem > zeros_num) {
+							r_zeros_number = 0;
+							r_body =
+							    body + n_rem -
+							    zeros_num;
+						} else {
+							r_body = body;
+							r_zeros_number =
+							    zeros_num - n_rem;
+							zeros_num -=
+							    r_zeros_number;
+						}
+
+						leaf_paste_in_buffer(&bi, 0,
+								     n_shift,
+								     tb->
+								     insert_size
+								     [0] -
+								     n_rem,
+								     r_body,
+								     r_zeros_number);
+						{
+							struct item_head *tmp;
+
+							tmp =
+							    B_N_PITEM_HEAD(S_new
+									   [i],
+									   0);
+							if (is_indirect_le_ih
+							    (tmp)) {
+								set_ih_free_space
+								    (tmp, 0);
+								set_le_ih_k_offset
+								    (tmp,
+								     le_ih_k_offset
+								     (tmp) +
+								     (n_rem <<
+								      (tb->
+								       tb_sb->
+								       s_blocksize_bits
+								       -
+								       UNFM_P_SHIFT)));
+							} else {
+								set_le_ih_k_offset
+								    (tmp,
+								     le_ih_k_offset
+								     (tmp) +
+								     n_rem);
+							}
+						}
+
+						tb->insert_size[0] = n_rem;
+						if (!n_rem)
+							pos_in_item++;
+					}
+				} else
+					/* item falls wholly into S_new[i] */
+				{
+					int leaf_mi;
+					struct item_head *pasted;
+
+#ifdef CONFIG_REISERFS_CHECK
+					struct item_head *ih_check =
+					    B_N_PITEM_HEAD(tbS0, item_pos);
+
+					if (!is_direntry_le_ih(ih_check)
+					    && (pos_in_item != ih_item_len(ih_check)
+						|| tb->insert_size[0] <= 0))
+						reiserfs_panic(tb->tb_sb,
+							     "PAP-12235",
+							     "pos_in_item "
+							     "must be equal "
+							     "to ih_item_len");
+#endif				/* CONFIG_REISERFS_CHECK */
+
+					leaf_mi =
+					    leaf_move_items(LEAF_FROM_S_TO_SNEW,
+							    tb, snum[i],
+							    sbytes[i],
+							    S_new[i]);
+
+					RFALSE(leaf_mi,
+					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
+					       leaf_mi);
+
+					/* paste into item */
+					buffer_info_init_bh(tb, &bi, S_new[i]);
+					leaf_paste_in_buffer(&bi,
+							     item_pos - n +
+							     snum[i],
+							     pos_in_item,
+							     tb->insert_size[0],
+							     body, zeros_num);
+
+					pasted =
+					    B_N_PITEM_HEAD(S_new[i],
+							   item_pos - n +
+							   snum[i]);
+					if (is_direntry_le_ih(pasted)) {
+						leaf_paste_entries(&bi,
+								   item_pos -
+								   n + snum[i],
+								   pos_in_item,
+								   1,
+								   (struct
+								    reiserfs_de_head
+								    *)body,
+								   body +
+								   DEH_SIZE,
+								   tb->
+								   insert_size
+								   [0]
+						    );
+					}
+
+					/* if we paste to an indirect item, update ih_free_space */
+					if (is_indirect_le_ih(pasted))
+						set_ih_free_space(pasted, 0);
+					zeros_num = tb->insert_size[0] = 0;
+				}
+			}
+
+			else {	/* pasted item doesn't fall into S_new[i] */
+
+				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
+						snum[i], sbytes[i], S_new[i]);
+			}
+			break;
+		default:	/* cases d and t */
+			reiserfs_panic(tb->tb_sb, "PAP-12245",
+				       "blknum > 2: unexpected mode: %s(%d)",
+				       (flag ==
+					M_DELETE) ? "DELETE" : ((flag ==
+								 M_CUT) ? "CUT"
+								: "UNKNOWN"),
+				       flag);
+		}
+
+		memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
+		insert_ptr[i] = S_new[i];
+
+		RFALSE(!buffer_journaled(S_new[i])
+		       || buffer_journal_dirty(S_new[i])
+		       || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
+		       i, S_new[i]);
+	}
+
+	/* if the affected item was not wholly shifted then we perform all
+	   necessary operations on the part (or whole) of the affected item
+	   that remains in S */
+	if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */
+
+		switch (flag) {
+		case M_INSERT:	/* insert item into S[0] */
+			buffer_info_init_tbS0(tb, &bi);
+			leaf_insert_into_buf(&bi, item_pos, ih, body,
+					     zeros_num);
+
+			/* If we insert the first key change the delimiting key */
+			if (item_pos == 0) {
+				if (tb->CFL[0])	/* can be 0 in reiserfsck */
+					replace_key(tb, tb->CFL[0], tb->lkey[0],
+						    tbS0, 0);
+
+			}
+			break;
+
+		case M_PASTE:{	/* append item in S[0] */
+				struct item_head *pasted;
+
+				pasted = B_N_PITEM_HEAD(tbS0, item_pos);
+				/* for a directory item, the new entry may already have been pasted */
+				if (is_direntry_le_ih(pasted)) {
+					if (pos_in_item >= 0 &&
+					    pos_in_item <=
+					    ih_entry_count(pasted)) {
+
+						RFALSE(!tb->insert_size[0],
+						       "PAP-12260: insert_size is 0 already");
+
+						/* prepare space */
+						buffer_info_init_tbS0(tb, &bi);
+						leaf_paste_in_buffer(&bi,
+								     item_pos,
+								     pos_in_item,
+								     tb->
+								     insert_size
+								     [0], body,
+								     zeros_num);
+
+						/* paste entry */
+						leaf_paste_entries(&bi,
+								   item_pos,
+								   pos_in_item,
+								   1,
+								   (struct
+								    reiserfs_de_head
+								    *)body,
+								   body +
+								   DEH_SIZE,
+								   tb->
+								   insert_size
+								   [0]
+						    );
+						if (!item_pos && !pos_in_item) {
+							RFALSE(!tb->CFL[0]
+							       || !tb->L[0],
+							       "PAP-12270: CFL[0]/L[0] must be specified");
+							if (tb->CFL[0]) {
+								replace_key(tb,
+									    tb->
+									    CFL
+									    [0],
+									    tb->
+									    lkey
+									    [0],
+									    tbS0,
+									    0);
+
+							}
+						}
+						tb->insert_size[0] = 0;
+					}
+				} else {	/* regular object */
+					if (pos_in_item == ih_item_len(pasted)) {
+
+						RFALSE(tb->insert_size[0] <= 0,
+						       "PAP-12275: insert size must not be %d",
+						       tb->insert_size[0]);
+						buffer_info_init_tbS0(tb, &bi);
+						leaf_paste_in_buffer(&bi,
+								item_pos,
+								pos_in_item,
+								tb->insert_size[0],
+								body, zeros_num);
+
+						if (is_indirect_le_ih(pasted)) {
+#if 0
+							RFALSE(tb->insert_size[0] !=
+							       UNFM_P_SIZE,
+							       "PAP-12280: insert_size for indirect item must be %d, not %d",
+							       UNFM_P_SIZE,
+							       tb->insert_size[0]);
+#endif
+							set_ih_free_space(pasted, 0);
+						}
+						tb->insert_size[0] = 0;
+					}
+#ifdef CONFIG_REISERFS_CHECK
+					else {
+						if (tb->insert_size[0]) {
+							print_cur_tb("12285");
+							reiserfs_panic(tb->tb_sb,
+							    "PAP-12285",
+							    "insert_size must be 0 (%d)",
+							    tb->insert_size[0]);
+						}
+					}
+#endif				/* CONFIG_REISERFS_CHECK */
+
+				}
+			}	/* case M_PASTE: */
+		}
+	}
+#ifdef CONFIG_REISERFS_CHECK
+	if (flag == M_PASTE && tb->insert_size[0]) {
+		print_cur_tb("12290");
+		reiserfs_panic(tb->tb_sb,
+			       "PAP-12290", "insert_size is still not 0 (%d)",
+			       tb->insert_size[0]);
+	}
+#endif				/* CONFIG_REISERFS_CHECK */
+	return 0;
+}				/* Leaf level of the tree is balanced (end of balance_leaf) */
+
+/* Make empty node */
+void make_empty_node(struct buffer_info *bi)
+{
+	struct block_head *blkh;
+
+	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
+
+	blkh = B_BLK_HEAD(bi->bi_bh);
+	set_blkh_nr_item(blkh, 0);
+	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
+
+	if (bi->bi_parent)
+		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
+}
+
+/* Get first empty buffer */
+struct buffer_head *get_FEB(struct tree_balance *tb)
+{
+	int i;
+	struct buffer_info bi;
+
+	for (i = 0; i < MAX_FEB_SIZE; i++)
+		if (tb->FEB[i] != NULL)
+			break;
+
+	if (i == MAX_FEB_SIZE)
+		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
+
+	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
+	make_empty_node(&bi);
+	set_buffer_uptodate(tb->FEB[i]);
+	tb->used[i] = tb->FEB[i];
+	tb->FEB[i] = NULL;
+
+	return tb->used[i];
+}
+
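+/* Usage sketch (illustrative): tb->FEB[] is filled with freshly allocated
+   buffers by get_empty_nodes() in fix_node.c, and the balancing code above
+   draws from it whenever a new node is needed, e.g. S_new[i] = get_FEB(tb).
+   The buffer migrates from tb->FEB[] to tb->used[] so that it can be
+   released when the tree_balance is torn down. */
+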
+/* This is now used because reiserfs_free_block has to be able to
+** schedule.
+*/
+static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
+{
+	int i;
+
+	if (buffer_dirty(bh))
+		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
+				 "called with dirty buffer");
+	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
+		if (!tb->thrown[i]) {
+			tb->thrown[i] = bh;
+			get_bh(bh);	/* free_thrown puts this */
+			return;
+		}
+	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
+			 "too many thrown buffers");
+}
+
+static void free_thrown(struct tree_balance *tb)
+{
+	int i;
+	b_blocknr_t blocknr;
+	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
+		if (tb->thrown[i]) {
+			blocknr = tb->thrown[i]->b_blocknr;
+			if (buffer_dirty(tb->thrown[i]))
+				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
+						 "called with dirty buffer %d",
+						 blocknr);
+			brelse(tb->thrown[i]);	/* incremented in store_thrown */
+			reiserfs_free_block(tb->transaction_handle, NULL,
+					    blocknr, 0);
+		}
+	}
+}
+
+void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
+{
+	struct block_head *blkh;
+	blkh = B_BLK_HEAD(bh);
+	set_blkh_level(blkh, FREE_LEVEL);
+	set_blkh_nr_item(blkh, 0);
+
+	clear_buffer_dirty(bh);
+	store_thrown(tb, bh);
+}
+
+/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
+void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
+		 struct buffer_head *src, int n_src)
+{
+
+	RFALSE(dest == NULL || src == NULL,
+	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
+	       src, dest);
+	RFALSE(!B_IS_KEYS_LEVEL(dest),
+	       "vs-12310: invalid level (%z) for destination buffer. dest must be internal",
+	       dest);
+	RFALSE(n_dest < 0 || n_src < 0,
+	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
+	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
+	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
+	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
+
+	if (B_IS_ITEMS_LEVEL(src))
+		/* source buffer contains leaf node */
+		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
+		       KEY_SIZE);
+	else
+		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
+		       KEY_SIZE);
+
+	do_balance_mark_internal_dirty(tb, dest, 0);
+}
+
+int get_left_neighbor_position(struct tree_balance *tb, int h)
+{
+	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
+
+	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
+	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
+	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
+
+	if (Sh_position == 0)
+		return B_NR_ITEMS(tb->FL[h]);
+	else
+		return Sh_position - 1;
+}
+
+int get_right_neighbor_position(struct tree_balance *tb, int h)
+{
+	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
+
+	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
+	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
+	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
+
+	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
+		return 0;
+	else
+		return Sh_position + 1;
+}
+
+#ifdef CONFIG_REISERFS_CHECK
+
+int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
+static void check_internal_node(struct super_block *s, struct buffer_head *bh,
+				char *mes)
+{
+	struct disk_child *dc;
+	int i;
+
+	RFALSE(!bh, "PAP-12336: bh == 0");
+
+	if (!bh || !B_IS_IN_TREE(bh))
+		return;
+
+	RFALSE(!buffer_dirty(bh) &&
+	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
+	       "PAP-12337: buffer (%b) must be dirty", bh);
+	dc = B_N_CHILD(bh, 0);
+
+	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
+		if (!is_reusable(s, dc_block_number(dc), 1)) {
+			print_cur_tb(mes);
+			reiserfs_panic(s, "PAP-12338",
+				       "invalid child pointer %y in %b",
+				       dc, bh);
+		}
+	}
+}
+
+static int locked_or_not_in_tree(struct tree_balance *tb,
+				  struct buffer_head *bh, char *which)
+{
+	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
+	    !B_IS_IN_TREE(bh)) {
+		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
+		return 1;
+	}
+	return 0;
+}
+
+static int check_before_balancing(struct tree_balance *tb)
+{
+	int retval = 0;
+
+	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
+		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
+			       "occurred based on cur_tb not being null at "
+			       "this point in code. do_balance cannot properly "
+			       "handle concurrent tree accesses on a same "
+			       "mount point.");
+	}
+
+	/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
+	   prepped all of these for us). */
+	if (tb->lnum[0]) {
+		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
+		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
+		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
+		check_leaf(tb->L[0]);
+	}
+	if (tb->rnum[0]) {
+		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
+		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
+		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
+		check_leaf(tb->R[0]);
+	}
+	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
+					"S[0]");
+	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
+
+	return retval;
+}
+
+static void check_after_balance_leaf(struct tree_balance *tb)
+{
+	if (tb->lnum[0]) {
+		if (B_FREE_SPACE(tb->L[0]) !=
+		    MAX_CHILD_SIZE(tb->L[0]) -
+		    dc_size(B_N_CHILD
+			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
+			print_cur_tb("12221");
+			reiserfs_panic(tb->tb_sb, "PAP-12355",
+				       "shift to left was incorrect");
+		}
+	}
+	if (tb->rnum[0]) {
+		if (B_FREE_SPACE(tb->R[0]) !=
+		    MAX_CHILD_SIZE(tb->R[0]) -
+		    dc_size(B_N_CHILD
+			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
+			print_cur_tb("12222");
+			reiserfs_panic(tb->tb_sb, "PAP-12360",
+				       "shift to right was incorrect");
+		}
+	}
+	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
+	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
+	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
+	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
+				PATH_H_POSITION(tb->tb_path, 1)))))) {
+		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
+		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
+			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
+					       PATH_H_POSITION(tb->tb_path,
+							       1))));
+		print_cur_tb("12223");
+		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
+				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
+				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
+				 left,
+				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
+				 PATH_H_PBUFFER(tb->tb_path, 1),
+				 PATH_H_POSITION(tb->tb_path, 1),
+				 dc_size(B_N_CHILD
+					 (PATH_H_PBUFFER(tb->tb_path, 1),
+					  PATH_H_POSITION(tb->tb_path, 1))),
+				 right);
+		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
+	}
+}
+
+static void check_leaf_level(struct tree_balance *tb)
+{
+	check_leaf(tb->L[0]);
+	check_leaf(tb->R[0]);
+	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
+}
+
+static void check_internal_levels(struct tree_balance *tb)
+{
+	int h;
+
+	/* check all internal nodes */
+	for (h = 1; tb->insert_size[h]; h++) {
+		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
+				    "BAD BUFFER ON PATH");
+		if (tb->lnum[h])
+			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
+		if (tb->rnum[h])
+			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
+	}
+
+}
+
+#endif
+
+/* Now we have all of the buffers that must be used in balancing of
+   the tree.  We rely on the assumption that schedule() will not occur
+   while do_balance works.  (Only interrupt handlers are acceptable.)
+   We balance the tree according to the analysis made before this,
+   using buffers already obtained.  For SMP support it will someday be
+   necessary to add ordered locking of tb. */
+
+/* Some interesting rules of balancing:
+
+   we delete a maximum of two nodes per level per balancing: we never
+   delete R, when we delete two of three nodes L, S, R then we move
+   them into R.
+
+   we only delete L if we are deleting two nodes; if we delete only
+   one node, we delete S
+
+   if we shift leaves then we shift as much as we can: this is a
+   deliberate policy of extremism in node packing which results in
+   higher average utilization after repeated random balance operations
+   at the cost of more memory copies and more balancing as a result of
+   small insertions to full nodes.
+
+   if we shift internal nodes we try to evenly balance the node
+   utilization, with consequently less balancing at the cost of lower
+   utilization.
+
+   one could argue that the policy for directories in leaves should be
+   that of internal nodes, but we will wait until another day to
+   evaluate this....  It would be nice to someday measure and prove
+   these assumptions as to what is optimal....
+
+*/
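+/* An illustration of the rules above (not an additional rule): when a
+   delete frees enough space that L, S and R all fit in one node, the
+   contents of L and S are shifted into R and both L and S are freed;
+   R itself is never the node that disappears.  When only one node can
+   be freed, S is emptied into its neighbors and removed. */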
+
+static inline void do_balance_starts(struct tree_balance *tb)
+{
+	/* use print_cur_tb() to see initial state of struct
+	   tree_balance */
+
+	/* store_print_tb (tb); */
+
+	/* do not delete, just comment it out */
+/*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
+	     "check");*/
+	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
+#ifdef CONFIG_REISERFS_CHECK
+	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
+#endif
+}
+
+static inline void do_balance_completed(struct tree_balance *tb)
+{
+
+#ifdef CONFIG_REISERFS_CHECK
+	check_leaf_level(tb);
+	check_internal_levels(tb);
+	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
+#endif
+
+	/* reiserfs_free_block is no longer schedule safe.  So, we need to
+	 ** put the buffers we want freed on the thrown list during do_balance,
+	 ** and then free them now
+	 */
+
+	REISERFS_SB(tb->tb_sb)->s_do_balance++;
+
+	/* release all nodes held to perform the balancing */
+	unfix_nodes(tb);
+
+	free_thrown(tb);
+}
+
+void do_balance(struct tree_balance *tb,	/* tree_balance structure */
+		struct item_head *ih,	/* item header of inserted item */
+		const char *body,	/* body  of inserted item or bytes to paste */
+		int flag)
+{				/* i - insert, d - delete,
+				   c - cut, p - paste
+
+				   Cut means delete part of an item
+				   (includes removing an entry from a
+				   directory).
+
+				   Delete means delete whole item.
+
+				   Insert means add a new item into the
+				   tree.
+
+				   Paste means to append to the end of an
+				   existing file or to insert a directory
+				   entry.  */
+	int child_pos,		/* position of a child node in its parent */
+	 h;			/* level of the tree being processed */
+	struct item_head insert_key[2];	/* in our processing of one level
+					   we sometimes determine what
+					   must be inserted into the next
+					   higher level.  This insertion
+					   consists of a key or two keys
+					   and their corresponding
+					   pointers */
+	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
+						   level */
+
+	tb->tb_mode = flag;
+	tb->need_balance_dirty = 0;
+
+	if (FILESYSTEM_CHANGED_TB(tb)) {
+		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
+			       "changed");
+	}
+	/* if we have no real work to do  */
+	if (!tb->insert_size[0]) {
+		reiserfs_warning(tb->tb_sb, "PAP-12350",
+				 "insert_size == 0, mode == %c", flag);
+		unfix_nodes(tb);
+		return;
+	}
+
+	atomic_inc(&(fs_generation(tb->tb_sb)));
+	do_balance_starts(tb);
+
+	/* balance_leaf returns 0 except when combining L, R and S into
+	   one node.  See balance_internal() for an explanation of this
+	   line of code. */
+	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
+	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
+
+#ifdef CONFIG_REISERFS_CHECK
+	check_after_balance_leaf(tb);
+#endif
+
+	/* Balance internal level of the tree. */
+	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
+		child_pos =
+		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);
+
+	do_balance_completed(tb);
+
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/file.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/file.c
new file mode 100644
index 0000000..8375c92
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/file.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/time.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
+#include <asm/uaccess.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/quotaops.h>
+
+/*
+** We pack the tails of files on file close, not at the time they are written.
+** This implies an unnecessary copy of the tail and an unnecessary indirect item
+** insertion/balancing, for files that are written in one write.
+** It avoids unnecessary tail packings (balances) for files that are written in
+** multiple writes and are small enough to have tails.
+**
+** file_release is called by the VFS layer when the file is closed.  If
+** this is the last open file descriptor, the file is small enough to
+** have a tail, and the tail is currently in an unformatted node, the
+** tail is converted back into a direct item.
+**
+** We use reiserfs_truncate_file to pack the tail, since it already has
+** all the conditions coded.
+*/
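+/* Example of the trade-off described above: a file created with one
+   write() and then closed is first written out via an unformatted node;
+   on the final close the code below notices the packable tail and
+   reiserfs_truncate_file() converts it back to a direct item.  The
+   one-write case thus pays one extra tail conversion, while files built
+   up by many small writes avoid being repacked on every write. */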
+static int reiserfs_file_release(struct inode *inode, struct file *filp)
+{
+
+	struct reiserfs_transaction_handle th;
+	int err;
+	int jbegin_failure = 0;
+
+	BUG_ON(!S_ISREG(inode->i_mode));
+
+	if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
+		return 0;
+
+	mutex_lock(&(REISERFS_I(inode)->tailpack));
+
+	if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		return 0;
+	}
+
+	/* fast out for when nothing needs to be done */
+	if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
+	     !tail_has_to_be_packed(inode)) &&
+	    REISERFS_I(inode)->i_prealloc_count <= 0) {
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		return 0;
+	}
+
+	reiserfs_write_lock(inode->i_sb);
+	/* freeing preallocation only involves relogging blocks that
+	 * are already in the current transaction.  preallocation gets
+	 * freed at the end of each transaction, so it is impossible for
+	 * us to log any additional blocks (including quota blocks)
+	 */
+	err = journal_begin(&th, inode->i_sb, 1);
+	if (err) {
+		/* uh oh, we can't allow the inode to go away while there
+		 * are still preallocation blocks pending.  Try to join the
+		 * aborted transaction
+		 */
+		jbegin_failure = err;
+		err = journal_join_abort(&th, inode->i_sb, 1);
+
+		if (err) {
+			/* hmpf, our choices here aren't good.  We can pin the inode
+			 * which will disallow unmount from ever happening, we can
+			 * do nothing, which will corrupt random memory on unmount,
+			 * or we can forcibly remove the file from the preallocation
+			 * list, which will leak blocks on disk.  Let's pin the inode
+			 * and let the admin know what is going on.
+			 */
+			igrab(inode);
+			reiserfs_warning(inode->i_sb, "clm-9001",
+					 "pinning inode %lu because the "
+					 "preallocation can't be freed",
+					 inode->i_ino);
+			goto out;
+		}
+	}
+	reiserfs_update_inode_transaction(inode);
+
+#ifdef REISERFS_PREALLOCATE
+	reiserfs_discard_prealloc(&th, inode);
+#endif
+	err = journal_end(&th, inode->i_sb, 1);
+
+	/* copy back the error code from journal_begin */
+	if (!err)
+		err = jbegin_failure;
+
+	if (!err &&
+	    (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
+	    tail_has_to_be_packed(inode)) {
+
+		/* if a regular file is released by its last holder and it has
+		   been appended (we append by unformatted node only) or its
+		   direct item(s) had to be converted, then it may have to be
+		   converted back (indirect2direct) */
+		err = reiserfs_truncate_file(inode, 0);
+	}
+      out:
+	reiserfs_write_unlock(inode->i_sb);
+	mutex_unlock(&(REISERFS_I(inode)->tailpack));
+	return err;
+}
+
+static int reiserfs_file_open(struct inode *inode, struct file *file)
+{
+	int err = dquot_file_open(inode, file);
+	if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
+		/* somebody might be tailpacking on final close; wait for it */
+		mutex_lock(&(REISERFS_I(inode)->tailpack));
+		atomic_inc(&REISERFS_I(inode)->openers);
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+	}
+	return err;
+}
+
+static void reiserfs_vfs_truncate_file(struct inode *inode)
+{
+	mutex_lock(&(REISERFS_I(inode)->tailpack));
+	reiserfs_truncate_file(inode, 1);
+	mutex_unlock(&(REISERFS_I(inode)->tailpack));
+}
+
+/* Sync a reiserfs file. */
+
+/*
+ * FIXME: sync_mapping_buffers() never has anything to sync.  Can
+ * be removed...
+ */
+
+static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
+			      int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	int err;
+	int barrier_done;
+
+	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (err)
+		return err;
+
+	mutex_lock(&inode->i_mutex);
+	BUG_ON(!S_ISREG(inode->i_mode));
+	err = sync_mapping_buffers(inode->i_mapping);
+	reiserfs_write_lock(inode->i_sb);
+	barrier_done = reiserfs_commit_for_inode(inode);
+	reiserfs_write_unlock(inode->i_sb);
+	if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
+		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+	mutex_unlock(&inode->i_mutex);
+	if (barrier_done < 0)
+		return barrier_done;
+	return (err < 0) ? -EIO : 0;
+}
+
+/* taken from fs/buffer.c:__block_commit_write */
+int reiserfs_commit_page(struct inode *inode, struct page *page,
+			 unsigned from, unsigned to)
+{
+	unsigned block_start, block_end;
+	int partial = 0;
+	unsigned blocksize;
+	struct buffer_head *bh, *head;
+	unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	int new;
+	int logit = reiserfs_file_data_log(inode);
+	struct super_block *s = inode->i_sb;
+	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
+	struct reiserfs_transaction_handle th;
+	int ret = 0;
+
+	th.t_trans_id = 0;
+	blocksize = 1 << inode->i_blkbits;
+
+	if (logit) {
+		reiserfs_write_lock(s);
+		ret = journal_begin(&th, s, bh_per_page + 1);
+		if (ret)
+			goto drop_write_lock;
+		reiserfs_update_inode_transaction(inode);
+	}
+	for (bh = head = page_buffers(page), block_start = 0;
+	     bh != head || !block_start;
+	     block_start = block_end, bh = bh->b_this_page) {
+
+		new = buffer_new(bh);
+		clear_buffer_new(bh);
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to) {
+			if (!buffer_uptodate(bh))
+				partial = 1;
+		} else {
+			set_buffer_uptodate(bh);
+			if (logit) {
+				reiserfs_prepare_for_journal(s, bh, 1);
+				journal_mark_dirty(&th, s, bh);
+			} else if (!buffer_dirty(bh)) {
+				mark_buffer_dirty(bh);
+				/* do data=ordered on any page past the end
+				 * of file and any buffer marked BH_New.
+				 */
+				if (reiserfs_data_ordered(inode->i_sb) &&
+				    (new || page->index >= i_size_index)) {
+					reiserfs_add_ordered_list(inode, bh);
+				}
+			}
+		}
+	}
+	if (logit) {
+		ret = journal_end(&th, s, bh_per_page + 1);
+	      drop_write_lock:
+		reiserfs_write_unlock(s);
+	}
+	/*
+	 * If this is a partial write which happened to make all buffers
+	 * uptodate then we can optimize away a bogus readpage() for
+	 * the next read(). Here we 'discover' whether the page went
+	 * uptodate as a result of this (potentially partial) write.
+	 */
+	if (!partial)
+		SetPageUptodate(page);
+	return ret;
+}
+
+/* Write @count bytes at position @ppos in a file indicated by @file
+   from the buffer @buf.
+
+   generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
+   something simple that works.  It is not for serious use by general purpose filesystems, excepting the one that it was
+   written for (ext2/3).  This is for several reasons:
+
+   * It has no understanding of any filesystem specific optimizations.
+
+   * It enters the filesystem repeatedly for each page that is written.
+
+   * It depends on the reiserfs_get_block() function which, as implemented by reiserfs, performs a costly
+     search_by_key operation for each page it is supplied with.  By contrast reiserfs_file_write() feeds as much as
+     possible at a time to reiserfs, which allows for fewer tree traversals.
+
+   * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
+
+   * Asking the block allocation code for blocks one at a time is slightly less efficient.
+
+   All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
+   use it, but we were in a hurry to make code freeze, and so it couldn't be revised then.  This new code should make
+   things right finally.
+
+   Future Features: providing search_by_key with hints.
+
+*/
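+/* Note on the code below: in this version reiserfs_file_write() is a
+   thin wrapper; apart from clamping writes to v3.5-format items at
+   MAX_NON_LFS it simply delegates to do_sync_write(), so the optimized
+   writer described above is not what actually runs here. */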
+static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going to write into */
+				   const char __user * buf,	/*  pointer to user supplied data
+								   (in userspace) */
+				   size_t count,	/* amount of bytes to write */
+				   loff_t * ppos	/* pointer to position in file that we start writing at. Should be updated to
+							 * new current position before returning. */
+				   )
+{
+	struct inode *inode = file->f_path.dentry->d_inode;	// Inode of the file that we are writing to.
+	/* To simplify coding at this time, we store
+	   locked pages in array for now */
+	struct reiserfs_transaction_handle th;
+	th.t_trans_id = 0;
+
+	/* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
+	 * lying around (most of the disk, in fact). Despite the filesystem
+	 * now being a v3.6 format, the old items still can't support large
+	 * file sizes. Catch this case here, as the rest of the VFS layer is
+	 * oblivious to the different limitations between old and new items.
+	 * reiserfs_setattr catches this for truncates. This chunk is lifted
+	 * from generic_write_checks. */
+	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
+	    *ppos + count > MAX_NON_LFS) {
+		if (*ppos >= MAX_NON_LFS) {
+			return -EFBIG;
+		}
+		if (count > MAX_NON_LFS - (unsigned long)*ppos)
+			count = MAX_NON_LFS - (unsigned long)*ppos;
+	}
+
+	return do_sync_write(file, buf, count, ppos);
+}
+
+const struct file_operations reiserfs_file_operations = {
+	.read = do_sync_read,
+	.write = reiserfs_file_write,
+	.unlocked_ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = reiserfs_compat_ioctl,
+#endif
+	.mmap = generic_file_mmap,
+	.open = reiserfs_file_open,
+	.release = reiserfs_file_release,
+	.fsync = reiserfs_sync_file,
+	.aio_read = generic_file_aio_read,
+	.aio_write = generic_file_aio_write,
+	.splice_read = generic_file_splice_read,
+	.splice_write = generic_file_splice_write,
+	.llseek = generic_file_llseek,
+};
+
+const struct inode_operations reiserfs_file_inode_operations = {
+	.truncate = reiserfs_vfs_truncate_file,
+	.setattr = reiserfs_setattr,
+	.setxattr = reiserfs_setxattr,
+	.getxattr = reiserfs_getxattr,
+	.listxattr = reiserfs_listxattr,
+	.removexattr = reiserfs_removexattr,
+	.permission = reiserfs_permission,
+	.get_acl = reiserfs_get_acl,
+};
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/fix_node.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/fix_node.c
new file mode 100644
index 0000000..430e065
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/fix_node.c
@@ -0,0 +1,2593 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+/**
+ ** old_item_num
+ ** old_entry_num
+ ** set_entry_sizes
+ ** create_virtual_node
+ ** check_left
+ ** check_right
+ ** directory_part_size
+ ** get_num_ver
+ ** set_parameters
+ ** is_leaf_removable
+ ** are_leaves_removable
+ ** get_empty_nodes
+ ** get_lfree
+ ** get_rfree
+ ** is_left_neighbor_in_cache
+ ** decrement_key
+ ** get_far_parent
+ ** get_parents
+ ** can_node_be_removed
+ ** ip_check_balance
+ ** dc_check_balance_internal
+ ** dc_check_balance_leaf
+ ** dc_check_balance
+ ** check_balance
+ ** get_direct_parent
+ ** get_neighbors
+ ** fix_nodes
+ **
+ **
+ **/
+
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "reiserfs.h"
+#include <linux/buffer_head.h>
+
+/* To make any change in the tree we first find the node that contains the
+   item to be changed/deleted, or the position in a node where a new item
+   is to be inserted.  We call this node S.  To do balancing we need to
+   decide what we will shift to the left/right neighbor, or to a new node
+   where the new item will go, etc.  To make this analysis simpler we build
+   a virtual node.  The virtual node is an array of items that will replace
+   the items of node S.  (For instance, if we are going to delete an item,
+   the virtual node does not contain it.)  The virtual node keeps
+   information about item sizes and types, mergeability of the first and
+   last items, and the sizes of all entries in a directory item.  We use
+   this array of items when calculating what we can shift to neighbors and
+   how many nodes we have to have if we do no shifting, if we shift to the
+   left/right neighbor, or to both. */
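+
+/* A worked example (illustrative): if S holds items (A B C) and we insert
+   a new item N in front of B (mode M_INSERT, vn_affected_item_num == 1),
+   the virtual node describes (A N B C); old_item_num() below maps virtual
+   positions 0, 2, 3 back to source positions 0, 1, 2, and the inserted
+   item is skipped.  For M_DELETE of B the virtual node is (A C), and
+   positions 0, 1 map back to 0, 2. */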
+
+/* given an item number in the virtual node, return the number that item has in the source buffer */
+static inline int old_item_num(int new_num, int affected_item_num, int mode)
+{
+	if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
+		return new_num;
+
+	if (mode == M_INSERT) {
+
+		RFALSE(new_num == 0,
+		       "vs-8005: for INSERT mode and item number of inserted item");
+
+		return new_num - 1;
+	}
+
+	RFALSE(mode != M_DELETE,
+	       "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
+	       mode);
+	/* delete mode */
+	return new_num + 1;
+}
+
+static void create_virtual_node(struct tree_balance *tb, int h)
+{
+	struct item_head *ih;
+	struct virtual_node *vn = tb->tb_vn;
+	int new_num;
+	struct buffer_head *Sh;	/* this comes from tb->S[h] */
+
+	Sh = PATH_H_PBUFFER(tb->tb_path, h);
+
+	/* size of changed node */
+	vn->vn_size =
+	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
+
+	/* for internal nodes the array of virtual items is not created */
+	if (h) {
+		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
+		return;
+	}
+
+	/* number of items in virtual node  */
+	vn->vn_nr_item =
+	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
+	    ((vn->vn_mode == M_DELETE) ? 1 : 0);
+
+	/* first virtual item */
+	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
+	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
+	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
+
+	/* first item in the node */
+	ih = B_N_PITEM_HEAD(Sh, 0);
+
+	/* define the mergeability for 0-th item (if it is not being deleted) */
+	if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
+	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
+		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
+
+	/* go through all items that remain in the virtual node (except for the new (inserted) one) */
+	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
+		int j;
+		struct virtual_item *vi = vn->vn_vi + new_num;
+		int is_affected =
+		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);
+
+		if (is_affected && vn->vn_mode == M_INSERT)
+			continue;
+
+		/* get item number in source node */
+		j = old_item_num(new_num, vn->vn_affected_item_num,
+				 vn->vn_mode);
+
+		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
+		vi->vi_ih = ih + j;
+		vi->vi_item = B_I_PITEM(Sh, ih + j);
+		vi->vi_uarea = vn->vn_free_ptr;
+
+		// FIXME: there is no check that the item operation did not
+		// consume too much memory
+		vn->vn_free_ptr +=
+		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
+		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
+			reiserfs_panic(tb->tb_sb, "vs-8030",
+				       "virtual node space consumed");
+
+		if (!is_affected)
+			/* this is not being changed */
+			continue;
+
+		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
+			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
+			vi->vi_new_data = vn->vn_data;	// pointer to data which is going to be pasted
+		}
+	}
+
+	/* virtual inserted item is not defined yet */
+	if (vn->vn_mode == M_INSERT) {
+		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
+
+		RFALSE(vn->vn_ins_ih == NULL,
+		       "vs-8040: item header of inserted item is not specified");
+		vi->vi_item_len = tb->insert_size[0];
+		vi->vi_ih = vn->vn_ins_ih;
+		vi->vi_item = vn->vn_data;
+		vi->vi_uarea = vn->vn_free_ptr;
+
+		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
+			     tb->insert_size[0]);
+	}
+
+	/* to set the right merge flag, take the right delimiting key and check whether it is mergeable */
+	if (tb->CFR[0]) {
+		struct reiserfs_key *key;
+
+		key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
+		if (op_is_left_mergeable(key, Sh->b_size)
+		    && (vn->vn_mode != M_DELETE
+			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
+			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
+			    VI_TYPE_RIGHT_MERGEABLE;
+
+#ifdef CONFIG_REISERFS_CHECK
+		if (op_is_left_mergeable(key, Sh->b_size) &&
+		    !(vn->vn_mode != M_DELETE
+		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
+			/* we delete last item and it could be merged with right neighbor's first item */
+			if (!
+			    (B_NR_ITEMS(Sh) == 1
+			     && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
+			     && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
+				/* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
+				print_block(Sh, 0, -1, -1);
+				reiserfs_panic(tb->tb_sb, "vs-8045",
+					       "rdkey %k, affected item==%d "
+					       "(mode==%c) Must be %c",
+					       key, vn->vn_affected_item_num,
+					       vn->vn_mode, M_DELETE);
+			}
+		}
+#endif
+
+	}
+}
+
+/* using the virtual node, check how many items can be shifted to the left
+   neighbor */
+static void check_left(struct tree_balance *tb, int h, int cur_free)
+{
+	int i;
+	struct virtual_node *vn = tb->tb_vn;
+	struct virtual_item *vi;
+	int d_size, ih_size;
+
+	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
+
+	/* internal level */
+	if (h > 0) {
+		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
+		return;
+	}
+
+	/* leaf level */
+
+	if (!cur_free || !vn->vn_nr_item) {
+		/* no free space or nothing to move */
+		tb->lnum[h] = 0;
+		tb->lbytes = -1;
+		return;
+	}
+
+	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
+	       "vs-8055: parent does not exist or invalid");
+
+	vi = vn->vn_vi;
+	if ((unsigned int)cur_free >=
+	    (vn->vn_size -
+	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
+		/* all contents of S[0] fits into L[0] */
+
+		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
+		       "vs-8055: invalid mode or balance condition failed");
+
+		tb->lnum[0] = vn->vn_nr_item;
+		tb->lbytes = -1;
+		return;
+	}
+
+	d_size = 0, ih_size = IH_SIZE;
+
+	/* the first item may be merged with the last item of the left neighbor */
+	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
+		d_size = -((int)IH_SIZE), ih_size = 0;
+
+	tb->lnum[0] = 0;
+	for (i = 0; i < vn->vn_nr_item;
+	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
+		d_size += vi->vi_item_len;
+		if (cur_free >= d_size) {
+			/* the item can be shifted entirely */
+			cur_free -= d_size;
+			tb->lnum[0]++;
+			continue;
+		}
+
+		/* the item cannot be shifted entirely, try to split it */
+		/* check whether L[0] can hold ih and at least one byte of the item body */
+		if (cur_free <= ih_size) {
+			/* cannot shift even a part of the current item */
+			tb->lbytes = -1;
+			return;
+		}
+		cur_free -= ih_size;
+
+		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
+		if (tb->lbytes != -1)
+			/* count partially shifted item */
+			tb->lnum[0]++;
+
+		break;
+	}
+
+	return;
+}
+
+/* using the virtual node, check how many items can be shifted to the right
+   neighbor */
+static void check_right(struct tree_balance *tb, int h, int cur_free)
+{
+	int i;
+	struct virtual_node *vn = tb->tb_vn;
+	struct virtual_item *vi;
+	int d_size, ih_size;
+
+	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
+
+	/* internal level */
+	if (h > 0) {
+		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
+		return;
+	}
+
+	/* leaf level */
+
+	if (!cur_free || !vn->vn_nr_item) {
+		/* no free space or nothing to move */
+		tb->rnum[h] = 0;
+		tb->rbytes = -1;
+		return;
+	}
+
+	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
+	       "vs-8075: parent does not exist or invalid");
+
+	vi = vn->vn_vi + vn->vn_nr_item - 1;
+	if ((unsigned int)cur_free >=
+	    (vn->vn_size -
+	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
+		/* all contents of S[0] fits into R[0] */
+
+		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
+		       "vs-8080: invalid mode or balance condition failed");
+
+		tb->rnum[h] = vn->vn_nr_item;
+		tb->rbytes = -1;
+		return;
+	}
+
+	d_size = 0, ih_size = IH_SIZE;
+
+	/* the last item may be merged with the first item of the right neighbor */
+	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
+		d_size = -(int)IH_SIZE, ih_size = 0;
+
+	tb->rnum[0] = 0;
+	for (i = vn->vn_nr_item - 1; i >= 0;
+	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
+		d_size += vi->vi_item_len;
+		if (cur_free >= d_size) {
+			/* the item can be shifted entirely */
+			cur_free -= d_size;
+			tb->rnum[0]++;
+			continue;
+		}
+
+		/* check whether R[0] can hold ih and at least one byte of the item body */
+		if (cur_free <= ih_size) {	/* cannot shift even a part of the current item */
+			tb->rbytes = -1;
+			return;
+		}
+
+		/* R[0] can hold the header of the item and at least one byte of its body */
+		cur_free -= ih_size;	/* cur_free is still > 0 */
+
+		tb->rbytes = op_check_right(vi, cur_free);
+		if (tb->rbytes != -1)
+			/* count partially shifted item */
+			tb->rnum[0]++;
+
+		break;
+	}
+
+	return;
+}
+
+/*
+ * from - number of items which are shifted to the left neighbor entirely
+ * to - number of items which are shifted to the right neighbor entirely
+ * from_bytes - number of bytes of a boundary item (or directory entries) which are shifted to the left neighbor
+ * to_bytes - number of bytes of a boundary item (or directory entries) which are shifted to the right neighbor */
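+/* The results are returned through snum012[]: slots 0-2 hold the number of
+   whole items that go to S[0], the first new node and the second new node;
+   slots 3 and 4 (s1bytes/s2bytes, -1 when unused) hold the units of a split
+   item that flow into the first and second new node.  set_parameters()
+   later unpacks this array into tb->s0num ... tb->s2bytes. */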
+static int get_num_ver(int mode, struct tree_balance *tb, int h,
+		       int from, int from_bytes,
+		       int to, int to_bytes, short *snum012, int flow)
+{
+	int i;
+	int cur_free;
+	//    int bytes;
+	int units;
+	struct virtual_node *vn = tb->tb_vn;
+	//    struct virtual_item * vi;
+
+	int total_node_size, max_node_size, current_item_size;
+	int needed_nodes;
+	int start_item,		/* position of item we start filling node from */
+	 end_item,		/* position of item we finish filling node by */
+	 start_bytes,		/* number of first bytes (entries for a directory) of the start_item-th item
+				   that we do not include in the node being filled */
+	 end_bytes;		/* number of last bytes (entries for a directory) of the end_item-th item
+				   that we do not include in the node being filled */
+	int split_item_positions[2];	/* these are positions in the virtual node of
+					   items that are split between S[0] and
+					   S1new, and between S1new and S2new */
+
+	split_item_positions[0] = -1;
+	split_item_positions[1] = -1;
+
+	/* We only create additional nodes if we are in insert or paste mode
+	   or we are in replace mode at the internal level. If h is 0 and
+	   the mode is M_REPLACE then in fix_nodes we change the mode to
+	   paste or insert before we get here in the code.  */
+	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
+	       "vs-8100: insert_size < 0 in overflow");
+
+	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
+
+	/* snum012 [0-2] - number of items that go to S[0], the first
+	   new node and the second new node */
+	snum012[3] = -1;	/* s1bytes */
+	snum012[4] = -1;	/* s2bytes */
+
+	/* internal level */
+	if (h > 0) {
+		i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
+		if (i == max_node_size)
+			return 1;
+		return (i / max_node_size + 1);
+	}
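+	/* For example: moving (to - from) keys with their child pointers into
+	   internal nodes takes (to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE
+	   bytes, the extra DC_SIZE because an internal node holds one more
+	   pointer than keys; the division above then rounds the node count
+	   up unless the total fills a node exactly. */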
+
+	/* leaf level */
+	needed_nodes = 1;
+	total_node_size = 0;
+	cur_free = max_node_size;
+
+	// start from 'from'-th item
+	start_item = from;
+	// skip its first 'start_bytes' units
+	start_bytes = ((from_bytes != -1) ? from_bytes : 0);
+
+	// last included item is the 'end_item'-th one
+	end_item = vn->vn_nr_item - to - 1;
+	// do not count last 'end_bytes' units of 'end_item'-th item
+	end_bytes = (to_bytes != -1) ? to_bytes : 0;
+
+	/* go through all items beginning with the start_item-th item and ending
+	   with the end_item-th item.  Do not count the first 'start_bytes' units of
+	   the 'start_item'-th item and the last 'end_bytes' of the 'end_item'-th item */
+
+	for (i = start_item; i <= end_item; i++) {
+		struct virtual_item *vi = vn->vn_vi + i;
+		int skip_from_end = ((i == end_item) ? end_bytes : 0);
+
+		RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
+
+		/* get size of current item */
+		current_item_size = vi->vi_item_len;
+
+		/* do not take in calculation head part (from_bytes) of from-th item */
+		current_item_size -=
+		    op_part_size(vi, 0 /*from start */ , start_bytes);
+
+		/* do not take in calculation tail part of last item */
+		current_item_size -=
+		    op_part_size(vi, 1 /*from end */ , skip_from_end);
+
+		/* if the item fits into the current node entirely */
+		if (total_node_size + current_item_size <= max_node_size) {
+			snum012[needed_nodes - 1]++;
+			total_node_size += current_item_size;
+			start_bytes = 0;
+			continue;
+		}
+
+		if (current_item_size > max_node_size) {
+			/* the virtual item is longer than the max item size in
+			   a node.  That is impossible for a direct item */
+			RFALSE(is_direct_le_ih(vi->vi_ih),
+			       "vs-8110: "
+			       "direct item length is %d. It can not be longer than %d",
+			       current_item_size, max_node_size);
+			/* we will try to split it */
+			flow = 1;
+		}
+
+		if (!flow) {
+			/* as we do not split items, take new node and continue */
+			needed_nodes++;
+			i--;
+			total_node_size = 0;
+			continue;
+		}
+		// calculate number of item units which fit into node being
+		// filled
+		{
+			int free_space;
+
+			free_space = max_node_size - total_node_size - IH_SIZE;
+			units =
+			    op_check_left(vi, free_space, start_bytes,
+					  skip_from_end);
+			if (units == -1) {
+				/* nothing fits into current node, take new node and continue */
+				needed_nodes++, i--, total_node_size = 0;
+				continue;
+			}
+		}
+
+		/* something fits into the current node */
+		//if (snum012[3] != -1 || needed_nodes != 1)
+		//  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
+		//snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
+		start_bytes += units;
+		snum012[needed_nodes - 1 + 3] = units;
+
+		if (needed_nodes > 2)
+			reiserfs_warning(tb->tb_sb, "vs-8111",
+					 "split_item_position is out of range");
+		snum012[needed_nodes - 1]++;
+		split_item_positions[needed_nodes - 1] = i;
+		needed_nodes++;
+		/* continue from the same item with start_bytes != -1 */
+		start_item = i;
+		i--;
+		total_node_size = 0;
+	}
+
+	// snum012[4] (if it is not -1) contains the number of units that
+	// are to be in S1new, snum012[3] - to be in S0.  They are supposed
+	// to be S1bytes and S2bytes correspondingly, so recalculate
+	if (snum012[4] > 0) {
+		int split_item_num;
+		int bytes_to_r, bytes_to_l;
+		int bytes_to_S1new;
+
+		split_item_num = split_item_positions[1];
+		bytes_to_l =
+		    ((from == split_item_num
+		      && from_bytes != -1) ? from_bytes : 0);
+		bytes_to_r =
+		    ((end_item == split_item_num
+		      && end_bytes != -1) ? end_bytes : 0);
+		bytes_to_S1new =
+		    ((split_item_positions[0] ==
+		      split_item_positions[1]) ? snum012[3] : 0);
+
+		// s2bytes
+		snum012[4] =
+		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
+		    bytes_to_r - bytes_to_l - bytes_to_S1new;
+
+		if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
+		    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
+			reiserfs_warning(tb->tb_sb, "vs-8115",
+					 "not directory or indirect item");
+	}
+
+	/* now we know S2bytes, calculate S1bytes */
+	if (snum012[3] > 0) {
+		int split_item_num;
+		int bytes_to_r, bytes_to_l;
+		int bytes_to_S2new;
+
+		split_item_num = split_item_positions[0];
+		bytes_to_l =
+		    ((from == split_item_num
+		      && from_bytes != -1) ? from_bytes : 0);
+		bytes_to_r =
+		    ((end_item == split_item_num
+		      && end_bytes != -1) ? end_bytes : 0);
+		bytes_to_S2new =
+		    ((split_item_positions[0] == split_item_positions[1]
+		      && snum012[4] != -1) ? snum012[4] : 0);
+
+		// s1bytes
+		snum012[3] =
+		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
+		    bytes_to_r - bytes_to_l - bytes_to_S2new;
+	}
+
+	return needed_nodes;
+}
+
+
+/* Set parameters for balancing.
+ * Performs write of results of analysis of balancing into structure tb,
+ * where it will later be used by the functions that actually do the balancing.
+ * Parameters:
+ *	tb	tree_balance structure;
+ *	h	current level of the node;
+ *	lnum	number of items from S[h] that must be shifted to L[h];
+ *	rnum	number of items from S[h] that must be shifted to R[h];
+ *	blk_num	number of blocks that S[h] will be split into;
+ *	s012	number of items that fall into the split nodes.
+ *	lbytes	number of bytes which flow to the left neighbor from the item that is
+ *		not shifted entirely
+ *	rbytes	number of bytes which flow to the right neighbor from the item that is
+ *		not shifted entirely
+ *	s1bytes	number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
+ */
+
+static void set_parameters(struct tree_balance *tb, int h, int lnum,
+			   int rnum, int blk_num, short *s012, int lb, int rb)
+{
+
+	tb->lnum[h] = lnum;
+	tb->rnum[h] = rnum;
+	tb->blknum[h] = blk_num;
+
+	if (h == 0) {		/* only for leaf level */
+		if (s012 != NULL) {
+			tb->s0num = *s012++;
+			tb->s1num = *s012++;
+			tb->s2num = *s012++;
+			tb->s1bytes = *s012++;
+			tb->s2bytes = *s012;
+		}
+		tb->lbytes = lb;
+		tb->rbytes = rb;
+	}
+	PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
+	PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
+
+	PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
+	PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
+}
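+
+/* Usage sketch: leaf-level callers either pass explicit counts with no
+   s012 array, e.g. set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left,
+   0, NULL, -1, -1) as in is_leaf_removable() below, or pass a slice of the
+   snum012[] array filled by get_num_ver() so that s0num ... s2bytes are set
+   in one go (see the SET_PAR_SHIFT_* macros further down). */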
+
+/* check whether the node disappears if we shift tb->lnum[0] items to the left
+   neighbor and tb->rnum[0] to the right one. */
+static int is_leaf_removable(struct tree_balance *tb)
+{
+	struct virtual_node *vn = tb->tb_vn;
+	int to_left, to_right;
+	int size;
+	int remain_items;
+
+	/* number of items that will be shifted to the left (right) neighbor
+	   entirely */
+	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
+	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
+	remain_items = vn->vn_nr_item;
+
+	/* how many items remain in S[0] after shiftings to neighbors */
+	remain_items -= (to_left + to_right);
+
+	if (remain_items < 1) {
+		/* all content of node can be shifted to neighbors */
+		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
+			       NULL, -1, -1);
+		return 1;
+	}
+
+	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
+		/* S[0] is not removable */
+		return 0;
+
+	/* check, whether we can divide 1 remaining item between neighbors */
+
+	/* get size of remaining item (in item units) */
+	size = op_unit_num(&(vn->vn_vi[to_left]));
+
+	if (tb->lbytes + tb->rbytes >= size) {
+		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
+			       tb->lbytes, -1);
+		return 1;
+	}
+
+	return 0;
+}
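+
+/* Worked example (illustrative): with vn_nr_item == 5, lnum[0] == 2,
+   rnum[0] == 3 and lbytes == rbytes == -1, to_left == 2 and to_right == 3,
+   so remain_items == 0 and S[0] disappears entirely.  Had lbytes been
+   != -1 (a split boundary item), to_left would drop to 1 and one item
+   would remain in S[0]. */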
+
+/* check whether L, S, R can be joined in one node */
+static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
+{
+	struct virtual_node *vn = tb->tb_vn;
+	int ih_size;
+	struct buffer_head *S0;
+
+	S0 = PATH_H_PBUFFER(tb->tb_path, 0);
+
+	ih_size = 0;
+	if (vn->vn_nr_item) {
+		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
+			ih_size += IH_SIZE;
+
+		if (vn->vn_vi[vn->vn_nr_item - 1].
+		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
+			ih_size += IH_SIZE;
+	} else {
+		/* there was only one item and it will be deleted */
+		struct item_head *ih;
+
+		RFALSE(B_NR_ITEMS(S0) != 1,
+		       "vs-8125: item number must be 1: it is %d",
+		       B_NR_ITEMS(S0));
+
+		ih = B_N_PITEM_HEAD(S0, 0);
+		if (tb->CFR[0]
+		    && !comp_short_le_keys(&(ih->ih_key),
+					   B_N_PDELIM_KEY(tb->CFR[0],
+							  tb->rkey[0])))
+			if (is_direntry_le_ih(ih)) {
+				/* The directory must be in a correct state here: that is,
+				   somewhere to the left there should exist a first directory
+				   item.  But the item being deleted can not be that first
+				   one, because its right neighbor is an item of the same
+				   directory.  (The first item always gets deleted last.)
+				   So, the neighbors of the deleted item can be merged, and
+				   we can save ih_size */
+				ih_size = IH_SIZE;
+
+				/* we might check that left neighbor exists and is of the
+				   same directory */
+				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
+				       "vs-8130: first directory item can not be removed until directory is not empty");
+			}
+
+	}
+
+	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
+		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
+		PROC_INFO_INC(tb->tb_sb, leaves_removable);
+		return 1;
+	}
+	return 0;
+
+}
+
+/* when we do not split an item, lnum and rnum are numbers of entire items */
+#define SET_PAR_SHIFT_LEFT \
+if (h)\
+{\
+   int to_l;\
+   \
+   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
+	      (MAX_NR_KEY(Sh) + 1 - lpar);\
+	      \
+	      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
+}\
+else \
+{\
+   if (lset==LEFT_SHIFT_FLOW)\
+     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
+		     tb->lbytes, -1);\
+   else\
+     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
+		     -1, -1);\
+}
+
+#define SET_PAR_SHIFT_RIGHT \
+if (h)\
+{\
+   int to_r;\
+   \
+   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
+   \
+   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
+}\
+else \
+{\
+   if (rset==RIGHT_SHIFT_FLOW)\
+     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
+		  -1, tb->rbytes);\
+   else\
+     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
+		  -1, -1);\
+}
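+
+/* Note: the two macros above expand in place at their call sites in the
+   balance-analysis code and rely on local variables there (h, Sh, vn,
+   lpar/rpar, lset/rset, snum012, lnver/rnver, tb); they are shorthand for
+   the corresponding set_parameters() calls, not standalone functions. */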
+
+static void free_buffers_in_tb(struct tree_balance *tb)
+{
+	int i;
+
+	pathrelse(tb->tb_path);
+
+	for (i = 0; i < MAX_HEIGHT; i++) {
+		brelse(tb->L[i]);
+		brelse(tb->R[i]);
+		brelse(tb->FL[i]);
+		brelse(tb->FR[i]);
+		brelse(tb->CFL[i]);
+		brelse(tb->CFR[i]);
+
+		tb->L[i] = NULL;
+		tb->R[i] = NULL;
+		tb->FL[i] = NULL;
+		tb->FR[i] = NULL;
+		tb->CFL[i] = NULL;
+		tb->CFR[i] = NULL;
+	}
+}
+
+/* Get new buffers for storing new nodes that are created while balancing.
 + * Returns:	REPEAT_SEARCH - schedule occurred while the function worked;
+ *	        CARRY_ON - schedule didn't occur while the function worked;
+ *	        NO_DISK_SPACE - no disk space.
+ */
+/* The function is NOT SCHEDULE-SAFE! */
+static int get_empty_nodes(struct tree_balance *tb, int h)
+{
+	struct buffer_head *new_bh,
+	    *Sh = PATH_H_PBUFFER(tb->tb_path, h);
+	b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
+	int counter, number_of_freeblk, amount_needed,	/* number of needed empty blocks */
+	 retval = CARRY_ON;
+	struct super_block *sb = tb->tb_sb;
+
+	/* number_of_freeblk is the number of empty blocks which have been
+	   acquired for use by the balancing algorithm minus the number of
+	   empty blocks used in the previous levels of the analysis.
+	   number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
+	   occurs after empty blocks are acquired, and the balancing analysis
+	   is then restarted; amount_needed is the number needed by this level
+	   (h) of the balancing analysis.
+
+	   Note that for systems with many processes writing, it would be
+	   more layout optimal to calculate the total number needed by all
+	   levels and then to run reiserfs_new_blocks to get all of them at once.  */
+
+	/* Initialize number_of_freeblk to the amount acquired prior to the restart of
+	   the analysis or 0 if not restarted, then subtract the amount needed
+	   by all of the levels of the tree below h. */
+	/* blknum includes S[h], so we subtract 1 in this calculation */
+	for (counter = 0, number_of_freeblk = tb->cur_blknum;
+	     counter < h; counter++)
+		number_of_freeblk -=
+		    (tb->blknum[counter]) ? (tb->blknum[counter] - 1) : 0;
+
+	/* Allocate missing empty blocks. */
+	/* if Sh == 0  then we are getting a new root */
+	amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
+	/*  Amount_needed = the amount that we need more than the amount that we have. */
+	if (amount_needed > number_of_freeblk)
+		amount_needed -= number_of_freeblk;
+	else			/* If we have enough already then there is nothing to do. */
+		return CARRY_ON;
+
+	/* No need to check quota - quota is not allocated for blocks used for formatted nodes */
+	if (reiserfs_new_form_blocknrs(tb, blocknrs,
+				       amount_needed) == NO_DISK_SPACE)
+		return NO_DISK_SPACE;
+
+	/* for each block number we just got, get a buffer and stick it on FEB */
+	for (blocknr = blocknrs, counter = 0;
+	     counter < amount_needed; blocknr++, counter++) {
+
+		RFALSE(!*blocknr,
+		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
+
+		new_bh = sb_getblk(sb, *blocknr);
+		RFALSE(buffer_dirty(new_bh) ||
+		       buffer_journaled(new_bh) ||
+		       buffer_journal_dirty(new_bh),
+		       "PAP-8140: journaled or dirty buffer %b for the new block",
+		       new_bh);
+
+		/* Put empty buffers into the array. */
+		RFALSE(tb->FEB[tb->cur_blknum],
+		       "PAP-8141: busy slot for new buffer");
+
+		set_buffer_journal_new(new_bh);
+		tb->FEB[tb->cur_blknum++] = new_bh;
+	}
+
+	if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
+		retval = REPEAT_SEARCH;
+
+	return retval;
+}
+
+/* Get free space of the left neighbor, which is stored in the parent
+ * node of the left neighbor.  */
+static int get_lfree(struct tree_balance *tb, int h)
+{
+	struct buffer_head *l, *f;
+	int order;
+
+	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
+	    (l = tb->FL[h]) == NULL)
+		return 0;
+
+	if (f == l)
+		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
+	else {
+		order = B_NR_ITEMS(l);
+		f = l;
+	}
+
+	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
+}
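+
+/* Note: a child's free space can be computed from its parent alone, as
+   MAX_CHILD_SIZE(f) - dc_size(child pointer), so neither get_lfree()
+   above nor get_rfree() below needs to read the neighbor block itself. */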
+
+/* Get free space of the right neighbor,
+ * which is stored in the parent node of the right neighbor.
+ */
+static int get_rfree(struct tree_balance *tb, int h)
+{
+	struct buffer_head *r, *f;
+	int order;
+
+	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
+	    (r = tb->FR[h]) == NULL)
+		return 0;
+
+	if (f == r)
+		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
+	else {
+		order = 0;
+		f = r;
+	}
+
+	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
+
+}
+
+/* Check whether left neighbor is in memory. */
+static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
+{
+	struct buffer_head *father, *left;
+	struct super_block *sb = tb->tb_sb;
+	b_blocknr_t left_neighbor_blocknr;
+	int left_neighbor_position;
+
+	/* Father of the left neighbor does not exist. */
+	if (!tb->FL[h])
+		return 0;
+
+	/* Calculate father of the node to be balanced. */
+	father = PATH_H_PBUFFER(tb->tb_path, h + 1);
+
+	RFALSE(!father ||
+	       !B_IS_IN_TREE(father) ||
+	       !B_IS_IN_TREE(tb->FL[h]) ||
+	       !buffer_uptodate(father) ||
+	       !buffer_uptodate(tb->FL[h]),
+	       "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
+	       father, tb->FL[h]);
+
+	/* Get the position of the pointer to the left neighbor in the left father. */
+	left_neighbor_position = (father == tb->FL[h]) ?
+	    tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
+	/* Get left neighbor block number. */
+	left_neighbor_blocknr =
+	    B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
+	/* Look for the left neighbor in the cache. */
+	if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
+
+		RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
+		       "vs-8170: left neighbor (%b %z) is not in the tree",
+		       left, left);
+		put_bh(left);
+		return 1;
+	}
+
+	return 0;
+}
+
+#define LEFT_PARENTS  'l'
+#define RIGHT_PARENTS 'r'
+
+static void decrement_key(struct cpu_key *key)
+{
+	// call item specific function for this key
+	item_ops[cpu_key_k_type(key)]->decrement_key(key);
+}
+
+/* Calculate far left/right parent of the left/right neighbor of the current node, that
+ * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
+ * Calculate left/right common parent of the current node and L[h]/R[h].
+ * Calculate left/right delimiting key position.
+ * Returns:	REPEAT_SEARCH    - path in the tree is not correct, or schedule occurred;
+ *	        CARRY_ON         - schedule didn't occur while the function worked;
+ *	        IO_ERROR         - an I/O error occurred while reading a neighbor's parent.
+ */
+static int get_far_parent(struct tree_balance *tb,
+			  int h,
+			  struct buffer_head **pfather,
+			  struct buffer_head **pcom_father, char c_lr_par)
+{
+	struct buffer_head *parent;
+	INITIALIZE_PATH(s_path_to_neighbor_father);
+	struct treepath *path = tb->tb_path;
+	struct cpu_key s_lr_father_key;
+	int counter,
+	    position = INT_MAX,
+	    first_last_position = 0,
+	    path_offset = PATH_H_PATH_OFFSET(path, h);
+
+	/* Starting from F[h] go upwards in the tree, and look for the common
+	   ancestor of F[h] and its l/r neighbor, which should be obtained. */
+
+	counter = path_offset;
+
+	RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
+	       "PAP-8180: invalid path length");
+
+	for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
+		/* Check whether parent of the current buffer in the path is really parent in the tree. */
+		if (!B_IS_IN_TREE
+		    (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
+			return REPEAT_SEARCH;
+		/* Check whether position in the parent is correct. */
+		if ((position =
+		     PATH_OFFSET_POSITION(path,
+					  counter - 1)) >
+		    B_NR_ITEMS(parent))
+			return REPEAT_SEARCH;
+		/* Check whether parent at the path really points to the child. */
+		if (B_N_CHILD_NUM(parent, position) !=
+		    PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
+			return REPEAT_SEARCH;
+		/* We have found the common father if the position in the parent is not equal to the first/last one. */
+		if (c_lr_par == RIGHT_PARENTS)
+			first_last_position = B_NR_ITEMS(parent);
+		if (position != first_last_position) {
+			*pcom_father = parent;
+			get_bh(*pcom_father);
+			break;
+		}
+	}
+
+	/* if we are in the root of the tree, then there is no common father */
+	if (counter == FIRST_PATH_ELEMENT_OFFSET) {
+		/* Check whether first buffer in the path is the root of the tree. */
+		if (PATH_OFFSET_PBUFFER
+		    (tb->tb_path,
+		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
+		    SB_ROOT_BLOCK(tb->tb_sb)) {
+			*pfather = *pcom_father = NULL;
+			return CARRY_ON;
+		}
+		return REPEAT_SEARCH;
+	}
+
+	RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
+	       "PAP-8185: (%b %z) level too small",
+	       *pcom_father, *pcom_father);
+
+	/* Check whether the common parent is locked. */
+
+	if (buffer_locked(*pcom_father)) {
+
+		/* Release the write lock while the buffer is busy */
+		reiserfs_write_unlock(tb->tb_sb);
+		__wait_on_buffer(*pcom_father);
+		reiserfs_write_lock(tb->tb_sb);
+		if (FILESYSTEM_CHANGED_TB(tb)) {
+			brelse(*pcom_father);
+			return REPEAT_SEARCH;
+		}
+	}
+
+	/* So, we got the common parent of the current node and its left/right
+	   neighbor.  Now we are getting the parent of the left/right neighbor. */
+
+	/* Form key to get parent of the left/right neighbor. */
+	if (c_lr_par == LEFT_PARENTS)
+		tb->lkey[h - 1] = position - 1;
+	else
+		tb->rkey[h - 1] = position;
+
+	le_key2cpu_key(&s_lr_father_key,
+		       B_N_PDELIM_KEY(*pcom_father,
+				      (c_lr_par == LEFT_PARENTS) ?
+				      tb->lkey[h - 1] : tb->rkey[h - 1]));
+
+	if (c_lr_par == LEFT_PARENTS)
+		decrement_key(&s_lr_father_key);
+
+	if (search_by_key(tb->tb_sb, &s_lr_father_key,
+			  &s_path_to_neighbor_father, h + 1) == IO_ERROR)
+		/* path is released */
+		return IO_ERROR;
+
+	if (FILESYSTEM_CHANGED_TB(tb)) {
+		pathrelse(&s_path_to_neighbor_father);
+		brelse(*pcom_father);
+		return REPEAT_SEARCH;
+	}
+
+	*pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
+
+	RFALSE(B_LEVEL(*pfather) != h + 1,
+	       "PAP-8190: (%b %z) level mismatch", *pfather, *pfather);
+	RFALSE(s_path_to_neighbor_father.path_length <
+	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
+
+	/* Drop the last path element so that pathrelse() below does not
+	   release the buffer returned in *pfather. */
+	s_path_to_neighbor_father.path_length--;
+	pathrelse(&s_path_to_neighbor_father);
+	return CARRY_ON;
+}
+
+/* Get parents of the neighbors of the node in the path (S[path_offset]) and
+ * the common parents of S[path_offset] and L[path_offset]/R[path_offset]:
+ * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset],
+ * CFR[path_offset].
+ * Calculate the positions of the left and right delimiting keys:
+ * lkey[path_offset], rkey[path_offset].
+ * Returns:	REPEAT_SEARCH or IO_ERROR - the path is stale or a far parent
+ *		could not be read;
+ *	        CARRY_ON - all parents were obtained.
+ */
+static int get_parents(struct tree_balance *tb, int h)
+{
+	struct treepath *path = tb->tb_path;
+	int position,
+	    ret,
+	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
+	struct buffer_head *curf, *curcf;
+
+	/* Current node is the root of the tree or will be root of the tree */
+	if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
+		/* The root cannot have parents.  Release nodes which were
+		   previously obtained as parents of the current node's
+		   neighbors. */
+		brelse(tb->FL[h]);
+		brelse(tb->CFL[h]);
+		brelse(tb->FR[h]);
+		brelse(tb->CFR[h]);
+		tb->FL[h]  = NULL;
+		tb->CFL[h] = NULL;
+		tb->FR[h]  = NULL;
+		tb->CFR[h] = NULL;
+		return CARRY_ON;
+	}
+
+	/* Get parent FL[path_offset] of L[path_offset]. */
+	position = PATH_OFFSET_POSITION(path, path_offset - 1);
+	if (position) {
+		/* Current node is not the first child of its parent. */
+		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+		curcf = curf;
+		/* curf == curcf: take one reference for each pointer */
+		get_bh(curf);
+		get_bh(curf);
+		tb->lkey[h] = position - 1;
+	} else {
+		/* Calculate the current parent of L[path_offset], which is the
+		   left neighbor of the current node.  Calculate the current
+		   common parent of L[path_offset] and the current node.
+		   Note that CFL[path_offset] is not equal to FL[path_offset]
+		   and CFL[path_offset] is not equal to F[path_offset].
+		   Calculate lkey[path_offset]. */
+		if ((ret = get_far_parent(tb, h + 1, &curf,
+						  &curcf,
+						  LEFT_PARENTS)) != CARRY_ON)
+			return ret;
+	}
+
+	brelse(tb->FL[h]);
+	tb->FL[h] = curf;	/* New initialization of FL[h]. */
+	brelse(tb->CFL[h]);
+	tb->CFL[h] = curcf;	/* New initialization of CFL[h]. */
+
+	RFALSE((curf && !B_IS_IN_TREE(curf)) ||
+	       (curcf && !B_IS_IN_TREE(curcf)),
+	       "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
+
+	/* Get parent FR[h] of R[h]. */
+	if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
+		/* Current node is the last child of F[h], so FR[h] != F[h].
+		   Calculate the current parent of R[h], which is the right
+		   neighbor of F[h], and the current common parent of R[h]
+		   and the current node.  Note that CFR[h] is not equal to
+		   FR[path_offset] and CFR[h] is not equal to F[h]. */
+		if ((ret =
+		     get_far_parent(tb, h + 1, &curf, &curcf,
+				    RIGHT_PARENTS)) != CARRY_ON)
+			return ret;
+	} else {
+		/* Current node is not the last child of its parent F[h]. */
+		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+		curcf = curf;
+		/* curf == curcf: take one reference for each pointer */
+		get_bh(curf);
+		get_bh(curf);
+		tb->rkey[h] = position;
+	}
+
+	brelse(tb->FR[h]);
+	/* New initialization of FR[path_offset]. */
+	tb->FR[h] = curf;
+
+	brelse(tb->CFR[h]);
+	/* New initialization of CFR[path_offset]. */
+	tb->CFR[h] = curcf;
+
+	RFALSE((curf && !B_IS_IN_TREE(curf)) ||
+	       (curcf && !B_IS_IN_TREE(curcf)),
+	       "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);
+
+	return CARRY_ON;
+}
+
+/* It is possible to remove a node as a result of shifting to the neighbors
+   even when we insert or paste an item. */
+static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
+				      struct tree_balance *tb, int h)
+{
+	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
+	int levbytes = tb->insert_size[h];
+	struct item_head *ih;
+	struct reiserfs_key *r_key = NULL;
+
+	ih = B_N_PITEM_HEAD(Sh, 0);
+	if (tb->CFR[h])
+		r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
+
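+	/* The test below asks whether L, S and R together lack the room to
+	   absorb all of S[h] plus the insertion.  The IH_SIZE terms credit
+	   the item header saved when a boundary item merges with a neighbor
+	   (leaf level only); the KEY_SIZE term accounts for the extra
+	   delimiting key an internal merge must pull down from the parent. */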
+	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
+	    /* shifting may merge items which might save space */
+	    - ((!h && op_is_left_mergeable(&(ih->ih_key), Sh->b_size))
+	       ? IH_SIZE : 0)
+	    - ((!h && r_key && op_is_left_mergeable(r_key, Sh->b_size))
+	       ? IH_SIZE : 0)
+	    + ((h) ? KEY_SIZE : 0)) {
+		/* node cannot be removed */
+		if (sfree >= levbytes) {
+			/* new item fits into node S[h] without any shifting */
+			if (!h)
+				tb->s0num = B_NR_ITEMS(Sh) +
+				    ((mode == M_INSERT) ? 1 : 0);
+			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+			return NO_BALANCING_NEEDED;
+		}
+	}
+	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
+	return !NO_BALANCING_NEEDED;
+}
+
+/* Check whether current node S[h] is balanced when increasing its size by
+ * Inserting or Pasting.
+ * Calculate parameters for balancing for current level h.
+ * Parameters:
+ *	tb	tree_balance structure;
+ *	h	current level of the node;
+ *	inum	item number in S[h];
+ *	mode	i - insert, p - paste;
+ * Returns:	NO_BALANCING_NEEDED - no balancing for higher levels needed;
+ *	        CARRY_ON            - balancing for higher levels needed;
+ *	        NO_DISK_SPACE       - no disk space;
+ *	        REPEAT_SEARCH       - schedule occurred and the search must
+ *	                              be repeated.
+ */
+/* ip means Inserting or Pasting */
+static int ip_check_balance(struct tree_balance *tb, int h)
+{
+	struct virtual_node *vn = tb->tb_vn;
+	int levbytes,		/* Number of bytes that must be inserted into
+				   (negative if bytes are deleted from) the
+				   buffer containing the node being balanced.
+				   The mnemonic: the attempted change, in
+				   bytes, of the space used at this level. */
+	 ret;
+
+	int lfree, sfree, rfree;	/* free space in L, S and R */
+
+	/* nver is short for number of vertices; lnver is that number if we
+	   shift to the left, rnver if we shift to the right, and lrnver if
+	   we shift in both directions.  The goal is to minimize first the
+	   number of vertices, second the number of vertices whose contents
+	   are changed by shifting, and third the number of uncached vertices
+	   whose contents are changed by shifting and must be read from
+	   disk. */
+	int nver, lnver, rnver, lrnver;
+
+	/* Used at the leaf level only: S0 = S[0] is the node being balanced;
+	   sInum [I = 0,1,2] is the number of items that will remain in node
+	   SI after balancing.  S1 and S2 are new nodes that might be
+	   created. */
+
+	/* We perform 8 calls to get_num_ver().  For each call we calculate
+	   five parameters; the 4th is s1bytes and the 5th s2bytes. */
+	short snum012[40] = { 0, };	/* s0num, s1num, s2num for 8 cases:
+					   0,1 - do not shift, and do not
+					         shift but bottle
+					   2 - shift only whole items to the left
+					   3 - shift to the left and bottle as
+					       much as possible
+					   4,5 - shift to the right (whole items
+					         and as much as possible)
+					   6,7 - shift in both directions (whole
+					         items and as much as possible)
+					 */
+
+	/* Sh is the node whose balance is currently being checked */
+	struct buffer_head *Sh;
+
+	Sh = PATH_H_PBUFFER(tb->tb_path, h);
+	levbytes = tb->insert_size[h];
+
+	/* Calculate balance parameters for creating new root. */
+	if (!Sh) {
+		if (!h)
+			reiserfs_panic(tb->tb_sb, "vs-8210",
+				       "S[0] can not be 0");
+		switch (ret = get_empty_nodes(tb, h)) {
+		case CARRY_ON:
+			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */
+
+		case NO_DISK_SPACE:
+		case REPEAT_SEARCH:
+			return ret;
+		default:
+			reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
+				       "return value of get_empty_nodes");
+		}
+	}
+
+	if ((ret = get_parents(tb, h)) != CARRY_ON)	/* get parents of S[h] neighbors. */
+		return ret;
+
+	sfree = B_FREE_SPACE(Sh);
+
+	/* get free space of neighbors */
+	rfree = get_rfree(tb, h);
+	lfree = get_lfree(tb, h);
+
+	if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
+	    NO_BALANCING_NEEDED)
+		/* and new item fits into node S[h] without any shifting */
+		return NO_BALANCING_NEEDED;
+
+	create_virtual_node(tb, h);
+
+	/*
+	   determine the maximal number of items we can shift to the left
+	   neighbor (in the tb structure) and the maximal number of bytes
+	   that can flow to the left neighbor from the leftmost liquid item
+	   that cannot be shifted from S[0] entirely (returned value)
+	 */
+	check_left(tb, h, lfree);
+
+	/*
+	   determine the maximal number of items we can shift to the right
+	   neighbor (in the tb structure) and the maximal number of bytes
+	   that can flow to the right neighbor from the rightmost liquid item
+	   that cannot be shifted from S[0] entirely (returned value)
+	 */
+	check_right(tb, h, rfree);
+
+	/* all contents of internal node S[h] can be moved into its
+	   neighbors, S[h] will be removed after balancing */
+	if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
+		int to_r;
+
+		/* Since we are working on internal nodes, and our internal
+		   nodes have fixed-size entries, we can balance by the
+		   number of items rather than the space they consume.  In
+		   this routine we set the left node equal to the right node,
+		   allowing a difference of less than or equal to 1 child
+		   pointer. */
+		to_r =
+		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
+		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
+						tb->rnum[h]);
+		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
+			       -1, -1);
+		return CARRY_ON;
+	}
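+	/* A sketch of the arithmetic above: with N = vn_nr_item + 1 child
+	   pointers to distribute, the formula reduces (up to integer
+	   truncation) to
+	       to_r = rnum[h] - (lnum[h] + rnum[h] - N) / 2,
+	   i.e. each neighbor gives up half of the combined surplus capacity,
+	   so L[h] and R[h] end up within one child pointer of each other.
+	   For example, N = 7, lnum = 5, rnum = 4 gives to_r = 3 pointers for
+	   the right neighbor and N - to_r = 4 for the left. */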
+
+	/* this checks the balance condition: no two neighboring nodes may fit into one node */
+	RFALSE(h &&
+	       (tb->lnum[h] >= vn->vn_nr_item + 1 ||
+		tb->rnum[h] >= vn->vn_nr_item + 1),
+	       "vs-8220: tree is not balanced on internal level");
+	RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
+		      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
+	       "vs-8225: tree is not balanced on leaf level");
+
+	/* all contents of S[0] can be moved into its neighbors
+	   S[0] will be removed after balancing. */
+	if (!h && is_leaf_removable(tb))
+		return CARRY_ON;
+
+	/* Why do we perform this check here rather than earlier?  Answer: the
+	   cases above can win one node, and we have already established
+	   above, when checking whether S[0] is removable, that it is not
+	   removable in principle. */
+	if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
+		if (!h)
+			tb->s0num = vn->vn_nr_item;
+		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+		return NO_BALANCING_NEEDED;
+	}
+
+	{
+		int lpar, rpar, nset, lset, rset, lrset;
+		/*
+		 * regular overflowing of the node
+		 */
+
+		/* get_num_ver works in 2 modes (FLOW & NO_FLOW);
+		   lpar, rpar - number of items we can shift to the left/right
+		   neighbor (including the split item);
+		   nset, lset, rset, lrset - show whether flowing items give
+		   better packing
+		 */
+#define FLOW 1
+#define NO_FLOW 0		/* do no splitting */
+
+		/* we choose one of the following */
+#define NOTHING_SHIFT_NO_FLOW	0
+#define NOTHING_SHIFT_FLOW	5
+#define LEFT_SHIFT_NO_FLOW	10
+#define LEFT_SHIFT_FLOW		15
+#define RIGHT_SHIFT_NO_FLOW	20
+#define RIGHT_SHIFT_FLOW	25
+#define LR_SHIFT_NO_FLOW	30
+#define LR_SHIFT_FLOW		35
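+		/* Each of the eight cases above owns five consecutive shorts
+		   in snum012: s0num, s1num, s2num, then s1bytes and s2bytes
+		   for the bottled (FLOW) variants -- hence the offsets in
+		   steps of 5 and the array size of 40. */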
+
+		lpar = tb->lnum[h];
+		rpar = tb->rnum[h];
+
+		/* Calculate the number of blocks S[h] must be split into when
+		   nothing is shifted to the neighbors, as well as the number
+		   of items in each part of the split node (the s012 numbers),
+		   and the number of bytes (s1bytes) of the split item that
+		   flow into S1, if any. */
+		nset = NOTHING_SHIFT_NO_FLOW;
+		nver = get_num_ver(vn->vn_mode, tb, h,
+				   0, -1, h ? vn->vn_nr_item : 0, -1,
+				   snum012, NO_FLOW);
+
+		if (!h) {
+			int nver1;
+
+			/* Note that in this case we try to bottle between S[0] and S1 (S1 being the first new node). */
+			nver1 = get_num_ver(vn->vn_mode, tb, h,
+					    0, -1, 0, -1,
+					    snum012 + NOTHING_SHIFT_FLOW, FLOW);
+			if (nver > nver1)
+				nset = NOTHING_SHIFT_FLOW, nver = nver1;
+		}
+
+		/* Calculate the number of blocks S[h] must be split into when
+		   l_shift_num first items, and l_shift_bytes of the rightmost
+		   liquid item to be shifted, are shifted to the left neighbor;
+		   also the number of items in each part of the split node (the
+		   s012 numbers) and the number of bytes (s1bytes) of the split
+		   item that flow into S1, if any.
+		 */
+		lset = LEFT_SHIFT_NO_FLOW;
+		lnver = get_num_ver(vn->vn_mode, tb, h,
+				    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
+				    -1, h ? vn->vn_nr_item : 0, -1,
+				    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
+		if (!h) {
+			int lnver1;
+
+			lnver1 = get_num_ver(vn->vn_mode, tb, h,
+					     lpar -
+					     ((tb->lbytes != -1) ? 1 : 0),
+					     tb->lbytes, 0, -1,
+					     snum012 + LEFT_SHIFT_FLOW, FLOW);
+			if (lnver > lnver1)
+				lset = LEFT_SHIFT_FLOW, lnver = lnver1;
+		}
+
+		/* Calculate the number of blocks S[h] must be split into when
+		   r_shift_num first items, and r_shift_bytes of the leftmost
+		   liquid item to be shifted, are shifted to the right neighbor;
+		   also the number of items in each part of the split node (the
+		   s012 numbers) and the number of bytes (s1bytes) of the split
+		   item that flow into S1, if any.
+		 */
+		rset = RIGHT_SHIFT_NO_FLOW;
+		rnver = get_num_ver(vn->vn_mode, tb, h,
+				    0, -1,
+				    h ? (vn->vn_nr_item - rpar)
+				      : (rpar - ((tb->rbytes != -1) ? 1 : 0)),
+				    -1,
+				    snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
+		if (!h) {
+			int rnver1;
+
+			rnver1 = get_num_ver(vn->vn_mode, tb, h,
+					     0, -1,
+					     (rpar -
+					      ((tb->rbytes != -1) ? 1 : 0)),
+					     tb->rbytes,
+					     snum012 + RIGHT_SHIFT_FLOW, FLOW);
+
+			if (rnver > rnver1)
+				rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
+		}
+
+		/* Calculate the number of blocks S[h] must be split into when
+		   items are shifted in both directions, as well as the number
+		   of items in each part of the split node (the s012 numbers)
+		   and the number of bytes (s1bytes) of the split item that
+		   flow into S1, if any.
+		 */
+		lrset = LR_SHIFT_NO_FLOW;
+		lrnver = get_num_ver(vn->vn_mode, tb, h,
+				     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
+				     -1,
+				     h ? (vn->vn_nr_item - rpar)
+				       : (rpar - ((tb->rbytes != -1) ? 1 : 0)),
+				     -1,
+				     snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
+		if (!h) {
+			int lrnver1;
+
+			lrnver1 = get_num_ver(vn->vn_mode, tb, h,
+					      lpar -
+					      ((tb->lbytes != -1) ? 1 : 0),
+					      tb->lbytes,
+					      (rpar -
+					       ((tb->rbytes != -1) ? 1 : 0)),
+					      tb->rbytes,
+					      snum012 + LR_SHIFT_FLOW, FLOW);
+			if (lrnver > lrnver1)
+				lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
+		}
+
+		/* Our general shifting strategy is:
+		   1) to minimize the number of new nodes;
+		   2) to minimize the number of neighbors involved in shifting;
+		   3) to minimize the number of disk reads. */
+
+		/* we can win TWO or ONE nodes by shifting in both directions */
+		if (lrnver < lnver && lrnver < rnver) {
+			RFALSE(h &&
+			       (tb->lnum[h] != 1 ||
+				tb->rnum[h] != 1 ||
+				lrnver != 1 || rnver != 2 || lnver != 2
+				|| h != 1), "vs-8230: bad h");
+			if (lrset == LR_SHIFT_FLOW)
+				set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
+					       lrnver, snum012 + lrset,
+					       tb->lbytes, tb->rbytes);
+			else
+				set_parameters(tb, h,
+					       tb->lnum[h] -
+					       ((tb->lbytes == -1) ? 0 : 1),
+					       tb->rnum[h] -
+					       ((tb->rbytes == -1) ? 0 : 1),
+					       lrnver, snum012 + lrset, -1, -1);
+
+			return CARRY_ON;
+		}
+
+		/* if shifting doesn't lead to better packing then don't shift */
+		if (nver == lrnver) {
+			set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
+				       -1);
+			return CARRY_ON;
+		}
+
+		/* now we know that for better packing shifting in only one
+		   direction, either to the left or to the right, is required */
+
+		/*  if shifting to the left is better than shifting to the right */
+		if (lnver < rnver) {
+			SET_PAR_SHIFT_LEFT;
+			return CARRY_ON;
+		}
+
+		/* if shifting to the right is better than shifting to the left */
+		if (lnver > rnver) {
+			SET_PAR_SHIFT_RIGHT;
+			return CARRY_ON;
+		}
+
+		/* now shifting in either direction gives the same number
+		   of nodes and we can make use of the cached neighbors */
+		if (is_left_neighbor_in_cache(tb, h)) {
+			SET_PAR_SHIFT_LEFT;
+			return CARRY_ON;
+		}
+
+		/* shift to the right regardless of whether the right neighbor is in the cache */
+		SET_PAR_SHIFT_RIGHT;
+		return CARRY_ON;
+	}
+}
+
+/* Check whether current node S[h] is balanced when Decreasing its size by
+ * Deleting or Cutting for INTERNAL node of S+tree.
+ * Calculate parameters for balancing for current level h.
+ * Parameters:
+ *	tb	tree_balance structure;
+ *	h	current level of the node;
+ *	inum	item number in S[h];
+ *	mode	d - delete, c - cut;
+ * Returns:	NO_BALANCING_NEEDED - no balancing for higher levels needed;
+ *	        CARRY_ON            - balancing for higher levels needed;
+ *	        REPEAT_SEARCH       - schedule occurred and the search must
+ *	                              be repeated.
+ *
+ * Note: Items of internal nodes have fixed size, so the balance condition
+ * for the internal part of the S+tree is the same as for B-trees.
+ */
+static int dc_check_balance_internal(struct tree_balance *tb, int h)
+{
+	struct virtual_node *vn = tb->tb_vn;
+
+	/* Sh is the node whose balance is currently being checked,
+	   and Fh is its father.  */
+	struct buffer_head *Sh, *Fh;
+	int maxsize, ret;
+	int lfree, rfree;	/* free space in L and R */
+
+	Sh = PATH_H_PBUFFER(tb->tb_path, h);
+	Fh = PATH_H_PPARENT(tb->tb_path, h);
+
+	maxsize = MAX_CHILD_SIZE(Sh);
+
+	/* Using tb->insert_size[h], which is negative in this case,
+	   create_virtual_node calculates new_nr_item: the number of items
+	   the node would have if the operation were performed without
+	   balancing. */
+	create_virtual_node(tb, h);
+
+	if (!Fh) {		/* S[h] is the root. */
+		if (vn->vn_nr_item > 0) {
+			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */
+		}
+		/* new_nr_item == 0.
+		 * Current root will be deleted resulting in
+		 * decrementing the tree height. */
+		set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
+		return CARRY_ON;
+	}
+
+	if ((ret = get_parents(tb, h)) != CARRY_ON)
+		return ret;
+
+	/* get free space of neighbors */
+	rfree = get_rfree(tb, h);
+	lfree = get_lfree(tb, h);
+
+	/* determine maximal number of items we can fit into neighbors */
+	check_left(tb, h, lfree);
+	check_right(tb, h, rfree);
+
+	/* The balance condition for the internal node holds; in this case we
+	   balance only if it leads to better packing. */
+	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {
+		/* Here we join S[h] with one of its neighbors, which is
+		   impossible with greater values of new_nr_item. */
+		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {
+			if (tb->lnum[h] >= vn->vn_nr_item + 1) {
+				/* All contents of S[h] can be moved to L[h]. */
+				int n;
+				int order_L;
+
+				n = PATH_H_B_ITEM_ORDER(tb->tb_path, h);
+				order_L = (n == 0) ?
+				    B_NR_ITEMS(tb->FL[h]) : n - 1;
+				n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
+				    (DC_SIZE + KEY_SIZE);
+				set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
+					       -1);
+				return CARRY_ON;
+			}
+
+			if (tb->rnum[h] >= vn->vn_nr_item + 1) {
+				/* All contents of S[h] can be moved to R[h]. */
+				int n;
+				int order_R;
+
+				n = PATH_H_B_ITEM_ORDER(tb->tb_path, h);
+				order_R = (n == B_NR_ITEMS(Fh)) ? 0 : n + 1;
+				n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
+				    (DC_SIZE + KEY_SIZE);
+				set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
+					       -1);
+				return CARRY_ON;
+			}
+		}
+
+		if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
+			/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
+			int to_r;
+
+			to_r =
+			    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
+			     tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
+			    (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
+			set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
+				       0, NULL, -1, -1);
+			return CARRY_ON;
+		}
+
+		/* Balancing does not lead to better packing. */
+		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+		return NO_BALANCING_NEEDED;
+	}
+
+	/* The current node contains an insufficient number of items; balancing is required. */
+	/* Check whether we can merge S[h] with left neighbor. */
+	if (tb->lnum[h] >= vn->vn_nr_item + 1)
+		if (is_left_neighbor_in_cache(tb, h)
+		    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
+			int n;
+			int order_L;
+
+			n = PATH_H_B_ITEM_ORDER(tb->tb_path, h);
+			order_L = (n == 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
+			n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
+			    (DC_SIZE + KEY_SIZE);
+			set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
+			return CARRY_ON;
+		}
+
+	/* Check whether we can merge S[h] with right neighbor. */
+	if (tb->rnum[h] >= vn->vn_nr_item + 1) {
+		int n;
+		int order_R;
+
+		n = PATH_H_B_ITEM_ORDER(tb->tb_path, h);
+		order_R = (n == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
+		n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
+		    (DC_SIZE + KEY_SIZE);
+		set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
+		return CARRY_ON;
+	}
+
+	/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
+	if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
+		int to_r;
+
+		to_r =
+		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
+		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
+						tb->rnum[h]);
+		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
+			       -1, -1);
+		return CARRY_ON;
+	}
+
+	/* For internal nodes try to borrow item from a neighbor */
+	RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
+
+	/* Borrow one or two items from the left neighbor if it is cached or
+	   there is no right parent; otherwise borrow from the right one. */
+	if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
+		int from_l;
+
+		from_l =
+		    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
+		     1) / 2 - (vn->vn_nr_item + 1);
+		set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
+		return CARRY_ON;
+	}
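+	/* Otherwise borrow from the right neighbor.  MAX_NR_KEY(Sh) + 1 -
+	   rnum[h] is, roughly, the number of child pointers R[h] currently
+	   holds, so the expression below moves about half of the combined
+	   pointers of S[h] and R[h] into S[h], evening the two nodes out. */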
+
+	set_parameters(tb, h, 0,
+		       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
+			  1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
+	return CARRY_ON;
+}
+
+/* Check whether current node S[h] is balanced when Decreasing its size by
+ * Deleting or Truncating for LEAF node of S+tree.
+ * Calculate parameters for balancing for current level h.
+ * Parameters:
+ *	tb	tree_balance structure;
+ *	h	current level of the node;
+ *	inum	item number in S[h];
+ *	mode	d - delete, c - cut (truncate);
+ * Returns:	NO_BALANCING_NEEDED - no balancing for higher levels needed;
+ *	        CARRY_ON            - balancing for higher levels needed;
+ *	        REPEAT_SEARCH       - schedule occurred and the search must
+ *	                              be repeated.
+ */
+static int dc_check_balance_leaf(struct tree_balance *tb, int h)
+{
+	struct virtual_node *vn = tb->tb_vn;
+
+	/* Number of bytes that must be inserted into (negative when bytes
+	   are deleted from) the buffer containing the node being balanced.
+	   The mnemonic: the attempted change, in bytes, of the space used
+	   at this level. */
+	int levbytes;
+	/* the maximal item size */
+	int maxsize, ret;
+	/* S0 is the node whose balance is currently being checked,
+	   and F0 is its father.  */
+	struct buffer_head *S0, *F0;
+	int lfree, rfree;	/* free space in L and R */
+
+	S0 = PATH_H_PBUFFER(tb->tb_path, 0);
+	F0 = PATH_H_PPARENT(tb->tb_path, 0);
+
+	levbytes = tb->insert_size[h];
+
+	maxsize = MAX_CHILD_SIZE(S0);	/* maximal possible size of an item */
+
+	if (!F0) {		/* S[0] is the root now. */
+
+		RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
+		       "vs-8240: attempt to create empty buffer tree");
+
+		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+		return NO_BALANCING_NEEDED;
+	}
+
+	if ((ret = get_parents(tb, h)) != CARRY_ON)
+		return ret;
+
+	/* get free space of neighbors */
+	rfree = get_rfree(tb, h);
+	lfree = get_lfree(tb, h);
+
+	create_virtual_node(tb, h);
+
+	/* if 3 leaves can be merged into one, set parameters and return */
+	if (are_leaves_removable(tb, lfree, rfree))
+		return CARRY_ON;
+
+	/* determine the maximal number of items we can shift to the left/right
+	   neighbor and the maximal number of bytes that can flow to the
+	   left/right neighbor from the left-/rightmost liquid item that
+	   cannot be shifted from S[0] entirely
+	 */
+	check_left(tb, h, lfree);
+	check_right(tb, h, rfree);
+
+	/* check whether we can merge S with the left neighbor. */
+	if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
+		if (is_left_neighbor_in_cache(tb, h) ||
+		    /* S cannot be merged with R */
+		    ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) <
+		     vn->vn_nr_item) ||
+		    !tb->FR[h]) {
+
+			RFALSE(!tb->FL[h],
+			       "vs-8245: dc_check_balance_leaf: FL[h] must exist");
+
+			/* set parameter to merge S[0] with its left neighbor */
+			set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
+			return CARRY_ON;
+		}
+
+	/* check whether we can merge S[0] with right neighbor. */
+	if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
+		set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
+		return CARRY_ON;
+	}
+
+	/* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
+	if (is_leaf_removable(tb))
+		return CARRY_ON;
+
+	/* Balancing is not required. */
+	tb->s0num = vn->vn_nr_item;
+	set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
+	return NO_BALANCING_NEEDED;
+}
+
+/* Check whether the current node S[h] is balanced when Decreasing its size
+ * by Deleting or Cutting.
+ * Calculate parameters for balancing for the current level h.
+ * Parameters:
+ *	tb	tree_balance structure;
+ *	h	current level of the node;
+ *	inum	item number in S[h];
+ *	mode	d - delete, c - cut.
+ * Returns:	NO_BALANCING_NEEDED - no balancing for higher levels needed;
+ *	        CARRY_ON            - balancing for higher levels needed;
+ *	        REPEAT_SEARCH       - schedule occurred and the search must
+ *	                              be repeated.
+ */
+static int dc_check_balance(struct tree_balance *tb, int h)
+{
+	RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
+	       "vs-8250: S is not initialized");
+
+	if (h)
+		return dc_check_balance_internal(tb, h);
+	else
+		return dc_check_balance_leaf(tb, h);
+}
+
+/* Check whether the current node S[h] is balanced.
+ * Calculate parameters for balancing for the current level h.
+ * Parameters:
+ *
+ *	tb	tree_balance structure:
+ *
+ *              tb is a large structure that must be read about in the header
+ *              file at the same time as this procedure if the reader is to
+ *              successfully understand this procedure
+ *
+ *	h	current level of the node;
+ *	inum	item number in S[h];
+ *	mode	i - insert, p - paste, d - delete, c - cut.
+ * Returns:	NO_BALANCING_NEEDED - no balancing for higher levels needed;
+ *	        CARRY_ON            - balancing for higher levels needed;
+ *	        REPEAT_SEARCH       - schedule occurred and the search must
+ *	                              be repeated;
+ *	        NO_DISK_SPACE       - no disk space.
+ */
+static int check_balance(int mode,
+			 struct tree_balance *tb,
+			 int h,
+			 int inum,
+			 int pos_in_item,
+			 struct item_head *ins_ih, const void *data)
+{
+	struct virtual_node *vn;
+
+	vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
+	vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
+	vn->vn_mode = mode;
+	vn->vn_affected_item_num = inum;
+	vn->vn_pos_in_item = pos_in_item;
+	vn->vn_ins_ih = ins_ih;
+	vn->vn_data = data;
+
+	RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
+	       "vs-8255: ins_ih can not be 0 in insert mode");
+
+	if (tb->insert_size[h] > 0)
+		/* Calculate balance parameters when size of node is increasing. */
+		return ip_check_balance(tb, h);
+
+	/* Calculate balance parameters when the size of the node is decreasing. */
+	return dc_check_balance(tb, h);
+}
+
+/* Check whether the parent in the path is really the parent of the current node. */
+static int get_direct_parent(struct tree_balance *tb, int h)
+{
+	struct buffer_head *bh;
+	struct treepath *path = tb->tb_path;
+	int position,
+	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
+
+	/* We are in the root or in the new root. */
+	if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
+
+		RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
+		       "PAP-8260: invalid offset in the path");
+
+		if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
+		    b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
+			/* Root is not changed. */
+			PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
+			PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
+			return CARRY_ON;
+		}
+		return REPEAT_SEARCH;	/* Root is changed and we must recalculate the path. */
+	}
+
+	bh = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+	if (!B_IS_IN_TREE(bh))
+		return REPEAT_SEARCH;	/* Parent in the path is not in the tree. */
+
+	position = PATH_OFFSET_POSITION(path, path_offset - 1);
+	if (position > B_NR_ITEMS(bh))
+		return REPEAT_SEARCH;
+
+	if (B_N_CHILD_NUM(bh, position) !=
+	    PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
+		/* Parent in the path is not parent of the current node in the tree. */
+		return REPEAT_SEARCH;
+
+	if (buffer_locked(bh)) {
+		reiserfs_write_unlock(tb->tb_sb);
+		__wait_on_buffer(bh);
+		reiserfs_write_lock(tb->tb_sb);
+		if (FILESYSTEM_CHANGED_TB(tb))
+			return REPEAT_SEARCH;
+	}
+
+	return CARRY_ON;	/* Parent in the path is unlocked and really parent of the current node.  */
+}
+
+/* Using lnum[h] and rnum[h] we should determine which neighbors of S[h] we
+ * need in order to balance S[h], and get them if necessary.
+ * Returns:	REPEAT_SEARCH - the filesystem changed while a neighbor was
+ *			being read;
+ *		IO_ERROR      - a neighbor could not be read;
+ *	        CARRY_ON      - all needed neighbors were obtained.
+ */
+static int get_neighbors(struct tree_balance *tb, int h)
+{
+	int child_position,
+	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
+	unsigned long son_number;
+	struct super_block *sb = tb->tb_sb;
+	struct buffer_head *bh;
+
+	PROC_INFO_INC(sb, get_neighbors[h]);
+
+	if (tb->lnum[h]) {
+		/* We need left neighbor to balance S[h]. */
+		PROC_INFO_INC(sb, need_l_neighbor[h]);
+		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
+
+		RFALSE(bh == tb->FL[h] &&
+		       !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
+		       "PAP-8270: invalid position in the parent");
+
+		child_position = (bh == tb->FL[h]) ?
+		    tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
+		son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
+		reiserfs_write_unlock(sb);
+		bh = sb_bread(sb, son_number);
+		reiserfs_write_lock(sb);
+		if (!bh)
+			return IO_ERROR;
+		if (FILESYSTEM_CHANGED_TB(tb)) {
+			brelse(bh);
+			PROC_INFO_INC(sb, get_neighbors_restart[h]);
+			return REPEAT_SEARCH;
+		}
+
+		RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
+		       child_position > B_NR_ITEMS(tb->FL[h]) ||
+		       B_N_CHILD_NUM(tb->FL[h], child_position) !=
+		       bh->b_blocknr, "PAP-8275: invalid parent");
+		RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
+		RFALSE(!h &&
+		       B_FREE_SPACE(bh) !=
+		       MAX_CHILD_SIZE(bh) -
+		       dc_size(B_N_CHILD(tb->FL[0], child_position)),
+		       "PAP-8290: invalid child size of left neighbor");
+
+		brelse(tb->L[h]);
+		tb->L[h] = bh;
+	}
+
+	/* We need the right neighbor to balance S[path_offset]. */
+	if (tb->rnum[h]) {
+		PROC_INFO_INC(sb, need_r_neighbor[h]);
+		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
+
+		RFALSE(bh == tb->FR[h] &&
+		       PATH_OFFSET_POSITION(tb->tb_path,
+					    path_offset) >=
+		       B_NR_ITEMS(bh),
+		       "PAP-8295: invalid position in the parent");
+
+		child_position = (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
+		son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
+		reiserfs_write_unlock(sb);
+		bh = sb_bread(sb, son_number);
+		reiserfs_write_lock(sb);
+		if (!bh)
+			return IO_ERROR;
+		if (FILESYSTEM_CHANGED_TB(tb)) {
+			brelse(bh);
+			PROC_INFO_INC(sb, get_neighbors_restart[h]);
+			return REPEAT_SEARCH;
+		}
+		brelse(tb->R[h]);
+		tb->R[h] = bh;
+
+		RFALSE(!h
+		       && B_FREE_SPACE(bh) !=
+		       MAX_CHILD_SIZE(bh) -
+		       dc_size(B_N_CHILD(tb->FR[0], child_position)),
+		       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
+		       B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
+		       dc_size(B_N_CHILD(tb->FR[0], child_position)));
+
+	}
+	return CARRY_ON;
+}
+
+static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
+{
+	int max_num_of_items;
+	int max_num_of_entries;
+	unsigned long blocksize = sb->s_blocksize;
+
+#define MIN_NAME_LEN 1
+
+	max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
+	max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
+	    (DEH_SIZE + MIN_NAME_LEN);
+
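+	/* The virtual node must cover the worse of two layouts: many minimal
+	   items (one virtual_item each), or a single directory item carrying
+	   the maximal number of entries (one direntry_uarea plus a __u16 per
+	   additional entry) -- hence the max() below. */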
+	return sizeof(struct virtual_node) +
+	    max(max_num_of_items * sizeof(struct virtual_item),
+		sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
+		(max_num_of_entries - 1) * sizeof(__u16));
+}
+
+/* Maybe we should fail the balancing we are about to perform when kmalloc
+   fails several times; for now it will loop until kmalloc gets the required
+   memory. */
+static int get_mem_for_virtual_node(struct tree_balance *tb)
+{
+	int check_fs = 0;
+	int size;
+	char *buf;
+
+	size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
+
+	if (size > tb->vn_buf_size) {
+		/* we have to allocate more memory for virtual node */
+		if (tb->vn_buf) {
+			/* free memory allocated before */
+			kfree(tb->vn_buf);
+			/* this is not needed if kfree is atomic */
+			check_fs = 1;
+		}
+
+		/* the virtual node now requires more memory */
+		tb->vn_buf_size = size;
+
+		/* get memory for virtual item */
+		buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!buf) {
+			/* getting memory with GFP_KERNEL priority may involve
+			   balancing now (due to indirect_to_direct conversion on
+			   dcache shrinking). So, release path and collected
+			   resources here */
+			free_buffers_in_tb(tb);
+			buf = kmalloc(size, GFP_NOFS);
+			if (!buf) {
+				tb->vn_buf_size = 0;
+			}
+			tb->vn_buf = buf;
+			schedule();
+			return REPEAT_SEARCH;
+		}
+
+		tb->vn_buf = buf;
+	}
+
+	if (check_fs && FILESYSTEM_CHANGED_TB(tb))
+		return REPEAT_SEARCH;
+
+	return CARRY_ON;
+}
+
+#ifdef CONFIG_REISERFS_CHECK
+static void tb_buffer_sanity_check(struct super_block *sb,
+				   struct buffer_head *bh,
+				   const char *descr, int level)
+{
+	if (bh) {
+		if (atomic_read(&(bh->b_count)) <= 0)
+
+			reiserfs_panic(sb, "jmacd-1", "negative or zero "
+				       "reference counter for buffer %s[%d] "
+				       "(%b)", descr, level, bh);
+
+		if (!buffer_uptodate(bh))
+			reiserfs_panic(sb, "jmacd-2", "buffer is not up "
+				       "to date %s[%d] (%b)",
+				       descr, level, bh);
+
+		if (!B_IS_IN_TREE(bh))
+			reiserfs_panic(sb, "jmacd-3", "buffer is not "
+				       "in tree %s[%d] (%b)",
+				       descr, level, bh);
+
+		if (bh->b_bdev != sb->s_bdev)
+			reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
+				       "device %s[%d] (%b)",
+				       descr, level, bh);
+
+		if (bh->b_size != sb->s_blocksize)
+			reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
+				       "blocksize %s[%d] (%b)",
+				       descr, level, bh);
+
+		if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
+			reiserfs_panic(sb, "jmacd-6", "buffer block "
+				       "number too high %s[%d] (%b)",
+				       descr, level, bh);
+	}
+}
+#else
+static void tb_buffer_sanity_check(struct super_block *sb,
+				   struct buffer_head *bh,
+				   const char *descr, int level)
+{
+}
+#endif
+
+static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
+{
+	return reiserfs_prepare_for_journal(s, bh, 0);
+}
+
+static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
+{
+	struct buffer_head *locked;
+#ifdef CONFIG_REISERFS_CHECK
+	int repeat_counter = 0;
+#endif
+	int i;
+
+	do {
+
+		locked = NULL;
+
+		for (i = tb->tb_path->path_length;
+		     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
+			if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
+				/* if I understand correctly, we can only be sure the last buffer
+				 ** in the path is in the tree --clm
+				 */
+#ifdef CONFIG_REISERFS_CHECK
+				if (PATH_PLAST_BUFFER(tb->tb_path) ==
+				    PATH_OFFSET_PBUFFER(tb->tb_path, i))
+					tb_buffer_sanity_check(tb->tb_sb,
+							       PATH_OFFSET_PBUFFER
+							       (tb->tb_path,
+								i), "S",
+							       tb->tb_path->
+							       path_length - i);
+#endif
+				if (!clear_all_dirty_bits(tb->tb_sb,
+							  PATH_OFFSET_PBUFFER
+							  (tb->tb_path,
+							   i))) {
+					locked =
+					    PATH_OFFSET_PBUFFER(tb->tb_path,
+								i);
+				}
+			}
+		}
+
+		for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
+		     i++) {
+
+			if (tb->lnum[i]) {
+
+				if (tb->L[i]) {
+					tb_buffer_sanity_check(tb->tb_sb,
+							       tb->L[i],
+							       "L", i);
+					if (!clear_all_dirty_bits
+					    (tb->tb_sb, tb->L[i]))
+						locked = tb->L[i];
+				}
+
+				if (!locked && tb->FL[i]) {
+					tb_buffer_sanity_check(tb->tb_sb,
+							       tb->FL[i],
+							       "FL", i);
+					if (!clear_all_dirty_bits
+					    (tb->tb_sb, tb->FL[i]))
+						locked = tb->FL[i];
+				}
+
+				if (!locked && tb->CFL[i]) {
+					tb_buffer_sanity_check(tb->tb_sb,
+							       tb->CFL[i],
+							       "CFL", i);
+					if (!clear_all_dirty_bits
+					    (tb->tb_sb, tb->CFL[i]))
+						locked = tb->CFL[i];
+				}
+
+			}
+
+			if (!locked && (tb->rnum[i])) {
+
+				if (tb->R[i]) {
+					tb_buffer_sanity_check(tb->tb_sb,
+							       tb->R[i],
+							       "R", i);
+					if (!clear_all_dirty_bits
+					    (tb->tb_sb, tb->R[i]))
+						locked = tb->R[i];
+				}
+
+				if (!locked && tb->FR[i]) {
+					tb_buffer_sanity_check(tb->tb_sb,
+							       tb->FR[i],
+							       "FR", i);
+					if (!clear_all_dirty_bits
+					    (tb->tb_sb, tb->FR[i]))
+						locked = tb->FR[i];
+				}
+
+				if (!locked && tb->CFR[i]) {
+					tb_buffer_sanity_check(tb->tb_sb,
+							       tb->CFR[i],
+							       "CFR", i);
+					if (!clear_all_dirty_bits
+					    (tb->tb_sb, tb->CFR[i]))
+						locked = tb->CFR[i];
+				}
+			}
+		}
+		/* as far as I can tell, this is not required.  The FEB list
+		 ** seems to be full of newly allocated nodes, which will never
+		 ** be locked, dirty, or anything else.
+		 ** To be safe, I'm putting the checks and waits in.  For the
+		 ** moment, they are needed to keep the code in journal.c from
+		 ** complaining about the buffer.  That code is inside
+		 ** CONFIG_REISERFS_CHECK as well.  --clm
+		 */
+		for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
+			if (tb->FEB[i]) {
+				if (!clear_all_dirty_bits
+				    (tb->tb_sb, tb->FEB[i]))
+					locked = tb->FEB[i];
+			}
+		}
+
+		if (locked) {
+#ifdef CONFIG_REISERFS_CHECK
+			repeat_counter++;
+			if ((repeat_counter % 10000) == 0) {
+				reiserfs_warning(tb->tb_sb, "reiserfs-8200",
+						 "too many iterations waiting "
+						 "for buffer to unlock "
+						 "(%b)", locked);
+
+				/* Don't loop forever.  Try to recover from possible error. */
+
+				return (FILESYSTEM_CHANGED_TB(tb)) ?
+				    REPEAT_SEARCH : CARRY_ON;
+			}
+#endif
+			reiserfs_write_unlock(tb->tb_sb);
+			__wait_on_buffer(locked);
+			reiserfs_write_lock(tb->tb_sb);
+			if (FILESYSTEM_CHANGED_TB(tb))
+				return REPEAT_SEARCH;
+		}
+
+	} while (locked);
+
+	return CARRY_ON;
+}
+
+/* Prepare for balancing, that is
+ *	get all necessary parents and neighbors;
+ *	analyze what should be moved, and where;
+ *	get a sufficient number of new nodes.
+ * Balancing will start only after all needed resources have been collected.
+ *
+ * When ported to SMP kernels, only at the last moment, after all needed
+ * nodes are collected in cache, will the resources be locked using the
+ * usual textbook ordered lock acquisition algorithms.  Note that ensuring
+ * that this code neither write-locks what it does not need to write-lock
+ * nor locks out of order will be a pain in the butt that could have been
+ * avoided.  Grumble grumble. -Hans
+ *
+ * fix is meant in the sense of render unchanging
+ *
+ * Latency might be improved by first gathering a list of what buffers
+ * are needed and then getting as many of them in parallel as possible? -Hans
+ *
+ * Parameters:
+ *	op_mode	i - insert, d - delete, c - cut (truncate), p - paste (append)
+ *	tb	tree_balance structure;
+ *	inum	item number in S[h];
+ *      pos_in_item - comment this if you can
+ *      ins_ih	item head of the item being inserted
+ *	data	inserted item, or data to be pasted
+ * Returns:	CARRY_ON      - all resources were collected;
+ *	        REPEAT_SEARCH - schedule occurred and the search must be
+ *	                        repeated;
+ *	        NO_DISK_SPACE / IO_ERROR - on failure.
+ */
+
+int fix_nodes(int op_mode, struct tree_balance *tb,
+	      struct item_head *ins_ih, const void *data)
+{
+	int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
+	int pos_in_item;
+
+	/* we set wait_tb_buffers_run when we have to restore any dirty bits
+	 ** cleared during wait_tb_buffers_until_unlocked
+	 */
+	int wait_tb_buffers_run = 0;
+	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
+
+	++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
+
+	pos_in_item = tb->tb_path->pos_in_item;
+
+	tb->fs_gen = get_generation(tb->tb_sb);
+
+	/* we prepare and log the super here so it will already be in the
+	 ** transaction when do_balance needs to change it.
+	 ** This way do_balance won't have to schedule when trying to prepare
+	 ** the super for logging
+	 */
+	reiserfs_prepare_for_journal(tb->tb_sb,
+				     SB_BUFFER_WITH_SB(tb->tb_sb), 1);
+	journal_mark_dirty(tb->transaction_handle, tb->tb_sb,
+			   SB_BUFFER_WITH_SB(tb->tb_sb));
+	if (FILESYSTEM_CHANGED_TB(tb))
+		return REPEAT_SEARCH;
+
+	/* this is possible during an indirect_to_direct conversion */
+	if (buffer_locked(tbS0)) {
+		reiserfs_write_unlock(tb->tb_sb);
+		__wait_on_buffer(tbS0);
+		reiserfs_write_lock(tb->tb_sb);
+		if (FILESYSTEM_CHANGED_TB(tb))
+			return REPEAT_SEARCH;
+	}
+#ifdef CONFIG_REISERFS_CHECK
+	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
+		print_cur_tb("fix_nodes");
+		reiserfs_panic(tb->tb_sb, "PAP-8305",
+			       "there is pending do_balance");
+	}
+
+	if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
+		reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
+			       "not uptodate at the beginning of fix_nodes "
+			       "or not in tree (mode %c)",
+			       tbS0, tbS0, op_mode);
+
+	/* Check parameters. */
+	switch (op_mode) {
+	case M_INSERT:
+		if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
+			reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
+				       "item number %d (in S0 - %d) in case "
+				       "of insert", item_num,
+				       B_NR_ITEMS(tbS0));
+		break;
+	case M_PASTE:
+	case M_DELETE:
+	case M_CUT:
+		if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
+			print_block(tbS0, 0, -1, -1);
+			reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
+				       "item number(%d); mode = %c "
+				       "insert_size = %d",
+				       item_num, op_mode,
+				       tb->insert_size[0]);
+		}
+		break;
+	default:
+		reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
+			       "of operation");
+	}
+#endif
+
+	if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
+		/* FIXME: maybe -ENOMEM when tb->vn_buf == 0?  Now just repeat */
+		return REPEAT_SEARCH;
+
+	/* Starting from the leaf level, walk up through all levels h of the tree. */
+	for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
+		ret = get_direct_parent(tb, h);
+		if (ret != CARRY_ON)
+			goto repeat;
+
+		ret = check_balance(op_mode, tb, h, item_num,
+				    pos_in_item, ins_ih, data);
+		if (ret != CARRY_ON) {
+			if (ret == NO_BALANCING_NEEDED) {
+				/* No balancing for higher levels needed. */
+				ret = get_neighbors(tb, h);
+				if (ret != CARRY_ON)
+					goto repeat;
+				if (h != MAX_HEIGHT - 1)
+					tb->insert_size[h + 1] = 0;
+				/* ok, analysis and resource gathering are complete */
+				break;
+			}
+			goto repeat;
+		}
+
+		ret = get_neighbors(tb, h);
+		if (ret != CARRY_ON)
+			goto repeat;
+
+		/* No disk space, or schedule occurred and analysis may be
+		 * invalid and needs to be redone. */
+		ret = get_empty_nodes(tb, h);
+		if (ret != CARRY_ON)
+			goto repeat;
+
+		if (!PATH_H_PBUFFER(tb->tb_path, h)) {
+			/* We have a positive insert size but no nodes exist
+			   on this level; this means we are creating a new
+			   root. */
+
+			RFALSE(tb->blknum[h] != 1,
+			       "PAP-8350: creating new empty root");
+
+			if (h < MAX_HEIGHT - 1)
+				tb->insert_size[h + 1] = 0;
+		} else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
+			if (tb->blknum[h] > 1) {
+				/* The tree needs to be grown, so this node S[h]
+				   which is the root node is split into two nodes,
+				   and a new node (S[h+1]) will be created to
+				   become the root node.  */
+
+				RFALSE(h == MAX_HEIGHT - 1,
+				       "PAP-8355: attempt to create too high of a tree");
+
+				tb->insert_size[h + 1] =
+				    (DC_SIZE +
+				     KEY_SIZE) * (tb->blknum[h] - 1) +
+				    DC_SIZE;
+			} else if (h < MAX_HEIGHT - 1)
+				tb->insert_size[h + 1] = 0;
+		} else
+			tb->insert_size[h + 1] =
+			    (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
+	}
+
+	ret = wait_tb_buffers_until_unlocked(tb);
+	if (ret == CARRY_ON) {
+		if (FILESYSTEM_CHANGED_TB(tb)) {
+			wait_tb_buffers_run = 1;
+			ret = REPEAT_SEARCH;
+			goto repeat;
+		} else {
+			return CARRY_ON;
+		}
+	} else {
+		wait_tb_buffers_run = 1;
+		goto repeat;
+	}
+
+      repeat:
+	/* fix_nodes was unable to perform its calculation because the
+	   filesystem changed under us, we ran out of free disk space, or an
+	   i/o failure occurred.  In the first case the search will be
+	   repeated.  For now, free all resources acquired so far except for
+	   the newly allocated nodes. */
+	{
+		int i;
+
+		/* Release path buffers. */
+		if (wait_tb_buffers_run) {
+			pathrelse_and_restore(tb->tb_sb, tb->tb_path);
+		} else {
+			pathrelse(tb->tb_path);
+		}
+		/* brelse all resources collected for balancing */
+		for (i = 0; i < MAX_HEIGHT; i++) {
+			if (wait_tb_buffers_run) {
+				reiserfs_restore_prepared_buffer(tb->tb_sb,
+								 tb->L[i]);
+				reiserfs_restore_prepared_buffer(tb->tb_sb,
+								 tb->R[i]);
+				reiserfs_restore_prepared_buffer(tb->tb_sb,
+								 tb->FL[i]);
+				reiserfs_restore_prepared_buffer(tb->tb_sb,
+								 tb->FR[i]);
+				reiserfs_restore_prepared_buffer(tb->tb_sb,
+								 tb->CFL[i]);
+				reiserfs_restore_prepared_buffer(tb->tb_sb,
+								 tb->CFR[i]);
+			}
+
+			brelse(tb->L[i]);
+			brelse(tb->R[i]);
+			brelse(tb->FL[i]);
+			brelse(tb->FR[i]);
+			brelse(tb->CFL[i]);
+			brelse(tb->CFR[i]);
+
+			tb->L[i] = NULL;
+			tb->R[i] = NULL;
+			tb->FL[i] = NULL;
+			tb->FR[i] = NULL;
+			tb->CFL[i] = NULL;
+			tb->CFR[i] = NULL;
+		}
+
+		if (wait_tb_buffers_run) {
+			for (i = 0; i < MAX_FEB_SIZE; i++) {
+				if (tb->FEB[i])
+					reiserfs_restore_prepared_buffer
+					    (tb->tb_sb, tb->FEB[i]);
+			}
+		}
+		return ret;
+	}
+
+}
+
+/* Anatoly will probably forgive me renaming p_s_tb to tb.  I just wanted
+   to make lines shorter */
+void unfix_nodes(struct tree_balance *tb)
+{
+	int i;
+
+	/* Release path buffers. */
+	pathrelse_and_restore(tb->tb_sb, tb->tb_path);
+
+	/* brelse all resources collected for balancing */
+	for (i = 0; i < MAX_HEIGHT; i++) {
+		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
+		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
+		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
+		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
+		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
+		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
+
+		brelse(tb->L[i]);
+		brelse(tb->R[i]);
+		brelse(tb->FL[i]);
+		brelse(tb->FR[i]);
+		brelse(tb->CFL[i]);
+		brelse(tb->CFR[i]);
+	}
+
+	/* deal with list of allocated (used and unused) nodes */
+	for (i = 0; i < MAX_FEB_SIZE; i++) {
+		if (tb->FEB[i]) {
+			b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
+			/* de-allocate the block, which was not used by
+			   balancing, and bforget the buffer for it */
+			brelse(tb->FEB[i]);
+			reiserfs_free_block(tb->transaction_handle, NULL,
+					    blocknr, 0);
+		}
+		if (tb->used[i]) {
+			/* release used as new nodes including a new root */
+			brelse(tb->used[i]);
+		}
+	}
+
+	kfree(tb->vn_buf);
+
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/hashes.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/hashes.c
new file mode 100644
index 0000000..91b0cc1
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/hashes.c
@@ -0,0 +1,182 @@
+
+/*
+ * Keyed 32-bit hash function using TEA in a Davis-Meyer function
+ *   H0 = Key
+ *   Hi = E Mi(Hi-1) + Hi-1
+ *
+ * (see Applied Cryptography, 2nd edition, p448).
+ *
+ * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
+ *
+ * Jeremy has agreed to the contents of reiserfs/README. -Hans
+ * Yura's function is added (04/07/2000)
+ */
+
+/*
+ * keyed_hash
+ * yura_hash
+ * r5_hash
+ */
+
+#include <linux/kernel.h>
+#include "reiserfs.h"
+#include <asm/types.h>
+
+#define DELTA 0x9E3779B9
+#define FULLROUNDS 10		/* 32 is overkill, 16 is strong crypto */
+#define PARTROUNDS 6		/* 6 gets complete mixing */
+
+/* a, b, c, d - data; h0, h1 - accumulated hash */
+#define TEACORE(rounds)							\
+	do {								\
+		u32 sum = 0;						\
+		int n = rounds;						\
+		u32 b0, b1;						\
+									\
+		b0 = h0;						\
+		b1 = h1;						\
+									\
+		do							\
+		{							\
+			sum += DELTA;					\
+			b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);	\
+			b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);	\
+		} while(--n);						\
+									\
+		h0 += b0;						\
+		h1 += b1;						\
+	} while(0)
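+/*
+ * TEACORE is the Davis-Meyer step described in the header comment: the 16
+ * message bytes (a,b,c,d) act as the TEA key, the running hash (h0,h1) as
+ * the plaintext, and the final "h0 += b0; h1 += b1" is the feed-forward
+ * Hi = E Mi(Hi-1) + Hi-1.
+ */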
+
+u32 keyed_hash(const signed char *msg, int len)
+{
+	u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 };
+
+	u32 h0 = k[0], h1 = k[1];
+	u32 a, b, c, d;
+	u32 pad;
+	int i;
+
+	//      assert(len >= 0 && len < 256);
+
+	pad = (u32) len | ((u32) len << 8);
+	pad |= pad << 16;
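+	/* pad now holds the length byte replicated into all four byte
+	   positions; it fills whatever part of the final block the message
+	   does not cover, mixing the name length into the hash. */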
+
+	while (len >= 16) {
+		a = (u32) msg[0] |
+		    (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
+		b = (u32) msg[4] |
+		    (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
+		c = (u32) msg[8] |
+		    (u32) msg[9] << 8 |
+		    (u32) msg[10] << 16 | (u32) msg[11] << 24;
+		d = (u32) msg[12] |
+		    (u32) msg[13] << 8 |
+		    (u32) msg[14] << 16 | (u32) msg[15] << 24;
+
+		TEACORE(PARTROUNDS);
+
+		len -= 16;
+		msg += 16;
+	}
+
+	if (len >= 12) {
+		a = (u32) msg[0] |
+		    (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
+		b = (u32) msg[4] |
+		    (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
+		c = (u32) msg[8] |
+		    (u32) msg[9] << 8 |
+		    (u32) msg[10] << 16 | (u32) msg[11] << 24;
+
+		d = pad;
+		for (i = 12; i < len; i++) {
+			d <<= 8;
+			d |= msg[i];
+		}
+	} else if (len >= 8) {
+		a = (u32) msg[0] |
+		    (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
+		b = (u32) msg[4] |
+		    (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
+
+		c = d = pad;
+		for (i = 8; i < len; i++) {
+			c <<= 8;
+			c |= msg[i];
+		}
+	} else if (len >= 4) {
+		a = (u32) msg[0] |
+		    (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
+
+		b = c = d = pad;
+		for (i = 4; i < len; i++) {
+			b <<= 8;
+			b |= msg[i];
+		}
+	} else {
+		a = b = c = d = pad;
+		for (i = 0; i < len; i++) {
+			a <<= 8;
+			a |= msg[i];
+		}
+	}
+
+	TEACORE(FULLROUNDS);
+
+	return h0 ^ h1;
+}
+
+/* What follows in this file is copyright 2000 by Hans Reiser, and the
+ * licensing of what follows is governed by reiserfs/README */
+
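+/* yura_hash treats the name as a string of decimal digits (msg[i] - 48 is
+   the value of an ASCII digit) and builds, roughly, the number they spell
+   out.  For typical short names the second loop adds nothing (c is
+   '0' - 48 == 0) and the third adds a name-independent constant, since
+   their pow loops run zero times; the final "a << 7" just spreads the
+   result. */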
+u32 yura_hash(const signed char *msg, int len)
+{
+	int j, pow;
+	u32 a, c;
+	int i;
+
+	for (pow = 1, i = 1; i < len; i++)
+		pow = pow * 10;
+
+	if (len == 1)
+		a = msg[0] - 48;
+	else
+		a = (msg[0] - 48) * pow;
+
+	for (i = 1; i < len; i++) {
+		c = msg[i] - 48;
+		for (pow = 1, j = i; j < len - 1; j++)
+			pow = pow * 10;
+		a = a + c * pow;
+	}
+
+	for (; i < 40; i++) {
+		c = '0' - 48;
+		for (pow = 1, j = i; j < len - 1; j++)
+			pow = pow * 10;
+		a = a + c * pow;
+	}
+
+	for (; i < 256; i++) {
+		c = i;
+		for (pow = 1, j = i; j < len - 1; j++)
+			pow = pow * 10;
+		a = a + c * pow;
+	}
+
+	a = a << 7;
+	return a;
+}
+
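+/* r5 walks the NUL-terminated name and ignores the len argument entirely;
+   each character is folded in through two shifts and a multiply by 11. */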
+u32 r5_hash(const signed char *msg, int len)
+{
+	u32 a = 0;
+	while (*msg) {
+		a += *msg << 4;
+		a += *msg >> 4;
+		a *= 11;
+		msg++;
+	}
+	return a;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/ibalance.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/ibalance.c
new file mode 100644
index 0000000..e1978fd
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/ibalance.c
@@ -0,0 +1,1089 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include "reiserfs.h"
+#include <linux/buffer_head.h>
+
+/* this is the one and only function that is used outside this file (in do_balance.c) */
+int balance_internal(struct tree_balance *,
+		     int, int, struct item_head *, struct buffer_head **);
+
+/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */
+#define INTERNAL_SHIFT_FROM_S_TO_L 0
+#define INTERNAL_SHIFT_FROM_R_TO_S 1
+#define INTERNAL_SHIFT_FROM_L_TO_S 2
+#define INTERNAL_SHIFT_FROM_S_TO_R 3
+#define INTERNAL_INSERT_TO_S 4
+#define INTERNAL_INSERT_TO_L 5
+#define INTERNAL_INSERT_TO_R 6
+
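+/* A buffer_info bundles a node with its parent buffer and the node's
+   position within that parent -- exactly what the shift and insert helpers
+   below need when marking buffers dirty. */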
+static void internal_define_dest_src_infos(int shift_mode,
+					   struct tree_balance *tb,
+					   int h,
+					   struct buffer_info *dest_bi,
+					   struct buffer_info *src_bi,
+					   int *d_key, struct buffer_head **cf)
+{
+	memset(dest_bi, 0, sizeof(struct buffer_info));
+	memset(src_bi, 0, sizeof(struct buffer_info));
+	/* define dest, src, dest parent, dest position */
+	switch (shift_mode) {
+	case INTERNAL_SHIFT_FROM_S_TO_L:	/* used in internal_shift_left */
+		src_bi->tb = tb;
+		src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
+		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->L[h];
+		dest_bi->bi_parent = tb->FL[h];
+		dest_bi->bi_position = get_left_neighbor_position(tb, h);
+		*d_key = tb->lkey[h];
+		*cf = tb->CFL[h];
+		break;
+	case INTERNAL_SHIFT_FROM_L_TO_S:
+		src_bi->tb = tb;
+		src_bi->bi_bh = tb->L[h];
+		src_bi->bi_parent = tb->FL[h];
+		src_bi->bi_position = get_left_neighbor_position(tb, h);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
+		dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);	/* dest position is analog of dest->b_item_order */
+		*d_key = tb->lkey[h];
+		*cf = tb->CFL[h];
+		break;
+
+	case INTERNAL_SHIFT_FROM_R_TO_S:	/* used in internal_shift_left */
+		src_bi->tb = tb;
+		src_bi->bi_bh = tb->R[h];
+		src_bi->bi_parent = tb->FR[h];
+		src_bi->bi_position = get_right_neighbor_position(tb, h);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
+		dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+		*d_key = tb->rkey[h];
+		*cf = tb->CFR[h];
+		break;
+
+	case INTERNAL_SHIFT_FROM_S_TO_R:
+		src_bi->tb = tb;
+		src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
+		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->R[h];
+		dest_bi->bi_parent = tb->FR[h];
+		dest_bi->bi_position = get_right_neighbor_position(tb, h);
+		*d_key = tb->rkey[h];
+		*cf = tb->CFR[h];
+		break;
+
+	case INTERNAL_INSERT_TO_L:
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->L[h];
+		dest_bi->bi_parent = tb->FL[h];
+		dest_bi->bi_position = get_left_neighbor_position(tb, h);
+		break;
+
+	case INTERNAL_INSERT_TO_S:
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
+		dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+		break;
+
+	case INTERNAL_INSERT_TO_R:
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->R[h];
+		dest_bi->bi_parent = tb->FR[h];
+		dest_bi->bi_position = get_right_neighbor_position(tb, h);
+		break;
+
+	default:
+		reiserfs_panic(tb->tb_sb, "ibalance-1",
+			       "shift type is unknown (%d)",
+			       shift_mode);
+	}
+}
+
+/* Insert count node pointers into buffer cur before position to + 1.
+ * Insert count items into buffer cur before position to.
+ * Items and node pointers are specified by inserted and bh respectively.
+ */
+static void internal_insert_childs(struct buffer_info *cur_bi,
+				   int to, int count,
+				   struct item_head *inserted,
+				   struct buffer_head **bh)
+{
+	struct buffer_head *cur = cur_bi->bi_bh;
+	struct block_head *blkh;
+	int nr;
+	struct reiserfs_key *ih;
+	struct disk_child new_dc[2];
+	struct disk_child *dc;
+	int i;
+
+	if (count <= 0)
+		return;
+
+	blkh = B_BLK_HEAD(cur);
+	nr = blkh_nr_item(blkh);
+
+	RFALSE(count > 2, "too many children (%d) are to be inserted", count);
+	RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE),
+	       "no enough free space (%d), needed %d bytes",
+	       B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE));
+
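+	/* In an internal node the key array sits right after the block head
+	 * and the disk_child array follows the keys, which is why the key
+	 * memmove below also drags the whole dc area along with it. */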
+	/* prepare space for count disk_child */
+	dc = B_N_CHILD(cur, to + 1);
+
+	memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE);
+
+	/* copy to_be_insert disk children */
+	for (i = 0; i < count; i++) {
+		put_dc_size(&(new_dc[i]),
+			    MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
+		put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr);
+	}
+	memcpy(dc, new_dc, DC_SIZE * count);
+
+	/* prepare space for count items  */
+	ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to));
+
+	memmove(ih + count, ih,
+		(nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
+
+	/* copy item headers (keys) */
+	memcpy(ih, inserted, KEY_SIZE);
+	if (count > 1)
+		memcpy(ih + 1, inserted + 1, KEY_SIZE);
+
+	/* sizes, item number */
+	set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count);
+	set_blkh_free_space(blkh,
+			    blkh_free_space(blkh) - count * (DC_SIZE +
+							     KEY_SIZE));
+
+	do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
+
+	/*&&&&&&&&&&&&&&&&&&&&&&&& */
+	check_internal(cur);
+	/*&&&&&&&&&&&&&&&&&&&&&&&& */
+
+	if (cur_bi->bi_parent) {
+		struct disk_child *t_dc =
+		    B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
+		put_dc_size(t_dc,
+			    dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE)));
+		do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
+					       0);
+
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+		check_internal(cur_bi->bi_parent);
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+	}
+
+}
+
+/* Delete del_num items and node pointers from buffer cur starting from
+ * the first_i'th item and first_p'th pointers respectively. */
+static void internal_delete_pointers_items(struct buffer_info *cur_bi,
+					   int first_p,
+					   int first_i, int del_num)
+{
+	struct buffer_head *cur = cur_bi->bi_bh;
+	int nr;
+	struct block_head *blkh;
+	struct reiserfs_key *key;
+	struct disk_child *dc;
+
+	RFALSE(cur == NULL, "buffer is 0");
+	RFALSE(del_num < 0,
+	       "negative number of items (%d) can not be deleted", del_num);
+	RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1
+	       || first_i < 0,
+	       "first pointer order (%d) < 0 or "
+	       "no so many pointers (%d), only (%d) or "
+	       "first key order %d < 0", first_p, first_p + del_num,
+	       B_NR_ITEMS(cur) + 1, first_i);
+	if (del_num == 0)
+		return;
+
+	blkh = B_BLK_HEAD(cur);
+	nr = blkh_nr_item(blkh);
+
+	if (first_p == 0 && del_num == nr + 1) {
+		RFALSE(first_i != 0,
+		       "1st deleted key must have order 0, not %d", first_i);
+		make_empty_node(cur_bi);
+		return;
+	}
+
+	RFALSE(first_i + del_num > B_NR_ITEMS(cur),
+	       "first_i = %d del_num = %d "
+	       "no so many keys (%d) in the node (%b)(%z)",
+	       first_i, del_num, first_i + del_num, cur, cur);
+
+	/* deleting */
+	dc = B_N_CHILD(cur, first_p);
+
+	memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
+	key = B_N_PDELIM_KEY(cur, first_i);
+	memmove(key, key + del_num,
+		(nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
+						       del_num) * DC_SIZE);
+
+	/* sizes, item number */
+	set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
+	set_blkh_free_space(blkh,
+			    blkh_free_space(blkh) +
+			    (del_num * (KEY_SIZE + DC_SIZE)));
+
+	do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
+	/*&&&&&&&&&&&&&&&&&&&&&&& */
+	check_internal(cur);
+	/*&&&&&&&&&&&&&&&&&&&&&&& */
+
+	if (cur_bi->bi_parent) {
+		struct disk_child *t_dc;
+		t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
+		put_dc_size(t_dc,
+			    dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE)));
+
+		do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
+					       0);
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+		check_internal(cur_bi->bi_parent);
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+	}
+}
+
+/* delete n node pointers and items starting from given position */
+static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
+{
+	int i_from;
+
+	i_from = (from == 0) ? from : from - 1;
+
+	/* delete n pointers starting from `from' position in CUR;
+	   delete n keys starting from 'i_from' position in CUR;
+	 */
+	internal_delete_pointers_items(cur_bi, from, i_from, n);
+}
+
+/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
+ * last_first == FIRST_TO_LAST means that we copy the first items from src to the tail of dest.
+ * last_first == LAST_TO_FIRST means that we copy the last items from src to the head of dest.
+ */
+static void internal_copy_pointers_items(struct buffer_info *dest_bi,
+					 struct buffer_head *src,
+					 int last_first, int cpy_num)
+{
+	/* ATTENTION! The number of node pointers in DEST equals the number of
+	 * items in DEST, as the delimiting key has already been inserted into
+	 * buffer dest. */
+	struct buffer_head *dest = dest_bi->bi_bh;
+	int nr_dest, nr_src;
+	int dest_order, src_order;
+	struct block_head *blkh;
+	struct reiserfs_key *key;
+	struct disk_child *dc;
+
+	nr_src = B_NR_ITEMS(src);
+
+	RFALSE(dest == NULL || src == NULL,
+	       "src (%p) or dest (%p) buffer is 0", src, dest);
+	RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
+	       "invalid last_first parameter (%d)", last_first);
+	RFALSE(nr_src < cpy_num - 1,
+	       "no so many items (%d) in src (%d)", cpy_num, nr_src);
+	RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num);
+	RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest),
+	       "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)",
+	       cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest));
+
+	if (cpy_num == 0)
+		return;
+
+	/* copying */
+	blkh = B_BLK_HEAD(dest);
+	nr_dest = blkh_nr_item(blkh);
+
+	if (last_first == LAST_TO_FIRST) {
+		dest_order = 0;
+		src_order = nr_src - cpy_num + 1;
+	} else {
+		dest_order = nr_dest;
+		src_order = 0;
+	}
+
+	/* prepare space for cpy_num pointers */
+	dc = B_N_CHILD(dest, dest_order);
+
+	memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE);
+
+	/* insert pointers */
+	memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
+
+	/* prepare space for cpy_num - 1 item headers */
+	key = B_N_PDELIM_KEY(dest, dest_order);
+	memmove(key + cpy_num - 1, key,
+		KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
+							       cpy_num));
+
+	/* insert headers */
+	memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1));
+
+	/* sizes, item number */
+	set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
+	set_blkh_free_space(blkh,
+			    blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) +
+						     DC_SIZE * cpy_num));
+
+	do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
+
+	/*&&&&&&&&&&&&&&&&&&&&&&&& */
+	check_internal(dest);
+	/*&&&&&&&&&&&&&&&&&&&&&&&& */
+
+	if (dest_bi->bi_parent) {
+		struct disk_child *t_dc;
+		t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
+		put_dc_size(t_dc,
+			    dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) +
+					     DC_SIZE * cpy_num));
+
+		do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
+					       0);
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+		check_internal(dest_bi->bi_parent);
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+	}
+
+}
+
+/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
+ * Delete cpy_num - del_par items and node pointers from buffer src.
+ * last_first == FIRST_TO_LAST means that we copy/delete the first items from src.
+ * last_first == LAST_TO_FIRST means that we copy/delete the last items from src.
+ */
+static void internal_move_pointers_items(struct buffer_info *dest_bi,
+					 struct buffer_info *src_bi,
+					 int last_first, int cpy_num,
+					 int del_par)
+{
+	int first_pointer;
+	int first_item;
+
+	internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first,
+				     cpy_num);
+
+	if (last_first == FIRST_TO_LAST) {	/* shift_left occurs */
+		first_pointer = 0;
+		first_item = 0;
+		/* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
+		   for key - with first_item */
+		internal_delete_pointers_items(src_bi, first_pointer,
+					       first_item, cpy_num - del_par);
+	} else {		/* shift_right occurs */
+		int i, j;
+
+		j = B_NR_ITEMS(src_bi->bi_bh);
+		i = (cpy_num - del_par == j + 1) ? 0 : j - cpy_num + del_par;
+
+		internal_delete_pointers_items(src_bi,
+					       j + 1 - cpy_num + del_par, i,
+					       cpy_num - del_par);
+	}
+}
+
+/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
+static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before,	/* insert key before key with n_dest number */
+				struct buffer_head *src, int src_position)
+{
+	struct buffer_head *dest = dest_bi->bi_bh;
+	int nr;
+	struct block_head *blkh;
+	struct reiserfs_key *key;
+
+	RFALSE(dest == NULL || src == NULL,
+	       "source(%p) or dest(%p) buffer is 0", src, dest);
+	RFALSE(dest_position_before < 0 || src_position < 0,
+	       "source(%d) or dest(%d) key number less than 0",
+	       src_position, dest_position_before);
+	RFALSE(dest_position_before > B_NR_ITEMS(dest) ||
+	       src_position >= B_NR_ITEMS(src),
+	       "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))",
+	       dest_position_before, B_NR_ITEMS(dest),
+	       src_position, B_NR_ITEMS(src));
+	RFALSE(B_FREE_SPACE(dest) < KEY_SIZE,
+	       "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest));
+
+	blkh = B_BLK_HEAD(dest);
+	nr = blkh_nr_item(blkh);
+
+	/* prepare space for inserting key */
+	key = B_N_PDELIM_KEY(dest, dest_position_before);
+	memmove(key + 1, key,
+		(nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
+
+	/* insert key */
+	memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE);
+
+	/* Change dirt, free space, item number fields. */
+
+	set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
+	set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE);
+
+	do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
+
+	if (dest_bi->bi_parent) {
+		struct disk_child *t_dc;
+		t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
+		put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE);
+
+		do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
+					       0);
+	}
+}
+
+/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
+ * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
+ * Replace  d_key'th key in buffer cfl.
+ * Delete pointer_amount items and node pointers from buffer src.
+ */
+/* this can be invoked both to shift from S to L and from R to S */
+static void internal_shift_left(int mode,	/* INTERNAL_SHIFT_FROM_S_TO_L | INTERNAL_SHIFT_FROM_R_TO_S */
+				struct tree_balance *tb,
+				int h, int pointer_amount)
+{
+	struct buffer_info dest_bi, src_bi;
+	struct buffer_head *cf;
+	int d_key_position;
+
+	internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
+				       &d_key_position, &cf);
+
+	/*printk("pointer_amount = %d\n",pointer_amount); */
+
+	if (pointer_amount) {
+		/* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEMS(dest) */
+		internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
+				    d_key_position);
+
+		if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) {
+			if (src_bi.bi_position /*src->b_item_order */  == 0)
+				replace_key(tb, cf, d_key_position,
+					    src_bi.
+					    bi_parent /*src->b_parent */ , 0);
+		} else
+			replace_key(tb, cf, d_key_position, src_bi.bi_bh,
+				    pointer_amount - 1);
+	}
+	/* last parameter is del_parameter */
+	internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
+				     pointer_amount, 0);
+
+}
+
+/* Insert delimiting key to L[h].
+ * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
+ * Delete n - 1 items and node pointers from buffer S[h].
+ */
+/* it always shifts from S[h] to L[h] */
+static void internal_shift1_left(struct tree_balance *tb,
+				 int h, int pointer_amount)
+{
+	struct buffer_info dest_bi, src_bi;
+	struct buffer_head *cf;
+	int d_key_position;
+
+	internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
+				       &dest_bi, &src_bi, &d_key_position, &cf);
+
+	if (pointer_amount > 0)	/* insert lkey[h]-th key  from CFL[h] to left neighbor L[h] */
+		internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
+				    d_key_position);
+	/*            internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
+
+	/* last parameter is del_parameter */
+	internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
+				     pointer_amount, 1);
+	/*    internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
+}
+
+/* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
+ * Copy n node pointers and n - 1 items from buffer src to buffer dest.
+ * Replace  d_key'th key in buffer cfr.
+ * Delete n items and node pointers from buffer src.
+ */
+static void internal_shift_right(int mode,	/* INTERNAL_SHIFT_FROM_S_TO_R | INTERNAL_SHIFT_FROM_L_TO_S */
+				 struct tree_balance *tb,
+				 int h, int pointer_amount)
+{
+	struct buffer_info dest_bi, src_bi;
+	struct buffer_head *cf;
+	int d_key_position;
+	int nr;
+
+	internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
+				       &d_key_position, &cf);
+
+	nr = B_NR_ITEMS(src_bi.bi_bh);
+
+	if (pointer_amount > 0) {
+		/* insert delimiting key from common father of dest and src to dest node into position 0 */
+		internal_insert_key(&dest_bi, 0, cf, d_key_position);
+		if (nr == pointer_amount - 1) {
+			RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
+			       dest_bi.bi_bh != tb->R[h],
+			       "src (%p) must be == tb->S[h](%p) when it disappears",
+			       src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h));
+			/* when S[h] disappears, replace the left delimiting key as well */
+			if (tb->CFL[h])
+				replace_key(tb, cf, d_key_position, tb->CFL[h],
+					    tb->lkey[h]);
+		} else
+			replace_key(tb, cf, d_key_position, src_bi.bi_bh,
+				    nr - pointer_amount);
+	}
+
+	/* last parameter is del_parameter */
+	internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
+				     pointer_amount, 0);
+}
+
+/* Insert delimiting key to R[h].
+ * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
+ * Delete n - 1 items and node pointers from buffer S[h].
+ */
+/* it always shifts from S[h] to R[h] */
+static void internal_shift1_right(struct tree_balance *tb,
+				  int h, int pointer_amount)
+{
+	struct buffer_info dest_bi, src_bi;
+	struct buffer_head *cf;
+	int d_key_position;
+
+	internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
+				       &dest_bi, &src_bi, &d_key_position, &cf);
+
+	if (pointer_amount > 0)	/* insert rkey from CFR[h] to right neighbor R[h] */
+		internal_insert_key(&dest_bi, 0, cf, d_key_position);
+	/*            internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
+
+	/* last parameter is del_parameter */
+	internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
+				     pointer_amount, 1);
+	/*    internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
+}
+
+/* Delete insert_num node pointers together with their left items
+ * and balance current node.*/
+static void balance_internal_when_delete(struct tree_balance *tb,
+					 int h, int child_pos)
+{
+	int insert_num;
+	int n;
+	struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
+	struct buffer_info bi;
+
+	insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE));
+
+	/* delete child-node-pointer(s) together with their left item(s) */
+	bi.tb = tb;
+	bi.bi_bh = tbSh;
+	bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+	bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+
+	internal_delete_childs(&bi, child_pos, -insert_num);
+
+	RFALSE(tb->blknum[h] > 1,
+	       "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]);
+
+	n = B_NR_ITEMS(tbSh);
+
+	if (tb->lnum[h] == 0 && tb->rnum[h] == 0) {
+		if (tb->blknum[h] == 0) {
+			/* node S[h] (root of the tree) is empty now */
+			struct buffer_head *new_root;
+
+			RFALSE(n
+			       || B_FREE_SPACE(tbSh) !=
+			       MAX_CHILD_SIZE(tbSh) - DC_SIZE,
+			       "buffer must have only 0 keys (%d)", n);
+			RFALSE(bi.bi_parent, "root has parent (%p)",
+			       bi.bi_parent);
+
+			/* choose a new root */
+			if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1]))
+				new_root = tb->R[h - 1];
+			else
+				new_root = tb->L[h - 1];
+			/* switch super block's tree root block number to the new value */
+			PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
+			//REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --;
+			PUT_SB_TREE_HEIGHT(tb->tb_sb,
+					   SB_TREE_HEIGHT(tb->tb_sb) - 1);
+
+			do_balance_mark_sb_dirty(tb,
+						 REISERFS_SB(tb->tb_sb)->s_sbh,
+						 1);
+			/*&&&&&&&&&&&&&&&&&&&&&& */
+			if (h > 1)
+				/* use check_internal if new root is an internal node */
+				check_internal(new_root);
+			/*&&&&&&&&&&&&&&&&&&&&&& */
+
+			/* do what is needed for buffer thrown from tree */
+			reiserfs_invalidate_buffer(tb, tbSh);
+			return;
+		}
+		return;
+	}
+
+	if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) {	/* join S[h] with L[h] */
+
+		RFALSE(tb->rnum[h] != 0,
+		       "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
+		       h, tb->rnum[h]);
+
+		internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
+		reiserfs_invalidate_buffer(tb, tbSh);
+
+		return;
+	}
+
+	if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) {	/* join S[h] with R[h] */
+		RFALSE(tb->lnum[h] != 0,
+		       "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
+		       h, tb->lnum[h]);
+
+		internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
+
+		reiserfs_invalidate_buffer(tb, tbSh);
+		return;
+	}
+
+	if (tb->lnum[h] < 0) {	/* borrow from left neighbor L[h] */
+		RFALSE(tb->rnum[h] != 0,
+		       "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
+		       tb->rnum[h]);
+		/*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
+		internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
+				     -tb->lnum[h]);
+		return;
+	}
+
+	if (tb->rnum[h] < 0) {	/* borrow from right neighbor R[h] */
+		RFALSE(tb->lnum[h] != 0,
+		       "invalid tb->lnum[%d]==%d when borrow from R[h]",
+		       h, tb->lnum[h]);
+		internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]);	/*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */
+		return;
+	}
+
+	if (tb->lnum[h] > 0) {	/* split S[h] into two parts and put them into neighbors */
+		RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
+		       "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
+		       h, tb->lnum[h], h, tb->rnum[h], n);
+
+		internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);	/*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */
+		internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
+				     tb->rnum[h]);
+
+		reiserfs_invalidate_buffer(tb, tbSh);
+
+		return;
+	}
+	reiserfs_panic(tb->tb_sb, "ibalance-2",
+		       "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
+		       h, tb->lnum[h], h, tb->rnum[h]);
+}
+
+/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/
+static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
+{
+	RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL,
+	       "L[h](%p) and CFL[h](%p) must exist in replace_lkey",
+	       tb->L[h], tb->CFL[h]);
+
+	if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
+		return;
+
+	memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
+
+	do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
+}
+
+/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/
+static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
+{
+	RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL,
+	       "R[h](%p) and CFR[h](%p) must exist in replace_rkey",
+	       tb->R[h], tb->CFR[h]);
+	RFALSE(B_NR_ITEMS(tb->R[h]) == 0,
+	       "R[h] can not be empty if it exists (item number=%d)",
+	       B_NR_ITEMS(tb->R[h]));
+
+	memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
+
+	do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
+}
+
+int balance_internal(struct tree_balance *tb,	/* tree_balance structure               */
+		     int h,	/* level of the tree                    */
+		     int child_pos, struct item_head *insert_key,	/* key for insertion on higher level    */
+		     struct buffer_head **insert_ptr	/* node for insertion on higher level */
+    )
+    /* if inserting/pasting, child_pos is the position of the node-pointer
+     * in S[h] that pointed to S[h-1] before balancing of the h-1 level;
+     * this means that new pointers and items must be inserted AFTER
+     * child_pos.
+     *
+     * otherwise, it is the position of the leftmost pointer that must be
+     * deleted (together with its corresponding key to the left of the
+     * pointer) as a result of the previous level's balancing.
+     */
+{
+	struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
+	struct buffer_info bi;
+	int order;		/* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */
+	int insert_num, n, k;
+	struct buffer_head *S_new;
+	struct item_head new_insert_key;
+	struct buffer_head *new_insert_ptr = NULL;
+	struct item_head *new_insert_key_addr = insert_key;
+
+	RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h);
+
+	PROC_INFO_INC(tb->tb_sb, balance_at[h]);
+
+	order =
+	    (tbSh) ? PATH_H_POSITION(tb->tb_path,
+				     h + 1) /*tb->S[h]->b_item_order */ : 0;
+
+	/* Using insert_size[h] calculate the number insert_num of items
+	   that must be inserted to or deleted from S[h]. */
+	insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
+
+	/* Check whether insert_num is proper */
+	RFALSE(insert_num < -2 || insert_num > 2,
+	       "incorrect number of items inserted to the internal node (%d)",
+	       insert_num);
+	RFALSE(h > 1 && (insert_num > 1 || insert_num < -1),
+	       "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level",
+	       insert_num, h);
+
+	/* Make balance in case insert_num < 0 */
+	if (insert_num < 0) {
+		balance_internal_when_delete(tb, h, child_pos);
+		return order;
+	}
+
+	k = 0;
+	if (tb->lnum[h] > 0) {
+		/* shift lnum[h] items from S[h] to the left neighbor L[h].
+		   check how many of the new items fall into L[h] or CFL[h]
+		   after shifting */
+		n = B_NR_ITEMS(tb->L[h]);	/* number of items in L[h] */
+		if (tb->lnum[h] <= child_pos) {
+			/* new items don't fall into L[h] or CFL[h] */
+			internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
+					    tb->lnum[h]);
+			/*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
+			child_pos -= tb->lnum[h];
+		} else if (tb->lnum[h] > child_pos + insert_num) {
+			/* all new items fall into L[h] */
+			internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
+					    tb->lnum[h] - insert_num);
+			/*                  internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
+			   tb->lnum[h]-insert_num);
+			 */
+			/* insert insert_num keys and node-pointers into L[h] */
+			bi.tb = tb;
+			bi.bi_bh = tb->L[h];
+			bi.bi_parent = tb->FL[h];
+			bi.bi_position = get_left_neighbor_position(tb, h);
+			internal_insert_childs(&bi,
+					       /*tb->L[h], tb->S[h-1]->b_next */
+					       n + child_pos + 1,
+					       insert_num, insert_key,
+					       insert_ptr);
+
+			insert_num = 0;
+		} else {
+			struct disk_child *dc;
+
+			/* some items fall into L[h] or CFL[h], but some don't */
+			internal_shift1_left(tb, h, child_pos + 1);
+			/* calculate number of new items that fall into L[h] */
+			k = tb->lnum[h] - child_pos - 1;
+			bi.tb = tb;
+			bi.bi_bh = tb->L[h];
+			bi.bi_parent = tb->FL[h];
+			bi.bi_position = get_left_neighbor_position(tb, h);
+			internal_insert_childs(&bi,
+					       /*tb->L[h], tb->S[h-1]->b_next, */
+					       n + child_pos + 1, k,
+					       insert_key, insert_ptr);
+
+			replace_lkey(tb, h, insert_key + k);
+
+			/* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
+			dc = B_N_CHILD(tbSh, 0);
+			put_dc_size(dc,
+				    MAX_CHILD_SIZE(insert_ptr[k]) -
+				    B_FREE_SPACE(insert_ptr[k]));
+			put_dc_block_number(dc, insert_ptr[k]->b_blocknr);
+
+			do_balance_mark_internal_dirty(tb, tbSh, 0);
+
+			k++;
+			insert_key += k;
+			insert_ptr += k;
+			insert_num -= k;
+			child_pos = 0;
+		}
+	}
+	/* tb->lnum[h] > 0 */
+	if (tb->rnum[h] > 0) {
+		/*shift rnum[h] items from S[h] to the right neighbor R[h] */
+		/* check how many of the new items fall into R or CFR after shifting */
+		n = B_NR_ITEMS(tbSh);	/* number of items in S[h] */
+		if (n - tb->rnum[h] >= child_pos)
+			/* new items fall into S[h] */
+			/*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
+			internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
+					     tb->rnum[h]);
+		else if (n + insert_num - tb->rnum[h] < child_pos) {
+			/* all new items fall into R[h] */
+			/*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
+			   tb->rnum[h] - insert_num); */
+			internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
+					     tb->rnum[h] - insert_num);
+
+			/* insert insert_num keys and node-pointers into R[h] */
+			bi.tb = tb;
+			bi.bi_bh = tb->R[h];
+			bi.bi_parent = tb->FR[h];
+			bi.bi_position = get_right_neighbor_position(tb, h);
+			internal_insert_childs(&bi,
+					       /*tb->R[h],tb->S[h-1]->b_next */
+					       child_pos - n - insert_num +
+					       tb->rnum[h] - 1,
+					       insert_num, insert_key,
+					       insert_ptr);
+			insert_num = 0;
+		} else {
+			struct disk_child *dc;
+
+			/* one of the items falls into CFR[h] */
+			internal_shift1_right(tb, h, n - child_pos + 1);
+			/* calculate number of new items that fall into R[h] */
+			k = tb->rnum[h] - n + child_pos - 1;
+			bi.tb = tb;
+			bi.bi_bh = tb->R[h];
+			bi.bi_parent = tb->FR[h];
+			bi.bi_position = get_right_neighbor_position(tb, h);
+			internal_insert_childs(&bi,
+					       /*tb->R[h], tb->R[h]->b_child, */
+					       0, k, insert_key + 1,
+					       insert_ptr + 1);
+
+			replace_rkey(tb, h, insert_key + insert_num - k - 1);
+
+			/* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */
+			dc = B_N_CHILD(tb->R[h], 0);
+			put_dc_size(dc,
+				    MAX_CHILD_SIZE(insert_ptr
+						   [insert_num - k - 1]) -
+				    B_FREE_SPACE(insert_ptr
+						 [insert_num - k - 1]));
+			put_dc_block_number(dc,
+					    insert_ptr[insert_num - k -
+						       1]->b_blocknr);
+
+			do_balance_mark_internal_dirty(tb, tb->R[h], 0);
+
+			insert_num -= (k + 1);
+		}
+	}
+
+    /** Fill new node that appears instead of S[h] **/
+	RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
+	RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
+
+	if (!tb->blknum[h]) {	/* node S[h] is empty now */
+		RFALSE(!tbSh, "S[h] is equal NULL");
+
+		/* do what is needed for buffer thrown from tree */
+		reiserfs_invalidate_buffer(tb, tbSh);
+		return order;
+	}
+
+	if (!tbSh) {
+		/* create new root */
+		struct disk_child *dc;
+		struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1);
+		struct block_head *blkh;
+
+		if (tb->blknum[h] != 1)
+			reiserfs_panic(NULL, "ibalance-3", "One new node "
+				       "required for creating the new root");
+		/* S[h] = empty buffer from the list FEB. */
+		tbSh = get_FEB(tb);
+		blkh = B_BLK_HEAD(tbSh);
+		set_blkh_level(blkh, h + 1);
+
+		/* Put the unique node-pointer to S[h] that points to S[h-1]. */
+
+		dc = B_N_CHILD(tbSh, 0);
+		put_dc_block_number(dc, tbSh_1->b_blocknr);
+		put_dc_size(dc,
+			    (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1)));
+
+		tb->insert_size[h] -= DC_SIZE;
+		set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE);
+
+		do_balance_mark_internal_dirty(tb, tbSh, 0);
+
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+		check_internal(tbSh);
+		/*&&&&&&&&&&&&&&&&&&&&&&&& */
+
+		/* put new root into path structure */
+		PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) =
+		    tbSh;
+
+		/* Change root in structure super block. */
+		PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr);
+		PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1);
+		do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
+	}
+
+	if (tb->blknum[h] == 2) {
+		int snum;
+		struct buffer_info dest_bi, src_bi;
+
+		/* S_new = free buffer from list FEB */
+		S_new = get_FEB(tb);
+
+		set_blkh_level(B_BLK_HEAD(S_new), h + 1);
+
+		dest_bi.tb = tb;
+		dest_bi.bi_bh = S_new;
+		dest_bi.bi_parent = NULL;
+		dest_bi.bi_position = 0;
+		src_bi.tb = tb;
+		src_bi.bi_bh = tbSh;
+		src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+
+		n = B_NR_ITEMS(tbSh);	/* number of items in S[h] */
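+		/* S_new takes roughly half of the old plus new pointers */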
+		snum = (insert_num + n + 1) / 2;
+		if (n - snum >= child_pos) {
+			/* new items don't fall into S_new */
+			/*  store the delimiting key for the next level */
+			/* new_insert_key = (n - snum)'th key in S[h] */
+			memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum),
+			       KEY_SIZE);
+			/* last parameter is del_par */
+			internal_move_pointers_items(&dest_bi, &src_bi,
+						     LAST_TO_FIRST, snum, 0);
+			/*            internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
+		} else if (n + insert_num - snum < child_pos) {
+			/* all new items fall into S_new */
+			/*  store the delimiting key for the next level */
+			/* new_insert_key = (n + insert_item - snum)'th key in S[h] */
+			memcpy(&new_insert_key,
+			       B_N_PDELIM_KEY(tbSh, n + insert_num - snum),
+			       KEY_SIZE);
+			/* last parameter is del_par */
+			internal_move_pointers_items(&dest_bi, &src_bi,
+						     LAST_TO_FIRST,
+						     snum - insert_num, 0);
+			/*                  internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
+
+			/* insert insert_num keys and node-pointers into S_new */
+			internal_insert_childs(&dest_bi,
+					       /*S_new,tb->S[h-1]->b_next, */
+					       child_pos - n - insert_num +
+					       snum - 1,
+					       insert_num, insert_key,
+					       insert_ptr);
+
+			insert_num = 0;
+		} else {
+			struct disk_child *dc;
+
+			/* some items fall into S_new, but some don't */
+			/* last parameter is del_par */
+			internal_move_pointers_items(&dest_bi, &src_bi,
+						     LAST_TO_FIRST,
+						     n - child_pos + 1, 1);
+			/*                  internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
+			/* calculate number of new items that fall into S_new */
+			k = snum - n + child_pos - 1;
+
+			internal_insert_childs(&dest_bi, /*S_new, */ 0, k,
+					       insert_key + 1, insert_ptr + 1);
+
+			/* new_insert_key = insert_key[insert_num - k - 1] */
+			memcpy(&new_insert_key, insert_key + insert_num - k - 1,
+			       KEY_SIZE);
+			/* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
+
+			dc = B_N_CHILD(S_new, 0);
+			put_dc_size(dc,
+				    (MAX_CHILD_SIZE
+				     (insert_ptr[insert_num - k - 1]) -
+				     B_FREE_SPACE(insert_ptr
+						  [insert_num - k - 1])));
+			put_dc_block_number(dc,
+					    insert_ptr[insert_num - k -
+						       1]->b_blocknr);
+
+			do_balance_mark_internal_dirty(tb, S_new, 0);
+
+			insert_num -= (k + 1);
+		}
+		/* new_insert_ptr = node_pointer to S_new */
+		new_insert_ptr = S_new;
+
+		RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new)
+		       || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
+		       S_new);
+
+		// S_new is released in unfix_nodes
+	}
+
+	n = B_NR_ITEMS(tbSh);	/*number of items in S[h] */
+
+	if (0 <= child_pos && child_pos <= n && insert_num > 0) {
+		bi.tb = tb;
+		bi.bi_bh = tbSh;
+		bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
+		bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
+		internal_insert_childs(&bi,	/*tbSh, */
+				       /*          ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next :  tb->S[h]->b_child->b_next, */
+				       child_pos, insert_num, insert_key,
+				       insert_ptr);
+	}
+
+	memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
+	insert_ptr[0] = new_insert_ptr;
+
+	return order;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/inode.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/inode.c
new file mode 100644
index 0000000..c11db51
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/inode.c
@@ -0,0 +1,3240 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
+#include <linux/exportfs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/quotaops.h>
+#include <linux/swap.h>
+
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to);
+
+void reiserfs_evict_inode(struct inode *inode)
+{
+	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 2 +
+	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
+	struct reiserfs_transaction_handle th;
+	int depth;
+	int err;
+
+	if (!inode->i_nlink && !is_bad_inode(inode))
+		dquot_initialize(inode);
+
+	truncate_inode_pages(&inode->i_data, 0);
+	if (inode->i_nlink)
+		goto no_delete;
+
+	depth = reiserfs_write_lock_once(inode->i_sb);
+
+	/* The k_objectid == 0 case happens when we abort creating a new inode for some reason, like lack of space.. */
+	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
+		reiserfs_delete_xattrs(inode);
+
+		if (journal_begin(&th, inode->i_sb, jbegin_count))
+			goto out;
+		reiserfs_update_inode_transaction(inode);
+
+		reiserfs_discard_prealloc(&th, inode);
+
+		err = reiserfs_delete_object(&th, inode);
+
+		/* Do quota update inside a transaction for journaled quotas. We must do that
+		 * after delete_object so that quota updates go into the same transaction as
+		 * stat data deletion */
+		if (!err) 
+			dquot_free_inode(inode);
+
+		if (journal_end(&th, inode->i_sb, jbegin_count))
+			goto out;
+
+		/* check return value from reiserfs_delete_object after
+		 * ending the transaction
+		 */
+		if (err)
+		    goto out;
+
+		/* all items of file are deleted, so we can remove "save" link */
+		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
+								 * about an error here */
+	} else {
+		/* no object items are in the tree */
+		;
+	}
+      out:
+	end_writeback(inode);	/* note this must go after the journal_end to prevent deadlock */
+	dquot_drop(inode);
+	inode->i_blocks = 0;
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+	return;
+
+no_delete:
+	end_writeback(inode);
+	dquot_drop(inode);
+}
+
+static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
+			  __u32 objectid, loff_t offset, int type, int length)
+{
+	key->version = version;
+
+	key->on_disk_key.k_dir_id = dirid;
+	key->on_disk_key.k_objectid = objectid;
+	set_cpu_key_k_offset(key, offset);
+	set_cpu_key_k_type(key, type);
+	key->key_length = length;
+}
+
+/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
+   offset and type of key */
+void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
+		  int type, int length)
+{
+	_make_cpu_key(key, get_inode_item_key_version(inode),
+		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
+		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
+		      length);
+}
+
+//
+// when key is 0, do not set version and short key
+//
+inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
+			      int version,
+			      loff_t offset, int type, int length,
+			      int entry_count /*or ih_free_space */ )
+{
+	if (key) {
+		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
+		ih->ih_key.k_objectid =
+		    cpu_to_le32(key->on_disk_key.k_objectid);
+	}
+	put_ih_version(ih, version);
+	set_le_ih_k_offset(ih, offset);
+	set_le_ih_k_type(ih, type);
+	put_ih_item_len(ih, length);
+	/*    set_ih_free_space (ih, 0); */
+	// for directory items this is the entry count; for direct and stat
+	// data items it is 0xffff, and for indirect items it is 0
+	put_ih_entry_count(ih, entry_count);
+}
+
+//
+// FIXME: we might cache recently accessed indirect item
+
+// Ugh.  Not too eager for that....
+//  I cut the code until such time as I see a convincing argument (benchmark).
+// I don't want a bloated inode struct..., and I don't like code complexity....
+
+/* cutting the code is fine, since it really isn't in use yet and is easy
+** to add back in.  But, Vladimir has a really good idea here.  Think
+** about what happens for reading a file.  For each page,
+** The VFS layer calls reiserfs_readpage, who searches the tree to find
+** an indirect item.  This indirect item has X number of pointers, where
+** X is a big number if we've done the block allocation right.  But,
+** we only use one or two of these pointers during each call to readpage,
+** needlessly re-searching again later on.
+**
+** The size of the cache could be dynamic based on the size of the file.
+**
+** I'd also like to see us cache the location of the stat data item, since
+** we are needlessly researching for that frequently.
+**
+** --chris
+*/
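+
+/* A minimal sketch of the cache chris describes above (hypothetical, not
+ * part of this code): the inode would remember the last indirect item it
+ * resolved, and _get_block_create_0 would consult it before calling
+ * search_for_position_by_key:
+ *
+ *	struct reiserfs_ind_cache {
+ *		struct cpu_key key;	// key of the cached indirect item
+ *		b_blocknr_t blocknr;	// block holding that item
+ *	};
+ *
+ * Invalidating it on every tree balance is the hard part, which is
+ * presumably why the code was cut. */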
+
+/* If this page has a file tail in it, and
+** it was read in by get_block_create_0, the page data is valid,
+** but tail is still sitting in a direct item, and we can't write to
+** it.  So, look through this page, and check all the mapped buffers
+** to make sure they have valid block numbers.  Any that don't have
+** valid block numbers need to be unmapped, so that __block_write_begin
+** will correctly call reiserfs_get_block to convert the tail into an
+** unformatted node
+*/
+static inline void fix_tail_page_for_writing(struct page *page)
+{
+	struct buffer_head *head, *next, *bh;
+
+	if (page && page_has_buffers(page)) {
+		head = page_buffers(page);
+		bh = head;
+		do {
+			next = bh->b_this_page;
+			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
+				reiserfs_unmap_buffer(bh);
+			}
+			bh = next;
+		} while (bh != head);
+	}
+}
+
+/* reiserfs_get_block does not need to allocate a block only if it has been
+   done already or a non-hole position has been found in the indirect item */
+static inline int allocation_needed(int retval, b_blocknr_t allocated,
+				    struct item_head *ih,
+				    __le32 * item, int pos_in_item)
+{
+	if (allocated)
+		return 0;
+	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
+	    get_block_num(item, pos_in_item))
+		return 0;
+	return 1;
+}
+
+static inline int indirect_item_found(int retval, struct item_head *ih)
+{
+	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
+}
+
+static inline void set_block_dev_mapped(struct buffer_head *bh,
+					b_blocknr_t block, struct inode *inode)
+{
+	map_bh(bh, inode->i_sb, block);
+}
+
+//
+// files which were created in the earlier version cannot be larger
+// than 2 GB
+//
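+// (e.g. with a 4 KiB block size the check below allows blocks
+// 0 .. (1 << 19) - 1, i.e. exactly 2 GiB of file data)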
+static int file_capable(struct inode *inode, sector_t block)
+{
+	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is a new file.
+	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside the 2 GB limit
+		return 1;
+
+	return 0;
+}
+
+static int restart_transaction(struct reiserfs_transaction_handle *th,
+			       struct inode *inode, struct treepath *path)
+{
+	struct super_block *s = th->t_super;
+	int len = th->t_blocks_allocated;
+	int err;
+
+	BUG_ON(!th->t_trans_id);
+	BUG_ON(!th->t_refcount);
+
+	pathrelse(path);
+
+	/* we cannot restart while nested */
+	if (th->t_refcount > 1) {
+		return 0;
+	}
+	reiserfs_update_sd(th, inode);
+	err = journal_end(th, s, len);
+	if (!err) {
+		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
+		if (!err)
+			reiserfs_update_inode_transaction(inode);
+	}
+	return err;
+}
+
+// Called by reiserfs_get_block when create == 0.  Returns the block
+// number for the 'block'-th logical block of the file.  When it hits a
+// direct item it either returns 0 (when called from bmap) or reads the
+// direct item into a piece of the page (bh_result).
+
+static int _get_block_create_0(struct inode *inode, sector_t block,
+			       struct buffer_head *bh_result, int args)
+{
+	INITIALIZE_PATH(path);
+	struct cpu_key key;
+	struct buffer_head *bh;
+	struct item_head *ih, tmp_ih;
+	b_blocknr_t blocknr;
+	char *p = NULL;
+	int chars;
+	int ret;
+	int result;
+	int done = 0;
+	unsigned long offset;
+
+	// prepare the key to look for the 'block'-th block of file
+	make_cpu_key(&key, inode,
+		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
+		     3);
+
+	result = search_for_position_by_key(inode->i_sb, &key, &path);
+	if (result != POSITION_FOUND) {
+		pathrelse(&path);
+		if (p)
+			kunmap(bh_result->b_page);
+		if (result == IO_ERROR)
+			return -EIO;
+		// We do not return -ENOENT if there is a hole but the page is
+		// uptodate, because that means there is some mmapped data
+		// associated with it that has yet to be written to disk.
+		if ((args & GET_BLOCK_NO_HOLE)
+		    && !PageUptodate(bh_result->b_page)) {
+			return -ENOENT;
+		}
+		return 0;
+	}
+	//
+	bh = get_last_bh(&path);
+	ih = get_ih(&path);
+	if (is_indirect_le_ih(ih)) {
+		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);
+
+		/* FIXME: here we could cache indirect item or part of it in
+		   the inode to avoid search_by_key in case of subsequent
+		   access to file */
+		blocknr = get_block_num(ind_item, path.pos_in_item);
+		ret = 0;
+		if (blocknr) {
+			map_bh(bh_result, inode->i_sb, blocknr);
+			if (path.pos_in_item ==
+			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
+				set_buffer_boundary(bh_result);
+			}
+		} else
+			// We do not return -ENOENT if there is a hole but the
+			// page is uptodate, because that means there is some
+			// mmapped data associated with it that has yet to be
+			// written to disk.
+		if ((args & GET_BLOCK_NO_HOLE)
+			    && !PageUptodate(bh_result->b_page)) {
+			ret = -ENOENT;
+		}
+
+		pathrelse(&path);
+		if (p)
+			kunmap(bh_result->b_page);
+		return ret;
+	}
+	// requested data are in direct item(s)
+	if (!(args & GET_BLOCK_READ_DIRECT)) {
+		// we are called by bmap. FIXME: we can not map block of file
+		// when it is stored in direct item(s)
+		pathrelse(&path);
+		if (p)
+			kunmap(bh_result->b_page);
+		return -ENOENT;
+	}
+
+	/* if we've got a direct item, and the buffer or page was uptodate,
+	 ** we don't want to pull data off disk again.  skip to the
+	 ** end, where we map the buffer and return
+	 */
+	if (buffer_uptodate(bh_result)) {
+		goto finished;
+	} else
+		/*
+		 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
+		 ** pages without any buffers.  If the page is up to date, we don't want
+		 ** read old data off disk.  Set the up to date bit on the buffer instead
+		 ** and jump to the end
+		 */
+	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
+		set_buffer_uptodate(bh_result);
+		goto finished;
+	}
+	// read file tail into part of page
+	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
+	copy_item_head(&tmp_ih, ih);
+
+	/* we only want to kmap if we are reading the tail into the page.
+	 ** this is not the common case, so we don't kmap until we are
+	 ** sure we need to.  But, this means the item might move if
+	 ** kmap schedules
+	 */
+	if (!p)
+		p = (char *)kmap(bh_result->b_page);
+
+	p += offset;
+	memset(p, 0, inode->i_sb->s_blocksize);
+	do {
+		if (!is_direct_le_ih(ih)) {
+			BUG();
+		}
+		/* make sure we don't read more bytes than actually exist in
+		 ** the file.  This can happen in odd cases where i_size isn't
+		 ** correct, and when direct item padding results in a few
+		 ** extra bytes at the end of the direct item
+		 */
+		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
+			break;
+		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
+			chars =
+			    inode->i_size - (le_ih_k_offset(ih) - 1) -
+			    path.pos_in_item;
+			done = 1;
+		} else {
+			chars = ih_item_len(ih) - path.pos_in_item;
+		}
+		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);
+
+		if (done)
+			break;
+
+		p += chars;
+
+		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
+			// we are done if the direct item we just read is not
+			// the last item of the node.  FIXME: we could try to
+			// check the right delimiting key to see whether the
+			// direct item continues in the right neighbor, or
+			// rely on i_size
+			break;
+
+		// update key to look for the next piece
+		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
+		result = search_for_position_by_key(inode->i_sb, &key, &path);
+		if (result != POSITION_FOUND)
+			// i/o error most likely
+			break;
+		bh = get_last_bh(&path);
+		ih = get_ih(&path);
+	} while (1);
+
+	flush_dcache_page(bh_result->b_page);
+	kunmap(bh_result->b_page);
+
+      finished:
+	pathrelse(&path);
+
+	if (result == IO_ERROR)
+		return -EIO;
+
+	/* this buffer has valid data, but isn't valid for io.  mapping it to
+	 * block #0 tells the rest of reiserfs it just has a tail in it
+	 */
+	map_bh(bh_result, inode->i_sb, 0);
+	set_buffer_uptodate(bh_result);
+	return 0;
+}
+
+// this is called to create file map. So, _get_block_create_0 will not
+// read direct item
+static int reiserfs_bmap(struct inode *inode, sector_t block,
+			 struct buffer_head *bh_result, int create)
+{
+	if (!file_capable(inode, block))
+		return -EFBIG;
+
+	reiserfs_write_lock(inode->i_sb);
+	/* do not read the direct item */
+	_get_block_create_0(inode, block, bh_result, 0);
+	reiserfs_write_unlock(inode->i_sb);
+	return 0;
+}
+
+/* special version of get_block that is only used by grab_tail_page right
+** now.  It is sent to __block_write_begin, and when you try to get a
+** block past the end of the file (or a block from a hole) it returns
+** -ENOENT instead of a valid buffer.  __block_write_begin expects to
+** be able to do i/o on the buffers returned, unless an error value
+** is also returned.
+**
+** So, this allows __block_write_begin to be used for reading a single block
+** in a page, where it does not produce a valid page for holes or past the
+** end of the file.  This turns out to be exactly what we need for reading
+** tails for conversion.
+**
+** The point of the wrapper is forcing a certain value for create, even
+** though the VFS layer is calling this function with create==1.  If you
+** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
+** don't use this function.
+*/
+static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
+				       struct buffer_head *bh_result,
+				       int create)
+{
+	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
+}
+
+/* This is special helper for reiserfs_get_block in case we are executing
+   direct_IO request. */
+static int reiserfs_get_blocks_direct_io(struct inode *inode,
+					 sector_t iblock,
+					 struct buffer_head *bh_result,
+					 int create)
+{
+	int ret;
+
+	bh_result->b_page = NULL;
+
+	/* We set the b_size before reiserfs_get_block call since it is
+	   referenced in convert_tail_for_hole() that may be called from
+	   reiserfs_get_block() */
+	bh_result->b_size = (1 << inode->i_blkbits);
+
+	ret = reiserfs_get_block(inode, iblock, bh_result,
+				 create | GET_BLOCK_NO_DANGLE);
+	if (ret)
+		goto out;
+
+	/* don't allow direct io onto tail pages */
+	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
+		/* make sure future calls to the direct io funcs for this offset
+		 ** in the file fail by unmapping the buffer
+		 */
+		clear_buffer_mapped(bh_result);
+		ret = -EINVAL;
+	}
+	/* Possible unpacked tail. Flush the data before pages have
+	   disappeared */
+	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
+		int err;
+
+		reiserfs_write_lock(inode->i_sb);
+
+		err = reiserfs_commit_for_inode(inode);
+		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+
+		reiserfs_write_unlock(inode->i_sb);
+
+		if (err < 0)
+			ret = err;
+	}
+      out:
+	return ret;
+}
+
+/*
+** helper function for when reiserfs_get_block is called for a hole
+** but the file tail is still in a direct item
+** bh_result is the buffer head for the hole
+** tail_offset is the offset of the start of the tail in the file
+**
+** This calls prepare_write, which will start a new transaction; you
+** should not be in a transaction, or have any paths held, when you
+** call this.
+*/
+static int convert_tail_for_hole(struct inode *inode,
+				 struct buffer_head *bh_result,
+				 loff_t tail_offset)
+{
+	unsigned long index;
+	unsigned long tail_end;
+	unsigned long tail_start;
+	struct page *tail_page;
+	struct page *hole_page = bh_result->b_page;
+	int retval = 0;
+
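+	/* reiserfs key offsets are 1-based, so a tail that starts on a block
+	 * boundary satisfies (tail_offset & (blocksize - 1)) == 1 */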
+	if ((tail_offset & (bh_result->b_size - 1)) != 1)
+		return -EIO;
+
+	/* always try to read until the end of the block */
+	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
+	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
+
+	index = tail_offset >> PAGE_CACHE_SHIFT;
+	/* hole_page can be zero in case of direct_io, we are sure
+	   that we cannot get here if we write with O_DIRECT into
+	   tail page */
+	if (!hole_page || index != hole_page->index) {
+		tail_page = grab_cache_page(inode->i_mapping, index);
+		retval = -ENOMEM;
+		if (!tail_page) {
+			goto out;
+		}
+	} else {
+		tail_page = hole_page;
+	}
+
+	/* we don't have to make sure the conversion did not happen while
+	 ** we were locking the page because anyone that could convert
+	 ** must first take i_mutex.
+	 **
+	 ** We must fix the tail page for writing because it might have buffers
+	 ** that are mapped, but have a block number of 0.  This indicates tail
+	 ** data that has been read directly into the page, and
+	 ** __block_write_begin won't trigger a get_block in this case.
+	 */
+	fix_tail_page_for_writing(tail_page);
+	retval = __reiserfs_write_begin(tail_page, tail_start,
+				      tail_end - tail_start);
+	if (retval)
+		goto unlock;
+
+	/* tail conversion might change the data in the page */
+	flush_dcache_page(tail_page);
+
+	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
+
+      unlock:
+	if (tail_page != hole_page) {
+		unlock_page(tail_page);
+		page_cache_release(tail_page);
+	}
+      out:
+	return retval;
+}
+
+static inline int _allocate_block(struct reiserfs_transaction_handle *th,
+				  sector_t block,
+				  struct inode *inode,
+				  b_blocknr_t * allocated_block_nr,
+				  struct treepath *path, int flags)
+{
+	BUG_ON(!th->t_trans_id);
+
+#ifdef REISERFS_PREALLOCATE
+	if (!(flags & GET_BLOCK_NO_IMUX)) {
+		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
+						  path, block);
+	}
+#endif
+	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
+					 block);
+}
+
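+/* Here create is really a bitmask of the GET_BLOCK_* flags used in this
+ * file: GET_BLOCK_CREATE allocates missing blocks, GET_BLOCK_NO_HOLE
+ * turns holes into -ENOENT (unless the page is already uptodate),
+ * GET_BLOCK_READ_DIRECT reads a tail into the page, and
+ * GET_BLOCK_NO_DANGLE makes sure we do not return with a transaction
+ * still open. */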
+int reiserfs_get_block(struct inode *inode, sector_t block,
+		       struct buffer_head *bh_result, int create)
+{
+	int repeat, retval = 0;
+	b_blocknr_t allocated_block_nr = 0;	// b_blocknr_t is (unsigned) 32 bit int
+	INITIALIZE_PATH(path);
+	int pos_in_item;
+	struct cpu_key key;
+	struct buffer_head *bh, *unbh = NULL;
+	struct item_head *ih, tmp_ih;
+	__le32 *item;
+	int done;
+	int fs_gen;
+	int lock_depth;
+	struct reiserfs_transaction_handle *th = NULL;
+	/* space reserved in transaction batch:
+	   . 3 balancings in direct->indirect conversion
+	   . 1 block involved into reiserfs_update_sd()
+	   XXX in practically impossible worst case direct2indirect()
+	   can incur (much) more than 3 balancings.
+	   quota update for user, group */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
+	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
+	int version;
+	int dangle = 1;
+	loff_t new_offset =
+	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
+
+	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+	version = get_inode_item_key_version(inode);
+
+	if (!file_capable(inode, block)) {
+		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+		return -EFBIG;
+	}
+
+	/* if !create, we aren't changing the FS, so we don't need to
+	 ** log anything, so we don't need to start a transaction
+	 */
+	if (!(create & GET_BLOCK_CREATE)) {
+		int ret;
+		/* find number of block-th logical block of the file */
+		ret = _get_block_create_0(inode, block, bh_result,
+					  create | GET_BLOCK_READ_DIRECT);
+		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+		return ret;
+	}
+	/*
+	 * if we're already in a transaction, make sure to close
+	 * any new transactions we start in this func
+	 */
+	if ((create & GET_BLOCK_NO_DANGLE) ||
+	    reiserfs_transaction_running(inode->i_sb))
+		dangle = 0;
+
+	/* If the file is of such a size that it might have a tail and tails
+	 ** are enabled, we should mark it as possibly needing tail packing
+	 ** on close
+	 */
+	if ((have_large_tails(inode->i_sb)
+	     && inode->i_size < i_block_size(inode) * 4)
+	    || (have_small_tails(inode->i_sb)
+		&& inode->i_size < i_block_size(inode)))
+		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
+
+	/* set the key of the first byte in the 'block'-th block of file */
+	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
+	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
+	      start_trans:
+		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
+		if (!th) {
+			retval = -ENOMEM;
+			goto failure;
+		}
+		reiserfs_update_inode_transaction(inode);
+	}
+      research:
+
+	retval = search_for_position_by_key(inode->i_sb, &key, &path);
+	if (retval == IO_ERROR) {
+		retval = -EIO;
+		goto failure;
+	}
+
+	bh = get_last_bh(&path);
+	ih = get_ih(&path);
+	item = get_item(&path);
+	pos_in_item = path.pos_in_item;
+
+	fs_gen = get_generation(inode->i_sb);
+	copy_item_head(&tmp_ih, ih);
+
+	if (allocation_needed
+	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
+		/* we have to allocate block for the unformatted node */
+		if (!th) {
+			pathrelse(&path);
+			goto start_trans;
+		}
+
+		repeat =
+		    _allocate_block(th, block, inode, &allocated_block_nr,
+				    &path, create);
+
+		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
+			/* restart the transaction to give the journal a chance to free
+			 ** some blocks.  This releases the path, so we have to go back to
+			 ** research if we succeed on the second try
+			 */
+			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
+			retval = restart_transaction(th, inode, &path);
+			if (retval)
+				goto failure;
+			repeat =
+			    _allocate_block(th, block, inode,
+					    &allocated_block_nr, NULL, create);
+
+			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
+				goto research;
+			}
+			if (repeat == QUOTA_EXCEEDED)
+				retval = -EDQUOT;
+			else
+				retval = -ENOSPC;
+			goto failure;
+		}
+
+		if (fs_changed(fs_gen, inode->i_sb)
+		    && item_moved(&tmp_ih, &path)) {
+			goto research;
+		}
+	}
+
+	if (indirect_item_found(retval, ih)) {
+		b_blocknr_t unfm_ptr;
+		/* the 'block'-th block is in the file already (there is a
+		   corresponding cell in some indirect item). But it may be a
+		   zero unformatted node pointer (a hole) */
+		unfm_ptr = get_block_num(item, pos_in_item);
+		if (unfm_ptr == 0) {
+			/* use allocated block to plug the hole */
+			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
+			if (fs_changed(fs_gen, inode->i_sb)
+			    && item_moved(&tmp_ih, &path)) {
+				reiserfs_restore_prepared_buffer(inode->i_sb,
+								 bh);
+				goto research;
+			}
+			set_buffer_new(bh_result);
+			if (buffer_dirty(bh_result)
+			    && reiserfs_data_ordered(inode->i_sb))
+				reiserfs_add_ordered_list(inode, bh_result);
+			put_block_num(item, pos_in_item, allocated_block_nr);
+			unfm_ptr = allocated_block_nr;
+			journal_mark_dirty(th, inode->i_sb, bh);
+			reiserfs_update_sd(th, inode);
+		}
+		set_block_dev_mapped(bh_result, unfm_ptr, inode);
+		pathrelse(&path);
+		retval = 0;
+		if (!dangle && th)
+			retval = reiserfs_end_persistent_transaction(th);
+
+		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+
+		/* the item was found, so new blocks were not added to the file;
+		 ** there is no need to make sure the inode is updated with this
+		 ** transaction
+		 */
+		return retval;
+	}
+
+	if (!th) {
+		pathrelse(&path);
+		goto start_trans;
+	}
+
+	/* desired position is not found or is in the direct item. We have
+	   to append file with holes up to 'block'-th block converting
+	   direct items to indirect one if necessary */
+	done = 0;
+	do {
+		if (is_statdata_le_ih(ih)) {
+			__le32 unp = 0;
+			struct cpu_key tmp_key;
+
+			/* indirect item has to be inserted */
+			make_le_item_head(&tmp_ih, &key, version, 1,
+					  TYPE_INDIRECT, UNFM_P_SIZE,
+					  0 /* free_space */ );
+
+			if (cpu_key_k_offset(&key) == 1) {
+				/* we are going to add the 'block'-th block to the
+				   file. Use the allocated block for that */
+				unp = cpu_to_le32(allocated_block_nr);
+				set_block_dev_mapped(bh_result,
+						     allocated_block_nr, inode);
+				set_buffer_new(bh_result);
+				done = 1;
+			}
+			tmp_key = key;	// ;)
+			set_cpu_key_k_offset(&tmp_key, 1);
+			PATH_LAST_POSITION(&path)++;
+
+			retval =
+			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
+						 inode, (char *)&unp);
+			if (retval) {
+				reiserfs_free_block(th, inode,
+						    allocated_block_nr, 1);
+				goto failure;	// retval == -ENOSPC, -EDQUOT, -EIO or -EEXIST
+			}
+			//mark_tail_converted (inode);
+		} else if (is_direct_le_ih(ih)) {
+			/* direct item has to be converted */
+			loff_t tail_offset;
+
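+			/*
+			 * Example of the computation below (illustrative, 4KB
+			 * blocks): a direct item whose key offset is 4097
+			 * yields tail_offset = ((4097 - 1) & ~4095) + 1 =
+			 * 4097, the first byte of the block holding the tail.
+			 */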
+			tail_offset =
+			    ((le_ih_k_offset(ih) -
+			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
+			if (tail_offset == cpu_key_k_offset(&key)) {
+				/* the direct item we just found fits into the block
+				   we have to map. Convert it into an unformatted
+				   node: use bh_result for the conversion */
+				set_block_dev_mapped(bh_result,
+						     allocated_block_nr, inode);
+				unbh = bh_result;
+				done = 1;
+			} else {
+				/* we have to pad the file tail stored in direct item(s)
+				   up to block size and convert it to an unformatted
+				   node. FIXME: this should also get into the page cache */
+
+				pathrelse(&path);
+				/*
+				 * ugly, but we can only end the transaction if
+				 * we aren't nested
+				 */
+				BUG_ON(!th->t_refcount);
+				if (th->t_refcount == 1) {
+					retval =
+					    reiserfs_end_persistent_transaction
+					    (th);
+					th = NULL;
+					if (retval)
+						goto failure;
+				}
+
+				retval =
+				    convert_tail_for_hole(inode, bh_result,
+							  tail_offset);
+				if (retval) {
+					if (retval != -ENOSPC)
+						reiserfs_error(inode->i_sb,
+							"clm-6004",
+							"convert tail failed "
+							"inode %lu, error %d",
+							inode->i_ino,
+							retval);
+					if (allocated_block_nr) {
+						/* the bitmap, the super, and the stat data == 3 */
+						if (!th)
+							th = reiserfs_persistent_transaction(inode->i_sb, 3);
+						if (th)
+							reiserfs_free_block(th,
+									    inode,
+									    allocated_block_nr,
+									    1);
+					}
+					goto failure;
+				}
+				goto research;
+			}
+			retval =
+			    direct2indirect(th, inode, &path, unbh,
+					    tail_offset);
+			if (retval) {
+				reiserfs_unmap_buffer(unbh);
+				reiserfs_free_block(th, inode,
+						    allocated_block_nr, 1);
+				goto failure;
+			}
+			/* it is important that set_buffer_uptodate is done after
+			 ** the direct2indirect.  The buffer might contain valid
+			 ** data newer than the data on disk (read by readpage, changed,
+			 ** and then sent here by writepage).  direct2indirect needs
+			 ** to know if unbh was already up to date, so it can decide
+			 ** if the data in unbh needs to be replaced with data from
+			 ** the disk
+			 */
+			set_buffer_uptodate(unbh);
+
+			/* unbh->b_page == NULL in case of a DIRECT_IO request; this
+			   means the buffer will disappear shortly, so it should not
+			   be added to the tail list below */
+			if (unbh->b_page) {
+				/* we've converted the tail, so we must
+				 ** flush unbh before the transaction commits
+				 */
+				reiserfs_add_tail_list(inode, unbh);
+
+				/* mark it dirty now to prevent commit_write from adding
+				 ** this buffer to the inode's dirty buffer list
+				 */
+				/*
+				 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
+				 * It's still atomic, but it sets the page dirty too,
+				 * which makes it eligible for writeback at any time by the
+				 * VM (which was also the case with __mark_buffer_dirty())
+				 */
+				mark_buffer_dirty(unbh);
+			}
+		} else {
+			/* append the indirect item with holes if needed; when
+			   appending the pointer to the 'block'-th block, use the
+			   block that is already allocated */
+			struct cpu_key tmp_key;
+			unp_t unf_single = 0;	// We use this in case we need to allocate
+			// only one block, which is a fastpath
+			unp_t *un;
+			__u64 max_to_insert =
+			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
+			    UNFM_P_SIZE;
+			__u64 blocks_needed;
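+			/*
+			 * max_to_insert bounds how many unformatted-node
+			 * pointers fit into one indirect item: MAX_ITEM_LEN
+			 * is the block size minus the block-head and
+			 * item-head overhead, so on a 4KB block this comes
+			 * to a bit over a thousand 4-byte pointers
+			 * (illustrative figure).
+			 */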
+
+			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
+			       "vs-804: invalid position for append");
+			/* indirect item has to be appended, set up key of that position */
+			make_cpu_key(&tmp_key, inode,
+				     le_key_k_offset(version,
+						     &(ih->ih_key)) +
+				     op_bytes_number(ih,
+						     inode->i_sb->s_blocksize),
+				     //pos_in_item * inode->i_sb->s_blocksize,
+				     TYPE_INDIRECT, 3);	// key type is unimportant
+
+			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
+			       "green-805: invalid offset");
+			blocks_needed =
+			    1 +
+			    ((cpu_key_k_offset(&key) -
+			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
+			     s_blocksize_bits);
+
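+			/*
+			 * Worked example (illustrative, 4KB blocks): if the
+			 * indirect item currently ends at byte offset 8193
+			 * (tmp_key) and we append block 10 (key offset
+			 * 10 * 4096 + 1 = 40961), then blocks_needed =
+			 * 1 + ((40961 - 8193) >> 12) = 9: eight hole pointers
+			 * plus the newly allocated block.
+			 */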
+			if (blocks_needed == 1) {
+				un = &unf_single;
+			} else {
+				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
+				if (!un) {
+					un = &unf_single;
+					blocks_needed = 1;
+					max_to_insert = 0;
+				}
+			}
+			if (blocks_needed <= max_to_insert) {
+				/* we are going to add target block to the file. Use allocated
+				   block for that */
+				un[blocks_needed - 1] =
+				    cpu_to_le32(allocated_block_nr);
+				set_block_dev_mapped(bh_result,
+						     allocated_block_nr, inode);
+				set_buffer_new(bh_result);
+				done = 1;
+			} else {
+				/* paste hole to the indirect item */
+				/* If kzalloc failed, max_to_insert becomes zero and it means we
+				   only have space for one block */
+				blocks_needed =
+				    max_to_insert ? max_to_insert : 1;
+			}
+			retval =
+			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
+						     (char *)un,
+						     UNFM_P_SIZE *
+						     blocks_needed);
+
+			if (blocks_needed != 1)
+				kfree(un);
+
+			if (retval) {
+				reiserfs_free_block(th, inode,
+						    allocated_block_nr, 1);
+				goto failure;
+			}
+			if (!done) {
+				/* We need to record the new file size in case this
+				   function is interrupted/aborted later on. We may
+				   do this only for holes. */
+				inode->i_size +=
+				    inode->i_sb->s_blocksize * blocks_needed;
+			}
+		}
+
+		if (done == 1)
+			break;
+
+		/* this loop could log more blocks than we had originally asked
+		 ** for.  So, we have to allow the transaction to end if it is
+		 ** too big or too full.  Update the inode so things are
+		 ** consistent if we crash before the function returns
+		 **
+		 ** release the path so that anybody waiting on the path before
+		 ** ending their transaction will be able to continue.
+		 */
+		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
+			retval = restart_transaction(th, inode, &path);
+			if (retval)
+				goto failure;
+		}
+		/*
+		 * inserting indirect pointers for a hole can take a
+		 * long time.  reschedule if needed and also release the write
+		 * lock for others.
+		 */
+		if (need_resched()) {
+			reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+			schedule();
+			lock_depth = reiserfs_write_lock_once(inode->i_sb);
+		}
+
+		retval = search_for_position_by_key(inode->i_sb, &key, &path);
+		if (retval == IO_ERROR) {
+			retval = -EIO;
+			goto failure;
+		}
+		if (retval == POSITION_FOUND) {
+			reiserfs_warning(inode->i_sb, "vs-825",
+					 "%K should not be found", &key);
+			retval = -EEXIST;
+			if (allocated_block_nr)
+				reiserfs_free_block(th, inode,
+						    allocated_block_nr, 1);
+			pathrelse(&path);
+			goto failure;
+		}
+		bh = get_last_bh(&path);
+		ih = get_ih(&path);
+		item = get_item(&path);
+		pos_in_item = path.pos_in_item;
+	} while (1);
+
+	retval = 0;
+
+      failure:
+	if (th && (!dangle || (retval && !th->t_trans_id))) {
+		int err;
+		if (th->t_trans_id)
+			reiserfs_update_sd(th, inode);
+		err = reiserfs_end_persistent_transaction(th);
+		if (err)
+			retval = err;
+	}
+
+	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_check_path(&path);
+	return retval;
+}
+
+static int
+reiserfs_readpages(struct file *file, struct address_space *mapping,
+		   struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
+}
+
+/* Compute real number of used bytes by file
+ * Following three functions can go away when we'll have enough space in stat item
+ */
+static int real_space_diff(struct inode *inode, int sd_size)
+{
+	int bytes;
+	loff_t blocksize = inode->i_sb->s_blocksize;
+
+	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
+		return sd_size;
+
+	/* The end of the file is also in a full block with an indirect
+	 ** reference, so round up to the next block.
+	 **
+	 ** there is just no way to know if the tail is actually packed
+	 ** in the file, so we have to assume it isn't.  When we pack the
+	 ** tail, we add 4 bytes to pretend there really is an unformatted
+	 ** node pointer
+	 */
+	bytes =
+	    ((inode->i_size +
+	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
+	    sd_size;
+	return bytes;
+}
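+/* Worked example for real_space_diff() (illustrative, 4KB blocks,
+ * 4-byte unformatted node pointers): a 5000-byte regular file rounds up
+ * to (5000 + 4095) >> 12 = 2 blocks, so the overhead is
+ * 2 * UNFM_P_SIZE + sd_size = 8 + sd_size bytes.
+ */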
+
+static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
+					int sd_size)
+{
+	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
+		return inode->i_size +
+		    (loff_t) (real_space_diff(inode, sd_size));
+	}
+	return ((loff_t) real_space_diff(inode, sd_size)) +
+	    (((loff_t) blocks) << 9);
+}
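+/* Illustrative: i_blocks counts 512-byte sectors, so for a regular file
+ * with blocks == 8, to_real_used_space() reports
+ * real_space_diff() + (8 << 9) = overhead + 4096 bytes.
+ */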
+
+/* Compute number of blocks used by file in ReiserFS counting */
+static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
+{
+	loff_t bytes = inode_get_bytes(inode);
+	loff_t real_space = real_space_diff(inode, sd_size);
+
+	/* keeps fsck and non-quota versions of reiserfs happy */
+	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
+		bytes += (loff_t) 511;
+	}
+
+	/* files from before the quota patch might have i_blocks such that
+	 ** bytes < real_space.  Deal with that here to prevent it from
+	 ** going negative.
+	 */
+	if (bytes < real_space)
+		return 0;
+	return (bytes - real_space) >> 9;
+}
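+/* Example (illustrative): with inode_get_bytes() == 4104 and
+ * real_space == 8, to_fake_used_blocks() returns (4104 - 8) >> 9 = 8,
+ * i.e. the plain 512-byte-sector count that quota and fsck expect.
+ */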
+
+//
+// BAD: new directories have stat data of the new type and all other items
+// of the old type. The version stored in the inode describes the body items,
+// so in update_stat_data we cannot rely on the inode, but have to check the
+// item version directly
+//
+
+// called by read_locked_inode
+static void init_inode(struct inode *inode, struct treepath *path)
+{
+	struct buffer_head *bh;
+	struct item_head *ih;
+	__u32 rdev;
+	//int version = ITEM_VERSION_1;
+
+	bh = PATH_PLAST_BUFFER(path);
+	ih = PATH_PITEM_HEAD(path);
+
+	copy_key(INODE_PKEY(inode), &(ih->ih_key));
+
+	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
+	REISERFS_I(inode)->i_flags = 0;
+	REISERFS_I(inode)->i_prealloc_block = 0;
+	REISERFS_I(inode)->i_prealloc_count = 0;
+	REISERFS_I(inode)->i_trans_id = 0;
+	REISERFS_I(inode)->i_jl = NULL;
+	reiserfs_init_xattr_rwsem(inode);
+
+	if (stat_data_v1(ih)) {
+		struct stat_data_v1 *sd =
+		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
+		unsigned long blocks;
+
+		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
+		set_inode_sd_version(inode, STAT_DATA_V1);
+		inode->i_mode = sd_v1_mode(sd);
+		set_nlink(inode, sd_v1_nlink(sd));
+		inode->i_uid = sd_v1_uid(sd);
+		inode->i_gid = sd_v1_gid(sd);
+		inode->i_size = sd_v1_size(sd);
+		inode->i_atime.tv_sec = sd_v1_atime(sd);
+		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
+		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
+		inode->i_atime.tv_nsec = 0;
+		inode->i_ctime.tv_nsec = 0;
+		inode->i_mtime.tv_nsec = 0;
+
+		inode->i_blocks = sd_v1_blocks(sd);
+		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
+		blocks = (inode->i_size + 511) >> 9;
+		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
+		if (inode->i_blocks > blocks) {
+			// there was a bug in <=3.5.23 when i_blocks could take negative
+			// values. Starting from 3.5.17 this value could even be stored in
+			// stat data. For such files we set i_blocks based on file
+			// size. Two notes: this can be wrong for sparse files, and the
+			// on-disk value will only be updated if the file's inode ever changes
+			inode->i_blocks = blocks;
+		}
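+		/*
+		 * Example of the sanity cap above (illustrative, 4KB
+		 * blocks): i_size == 5000 gives (5000 + 511) >> 9 = 10
+		 * sectors, rounded up to the 8-sector block boundary = 16;
+		 * any stored i_blocks larger than that is replaced.
+		 */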
+
+		rdev = sd_v1_rdev(sd);
+		REISERFS_I(inode)->i_first_direct_byte =
+		    sd_v1_first_direct_byte(sd);
+		/* an early bug in the quota code can give us an odd number for the
+		 ** block count.  This is incorrect, fix it here.
+		 */
+		if (inode->i_blocks & 1) {
+			inode->i_blocks++;
+		}
+		inode_set_bytes(inode,
+				to_real_used_space(inode, inode->i_blocks,
+						   SD_V1_SIZE));
+		/* nopack is initially zero for v1 objects. For v2 objects,
+		   nopack is initialised from sd_attrs */
+		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
+	} else {
+		// new stat data found, but object may have old items
+		// (directories and symlinks)
+		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
+
+		inode->i_mode = sd_v2_mode(sd);
+		set_nlink(inode, sd_v2_nlink(sd));
+		inode->i_uid = sd_v2_uid(sd);
+		inode->i_size = sd_v2_size(sd);
+		inode->i_gid = sd_v2_gid(sd);
+		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
+		inode->i_atime.tv_sec = sd_v2_atime(sd);
+		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
+		inode->i_ctime.tv_nsec = 0;
+		inode->i_mtime.tv_nsec = 0;
+		inode->i_atime.tv_nsec = 0;
+		inode->i_blocks = sd_v2_blocks(sd);
+		rdev = sd_v2_rdev(sd);
+		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+			inode->i_generation =
+			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
+		else
+			inode->i_generation = sd_v2_generation(sd);
+
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
+		else
+			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
+		REISERFS_I(inode)->i_first_direct_byte = 0;
+		set_inode_sd_version(inode, STAT_DATA_V2);
+		inode_set_bytes(inode,
+				to_real_used_space(inode, inode->i_blocks,
+						   SD_V2_SIZE));
+		/* read persistent inode attributes from sd and initialise
+		   generic inode flags from them */
+		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
+		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
+	}
+
+	pathrelse(path);
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &reiserfs_file_inode_operations;
+		inode->i_fop = &reiserfs_file_operations;
+		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &reiserfs_dir_inode_operations;
+		inode->i_fop = &reiserfs_dir_operations;
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &reiserfs_symlink_inode_operations;
+		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
+	} else {
+		inode->i_blocks = 0;
+		inode->i_op = &reiserfs_special_inode_operations;
+		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+	}
+}
+
+// update new stat data with inode fields
+static void inode2sd(void *sd, struct inode *inode, loff_t size)
+{
+	struct stat_data *sd_v2 = (struct stat_data *)sd;
+	__u16 flags;
+
+	set_sd_v2_mode(sd_v2, inode->i_mode);
+	set_sd_v2_nlink(sd_v2, inode->i_nlink);
+	set_sd_v2_uid(sd_v2, inode->i_uid);
+	set_sd_v2_size(sd_v2, size);
+	set_sd_v2_gid(sd_v2, inode->i_gid);
+	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
+	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
+	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
+	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
+	else
+		set_sd_v2_generation(sd_v2, inode->i_generation);
+	flags = REISERFS_I(inode)->i_attrs;
+	i_attrs_to_sd_attrs(inode, &flags);
+	set_sd_v2_attrs(sd_v2, flags);
+}
+
+// used to copy inode's fields to old stat data
+static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
+{
+	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
+
+	set_sd_v1_mode(sd_v1, inode->i_mode);
+	set_sd_v1_uid(sd_v1, inode->i_uid);
+	set_sd_v1_gid(sd_v1, inode->i_gid);
+	set_sd_v1_nlink(sd_v1, inode->i_nlink);
+	set_sd_v1_size(sd_v1, size);
+	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
+	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
+	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
+	else
+		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
+
+	// Sigh. i_first_direct_byte is back
+	set_sd_v1_first_direct_byte(sd_v1,
+				    REISERFS_I(inode)->i_first_direct_byte);
+}
+
+/* NOTE, you must prepare the buffer head before sending it here,
+** and then log it after the call
+*/
+static void update_stat_data(struct treepath *path, struct inode *inode,
+			     loff_t size)
+{
+	struct buffer_head *bh;
+	struct item_head *ih;
+
+	bh = PATH_PLAST_BUFFER(path);
+	ih = PATH_PITEM_HEAD(path);
+
+	if (!is_statdata_le_ih(ih))
+		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
+			       INODE_PKEY(inode), ih);
+
+	if (stat_data_v1(ih)) {
+		// path points to old stat data
+		inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
+	} else {
+		inode2sd(B_I_PITEM(bh, ih), inode, size);
+	}
+
+	return;
+}
+
+void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
+			     struct inode *inode, loff_t size)
+{
+	struct cpu_key key;
+	INITIALIZE_PATH(path);
+	struct buffer_head *bh;
+	int fs_gen;
+	struct item_head *ih, tmp_ih;
+	int retval;
+
+	BUG_ON(!th->t_trans_id);
+
+	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant
+
+	for (;;) {
+		int pos;
+		/* look for the object's stat data */
+		retval = search_item(inode->i_sb, &key, &path);
+		if (retval == IO_ERROR) {
+			reiserfs_error(inode->i_sb, "vs-13050",
+				       "i/o failure occurred trying to "
+				       "update %K stat data", &key);
+			return;
+		}
+		if (retval == ITEM_NOT_FOUND) {
+			pos = PATH_LAST_POSITION(&path);
+			pathrelse(&path);
+			if (inode->i_nlink == 0) {
+				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
+				return;
+			}
+			reiserfs_warning(inode->i_sb, "vs-13060",
+					 "stat data of object %k (nlink == %d) "
+					 "not found (pos %d)",
+					 INODE_PKEY(inode), inode->i_nlink,
+					 pos);
+			reiserfs_check_path(&path);
+			return;
+		}
+
+		/* sigh, prepare_for_journal might schedule.  When it schedules, the
+		 ** FS might change.  We have to detect that, and loop back to the
+		 ** search if the stat data item has moved
+		 */
+		bh = get_last_bh(&path);
+		ih = get_ih(&path);
+		copy_item_head(&tmp_ih, ih);
+		fs_gen = get_generation(inode->i_sb);
+		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
+		if (fs_changed(fs_gen, inode->i_sb)
+		    && item_moved(&tmp_ih, &path)) {
+			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
+			continue;	/* Stat_data item has been moved after scheduling. */
+		}
+		break;
+	}
+	update_stat_data(&path, inode, size);
+	journal_mark_dirty(th, th->t_super, bh);
+	pathrelse(&path);
+	return;
+}
+
+/* reiserfs_read_locked_inode is called to read the inode off disk, and it
+** does a make_bad_inode when things go wrong.  But, we need to make sure
+** and clear the key in the private portion of the inode, otherwise a
+** corresponding iput might try to delete whatever object the inode last
+** represented.
+*/
+static void reiserfs_make_bad_inode(struct inode *inode)
+{
+	memset(INODE_PKEY(inode), 0, KEY_SIZE);
+	make_bad_inode(inode);
+}
+
+//
+// initially this function was derived from minix or ext2's analog and
+// evolved as the prototype did
+//
+
+int reiserfs_init_locked_inode(struct inode *inode, void *p)
+{
+	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
+	inode->i_ino = args->objectid;
+	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
+	return 0;
+}
+
+/* looks for stat data in the tree, and fills up the fields of in-core
+   inode stat data fields */
+void reiserfs_read_locked_inode(struct inode *inode,
+				struct reiserfs_iget_args *args)
+{
+	INITIALIZE_PATH(path_to_sd);
+	struct cpu_key key;
+	unsigned long dirino;
+	int retval;
+
+	dirino = args->dirid;
+
+	/* set version 1, version 2 could be used too, because stat data
+	   key is the same in both versions */
+	key.version = KEY_FORMAT_3_5;
+	key.on_disk_key.k_dir_id = dirino;
+	key.on_disk_key.k_objectid = inode->i_ino;
+	key.on_disk_key.k_offset = 0;
+	key.on_disk_key.k_type = 0;
+
+	/* look for the object's stat data */
+	retval = search_item(inode->i_sb, &key, &path_to_sd);
+	if (retval == IO_ERROR) {
+		reiserfs_error(inode->i_sb, "vs-13070",
+			       "i/o failure occurred trying to find "
+			       "stat data of %K", &key);
+		reiserfs_make_bad_inode(inode);
+		return;
+	}
+	if (retval != ITEM_FOUND) {
+		/* a stale NFS handle can trigger this without it being an error */
+		pathrelse(&path_to_sd);
+		reiserfs_make_bad_inode(inode);
+		clear_nlink(inode);
+		return;
+	}
+
+	init_inode(inode, &path_to_sd);
+
+	/* It is possible that knfsd is trying to access inode of a file
+	   that is being removed from the disk by some other thread. As we
+	   update sd on unlink all that is required is to check for nlink
+	   here. This bug was first found by Sizif when debugging
+	   SquidNG/Butterfly, forgotten, and found again after Philippe
+	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
+
+	   A more logical fix would require changes in fs/inode.c:iput() to
+	   remove the inode from the hash-table _after_ the fs has cleaned disk
+	   stuff up and in iget() to return NULL if an I_FREEING inode is found
+	   in the hash-table. */
+	/* Currently there is one place where it's ok to meet an inode with
+	   nlink==0: processing of open-unlinked and half-truncated files
+	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
+	if ((inode->i_nlink == 0) &&
+	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
+		reiserfs_warning(inode->i_sb, "vs-13075",
+				 "dead inode read from disk %K. "
+				 "This is likely to be a race with knfsd. Ignore",
+				 &key);
+		reiserfs_make_bad_inode(inode);
+	}
+
+	reiserfs_check_path(&path_to_sd);	/* init_inode should be releasing */
+
+	/*
+	 * Stat data v1 doesn't support ACLs.
+	 */
+	if (get_inode_sd_version(inode) == STAT_DATA_V1)
+		cache_no_acl(inode);
+}
+
+/**
+ * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
+ *
+ * @inode:    inode from hash table to check
+ * @opaque:   "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
+ *
+ * This function is called by iget5_locked() to distinguish reiserfs inodes
+ * having the same inode numbers. Such inodes can only exist due to some
+ * error condition. One of them should be bad. Inodes with identical
+ * inode numbers (objectids) are distinguished by parent directory ids.
+ *
+ */
+int reiserfs_find_actor(struct inode *inode, void *opaque)
+{
+	struct reiserfs_iget_args *args;
+
+	args = opaque;
+	/* args is already in CPU order */
+	return (inode->i_ino == args->objectid) &&
+	    (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
+}
+
+struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
+{
+	struct inode *inode;
+	struct reiserfs_iget_args args;
+
+	args.objectid = key->on_disk_key.k_objectid;
+	args.dirid = key->on_disk_key.k_dir_id;
+	reiserfs_write_unlock(s);
+	inode = iget5_locked(s, key->on_disk_key.k_objectid,
+			     reiserfs_find_actor, reiserfs_init_locked_inode,
+			     (void *)(&args));
+	reiserfs_write_lock(s);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	if (inode->i_state & I_NEW) {
+		reiserfs_read_locked_inode(inode, &args);
+		unlock_new_inode(inode);
+	}
+
+	if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
+		/* either due to i/o error or a stale NFS handle */
+		iput(inode);
+		inode = NULL;
+	}
+	return inode;
+}
+
+static struct dentry *reiserfs_get_dentry(struct super_block *sb,
+	u32 objectid, u32 dir_id, u32 generation)
+
+{
+	struct cpu_key key;
+	struct inode *inode;
+
+	key.on_disk_key.k_objectid = objectid;
+	key.on_disk_key.k_dir_id = dir_id;
+	reiserfs_write_lock(sb);
+	inode = reiserfs_iget(sb, &key);
+	if (inode && !IS_ERR(inode) && generation != 0 &&
+	    generation != inode->i_generation) {
+		iput(inode);
+		inode = NULL;
+	}
+	reiserfs_write_unlock(sb);
+
+	return d_obtain_alias(inode);
+}
+
+struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	/* fhtype happens to reflect the number of u32s encoded.
+	 * due to a bug in earlier code, fhtype might indicate there
+	 * are more u32s than actually fit.
+	 * so if fhtype seems to be more than len, reduce fhtype.
+	 * Valid types are:
+	 *   2 - objectid + dir_id - legacy support
+	 *   3 - objectid + dir_id + generation
+	 *   4 - objectid + dir_id + objectid and dirid of parent - legacy
+	 *   5 - objectid + dir_id + generation + objectid and dirid of parent
+	 *   6 - as above plus generation of directory
+	 * 6 does not fit in NFSv2 handles
+	 */
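+	/*
+	 * Illustrative raw[] layout for fh_type == 5:
+	 *   raw[0] = objectid, raw[1] = dir_id, raw[2] = generation,
+	 *   raw[3] = parent objectid, raw[4] = parent dir_id,
+	 * which is what reiserfs_fh_to_parent() below reads back.
+	 */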
+	if (fh_type > fh_len) {
+		if (fh_type != 6 || fh_len != 5)
+			reiserfs_warning(sb, "reiserfs-13077",
+				"nfsd/reiserfs, fhtype=%d, len=%d - odd",
+				fh_type, fh_len);
+		fh_type = fh_len;
+	}
+	if (fh_len < 2)
+		return NULL;
+
+	return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
+		(fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
+}
+
+struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	if (fh_type > fh_len)
+		fh_type = fh_len;
+	if (fh_type < 4)
+		return NULL;
+
+	return reiserfs_get_dentry(sb,
+		(fh_type >= 5) ? fid->raw[3] : fid->raw[2],
+		(fh_type >= 5) ? fid->raw[4] : fid->raw[3],
+		(fh_type == 6) ? fid->raw[5] : 0);
+}
+
+int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
+		       int need_parent)
+{
+	struct inode *inode = dentry->d_inode;
+	int maxlen = *lenp;
+
+	if (need_parent && (maxlen < 5)) {
+		*lenp = 5;
+		return 255;
+	} else if (maxlen < 3) {
+		*lenp = 3;
+		return 255;
+	}
+
+	data[0] = inode->i_ino;
+	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
+	data[2] = inode->i_generation;
+	*lenp = 3;
+	/* no room for directory info? return what we've stored so far */
+	if (maxlen < 5 || !need_parent)
+		return 3;
+
+	spin_lock(&dentry->d_lock);
+	inode = dentry->d_parent->d_inode;
+	data[3] = inode->i_ino;
+	data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
+	*lenp = 5;
+	if (maxlen >= 6) {
+		data[5] = inode->i_generation;
+		*lenp = 6;
+	}
+	spin_unlock(&dentry->d_lock);
+	return *lenp;
+}
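+/* Illustrative: for a handle without parent info (need_parent == 0) and
+ * maxlen >= 3, reiserfs_encode_fh() stores objectid, dir_id and
+ * generation and returns 3 -- an fh_type that reiserfs_fh_to_dentry()
+ * accepts above.
+ */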
+
+/* looks for stat data, then copies fields to it, marks the buffer
+   containing stat data as dirty */
+/* reiserfs inodes are never really dirty, since the dirty inode call
+** always logs them.  This call allows the VFS inode marking routines
+** to properly mark inodes for datasync and such, but only actually
+** does something when called for a synchronous update.
+*/
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	struct reiserfs_transaction_handle th;
+	int jbegin_count = 1;
+
+	if (inode->i_sb->s_flags & MS_RDONLY)
+		return -EROFS;
+	/* memory pressure can sometimes initiate write_inode calls with sync == 1;
+	 ** these cases are just when the system needs RAM, not when the
+	 ** inode needs to reach disk for safety, and they can safely be
+	 ** ignored because the altered inode has already been logged.
+	 */
+	if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
+		reiserfs_write_lock(inode->i_sb);
+		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
+			reiserfs_update_sd(&th, inode);
+			journal_end_sync(&th, inode->i_sb, jbegin_count);
+		}
+		reiserfs_write_unlock(inode->i_sb);
+	}
+	return 0;
+}
+
+/* stat data of new object is inserted already, this inserts the item
+   containing "." and ".." entries */
+static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
+				  struct inode *inode,
+				  struct item_head *ih, struct treepath *path,
+				  struct inode *dir)
+{
+	struct super_block *sb = th->t_super;
+	char empty_dir[EMPTY_DIR_SIZE];
+	char *body = empty_dir;
+	struct cpu_key key;
+	int retval;
+
+	BUG_ON(!th->t_trans_id);
+
+	_make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
+		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
+		      TYPE_DIRENTRY, 3 /*key length */ );
+
+	/* compose item head for new item. Directories consist of items of
+	   old type (ITEM_VERSION_1). Do not set the key (second arg is NULL);
+	   it is done by reiserfs_new_inode */
+	if (old_format_only(sb)) {
+		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
+				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
+
+		make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
+				       ih->ih_key.k_objectid,
+				       INODE_PKEY(dir)->k_dir_id,
+				       INODE_PKEY(dir)->k_objectid);
+	} else {
+		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
+				  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
+
+		make_empty_dir_item(body, ih->ih_key.k_dir_id,
+				    ih->ih_key.k_objectid,
+				    INODE_PKEY(dir)->k_dir_id,
+				    INODE_PKEY(dir)->k_objectid);
+	}
+
+	/* look for place in the tree for new item */
+	retval = search_item(sb, &key, path);
+	if (retval == IO_ERROR) {
+		reiserfs_error(sb, "vs-13080",
+			       "i/o failure occurred creating new directory");
+		return -EIO;
+	}
+	if (retval == ITEM_FOUND) {
+		pathrelse(path);
+		reiserfs_warning(sb, "vs-13070",
+				 "object with this key exists (%k)",
+				 &(ih->ih_key));
+		return -EEXIST;
+	}
+
+	/* insert item, that is empty directory item */
+	return reiserfs_insert_item(th, path, &key, ih, inode, body);
+}
+
+/* stat data of object has been inserted, this inserts the item
+   containing the body of symlink */
+static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
+				struct item_head *ih,
+				struct treepath *path, const char *symname,
+				int item_len)
+{
+	struct super_block *sb = th->t_super;
+	struct cpu_key key;
+	int retval;
+
+	BUG_ON(!th->t_trans_id);
+
+	_make_cpu_key(&key, KEY_FORMAT_3_5,
+		      le32_to_cpu(ih->ih_key.k_dir_id),
+		      le32_to_cpu(ih->ih_key.k_objectid),
+		      1, TYPE_DIRECT, 3 /*key length */ );
+
+	make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
+			  0 /*free_space */ );
+
+	/* look for place in the tree for new item */
+	retval = search_item(sb, &key, path);
+	if (retval == IO_ERROR) {
+		reiserfs_error(sb, "vs-13080",
+			       "i/o failure occurred creating new symlink");
+		return -EIO;
+	}
+	if (retval == ITEM_FOUND) {
+		pathrelse(path);
+		reiserfs_warning(sb, "vs-13080",
+				 "object with this key exists (%k)",
+				 &(ih->ih_key));
+		return -EEXIST;
+	}
+
+	/* insert item, that is body of symlink */
+	return reiserfs_insert_item(th, path, &key, ih, inode, symname);
+}
+
+/* inserts the stat data into the tree, and then calls
+   reiserfs_new_directory (to insert ".", ".." item if new object is
+   directory) or reiserfs_new_symlink (to insert symlink body if new
+   object is symlink) or nothing (if new object is regular file)
+
+   NOTE! uid and gid must already be set in the inode.  If we return
+   non-zero due to an error, we have to drop the quota previously allocated
+   for the fresh inode.  This can only be done outside a transaction, so
+   if we return non-zero, we also end the transaction.  */
+int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
+		       struct inode *dir, umode_t mode, const char *symname,
+		       /* 0 for regular, EMPTY_DIR_SIZE for dirs,
+		          strlen (symname) for symlinks */
+		       loff_t i_size, struct dentry *dentry,
+		       struct inode *inode,
+		       struct reiserfs_security_handle *security)
+{
+	struct super_block *sb;
+	struct reiserfs_iget_args args;
+	INITIALIZE_PATH(path_to_key);
+	struct cpu_key key;
+	struct item_head ih;
+	struct stat_data sd;
+	int retval;
+	int err;
+
+	BUG_ON(!th->t_trans_id);
+
+	reiserfs_write_unlock(inode->i_sb);
+	err = dquot_alloc_inode(inode);
+	reiserfs_write_lock(inode->i_sb);
+	if (err)
+		goto out_end_trans;
+	if (!dir->i_nlink) {
+		err = -EPERM;
+		goto out_bad_inode;
+	}
+
+	sb = dir->i_sb;
+
+	/* item head of new item */
+	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
+	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
+	if (!ih.ih_key.k_objectid) {
+		err = -ENOMEM;
+		goto out_bad_inode;
+	}
+	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
+	if (old_format_only(sb))
+		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
+				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
+	else
+		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
+				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
+	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
+	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
+	if (insert_inode_locked4(inode, args.objectid,
+			     reiserfs_find_actor, &args) < 0) {
+		err = -EINVAL;
+		goto out_bad_inode;
+	}
+	if (old_format_only(sb))
+		/* not a perfect generation count, as object ids can be reused, but
+		 ** this is as good as reiserfs can do right now.
+		 ** note that the private part of the inode isn't filled in yet; we have
+		 ** to use the directory.
+		 */
+		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
+	else
+#if defined( USE_INODE_GENERATION_COUNTER )
+		inode->i_generation =
+		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
+#else
+		inode->i_generation = ++event;
+#endif
+
+	/* fill stat data */
+	set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
+
+	/* uid and gid must already be set by the caller for quota init */
+
+	/* symlink cannot be immutable or append only, right? */
+	if (S_ISLNK(inode->i_mode))
+		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
+
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+	inode->i_size = i_size;
+	inode->i_blocks = 0;
+	inode->i_bytes = 0;
+	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
+	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
+
+	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
+	REISERFS_I(inode)->i_flags = 0;
+	REISERFS_I(inode)->i_prealloc_block = 0;
+	REISERFS_I(inode)->i_prealloc_count = 0;
+	REISERFS_I(inode)->i_trans_id = 0;
+	REISERFS_I(inode)->i_jl = NULL;
+	REISERFS_I(inode)->i_attrs =
+	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
+	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
+	reiserfs_init_xattr_rwsem(inode);
+
+	/* key to search for correct place for new stat data */
+	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
+		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
+		      TYPE_STAT_DATA, 3 /*key length */ );
+
+	/* find proper place for inserting of stat data */
+	retval = search_item(sb, &key, &path_to_key);
+	if (retval == IO_ERROR) {
+		err = -EIO;
+		goto out_bad_inode;
+	}
+	if (retval == ITEM_FOUND) {
+		pathrelse(&path_to_key);
+		err = -EEXIST;
+		goto out_bad_inode;
+	}
+	if (old_format_only(sb)) {
+		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
+			pathrelse(&path_to_key);
+			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
+			err = -EINVAL;
+			goto out_bad_inode;
+		}
+		inode2sd_v1(&sd, inode, inode->i_size);
+	} else {
+		inode2sd(&sd, inode, inode->i_size);
+	}
+	// store in the in-core inode the key of the stat data and the version
+	// all object items will have (directory items will have the old offset
+	// format, other new objects will consist of new items)
+	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
+		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
+	else
+		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
+	if (old_format_only(sb))
+		set_inode_sd_version(inode, STAT_DATA_V1);
+	else
+		set_inode_sd_version(inode, STAT_DATA_V2);
+
+	/* insert the stat data into the tree */
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	if (REISERFS_I(dir)->new_packing_locality)
+		th->displace_new_blocks = 1;
+#endif
+	retval =
+	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
+				 (char *)(&sd));
+	if (retval) {
+		err = retval;
+		reiserfs_check_path(&path_to_key);
+		goto out_bad_inode;
+	}
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	if (!th->displace_new_blocks)
+		REISERFS_I(dir)->new_packing_locality = 0;
+#endif
+	if (S_ISDIR(mode)) {
+		/* insert item with "." and ".." */
+		retval =
+		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
+	}
+
+	if (S_ISLNK(mode)) {
+		/* insert body of symlink */
+		if (!old_format_only(sb))
+			i_size = ROUND_UP(i_size);
+		retval =
+		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
+					 i_size);
+	}
+	if (retval) {
+		err = retval;
+		reiserfs_check_path(&path_to_key);
+		journal_end(th, th->t_super, th->t_blocks_allocated);
+		goto out_inserted_sd;
+	}
+
+	if (reiserfs_posixacl(inode->i_sb)) {
+		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
+		if (retval) {
+			err = retval;
+			reiserfs_check_path(&path_to_key);
+			journal_end(th, th->t_super, th->t_blocks_allocated);
+			goto out_inserted_sd;
+		}
+	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
+		reiserfs_warning(inode->i_sb, "jdm-13090",
+				 "ACLs aren't enabled in the fs, "
+				 "but vfs thinks they are!");
+	} else if (IS_PRIVATE(dir))
+		inode->i_flags |= S_PRIVATE;
+
+	if (security->name) {
+		retval = reiserfs_security_write(th, inode, security);
+		if (retval) {
+			err = retval;
+			reiserfs_check_path(&path_to_key);
+			retval = journal_end(th, th->t_super,
+					     th->t_blocks_allocated);
+			if (retval)
+				err = retval;
+			goto out_inserted_sd;
+		}
+	}
+
+	reiserfs_update_sd(th, inode);
+	reiserfs_check_path(&path_to_key);
+
+	return 0;
+
+/* it looks like you can easily compress these two goto targets into
+ * one.  Keeping it like this doesn't actually hurt anything, and they
+ * are placeholders for what the quota code actually needs.
+ */
+      out_bad_inode:
+	/* Invalidate the object, nothing was inserted yet */
+	INODE_PKEY(inode)->k_objectid = 0;
+
+	/* Quota change must be inside a transaction for journaling */
+	dquot_free_inode(inode);
+
+      out_end_trans:
+	journal_end(th, th->t_super, th->t_blocks_allocated);
+	reiserfs_write_unlock(inode->i_sb);
+	/* dquot_drop() can be done outside a transaction and needs more credits, so it's better to have it outside */
+	dquot_drop(inode);
+	reiserfs_write_lock(inode->i_sb);
+	inode->i_flags |= S_NOQUOTA;
+	make_bad_inode(inode);
+
+      out_inserted_sd:
+	clear_nlink(inode);
+	th->t_trans_id = 0;	/* so the caller can't use this handle later */
+	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
+	iput(inode);
+	return err;
+}
+
+/*
+** finds the tail page in the page cache,
+** reads the last block in.
+**
+** On success, page_result is set to a locked, pinned page, and bh_result
+** is set to an up-to-date buffer for the last block in the file.  Returns 0.
+**
+** tail conversion is not done, so bh_result might not be valid for writing;
+** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
+** trying to write the block.
+**
+** on failure, nonzero is returned; page_result and bh_result are untouched.
+*/
+static int grab_tail_page(struct inode *inode,
+			  struct page **page_result,
+			  struct buffer_head **bh_result)
+{
+
+	/* we want the page with the last byte in the file,
+	 ** not the page that will hold the next byte for appending
+	 */
+	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long pos = 0;
+	unsigned long start = 0;
+	unsigned long blocksize = inode->i_sb->s_blocksize;
+	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
+	struct buffer_head *bh;
+	struct buffer_head *head;
+	struct page *page;
+	int error;
+
+	/* we know that we are only called with inode->i_size > 0.
+	 ** we also know that a file tail can never be as big as a block.
+	 ** If i_size % blocksize == 0, our file is currently block aligned
+	 ** and it won't need converting or zeroing after a truncate.
+	 */
+	if ((offset & (blocksize - 1)) == 0) {
+		return -ENOENT;
+	}
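+	/*
+	 * Worked example (illustrative, 4KB pages and blocks):
+	 * i_size == 5000 gives index == (5000 - 1) >> 12 == 1 and
+	 * offset == 5000 & 4095 == 904, so the tail sits in page 1 and is
+	 * not block aligned; i_size == 8192 would give offset == 0 and
+	 * return -ENOENT above.
+	 */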
+	page = grab_cache_page(inode->i_mapping, index);
+	error = -ENOMEM;
+	if (!page) {
+		goto out;
+	}
+	/* start within the page of the last block in the file */
+	start = (offset / blocksize) * blocksize;
+
+	error = __block_write_begin(page, start, offset - start,
+				    reiserfs_get_block_create_0);
+	if (error)
+		goto unlock;
+
+	head = page_buffers(page);
+	bh = head;
+	do {
+		if (pos >= start) {
+			break;
+		}
+		bh = bh->b_this_page;
+		pos += blocksize;
+	} while (bh != head);
+
+	if (!buffer_uptodate(bh)) {
+		/* note, this should never happen, prepare_write should
+		 ** be taking care of this for us.  If the buffer isn't up to date,
+		 ** I've screwed up the code to find the buffer, or the code to
+		 ** call prepare_write
+		 */
+		reiserfs_error(inode->i_sb, "clm-6000",
+			       "error reading block %lu", bh->b_blocknr);
+		error = -EIO;
+		goto unlock;
+	}
+	*bh_result = bh;
+	*page_result = page;
+
+      out:
+	return error;
+
+      unlock:
+	unlock_page(page);
+	page_cache_release(page);
+	return error;
+}
+
+/*
+** vfs version of truncate file.  Must NOT be called with
+** a transaction already started.
+**
+** some code taken from block_truncate_page
+*/
+int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
+{
+	struct reiserfs_transaction_handle th;
+	/* we want the offset for the first byte after the end of the file */
+	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	unsigned blocksize = inode->i_sb->s_blocksize;
+	unsigned length;
+	struct page *page = NULL;
+	int error;
+	struct buffer_head *bh = NULL;
+	int err2;
+	int lock_depth;
+
+	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+
+	if (inode->i_size > 0) {
+		error = grab_tail_page(inode, &page, &bh);
+		if (error) {
+			// -ENOENT means we truncated past the end of the file,
+			// and get_block_create_0 could not find a block to read in,
+			// which is ok.
+			if (error != -ENOENT)
+				reiserfs_error(inode->i_sb, "clm-6001",
+					       "grab_tail_page failed %d",
+					       error);
+			page = NULL;
+			bh = NULL;
+		}
+	}
+
+	/* so, if page != NULL, we have a buffer head for the offset at
+	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
+	 ** then we have an unformatted node.  Otherwise, we have a direct item,
+	 ** and no zeroing is required on disk.  We zero after the truncate,
+	 ** because the truncate might pack the item anyway
+	 ** (it will unmap bh if it packs).
+	 */
+	/* it is enough to reserve space in the transaction for 2 balancings:
+	   one for adding the "save" link and another for the first
+	   cut_from_item. The extra 1 is for update_sd */
+	error = journal_begin(&th, inode->i_sb,
+			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
+	if (error)
+		goto out;
+	reiserfs_update_inode_transaction(inode);
+	if (update_timestamps)
+		/* we are doing a real truncate: if the system crashes before the last
+		   transaction of truncating gets committed - on reboot the file
+		   either appears truncated properly or not truncated at all */
+		add_save_link(&th, inode, 1);
+	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
+	error =
+	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
+	if (error)
+		goto out;
+
+	/* check reiserfs_do_truncate after ending the transaction */
+	if (err2) {
+		error = err2;
+		goto out;
+	}
+
+	if (update_timestamps) {
+		error = remove_save_link(inode, 1 /* truncate */);
+		if (error)
+			goto out;
+	}
+
+	if (page) {
+		length = offset & (blocksize - 1);
+		/* if we are not on a block boundary */
+		if (length) {
+			length = blocksize - length;
+			zero_user(page, offset, length);
+			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
+				mark_buffer_dirty(bh);
+			}
+		}
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
+	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+
+	return 0;
+      out:
+	if (page) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
+	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+
+	return error;
+}
+
+static int map_block_for_writepage(struct inode *inode,
+				   struct buffer_head *bh_result,
+				   unsigned long block)
+{
+	struct reiserfs_transaction_handle th;
+	int fs_gen;
+	struct item_head tmp_ih;
+	struct item_head *ih;
+	struct buffer_head *bh;
+	__le32 *item;
+	struct cpu_key key;
+	INITIALIZE_PATH(path);
+	int pos_in_item;
+	int jbegin_count = JOURNAL_PER_BALANCE_CNT;
+	loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
+	int retval;
+	int use_get_block = 0;
+	int bytes_copied = 0;
+	int copy_size;
+	int trans_running = 0;
+
+	/* catch places below that try to log something without starting a trans */
+	th.t_trans_id = 0;
+
+	if (!buffer_uptodate(bh_result)) {
+		return -EIO;
+	}
+
+	kmap(bh_result->b_page);
+      start_over:
+	reiserfs_write_lock(inode->i_sb);
+	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
+
+      research:
+	retval = search_for_position_by_key(inode->i_sb, &key, &path);
+	if (retval != POSITION_FOUND) {
+		use_get_block = 1;
+		goto out;
+	}
+
+	bh = get_last_bh(&path);
+	ih = get_ih(&path);
+	item = get_item(&path);
+	pos_in_item = path.pos_in_item;
+
+	/* we've found an unformatted node */
+	if (indirect_item_found(retval, ih)) {
+		if (bytes_copied > 0) {
+			reiserfs_warning(inode->i_sb, "clm-6002",
+					 "bytes_copied %d", bytes_copied);
+		}
+		if (!get_block_num(item, pos_in_item)) {
+			/* crap, we are writing to a hole */
+			use_get_block = 1;
+			goto out;
+		}
+		set_block_dev_mapped(bh_result,
+				     get_block_num(item, pos_in_item), inode);
+	} else if (is_direct_le_ih(ih)) {
+		char *p;
+		p = page_address(bh_result->b_page);
+		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
+		copy_size = ih_item_len(ih) - pos_in_item;
+
+		fs_gen = get_generation(inode->i_sb);
+		copy_item_head(&tmp_ih, ih);
+
+		if (!trans_running) {
+			/* vs-3050 is gone, no need to drop the path */
+			retval = journal_begin(&th, inode->i_sb, jbegin_count);
+			if (retval)
+				goto out;
+			reiserfs_update_inode_transaction(inode);
+			trans_running = 1;
+			if (fs_changed(fs_gen, inode->i_sb)
+			    && item_moved(&tmp_ih, &path)) {
+				reiserfs_restore_prepared_buffer(inode->i_sb,
+								 bh);
+				goto research;
+			}
+		}
+
+		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
+
+		if (fs_changed(fs_gen, inode->i_sb)
+		    && item_moved(&tmp_ih, &path)) {
+			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
+			goto research;
+		}
+
+		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
+		       copy_size);
+
+		journal_mark_dirty(&th, inode->i_sb, bh);
+		bytes_copied += copy_size;
+		set_block_dev_mapped(bh_result, 0, inode);
+
+		/* are there still bytes left? */
+		if (bytes_copied < bh_result->b_size &&
+		    (byte_offset + bytes_copied) < inode->i_size) {
+			set_cpu_key_k_offset(&key,
+					     cpu_key_k_offset(&key) +
+					     copy_size);
+			goto research;
+		}
+	} else {
+		reiserfs_warning(inode->i_sb, "clm-6003",
+				 "bad item inode %lu", inode->i_ino);
+		retval = -EIO;
+		goto out;
+	}
+	retval = 0;
+
+      out:
+	pathrelse(&path);
+	if (trans_running) {
+		int err = journal_end(&th, inode->i_sb, jbegin_count);
+		if (err)
+			retval = err;
+		trans_running = 0;
+	}
+	reiserfs_write_unlock(inode->i_sb);
+
+	/* this is where we fill in holes in the file. */
+	if (use_get_block) {
+		retval = reiserfs_get_block(inode, block, bh_result,
+					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
+					    | GET_BLOCK_NO_DANGLE);
+		if (!retval) {
+			if (!buffer_mapped(bh_result)
+			    || bh_result->b_blocknr == 0) {
+				/* get_block failed to find a mapped unformatted node. */
+				use_get_block = 0;
+				goto start_over;
+			}
+		}
+	}
+	kunmap(bh_result->b_page);
+
+	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
+		/* we've copied data from the page into the direct item, so the
+		 * buffer in the page is now clean, mark it to reflect that.
+		 */
+		lock_buffer(bh_result);
+		clear_buffer_dirty(bh_result);
+		unlock_buffer(bh_result);
+	}
+	return retval;
+}
+
+/*
+ * mason@suse.com: updated in 2.5.54 to follow the same general io
+ * start/recovery path as __block_write_full_page, along with special
+ * code to handle reiserfs tails.
+ */
+static int reiserfs_write_full_page(struct page *page,
+				    struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	int error = 0;
+	unsigned long block;
+	sector_t last_block;
+	struct buffer_head *head, *bh;
+	int partial = 0;
+	int nr = 0;
+	int checked = PageChecked(page);
+	struct reiserfs_transaction_handle th;
+	struct super_block *s = inode->i_sb;
+	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
+	th.t_trans_id = 0;
+
+	/* no logging allowed when nonblocking or from PF_MEMALLOC */
+	if (checked && (current->flags & PF_MEMALLOC)) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+
+	/* The page dirty bit is cleared before writepage is called, which
+	 * means we have to tell create_empty_buffers to make dirty buffers.
+	 * The page really should be up to date at this point, so tossing
+	 * in the BH_Uptodate is just a sanity check.
+	 */
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, s->s_blocksize,
+				     (1 << BH_Dirty) | (1 << BH_Uptodate));
+	}
+	head = page_buffers(page);
+
+	/* last page in the file, zero out any contents past the
+	 ** last byte in the file
+	 */
+	if (page->index >= end_index) {
+		unsigned last_offset;
+
+		last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+		/* no file contents in this page */
+		if (page->index >= end_index + 1 || !last_offset) {
+			unlock_page(page);
+			return 0;
+		}
+		zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
+	}
+	bh = head;
+	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
+	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
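+	/*
+	 * Illustrative: with 1KB blocks and 4KB pages there are four
+	 * buffers per page, so page->index 2 covers blocks 8..11, and a
+	 * 5000-byte file has last_block == (5000 - 1) >> 10 == 4.
+	 */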
+	/* first map all the buffers, logging any direct items we find */
+	do {
+		if (block > last_block) {
+			/*
+			 * This can happen when the block size is less than
+			 * the page size.  The corresponding bytes in the page
+			 * were zero filled above
+			 */
+			clear_buffer_dirty(bh);
+			set_buffer_uptodate(bh);
+		} else if ((checked || buffer_dirty(bh)) &&
+		           (!buffer_mapped(bh) || (buffer_mapped(bh)
+						       && bh->b_blocknr ==
+						       0))) {
+			/* not mapped yet, or it points to a direct item, search
+			 * the btree for the mapping info, and log any direct
+			 * items found
+			 */
+			if ((error = map_block_for_writepage(inode, bh, block))) {
+				goto fail;
+			}
+		}
+		bh = bh->b_this_page;
+		block++;
+	} while (bh != head);
+
+	/*
+	 * we start the transaction after map_block_for_writepage,
+	 * because it can create holes in the file (an unbounded operation).
+	 * starting it here, we can make a reliable estimate for how many
+	 * blocks we're going to log
+	 */
+	if (checked) {
+		ClearPageChecked(page);
+		reiserfs_write_lock(s);
+		error = journal_begin(&th, s, bh_per_page + 1);
+		if (error) {
+			reiserfs_write_unlock(s);
+			goto fail;
+		}
+		reiserfs_update_inode_transaction(inode);
+	}
+	/* now go through and lock any dirty buffers on the page */
+	do {
+		get_bh(bh);
+		if (!buffer_mapped(bh))
+			continue;
+		if (buffer_mapped(bh) && bh->b_blocknr == 0)
+			continue;
+
+		if (checked) {
+			reiserfs_prepare_for_journal(s, bh, 1);
+			journal_mark_dirty(&th, s, bh);
+			continue;
+		}
+		/* from this point on, we know the buffer is mapped to a
+		 * real block and not a direct item
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			lock_buffer(bh);
+		} else {
+			if (!trylock_buffer(bh)) {
+				redirty_page_for_writepage(wbc, page);
+				continue;
+			}
+		}
+		if (test_clear_buffer_dirty(bh)) {
+			mark_buffer_async_write(bh);
+		} else {
+			unlock_buffer(bh);
+		}
+	} while ((bh = bh->b_this_page) != head);
+
+	if (checked) {
+		error = journal_end(&th, s, bh_per_page + 1);
+		reiserfs_write_unlock(s);
+		if (error)
+			goto fail;
+	}
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+	unlock_page(page);
+
+	/*
+	 * since any buffer might be the only dirty buffer on the page,
+	 * the first submit_bh can bring the page out of writeback.
+	 * be careful with the buffers.
+	 */
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async_write(bh)) {
+			submit_bh(WRITE, bh);
+			nr++;
+		}
+		put_bh(bh);
+		bh = next;
+	} while (bh != head);
+
+	error = 0;
+      done:
+	if (nr == 0) {
+		/*
+		 * if this page only had a direct item, it is very possible for
+		 * no io to be required without there being an error.  Or,
+		 * someone else could have locked them and sent them down the
+		 * pipe without locking the page
+		 */
+		bh = head;
+		do {
+			if (!buffer_uptodate(bh)) {
+				partial = 1;
+				break;
+			}
+			bh = bh->b_this_page;
+		} while (bh != head);
+		if (!partial)
+			SetPageUptodate(page);
+		end_page_writeback(page);
+	}
+	return error;
+
+      fail:
+	/* catches various errors; we need to make sure any valid dirty blocks
+	 * get to the media.  The page is currently locked and not marked for
+	 * writeback
+	 */
+	ClearPageUptodate(page);
+	bh = head;
+	do {
+		get_bh(bh);
+		if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
+			lock_buffer(bh);
+			mark_buffer_async_write(bh);
+		} else {
+			/*
+			 * clear any dirty bits that might have come from getting
+			 * attached to a dirty page
+			 */
+			clear_buffer_dirty(bh);
+		}
+		bh = bh->b_this_page;
+	} while (bh != head);
+	SetPageError(page);
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+	unlock_page(page);
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async_write(bh)) {
+			clear_buffer_dirty(bh);
+			submit_bh(WRITE, bh);
+			nr++;
+		}
+		put_bh(bh);
+		bh = next;
+	} while (bh != head);
+	goto done;
+}
+
+static int reiserfs_readpage(struct file *f, struct page *page)
+{
+	return block_read_full_page(page, reiserfs_get_block);
+}
+
+static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	reiserfs_wait_on_write_block(inode->i_sb);
+	return reiserfs_write_full_page(page, wbc);
+}
+
+static void reiserfs_truncate_failed_write(struct inode *inode)
+{
+	truncate_inode_pages(inode->i_mapping, inode->i_size);
+	reiserfs_truncate_file(inode, 0);
+}
+
+static int reiserfs_write_begin(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
+{
+	struct inode *inode;
+	struct page *page;
+	pgoff_t index;
+	int ret;
+	int old_ref = 0;
+
+	inode = mapping->host;
+	*fsdata = NULL;
+	if (flags & AOP_FLAG_CONT_EXPAND &&
+	    (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
+		pos++;
+		*fsdata = (void *)(unsigned long)flags;
+	}
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	reiserfs_wait_on_write_block(inode->i_sb);
+	fix_tail_page_for_writing(page);
+	if (reiserfs_transaction_running(inode->i_sb)) {
+		struct reiserfs_transaction_handle *th;
+		th = (struct reiserfs_transaction_handle *)current->
+		    journal_info;
+		BUG_ON(!th->t_refcount);
+		BUG_ON(!th->t_trans_id);
+		old_ref = th->t_refcount;
+		th->t_refcount++;
+	}
+	ret = __block_write_begin(page, pos, len, reiserfs_get_block);
+	if (ret && reiserfs_transaction_running(inode->i_sb)) {
+		struct reiserfs_transaction_handle *th = current->journal_info;
+		/* this gets a little ugly.  If reiserfs_get_block returned an
+		 * error and left a transaction running, we've got to close it,
+		 * and we've got to free the handle if it was a persistent transaction.
+		 *
+		 * But, if we had nested into an existing transaction, we need
+		 * to just drop the ref count on the handle.
+		 *
+		 * If old_ref == 0, the transaction is from reiserfs_get_block,
+		 * and it was a persistent trans.  Otherwise, it was nested above.
+		 */
+		if (th->t_refcount > old_ref) {
+			if (old_ref)
+				th->t_refcount--;
+			else {
+				int err;
+				reiserfs_write_lock(inode->i_sb);
+				err = reiserfs_end_persistent_transaction(th);
+				reiserfs_write_unlock(inode->i_sb);
+				if (err)
+					ret = err;
+			}
+		}
+	}
+	if (ret) {
+		unlock_page(page);
+		page_cache_release(page);
+		/* Truncate allocated blocks */
+		reiserfs_truncate_failed_write(inode);
+	}
+	return ret;
+}
+
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
+{
+	struct inode *inode = page->mapping->host;
+	int ret;
+	int old_ref = 0;
+
+	reiserfs_write_unlock(inode->i_sb);
+	reiserfs_wait_on_write_block(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
+
+	fix_tail_page_for_writing(page);
+	if (reiserfs_transaction_running(inode->i_sb)) {
+		struct reiserfs_transaction_handle *th;
+		th = (struct reiserfs_transaction_handle *)current->
+		    journal_info;
+		BUG_ON(!th->t_refcount);
+		BUG_ON(!th->t_trans_id);
+		old_ref = th->t_refcount;
+		th->t_refcount++;
+	}
+
+	ret = __block_write_begin(page, from, len, reiserfs_get_block);
+	if (ret && reiserfs_transaction_running(inode->i_sb)) {
+		struct reiserfs_transaction_handle *th = current->journal_info;
+		/* this gets a little ugly.  If reiserfs_get_block returned an
+		 * error and left a transaction running, we've got to close it,
+		 * and we've got to free the handle if it was a persistent transaction.
+		 *
+		 * But, if we had nested into an existing transaction, we need
+		 * to just drop the ref count on the handle.
+		 *
+		 * If old_ref == 0, the transaction is from reiserfs_get_block,
+		 * and it was a persistent trans.  Otherwise, it was nested above.
+		 */
+		if (th->t_refcount > old_ref) {
+			if (old_ref)
+				th->t_refcount--;
+			else {
+				int err;
+				reiserfs_write_lock(inode->i_sb);
+				err = reiserfs_end_persistent_transaction(th);
+				reiserfs_write_unlock(inode->i_sb);
+				if (err)
+					ret = err;
+			}
+		}
+	}
+	return ret;
+}
+
+static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
+{
+	return generic_block_bmap(as, block, reiserfs_bmap);
+}
+
+static int reiserfs_write_end(struct file *file, struct address_space *mapping,
+			      loff_t pos, unsigned len, unsigned copied,
+			      struct page *page, void *fsdata)
+{
+	struct inode *inode = page->mapping->host;
+	int ret = 0;
+	int update_sd = 0;
+	struct reiserfs_transaction_handle *th;
+	unsigned start;
+	int lock_depth = 0;
+	bool locked = false;
+
+	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
+		pos++;
+
+	reiserfs_wait_on_write_block(inode->i_sb);
+	if (reiserfs_transaction_running(inode->i_sb))
+		th = current->journal_info;
+	else
+		th = NULL;
+
+	start = pos & (PAGE_CACHE_SIZE - 1);
+	if (unlikely(copied < len)) {
+		if (!PageUptodate(page))
+			copied = 0;
+
+		page_zero_new_buffers(page, start + copied, start + len);
+	}
+	flush_dcache_page(page);
+
+	reiserfs_commit_page(inode, page, start, start + copied);
+
+	/* generic_commit_write does this for us, but does not update the
+	 ** transaction tracking stuff when the size changes.  So, we have
+	 ** to do the i_size updates here.
+	 */
+	if (pos + copied > inode->i_size) {
+		struct reiserfs_transaction_handle myth;
+		lock_depth = reiserfs_write_lock_once(inode->i_sb);
+		locked = true;
+		/* If the file has grown beyond the boundary where it
+		   can have a tail, unmark it as needing tail
+		   packing */
+		if ((have_large_tails(inode->i_sb)
+		     && inode->i_size > i_block_size(inode) * 4)
+		    || (have_small_tails(inode->i_sb)
+			&& inode->i_size > i_block_size(inode)))
+			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+
+		ret = journal_begin(&myth, inode->i_sb, 1);
+		if (ret)
+			goto journal_error;
+
+		reiserfs_update_inode_transaction(inode);
+		inode->i_size = pos + copied;
+		/*
+		 * this will just nest into our transaction.  It's important
+		 * to use mark_inode_dirty so the inode gets pushed around on the
+		 * dirty lists, and so that O_SYNC works as expected
+		 */
+		mark_inode_dirty(inode);
+		reiserfs_update_sd(&myth, inode);
+		update_sd = 1;
+		ret = journal_end(&myth, inode->i_sb, 1);
+		if (ret)
+			goto journal_error;
+	}
+	if (th) {
+		if (!locked) {
+			lock_depth = reiserfs_write_lock_once(inode->i_sb);
+			locked = true;
+		}
+		if (!update_sd)
+			mark_inode_dirty(inode);
+		ret = reiserfs_end_persistent_transaction(th);
+		if (ret)
+			goto out;
+	}
+
+      out:
+	if (locked)
+		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	unlock_page(page);
+	page_cache_release(page);
+
+	if (pos + len > inode->i_size)
+		reiserfs_truncate_failed_write(inode);
+
+	return ret == 0 ? copied : ret;
+
+      journal_error:
+	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	locked = false;
+	if (th) {
+		if (!update_sd)
+			reiserfs_update_sd(th, inode);
+		ret = reiserfs_end_persistent_transaction(th);
+	}
+	goto out;
+}
+
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
+	int ret = 0;
+	int update_sd = 0;
+	struct reiserfs_transaction_handle *th = NULL;
+
+	reiserfs_write_unlock(inode->i_sb);
+	reiserfs_wait_on_write_block(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
+
+	if (reiserfs_transaction_running(inode->i_sb)) {
+		th = current->journal_info;
+	}
+	reiserfs_commit_page(inode, page, from, to);
+
+	/* generic_commit_write does this for us, but does not update the
+	 ** transaction tracking stuff when the size changes.  So, we have
+	 ** to do the i_size updates here.
+	 */
+	if (pos > inode->i_size) {
+		struct reiserfs_transaction_handle myth;
+		/* If the file has grown beyond the boundary where it
+		   can have a tail, unmark it as needing tail
+		   packing */
+		if ((have_large_tails(inode->i_sb)
+		     && inode->i_size > i_block_size(inode) * 4)
+		    || (have_small_tails(inode->i_sb)
+			&& inode->i_size > i_block_size(inode)))
+			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+
+		ret = journal_begin(&myth, inode->i_sb, 1);
+		if (ret)
+			goto journal_error;
+
+		reiserfs_update_inode_transaction(inode);
+		inode->i_size = pos;
+		/*
+		 * this will just nest into our transaction.  It's important
+		 * to use mark_inode_dirty so the inode gets pushed around on the
+		 * dirty lists, and so that O_SYNC works as expected
+		 */
+		mark_inode_dirty(inode);
+		reiserfs_update_sd(&myth, inode);
+		update_sd = 1;
+		ret = journal_end(&myth, inode->i_sb, 1);
+		if (ret)
+			goto journal_error;
+	}
+	if (th) {
+		if (!update_sd)
+			mark_inode_dirty(inode);
+		ret = reiserfs_end_persistent_transaction(th);
+		if (ret)
+			goto out;
+	}
+
+      out:
+	return ret;
+
+      journal_error:
+	if (th) {
+		if (!update_sd)
+			reiserfs_update_sd(th, inode);
+		ret = reiserfs_end_persistent_transaction(th);
+	}
+
+	return ret;
+}
+
+void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
+{
+	if (reiserfs_attrs(inode->i_sb)) {
+		if (sd_attrs & REISERFS_SYNC_FL)
+			inode->i_flags |= S_SYNC;
+		else
+			inode->i_flags &= ~S_SYNC;
+		if (sd_attrs & REISERFS_IMMUTABLE_FL)
+			inode->i_flags |= S_IMMUTABLE;
+		else
+			inode->i_flags &= ~S_IMMUTABLE;
+		if (sd_attrs & REISERFS_APPEND_FL)
+			inode->i_flags |= S_APPEND;
+		else
+			inode->i_flags &= ~S_APPEND;
+		if (sd_attrs & REISERFS_NOATIME_FL)
+			inode->i_flags |= S_NOATIME;
+		else
+			inode->i_flags &= ~S_NOATIME;
+		if (sd_attrs & REISERFS_NOTAIL_FL)
+			REISERFS_I(inode)->i_flags |= i_nopack_mask;
+		else
+			REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
+	}
+}
+
+void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
+{
+	if (reiserfs_attrs(inode->i_sb)) {
+		if (inode->i_flags & S_IMMUTABLE)
+			*sd_attrs |= REISERFS_IMMUTABLE_FL;
+		else
+			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
+		if (inode->i_flags & S_SYNC)
+			*sd_attrs |= REISERFS_SYNC_FL;
+		else
+			*sd_attrs &= ~REISERFS_SYNC_FL;
+		if (inode->i_flags & S_NOATIME)
+			*sd_attrs |= REISERFS_NOATIME_FL;
+		else
+			*sd_attrs &= ~REISERFS_NOATIME_FL;
+		if (REISERFS_I(inode)->i_flags & i_nopack_mask)
+			*sd_attrs |= REISERFS_NOTAIL_FL;
+		else
+			*sd_attrs &= ~REISERFS_NOTAIL_FL;
+	}
+}
+
+/* decide if this buffer needs to stay around for data logging or ordered
+** write purposes
+*/
+static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
+{
+	int ret = 1;
+	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
+
+	lock_buffer(bh);
+	spin_lock(&j->j_dirty_buffers_lock);
+	if (!buffer_mapped(bh)) {
+		goto free_jh;
+	}
+	/* the page is locked, and the only places that log a data buffer
+	 * also lock the page.
+	 */
+	if (reiserfs_file_data_log(inode)) {
+		/*
+		 * very conservative, leave the buffer pinned if
+		 * anyone might need it.
+		 */
+		if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
+			ret = 0;
+		}
+	} else  if (buffer_dirty(bh)) {
+		struct reiserfs_journal_list *jl;
+		struct reiserfs_jh *jh = bh->b_private;
+
+		/* why is this safe?
+		 * reiserfs_setattr updates i_size in the on-disk
+		 * stat data before allowing vmtruncate to be called.
+		 *
+		 * If the buffer was put onto the ordered list for this
+		 * transaction, we know for sure either this transaction
+		 * or an older one already has updated i_size on disk,
+		 * and this ordered data won't be referenced in the file
+		 * if we crash.
+		 *
+		 * If the buffer was put onto the ordered list for an older
+		 * transaction, we need to leave it around.
+		 */
+		if (jh && (jl = jh->jl)
+		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
+			ret = 0;
+	}
+      free_jh:
+	if (ret && bh->b_private) {
+		reiserfs_free_jh(bh);
+	}
+	spin_unlock(&j->j_dirty_buffers_lock);
+	unlock_buffer(bh);
+	return ret;
+}
+
+/* clm -- taken from fs/buffer.c:block_invalidate_page */
+static void reiserfs_invalidatepage(struct page *page, unsigned long offset)
+{
+	struct buffer_head *head, *bh, *next;
+	struct inode *inode = page->mapping->host;
+	unsigned int curr_off = 0;
+	int ret = 1;
+
+	BUG_ON(!PageLocked(page));
+
+	if (offset == 0)
+		ClearPageChecked(page);
+
+	if (!page_has_buffers(page))
+		goto out;
+
+	head = page_buffers(page);
+	bh = head;
+	do {
+		unsigned int next_off = curr_off + bh->b_size;
+		next = bh->b_this_page;
+
+		/*
+		 * is this block fully invalidated?
+		 */
+		if (offset <= curr_off) {
+			if (invalidatepage_can_drop(inode, bh))
+				reiserfs_unmap_buffer(bh);
+			else
+				ret = 0;
+		}
+		curr_off = next_off;
+		bh = next;
+	} while (bh != head);
+
+	/*
+	 * We release buffers only if the entire page is being invalidated.
+	 * The get_block cached value has been unconditionally invalidated,
+	 * so real IO is not possible anymore.
+	 */
+	if (!offset && ret) {
+		ret = try_to_release_page(page, 0);
+		/* maybe should BUG_ON(!ret); - neilb */
+	}
+      out:
+	return;
+}
+
+static int reiserfs_set_page_dirty(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	if (reiserfs_file_data_log(inode)) {
+		SetPageChecked(page);
+		return __set_page_dirty_nobuffers(page);
+	}
+	return __set_page_dirty_buffers(page);
+}
+
+/*
+ * Returns 1 if the page's buffers were dropped.  The page is locked.
+ *
+ * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
+ * in the buffers at page_buffers(page).
+ *
+ * even in -o notail mode, we can't be sure an old mount without -o notail
+ * didn't create files with tails.
+ */
+static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
+{
+	struct inode *inode = page->mapping->host;
+	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
+	struct buffer_head *head;
+	struct buffer_head *bh;
+	int ret = 1;
+
+	WARN_ON(PageChecked(page));
+	spin_lock(&j->j_dirty_buffers_lock);
+	head = page_buffers(page);
+	bh = head;
+	do {
+		if (bh->b_private) {
+			if (!buffer_dirty(bh) && !buffer_locked(bh)) {
+				reiserfs_free_jh(bh);
+			} else {
+				ret = 0;
+				break;
+			}
+		}
+		bh = bh->b_this_page;
+	} while (bh != head);
+	if (ret)
+		ret = try_to_free_buffers(page);
+	spin_unlock(&j->j_dirty_buffers_lock);
+	return ret;
+}
+
+/* We thank Mingming Cao for helping us understand in great detail what
+   to do in this section of the code. */
+static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
+				  const struct iovec *iov, loff_t offset,
+				  unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				  reiserfs_get_blocks_direct_io);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
+}
+
+int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	unsigned int ia_valid;
+	int depth;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	/* must be turned off for recursive notify_change calls */
+	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
+
+	if (is_quota_modification(inode, attr))
+		dquot_initialize(inode);
+	depth = reiserfs_write_lock_once(inode->i_sb);
+	if (attr->ia_valid & ATTR_SIZE) {
+		/* version 2 items will be caught by the s_maxbytes check
+		 ** done for us in vmtruncate
+		 */
+		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
+		    attr->ia_size > MAX_NON_LFS) {
+			error = -EFBIG;
+			goto out;
+		}
+
+		inode_dio_wait(inode);
+
+		/* fill in hole pointers in the expanding truncate case. */
+		if (attr->ia_size > inode->i_size) {
+			error = generic_cont_expand_simple(inode, attr->ia_size);
+			if (REISERFS_I(inode)->i_prealloc_count > 0) {
+				int err;
+				struct reiserfs_transaction_handle th;
+				/* we're changing at most 2 bitmaps, inode + super */
+				err = journal_begin(&th, inode->i_sb, 4);
+				if (!err) {
+					reiserfs_discard_prealloc(&th, inode);
+					err = journal_end(&th, inode->i_sb, 4);
+				}
+				if (err)
+					error = err;
+			}
+			if (error)
+				goto out;
+			/*
+			 * file size is changed, ctime and mtime are
+			 * to be updated
+			 */
+			attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
+		}
+	}
+
+	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
+	     ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
+	    (get_inode_sd_version(inode) == STAT_DATA_V1)) {
+		/* stat data of format v3.5 has 16 bit uid and gid */
+		error = -EINVAL;
+		goto out;
+	}
+
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+		struct reiserfs_transaction_handle th;
+		int jbegin_count =
+		    2 *
+		    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
+		     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
+		    2;
+
+		error = reiserfs_chown_xattrs(inode, attr);
+
+		if (error)
+			return error;
+
+		/* (user+group)*(old+new) quota structures, plus the inode write (sb, inode) */
+		error = journal_begin(&th, inode->i_sb, jbegin_count);
+		if (error)
+			goto out;
+		reiserfs_write_unlock_once(inode->i_sb, depth);
+		error = dquot_transfer(inode, attr);
+		depth = reiserfs_write_lock_once(inode->i_sb);
+		if (error) {
+			journal_end(&th, inode->i_sb, jbegin_count);
+			goto out;
+		}
+
+		/* Update corresponding info in inode so that everything is in
+		 * one transaction */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		mark_inode_dirty(inode);
+		error = journal_end(&th, inode->i_sb, jbegin_count);
+		if (error)
+			goto out;
+	}
+
+	/*
+	 * Relax the lock here, as it might truncate the
+	 * inode pages and wait for inode page locks.
+	 * To release such a page lock, the owner needs the
+	 * reiserfs lock
+	 */
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode))
+		error = vmtruncate(inode, attr->ia_size);
+
+	if (!error) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+	}
+	depth = reiserfs_write_lock_once(inode->i_sb);
+
+	if (!error && reiserfs_posixacl(inode->i_sb)) {
+		if (attr->ia_valid & ATTR_MODE)
+			error = reiserfs_acl_chmod(inode);
+	}
+
+      out:
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+
+	return error;
+}
+
+const struct address_space_operations reiserfs_address_space_operations = {
+	.writepage = reiserfs_writepage,
+	.readpage = reiserfs_readpage,
+	.readpages = reiserfs_readpages,
+	.releasepage = reiserfs_releasepage,
+	.invalidatepage = reiserfs_invalidatepage,
+	.write_begin = reiserfs_write_begin,
+	.write_end = reiserfs_write_end,
+	.bmap = reiserfs_aop_bmap,
+	.direct_IO = reiserfs_direct_IO,
+	.set_page_dirty = reiserfs_set_page_dirty,
+};
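+
+/*
+ * An illustrative sketch of how the table above is reached: the VFS goes
+ * through inode->i_mapping->a_ops, so writing a dirty page back ends up as
+ *
+ *	mapping->a_ops->writepage(page, wbc);
+ *
+ * which lands in reiserfs_writepage() above.
+ */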
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/ioctl.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/ioctl.c
new file mode 100644
index 0000000..0c21850
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/ioctl.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include "reiserfs.h"
+#include <linux/time.h>
+#include <asm/uaccess.h>
+#include <linux/pagemap.h>
+#include <linux/compat.h>
+
+/*
+ * reiserfs_ioctl - handler for ioctl for inode
+ * supported commands:
+ *  1) REISERFS_IOC_UNPACK - try to unpack the tail from a direct item into
+ *                           an indirect one and prevent future packing of
+ *                           the file (argument arg has to be non-zero)
+ *  2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
+ *  3) That's all for a while ...
+ */
+long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	unsigned int flags;
+	int err = 0;
+
+	reiserfs_write_lock(inode->i_sb);
+
+	switch (cmd) {
+	case REISERFS_IOC_UNPACK:
+		if (S_ISREG(inode->i_mode)) {
+			if (arg)
+				err = reiserfs_unpack(inode, filp);
+		} else
+			err = -ENOTTY;
+		break;
+		/*
+		 * following two cases are taken from fs/ext2/ioctl.c by Remy
+		 * Card (card@masi.ibp.fr)
+		 */
+	case REISERFS_IOC_GETFLAGS:
+		if (!reiserfs_attrs(inode->i_sb)) {
+			err = -ENOTTY;
+			break;
+		}
+
+		flags = REISERFS_I(inode)->i_attrs;
+		i_attrs_to_sd_attrs(inode, (__u16 *)&flags);
+		err = put_user(flags, (int __user *)arg);
+		break;
+	case REISERFS_IOC_SETFLAGS:{
+			if (!reiserfs_attrs(inode->i_sb)) {
+				err = -ENOTTY;
+				break;
+			}
+
+			err = mnt_want_write_file(filp);
+			if (err)
+				break;
+
+			if (!inode_owner_or_capable(inode)) {
+				err = -EPERM;
+				goto setflags_out;
+			}
+			if (get_user(flags, (int __user *)arg)) {
+				err = -EFAULT;
+				goto setflags_out;
+			}
+			/*
+			 * Is it quota file? Do not allow user to mess with it
+			 */
+			if (IS_NOQUOTA(inode)) {
+				err = -EPERM;
+				goto setflags_out;
+			}
+			if (((flags ^ REISERFS_I(inode)->
+			      i_attrs) & (REISERFS_IMMUTABLE_FL |
+					  REISERFS_APPEND_FL))
+			    && !capable(CAP_LINUX_IMMUTABLE)) {
+				err = -EPERM;
+				goto setflags_out;
+			}
+			if ((flags & REISERFS_NOTAIL_FL) &&
+			    S_ISREG(inode->i_mode)) {
+				int result;
+
+				result = reiserfs_unpack(inode, filp);
+				if (result) {
+					err = result;
+					goto setflags_out;
+				}
+			}
+			sd_attrs_to_i_attrs(flags, inode);
+			REISERFS_I(inode)->i_attrs = flags;
+			inode->i_ctime = CURRENT_TIME_SEC;
+			mark_inode_dirty(inode);
+setflags_out:
+			mnt_drop_write_file(filp);
+			break;
+		}
+	case REISERFS_IOC_GETVERSION:
+		err = put_user(inode->i_generation, (int __user *)arg);
+		break;
+	case REISERFS_IOC_SETVERSION:
+		if (!inode_owner_or_capable(inode)) {
+			err = -EPERM;
+			break;
+		}
+		err = mnt_want_write_file(filp);
+		if (err)
+			break;
+		if (get_user(inode->i_generation, (int __user *)arg)) {
+			err = -EFAULT;
+			goto setversion_out;
+		}
+		inode->i_ctime = CURRENT_TIME_SEC;
+		mark_inode_dirty(inode);
+setversion_out:
+		mnt_drop_write_file(filp);
+		break;
+	default:
+		err = -ENOTTY;
+	}
+
+	reiserfs_write_unlock(inode->i_sb);
+
+	return err;
+}
+
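+/*
+ * An illustrative sketch of the userspace side of the commands above
+ * (hypothetical snippet; the flag/version ioctls transfer an int through
+ * the pointer argument, while UNPACK only tests that arg is non-zero):
+ *
+ *	int flags;
+ *	ioctl(fd, REISERFS_IOC_GETFLAGS, &flags);
+ *	flags |= REISERFS_NOTAIL_FL;
+ *	ioctl(fd, REISERFS_IOC_SETFLAGS, &flags);
+ *	ioctl(fd, REISERFS_IOC_UNPACK, 1);
+ */
+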
+#ifdef CONFIG_COMPAT
+long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	/* These are just misnamed; they actually get/put an int from/to user space */
+	switch (cmd) {
+	case REISERFS_IOC32_UNPACK:
+		cmd = REISERFS_IOC_UNPACK;
+		break;
+	case REISERFS_IOC32_GETFLAGS:
+		cmd = REISERFS_IOC_GETFLAGS;
+		break;
+	case REISERFS_IOC32_SETFLAGS:
+		cmd = REISERFS_IOC_SETFLAGS;
+		break;
+	case REISERFS_IOC32_GETVERSION:
+		cmd = REISERFS_IOC_GETVERSION;
+		break;
+	case REISERFS_IOC32_SETVERSION:
+		cmd = REISERFS_IOC_SETVERSION;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif
+
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to);
+/*
+** reiserfs_unpack
+** Tries to convert the tail from a direct item into an indirect item.
+** It also sets i_nopack_mask in REISERFS_I(inode)->i_flags so the file
+** is not packed again.
+*/
+int reiserfs_unpack(struct inode *inode, struct file *filp)
+{
+	int retval = 0;
+	int depth;
+	int index;
+	struct page *page;
+	struct address_space *mapping;
+	unsigned long write_from;
+	unsigned long blocksize = inode->i_sb->s_blocksize;
+
+	if (inode->i_size == 0) {
+		REISERFS_I(inode)->i_flags |= i_nopack_mask;
+		return 0;
+	}
+	/* ioctl already done */
+	if (REISERFS_I(inode)->i_flags & i_nopack_mask) {
+		return 0;
+	}
+
+	depth = reiserfs_write_lock_once(inode->i_sb);
+
+	/* we need to make sure nobody is changing the file size beneath us */
+	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
+
+	write_from = inode->i_size & (blocksize - 1);
+	/* if we are on a block boundary, we are already unpacked.  */
+	if (write_from == 0) {
+		REISERFS_I(inode)->i_flags |= i_nopack_mask;
+		goto out;
+	}
+
+	/* we unpack by finding the page with the tail, and calling
+	 ** __reiserfs_write_begin on that page.  This will force a
+	 ** reiserfs_get_block to unpack the tail for us.
+	 */
+	index = inode->i_size >> PAGE_CACHE_SHIFT;
+	mapping = inode->i_mapping;
+	page = grab_cache_page(mapping, index);
+	retval = -ENOMEM;
+	if (!page) {
+		goto out;
+	}
+	retval = __reiserfs_write_begin(page, write_from, 0);
+	if (retval)
+		goto out_unlock;
+
+	/* conversion can change page contents, must flush */
+	flush_dcache_page(page);
+	retval = reiserfs_commit_write(NULL, page, write_from, write_from);
+	REISERFS_I(inode)->i_flags |= i_nopack_mask;
+
+      out_unlock:
+	unlock_page(page);
+	page_cache_release(page);
+
+      out:
+	mutex_unlock(&inode->i_mutex);
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+	return retval;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/item_ops.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/item_ops.c
new file mode 100644
index 0000000..ee382ef
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/item_ops.c
@@ -0,0 +1,756 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/time.h>
+#include "reiserfs.h"
+
+// this contains item handlers for old item types: sd, direct,
+// indirect, directory
+
+/* and where are the comments? how about saying where we can find an
+   explanation of each item handler method? -Hans */
+
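+/* The handlers below are wired up through the item_ops[] table at the end
+   of this file, indexed by item type.  A sketch of a call site
+   (illustrative only):
+
+	if (item_ops[le_ih_k_type(ih)])
+		item_ops[le_ih_k_type(ih)]->print_item(ih, item);
+*/
+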
+//////////////////////////////////////////////////////////////////////////////
+// stat data functions
+//
+static int sd_bytes_number(struct item_head *ih, int block_size)
+{
+	return 0;
+}
+
+static void sd_decrement_key(struct cpu_key *key)
+{
+	key->on_disk_key.k_objectid--;
+	set_cpu_key_k_type(key, TYPE_ANY);
+	set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1));
+}
+
+static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
+{
+	return 0;
+}
+
+static char *print_time(time_t t)
+{
+	static char timebuf[256];
+
+	sprintf(timebuf, "%ld", t);
+	return timebuf;
+}
+
+static void sd_print_item(struct item_head *ih, char *item)
+{
+	printk("\tmode | size | nlinks | first direct | mtime\n");
+	if (stat_data_v1(ih)) {
+		struct stat_data_v1 *sd = (struct stat_data_v1 *)item;
+
+		printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd),
+		       sd_v1_size(sd), sd_v1_nlink(sd),
+		       sd_v1_first_direct_byte(sd),
+		       print_time(sd_v1_mtime(sd)));
+	} else {
+		struct stat_data *sd = (struct stat_data *)item;
+
+		printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
+		       (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
+		       sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
+	}
+}
+
+static void sd_check_item(struct item_head *ih, char *item)
+{
+	// FIXME: type something here!
+}
+
+static int sd_create_vi(struct virtual_node *vn,
+			struct virtual_item *vi,
+			int is_affected, int insert_size)
+{
+	vi->vi_index = TYPE_STAT_DATA;
+	//vi->vi_type |= VI_TYPE_STAT_DATA;// not needed?
+	return 0;
+}
+
+static int sd_check_left(struct virtual_item *vi, int free,
+			 int start_skip, int end_skip)
+{
+	BUG_ON(start_skip || end_skip);
+	return -1;
+}
+
+static int sd_check_right(struct virtual_item *vi, int free)
+{
+	return -1;
+}
+
+static int sd_part_size(struct virtual_item *vi, int first, int count)
+{
+	BUG_ON(count);
+	return 0;
+}
+
+static int sd_unit_num(struct virtual_item *vi)
+{
+	return vi->vi_item_len - IH_SIZE;
+}
+
+static void sd_print_vi(struct virtual_item *vi)
+{
+	reiserfs_warning(NULL, "reiserfs-16100",
+			 "STATDATA, index %d, type 0x%x, %h",
+			 vi->vi_index, vi->vi_type, vi->vi_ih);
+}
+
+static struct item_operations stat_data_ops = {
+	.bytes_number = sd_bytes_number,
+	.decrement_key = sd_decrement_key,
+	.is_left_mergeable = sd_is_left_mergeable,
+	.print_item = sd_print_item,
+	.check_item = sd_check_item,
+
+	.create_vi = sd_create_vi,
+	.check_left = sd_check_left,
+	.check_right = sd_check_right,
+	.part_size = sd_part_size,
+	.unit_num = sd_unit_num,
+	.print_vi = sd_print_vi
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// direct item functions
+//
+static int direct_bytes_number(struct item_head *ih, int block_size)
+{
+	return ih_item_len(ih);
+}
+
+// FIXME: this should probably switch to indirect as well
+static void direct_decrement_key(struct cpu_key *key)
+{
+	cpu_key_k_offset_dec(key);
+	if (cpu_key_k_offset(key) == 0)
+		set_cpu_key_k_type(key, TYPE_STAT_DATA);
+}
+
+static int direct_is_left_mergeable(struct reiserfs_key *key,
+				    unsigned long bsize)
+{
+	int version = le_key_version(key);
+	return ((le_key_k_offset(version, key) & (bsize - 1)) != 1);
+}
+
+static void direct_print_item(struct item_head *ih, char *item)
+{
+	int j = 0;
+
+	printk("\"");
+	while (j < ih_item_len(ih))
+		printk("%c", item[j++]);
+	printk("\"\n");
+}
+
+static void direct_check_item(struct item_head *ih, char *item)
+{
+	// FIXME: type something here!
+}
+
+static int direct_create_vi(struct virtual_node *vn,
+			    struct virtual_item *vi,
+			    int is_affected, int insert_size)
+{
+	vi->vi_index = TYPE_DIRECT;
+	//vi->vi_type |= VI_TYPE_DIRECT;
+	return 0;
+}
+
+static int direct_check_left(struct virtual_item *vi, int free,
+			     int start_skip, int end_skip)
+{
+	int bytes;
+
+	bytes = free - free % 8;
+	return bytes ? : -1;
+}
+
+static int direct_check_right(struct virtual_item *vi, int free)
+{
+	return direct_check_left(vi, free, 0, 0);
+}
+
+static int direct_part_size(struct virtual_item *vi, int first, int count)
+{
+	return count;
+}
+
+static int direct_unit_num(struct virtual_item *vi)
+{
+	return vi->vi_item_len - IH_SIZE;
+}
+
+static void direct_print_vi(struct virtual_item *vi)
+{
+	reiserfs_warning(NULL, "reiserfs-16101",
+			 "DIRECT, index %d, type 0x%x, %h",
+			 vi->vi_index, vi->vi_type, vi->vi_ih);
+}
+
+static struct item_operations direct_ops = {
+	.bytes_number = direct_bytes_number,
+	.decrement_key = direct_decrement_key,
+	.is_left_mergeable = direct_is_left_mergeable,
+	.print_item = direct_print_item,
+	.check_item = direct_check_item,
+
+	.create_vi = direct_create_vi,
+	.check_left = direct_check_left,
+	.check_right = direct_check_right,
+	.part_size = direct_part_size,
+	.unit_num = direct_unit_num,
+	.print_vi = direct_print_vi
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// indirect item functions
+//
+
+static int indirect_bytes_number(struct item_head *ih, int block_size)
+{
+	return ih_item_len(ih) / UNFM_P_SIZE * block_size;	//- get_ih_free_space (ih);
+}
+
+// decrease offset, if it becomes 0, change type to stat data
+static void indirect_decrement_key(struct cpu_key *key)
+{
+	cpu_key_k_offset_dec(key);
+	if (cpu_key_k_offset(key) == 0)
+		set_cpu_key_k_type(key, TYPE_STAT_DATA);
+}
+
+// if it is not first item of the body, then it is mergeable
+static int indirect_is_left_mergeable(struct reiserfs_key *key,
+				      unsigned long bsize)
+{
+	int version = le_key_version(key);
+	return (le_key_k_offset(version, key) != 1);
+}
+
+// printing of indirect item
+static void start_new_sequence(__u32 * start, int *len, __u32 new)
+{
+	*start = new;
+	*len = 1;
+}
+
+static int sequence_finished(__u32 start, int *len, __u32 new)
+{
+	if (start == INT_MAX)
+		return 1;
+
+	if (start == 0 && new == 0) {
+		(*len)++;
+		return 0;
+	}
+	if (start != 0 && (start + *len) == new) {
+		(*len)++;
+		return 0;
+	}
+	return 1;
+}
+
+static void print_sequence(__u32 start, int len)
+{
+	if (start == INT_MAX)
+		return;
+
+	if (len == 1)
+		printk(" %d", start);
+	else
+		printk(" %d(%d)", start, len);
+}
+
+static void indirect_print_item(struct item_head *ih, char *item)
+{
+	int j;
+	__le32 *unp;
+	__u32 prev = INT_MAX;
+	int num = 0;
+
+	unp = (__le32 *) item;
+
+	if (ih_item_len(ih) % UNFM_P_SIZE)
+		reiserfs_warning(NULL, "reiserfs-16102", "invalid item len");
+
+	printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih));
+	for (j = 0; j < I_UNFM_NUM(ih); j++) {
+		if (sequence_finished(prev, &num, get_block_num(unp, j))) {
+			print_sequence(prev, num);
+			start_new_sequence(&prev, &num, get_block_num(unp, j));
+		}
+	}
+	print_sequence(prev, num);
+	printk("]\n");
+}
+
+static void indirect_check_item(struct item_head *ih, char *item)
+{
+	// FIXME: type something here!
+}
+
+static int indirect_create_vi(struct virtual_node *vn,
+			      struct virtual_item *vi,
+			      int is_affected, int insert_size)
+{
+	vi->vi_index = TYPE_INDIRECT;
+	//vi->vi_type |= VI_TYPE_INDIRECT;
+	return 0;
+}
+
+static int indirect_check_left(struct virtual_item *vi, int free,
+			       int start_skip, int end_skip)
+{
+	int bytes;
+
+	bytes = free - free % UNFM_P_SIZE;
+	return bytes ? : -1;
+}
+
+static int indirect_check_right(struct virtual_item *vi, int free)
+{
+	return indirect_check_left(vi, free, 0, 0);
+}
+
+// return the size in bytes of 'units' units.  If first == 0, calculate from the head (left); otherwise from the tail (right)
+static int indirect_part_size(struct virtual_item *vi, int first, int units)
+{
+	// unit of indirect item is byte (yet)
+	return units;
+}
+
+static int indirect_unit_num(struct virtual_item *vi)
+{
+	// unit of indirect item is byte (yet)
+	return vi->vi_item_len - IH_SIZE;
+}
+
+static void indirect_print_vi(struct virtual_item *vi)
+{
+	reiserfs_warning(NULL, "reiserfs-16103",
+			 "INDIRECT, index %d, type 0x%x, %h",
+			 vi->vi_index, vi->vi_type, vi->vi_ih);
+}
+
+static struct item_operations indirect_ops = {
+	.bytes_number = indirect_bytes_number,
+	.decrement_key = indirect_decrement_key,
+	.is_left_mergeable = indirect_is_left_mergeable,
+	.print_item = indirect_print_item,
+	.check_item = indirect_check_item,
+
+	.create_vi = indirect_create_vi,
+	.check_left = indirect_check_left,
+	.check_right = indirect_check_right,
+	.part_size = indirect_part_size,
+	.unit_num = indirect_unit_num,
+	.print_vi = indirect_print_vi
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// direntry functions
+//
+
+static int direntry_bytes_number(struct item_head *ih, int block_size)
+{
+	reiserfs_warning(NULL, "vs-16090",
+			 "bytes number is asked for direntry");
+	return 0;
+}
+
+static void direntry_decrement_key(struct cpu_key *key)
+{
+	cpu_key_k_offset_dec(key);
+	if (cpu_key_k_offset(key) == 0)
+		set_cpu_key_k_type(key, TYPE_STAT_DATA);
+}
+
+static int direntry_is_left_mergeable(struct reiserfs_key *key,
+				      unsigned long bsize)
+{
+	if (le32_to_cpu(key->u.k_offset_v1.k_offset) == DOT_OFFSET)
+		return 0;
+	return 1;
+
+}
+
+static void direntry_print_item(struct item_head *ih, char *item)
+{
+	int i;
+	int namelen;
+	struct reiserfs_de_head *deh;
+	char *name;
+	static char namebuf[80];
+
+	printk("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name",
+	       "Key of pointed object", "Hash", "Gen number", "Status");
+
+	deh = (struct reiserfs_de_head *)item;
+
+	for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
+		namelen =
+		    (i ? (deh_location(deh - 1)) : ih_item_len(ih)) -
+		    deh_location(deh);
+		name = item + deh_location(deh);
+		if (name[namelen - 1] == 0)
+			namelen = strlen(name);
+		namebuf[0] = '"';
+		if (namelen > sizeof(namebuf) - 3) {
+			strncpy(namebuf + 1, name, sizeof(namebuf) - 3);
+			namebuf[sizeof(namebuf) - 2] = '"';
+			namebuf[sizeof(namebuf) - 1] = 0;
+		} else {
+			memcpy(namebuf + 1, name, namelen);
+			namebuf[namelen + 1] = '"';
+			namebuf[namelen + 2] = 0;
+		}
+
+		printk("%d:  %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
+		       i, namebuf,
+		       deh_dir_id(deh), deh_objectid(deh),
+		       GET_HASH_VALUE(deh_offset(deh)),
+		       GET_GENERATION_NUMBER((deh_offset(deh))),
+		       (de_hidden(deh)) ? "HIDDEN" : "VISIBLE");
+	}
+}
+
+static void direntry_check_item(struct item_head *ih, char *item)
+{
+	int i;
+	struct reiserfs_de_head *deh;
+
+	// FIXME: type something here!
+	deh = (struct reiserfs_de_head *)item;
+	for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
+		;
+	}
+}
+
+#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1
+
+/*
+ * Return the old entry number in the directory item in the real node,
+ * given the new entry number in the virtual item in the virtual node.
+ */
+static inline int old_entry_num(int is_affected, int virtual_entry_num,
+				int pos_in_item, int mode)
+{
+	if (mode == M_INSERT || mode == M_DELETE)
+		return virtual_entry_num;
+
+	if (!is_affected)
+		/* cut or paste is applied to another item */
+		return virtual_entry_num;
+
+	if (virtual_entry_num < pos_in_item)
+		return virtual_entry_num;
+
+	if (mode == M_CUT)
+		return virtual_entry_num + 1;
+
+	RFALSE(mode != M_PASTE || virtual_entry_num == 0,
+	       "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'",
+	       mode);
+
+	return virtual_entry_num - 1;
+}
+
+/* Create an array of directory entry sizes for the virtual
+   item.  Return the space used by the item.  FIXME: no control over
+   how much space this item handler consumes */
+static int direntry_create_vi(struct virtual_node *vn,
+			      struct virtual_item *vi,
+			      int is_affected, int insert_size)
+{
+	struct direntry_uarea *dir_u = vi->vi_uarea;
+	int i, j;
+	int size = sizeof(struct direntry_uarea);
+	struct reiserfs_de_head *deh;
+
+	vi->vi_index = TYPE_DIRENTRY;
+
+	BUG_ON(!(vi->vi_ih) || !vi->vi_item);
+
+	dir_u->flags = 0;
+	if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET)
+		dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM;
+
+	deh = (struct reiserfs_de_head *)(vi->vi_item);
+
+	/* the virtual directory item will have this many entries afterwards */
+	dir_u->entry_count = ih_entry_count(vi->vi_ih) +
+	    ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 :
+			      (vn->vn_mode == M_PASTE ? 1 : 0)) : 0);
+
+	for (i = 0; i < dir_u->entry_count; i++) {
+		j = old_entry_num(is_affected, i, vn->vn_pos_in_item,
+				  vn->vn_mode);
+		dir_u->entry_sizes[i] =
+		    (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) -
+		    deh_location(&(deh[j])) + DEH_SIZE;
+	}
+
+	size += (dir_u->entry_count * sizeof(short));
+
+	/* set size of pasted entry */
+	if (is_affected && vn->vn_mode == M_PASTE)
+		dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size;
+
+#ifdef CONFIG_REISERFS_CHECK
+	/* compare total size of entries with item length */
+	{
+		int k, l;
+
+		l = 0;
+		for (k = 0; k < dir_u->entry_count; k++)
+			l += dir_u->entry_sizes[k];
+
+		if (l + IH_SIZE != vi->vi_item_len +
+		    ((is_affected
+		      && (vn->vn_mode == M_PASTE
+			  || vn->vn_mode == M_CUT)) ? insert_size : 0)) {
+			reiserfs_panic(NULL, "vs-8025", "(mode==%c, "
+				       "insert_size==%d), invalid length of "
+				       "directory item",
+				       vn->vn_mode, insert_size);
+		}
+	}
+#endif
+
+	return size;
+
+}
+
+//
+// return number of entries which may fit into specified amount of
+// free space, or -1 if free space is not enough even for 1 entry
+//
+static int direntry_check_left(struct virtual_item *vi, int free,
+			       int start_skip, int end_skip)
+{
+	int i;
+	int entries = 0;
+	struct direntry_uarea *dir_u = vi->vi_uarea;
+
+	for (i = start_skip; i < dir_u->entry_count - end_skip; i++) {
+		if (dir_u->entry_sizes[i] > free)
+			/* i-th entry doesn't fit into the remaining free space */
+			break;
+
+		free -= dir_u->entry_sizes[i];
+		entries++;
+	}
+
+	if (entries == dir_u->entry_count) {
+		reiserfs_panic(NULL, "item_ops-1",
+			       "free space %d, entry_count %d", free,
+			       dir_u->entry_count);
+	}
+
+	/* "." and ".." can not be separated from each other */
+	if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
+	    && entries < 2)
+		entries = 0;
+
+	return entries ? : -1;
+}
+
+static int direntry_check_right(struct virtual_item *vi, int free)
+{
+	int i;
+	int entries = 0;
+	struct direntry_uarea *dir_u = vi->vi_uarea;
+
+	for (i = dir_u->entry_count - 1; i >= 0; i--) {
+		if (dir_u->entry_sizes[i] > free)
+			/* i-th entry doesn't fit into the remaining free space */
+			break;
+
+		free -= dir_u->entry_sizes[i];
+		entries++;
+	}
+	BUG_ON(entries == dir_u->entry_count);
+
+	/* "." and ".." can not be separated from each other */
+	if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
+	    && entries > dir_u->entry_count - 2)
+		entries = dir_u->entry_count - 2;
+
+	return entries ? : -1;
+}
+
+/* sum of entry sizes between from-th and to-th entries including both edges */
+static int direntry_part_size(struct virtual_item *vi, int first, int count)
+{
+	int i, retval;
+	int from, to;
+	struct direntry_uarea *dir_u = vi->vi_uarea;
+
+	retval = 0;
+	if (first == 0)
+		from = 0;
+	else
+		from = dir_u->entry_count - count;
+	to = from + count - 1;
+
+	for (i = from; i <= to; i++)
+		retval += dir_u->entry_sizes[i];
+
+	return retval;
+}
+
+static int direntry_unit_num(struct virtual_item *vi)
+{
+	struct direntry_uarea *dir_u = vi->vi_uarea;
+
+	return dir_u->entry_count;
+}
+
+static void direntry_print_vi(struct virtual_item *vi)
+{
+	int i;
+	struct direntry_uarea *dir_u = vi->vi_uarea;
+
+	reiserfs_warning(NULL, "reiserfs-16104",
+			 "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
+			 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags);
+	printk("%d entries: ", dir_u->entry_count);
+	for (i = 0; i < dir_u->entry_count; i++)
+		printk("%d ", dir_u->entry_sizes[i]);
+	printk("\n");
+}
+
+static struct item_operations direntry_ops = {
+	.bytes_number = direntry_bytes_number,
+	.decrement_key = direntry_decrement_key,
+	.is_left_mergeable = direntry_is_left_mergeable,
+	.print_item = direntry_print_item,
+	.check_item = direntry_check_item,
+
+	.create_vi = direntry_create_vi,
+	.check_left = direntry_check_left,
+	.check_right = direntry_check_right,
+	.part_size = direntry_part_size,
+	.unit_num = direntry_unit_num,
+	.print_vi = direntry_print_vi
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Error catching functions to catch errors caused by incorrect item types.
+//
+static int errcatch_bytes_number(struct item_head *ih, int block_size)
+{
+	reiserfs_warning(NULL, "green-16001",
+			 "Invalid item type observed, run fsck ASAP");
+	return 0;
+}
+
+static void errcatch_decrement_key(struct cpu_key *key)
+{
+	reiserfs_warning(NULL, "green-16002",
+			 "Invalid item type observed, run fsck ASAP");
+}
+
+static int errcatch_is_left_mergeable(struct reiserfs_key *key,
+				      unsigned long bsize)
+{
+	reiserfs_warning(NULL, "green-16003",
+			 "Invalid item type observed, run fsck ASAP");
+	return 0;
+}
+
+static void errcatch_print_item(struct item_head *ih, char *item)
+{
+	reiserfs_warning(NULL, "green-16004",
+			 "Invalid item type observed, run fsck ASAP");
+}
+
+static void errcatch_check_item(struct item_head *ih, char *item)
+{
+	reiserfs_warning(NULL, "green-16005",
+			 "Invalid item type observed, run fsck ASAP");
+}
+
+static int errcatch_create_vi(struct virtual_node *vn,
+			      struct virtual_item *vi,
+			      int is_affected, int insert_size)
+{
+	reiserfs_warning(NULL, "green-16006",
+			 "Invalid item type observed, run fsck ASAP");
+	return 0;		// We might return -1 here as well, but it would not help:
+	// create_virtual_node(), which calls this, has return type void.
+}
+
+static int errcatch_check_left(struct virtual_item *vi, int free,
+			       int start_skip, int end_skip)
+{
+	reiserfs_warning(NULL, "green-16007",
+			 "Invalid item type observed, run fsck ASAP");
+	return -1;
+}
+
+static int errcatch_check_right(struct virtual_item *vi, int free)
+{
+	reiserfs_warning(NULL, "green-16008",
+			 "Invalid item type observed, run fsck ASAP");
+	return -1;
+}
+
+static int errcatch_part_size(struct virtual_item *vi, int first, int count)
+{
+	reiserfs_warning(NULL, "green-16009",
+			 "Invalid item type observed, run fsck ASAP");
+	return 0;
+}
+
+static int errcatch_unit_num(struct virtual_item *vi)
+{
+	reiserfs_warning(NULL, "green-16010",
+			 "Invalid item type observed, run fsck ASAP");
+	return 0;
+}
+
+static void errcatch_print_vi(struct virtual_item *vi)
+{
+	reiserfs_warning(NULL, "green-16011",
+			 "Invalid item type observed, run fsck ASAP");
+}
+
+static struct item_operations errcatch_ops = {
+	errcatch_bytes_number,
+	errcatch_decrement_key,
+	errcatch_is_left_mergeable,
+	errcatch_print_item,
+	errcatch_check_item,
+
+	errcatch_create_vi,
+	errcatch_check_left,
+	errcatch_check_right,
+	errcatch_part_size,
+	errcatch_unit_num,
+	errcatch_print_vi
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//
+#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
+#error Item types must use disk-format assigned values.
+#endif
+
+struct item_operations *item_ops[TYPE_ANY + 1] = {
+	&stat_data_ops,
+	&indirect_ops,
+	&direct_ops,
+	&direntry_ops,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	&errcatch_ops		/* catches errors with invalid item type (the entry at index 15, TYPE_ANY) */
+};
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/journal.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/journal.c
new file mode 100644
index 0000000..b1a0857
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/journal.c
@@ -0,0 +1,4286 @@
+/*
+** Write ahead logging implementation copyright Chris Mason 2000
+**
+** The background commits make this code very interrelated, and
+** overly complex.  I need to rethink things a bit....The major players:
+**
+** journal_begin -- call with the number of blocks you expect to log.
+**                  If the current transaction is too
+** 		    old, it will block until the current transaction is
+** 		    finished, and then start a new one.
+**		    Usually, your transaction will get joined in with
+**                  previous ones for speed.
+**
+** journal_join  -- same as journal_begin, but won't block on the current
+**                  transaction regardless of age.  Don't ever call
+**                  this.  Ever.  There are only two places it should be
+**                  called from, and they are both inside this file.
+**
+** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
+**                       that might make them get sent to disk
+**                       and then marks them BH_JDirty.  Puts the buffer head
+**                       into the current transaction hash.
+**
+** journal_end -- if the current transaction is batchable, it does nothing
+**                   otherwise, it could do an async/synchronous commit, or
+**                   a full flush of all log and real blocks in the
+**                   transaction.
+**
+** flush_old_commits -- if the current transaction is too old, it is ended and
+**                      commit blocks are sent to disk.  Forces commit blocks
+**                      to disk for all backgrounded commits that have been
+**                      around too long.
+**		     -- Note, if you call this as an immediate flush from
+**		        within kupdate, it will ignore the immediate flag
+*/
+
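+/*
+** A minimal usage sketch of the API described above (illustrative only;
+** block counts, locking and error handling are simplified):
+**
+**	struct reiserfs_transaction_handle th;
+**	int err = journal_begin(&th, sb, jbegin_count);
+**	if (err)
+**		return err;
+**	reiserfs_prepare_for_journal(sb, bh, 1);
+**	... modify the metadata buffer ...
+**	journal_mark_dirty(&th, sb, bh);
+**	err = journal_end(&th, sb, jbegin_count);
+*/
+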
+#include <linux/time.h>
+#include <linux/semaphore.h>
+#include <linux/vmalloc.h>
+#include "reiserfs.h"
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+
+/* gets a struct reiserfs_journal_list * from a list head */
+#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+                               j_list))
+#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+                               j_working_list))
+
+/* the number of mounted filesystems.  This is used to decide when to
+** start and kill the commit workqueue
+*/
+static int reiserfs_mounted_fs_count;
+
+static struct workqueue_struct *commit_wq;
+
+#define JOURNAL_TRANS_HALF 1018	/* must be correct to keep the desc and commit
+				   structs at 4k */
+#define BUFNR 64		/* read ahead */
+
+/* cnode stat bits.  Move these into reiserfs_fs.h */
+
+#define BLOCK_FREED 2		/* this block was freed, and can't be written.  */
+#define BLOCK_FREED_HOLDER 3	/* this block was freed during this transaction, and can't be written */
+
+#define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
+#define BLOCK_DIRTIED 5
+
+/* journal list state bits */
+#define LIST_TOUCHED 1
+#define LIST_DIRTY   2
+#define LIST_COMMIT_PENDING  4	/* someone will commit this list */
+
+/* flags for do_journal_end */
+#define FLUSH_ALL   1		/* flush commit and real blocks */
+#define COMMIT_NOW  2		/* end and commit this transaction */
+#define WAIT        4		/* wait for the log blocks to hit the disk */
+
+static int do_journal_end(struct reiserfs_transaction_handle *,
+			  struct super_block *, unsigned long nblocks,
+			  int flags);
+static int flush_journal_list(struct super_block *s,
+			      struct reiserfs_journal_list *jl, int flushall);
+static int flush_commit_list(struct super_block *s,
+			     struct reiserfs_journal_list *jl, int flushall);
+static int can_dirty(struct reiserfs_journal_cnode *cn);
+static int journal_join(struct reiserfs_transaction_handle *th,
+			struct super_block *sb, unsigned long nblocks);
+static int release_journal_dev(struct super_block *super,
+			       struct reiserfs_journal *journal);
+static int dirty_one_transaction(struct super_block *s,
+				 struct reiserfs_journal_list *jl);
+static void flush_async_commits(struct work_struct *work);
+static void queue_log_writer(struct super_block *s);
+
+/* values for join in do_journal_begin_r */
+enum {
+	JBEGIN_REG = 0,		/* regular journal begin */
+	JBEGIN_JOIN = 1,	/* join the running transaction if at all possible */
+	JBEGIN_ABORT = 2,	/* called from cleanup code, ignores aborted flag */
+};
+
+static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
+			      struct super_block *sb,
+			      unsigned long nblocks, int join);
+
+static void init_journal_hash(struct super_block *sb)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	memset(journal->j_hash_table, 0,
+	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
+}
+
+/*
+** clears BH_Dirty and sticks the buffer on the clean list.  Called because I can't allow refile_buffer to
+** make schedule happen after I've freed a block.  Look at remove_from_transaction and journal_mark_freed for
+** more details.
+*/
+static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
+{
+	if (bh) {
+		clear_buffer_dirty(bh);
+		clear_buffer_journal_test(bh);
+	}
+	return 0;
+}
+
+static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
+							 *sb)
+{
+	struct reiserfs_bitmap_node *bn;
+	static int id;
+
+	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
+	if (!bn) {
+		return NULL;
+	}
+	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
+	if (!bn->data) {
+		kfree(bn);
+		return NULL;
+	}
+	bn->id = id++;
+	INIT_LIST_HEAD(&bn->list);
+	return bn;
+}
+
+static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_bitmap_node *bn = NULL;
+	struct list_head *entry = journal->j_bitmap_nodes.next;
+
+	journal->j_used_bitmap_nodes++;
+      repeat:
+
+	if (entry != &journal->j_bitmap_nodes) {
+		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
+		list_del(entry);
+		memset(bn->data, 0, sb->s_blocksize);
+		journal->j_free_bitmap_nodes--;
+		return bn;
+	}
+	bn = allocate_bitmap_node(sb);
+	if (!bn) {
+		yield();
+		goto repeat;
+	}
+	return bn;
+}
+static inline void free_bitmap_node(struct super_block *sb,
+				    struct reiserfs_bitmap_node *bn)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	journal->j_used_bitmap_nodes--;
+	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
+		kfree(bn->data);
+		kfree(bn);
+	} else {
+		list_add(&bn->list, &journal->j_bitmap_nodes);
+		journal->j_free_bitmap_nodes++;
+	}
+}
+
+static void allocate_bitmap_nodes(struct super_block *sb)
+{
+	int i;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_bitmap_node *bn = NULL;
+	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
+		bn = allocate_bitmap_node(sb);
+		if (bn) {
+			list_add(&bn->list, &journal->j_bitmap_nodes);
+			journal->j_free_bitmap_nodes++;
+		} else {
+			break;	/* this is ok, we'll try again when more are needed */
+		}
+	}
+}
+
+static int set_bit_in_list_bitmap(struct super_block *sb,
+				  b_blocknr_t block,
+				  struct reiserfs_list_bitmap *jb)
+{
+	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
+	unsigned int bit_nr = block % (sb->s_blocksize << 3);
+
+	if (!jb->bitmaps[bmap_nr]) {
+		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
+	}
+	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
+	return 0;
+}
+
+static void cleanup_bitmap_list(struct super_block *sb,
+				struct reiserfs_list_bitmap *jb)
+{
+	int i;
+	if (jb->bitmaps == NULL)
+		return;
+
+	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
+		if (jb->bitmaps[i]) {
+			free_bitmap_node(sb, jb->bitmaps[i]);
+			jb->bitmaps[i] = NULL;
+		}
+	}
+}
+
+/*
+** only call this on FS unmount.
+*/
+static int free_list_bitmaps(struct super_block *sb,
+			     struct reiserfs_list_bitmap *jb_array)
+{
+	int i;
+	struct reiserfs_list_bitmap *jb;
+	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
+		jb = jb_array + i;
+		jb->journal_list = NULL;
+		cleanup_bitmap_list(sb, jb);
+		vfree(jb->bitmaps);
+		jb->bitmaps = NULL;
+	}
+	return 0;
+}
+
+static int free_bitmap_nodes(struct super_block *sb)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct list_head *next = journal->j_bitmap_nodes.next;
+	struct reiserfs_bitmap_node *bn;
+
+	while (next != &journal->j_bitmap_nodes) {
+		bn = list_entry(next, struct reiserfs_bitmap_node, list);
+		list_del(next);
+		kfree(bn->data);
+		kfree(bn);
+		next = journal->j_bitmap_nodes.next;
+		journal->j_free_bitmap_nodes--;
+	}
+
+	return 0;
+}
+
+/*
+** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
+** jb_array is the array to be filled in.
+*/
+int reiserfs_allocate_list_bitmaps(struct super_block *sb,
+				   struct reiserfs_list_bitmap *jb_array,
+				   unsigned int bmap_nr)
+{
+	int i;
+	int failed = 0;
+	struct reiserfs_list_bitmap *jb;
+	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
+
+	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
+		jb = jb_array + i;
+		jb->journal_list = NULL;
+		jb->bitmaps = vzalloc(mem);
+		if (!jb->bitmaps) {
+			reiserfs_warning(sb, "clm-2000", "unable to "
+					 "allocate bitmaps for journal lists");
+			failed = 1;
+			break;
+		}
+	}
+	if (failed) {
+		free_list_bitmaps(sb, jb_array);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+** find an available list bitmap.  If you can't find one, flush a commit list
+** and try again
+*/
+static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
+						    struct reiserfs_journal_list
+						    *jl)
+{
+	int i, j;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_list_bitmap *jb = NULL;
+
+	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
+		i = journal->j_list_bitmap_index;
+		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
+		jb = journal->j_list_bitmap + i;
+		if (journal->j_list_bitmap[i].journal_list) {
+			flush_commit_list(sb,
+					  journal->j_list_bitmap[i].
+					  journal_list, 1);
+			if (!journal->j_list_bitmap[i].journal_list) {
+				break;
+			}
+		} else {
+			break;
+		}
+	}
+	if (jb->journal_list) {	/* double-check that it was flushed correctly */
+		return NULL;
+	}
+	jb->journal_list = jl;
+	return jb;
+}
+
+/*
+** allocates a new chunk of X nodes, and links them all together as a list.
+** Uses the cnode->next and cnode->prev pointers
+** returns NULL on failure
+*/
+static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
+{
+	struct reiserfs_journal_cnode *head;
+	int i;
+	if (num_cnodes <= 0) {
+		return NULL;
+	}
+	head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
+	if (!head) {
+		return NULL;
+	}
+	head[0].prev = NULL;
+	head[0].next = head + 1;
+	for (i = 1; i < num_cnodes; i++) {
+		head[i].prev = head + (i - 1);
+		head[i].next = head + (i + 1);	/* the last one is fixed up after the loop */
+	}
+	head[num_cnodes - 1].next = NULL;
+	return head;
+}
+
+/*
+** pulls a cnode off the free list, or returns NULL on failure
+*/
+static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
+{
+	struct reiserfs_journal_cnode *cn;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	reiserfs_check_lock_depth(sb, "get_cnode");
+
+	if (journal->j_cnode_free <= 0) {
+		return NULL;
+	}
+	journal->j_cnode_used++;
+	journal->j_cnode_free--;
+	cn = journal->j_cnode_free_list;
+	if (!cn) {
+		return cn;
+	}
+	if (cn->next) {
+		cn->next->prev = NULL;
+	}
+	journal->j_cnode_free_list = cn->next;
+	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
+	return cn;
+}
+
+/*
+** returns a cnode to the free list
+*/
+static void free_cnode(struct super_block *sb,
+		       struct reiserfs_journal_cnode *cn)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	reiserfs_check_lock_depth(sb, "free_cnode");
+
+	journal->j_cnode_used--;
+	journal->j_cnode_free++;
+	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
+	cn->next = journal->j_cnode_free_list;
+	if (journal->j_cnode_free_list) {
+		journal->j_cnode_free_list->prev = cn;
+	}
+	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
+	journal->j_cnode_free_list = cn;
+}
+
+static void clear_prepared_bits(struct buffer_head *bh)
+{
+	clear_buffer_journal_prepared(bh);
+	clear_buffer_journal_restore_dirty(bh);
+}
+
+/* return a cnode with same dev, block number and size in table, or null if not found */
+static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
+								  super_block
+								  *sb,
+								  struct
+								  reiserfs_journal_cnode
+								  **table,
+								  long bl)
+{
+	struct reiserfs_journal_cnode *cn;
+	cn = journal_hash(table, sb, bl);
+	while (cn) {
+		if (cn->blocknr == bl && cn->sb == sb)
+			return cn;
+		cn = cn->hnext;
+	}
+	return NULL;
+}
+
+/*
+** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
+** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
+** being overwritten by a replay after crashing.
+**
+** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
+** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
+** sure you never write the block without logging it.
+**
+** next_zero_bit is a suggestion about the next block to try for find_forward.
+** when bl is rejected because it is set in a journal list bitmap, we search
+** for the next zero bit in the bitmap that rejected bl.  Then, we return that
+** through next_zero_bit for find_forward to try.
+**
+** Just because we return something in next_zero_bit does not mean we won't
+** reject it on the next call to reiserfs_in_journal
+**
+*/
+int reiserfs_in_journal(struct super_block *sb,
+			unsigned int bmap_nr, int bit_nr, int search_all,
+			b_blocknr_t * next_zero_bit)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_cnode *cn;
+	struct reiserfs_list_bitmap *jb;
+	int i;
+	unsigned long bl;
+
+	*next_zero_bit = 0;	/* always start this at zero. */
+
+	PROC_INFO_INC(sb, journal.in_journal);
+	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
+	 ** If we crash before the transaction that freed it commits, this transaction won't
+	 ** have committed either, and the block will never be written
+	 */
+	 */
+	if (search_all) {
+		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
+			PROC_INFO_INC(sb, journal.in_journal_bitmap);
+			jb = journal->j_list_bitmap + i;
+			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
+			    test_bit(bit_nr,
+				     (unsigned long *)jb->bitmaps[bmap_nr]->
+				     data)) {
+				*next_zero_bit =
+				    find_next_zero_bit((unsigned long *)
+						       (jb->bitmaps[bmap_nr]->
+							data),
+						       sb->s_blocksize << 3,
+						       bit_nr + 1);
+				return 1;
+			}
+		}
+	}
+
+	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
+	/* is it in any old transactions? */
+	if (search_all
+	    && (cn =
+		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
+		return 1;
+	}
+
+	/* is it in the current transaction?  This should never happen */
+	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
+		BUG();
+		return 1;
+	}
+
+	PROC_INFO_INC(sb, journal.in_journal_reusable);
+	/* safe for reuse */
+	return 0;
+}
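+
+/*
+ * Hedged usage sketch (the caller shown is illustrative, not a quote from
+ * the allocator): a block-allocator loop would consult
+ * reiserfs_in_journal() and use next_zero_bit to skip ahead when a journal
+ * list bitmap rejects the candidate:
+ *
+ *	b_blocknr_t next;
+ *	while (reiserfs_in_journal(sb, bmap_nr, bit_nr, 1, &next)) {
+ *		bit_nr = next;	// retry from the suggested zero bit
+ *		// (a real caller must also re-bound bit_nr)
+ *	}
+ *	// bit_nr is now reusable, pending the usual bitmap checks
+ */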
+
+/* insert cn into table */
+static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
+				       struct reiserfs_journal_cnode *cn)
+{
+	struct reiserfs_journal_cnode *cn_orig;
+
+	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
+	cn->hnext = cn_orig;
+	cn->hprev = NULL;
+	if (cn_orig) {
+		cn_orig->hprev = cn;
+	}
+	journal_hash(table, cn->sb, cn->blocknr) = cn;
+}
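+
+/*
+ * Note: journal_hash() is an array-indexing macro (defined in the
+ * reiserfs headers), so it expands to an lvalue; that is why it can sit
+ * on the left-hand side of the assignment above.  Chains hanging off each
+ * table slot are linked through hnext/hprev.
+ */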
+
+/* lock the current transaction */
+static inline void lock_journal(struct super_block *sb)
+{
+	PROC_INFO_INC(sb, journal.lock_journal);
+
+	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
+}
+
+/* unlock the current transaction */
+static inline void unlock_journal(struct super_block *sb)
+{
+	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
+}
+
+static inline void get_journal_list(struct reiserfs_journal_list *jl)
+{
+	jl->j_refcount++;
+}
+
+static inline void put_journal_list(struct super_block *s,
+				    struct reiserfs_journal_list *jl)
+{
+	if (jl->j_refcount < 1) {
+		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
+			       jl->j_trans_id, jl->j_refcount);
+	}
+	if (--jl->j_refcount == 0)
+		kfree(jl);
+}
+
+/*
+** this used to be much more involved, and I'm keeping it just in case things get ugly again.
+** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
+** transaction.
+*/
+static void cleanup_freed_for_journal_list(struct super_block *sb,
+					   struct reiserfs_journal_list *jl)
+{
+
+	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
+	if (jb) {
+		cleanup_bitmap_list(sb, jb);
+	}
+	jl->j_list_bitmap->journal_list = NULL;
+	jl->j_list_bitmap = NULL;
+}
+
+static int journal_list_still_alive(struct super_block *s,
+				    unsigned int trans_id)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	struct list_head *entry = &journal->j_journal_list;
+	struct reiserfs_journal_list *jl;
+
+	if (!list_empty(entry)) {
+		jl = JOURNAL_LIST_ENTRY(entry->next);
+		if (jl->j_trans_id <= trans_id) {
+			return 1;
+		}
+	}
+	return 0;
+}
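+
+/*
+ * Why checking only the first entry is enough (a sketch of the invariant,
+ * not new behavior): journal lists sit oldest-first on j_journal_list and
+ * are freed oldest-first, so if the oldest surviving list still has a
+ * trans_id <= the one we were asked about, the asked-about list cannot
+ * have been freed yet.
+ */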
+
+/*
+ * If page->mapping was null, we failed to truncate this page for
+ * some reason.  Most likely because it was truncated after being
+ * logged via data=journal.
+ *
+ * This does a check to see if the buffer belongs to one of these
+ * lost pages before doing the final put_bh.  If page->mapping was
+ * null, it tries to free buffers on the page, which should make the
+ * final page_cache_release drop the page from the lru.
+ */
+static void release_buffer_page(struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	if (!page->mapping && trylock_page(page)) {
+		page_cache_get(page);
+		put_bh(bh);
+		if (!page->mapping)
+			try_to_free_buffers(page);
+		unlock_page(page);
+		page_cache_release(page);
+	} else {
+		put_bh(bh);
+	}
+}
+
+static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+{
+	char b[BDEVNAME_SIZE];
+
+	if (buffer_journaled(bh)) {
+		reiserfs_warning(NULL, "clm-2084",
+				 "pinned buffer %lu:%s sent to disk",
+				 bh->b_blocknr, bdevname(bh->b_bdev, b));
+	}
+	if (uptodate)
+		set_buffer_uptodate(bh);
+	else
+		clear_buffer_uptodate(bh);
+
+	unlock_buffer(bh);
+	release_buffer_page(bh);
+}
+
+static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
+{
+	if (uptodate)
+		set_buffer_uptodate(bh);
+	else
+		clear_buffer_uptodate(bh);
+	unlock_buffer(bh);
+	put_bh(bh);
+}
+
+static void submit_logged_buffer(struct buffer_head *bh)
+{
+	get_bh(bh);
+	bh->b_end_io = reiserfs_end_buffer_io_sync;
+	clear_buffer_journal_new(bh);
+	clear_buffer_dirty(bh);
+	if (!test_clear_buffer_journal_test(bh))
+		BUG();
+	if (!buffer_uptodate(bh))
+		BUG();
+	submit_bh(WRITE, bh);
+}
+
+static void submit_ordered_buffer(struct buffer_head *bh)
+{
+	get_bh(bh);
+	bh->b_end_io = reiserfs_end_ordered_io;
+	clear_buffer_dirty(bh);
+	if (!buffer_uptodate(bh))
+		BUG();
+	submit_bh(WRITE, bh);
+}
+
+#define CHUNK_SIZE 32
+struct buffer_chunk {
+	struct buffer_head *bh[CHUNK_SIZE];
+	int nr;
+};
+
+static void write_chunk(struct buffer_chunk *chunk)
+{
+	int i;
+	for (i = 0; i < chunk->nr; i++) {
+		submit_logged_buffer(chunk->bh[i]);
+	}
+	chunk->nr = 0;
+}
+
+static void write_ordered_chunk(struct buffer_chunk *chunk)
+{
+	int i;
+	for (i = 0; i < chunk->nr; i++) {
+		submit_ordered_buffer(chunk->bh[i]);
+	}
+	chunk->nr = 0;
+}
+
+static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
+			spinlock_t * lock, void (fn) (struct buffer_chunk *))
+{
+	int ret = 0;
+	BUG_ON(chunk->nr >= CHUNK_SIZE);
+	chunk->bh[chunk->nr++] = bh;
+	if (chunk->nr >= CHUNK_SIZE) {
+		ret = 1;
+		if (lock)
+			spin_unlock(lock);
+		fn(chunk);
+		if (lock)
+			spin_lock(lock);
+	}
+	return ret;
+}
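+
+/*
+ * Typical batching pattern (sketch; see write_ordered_buffers below for
+ * the real caller):
+ *
+ *	struct buffer_chunk chunk;
+ *	chunk.nr = 0;
+ *	...
+ *	add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
+ *	...
+ *	if (chunk.nr)	// submit the final partial batch
+ *		write_ordered_chunk(&chunk);
+ *
+ * add_to_chunk() drops `lock` around the submission callback because
+ * submitting buffers may sleep.
+ */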
+
+static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
+static struct reiserfs_jh *alloc_jh(void)
+{
+	struct reiserfs_jh *jh;
+	while (1) {
+		jh = kmalloc(sizeof(*jh), GFP_NOFS);
+		if (jh) {
+			atomic_inc(&nr_reiserfs_jh);
+			return jh;
+		}
+		yield();
+	}
+}
+
+/*
+ * we want to free the jh when the buffer has been written
+ * and waited on
+ */
+void reiserfs_free_jh(struct buffer_head *bh)
+{
+	struct reiserfs_jh *jh;
+
+	jh = bh->b_private;
+	if (jh) {
+		bh->b_private = NULL;
+		jh->bh = NULL;
+		list_del_init(&jh->list);
+		kfree(jh);
+		if (atomic_read(&nr_reiserfs_jh) <= 0)
+			BUG();
+		atomic_dec(&nr_reiserfs_jh);
+		put_bh(bh);
+	}
+}
+
+static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
+			   int tail)
+{
+	struct reiserfs_jh *jh;
+
+	if (bh->b_private) {
+		spin_lock(&j->j_dirty_buffers_lock);
+		if (!bh->b_private) {
+			spin_unlock(&j->j_dirty_buffers_lock);
+			goto no_jh;
+		}
+		jh = bh->b_private;
+		list_del_init(&jh->list);
+	} else {
+	      no_jh:
+		get_bh(bh);
+		jh = alloc_jh();
+		spin_lock(&j->j_dirty_buffers_lock);
+		/* buffer must be locked for __add_jh, so there should not
+		 * be two adds at the same time
+		 */
+		BUG_ON(bh->b_private);
+		jh->bh = bh;
+		bh->b_private = jh;
+	}
+	jh->jl = j->j_current_jl;
+	if (tail)
+		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
+	else {
+		list_add_tail(&jh->list, &jh->jl->j_bh_list);
+	}
+	spin_unlock(&j->j_dirty_buffers_lock);
+	return 0;
+}
+
+int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
+{
+	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
+}
+int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
+{
+	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
+}
+
+#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
+static int write_ordered_buffers(spinlock_t * lock,
+				 struct reiserfs_journal *j,
+				 struct reiserfs_journal_list *jl,
+				 struct list_head *list)
+{
+	struct buffer_head *bh;
+	struct reiserfs_jh *jh;
+	int ret = j->j_errno;
+	struct buffer_chunk chunk;
+	struct list_head tmp;
+	INIT_LIST_HEAD(&tmp);
+
+	chunk.nr = 0;
+	spin_lock(lock);
+	while (!list_empty(list)) {
+		jh = JH_ENTRY(list->next);
+		bh = jh->bh;
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			if (!buffer_dirty(bh)) {
+				list_move(&jh->list, &tmp);
+				goto loop_next;
+			}
+			spin_unlock(lock);
+			if (chunk.nr)
+				write_ordered_chunk(&chunk);
+			wait_on_buffer(bh);
+			cond_resched();
+			spin_lock(lock);
+			goto loop_next;
+		}
+		/* in theory, dirty non-uptodate buffers should never get here,
+		 * but the upper layer io error paths still have a few quirks.
+		 * Handle them here as gracefully as we can
+		 */
+		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
+			clear_buffer_dirty(bh);
+			ret = -EIO;
+		}
+		if (buffer_dirty(bh)) {
+			list_move(&jh->list, &tmp);
+			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
+		} else {
+			reiserfs_free_jh(bh);
+			unlock_buffer(bh);
+		}
+	      loop_next:
+		put_bh(bh);
+		cond_resched_lock(lock);
+	}
+	if (chunk.nr) {
+		spin_unlock(lock);
+		write_ordered_chunk(&chunk);
+		spin_lock(lock);
+	}
+	while (!list_empty(&tmp)) {
+		jh = JH_ENTRY(tmp.prev);
+		bh = jh->bh;
+		get_bh(bh);
+		reiserfs_free_jh(bh);
+
+		if (buffer_locked(bh)) {
+			spin_unlock(lock);
+			wait_on_buffer(bh);
+			spin_lock(lock);
+		}
+		if (!buffer_uptodate(bh)) {
+			ret = -EIO;
+		}
+		/* ugly interaction with invalidatepage here.
+		 * reiserfs_invalidate_page will pin any buffer that has a valid
+		 * journal head from an older transaction.  If someone else sets
+		 * our buffer dirty after we write it in the first loop, and
+		 * then someone truncates the page away, nobody will ever write
+		 * the buffer. We're safe if we write the page one last time
+		 * after freeing the journal head.
+		 */
+		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
+			spin_unlock(lock);
+			ll_rw_block(WRITE, 1, &bh);
+			spin_lock(lock);
+		}
+		put_bh(bh);
+		cond_resched_lock(lock);
+	}
+	spin_unlock(lock);
+	return ret;
+}
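+
+/*
+ * Shape of the function above, for reference: pass one walks `list`,
+ * submitting dirty buffers in CHUNK_SIZE batches and parking every buffer
+ * it touched on `tmp`; pass two walks `tmp`, waits for the I/O, and turns
+ * any non-uptodate buffer into -EIO for the caller.
+ */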
+
+static int flush_older_commits(struct super_block *s,
+			       struct reiserfs_journal_list *jl)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	struct reiserfs_journal_list *other_jl;
+	struct reiserfs_journal_list *first_jl;
+	struct list_head *entry;
+	unsigned int trans_id = jl->j_trans_id;
+	unsigned int other_trans_id;
+	unsigned int first_trans_id;
+
+      find_first:
+	/*
+	 * first we walk backwards to find the oldest uncommitted transaction
+	 */
+	first_jl = jl;
+	entry = jl->j_list.prev;
+	while (1) {
+		other_jl = JOURNAL_LIST_ENTRY(entry);
+		if (entry == &journal->j_journal_list ||
+		    atomic_read(&other_jl->j_older_commits_done))
+			break;
+
+		first_jl = other_jl;
+		entry = other_jl->j_list.prev;
+	}
+
+	/* if we didn't find any older uncommitted transactions, return now */
+	if (first_jl == jl) {
+		return 0;
+	}
+
+	first_trans_id = first_jl->j_trans_id;
+
+	entry = &first_jl->j_list;
+	while (1) {
+		other_jl = JOURNAL_LIST_ENTRY(entry);
+		other_trans_id = other_jl->j_trans_id;
+
+		if (other_trans_id < trans_id) {
+			if (atomic_read(&other_jl->j_commit_left) != 0) {
+				flush_commit_list(s, other_jl, 0);
+
+				/* list we were called with is gone, return */
+				if (!journal_list_still_alive(s, trans_id))
+					return 1;
+
+				/* the one we just flushed is gone, this means all
+				 * older lists are also gone, so first_jl is no longer
+				 * valid either.  Go back to the beginning.
+				 */
+				if (!journal_list_still_alive
+				    (s, other_trans_id)) {
+					goto find_first;
+				}
+			}
+			entry = entry->next;
+			if (entry == &journal->j_journal_list)
+				return 0;
+		} else {
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static int reiserfs_async_progress_wait(struct super_block *s)
+{
+	struct reiserfs_journal *j = SB_JOURNAL(s);
+
+	if (atomic_read(&j->j_async_throttle)) {
+		reiserfs_write_unlock(s);
+		congestion_wait(BLK_RW_ASYNC, HZ / 10);
+		reiserfs_write_lock(s);
+	}
+
+	return 0;
+}
+
+/*
+** if this journal list still has commit blocks unflushed, send them to disk.
+**
+** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
+** Before the commit block can be written, every other log block must be safely on disk
+**
+*/
+static int flush_commit_list(struct super_block *s,
+			     struct reiserfs_journal_list *jl, int flushall)
+{
+	int i;
+	b_blocknr_t bn;
+	struct buffer_head *tbh = NULL;
+	unsigned int trans_id = jl->j_trans_id;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	int retval = 0;
+	int write_len;
+
+	reiserfs_check_lock_depth(s, "flush_commit_list");
+
+	if (atomic_read(&jl->j_older_commits_done)) {
+		return 0;
+	}
+
+	/* before we can put our commit blocks on disk, we have to make sure everyone older than
+	 ** us is on disk too
+	 */
+	BUG_ON(jl->j_len <= 0);
+	BUG_ON(trans_id == journal->j_trans_id);
+
+	get_journal_list(jl);
+	if (flushall) {
+		if (flush_older_commits(s, jl) == 1) {
+			/* list disappeared during flush_older_commits.  return */
+			goto put_jl;
+		}
+	}
+
+	/* make sure nobody is trying to flush this one at the same time */
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
+
+	if (!journal_list_still_alive(s, trans_id)) {
+		mutex_unlock(&jl->j_commit_mutex);
+		goto put_jl;
+	}
+	BUG_ON(jl->j_trans_id == 0);
+
+	/* this commit is done, exit */
+	if (atomic_read(&(jl->j_commit_left)) <= 0) {
+		if (flushall) {
+			atomic_set(&(jl->j_older_commits_done), 1);
+		}
+		mutex_unlock(&jl->j_commit_mutex);
+		goto put_jl;
+	}
+
+	if (!list_empty(&jl->j_bh_list)) {
+		int ret;
+
+		/*
+		 * We might sleep in numerous places inside
+		 * write_ordered_buffers. Relax the write lock.
+		 */
+		reiserfs_write_unlock(s);
+		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
+					    journal, jl, &jl->j_bh_list);
+		if (ret < 0 && retval == 0)
+			retval = ret;
+		reiserfs_write_lock(s);
+	}
+	BUG_ON(!list_empty(&jl->j_bh_list));
+	/*
+	 * for the description block and all the log blocks, submit any buffers
+	 * that haven't already reached the disk.  Try to write at least 256
+	 * log blocks.  Later on, we will only wait on blocks that correspond
+	 * to this transaction, but while we're unplugging we might as well
+	 * get a chunk of data on there.
+	 */
+	atomic_inc(&journal->j_async_throttle);
+	write_len = jl->j_len + 1;
+	if (write_len < 256)
+		write_len = 256;
+	for (i = 0 ; i < write_len ; i++) {
+		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
+		    SB_ONDISK_JOURNAL_SIZE(s);
+		tbh = journal_find_get_block(s, bn);
+		if (tbh) {
+			if (buffer_dirty(tbh)) {
+				reiserfs_write_unlock(s);
+				ll_rw_block(WRITE, 1, &tbh);
+				reiserfs_write_lock(s);
+			}
+			put_bh(tbh);
+		}
+	}
+	atomic_dec(&journal->j_async_throttle);
+
+	for (i = 0; i < (jl->j_len + 1); i++) {
+		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
+		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
+		tbh = journal_find_get_block(s, bn);
+
+		reiserfs_write_unlock(s);
+		wait_on_buffer(tbh);
+		reiserfs_write_lock(s);
+		/* since we're using ll_rw_block above, it might have skipped
+		 * over a locked buffer.  Double check here (redundant:
+		 * sync_dirty_buffer() checks buffer_dirty itself).
+		 */
+		if (buffer_dirty(tbh)) {
+			reiserfs_write_unlock(s);
+			sync_dirty_buffer(tbh);
+			reiserfs_write_lock(s);
+		}
+		if (unlikely(!buffer_uptodate(tbh))) {
+#ifdef CONFIG_REISERFS_CHECK
+			reiserfs_warning(s, "journal-601",
+					 "buffer write failed");
+#endif
+			retval = -EIO;
+		}
+		put_bh(tbh);	/* once for journal_find_get_block */
+		put_bh(tbh);	/* once due to original getblk in do_journal_end */
+		atomic_dec(&(jl->j_commit_left));
+	}
+
+	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
+
+	/* If there was a write error in the journal - we can't commit
+	 * this transaction - it will be invalid and, if successful,
+	 * will just end up propagating the write error out to
+	 * the file system. */
+	if (likely(!retval && !reiserfs_is_journal_aborted(journal))) {
+		if (buffer_dirty(jl->j_commit_bh))
+			BUG();
+		mark_buffer_dirty(jl->j_commit_bh);
+		reiserfs_write_unlock(s);
+		if (reiserfs_barrier_flush(s))
+			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
+		else
+			sync_dirty_buffer(jl->j_commit_bh);
+		reiserfs_write_lock(s);
+	}
+
+	/* now that the commit block has been synced, check whether the
+	 * write actually succeeded */
+	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
+#ifdef CONFIG_REISERFS_CHECK
+		reiserfs_warning(s, "journal-615", "buffer write failed");
+#endif
+		retval = -EIO;
+	}
+	bforget(jl->j_commit_bh);
+	if (journal->j_last_commit_id != 0 &&
+	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
+		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
+				 journal->j_last_commit_id, jl->j_trans_id);
+	}
+	journal->j_last_commit_id = jl->j_trans_id;
+
+	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
+	cleanup_freed_for_journal_list(s, jl);
+
+	retval = retval ? retval : journal->j_errno;
+
+	/* mark the metadata dirty */
+	if (!retval)
+		dirty_one_transaction(s, jl);
+	atomic_dec(&(jl->j_commit_left));
+
+	if (flushall) {
+		atomic_set(&(jl->j_older_commits_done), 1);
+	}
+	mutex_unlock(&jl->j_commit_mutex);
+      put_jl:
+	put_journal_list(s, jl);
+
+	if (retval)
+		reiserfs_abort(s, retval, "Journal write error in %s",
+			       __func__);
+	return retval;
+}
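+
+/*
+ * On-disk picture of what was just committed (offsets wrap modulo
+ * SB_ONDISK_JOURNAL_SIZE(s)):
+ *
+ *	j_start: [desc][log block 1] ... [log block j_len][commit]
+ *
+ * hence the commit block at (j_start + j_len + 1) and the next
+ * transaction starting at (j_start + j_len + 2), as used by
+ * update_journal_header_block() below.
+ */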
+
+/*
+** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
+** returns NULL if it can't find anything
+*/
+static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
+							  reiserfs_journal_cnode
+							  *cn)
+{
+	struct super_block *sb = cn->sb;
+	b_blocknr_t blocknr = cn->blocknr;
+
+	cn = cn->hprev;
+	while (cn) {
+		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
+			return cn->jlist;
+		}
+		cn = cn->hprev;
+	}
+	return NULL;
+}
+
+static int newer_jl_done(struct reiserfs_journal_cnode *cn)
+{
+	struct super_block *sb = cn->sb;
+	b_blocknr_t blocknr = cn->blocknr;
+
+	cn = cn->hprev;
+	while (cn) {
+		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
+		    atomic_read(&cn->jlist->j_commit_left) != 0)
+				    return 0;
+		cn = cn->hprev;
+	}
+	return 1;
+}
+
+static void remove_journal_hash(struct super_block *,
+				struct reiserfs_journal_cnode **,
+				struct reiserfs_journal_list *, unsigned long,
+				int);
+
+/*
+** once all the real blocks have been flushed, it is safe to remove them from the
+** journal list for this transaction.  Aside from freeing the cnode, this also allows the
+** block to be reallocated for data blocks if it had been deleted.
+*/
+static void remove_all_from_journal_list(struct super_block *sb,
+					 struct reiserfs_journal_list *jl,
+					 int debug)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_cnode *cn, *last;
+	cn = jl->j_realblock;
+
+	/* which is better, to lock once around the whole loop, or
+	 ** to lock for each call to remove_journal_hash?
+	 */
+	while (cn) {
+		if (cn->blocknr != 0) {
+			if (debug) {
+				reiserfs_warning(sb, "reiserfs-2201",
+						 "block %u, bh is %d, state %ld",
+						 cn->blocknr, cn->bh ? 1 : 0,
+						 cn->state);
+			}
+			cn->state = 0;
+			remove_journal_hash(sb, journal->j_list_hash_table,
+					    jl, cn->blocknr, 1);
+		}
+		last = cn;
+		cn = cn->next;
+		free_cnode(sb, last);
+	}
+	jl->j_realblock = NULL;
+}
+
+/*
+** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
+** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
+** releasing blocks in this transaction for reuse as data blocks.
+** called by flush_journal_list, before it calls remove_all_from_journal_list
+**
+*/
+static int _update_journal_header_block(struct super_block *sb,
+					unsigned long offset,
+					unsigned int trans_id)
+{
+	struct reiserfs_journal_header *jh;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	if (reiserfs_is_journal_aborted(journal))
+		return -EIO;
+
+	if (trans_id >= journal->j_last_flush_trans_id) {
+		if (buffer_locked((journal->j_header_bh))) {
+			reiserfs_write_unlock(sb);
+			wait_on_buffer((journal->j_header_bh));
+			reiserfs_write_lock(sb);
+			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
+#ifdef CONFIG_REISERFS_CHECK
+				reiserfs_warning(sb, "journal-699",
+						 "buffer write failed");
+#endif
+				return -EIO;
+			}
+		}
+		journal->j_last_flush_trans_id = trans_id;
+		journal->j_first_unflushed_offset = offset;
+		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
+							b_data);
+		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
+		jh->j_first_unflushed_offset = cpu_to_le32(offset);
+		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
+
+		set_buffer_dirty(journal->j_header_bh);
+		reiserfs_write_unlock(sb);
+
+		if (reiserfs_barrier_flush(sb))
+			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
+		else
+			sync_dirty_buffer(journal->j_header_bh);
+
+		reiserfs_write_lock(sb);
+		if (!buffer_uptodate(journal->j_header_bh)) {
+			reiserfs_warning(sb, "journal-837",
+					 "IO error during journal replay");
+			return -EIO;
+		}
+	}
+	return 0;
+}
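+
+/*
+ * For reference, the three header fields written above are all the
+ * persistent replay state reiserfs keeps:
+ *
+ *	j_last_flush_trans_id    newest transaction fully on disk
+ *	j_first_unflushed_offset log offset where replay must begin
+ *	j_mount_id               rejects transactions from older mounts
+ *
+ * journal_read() below starts replay from exactly these values.
+ */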
+
+static int update_journal_header_block(struct super_block *sb,
+				       unsigned long offset,
+				       unsigned int trans_id)
+{
+	return _update_journal_header_block(sb, offset, trans_id);
+}
+
+/*
+** flush any and all journal lists older than you are
+** can only be called from flush_journal_list
+*/
+static int flush_older_journal_lists(struct super_block *sb,
+				     struct reiserfs_journal_list *jl)
+{
+	struct list_head *entry;
+	struct reiserfs_journal_list *other_jl;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	unsigned int trans_id = jl->j_trans_id;
+
+	/* we know we are the only ones flushing things, no extra race
+	 * protection is required.
+	 */
+      restart:
+	entry = journal->j_journal_list.next;
+	/* Did we wrap? */
+	if (entry == &journal->j_journal_list)
+		return 0;
+	other_jl = JOURNAL_LIST_ENTRY(entry);
+	if (other_jl->j_trans_id < trans_id) {
+		BUG_ON(other_jl->j_refcount <= 0);
+		/* do not flush all */
+		flush_journal_list(sb, other_jl, 0);
+
+		/* other_jl is now deleted from the list */
+		goto restart;
+	}
+	return 0;
+}
+
+static void del_from_work_list(struct super_block *s,
+			       struct reiserfs_journal_list *jl)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	if (!list_empty(&jl->j_working_list)) {
+		list_del_init(&jl->j_working_list);
+		journal->j_num_work_lists--;
+	}
+}
+
+/* flush a journal list, both commit and real blocks
+**
+** always set flushall to 1, unless you are calling from inside
+** flush_journal_list
+**
+** IMPORTANT.  This can only be called while there are no journal writers,
+** and the journal is locked.  That means it can only be called from
+** do_journal_end, or by journal_release
+*/
+static int flush_journal_list(struct super_block *s,
+			      struct reiserfs_journal_list *jl, int flushall)
+{
+	struct reiserfs_journal_list *pjl;
+	struct reiserfs_journal_cnode *cn, *last;
+	int count;
+	int was_jwait = 0;
+	int was_dirty = 0;
+	struct buffer_head *saved_bh;
+	unsigned long j_len_saved = jl->j_len;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	int err = 0;
+
+	BUG_ON(j_len_saved <= 0);
+
+	if (atomic_read(&journal->j_wcount) != 0) {
+		reiserfs_warning(s, "clm-2048", "called with wcount %d",
+				 atomic_read(&journal->j_wcount));
+	}
+	BUG_ON(jl->j_trans_id == 0);
+
+	/* if flushall == 0, the lock is already held */
+	if (flushall) {
+		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
+	} else if (mutex_trylock(&journal->j_flush_mutex)) {
+		BUG();
+	}
+
+	count = 0;
+	if (j_len_saved > journal->j_trans_max) {
+		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
+			       j_len_saved, jl->j_trans_id);
+		return 0;
+	}
+
+	/* if all the work is already done, get out of here */
+	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
+	    atomic_read(&(jl->j_commit_left)) <= 0) {
+		goto flush_older_and_return;
+	}
+
+	/* start by putting the commit list on disk.  This will also flush
+	 ** the commit lists of any older transactions
+	 */
+	flush_commit_list(s, jl, 1);
+
+	if (!(jl->j_state & LIST_DIRTY)
+	    && !reiserfs_is_journal_aborted(journal))
+		BUG();
+
+	/* are we done now? */
+	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
+	    atomic_read(&(jl->j_commit_left)) <= 0) {
+		goto flush_older_and_return;
+	}
+
+	/* loop through each cnode, see if we need to write it,
+	 ** or wait on a more recent transaction, or just ignore it
+	 */
+	if (atomic_read(&(journal->j_wcount)) != 0) {
+		reiserfs_panic(s, "journal-844", "journal list is flushing, "
+			       "wcount is not 0");
+	}
+	cn = jl->j_realblock;
+	while (cn) {
+		was_jwait = 0;
+		was_dirty = 0;
+		saved_bh = NULL;
+		/* blocknr of 0 is no longer in the hash, ignore it */
+		if (cn->blocknr == 0) {
+			goto free_cnode;
+		}
+
+		/* This transaction failed commit. Don't write out to the disk */
+		if (!(jl->j_state & LIST_DIRTY))
+			goto free_cnode;
+
+		pjl = find_newer_jl_for_cn(cn);
+		/* the order is important here.  We check pjl to make sure we
+		 ** don't clear BH_JDirty_wait if we aren't the one writing this
+		 ** block to disk
+		 */
+		if (!pjl && cn->bh) {
+			saved_bh = cn->bh;
+
+			/* we do this to make sure nobody releases the buffer while
+			 ** we are working with it
+			 */
+			get_bh(saved_bh);
+
+			if (buffer_journal_dirty(saved_bh)) {
+				BUG_ON(!can_dirty(cn));
+				was_jwait = 1;
+				was_dirty = 1;
+			} else if (can_dirty(cn)) {
+				/* everything with !pjl && jwait should be writable */
+				BUG();
+			}
+		}
+
+		/* if someone has this block in a newer transaction, just make
+		 ** sure they are committed, and don't try writing it to disk
+		 */
+		if (pjl) {
+			if (atomic_read(&pjl->j_commit_left))
+				flush_commit_list(s, pjl, 1);
+			goto free_cnode;
+		}
+
+		/* bh == NULL when the block got to disk on its own, OR,
+		 ** the block got freed in a future transaction
+		 */
+		if (saved_bh == NULL) {
+			goto free_cnode;
+		}
+
+		/* this should never happen.  kupdate_one_transaction has this list
+		 ** locked while it works, so we should never see a buffer here that
+		 ** is not marked JDirty_wait
+		 */
+		if ((!was_jwait) && !buffer_locked(saved_bh)) {
+			reiserfs_warning(s, "journal-813",
+					 "BAD! buffer %llu %cdirty %cjwait, "
+					 "not in a newer transaction",
+					 (unsigned long long)saved_bh->
+					 b_blocknr, was_dirty ? ' ' : '!',
+					 was_jwait ? ' ' : '!');
+		}
+		if (was_dirty) {
+			/* we inc again because saved_bh gets decremented at free_cnode */
+			get_bh(saved_bh);
+			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
+			lock_buffer(saved_bh);
+			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
+			if (buffer_dirty(saved_bh))
+				submit_logged_buffer(saved_bh);
+			else
+				unlock_buffer(saved_bh);
+			count++;
+		} else {
+			reiserfs_warning(s, "clm-2082",
+					 "Unable to flush buffer %llu in %s",
+					 (unsigned long long)saved_bh->
+					 b_blocknr, __func__);
+		}
+	      free_cnode:
+		last = cn;
+		cn = cn->next;
+		if (saved_bh) {
+			/* we incremented this to keep others from taking the buffer head away */
+			put_bh(saved_bh);
+			if (atomic_read(&(saved_bh->b_count)) < 0) {
+				reiserfs_warning(s, "journal-945",
+						 "saved_bh->b_count < 0");
+			}
+		}
+	}
+	if (count > 0) {
+		cn = jl->j_realblock;
+		while (cn) {
+			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
+				if (!cn->bh) {
+					reiserfs_panic(s, "journal-1011",
+						       "cn->bh is NULL");
+				}
+
+				reiserfs_write_unlock(s);
+				wait_on_buffer(cn->bh);
+				reiserfs_write_lock(s);
+
+				if (!cn->bh) {
+					reiserfs_panic(s, "journal-1012",
+						       "cn->bh is NULL");
+				}
+				if (unlikely(!buffer_uptodate(cn->bh))) {
+#ifdef CONFIG_REISERFS_CHECK
+					reiserfs_warning(s, "journal-949",
+							 "buffer write failed");
+#endif
+					err = -EIO;
+				}
+				/* note, we must clear the JDirty_wait bit after the up to date
+				 ** check, otherwise we race against our flushpage routine
+				 */
+				BUG_ON(!test_clear_buffer_journal_dirty
+				       (cn->bh));
+
+				/* drop one ref for us */
+				put_bh(cn->bh);
+				/* drop one ref for journal_mark_dirty */
+				release_buffer_page(cn->bh);
+			}
+			cn = cn->next;
+		}
+	}
+
+	if (err)
+		reiserfs_abort(s, -EIO,
+			       "Write error while pushing transaction to disk in %s",
+			       __func__);
+      flush_older_and_return:
+
+	/* before we can update the journal header block, we _must_ flush all
+	 ** real blocks from all older transactions to disk.  This is because
+	 ** once the header block is updated, this transaction will not be
+	 ** replayed after a crash
+	 */
+	if (flushall) {
+		flush_older_journal_lists(s, jl);
+	}
+
+	err = journal->j_errno;
+	/* before we can remove everything from the hash tables for this
+	 ** transaction, we must make sure it can never be replayed
+	 **
+	 ** since we are only called from do_journal_end, we know for sure there
+	 ** are no allocations going on while we are flushing journal lists.  So,
+	 ** we only need to update the journal header block for the last list
+	 ** being flushed
+	 */
+	if (!err && flushall) {
+		err =
+		    update_journal_header_block(s,
+						(jl->j_start + jl->j_len +
+						 2) % SB_ONDISK_JOURNAL_SIZE(s),
+						jl->j_trans_id);
+		if (err)
+			reiserfs_abort(s, -EIO,
+				       "Write error while updating journal header in %s",
+				       __func__);
+	}
+	remove_all_from_journal_list(s, jl, 0);
+	list_del_init(&jl->j_list);
+	journal->j_num_lists--;
+	del_from_work_list(s, jl);
+
+	if (journal->j_last_flush_id != 0 &&
+	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
+		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
+				 journal->j_last_flush_id, jl->j_trans_id);
+	}
+	journal->j_last_flush_id = jl->j_trans_id;
+
+	/* not strictly required since we are freeing the list, but it should
+	 * help find code using dead lists later on
+	 */
+	jl->j_len = 0;
+	atomic_set(&(jl->j_nonzerolen), 0);
+	jl->j_start = 0;
+	jl->j_realblock = NULL;
+	jl->j_commit_bh = NULL;
+	jl->j_trans_id = 0;
+	jl->j_state = 0;
+	put_journal_list(s, jl);
+	if (flushall)
+		mutex_unlock(&journal->j_flush_mutex);
+	return err;
+}
+
+static int test_transaction(struct super_block *s,
+                            struct reiserfs_journal_list *jl)
+{
+	struct reiserfs_journal_cnode *cn;
+
+	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
+		return 1;
+
+	cn = jl->j_realblock;
+	while (cn) {
+		/* if the blocknr == 0, this has been cleared from the hash,
+		 ** skip it
+		 */
+		if (cn->blocknr == 0) {
+			goto next;
+		}
+		if (cn->bh && !newer_jl_done(cn))
+			return 0;
+	      next:
+		cn = cn->next;
+		cond_resched();
+	}
+	return 0;
+}
+
+static int write_one_transaction(struct super_block *s,
+				 struct reiserfs_journal_list *jl,
+				 struct buffer_chunk *chunk)
+{
+	struct reiserfs_journal_cnode *cn;
+	int ret = 0;
+
+	jl->j_state |= LIST_TOUCHED;
+	del_from_work_list(s, jl);
+	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
+		return 0;
+	}
+
+	cn = jl->j_realblock;
+	while (cn) {
+		/* if the blocknr == 0, this has been cleared from the hash,
+		 ** skip it
+		 */
+		if (cn->blocknr == 0) {
+			goto next;
+		}
+		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
+			struct buffer_head *tmp_bh;
+			/* we can race against journal_mark_freed when we try
+			 * to lock_buffer(cn->bh), so we have to inc the buffer
+			 * count, and recheck things after locking
+			 */
+			tmp_bh = cn->bh;
+			get_bh(tmp_bh);
+			lock_buffer(tmp_bh);
+			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
+				if (!buffer_journal_dirty(tmp_bh) ||
+				    buffer_journal_prepared(tmp_bh))
+					BUG();
+				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
+				ret++;
+			} else {
+				/* note, cn->bh might be null now */
+				unlock_buffer(tmp_bh);
+			}
+			put_bh(tmp_bh);
+		}
+	      next:
+		cn = cn->next;
+		cond_resched();
+	}
+	return ret;
+}
+
+/* used by flush_commit_list */
+static int dirty_one_transaction(struct super_block *s,
+				 struct reiserfs_journal_list *jl)
+{
+	struct reiserfs_journal_cnode *cn;
+	struct reiserfs_journal_list *pjl;
+	int ret = 0;
+
+	jl->j_state |= LIST_DIRTY;
+	cn = jl->j_realblock;
+	while (cn) {
+		/* look for a more recent transaction that logged this
+		 ** buffer.  Only the most recent transaction with a buffer in
+		 ** it is allowed to send that buffer to disk
+		 */
+		pjl = find_newer_jl_for_cn(cn);
+		if (!pjl && cn->blocknr && cn->bh
+		    && buffer_journal_dirty(cn->bh)) {
+			BUG_ON(!can_dirty(cn));
+			/* if the buffer is prepared, it will either be logged
+			 * or restored.  If restored, we need to make sure
+			 * it actually gets marked dirty
+			 */
+			clear_buffer_journal_new(cn->bh);
+			if (buffer_journal_prepared(cn->bh)) {
+				set_buffer_journal_restore_dirty(cn->bh);
+			} else {
+				set_buffer_journal_test(cn->bh);
+				mark_buffer_dirty(cn->bh);
+			}
+		}
+		cn = cn->next;
+	}
+	return ret;
+}
+
+static int kupdate_transactions(struct super_block *s,
+				struct reiserfs_journal_list *jl,
+				struct reiserfs_journal_list **next_jl,
+				unsigned int *next_trans_id,
+				int num_blocks, int num_trans)
+{
+	int ret = 0;
+	int written = 0;
+	int transactions_flushed = 0;
+	unsigned int orig_trans_id = jl->j_trans_id;
+	struct buffer_chunk chunk;
+	struct list_head *entry;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	chunk.nr = 0;
+
+	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
+	if (!journal_list_still_alive(s, orig_trans_id)) {
+		goto done;
+	}
+
+	/* we've got j_flush_mutex held, nobody is going to delete any
+	 * of these lists out from underneath us
+	 */
+	while ((num_trans && transactions_flushed < num_trans) ||
+	       (!num_trans && written < num_blocks)) {
+
+		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
+		    atomic_read(&jl->j_commit_left)
+		    || !(jl->j_state & LIST_DIRTY)) {
+			del_from_work_list(s, jl);
+			break;
+		}
+		ret = write_one_transaction(s, jl, &chunk);
+
+		if (ret < 0)
+			goto done;
+		transactions_flushed++;
+		written += ret;
+		entry = jl->j_list.next;
+
+		/* did we wrap? */
+		if (entry == &journal->j_journal_list) {
+			break;
+		}
+		jl = JOURNAL_LIST_ENTRY(entry);
+
+		/* don't bother with older transactions */
+		if (jl->j_trans_id <= orig_trans_id)
+			break;
+	}
+	if (chunk.nr) {
+		write_chunk(&chunk);
+	}
+
+      done:
+	mutex_unlock(&journal->j_flush_mutex);
+	return ret;
+}
+
+/* O_SYNC and fsync heavy applications tend to use up
+** all the journal list slots with tiny transactions.  These
+** trigger lots and lots of calls to update the header block, which
+** adds seeks and slows things down.
+**
+** This function tries to clear out a large chunk of the journal lists
+** at once, which makes everything faster since only the newest journal
+** list updates the header block
+*/
+static int flush_used_journal_lists(struct super_block *s,
+				    struct reiserfs_journal_list *jl)
+{
+	unsigned long len = 0;
+	unsigned long cur_len;
+	int ret;
+	int i;
+	int limit = 256;
+	struct reiserfs_journal_list *tjl;
+	struct reiserfs_journal_list *flush_jl;
+	unsigned int trans_id;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+
+	flush_jl = tjl = jl;
+
+	/* in data logging mode, try harder to flush a lot of blocks */
+	if (reiserfs_data_log(s))
+		limit = 1024;
+	/* flush for 256 transactions or limit blocks, whichever comes first */
+	for (i = 0; i < 256 && len < limit; i++) {
+		if (atomic_read(&tjl->j_commit_left) ||
+		    tjl->j_trans_id < jl->j_trans_id) {
+			break;
+		}
+		cur_len = atomic_read(&tjl->j_nonzerolen);
+		if (cur_len > 0) {
+			tjl->j_state &= ~LIST_TOUCHED;
+		}
+		len += cur_len;
+		flush_jl = tjl;
+		if (tjl->j_list.next == &journal->j_journal_list)
+			break;
+		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
+	}
+	/* try to find a group of blocks we can flush across all the
+	 ** transactions, but only bother if we've actually spanned
+	 ** across multiple lists
+	 */
+	if (flush_jl != jl) {
+		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
+	}
+	flush_journal_list(s, flush_jl, 1);
+	return 0;
+}
+
+/*
+** removes any nodes in the table matching this block number and
+** superblock.  Only touches the hnext and hprev pointers.
+*/
+void remove_journal_hash(struct super_block *sb,
+			 struct reiserfs_journal_cnode **table,
+			 struct reiserfs_journal_list *jl,
+			 unsigned long block, int remove_freed)
+{
+	struct reiserfs_journal_cnode *cur;
+	struct reiserfs_journal_cnode **head;
+
+	head = &(journal_hash(table, sb, block));
+	if (!head) {
+		return;
+	}
+	cur = *head;
+	while (cur) {
+		if (cur->blocknr == block && cur->sb == sb
+		    && (jl == NULL || jl == cur->jlist)
+		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
+			if (cur->hnext) {
+				cur->hnext->hprev = cur->hprev;
+			}
+			if (cur->hprev) {
+				cur->hprev->hnext = cur->hnext;
+			} else {
+				*head = cur->hnext;
+			}
+			cur->blocknr = 0;
+			cur->sb = NULL;
+			cur->state = 0;
+			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
+				atomic_dec(&(cur->jlist->j_nonzerolen));
+			cur->bh = NULL;
+			cur->jlist = NULL;
+		}
+		cur = cur->hnext;
+	}
+}
+
+static void free_journal_ram(struct super_block *sb)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	kfree(journal->j_current_jl);
+	journal->j_num_lists--;
+
+	vfree(journal->j_cnode_free_orig);
+	free_list_bitmaps(sb, journal->j_list_bitmap);
+	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */
+	if (journal->j_header_bh) {
+		brelse(journal->j_header_bh);
+	}
+	/* j_header_bh is on the journal dev, make sure not to release the journal
+	 * dev until we brelse j_header_bh
+	 */
+	release_journal_dev(sb, journal);
+	vfree(journal);
+}
+
+/*
+** call on unmount.  Only set error to 1 if you haven't made your way out
+** of read_super() yet.  Any other caller must keep error at 0.
+*/
+static int do_journal_release(struct reiserfs_transaction_handle *th,
+			      struct super_block *sb, int error)
+{
+	struct reiserfs_transaction_handle myth;
+	int flushed = 0;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	/* we only want to flush out transactions if we were called with error == 0
+	 */
+	if (!error && !(sb->s_flags & MS_RDONLY)) {
+		/* end the current trans */
+		BUG_ON(!th->t_trans_id);
+		do_journal_end(th, sb, 10, FLUSH_ALL);
+
+		/* make sure something gets logged to force our way into the flush code */
+		if (!journal_join(&myth, sb, 1)) {
+			reiserfs_prepare_for_journal(sb,
+						     SB_BUFFER_WITH_SB(sb),
+						     1);
+			journal_mark_dirty(&myth, sb,
+					   SB_BUFFER_WITH_SB(sb));
+			do_journal_end(&myth, sb, 1, FLUSH_ALL);
+			flushed = 1;
+		}
+	}
+
+	/* this also catches errors during the do_journal_end above */
+	if (!error && reiserfs_is_journal_aborted(journal)) {
+		memset(&myth, 0, sizeof(myth));
+		if (!journal_join_abort(&myth, sb, 1)) {
+			reiserfs_prepare_for_journal(sb,
+						     SB_BUFFER_WITH_SB(sb),
+						     1);
+			journal_mark_dirty(&myth, sb,
+					   SB_BUFFER_WITH_SB(sb));
+			do_journal_end(&myth, sb, 1, FLUSH_ALL);
+		}
+	}
+
+	reiserfs_mounted_fs_count--;
+	/* wait for all commits to finish */
+	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
+
+	/*
+	 * We must release the write lock here because
+	 * the workqueue job (flush_async_commit) needs this lock
+	 */
+	reiserfs_write_unlock(sb);
+	flush_workqueue(commit_wq);
+
+	if (!reiserfs_mounted_fs_count) {
+		destroy_workqueue(commit_wq);
+		commit_wq = NULL;
+	}
+
+	free_journal_ram(sb);
+
+	reiserfs_write_lock(sb);
+
+	return 0;
+}
+
+/*
+** call on unmount.  flush all journal trans, release all alloc'd ram
+*/
+int journal_release(struct reiserfs_transaction_handle *th,
+		    struct super_block *sb)
+{
+	return do_journal_release(th, sb, 0);
+}
+
+/*
+** only call from an error condition inside reiserfs_read_super!
+*/
+int journal_release_error(struct reiserfs_transaction_handle *th,
+			  struct super_block *sb)
+{
+	return do_journal_release(th, sb, 1);
+}
+
+/* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
+static int journal_compare_desc_commit(struct super_block *sb,
+				       struct reiserfs_journal_desc *desc,
+				       struct reiserfs_journal_commit *commit)
+{
+	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
+	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
+	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
+	    get_commit_trans_len(commit) <= 0) {
+		return 1;
+	}
+	return 0;
+}
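+
+/*
+ * Layout being validated above (sketch): a logged transaction is
+ * bracketed by a description block and a commit block that must agree:
+ *
+ *	[desc: id, len, mount_id][len log blocks][commit: id, len]
+ *
+ * Any mismatch, or a length outside (0, j_trans_max], is treated as
+ * corruption.
+ */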
+
+/* returns 0 if it did not find a description block
+** returns -1 if it found a corrupt commit block
+** returns 1 if both desc and commit were valid
+*/
+static int journal_transaction_is_valid(struct super_block *sb,
+					struct buffer_head *d_bh,
+					unsigned int *oldest_invalid_trans_id,
+					unsigned long *newest_mount_id)
+{
+	struct reiserfs_journal_desc *desc;
+	struct reiserfs_journal_commit *commit;
+	struct buffer_head *c_bh;
+	unsigned long offset;
+
+	if (!d_bh)
+		return 0;
+
+	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
+	if (get_desc_trans_len(desc) > 0
+	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
+		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
+		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
+			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+				       "journal-986: transaction "
+				       "is valid returning because trans_id %d is greater than "
+				       "oldest_invalid %lu",
+				       get_desc_trans_id(desc),
+				       *oldest_invalid_trans_id);
+			return 0;
+		}
+		if (newest_mount_id
+		    && *newest_mount_id > get_desc_mount_id(desc)) {
+			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+				       "journal-1087: transaction "
+				       "is valid returning because mount_id %d is less than "
+				       "newest_mount_id %lu",
+				       get_desc_mount_id(desc),
+				       *newest_mount_id);
+			return -1;
+		}
+		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
+			reiserfs_warning(sb, "journal-2018",
+					 "Bad transaction length %d "
+					 "encountered, ignoring transaction",
+					 get_desc_trans_len(desc));
+			return -1;
+		}
+		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
+
+		/* ok, we have a journal description block, let's see if the transaction was valid */
+		c_bh =
+		    journal_bread(sb,
+				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+				  ((offset + get_desc_trans_len(desc) +
+				    1) % SB_ONDISK_JOURNAL_SIZE(sb)));
+		if (!c_bh)
+			return 0;
+		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
+		if (journal_compare_desc_commit(sb, desc, commit)) {
+			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+				       "journal_transaction_is_valid, commit offset %ld had bad "
+				       "time %d or length %d",
+				       c_bh->b_blocknr -
+				       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
+				       get_commit_trans_id(commit),
+				       get_commit_trans_len(commit));
+			brelse(c_bh);
+			if (oldest_invalid_trans_id) {
+				*oldest_invalid_trans_id =
+				    get_desc_trans_id(desc);
+				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+					       "journal-1004: "
+					       "transaction_is_valid setting oldest invalid trans_id "
+					       "to %d",
+					       get_desc_trans_id(desc));
+			}
+			return -1;
+		}
+		brelse(c_bh);
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+			       "journal-1006: found valid "
+			       "transaction start offset %llu, len %d id %d",
+			       d_bh->b_blocknr -
+			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
+			       get_desc_trans_len(desc),
+			       get_desc_trans_id(desc));
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+static void brelse_array(struct buffer_head **heads, int num)
+{
+	int i;
+	for (i = 0; i < num; i++) {
+		brelse(heads[i]);
+	}
+}
+
+/*
+** given the start, and values for the oldest acceptable transactions,
+** this either reads in and replays a transaction, or returns because the
+** transaction is invalid or too old.
+*/
+static int journal_read_transaction(struct super_block *sb,
+				    unsigned long cur_dblock,
+				    unsigned long oldest_start,
+				    unsigned int oldest_trans_id,
+				    unsigned long newest_mount_id)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_desc *desc;
+	struct reiserfs_journal_commit *commit;
+	unsigned int trans_id = 0;
+	struct buffer_head *c_bh;
+	struct buffer_head *d_bh;
+	struct buffer_head **log_blocks = NULL;
+	struct buffer_head **real_blocks = NULL;
+	unsigned int trans_offset;
+	int i;
+	int trans_half;
+
+	d_bh = journal_bread(sb, cur_dblock);
+	if (!d_bh)
+		return 1;
+	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
+	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
+	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
+		       "journal_read_transaction, offset %llu, len %d mount_id %d",
+		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
+		       get_desc_trans_len(desc), get_desc_mount_id(desc));
+	if (get_desc_trans_id(desc) < oldest_trans_id) {
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
+			       "journal_read_trans skipping because %lu is too old",
+			       cur_dblock -
+			       SB_ONDISK_JOURNAL_1st_BLOCK(sb));
+		brelse(d_bh);
+		return 1;
+	}
+	if (get_desc_mount_id(desc) != newest_mount_id) {
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
+			       "journal_read_trans skipping because %d is != "
+			       "newest_mount_id %lu", get_desc_mount_id(desc),
+			       newest_mount_id);
+		brelse(d_bh);
+		return 1;
+	}
+	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+			     ((trans_offset + get_desc_trans_len(desc) + 1) %
+			      SB_ONDISK_JOURNAL_SIZE(sb)));
+	if (!c_bh) {
+		brelse(d_bh);
+		return 1;
+	}
+	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
+	if (journal_compare_desc_commit(sb, desc, commit)) {
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+			       "journal_read_transaction, "
+			       "commit offset %llu had bad time %d or length %d",
+			       c_bh->b_blocknr -
+			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
+			       get_commit_trans_id(commit),
+			       get_commit_trans_len(commit));
+		brelse(c_bh);
+		brelse(d_bh);
+		return 1;
+	}
+
+	if (bdev_read_only(sb->s_bdev)) {
+		reiserfs_warning(sb, "clm-2076",
+				 "device is readonly, unable to replay log");
+		brelse(c_bh);
+		brelse(d_bh);
+		return -EROFS;
+	}
+
+	trans_id = get_desc_trans_id(desc);
+	/* now we know we've got a good transaction, and it was inside the valid time ranges */
+	log_blocks = kmalloc(get_desc_trans_len(desc) *
+			     sizeof(struct buffer_head *), GFP_NOFS);
+	real_blocks = kmalloc(get_desc_trans_len(desc) *
+			      sizeof(struct buffer_head *), GFP_NOFS);
+	if (!log_blocks || !real_blocks) {
+		brelse(c_bh);
+		brelse(d_bh);
+		kfree(log_blocks);
+		kfree(real_blocks);
+		reiserfs_warning(sb, "journal-1169",
+				 "kmalloc failed, unable to mount FS");
+		return -1;
+	}
+	/* get all the buffer heads */
+	trans_half = journal_trans_half(sb->s_blocksize);
+	for (i = 0; i < get_desc_trans_len(desc); i++) {
+		log_blocks[i] =
+		    journal_getblk(sb,
+				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+				   (trans_offset + 1 +
+				    i) % SB_ONDISK_JOURNAL_SIZE(sb));
+		if (i < trans_half) {
+			real_blocks[i] =
+			    sb_getblk(sb,
+				      le32_to_cpu(desc->j_realblock[i]));
+		} else {
+			real_blocks[i] =
+			    sb_getblk(sb,
+				      le32_to_cpu(commit->
+						  j_realblock[i - trans_half]));
+		}
+		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
+			reiserfs_warning(sb, "journal-1207",
+					 "REPLAY FAILURE fsck required! "
+					 "Block to replay is outside of "
+					 "filesystem");
+			goto abort_replay;
+		}
+		/* make sure we don't try to replay onto log or reserved area */
+		if (is_block_in_log_or_reserved_area
+		    (sb, real_blocks[i]->b_blocknr)) {
+			reiserfs_warning(sb, "journal-1204",
+					 "REPLAY FAILURE fsck required! "
+					 "Trying to replay onto a log block");
+		      abort_replay:
+			brelse_array(log_blocks, i);
+			brelse_array(real_blocks, i);
+			brelse(c_bh);
+			brelse(d_bh);
+			kfree(log_blocks);
+			kfree(real_blocks);
+			return -1;
+		}
+	}
+	/* read in the log blocks, memcpy to the corresponding real block */
+	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
+	for (i = 0; i < get_desc_trans_len(desc); i++) {
+
+		reiserfs_write_unlock(sb);
+		wait_on_buffer(log_blocks[i]);
+		reiserfs_write_lock(sb);
+
+		if (!buffer_uptodate(log_blocks[i])) {
+			reiserfs_warning(sb, "journal-1212",
+					 "REPLAY FAILURE fsck required! "
+					 "buffer write failed");
+			brelse_array(log_blocks + i,
+				     get_desc_trans_len(desc) - i);
+			brelse_array(real_blocks, get_desc_trans_len(desc));
+			brelse(c_bh);
+			brelse(d_bh);
+			kfree(log_blocks);
+			kfree(real_blocks);
+			return -1;
+		}
+		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
+		       real_blocks[i]->b_size);
+		set_buffer_uptodate(real_blocks[i]);
+		brelse(log_blocks[i]);
+	}
+	/* flush out the real blocks */
+	for (i = 0; i < get_desc_trans_len(desc); i++) {
+		set_buffer_dirty(real_blocks[i]);
+		write_dirty_buffer(real_blocks[i], WRITE);
+	}
+	for (i = 0; i < get_desc_trans_len(desc); i++) {
+		wait_on_buffer(real_blocks[i]);
+		if (!buffer_uptodate(real_blocks[i])) {
+			reiserfs_warning(sb, "journal-1226",
+					 "REPLAY FAILURE, fsck required! "
+					 "buffer write failed");
+			brelse_array(real_blocks + i,
+				     get_desc_trans_len(desc) - i);
+			brelse(c_bh);
+			brelse(d_bh);
+			kfree(log_blocks);
+			kfree(real_blocks);
+			return -1;
+		}
+		brelse(real_blocks[i]);
+	}
+	cur_dblock =
+	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+	    ((trans_offset + get_desc_trans_len(desc) +
+	      2) % SB_ONDISK_JOURNAL_SIZE(sb));
+	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+		       "journal-1095: setting journal " "start to offset %ld",
+		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
+
+	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
+	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
+	journal->j_last_flush_trans_id = trans_id;
+	journal->j_trans_id = trans_id + 1;
+	/* check for trans_id overflow */
+	if (journal->j_trans_id == 0)
+		journal->j_trans_id = 10;
+	brelse(c_bh);
+	brelse(d_bh);
+	kfree(log_blocks);
+	kfree(real_blocks);
+	return 0;
+}
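+
+/*
+ * Replay recipe used above, in short: validate desc + commit, read all
+ * the log blocks, memcpy each into its real (home-location) buffer,
+ * write those out and wait, then point j_start just past the
+ * transaction; the +2 skips the desc and commit blocks.
+ */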
+
+/* This function reads blocks starting from block and to max_block of bufsize
+   size (but no more than BUFNR blocks at a time). This proved to improve
+   mounting speed on self-rebuilding raid5 arrays at least.
+   Right now it is only used from journal code. But later we might use it
+   from other places.
+   Note: Do not use journal_getblk/sb_getblk functions here! */
+static struct buffer_head *reiserfs_breada(struct block_device *dev,
+					   b_blocknr_t block, int bufsize,
+					   b_blocknr_t max_block)
+{
+	struct buffer_head *bhlist[BUFNR];
+	unsigned int blocks = BUFNR;
+	struct buffer_head *bh;
+	int i, j;
+
+	bh = __getblk(dev, block, bufsize);
+	if (buffer_uptodate(bh))
+		return bh;
+
+	if (block + BUFNR > max_block) {
+		blocks = max_block - block;
+	}
+	bhlist[0] = bh;
+	j = 1;
+	for (i = 1; i < blocks; i++) {
+		bh = __getblk(dev, block + i, bufsize);
+		if (buffer_uptodate(bh)) {
+			brelse(bh);
+			break;
+		} else
+			bhlist[j++] = bh;
+	}
+	ll_rw_block(READ, j, bhlist);
+	for (i = 1; i < j; i++)
+		brelse(bhlist[i]);
+	bh = bhlist[0];
+	wait_on_buffer(bh);
+	if (buffer_uptodate(bh))
+		return bh;
+	brelse(bh);
+	return NULL;
+}
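+
+/*
+ * Usage note: journal_read() below scans the log sequentially, so this
+ * read-ahead turns up to BUFNR single-block reads into one batched
+ * submission.  Only bhlist[0] is returned held; the read-ahead buffers
+ * are brelse'd as soon as they have been queued.
+ */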
+
+/*
+** read and replay the log
+** on a clean unmount, the journal header's next unflushed pointer will
+** point to an invalid transaction.  Testing for that first avoids scanning
+** the whole log for transactions, which keeps normal mount times fast.
+**
+** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
+**
+** On exit, it sets things up so the first transaction will work correctly.
+*/
+static int journal_read(struct super_block *sb)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_desc *desc;
+	unsigned int oldest_trans_id = 0;
+	unsigned int oldest_invalid_trans_id = 0;
+	time_t start;
+	unsigned long oldest_start = 0;
+	unsigned long cur_dblock = 0;
+	unsigned long newest_mount_id = 9;
+	struct buffer_head *d_bh;
+	struct reiserfs_journal_header *jh;
+	int valid_journal_header = 0;
+	int replay_count = 0;
+	int continue_replay = 1;
+	int ret;
+	char b[BDEVNAME_SIZE];
+
+	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
+	reiserfs_info(sb, "checking transaction log (%s)\n",
+		      bdevname(journal->j_dev_bd, b));
+	start = get_seconds();
+
+	/* step 1, read in the journal header block.  Check the transaction it says
+	 ** is the first unflushed, and if that transaction is not valid,
+	 ** replay is done
+	 */
+	journal->j_header_bh = journal_bread(sb,
+					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
+					     + SB_ONDISK_JOURNAL_SIZE(sb));
+	if (!journal->j_header_bh) {
+		return 1;
+	}
+	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
+	if (le32_to_cpu(jh->j_first_unflushed_offset) <
+	    SB_ONDISK_JOURNAL_SIZE(sb)
+	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
+		oldest_start =
+		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+		    le32_to_cpu(jh->j_first_unflushed_offset);
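+		/* replay starts with the transaction after the last flushed
+		 * one, hence the "+ 1" */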
+		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+		newest_mount_id = le32_to_cpu(jh->j_mount_id);
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+			       "journal-1153: found in "
+			       "header: first_unflushed_offset %d, last_flushed_trans_id "
+			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
+			       le32_to_cpu(jh->j_last_flush_trans_id));
+		valid_journal_header = 1;
+
+		/* now, we try to read the first unflushed offset.  If it is not valid,
+		 ** there is nothing more we can do, and it makes no sense to read
+		 ** through the whole log.
+		 */
+		d_bh =
+		    journal_bread(sb,
+				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+				  le32_to_cpu(jh->j_first_unflushed_offset));
+		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
+		if (!ret) {
+			continue_replay = 0;
+		}
+		brelse(d_bh);
+		goto start_log_replay;
+	}
+
+	/* ok, there are transactions that need to be replayed.  start with the first log block, find
+	 ** all the valid transactions, and pick out the oldest.
+	 */
+	while (continue_replay
+	       && cur_dblock <
+	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+		SB_ONDISK_JOURNAL_SIZE(sb))) {
+		/* Note that the blocksize of the primary fs device and the
+		   journal device must be the same */
+		d_bh =
+		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
+				    sb->s_blocksize,
+				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+				    SB_ONDISK_JOURNAL_SIZE(sb));
+		ret =
+		    journal_transaction_is_valid(sb, d_bh,
+						 &oldest_invalid_trans_id,
+						 &newest_mount_id);
+		if (ret == 1) {
+			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
+			if (oldest_start == 0) {	/* init all oldest_ values */
+				oldest_trans_id = get_desc_trans_id(desc);
+				oldest_start = d_bh->b_blocknr;
+				newest_mount_id = get_desc_mount_id(desc);
+				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+					       "journal-1179: Setting "
+					       "oldest_start to offset %llu, trans_id %lu",
+					       oldest_start -
+					       SB_ONDISK_JOURNAL_1st_BLOCK
+					       (sb), oldest_trans_id);
+			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
+				/* one we just read was older */
+				oldest_trans_id = get_desc_trans_id(desc);
+				oldest_start = d_bh->b_blocknr;
+				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+					       "journal-1180: Resetting "
+					       "oldest_start to offset %lu, trans_id %lu",
+					       oldest_start -
+					       SB_ONDISK_JOURNAL_1st_BLOCK
+					       (sb), oldest_trans_id);
+			}
+			if (newest_mount_id < get_desc_mount_id(desc)) {
+				newest_mount_id = get_desc_mount_id(desc);
+				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+					       "journal-1299: Setting "
+					       "newest_mount_id to %d",
+					       get_desc_mount_id(desc));
+			}
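+			/* jump past the whole valid transaction:
+			 * desc + data + commit blocks */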
+			cur_dblock += get_desc_trans_len(desc) + 2;
+		} else {
+			cur_dblock++;
+		}
+		brelse(d_bh);
+	}
+
+      start_log_replay:
+	cur_dblock = oldest_start;
+	if (oldest_trans_id) {
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+			       "journal-1206: Starting replay "
+			       "from offset %llu, trans_id %lu",
+			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
+			       oldest_trans_id);
+	}
+	replay_count = 0;
+	while (continue_replay && oldest_trans_id > 0) {
+		ret =
+		    journal_read_transaction(sb, cur_dblock, oldest_start,
+					     oldest_trans_id, newest_mount_id);
+		if (ret < 0) {
+			return ret;
+		} else if (ret != 0) {
+			break;
+		}
+		cur_dblock =
+		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
+		replay_count++;
+		if (cur_dblock == oldest_start)
+			break;
+	}
+
+	if (oldest_trans_id == 0) {
+		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+			       "journal-1225: No valid " "transactions found");
+	}
+	/* j_start does not get set correctly if we don't replay any transactions.
+	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
+	 ** and copy the trans_id from the header
+	 */
+	if (valid_journal_header && replay_count == 0) {
+		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
+		journal->j_trans_id =
+		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+		/* check for trans_id overflow */
+		if (journal->j_trans_id == 0)
+			journal->j_trans_id = 10;
+		journal->j_last_flush_trans_id =
+		    le32_to_cpu(jh->j_last_flush_trans_id);
+		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
+	} else {
+		journal->j_mount_id = newest_mount_id + 1;
+	}
+	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
+		       "newest_mount_id to %lu", journal->j_mount_id);
+	journal->j_first_unflushed_offset = journal->j_start;
+	if (replay_count > 0) {
+		reiserfs_info(sb,
+			      "replayed %d transactions in %lu seconds\n",
+			      replay_count, get_seconds() - start);
+	}
+	if (!bdev_read_only(sb->s_bdev) &&
+	    _update_journal_header_block(sb, journal->j_start,
+					 journal->j_last_flush_trans_id)) {
+		/* replay failed, caller must call free_journal_ram and abort
+		 ** the mount
+		 */
+		return -1;
+	}
+	return 0;
+}
+
+static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
+{
+	struct reiserfs_journal_list *jl;
+	jl = kzalloc(sizeof(struct reiserfs_journal_list),
+		     GFP_NOFS | __GFP_NOFAIL);
+	INIT_LIST_HEAD(&jl->j_list);
+	INIT_LIST_HEAD(&jl->j_working_list);
+	INIT_LIST_HEAD(&jl->j_tail_bh_list);
+	INIT_LIST_HEAD(&jl->j_bh_list);
+	mutex_init(&jl->j_commit_mutex);
+	SB_JOURNAL(s)->j_num_lists++;
+	get_journal_list(jl);
+	return jl;
+}
+
+static void journal_list_init(struct super_block *sb)
+{
+	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
+}
+
+static int release_journal_dev(struct super_block *super,
+			       struct reiserfs_journal *journal)
+{
+	int result;
+
+	result = 0;
+
+	if (journal->j_dev_bd != NULL) {
+		result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
+		journal->j_dev_bd = NULL;
+	}
+
+	if (result != 0) {
+		reiserfs_warning(super, "sh-457",
+				 "Cannot release journal device: %i", result);
+	}
+	return result;
+}
+
+static int journal_init_dev(struct super_block *super,
+			    struct reiserfs_journal *journal,
+			    const char *jdev_name)
+{
+	int result;
+	dev_t jdev;
+	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+	char b[BDEVNAME_SIZE];
+
+	result = 0;
+
+	journal->j_dev_bd = NULL;
+	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
+	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
+
+	if (bdev_read_only(super->s_bdev))
+		blkdev_mode = FMODE_READ;
+
+	/* no "jdev" option was given: the journal device comes from the
+	   super block, and may be the fs device itself */
+	if ((!jdev_name || !jdev_name[0])) {
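+		/* when the journal lives on the fs device, that device is
+		 * already open and held; a second exclusive claim on it
+		 * would fail, so drop FMODE_EXCL in that case */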
+		if (jdev == super->s_dev)
+			blkdev_mode &= ~FMODE_EXCL;
+		journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
+						      journal);
+		journal->j_dev_mode = blkdev_mode;
+		if (IS_ERR(journal->j_dev_bd)) {
+			result = PTR_ERR(journal->j_dev_bd);
+			journal->j_dev_bd = NULL;
+			reiserfs_warning(super, "sh-458",
+					 "cannot init journal device '%s': %i",
+					 __bdevname(jdev, b), result);
+			return result;
+		} else if (jdev != super->s_dev)
+			set_blocksize(journal->j_dev_bd, super->s_blocksize);
+
+		return 0;
+	}
+
+	journal->j_dev_mode = blkdev_mode;
+	journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
+	if (IS_ERR(journal->j_dev_bd)) {
+		result = PTR_ERR(journal->j_dev_bd);
+		journal->j_dev_bd = NULL;
+		reiserfs_warning(super,
+				 "journal_init_dev: Cannot open '%s': %i",
+				 jdev_name, result);
+		return result;
+	}
+
+	set_blocksize(journal->j_dev_bd, super->s_blocksize);
+	reiserfs_info(super,
+		      "journal_init_dev: journal device: %s\n",
+		      bdevname(journal->j_dev_bd, b));
+	return 0;
+}
+
+/**
+ * When creating/tuning a file system user can assign some
+ * journal params within boundaries which depend on the ratio
+ * blocksize/standard_blocksize.
+ *
+ * For blocks >= standard_blocksize transaction size should
+ * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more
+ * then JOURNAL_TRANS_MAX_DEFAULT.
+ *
+ * For blocks < standard_blocksize these boundaries should be
+ * decreased proportionally.
+ */
+#define REISERFS_STANDARD_BLKSIZE (4096)
+
+static int check_advise_trans_params(struct super_block *sb,
+				     struct reiserfs_journal *journal)
+{
+	if (journal->j_trans_max) {
+		/* Non-default journal params.
+		   Do sanity check for them. */
+		int ratio = 1;
+		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
+			ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
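+		/* e.g. with a 1024 byte blocksize the ratio is 4, so the
+		 * valid trans_max window shrinks to a quarter of the 4 KB
+		 * defaults */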
+
+		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
+		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
+		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
+		    JOURNAL_MIN_RATIO) {
+			reiserfs_warning(sb, "sh-462",
+					 "bad transaction max size (%u). "
+					 "FSCK?", journal->j_trans_max);
+			return 1;
+		}
+		if (journal->j_max_batch != (journal->j_trans_max) *
+			JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT) {
+			reiserfs_warning(sb, "sh-463",
+					 "bad transaction max batch (%u). "
+					 "FSCK?", journal->j_max_batch);
+			return 1;
+		}
+	} else {
+		/* Default journal params.
+		   The file system was created by an old version
+		   of mkreiserfs, so some fields contain zeros,
+		   and we need to advise proper values for them */
+		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
+			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
+					 sb->s_blocksize);
+			return 1;
+		}
+		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
+		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
+		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
+	}
+	return 0;
+}
+
+/*
+** must be called once on fs mount.  calls journal_read for you
+*/
+int journal_init(struct super_block *sb, const char *j_dev_name,
+		 int old_format, unsigned int commit_max_age)
+{
+	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
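+	/* two cnodes per on-disk journal block: each logged block needs one
+	 * cnode in the transaction hash and a second one on its journal
+	 * list (see do_journal_end), so size for both */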
+	struct buffer_head *bhjh;
+	struct reiserfs_super_block *rs;
+	struct reiserfs_journal_header *jh;
+	struct reiserfs_journal *journal;
+	struct reiserfs_journal_list *jl;
+	char b[BDEVNAME_SIZE];
+	int ret;
+
+	journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
+	if (!journal) {
+		reiserfs_warning(sb, "journal-1256",
+				 "unable to get memory for journal structure");
+		return 1;
+	}
+	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
+	INIT_LIST_HEAD(&journal->j_prealloc_list);
+	INIT_LIST_HEAD(&journal->j_working_list);
+	INIT_LIST_HEAD(&journal->j_journal_list);
+	journal->j_persistent_trans = 0;
+	if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
+					   reiserfs_bmap_count(sb)))
+		goto free_and_return;
+
+	allocate_bitmap_nodes(sb);
+
+	/* reserved for journal area support */
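+	/* (the old disk format keeps its bitmap blocks up front, right after
+	 * the super block, so the reserved area must cover those as well) */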
+	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
+						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
+						 / sb->s_blocksize +
+						 reiserfs_bmap_count(sb) +
+						 1 :
+						 REISERFS_DISK_OFFSET_IN_BYTES /
+						 sb->s_blocksize + 2);
+
+	/* Sanity check to see if the standard journal fits within the first
+	   bitmap block (relevant for small blocksizes) */
+	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
+	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
+	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
+		reiserfs_warning(sb, "journal-1393",
+				 "journal does not fit in the area addressed "
+				 "by the first bitmap block. It starts at "
+				 "%u and its size is %u. Block size %ld",
+				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
+				 SB_ONDISK_JOURNAL_SIZE(sb),
+				 sb->s_blocksize);
+		goto free_and_return;
+	}
+
+	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
+		reiserfs_warning(sb, "sh-462",
+				 "unable to initialize journal device");
+		goto free_and_return;
+	}
+
+	rs = SB_DISK_SUPER_BLOCK(sb);
+
+	/* read journal header */
+	bhjh = journal_bread(sb,
+			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+			     SB_ONDISK_JOURNAL_SIZE(sb));
+	if (!bhjh) {
+		reiserfs_warning(sb, "sh-459",
+				 "unable to read journal header");
+		goto free_and_return;
+	}
+	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
+
+	/* make sure the journal matches the super block */
+	if (is_reiserfs_jr(rs)
+	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
+		sb_jp_journal_magic(rs))) {
+		reiserfs_warning(sb, "sh-460",
+				 "journal header magic %x (device %s) does "
+				 "not match magic found in super block %x",
+				 jh->jh_journal.jp_journal_magic,
+				 bdevname(journal->j_dev_bd, b),
+				 sb_jp_journal_magic(rs));
+		brelse(bhjh);
+		goto free_and_return;
+	}
+
+	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
+	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
+	journal->j_max_commit_age =
+	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
+	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
+
+	if (check_advise_trans_params(sb, journal) != 0)
+		goto free_and_return;
+	journal->j_default_max_commit_age = journal->j_max_commit_age;
+
+	if (commit_max_age != 0) {
+		journal->j_max_commit_age = commit_max_age;
+		journal->j_max_trans_age = commit_max_age;
+	}
+
+	reiserfs_info(sb, "journal params: device %s, size %u, "
+		      "journal first block %u, max trans len %u, max batch %u, "
+		      "max commit age %u, max trans age %u\n",
+		      bdevname(journal->j_dev_bd, b),
+		      SB_ONDISK_JOURNAL_SIZE(sb),
+		      SB_ONDISK_JOURNAL_1st_BLOCK(sb),
+		      journal->j_trans_max,
+		      journal->j_max_batch,
+		      journal->j_max_commit_age, journal->j_max_trans_age);
+
+	brelse(bhjh);
+
+	journal->j_list_bitmap_index = 0;
+	journal_list_init(sb);
+
+	memset(journal->j_list_hash_table, 0,
+	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
+
+	INIT_LIST_HEAD(&journal->j_dirty_buffers);
+	spin_lock_init(&journal->j_dirty_buffers_lock);
+
+	journal->j_start = 0;
+	journal->j_len = 0;
+	journal->j_len_alloc = 0;
+	atomic_set(&(journal->j_wcount), 0);
+	atomic_set(&(journal->j_async_throttle), 0);
+	journal->j_bcount = 0;
+	journal->j_trans_start_time = 0;
+	journal->j_last = NULL;
+	journal->j_first = NULL;
+	init_waitqueue_head(&(journal->j_join_wait));
+	mutex_init(&journal->j_mutex);
+	mutex_init(&journal->j_flush_mutex);
+
+	journal->j_trans_id = 10;
+	journal->j_mount_id = 10;
+	journal->j_state = 0;
+	atomic_set(&(journal->j_jlock), 0);
+	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
+	journal->j_cnode_free_orig = journal->j_cnode_free_list;
+	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
+	journal->j_cnode_used = 0;
+	journal->j_must_wait = 0;
+
+	if (journal->j_cnode_free == 0) {
+		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
+				 "allocation failed (%ld bytes). Journal is "
+				 "too large for available memory.",
+				 sizeof(struct reiserfs_journal_cnode) * num_cnodes);
+		goto free_and_return;
+	}
+
+	init_journal_hash(sb);
+	jl = journal->j_current_jl;
+
+	/*
+	 * get_list_bitmap() may call flush_commit_list() which
+	 * requires the lock. Calling flush_commit_list() shouldn't happen
+	 * this early but I like to be paranoid.
+	 */
+	reiserfs_write_lock(sb);
+	jl->j_list_bitmap = get_list_bitmap(sb, jl);
+	reiserfs_write_unlock(sb);
+	if (!jl->j_list_bitmap) {
+		reiserfs_warning(sb, "journal-2005",
+				 "get_list_bitmap failed for journal list 0");
+		goto free_and_return;
+	}
+
+	/*
+	 * Journal_read needs to be inspected in order to push down
+	 * the lock further inside (or even remove it).
+	 */
+	reiserfs_write_lock(sb);
+	ret = journal_read(sb);
+	reiserfs_write_unlock(sb);
+	if (ret < 0) {
+		reiserfs_warning(sb, "reiserfs-2006",
+				 "Replay Failure, unable to mount");
+		goto free_and_return;
+	}
+
+	reiserfs_mounted_fs_count++;
+	if (reiserfs_mounted_fs_count <= 1)
+		commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
+
+	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
+	journal->j_work_sb = sb;
+	return 0;
+      free_and_return:
+	free_journal_ram(sb);
+	return 1;
+}
+
+/*
+** test for a polite end of the current transaction.  Used by file_write, and should
+** be used by delete to make sure they don't write more than can fit inside a single
+** transaction
+*/
+int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
+				   int new_alloc)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
+	time_t now = get_seconds();
+	/* cannot restart while nested */
+	BUG_ON(!th->t_trans_id);
+	if (th->t_refcount > 1)
+		return 0;
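+	/* ask for an end when the batch is full, the journal is locked or
+	 * too old, or cnodes run low (3 * trans_max is kept in reserve) */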
+	if (journal->j_must_wait > 0 ||
+	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
+	    atomic_read(&(journal->j_jlock)) ||
+	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
+	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
+		return 1;
+	}
+
+	journal->j_len_alloc += new_alloc;
+	th->t_blocks_allocated += new_alloc;
+	return 0;
+}
+
+/* this must be called inside a transaction
+*/
+void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
+	BUG_ON(!th->t_trans_id);
+	journal->j_must_wait = 1;
+	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
+	return;
+}
+
+/* this must be called without a transaction started
+*/
+void reiserfs_allow_writes(struct super_block *s)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
+	wake_up(&journal->j_join_wait);
+}
+
+/* this must be called without a transaction started
+*/
+void reiserfs_wait_on_write_block(struct super_block *s)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	wait_event(journal->j_join_wait,
+		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
+}
+
+static void queue_log_writer(struct super_block *s)
+{
+	wait_queue_t wait;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	set_bit(J_WRITERS_QUEUED, &journal->j_state);
+
+	/*
+	 * we don't want to use wait_event here because
+	 * we only want to wait once.
+	 */
+	init_waitqueue_entry(&wait, current);
+	add_wait_queue(&journal->j_join_wait, &wait);
+	set_current_state(TASK_UNINTERRUPTIBLE);
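+	/* re-check the bit only after queueing ourselves, so a wakeup between
+	 * the test and schedule() cannot be missed */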
+	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
+		reiserfs_write_unlock(s);
+		schedule();
+		reiserfs_write_lock(s);
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&journal->j_join_wait, &wait);
+}
+
+static void wake_queued_writers(struct super_block *s)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
+		wake_up(&journal->j_join_wait);
+}
+
+static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	unsigned long bcount = journal->j_bcount;
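+	/* j_bcount advances whenever a writer joins; keep yielding while the
+	 * transaction keeps growing and its id has not moved on */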
+	while (1) {
+		reiserfs_write_unlock(sb);
+		schedule_timeout_uninterruptible(1);
+		reiserfs_write_lock(sb);
+		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
+		while ((atomic_read(&journal->j_wcount) > 0 ||
+			atomic_read(&journal->j_jlock)) &&
+		       journal->j_trans_id == trans_id) {
+			queue_log_writer(sb);
+		}
+		if (journal->j_trans_id != trans_id)
+			break;
+		if (bcount == journal->j_bcount)
+			break;
+		bcount = journal->j_bcount;
+	}
+}
+
+/* join == true if you must join an existing transaction.
+** join == false if you can deal with waiting for others to finish
+**
+** this will block until the transaction is joinable.  Pass the number of blocks you
+** expect to use in nblocks.
+*/
+static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
+			      struct super_block *sb, unsigned long nblocks,
+			      int join)
+{
+	time_t now = get_seconds();
+	unsigned int old_trans_id;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_transaction_handle myth;
+	int sched_count = 0;
+	int retval;
+
+	reiserfs_check_lock_depth(sb, "journal_begin");
+	BUG_ON(nblocks > journal->j_trans_max);
+
+	PROC_INFO_INC(sb, journal.journal_being);
+	/* set here for journal_join */
+	th->t_refcount = 1;
+	th->t_super = sb;
+
+      relock:
+	lock_journal(sb);
+	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
+		unlock_journal(sb);
+		retval = journal->j_errno;
+		goto out_fail;
+	}
+	journal->j_bcount++;
+
+	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
+		unlock_journal(sb);
+		reiserfs_write_unlock(sb);
+		reiserfs_wait_on_write_block(sb);
+		reiserfs_write_lock(sb);
+		PROC_INFO_INC(sb, journal.journal_relock_writers);
+		goto relock;
+	}
+	now = get_seconds();
+
+	/* if there is no room in the journal OR
+	 ** if this transaction is too old and we weren't called joinable,
+	 ** wait for it to finish before beginning.
+	 ** we don't sleep if there aren't other writers
+	 */
+
+	if ((!join && journal->j_must_wait > 0) ||
+	    (!join
+	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
+	    || (!join && atomic_read(&journal->j_wcount) > 0
+		&& journal->j_trans_start_time > 0
+		&& (now - journal->j_trans_start_time) >
+		journal->j_max_trans_age) || (!join
+					      && atomic_read(&journal->j_jlock))
+	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
+
+		old_trans_id = journal->j_trans_id;
+		unlock_journal(sb);	/* allow others to finish this transaction */
+
+		if (!join && (journal->j_len_alloc + nblocks + 2) >=
+		    journal->j_max_batch &&
+		    ((journal->j_len + nblocks + 2) * 100) <
+		    (journal->j_len_alloc * 75)) {
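+			/* most of the batch is still only reserved (logged
+			 * length below 75% of j_len_alloc), so queue behind
+			 * the other writers instead of forcing an end */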
+			if (atomic_read(&journal->j_wcount) > 10) {
+				sched_count++;
+				queue_log_writer(sb);
+				goto relock;
+			}
+		}
+		/* don't mess with joining the transaction if all we have to do is
+		 * wait for someone else to do a commit
+		 */
+		if (atomic_read(&journal->j_jlock)) {
+			while (journal->j_trans_id == old_trans_id &&
+			       atomic_read(&journal->j_jlock)) {
+				queue_log_writer(sb);
+			}
+			goto relock;
+		}
+		retval = journal_join(&myth, sb, 1);
+		if (retval)
+			goto out_fail;
+
+		/* someone might have ended the transaction while we joined */
+		if (old_trans_id != journal->j_trans_id) {
+			retval = do_journal_end(&myth, sb, 1, 0);
+		} else {
+			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
+		}
+
+		if (retval)
+			goto out_fail;
+
+		PROC_INFO_INC(sb, journal.journal_relock_wcount);
+		goto relock;
+	}
+	/* we are the first writer, set trans_id */
+	if (journal->j_trans_start_time == 0) {
+		journal->j_trans_start_time = get_seconds();
+	}
+	atomic_inc(&(journal->j_wcount));
+	journal->j_len_alloc += nblocks;
+	th->t_blocks_logged = 0;
+	th->t_blocks_allocated = nblocks;
+	th->t_trans_id = journal->j_trans_id;
+	unlock_journal(sb);
+	INIT_LIST_HEAD(&th->t_list);
+	return 0;
+
+      out_fail:
+	memset(th, 0, sizeof(*th));
+	/* Re-set th->t_super, so we can properly keep track of how many
+	 * persistent transactions there are. We need to do this so if this
+	 * call is part of a failed restart_transaction, we can free it later */
+	th->t_super = sb;
+	return retval;
+}
+
+struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
+								    super_block
+								    *s,
+								    int nblocks)
+{
+	int ret;
+	struct reiserfs_transaction_handle *th;
+
+	/* if we're nesting into an existing transaction, it will be
+	 ** persistent on its own
+	 */
+	if (reiserfs_transaction_running(s)) {
+		th = current->journal_info;
+		th->t_refcount++;
+		BUG_ON(th->t_refcount < 2);
+
+		return th;
+	}
+	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
+	if (!th)
+		return NULL;
+	ret = journal_begin(th, s, nblocks);
+	if (ret) {
+		kfree(th);
+		return NULL;
+	}
+
+	SB_JOURNAL(s)->j_persistent_trans++;
+	return th;
+}
+
+int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
+{
+	struct super_block *s = th->t_super;
+	int ret = 0;
+	if (th->t_trans_id)
+		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
+	else
+		ret = -EIO;
+	if (th->t_refcount == 0) {
+		SB_JOURNAL(s)->j_persistent_trans--;
+		kfree(th);
+	}
+	return ret;
+}
+
+static int journal_join(struct reiserfs_transaction_handle *th,
+			struct super_block *sb, unsigned long nblocks)
+{
+	struct reiserfs_transaction_handle *cur_th = current->journal_info;
+
+	/* this keeps do_journal_end from NULLing out the current->journal_info
+	 ** pointer
+	 */
+	th->t_handle_save = cur_th;
+	BUG_ON(cur_th && cur_th->t_refcount > 1);
+	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
+}
+
+int journal_join_abort(struct reiserfs_transaction_handle *th,
+		       struct super_block *sb, unsigned long nblocks)
+{
+	struct reiserfs_transaction_handle *cur_th = current->journal_info;
+
+	/* this keeps do_journal_end from NULLing out the current->journal_info
+	 ** pointer
+	 */
+	th->t_handle_save = cur_th;
+	BUG_ON(cur_th && cur_th->t_refcount > 1);
+	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
+}
+
+int journal_begin(struct reiserfs_transaction_handle *th,
+		  struct super_block *sb, unsigned long nblocks)
+{
+	struct reiserfs_transaction_handle *cur_th = current->journal_info;
+	int ret;
+
+	th->t_handle_save = NULL;
+	if (cur_th) {
+		/* we are nesting into the current transaction */
+		if (cur_th->t_super == sb) {
+			BUG_ON(!cur_th->t_refcount);
+			cur_th->t_refcount++;
+			memcpy(th, cur_th, sizeof(*th));
+			if (th->t_refcount <= 1)
+				reiserfs_warning(sb, "reiserfs-2005",
+						 "BAD: refcount <= 1, but "
+						 "journal_info != 0");
+			return 0;
+		} else {
+			/* we've ended up with a handle from a different filesystem.
+			 ** save it and restore on journal_end.  This should never
+			 ** really happen...
+			 */
+			reiserfs_warning(sb, "clm-2100",
+					 "nesting into a different FS");
+			th->t_handle_save = current->journal_info;
+			current->journal_info = th;
+		}
+	} else {
+		current->journal_info = th;
+	}
+	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
+	BUG_ON(current->journal_info != th);
+
+	/* I guess this boils down to being the reciprocal of clm-2100 above.
+	 * If do_journal_begin_r fails, we need to put it back, since journal_end
+	 * won't be called to do it. */
+	if (ret)
+		current->journal_info = th->t_handle_save;
+	else
+		BUG_ON(!th->t_refcount);
+
+	return ret;
+}
+
+/*
+** puts bh into the current transaction.  If it was already there, this removes the
+** old pointers from the hash, and puts new ones in (to make sure replay happens in the right order).
+**
+** if it was dirty, cleans and files onto the clean list.  We can't let it be dirty again until the
+** transaction is committed.
+**
+** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to j_len + JOURNAL_PER_BALANCE_CNT.
+*/
+int journal_mark_dirty(struct reiserfs_transaction_handle *th,
+		       struct super_block *sb, struct buffer_head *bh)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_cnode *cn = NULL;
+	int count_already_incd = 0;
+	int prepared = 0;
+	BUG_ON(!th->t_trans_id);
+
+	PROC_INFO_INC(sb, journal.mark_dirty);
+	if (th->t_trans_id != journal->j_trans_id) {
+		reiserfs_panic(th->t_super, "journal-1577",
+			       "handle trans id %ld != current trans id %ld",
+			       th->t_trans_id, journal->j_trans_id);
+	}
+
+	sb->s_dirt = 1;
+
+	prepared = test_clear_buffer_journal_prepared(bh);
+	clear_buffer_journal_restore_dirty(bh);
+	/* already in this transaction, we are done */
+	if (buffer_journaled(bh)) {
+		PROC_INFO_INC(sb, journal.mark_dirty_already);
+		return 0;
+	}
+
+	/* this must be turned into a panic instead of a warning.  We can't allow
+	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
+	 ** could get to disk too early.  NOT GOOD.
+	 */
+	if (!prepared || buffer_dirty(bh)) {
+		reiserfs_warning(sb, "journal-1777",
+				 "buffer %llu bad state "
+				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
+				 (unsigned long long)bh->b_blocknr,
+				 prepared ? ' ' : '!',
+				 buffer_locked(bh) ? ' ' : '!',
+				 buffer_dirty(bh) ? ' ' : '!',
+				 buffer_journal_dirty(bh) ? ' ' : '!');
+	}
+
+	if (atomic_read(&(journal->j_wcount)) <= 0) {
+		reiserfs_warning(sb, "journal-1409",
+				 "returning because j_wcount was %d",
+				 atomic_read(&(journal->j_wcount)));
+		return 1;
+	}
+	/* this error means I've screwed up, and we've overflowed the transaction.
+	 ** Nothing can be done here, except make the FS readonly or panic.
+	 */
+	if (journal->j_len >= journal->j_trans_max) {
+		reiserfs_panic(th->t_super, "journal-1413",
+			       "j_len (%lu) is too big",
+			       journal->j_len);
+	}
+
+	if (buffer_journal_dirty(bh)) {
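+		/* the buffer already holds a reference from an earlier
+		 * transaction, so skip the extra get_bh() below */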
+		count_already_incd = 1;
+		PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
+		clear_buffer_journal_dirty(bh);
+	}
+
+	if (journal->j_len > journal->j_len_alloc) {
+		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
+	}
+
+	set_buffer_journaled(bh);
+
+	/* now put this guy on the end */
+	if (!cn) {
+		cn = get_cnode(sb);
+		if (!cn) {
+			reiserfs_panic(sb, "journal-4", "get_cnode failed!");
+		}
+
+		if (th->t_blocks_logged == th->t_blocks_allocated) {
+			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
+			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
+		}
+		th->t_blocks_logged++;
+		journal->j_len++;
+
+		cn->bh = bh;
+		cn->blocknr = bh->b_blocknr;
+		cn->sb = sb;
+		cn->jlist = NULL;
+		insert_journal_hash(journal->j_hash_table, cn);
+		if (!count_already_incd) {
+			get_bh(bh);
+		}
+	}
+	cn->next = NULL;
+	cn->prev = journal->j_last;
+	cn->bh = bh;
+	if (journal->j_last) {
+		journal->j_last->next = cn;
+		journal->j_last = cn;
+	} else {
+		journal->j_first = cn;
+		journal->j_last = cn;
+	}
+	return 0;
+}
+
+int journal_end(struct reiserfs_transaction_handle *th,
+		struct super_block *sb, unsigned long nblocks)
+{
+	if (!current->journal_info && th->t_refcount > 1)
+		reiserfs_warning(sb, "REISER-NESTING",
+				 "th NULL, refcount %d", th->t_refcount);
+
+	if (!th->t_trans_id) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	th->t_refcount--;
+	if (th->t_refcount > 0) {
+		struct reiserfs_transaction_handle *cur_th =
+		    current->journal_info;
+
+		/* we aren't allowed to close a nested transaction on a different
+		 ** filesystem from the one in the task struct
+		 */
+		BUG_ON(cur_th->t_super != th->t_super);
+
+		if (th != cur_th) {
+			memcpy(current->journal_info, th, sizeof(*th));
+			th->t_trans_id = 0;
+		}
+		return 0;
+	} else {
+		return do_journal_end(th, sb, nblocks, 0);
+	}
+}
+
+/* removes from the current transaction, releasing the buffer and decrementing any counters.
+** also files the removed buffer directly onto the clean list
+**
+** called by journal_mark_freed when a block has been deleted
+**
+** returns 1 if it cleaned and released the buffer, 0 otherwise
+*/
+static int remove_from_transaction(struct super_block *sb,
+				   b_blocknr_t blocknr, int already_cleaned)
+{
+	struct buffer_head *bh;
+	struct reiserfs_journal_cnode *cn;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	int ret = 0;
+
+	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
+	if (!cn || !cn->bh) {
+		return ret;
+	}
+	bh = cn->bh;
+	if (cn->prev) {
+		cn->prev->next = cn->next;
+	}
+	if (cn->next) {
+		cn->next->prev = cn->prev;
+	}
+	if (cn == journal->j_first) {
+		journal->j_first = cn->next;
+	}
+	if (cn == journal->j_last) {
+		journal->j_last = cn->prev;
+	}
+	if (bh)
+		remove_journal_hash(sb, journal->j_hash_table, NULL,
+				    bh->b_blocknr, 0);
+	clear_buffer_journaled(bh);	/* don't log this one */
+
+	if (!already_cleaned) {
+		clear_buffer_journal_dirty(bh);
+		clear_buffer_dirty(bh);
+		clear_buffer_journal_test(bh);
+		put_bh(bh);
+		if (atomic_read(&(bh->b_count)) < 0) {
+			reiserfs_warning(sb, "journal-1752",
+					 "b_count < 0");
+		}
+		ret = 1;
+	}
+	journal->j_len--;
+	journal->j_len_alloc--;
+	free_cnode(sb, cn);
+	return ret;
+}
+
+/*
+** for any cnode in a journal list, it can only be dirtied if all the
+** transactions that include it are committed to disk.
+** this checks through each transaction, and returns 1 if you are allowed to dirty,
+** and 0 if you aren't
+**
+** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
+** blocks for a given transaction on disk
+**
+*/
+static int can_dirty(struct reiserfs_journal_cnode *cn)
+{
+	struct super_block *sb = cn->sb;
+	b_blocknr_t blocknr = cn->blocknr;
+	struct reiserfs_journal_cnode *cur = cn->hprev;
+	int can_dirty = 1;
+
+	/* first test hprev.  These are all newer than cn, so any node here
+	 ** with the same block number and dev means this node can't be sent
+	 ** to disk right now.
+	 */
+	while (cur && can_dirty) {
+		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
+		    cur->blocknr == blocknr) {
+			can_dirty = 0;
+		}
+		cur = cur->hprev;
+	}
+	/* then test hnext.  These are all older than cn.  As long as they
+	 ** are committed to the log, it is safe to write cn to disk
+	 */
+	cur = cn->hnext;
+	while (cur && can_dirty) {
+		if (cur->jlist && cur->jlist->j_len > 0 &&
+		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
+		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
+			can_dirty = 0;
+		}
+		cur = cur->hnext;
+	}
+	return can_dirty;
+}
+
+/* syncs the commit blocks, but does not force the real buffers to disk.
+** will wait until the current transaction is done/committed before returning
+*/
+int journal_end_sync(struct reiserfs_transaction_handle *th,
+		     struct super_block *sb, unsigned long nblocks)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	BUG_ON(!th->t_trans_id);
+	/* you can sync while nested, very, very bad */
+	BUG_ON(th->t_refcount > 1);
+	if (journal->j_len == 0) {
+		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
+					     1);
+		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
+	}
+	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
+}
+
+/*
+** writeback the pending async commits to disk
+*/
+static void flush_async_commits(struct work_struct *work)
+{
+	struct reiserfs_journal *journal =
+		container_of(work, struct reiserfs_journal, j_work.work);
+	struct super_block *sb = journal->j_work_sb;
+	struct reiserfs_journal_list *jl;
+	struct list_head *entry;
+
+	reiserfs_write_lock(sb);
+	if (!list_empty(&journal->j_journal_list)) {
+		/* last entry is the youngest, commit it and you get everything */
+		entry = journal->j_journal_list.prev;
+		jl = JOURNAL_LIST_ENTRY(entry);
+		flush_commit_list(sb, jl, 1);
+	}
+	reiserfs_write_unlock(sb);
+}
+
+/*
+** flushes any old transactions to disk
+** ends the current transaction if it is too old
+*/
+int reiserfs_flush_old_commits(struct super_block *sb)
+{
+	time_t now;
+	struct reiserfs_transaction_handle th;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	now = get_seconds();
+	/* safety check so we don't flush while we are replaying the log during
+	 * mount
+	 */
+	if (list_empty(&journal->j_journal_list)) {
+		return 0;
+	}
+
+	/* check the current transaction.  If there are no writers, and it is
+	 * too old, finish it, and force the commit blocks to disk
+	 */
+	if (atomic_read(&journal->j_wcount) <= 0 &&
+	    journal->j_trans_start_time > 0 &&
+	    journal->j_len > 0 &&
+	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
+		if (!journal_join(&th, sb, 1)) {
+			reiserfs_prepare_for_journal(sb,
+						     SB_BUFFER_WITH_SB(sb),
+						     1);
+			journal_mark_dirty(&th, sb,
+					   SB_BUFFER_WITH_SB(sb));
+
+			/* we're only being called from kreiserfsd, so it makes
+			 ** no sense to do an async commit just so kreiserfsd
+			 ** can do it later
+			 */
+			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
+		}
+	}
+	return sb->s_dirt;
+}
+
+/*
+** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
+**
+** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
+** the writers are done.  By the time it wakes up, the transaction it was called on has already ended, so it just
+** flushes the commit list and returns 0.
+**
+** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
+**
+** Note, we can't allow the journal_end to proceed while there are still writers in the log.
+*/
+static int check_journal_end(struct reiserfs_transaction_handle *th,
+			     struct super_block *sb, unsigned long nblocks,
+			     int flags)
+{
+
+	time_t now;
+	int flush = flags & FLUSH_ALL;
+	int commit_now = flags & COMMIT_NOW;
+	int wait_on_commit = flags & WAIT;
+	struct reiserfs_journal_list *jl;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+
+	BUG_ON(!th->t_trans_id);
+
+	if (th->t_trans_id != journal->j_trans_id) {
+		reiserfs_panic(th->t_super, "journal-1577",
+			       "handle trans id %ld != current trans id %ld",
+			       th->t_trans_id, journal->j_trans_id);
+	}
+
+	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
+	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
+		atomic_dec(&(journal->j_wcount));
+	}
+
+	/* BUG: deal with the case where j_len is 0, but blocks that were previously freed
+	 ** still need to be released.  That will be dealt with by the next transaction that
+	 ** actually writes something, but it should really be taken care of in this trans
+	 */
+	BUG_ON(journal->j_len == 0);
+
+	/* if wcount > 0, and we are called to with flush or commit_now,
+	 ** we wait on j_join_wait.  We will wake up when the last writer has
+	 ** finished the transaction, and started it on its way to the disk.
+	 ** Then, we flush the commit or journal list, and just return 0
+	 ** because the rest of journal end was already done for this transaction.
+	 */
+	if (atomic_read(&(journal->j_wcount)) > 0) {
+		if (flush || commit_now) {
+			unsigned trans_id;
+
+			jl = journal->j_current_jl;
+			trans_id = jl->j_trans_id;
+			if (wait_on_commit)
+				jl->j_state |= LIST_COMMIT_PENDING;
+			atomic_set(&(journal->j_jlock), 1);
+			if (flush) {
+				journal->j_next_full_flush = 1;
+			}
+			unlock_journal(sb);
+
+			/* sleep while the current transaction is still j_jlocked */
+			while (journal->j_trans_id == trans_id) {
+				if (atomic_read(&journal->j_jlock)) {
+					queue_log_writer(sb);
+				} else {
+					lock_journal(sb);
+					if (journal->j_trans_id == trans_id) {
+						atomic_set(&(journal->j_jlock),
+							   1);
+					}
+					unlock_journal(sb);
+				}
+			}
+			BUG_ON(journal->j_trans_id == trans_id);
+
+			if (commit_now
+			    && journal_list_still_alive(sb, trans_id)
+			    && wait_on_commit) {
+				flush_commit_list(sb, jl, 1);
+			}
+			return 0;
+		}
+		unlock_journal(sb);
+		return 0;
+	}
+
+	/* deal with old transactions where we are the last writers */
+	now = get_seconds();
+	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
+		commit_now = 1;
+		journal->j_next_async_flush = 1;
+	}
+	/* don't batch when someone is waiting on j_join_wait */
+	/* don't batch when syncing the commit or flushing the whole trans */
+	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
+	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
+	    && journal->j_len_alloc < journal->j_max_batch
+	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
+		journal->j_bcount++;
+		unlock_journal(sb);
+		return 0;
+	}
+
+	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
+		reiserfs_panic(sb, "journal-003",
+			       "j_start (%ld) is too high",
+			       journal->j_start);
+	}
+	return 1;
+}
+
+/*
+** Does all the work that makes deleting blocks safe.
+** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on.
+**
+** otherwise:
+** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
+** before this transaction has finished.
+**
+** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
+** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
+** the block can't be reallocated yet.
+**
+** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
+*/
+int journal_mark_freed(struct reiserfs_transaction_handle *th,
+		       struct super_block *sb, b_blocknr_t blocknr)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_cnode *cn = NULL;
+	struct buffer_head *bh = NULL;
+	struct reiserfs_list_bitmap *jb = NULL;
+	int cleaned = 0;
+	BUG_ON(!th->t_trans_id);
+
+	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
+	if (cn && cn->bh) {
+		bh = cn->bh;
+		get_bh(bh);
+	}
+	/* if it is journal new, we just remove it from this transaction */
+	if (bh && buffer_journal_new(bh)) {
+		clear_buffer_journal_new(bh);
+		clear_prepared_bits(bh);
+		reiserfs_clean_and_file_buffer(bh);
+		cleaned = remove_from_transaction(sb, blocknr, cleaned);
+	} else {
+		/* set the bit for this block in the journal bitmap for this transaction */
+		jb = journal->j_current_jl->j_list_bitmap;
+		if (!jb) {
+			reiserfs_panic(sb, "journal-1702",
+				       "journal_list_bitmap is NULL");
+		}
+		set_bit_in_list_bitmap(sb, blocknr, jb);
+
+		/* Note, the while loop below is not allowed to schedule.  */
+
+		if (bh) {
+			clear_prepared_bits(bh);
+			reiserfs_clean_and_file_buffer(bh);
+		}
+		cleaned = remove_from_transaction(sb, blocknr, cleaned);
+
+		/* find all older transactions with this block, make sure they don't try to write it out */
+		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
+					  blocknr);
+		while (cn) {
+			if (sb == cn->sb && blocknr == cn->blocknr) {
+				set_bit(BLOCK_FREED, &cn->state);
+				if (cn->bh) {
+					if (!cleaned) {
+						/* remove_from_transaction will brelse the buffer if it was 
+						 ** in the current trans
+						 */
+						clear_buffer_journal_dirty(cn->
+									   bh);
+						clear_buffer_dirty(cn->bh);
+						clear_buffer_journal_test(cn->
+									  bh);
+						cleaned = 1;
+						put_bh(cn->bh);
+						if (atomic_read
+						    (&(cn->bh->b_count)) < 0) {
+							reiserfs_warning(sb,
+								 "journal-2138",
+								 "cn->bh->b_count < 0");
+						}
+					}
+					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
+						atomic_dec(&
+							   (cn->jlist->
+							    j_nonzerolen));
+					}
+					cn->bh = NULL;
+				}
+			}
+			cn = cn->hnext;
+		}
+	}
+
+	if (bh)
+		release_buffer_page(bh); /* get_hash grabs the buffer */
+	return 0;
+}
+
+void reiserfs_update_inode_transaction(struct inode *inode)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
+	REISERFS_I(inode)->i_jl = journal->j_current_jl;
+	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
+}
+
+/*
+ * returns -1 on error, 0 if no commits/barriers were done and 1
+ * if a transaction was actually committed and the barrier was done
+ */
+static int __commit_trans_jl(struct inode *inode, unsigned long id,
+			     struct reiserfs_journal_list *jl)
+{
+	struct reiserfs_transaction_handle th;
+	struct super_block *sb = inode->i_sb;
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	int ret = 0;
+
+	/* is it from the current transaction, or from an unknown transaction? */
+	if (id == journal->j_trans_id) {
+		jl = journal->j_current_jl;
+		/* try to let other writers come in and grow this transaction */
+		let_transaction_grow(sb, id);
+		if (journal->j_trans_id != id) {
+			goto flush_commit_only;
+		}
+
+		ret = journal_begin(&th, sb, 1);
+		if (ret)
+			return ret;
+
+		/* someone might have ended this transaction while we joined */
+		if (journal->j_trans_id != id) {
+			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
+						     1);
+			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
+			ret = journal_end(&th, sb, 1);
+			goto flush_commit_only;
+		}
+
+		ret = journal_end_sync(&th, sb, 1);
+		if (!ret)
+			ret = 1;
+
+	} else {
+		/* this gets tricky, we have to make sure the journal list in
+		 * the inode still exists.  We know the list is still around
+		 * if we've got a larger transaction id than the oldest list
+		 */
+	      flush_commit_only:
+		if (journal_list_still_alive(inode->i_sb, id)) {
+			/*
+			 * we only set ret to 1 when we know for sure
+			 * the barrier hasn't been started yet on the commit
+			 * block.
+			 */
+			if (atomic_read(&jl->j_commit_left) > 1)
+				ret = 1;
+			flush_commit_list(sb, jl, 1);
+			if (journal->j_errno)
+				ret = journal->j_errno;
+		}
+	}
+	/* otherwise the list is gone, and long since committed */
+	return ret;
+}
+
+int reiserfs_commit_for_inode(struct inode *inode)
+{
+	unsigned int id = REISERFS_I(inode)->i_trans_id;
+	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
+
+	/* for the whole inode, assume an unset id or jl means it was
+	 * changed in the current transaction.  That is the more
+	 * conservative choice
+	 */
+	if (!id || !jl) {
+		reiserfs_update_inode_transaction(inode);
+		id = REISERFS_I(inode)->i_trans_id;
+		/* jl will be updated in __commit_trans_jl */
+	}
+
+	return __commit_trans_jl(inode, id, jl);
+}
+
+void reiserfs_restore_prepared_buffer(struct super_block *sb,
+				      struct buffer_head *bh)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	PROC_INFO_INC(sb, journal.restore_prepared);
+	if (!bh) {
+		return;
+	}
+	if (test_clear_buffer_journal_restore_dirty(bh) &&
+	    buffer_journal_dirty(bh)) {
+		struct reiserfs_journal_cnode *cn;
+		cn = get_journal_hash_dev(sb,
+					  journal->j_list_hash_table,
+					  bh->b_blocknr);
+		if (cn && can_dirty(cn)) {
+			set_buffer_journal_test(bh);
+			mark_buffer_dirty(bh);
+		}
+	}
+	clear_buffer_journal_prepared(bh);
+}
+
+extern struct tree_balance *cur_tb;
+/*
+** before we can change a metadata block, we have to make sure it won't
+** be written to disk while we are altering it.  So, we must:
+** clean it
+** wait on it.
+**
+*/
+int reiserfs_prepare_for_journal(struct super_block *sb,
+				 struct buffer_head *bh, int wait)
+{
+	PROC_INFO_INC(sb, journal.prepare);
+
+	if (!trylock_buffer(bh)) {
+		if (!wait)
+			return 0;
+		lock_buffer(bh);
+	}
+	set_buffer_journal_prepared(bh);
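+	/* if the buffer was dirty, remember to re-dirty it once the prepared
+	 * state is cleared; see reiserfs_restore_prepared_buffer() */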
+	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
+		clear_buffer_journal_test(bh);
+		set_buffer_journal_restore_dirty(bh);
+	}
+	unlock_buffer(bh);
+	return 1;
+}
+
+static void flush_old_journal_lists(struct super_block *s)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	struct reiserfs_journal_list *jl;
+	struct list_head *entry;
+	time_t now = get_seconds();
+
+	while (!list_empty(&journal->j_journal_list)) {
+		entry = journal->j_journal_list.next;
+		jl = JOURNAL_LIST_ENTRY(entry);
+		/* this check should always be run, to send old lists to disk */
+		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
+		    atomic_read(&jl->j_commit_left) == 0 &&
+		    test_transaction(s, jl)) {
+			flush_used_journal_lists(s, jl);
+		} else {
+			break;
+		}
+	}
+}
+
+/*
+** long and ugly.  If flush, will not return until all commit
+** blocks and all real buffers in the trans are on disk.
+** If no_async, won't return until all commit blocks are on disk.
+**
+** keep reading, there are comments as you go along
+**
+** If the journal is aborted, we just clean up. Things like flushing
+** journal lists, etc just won't happen.
+*/
+static int do_journal_end(struct reiserfs_transaction_handle *th,
+			  struct super_block *sb, unsigned long nblocks,
+			  int flags)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
+	struct reiserfs_journal_cnode *last_cn = NULL;
+	struct reiserfs_journal_desc *desc;
+	struct reiserfs_journal_commit *commit;
+	struct buffer_head *c_bh;	/* commit bh */
+	struct buffer_head *d_bh;	/* desc bh */
+	int cur_write_start = 0;	/* start index of current log write */
+	int old_start;
+	int i;
+	int flush;
+	int wait_on_commit;
+	struct reiserfs_journal_list *jl, *temp_jl;
+	struct list_head *entry, *safe;
+	unsigned long jindex;
+	unsigned int commit_trans_id;
+	int trans_half;
+
+	BUG_ON(th->t_refcount > 1);
+	BUG_ON(!th->t_trans_id);
+
+	/* protect flush_older_commits from doing mistakes if the
+           transaction ID counter gets overflowed.  */
+	if (th->t_trans_id == ~0U)
+		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+	flush = flags & FLUSH_ALL;
+	wait_on_commit = flags & WAIT;
+
+	current->journal_info = th->t_handle_save;
+	reiserfs_check_lock_depth(sb, "journal end");
+	if (journal->j_len == 0) {
+		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
+					     1);
+		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
+	}
+
+	lock_journal(sb);
+	if (journal->j_next_full_flush) {
+		flags |= FLUSH_ALL;
+		flush = 1;
+	}
+	if (journal->j_next_async_flush) {
+		flags |= COMMIT_NOW | WAIT;
+		wait_on_commit = 1;
+	}
+
+	/* check_journal_end locks the journal and unlocks it if it does not return 1.
+	 ** it tells us if we should continue with the journal_end, or just return
+	 */
+	if (!check_journal_end(th, sb, nblocks, flags)) {
+		sb->s_dirt = 1;
+		wake_queued_writers(sb);
+		reiserfs_async_progress_wait(sb);
+		goto out;
+	}
+
+	/* check_journal_end might set these, check again */
+	if (journal->j_next_full_flush) {
+		flush = 1;
+	}
+
+	/*
+	 ** j must wait means we have to flush the log blocks, and the real blocks for
+	 ** this transaction
+	 */
+	if (journal->j_must_wait > 0) {
+		flush = 1;
+	}
+#ifdef REISERFS_PREALLOCATE
+	/* quota ops might need to nest, setup the journal_info pointer for them
+	 * and raise the refcount so that it is > 0. */
+	current->journal_info = th;
+	th->t_refcount++;
+	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
+						 * the transaction */
+	th->t_refcount--;
+	current->journal_info = th->t_handle_save;
+#endif
+
+	/* setup description block */
+	d_bh =
+	    journal_getblk(sb,
+			   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+			   journal->j_start);
+	set_buffer_uptodate(d_bh);
+	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
+	memset(d_bh->b_data, 0, d_bh->b_size);
+	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
+	set_desc_trans_id(desc, journal->j_trans_id);
+
+	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
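+	/* the commit block sits right after the last data block, wrapping
+	 * around the circular on-disk journal when needed */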
+	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+			      ((journal->j_start + journal->j_len +
+				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
+	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
+	memset(c_bh->b_data, 0, c_bh->b_size);
+	set_commit_trans_id(commit, journal->j_trans_id);
+	set_buffer_uptodate(c_bh);
+
+	/* init this journal list */
+	jl = journal->j_current_jl;
+
+	/* we lock the commit before doing anything because
+	 * we want to make sure nobody tries to run flush_commit_list until
+	 * the new transaction is fully setup, and we've already flushed the
+	 * ordered bh list
+	 */
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
+
+	/* save the transaction id in case we need to commit it later */
+	commit_trans_id = jl->j_trans_id;
+
+	atomic_set(&jl->j_older_commits_done, 0);
+	jl->j_trans_id = journal->j_trans_id;
+	jl->j_timestamp = journal->j_trans_start_time;
+	jl->j_commit_bh = c_bh;
+	jl->j_start = journal->j_start;
+	jl->j_len = journal->j_len;
+	atomic_set(&jl->j_nonzerolen, journal->j_len);
+	atomic_set(&jl->j_commit_left, journal->j_len + 2);
+	jl->j_realblock = NULL;
+
+	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
+	 **  for each real block, add it to the journal list hash,
+	 ** copy into real block index array in the commit or desc block
+	 */
+	trans_half = journal_trans_half(sb->s_blocksize);
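+	/* the desc block holds the first trans_half real block numbers;
+	 * anything beyond that spills into the commit block's array */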
+	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
+		if (buffer_journaled(cn->bh)) {
+			jl_cn = get_cnode(sb);
+			if (!jl_cn) {
+				reiserfs_panic(sb, "journal-1676",
+					       "get_cnode returned NULL");
+			}
+			if (i == 0) {
+				jl->j_realblock = jl_cn;
+			}
+			jl_cn->prev = last_cn;
+			jl_cn->next = NULL;
+			if (last_cn) {
+				last_cn->next = jl_cn;
+			}
+			last_cn = jl_cn;
+			/* make sure the block we are trying to log is not a block
+			   of journal or reserved area */
+
+			if (is_block_in_log_or_reserved_area
+			    (sb, cn->bh->b_blocknr)) {
+				reiserfs_panic(sb, "journal-2332",
+					       "Trying to log block %lu, "
+					       "which is a log block",
+					       cn->bh->b_blocknr);
+			}
+			jl_cn->blocknr = cn->bh->b_blocknr;
+			jl_cn->state = 0;
+			jl_cn->sb = sb;
+			jl_cn->bh = cn->bh;
+			jl_cn->jlist = jl;
+			insert_journal_hash(journal->j_list_hash_table, jl_cn);
+			if (i < trans_half) {
+				desc->j_realblock[i] =
+				    cpu_to_le32(cn->bh->b_blocknr);
+			} else {
+				commit->j_realblock[i - trans_half] =
+				    cpu_to_le32(cn->bh->b_blocknr);
+			}
+		} else {
+			i--;
+		}
+	}
+	set_desc_trans_len(desc, journal->j_len);
+	set_desc_mount_id(desc, journal->j_mount_id);
+	set_desc_trans_id(desc, journal->j_trans_id);
+	set_commit_trans_len(commit, journal->j_len);
+
+	/* special check in case all buffers in the journal were marked for not logging */
+	BUG_ON(journal->j_len == 0);
+
+	/* we're about to dirty all the log blocks, mark the description block
+	 * dirty now too.  Don't mark the commit block dirty until all the
+	 * others are on disk
+	 */
+	mark_buffer_dirty(d_bh);
+
+	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
+	cur_write_start = journal->j_start;
+	cn = journal->j_first;
+	jindex = 1;		/* start at one so we don't get the desc again */
+	while (cn) {
+		clear_buffer_journal_new(cn->bh);
+		/* copy all the real blocks into log area.  dirty log blocks */
+		if (buffer_journaled(cn->bh)) {
+			struct buffer_head *tmp_bh;
+			char *addr;
+			struct page *page;
+			tmp_bh =
+			    journal_getblk(sb,
+					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
+					   ((cur_write_start +
+					     jindex) %
+					    SB_ONDISK_JOURNAL_SIZE(sb)));
+			set_buffer_uptodate(tmp_bh);
+			page = cn->bh->b_page;
+			addr = kmap(page);
+			memcpy(tmp_bh->b_data,
+			       addr + offset_in_page(cn->bh->b_data),
+			       cn->bh->b_size);
+			kunmap(page);
+			mark_buffer_dirty(tmp_bh);
+			jindex++;
+			set_buffer_journal_dirty(cn->bh);
+			clear_buffer_journaled(cn->bh);
+		} else {
+			/* JDirty cleared sometime during transaction.  don't log this one */
+			reiserfs_warning(sb, "journal-2048",
+					 "BAD, buffer in journal hash, "
+					 "but not JDirty!");
+			brelse(cn->bh);
+		}
+		next = cn->next;
+		free_cnode(sb, cn);
+		cn = next;
+		reiserfs_write_unlock(sb);
+		cond_resched();
+		reiserfs_write_lock(sb);
+	}
+
+	/* we are done  with both the c_bh and d_bh, but
+	 ** c_bh must be written after all other commit blocks,
+	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
+	 */
+
+	journal->j_current_jl = alloc_journal_list(sb);
+
+	/* now it is safe to insert this transaction on the main list */
+	list_add_tail(&jl->j_list, &journal->j_journal_list);
+	list_add_tail(&jl->j_working_list, &journal->j_working_list);
+	journal->j_num_work_lists++;
+
+	/* reset journal values for the next transaction */
+	old_start = journal->j_start;
+	journal->j_start =
+	    (journal->j_start + journal->j_len +
+	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
+	atomic_set(&(journal->j_wcount), 0);
+	journal->j_bcount = 0;
+	journal->j_last = NULL;
+	journal->j_first = NULL;
+	journal->j_len = 0;
+	journal->j_trans_start_time = 0;
+	/* check for trans_id overflow */
+	if (++journal->j_trans_id == 0)
+		journal->j_trans_id = 10;
+	journal->j_current_jl->j_trans_id = journal->j_trans_id;
+	journal->j_must_wait = 0;
+	journal->j_len_alloc = 0;
+	journal->j_next_full_flush = 0;
+	journal->j_next_async_flush = 0;
+	init_journal_hash(sb);
+
+	/* make sure reiserfs_add_jh sees the new current_jl before we
+	 * write out the tails */
+	smp_mb();
+
+	/* tail conversion targets have to hit the disk before we end the
+	 * transaction.  Otherwise a later transaction might repack the tail
+	 * before this transaction commits, leaving the data block unflushed and
+	 * clean, if we crash before the later transaction commits, the data block
+	 * is lost.
+	 */
+	if (!list_empty(&jl->j_tail_bh_list)) {
+		reiserfs_write_unlock(sb);
+		write_ordered_buffers(&journal->j_dirty_buffers_lock,
+				      journal, jl, &jl->j_tail_bh_list);
+		reiserfs_write_lock(sb);
+	}
+	BUG_ON(!list_empty(&jl->j_tail_bh_list));
+	mutex_unlock(&jl->j_commit_mutex);
+
+	/* honor the flush wishes from the caller.  Simple commits can
+	 ** be done outside the journal lock; they are done below.
+	 **
+	 ** if we don't flush the commit list right now, we put it into
+	 ** the work queue so the people waiting on the async progress work
+	 ** queue don't wait for this proc to flush journal lists and such.
+	 */
+	if (flush) {
+		flush_commit_list(sb, jl, 1);
+		flush_journal_list(sb, jl, 1);
+	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
+		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
+
+	/* if the next transaction has any chance of wrapping, flush
+	 ** transactions that might get overwritten.  If any journal lists
+	 ** are very old, flush them as well.
+	 */
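+	/* Worked example (illustrative numbers only): with an on-disk
+	 * journal of 8192 blocks, j_start == 8000 and j_trans_max == 1024,
+	 * the next transaction can reach block (8000 + 1024 + 1) % 8192 ==
+	 * 833.  The first test below then catches any list whose j_start
+	 * lies in [8000, 8191], and the wrapped-portion test catches any
+	 * list whose j_start is <= 833.
+	 */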
+      first_jl:
+	list_for_each_safe(entry, safe, &journal->j_journal_list) {
+		temp_jl = JOURNAL_LIST_ENTRY(entry);
+		if (journal->j_start <= temp_jl->j_start) {
+			if ((journal->j_start + journal->j_trans_max + 1) >=
+			    temp_jl->j_start) {
+				flush_used_journal_lists(sb, temp_jl);
+				goto first_jl;
+			} else if ((journal->j_start +
+				    journal->j_trans_max + 1) <
+				   SB_ONDISK_JOURNAL_SIZE(sb)) {
+				/* if we don't cross into the next transaction and we don't
+				 * wrap, there is no way we can overlap any later transactions;
+				 * break now
+				 */
+				break;
+			}
+		} else if ((journal->j_start +
+			    journal->j_trans_max + 1) >
+			   SB_ONDISK_JOURNAL_SIZE(sb)) {
+			if (((journal->j_start + journal->j_trans_max + 1) %
+			     SB_ONDISK_JOURNAL_SIZE(sb)) >=
+			    temp_jl->j_start) {
+				flush_used_journal_lists(sb, temp_jl);
+				goto first_jl;
+			} else {
+				/* we don't overlap anything from our start to the end of the
+				 * log, and our wrapped portion doesn't overlap anything at
+				 * the start of the log.  We can break
+				 */
+				break;
+			}
+		}
+	}
+	flush_old_journal_lists(sb);
+
+	journal->j_current_jl->j_list_bitmap =
+	    get_list_bitmap(sb, journal->j_current_jl);
+
+	if (!(journal->j_current_jl->j_list_bitmap)) {
+		reiserfs_panic(sb, "journal-1996",
+			       "could not get a list bitmap");
+	}
+
+	atomic_set(&(journal->j_jlock), 0);
+	unlock_journal(sb);
+	/* wake up anybody waiting to join. */
+	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
+	wake_up(&(journal->j_join_wait));
+
+	if (!flush && wait_on_commit &&
+	    journal_list_still_alive(sb, commit_trans_id)) {
+		flush_commit_list(sb, jl, 1);
+	}
+      out:
+	reiserfs_check_lock_depth(sb, "journal end2");
+
+	memset(th, 0, sizeof(*th));
+	/* Re-set th->t_super, so we can properly keep track of how many
+	 * persistent transactions there are. We need to do this so if this
+	 * call is part of a failed restart_transaction, we can free it later */
+	th->t_super = sb;
+
+	return journal->j_errno;
+}
+
+/* Set the file system read-only and refuse new transactions */
+void reiserfs_abort_journal(struct super_block *sb, int errno)
+{
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	if (test_bit(J_ABORTED, &journal->j_state))
+		return;
+
+	if (!journal->j_errno)
+		journal->j_errno = errno;
+
+	sb->s_flags |= MS_RDONLY;
+	set_bit(J_ABORTED, &journal->j_state);
+
+#ifdef CONFIG_REISERFS_CHECK
+	dump_stack();
+#endif
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/lbalance.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/lbalance.c
new file mode 100644
index 0000000..79e5a8b
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/lbalance.c
@@ -0,0 +1,1311 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include "reiserfs.h"
+#include <linux/buffer_head.h>
+
+/* these are used in do_balance.c */
+
+/* leaf_move_items
+   leaf_shift_left
+   leaf_shift_right
+   leaf_delete_items
+   leaf_insert_into_buf
+   leaf_paste_in_buffer
+   leaf_cut_from_buffer
+   leaf_paste_entries
+   */
+
+/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */
+static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
+				  struct buffer_head *source, int last_first,
+				  int item_num, int from, int copy_count)
+{
+	struct buffer_head *dest = dest_bi->bi_bh;
+	int item_num_in_dest;	/* either the number of target item,
+				   or if we must create a new item,
+				   the number of the item we will
+				   create it next to */
+	struct item_head *ih;
+	struct reiserfs_de_head *deh;
+	int copy_records_len;	/* length of all records in item to be copied */
+	char *records;
+
+	ih = B_N_PITEM_HEAD(source, item_num);
+
+	RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item");
+
+	/* length of all records to be copied and first byte of the last of them */
+	deh = B_I_DEH(source, ih);
+	if (copy_count) {
+		copy_records_len = (from ? deh_location(&(deh[from - 1])) :
+				    ih_item_len(ih)) -
+		    deh_location(&(deh[from + copy_count - 1]));
+		records =
+		    source->b_data + ih_location(ih) +
+		    deh_location(&(deh[from + copy_count - 1]));
+	} else {
+		copy_records_len = 0;
+		records = NULL;
+	}
+
+	/* when copy last to first, dest buffer can contain 0 items */
+	item_num_in_dest = (last_first == LAST_TO_FIRST) ?
+	    (B_NR_ITEMS(dest) ? 0 : -1) : (B_NR_ITEMS(dest) - 1);
+
+	/* if there are no items in dest, or the first/last item in dest is not an item of the same directory */
+	if ((item_num_in_dest == -1) ||
+	    (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) ||
+	    (last_first == LAST_TO_FIRST
+	     && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key,
+							 B_N_PKEY(dest,
+								  item_num_in_dest))))
+	{
+		/* create new item in dest */
+		struct item_head new_ih;
+
+		/* form item header */
+		memcpy(&new_ih.ih_key, &ih->ih_key, KEY_SIZE);
+		put_ih_version(&new_ih, KEY_FORMAT_3_5);
+		/* calculate item len */
+		put_ih_item_len(&new_ih,
+				DEH_SIZE * copy_count + copy_records_len);
+		put_ih_entry_count(&new_ih, 0);
+
+		if (last_first == LAST_TO_FIRST) {
+			/* form the key in the following way */
+			if (from < I_ENTRY_COUNT(ih)) {
+				set_le_ih_k_offset(&new_ih,
+						   deh_offset(&(deh[from])));
+				/*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */
+			} else {
+				/* no entries will be copied to this item in this function */
+				set_le_ih_k_offset(&new_ih, U32_MAX);
+				/* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */
+			}
+			set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key),
+					  TYPE_DIRENTRY);
+		}
+
+		/* insert item into dest buffer */
+		leaf_insert_into_buf(dest_bi,
+				     (last_first ==
+				      LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest),
+				     &new_ih, NULL, 0);
+	} else {
+		/* prepare space for entries */
+		leaf_paste_in_buffer(dest_bi,
+				     (last_first ==
+				      FIRST_TO_LAST) ? (B_NR_ITEMS(dest) -
+							1) : 0, MAX_US_INT,
+				     DEH_SIZE * copy_count + copy_records_len,
+				     records, 0);
+	}
+
+	item_num_in_dest =
+	    (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0;
+
+	leaf_paste_entries(dest_bi, item_num_in_dest,
+			   (last_first ==
+			    FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest,
+									  item_num_in_dest))
+			   : 0, copy_count, deh + from, records,
+			   DEH_SIZE * copy_count + copy_records_len);
+}
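+
+/* Illustrative layout of a directory item, as assumed by the offset
+   arithmetic above: the item body starts with the entry head array
+   deh[0]..deh[n-1], followed by the name records, which are packed from
+   the end of the item downwards (deh_location() decreases as the entry
+   index grows).  That is why the length of the records for entries
+   [from, from+copy_count) is deh_location(from-1) (or ih_item_len() for
+   from == 0) minus deh_location(from+copy_count-1). */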
+
+/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or
+   part of it or nothing (see the return 0 below) from SOURCE to the end
+   (if last_first) or beginning (!last_first) of the DEST */
+/* returns 1 if anything was copied, else 0 */
+static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
+				   struct buffer_head *src, int last_first,
+				   int bytes_or_entries)
+{
+	struct buffer_head *dest = dest_bi->bi_bh;
+	int dest_nr_item, src_nr_item;	/* number of items in the source and destination buffers */
+	struct item_head *ih;
+	struct item_head *dih;
+
+	dest_nr_item = B_NR_ITEMS(dest);
+
+	if (last_first == FIRST_TO_LAST) {
+		/* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects
+		   or of different types ) then there is no need to treat this item differently from the other items
+		   that we copy, so we return */
+		ih = B_N_PITEM_HEAD(src, 0);
+		dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1);
+		if (!dest_nr_item
+		    || (!op_is_left_mergeable(&(ih->ih_key), src->b_size)))
+			/* there is nothing to merge */
+			return 0;
+
+		RFALSE(!ih_item_len(ih),
+		       "vs-10010: item can not have empty length");
+
+		if (is_direntry_le_ih(ih)) {
+			if (bytes_or_entries == -1)
+				/* copy all entries to dest */
+				bytes_or_entries = ih_entry_count(ih);
+			leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 0, 0,
+					      bytes_or_entries);
+			return 1;
+		}
+
+		/* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST
+		   part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header
+		 */
+		if (bytes_or_entries == -1)
+			bytes_or_entries = ih_item_len(ih);
+
+#ifdef CONFIG_REISERFS_CHECK
+		else {
+			if (bytes_or_entries == ih_item_len(ih)
+			    && is_indirect_le_ih(ih))
+				if (get_ih_free_space(ih))
+					reiserfs_panic(sb_from_bi(dest_bi),
+						       "vs-10020",
+						       "last unformatted node "
+						       "must be filled "
+						       "entirely (%h)", ih);
+		}
+#endif
+
+		/* merge first item (or its part) of src buffer with the last
+		   item of dest buffer. Both are of the same file */
+		leaf_paste_in_buffer(dest_bi,
+				     dest_nr_item - 1, ih_item_len(dih),
+				     bytes_or_entries, B_I_PITEM(src, ih), 0);
+
+		if (is_indirect_le_ih(dih)) {
+			RFALSE(get_ih_free_space(dih),
+			       "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space",
+			       ih);
+			if (bytes_or_entries == ih_item_len(ih))
+				set_ih_free_space(dih, get_ih_free_space(ih));
+		}
+
+		return 1;
+	}
+
+	/* copy boundary item to right (last_first == LAST_TO_FIRST) */
+
+	/* ( DEST is empty or last item of SOURCE and first item of DEST
+	   are the items of different object or of different types )
+	 */
+	src_nr_item = B_NR_ITEMS(src);
+	ih = B_N_PITEM_HEAD(src, src_nr_item - 1);
+	dih = B_N_PITEM_HEAD(dest, 0);
+
+	if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size))
+		return 0;
+
+	if (is_direntry_le_ih(ih)) {
+		if (bytes_or_entries == -1)
+			/* bytes_or_entries = number of entries in the last item body of SOURCE */
+			bytes_or_entries = ih_entry_count(ih);
+
+		leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
+				      src_nr_item - 1,
+				      ih_entry_count(ih) - bytes_or_entries,
+				      bytes_or_entries);
+		return 1;
+	}
+
+	/* copy part of the body of the last item of SOURCE to the beginning
+	   of the body of the first item of the DEST; the part is defined by
+	   'bytes_or_entries'; if bytes_or_entries == -1 copy the whole body;
+	   change the first item key of the DEST; don't create a new item header
+	 */
+
+	RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih),
+	       "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)",
+	       ih);
+
+	if (bytes_or_entries == -1) {
+		/* bytes_or_entries = length of last item body of SOURCE */
+		bytes_or_entries = ih_item_len(ih);
+
+		RFALSE(le_ih_k_offset(dih) !=
+		       le_ih_k_offset(ih) + op_bytes_number(ih, src->b_size),
+		       "vs-10050: items %h and %h do not match", ih, dih);
+
+		/* change first item key of the DEST */
+		set_le_ih_k_offset(dih, le_ih_k_offset(ih));
+
+		/* item becomes non-mergeable */
+		/* or mergeable if left item was */
+		set_le_ih_k_type(dih, le_ih_k_type(ih));
+	} else {
+		/* merge to right only part of item */
+		RFALSE(ih_item_len(ih) <= bytes_or_entries,
+		       "vs-10060: no so much bytes %lu (needed %lu)",
+		       (unsigned long)ih_item_len(ih),
+		       (unsigned long)bytes_or_entries);
+
+		/* change first item key of the DEST */
+		if (is_direct_le_ih(dih)) {
+			RFALSE(le_ih_k_offset(dih) <=
+			       (unsigned long)bytes_or_entries,
+			       "vs-10070: dih %h, bytes_or_entries(%d)", dih,
+			       bytes_or_entries);
+			set_le_ih_k_offset(dih,
+					   le_ih_k_offset(dih) -
+					   bytes_or_entries);
+		} else {
+			RFALSE(le_ih_k_offset(dih) <=
+			       (bytes_or_entries / UNFM_P_SIZE) * dest->b_size,
+			       "vs-10080: dih %h, bytes_or_entries(%d)",
+			       dih,
+			       (bytes_or_entries / UNFM_P_SIZE) * dest->b_size);
+			set_le_ih_k_offset(dih,
+					   le_ih_k_offset(dih) -
+					   ((bytes_or_entries / UNFM_P_SIZE) *
+					    dest->b_size));
+		}
+	}
+
+	leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries,
+			     B_I_PITEM(src,
+				       ih) + ih_item_len(ih) - bytes_or_entries,
+			     0);
+	return 1;
+}
+
+/* copy cpy_num items from buffer src to buffer dest
+ * last_first == FIRST_TO_LAST means that we copy cpy_num items beginning from the first-th item in src to the tail of dest
+ * last_first == LAST_TO_FIRST means that we copy cpy_num items beginning from the first-th item in src to the head of dest
+ */
+static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
+				     struct buffer_head *src, int last_first,
+				     int first, int cpy_num)
+{
+	struct buffer_head *dest;
+	int nr, free_space;
+	int dest_before;
+	int last_loc, last_inserted_loc, location;
+	int i, j;
+	struct block_head *blkh;
+	struct item_head *ih;
+
+	RFALSE(last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST,
+	       "vs-10090: bad last_first parameter %d", last_first);
+	RFALSE(B_NR_ITEMS(src) - first < cpy_num,
+	       "vs-10100: too few items in source %d, required %d from %d",
+	       B_NR_ITEMS(src), cpy_num, first);
+	RFALSE(cpy_num < 0, "vs-10110: can not copy negative amount of items");
+	RFALSE(!dest_bi, "vs-10120: destination buffer info is not defined");
+
+	dest = dest_bi->bi_bh;
+
+	RFALSE(!dest, "vs-10130: destination buffer is not defined");
+
+	if (cpy_num == 0)
+		return;
+
+	blkh = B_BLK_HEAD(dest);
+	nr = blkh_nr_item(blkh);
+	free_space = blkh_free_space(blkh);
+
+	/* we will insert items before the 0-th or nr-th item in the dest buffer.  It depends on the last_first parameter */
+	dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;
+
+	/* location of head of first new item */
+	ih = B_N_PITEM_HEAD(dest, dest_before);
+
+	RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE,
+	       "vs-10140: not enough free space for headers %d (needed %d)",
+	       B_FREE_SPACE(dest), cpy_num * IH_SIZE);
+
+	/* prepare space for headers */
+	memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE);
+
+	/* copy item headers */
+	memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE);
+
+	free_space -= (IH_SIZE * cpy_num);
+	set_blkh_free_space(blkh, free_space);
+
+	/* location of unmovable item */
+	j = location = (dest_before == 0) ? dest->b_size : ih_location(ih - 1);
+	for (i = dest_before; i < nr + cpy_num; i++) {
+		location -= ih_item_len(ih + i - dest_before);
+		put_ih_location(ih + i - dest_before, location);
+	}
+
+	/* prepare space for items */
+	last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before]));
+	last_inserted_loc = ih_location(&(ih[cpy_num - 1]));
+
+	/* check free space */
+	RFALSE(free_space < j - last_inserted_loc,
+	       "vs-10150: not enough free space for items %d (needed %d)",
+	       free_space, j - last_inserted_loc);
+
+	memmove(dest->b_data + last_loc,
+		dest->b_data + last_loc + j - last_inserted_loc,
+		last_inserted_loc - last_loc);
+
+	/* copy items */
+	memcpy(dest->b_data + last_inserted_loc,
+	       B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc);
+
+	/* sizes, item number */
+	set_blkh_nr_item(blkh, nr + cpy_num);
+	set_blkh_free_space(blkh, free_space - (j - last_inserted_loc));
+
+	do_balance_mark_leaf_dirty(dest_bi->tb, dest, 0);
+
+	if (dest_bi->bi_parent) {
+		struct disk_child *t_dc;
+		t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
+		RFALSE(dc_block_number(t_dc) != dest->b_blocknr,
+		       "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu",
+		       (long unsigned)dest->b_blocknr,
+		       (long unsigned)dc_block_number(t_dc));
+		put_dc_size(t_dc,
+			    dc_size(t_dc) + (j - last_inserted_loc +
+					     IH_SIZE * cpy_num));
+
+		do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
+					       0);
+	}
+}
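+
+/* Sketch of the leaf block layout that the location arithmetic above
+   relies on:
+
+	[ block_head | ih[0] ih[1] ... ih[nr-1] | free space | item bodies ]
+
+   Item bodies are packed from the end of the block downwards, so
+   ih_location() decreases as the item number grows.  The "unmovable"
+   location j is bh->b_size when items are inserted at the head of the
+   buffer, and the location of the item just before the insertion point
+   otherwise. */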
+
+/* This function splits the (liquid) item into two items (useful when
+   shifting part of an item into another node). */
+static void leaf_item_bottle(struct buffer_info *dest_bi,
+			     struct buffer_head *src, int last_first,
+			     int item_num, int cpy_bytes)
+{
+	struct buffer_head *dest = dest_bi->bi_bh;
+	struct item_head *ih;
+
+	RFALSE(cpy_bytes == -1,
+	       "vs-10170: bytes == - 1 means: do not split item");
+
+	if (last_first == FIRST_TO_LAST) {
+		/* if the item in position item_num in buffer SOURCE is a directory item */
+		ih = B_N_PITEM_HEAD(src, item_num);
+		if (is_direntry_le_ih(ih))
+			leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
+					      item_num, 0, cpy_bytes);
+		else {
+			struct item_head n_ih;
+
+			/* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST
+			   part defined by 'cpy_bytes'; create new item header; change old item_header (????);
+			   n_ih = new item_header;
+			 */
+			memcpy(&n_ih, ih, IH_SIZE);
+			put_ih_item_len(&n_ih, cpy_bytes);
+			if (is_indirect_le_ih(ih)) {
+				RFALSE(cpy_bytes == ih_item_len(ih)
+				       && get_ih_free_space(ih),
+				       "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)",
+				       (long unsigned)get_ih_free_space(ih));
+				set_ih_free_space(&n_ih, 0);
+			}
+
+			RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size),
+			       "vs-10190: bad mergeability of item %h", ih);
+			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
+			leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih,
+					     B_N_PITEM(src, item_num), 0);
+		}
+	} else {
+		/*  if the item in position item_num in buffer SOURCE is a directory item */
+		ih = B_N_PITEM_HEAD(src, item_num);
+		if (is_direntry_le_ih(ih))
+			leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
+					      item_num,
+					      I_ENTRY_COUNT(ih) - cpy_bytes,
+					      cpy_bytes);
+		else {
+			struct item_head n_ih;
+
+			/* copy part of the body of the item number 'item_num' of SOURCE to the beginning of the DEST
+			   part defined by 'cpy_bytes'; create new item header;
+			   n_ih = new item_header;
+			 */
+			memcpy(&n_ih, ih, SHORT_KEY_SIZE);
+
+			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
+
+			if (is_direct_le_ih(ih)) {
+				set_le_ih_k_offset(&n_ih,
+						   le_ih_k_offset(ih) +
+						   ih_item_len(ih) - cpy_bytes);
+				set_le_ih_k_type(&n_ih, TYPE_DIRECT);
+				set_ih_free_space(&n_ih, MAX_US_INT);
+			} else {
+				/* indirect item */
+				RFALSE(!cpy_bytes && get_ih_free_space(ih),
+				       "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended");
+				set_le_ih_k_offset(&n_ih,
+						   le_ih_k_offset(ih) +
+						   (ih_item_len(ih) -
+						    cpy_bytes) / UNFM_P_SIZE *
+						   dest->b_size);
+				set_le_ih_k_type(&n_ih, TYPE_INDIRECT);
+				set_ih_free_space(&n_ih, get_ih_free_space(ih));
+			}
+
+			/* set item length */
+			put_ih_item_len(&n_ih, cpy_bytes);
+
+			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
+
+			leaf_insert_into_buf(dest_bi, 0, &n_ih,
+					     B_N_PITEM(src,
+						       item_num) +
+					     ih_item_len(ih) - cpy_bytes, 0);
+		}
+	}
+}
+
+/* If cpy_bytes equals minus one then copy cpy_num whole items from SOURCE to DEST.
+   If cpy_bytes is not equal to minus one then copy cpy_num-1 whole items from SOURCE to DEST.
+   From the last item copy cpy_bytes bytes for a regular item and cpy_bytes directory entries for a
+   directory item. */
+static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
+			   int last_first, int cpy_num, int cpy_bytes)
+{
+	struct buffer_head *dest;
+	int pos, i, src_nr_item, bytes;
+
+	dest = dest_bi->bi_bh;
+	RFALSE(!dest || !src, "vs-10210: !dest || !src");
+	RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
+	       "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST");
+	RFALSE(B_NR_ITEMS(src) < cpy_num,
+	       "vs-10230: No enough items: %d, req. %d", B_NR_ITEMS(src),
+	       cpy_num);
+	RFALSE(cpy_num < 0, "vs-10240: cpy_num < 0 (%d)", cpy_num);
+
+	if (cpy_num == 0)
+		return 0;
+
+	if (last_first == FIRST_TO_LAST) {
+		/* copy items to left */
+		pos = 0;
+		if (cpy_num == 1)
+			bytes = cpy_bytes;
+		else
+			bytes = -1;
+
+		/* copy the first item, or part of it, or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */
+		i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes);
+		cpy_num -= i;
+		if (cpy_num == 0)
+			return i;
+		pos += i;
+		if (cpy_bytes == -1)
+			/* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */
+			leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
+						 pos, cpy_num);
+		else {
+			/* copy the first cpy_num-1 items starting from position 'pos' of the SOURCE to the end of the DEST */
+			leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
+						 pos, cpy_num - 1);
+
+			/* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */
+			leaf_item_bottle(dest_bi, src, FIRST_TO_LAST,
+					 cpy_num + pos - 1, cpy_bytes);
+		}
+	} else {
+		/* copy items to right */
+		src_nr_item = B_NR_ITEMS(src);
+		if (cpy_num == 1)
+			bytes = cpy_bytes;
+		else
+			bytes = -1;
+
+		/* copy the last item, or part of it, or nothing to the beginning of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */
+		i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes);
+
+		cpy_num -= i;
+		if (cpy_num == 0)
+			return i;
+
+		pos = src_nr_item - cpy_num - i;
+		if (cpy_bytes == -1) {
+			/* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */
+			leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
+						 pos, cpy_num);
+		} else {
+			/* copy the last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the beginning of the DEST; */
+			leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
+						 pos + 1, cpy_num - 1);
+
+			/* copy part of the item which number is pos to the beginning of the DEST */
+			leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos,
+					 cpy_bytes);
+		}
+	}
+	return i;
+}
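+
+/* Example (hypothetical values): a FIRST_TO_LAST call with cpy_num == 3
+   and cpy_bytes == 100 in which the boundary item merges (i == 1)
+   proceeds as follows: cpy_num drops to 2 and pos becomes 1, one whole
+   item (position 1) is copied by leaf_copy_items_entirely, and
+   leaf_item_bottle finally moves 100 bytes (or, for a directory item,
+   100 entries) of item 2 into the DEST. */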
+
+/* there are several types of copying: from S[0] to L[0], from S[0] to R[0],
+   from R[0] to L[0]. for each of these we have to define the parent and the
+   positions of the destination and source buffers */
+static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
+				       struct buffer_info *dest_bi,
+				       struct buffer_info *src_bi,
+				       int *first_last,
+				       struct buffer_head *Snew)
+{
+	memset(dest_bi, 0, sizeof(struct buffer_info));
+	memset(src_bi, 0, sizeof(struct buffer_info));
+
+	/* define dest, src, dest parent, dest position */
+	switch (shift_mode) {
+	case LEAF_FROM_S_TO_L:	/* it is used in leaf_shift_left */
+		src_bi->tb = tb;
+		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
+		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
+		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);	/* src->b_item_order */
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->L[0];
+		dest_bi->bi_parent = tb->FL[0];
+		dest_bi->bi_position = get_left_neighbor_position(tb, 0);
+		*first_last = FIRST_TO_LAST;
+		break;
+
+	case LEAF_FROM_S_TO_R:	/* it is used in leaf_shift_right */
+		src_bi->tb = tb;
+		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
+		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
+		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->R[0];
+		dest_bi->bi_parent = tb->FR[0];
+		dest_bi->bi_position = get_right_neighbor_position(tb, 0);
+		*first_last = LAST_TO_FIRST;
+		break;
+
+	case LEAF_FROM_R_TO_L:	/* it is used in balance_leaf_when_delete */
+		src_bi->tb = tb;
+		src_bi->bi_bh = tb->R[0];
+		src_bi->bi_parent = tb->FR[0];
+		src_bi->bi_position = get_right_neighbor_position(tb, 0);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->L[0];
+		dest_bi->bi_parent = tb->FL[0];
+		dest_bi->bi_position = get_left_neighbor_position(tb, 0);
+		*first_last = FIRST_TO_LAST;
+		break;
+
+	case LEAF_FROM_L_TO_R:	/* it is used in balance_leaf_when_delete */
+		src_bi->tb = tb;
+		src_bi->bi_bh = tb->L[0];
+		src_bi->bi_parent = tb->FL[0];
+		src_bi->bi_position = get_left_neighbor_position(tb, 0);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = tb->R[0];
+		dest_bi->bi_parent = tb->FR[0];
+		dest_bi->bi_position = get_right_neighbor_position(tb, 0);
+		*first_last = LAST_TO_FIRST;
+		break;
+
+	case LEAF_FROM_S_TO_SNEW:
+		src_bi->tb = tb;
+		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
+		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
+		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
+		dest_bi->tb = tb;
+		dest_bi->bi_bh = Snew;
+		dest_bi->bi_parent = NULL;
+		dest_bi->bi_position = 0;
+		*first_last = LAST_TO_FIRST;
+		break;
+
+	default:
+		reiserfs_panic(sb_from_bi(src_bi), "vs-10250",
+			       "shift type is unknown (%d)", shift_mode);
+	}
+	RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh,
+	       "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly",
+	       shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
+}
+
+/* copy mov_num items and mov_bytes of the (mov_num-1)th item to
+   neighbor. Delete them from source */
+int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
+		    int mov_bytes, struct buffer_head *Snew)
+{
+	int ret_value;
+	struct buffer_info dest_bi, src_bi;
+	int first_last;
+
+	leaf_define_dest_src_infos(shift_mode, tb, &dest_bi, &src_bi,
+				   &first_last, Snew);
+
+	ret_value =
+	    leaf_copy_items(&dest_bi, src_bi.bi_bh, first_last, mov_num,
+			    mov_bytes);
+
+	leaf_delete_items(&src_bi, first_last,
+			  (first_last ==
+			   FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) -
+						 mov_num), mov_num, mov_bytes);
+
+	return ret_value;
+}
+
+/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1)
+   from S[0] to L[0] and replace the delimiting key */
+int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
+{
+	struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path);
+	int i;
+
+	/* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */
+	i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);
+
+	if (shift_num) {
+		if (B_NR_ITEMS(S0) == 0) {	/* number of items in S[0] == 0 */
+
+			RFALSE(shift_bytes != -1,
+			       "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)",
+			       shift_bytes);
+#ifdef CONFIG_REISERFS_CHECK
+			if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) {
+				print_cur_tb("vs-10275");
+				reiserfs_panic(tb->tb_sb, "vs-10275",
+					       "balance condition corrupted "
+					       "(%c)", tb->tb_mode);
+			}
+#endif
+
+			if (PATH_H_POSITION(tb->tb_path, 1) == 0)
+				replace_key(tb, tb->CFL[0], tb->lkey[0],
+					    PATH_H_PPARENT(tb->tb_path, 0), 0);
+
+		} else {
+			/* replace lkey in CFL[0] by 0-th key from S[0]; */
+			replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0);
+
+			RFALSE((shift_bytes != -1 &&
+				!(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0))
+				  && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) &&
+			       (!op_is_left_mergeable
+				(B_N_PKEY(S0, 0), S0->b_size)),
+			       "vs-10280: item must be mergeable");
+		}
+	}
+
+	return i;
+}
+
+/* CLEANING STOPPED HERE */
+
+/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */
+int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
+{
+	//  struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path);
+	int ret_value;
+
+	/* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */
+	ret_value =
+	    leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL);
+
+	/* replace rkey in CFR[0] by the 0-th key from R[0] */
+	if (shift_num) {
+		replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
+
+	}
+
+	return ret_value;
+}
+
+static void leaf_delete_items_entirely(struct buffer_info *bi,
+				       int first, int del_num);
+/*  If del_bytes == -1, starting from position 'first' delete del_num whole items in buffer CUR.
+    Otherwise:
+    If last_first == 0 (FIRST_TO_LAST), starting from position 'first' delete del_num-1 whole items,
+    then delete part of the body of the first remaining item, the part defined by del_bytes.  Don't
+    delete the first item header.
+    If last_first == 1 (LAST_TO_FIRST), starting from position 'first+1' delete del_num-1 whole items,
+    then delete part of the body of the last item, the part defined by del_bytes.  Don't delete the
+    last item header.
+*/
+void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
+		       int first, int del_num, int del_bytes)
+{
+	struct buffer_head *bh;
+	int item_amount = B_NR_ITEMS(bh = cur_bi->bi_bh);
+
+	RFALSE(!bh, "10155: bh is not defined");
+	RFALSE(del_num < 0, "10160: del_num can not be < 0. del_num==%d",
+	       del_num);
+	RFALSE(first < 0
+	       || first + del_num > item_amount,
+	       "10165: invalid number of first item to be deleted (%d) or "
+	       "no so much items (%d) to delete (only %d)", first,
+	       first + del_num, item_amount);
+
+	if (del_num == 0)
+		return;
+
+	if (first == 0 && del_num == item_amount && del_bytes == -1) {
+		make_empty_node(cur_bi);
+		do_balance_mark_leaf_dirty(cur_bi->tb, bh, 0);
+		return;
+	}
+
+	if (del_bytes == -1)
+		/* delete del_num items beginning from item in position first */
+		leaf_delete_items_entirely(cur_bi, first, del_num);
+	else {
+		if (last_first == FIRST_TO_LAST) {
+			/* delete del_num-1 items beginning from item in position first  */
+			leaf_delete_items_entirely(cur_bi, first, del_num - 1);
+
+			/* delete the part of the first item of the bh
+			   do not delete item header
+			 */
+			leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes);
+		} else {
+			struct item_head *ih;
+			int len;
+
+			/* delete del_num-1 items beginning from item in position first+1  */
+			leaf_delete_items_entirely(cur_bi, first + 1,
+						   del_num - 1);
+
+			ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1);
+			if (is_direntry_le_ih(ih))
+				/* the last item is directory  */
+				/* len = numbers of directory entries in this item */
+				len = ih_entry_count(ih);
+			else
+				/* len = body len of item */
+				len = ih_item_len(ih);
+
+			/* delete the part of the last item of the bh
+			   do not delete item header
+			 */
+			leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
+					     len - del_bytes, del_bytes);
+		}
+	}
+}
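+
+/* Example (hypothetical values): a FIRST_TO_LAST call with first == 0,
+   del_num == 2 and del_bytes == 50 deletes item 0 entirely via
+   leaf_delete_items_entirely, then cuts 50 bytes from the head of the
+   item that is now first in the buffer, keeping its item header. */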
+
+/* insert item into the leaf node in position 'before' */
+void leaf_insert_into_buf(struct buffer_info *bi, int before,
+			  struct item_head *inserted_item_ih,
+			  const char *inserted_item_body, int zeros_number)
+{
+	struct buffer_head *bh = bi->bi_bh;
+	int nr, free_space;
+	struct block_head *blkh;
+	struct item_head *ih;
+	int i;
+	int last_loc, unmoved_loc;
+	char *to;
+
+	blkh = B_BLK_HEAD(bh);
+	nr = blkh_nr_item(blkh);
+	free_space = blkh_free_space(blkh);
+
+	/* check free space */
+	RFALSE(free_space < ih_item_len(inserted_item_ih) + IH_SIZE,
+	       "vs-10170: not enough free space in block %z, new item %h",
+	       bh, inserted_item_ih);
+	RFALSE(zeros_number > ih_item_len(inserted_item_ih),
+	       "vs-10172: zero number == %d, item length == %d",
+	       zeros_number, ih_item_len(inserted_item_ih));
+
+	/* get the item that the new item must be inserted before */
+	ih = B_N_PITEM_HEAD(bh, before);
+
+	/* prepare space for the body of new item */
+	last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size;
+	unmoved_loc = before ? ih_location(ih - 1) : bh->b_size;
+
+	memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih),
+		bh->b_data + last_loc, unmoved_loc - last_loc);
+
+	to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih);
+	memset(to, 0, zeros_number);
+	to += zeros_number;
+
+	/* copy body to prepared space */
+	if (inserted_item_body)
+		memmove(to, inserted_item_body,
+			ih_item_len(inserted_item_ih) - zeros_number);
+	else
+		memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number);
+
+	/* insert item header */
+	memmove(ih + 1, ih, IH_SIZE * (nr - before));
+	memmove(ih, inserted_item_ih, IH_SIZE);
+
+	/* change locations */
+	for (i = before; i < nr + 1; i++) {
+		unmoved_loc -= ih_item_len(&(ih[i - before]));
+		put_ih_location(&(ih[i - before]), unmoved_loc);
+	}
+
+	/* sizes, free space, item number */
+	set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
+	set_blkh_free_space(blkh,
+			    free_space - (IH_SIZE +
+					  ih_item_len(inserted_item_ih)));
+	do_balance_mark_leaf_dirty(bi->tb, bh, 1);
+
+	if (bi->bi_parent) {
+		struct disk_child *t_dc;
+		t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position);
+		put_dc_size(t_dc,
+			    dc_size(t_dc) + (IH_SIZE +
+					     ih_item_len(inserted_item_ih)));
+		do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
+	}
+}
+
+/* paste paste_size bytes to the affected_item_num-th item.
+   When the item is a directory, this only prepares space for new entries */
+void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
+			  int pos_in_item, int paste_size,
+			  const char *body, int zeros_number)
+{
+	struct buffer_head *bh = bi->bi_bh;
+	int nr, free_space;
+	struct block_head *blkh;
+	struct item_head *ih;
+	int i;
+	int last_loc, unmoved_loc;
+
+	blkh = B_BLK_HEAD(bh);
+	nr = blkh_nr_item(blkh);
+	free_space = blkh_free_space(blkh);
+
+	/* check free space */
+	RFALSE(free_space < paste_size,
+	       "vs-10175: not enough free space: needed %d, available %d",
+	       paste_size, free_space);
+
+#ifdef CONFIG_REISERFS_CHECK
+	if (zeros_number > paste_size) {
+		struct super_block *sb = NULL;
+		if (bi && bi->tb)
+			sb = bi->tb->tb_sb;
+		print_cur_tb("10177");
+		reiserfs_panic(sb, "vs-10177",
+			       "zeros_number == %d, paste_size == %d",
+			       zeros_number, paste_size);
+	}
+#endif				/* CONFIG_REISERFS_CHECK */
+
+	/* item to be appended */
+	ih = B_N_PITEM_HEAD(bh, affected_item_num);
+
+	last_loc = ih_location(&(ih[nr - affected_item_num - 1]));
+	unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size;
+
+	/* prepare space */
+	memmove(bh->b_data + last_loc - paste_size, bh->b_data + last_loc,
+		unmoved_loc - last_loc);
+
+	/* change locations */
+	for (i = affected_item_num; i < nr; i++)
+		put_ih_location(&(ih[i - affected_item_num]),
+				ih_location(&(ih[i - affected_item_num])) -
+				paste_size);
+
+	if (body) {
+		if (!is_direntry_le_ih(ih)) {
+			if (!pos_in_item) {
+				/* shift data to right */
+				memmove(bh->b_data + ih_location(ih) +
+					paste_size,
+					bh->b_data + ih_location(ih),
+					ih_item_len(ih));
+				/* paste data in the head of item */
+				memset(bh->b_data + ih_location(ih), 0,
+				       zeros_number);
+				memcpy(bh->b_data + ih_location(ih) +
+				       zeros_number, body,
+				       paste_size - zeros_number);
+			} else {
+				memset(bh->b_data + unmoved_loc - paste_size, 0,
+				       zeros_number);
+				memcpy(bh->b_data + unmoved_loc - paste_size +
+				       zeros_number, body,
+				       paste_size - zeros_number);
+			}
+		}
+	} else
+		memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size);
+
+	put_ih_item_len(ih, ih_item_len(ih) + paste_size);
+
+	/* change free space */
+	set_blkh_free_space(blkh, free_space - paste_size);
+
+	do_balance_mark_leaf_dirty(bi->tb, bh, 0);
+
+	if (bi->bi_parent) {
+		struct disk_child *t_dc =
+		    B_N_CHILD(bi->bi_parent, bi->bi_position);
+		put_dc_size(t_dc, dc_size(t_dc) + paste_size);
+		do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
+	}
+}
+
+/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
+   does not have free space, so it moves DEHs and remaining records as
+   necessary. Return value is size of removed part of directory item
+   in bytes. */
+static int leaf_cut_entries(struct buffer_head *bh,
+			    struct item_head *ih, int from, int del_count)
+{
+	char *item;
+	struct reiserfs_de_head *deh;
+	int prev_record_offset;	/* offset of record, that is (from-1)th */
+	char *prev_record;	/* first byte of that record */
+	int cut_records_len;	/* length of all removed records */
+	int i;
+
+	/* make sure, that item is directory and there are enough entries to
+	   remove */
+	RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
+	RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
+	       "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
+	       I_ENTRY_COUNT(ih), from, del_count);
+
+	if (del_count == 0)
+		return 0;
+
+	/* first byte of item */
+	item = bh->b_data + ih_location(ih);
+
+	/* entry head array */
+	deh = B_I_DEH(bh, ih);
+
+	/* first byte of remaining entries, those are BEFORE cut entries
+	   (prev_record) and length of all removed records (cut_records_len) */
+	prev_record_offset =
+	    (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih));
+	cut_records_len = prev_record_offset /*from_record */  -
+	    deh_location(&(deh[from + del_count - 1]));
+	prev_record = item + prev_record_offset;
+
+	/* adjust locations of remaining entries */
+	for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--)
+		put_deh_location(&(deh[i]),
+				 deh_location(&deh[i]) -
+				 (DEH_SIZE * del_count));
+
+	for (i = 0; i < from; i++)
+		put_deh_location(&(deh[i]),
+				 deh_location(&deh[i]) - (DEH_SIZE * del_count +
+							  cut_records_len));
+
+	put_ih_entry_count(ih, ih_entry_count(ih) - del_count);
+
+	/* shift entry head array and entries that are AFTER removed entries */
+	memmove((char *)(deh + from),
+		deh + from + del_count,
+		prev_record - cut_records_len - (char *)(deh + from +
+							 del_count));
+
+	/* shift records that are BEFORE removed entries */
+	memmove(prev_record - cut_records_len - DEH_SIZE * del_count,
+		prev_record, item + ih_item_len(ih) - prev_record);
+
+	return DEH_SIZE * del_count + cut_records_len;
+}
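+
+/* Example (hypothetical values): cutting del_count == 2 entries whose
+   records total 30 bytes compacts the entry head array and the remaining
+   records, and returns 2 * DEH_SIZE + 30 -- the number of bytes by which
+   the caller then shrinks the item. */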
+
+/*  when cut item is part of regular file
+        pos_in_item - first byte that must be cut
+        cut_size - number of bytes to be cut beginning from pos_in_item
+
+   when cut item is part of directory
+        pos_in_item - number of first deleted entry
+        cut_size - count of deleted entries
+    */
+void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
+			  int pos_in_item, int cut_size)
+{
+	int nr;
+	struct buffer_head *bh = bi->bi_bh;
+	struct block_head *blkh;
+	struct item_head *ih;
+	int last_loc, unmoved_loc;
+	int i;
+
+	blkh = B_BLK_HEAD(bh);
+	nr = blkh_nr_item(blkh);
+
+	/* item head of truncated item */
+	ih = B_N_PITEM_HEAD(bh, cut_item_num);
+
+	if (is_direntry_le_ih(ih)) {
+		/* first cut entry () */
+		cut_size = leaf_cut_entries(bh, ih, pos_in_item, cut_size);
+		if (pos_in_item == 0) {
+			/* change key */
+			RFALSE(cut_item_num,
+			       "when 0-th enrty of item is cut, that item must be first in the node, not %d-th",
+			       cut_item_num);
+			/* change item key by key of first entry in the item */
+			set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih)));
+			/*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */
+		}
+	} else {
+		/* item is direct or indirect */
+		RFALSE(is_statdata_le_ih(ih), "10195: item is stat data");
+		RFALSE(pos_in_item && pos_in_item + cut_size != ih_item_len(ih),
+		       "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)",
+		       (long unsigned)pos_in_item, (long unsigned)cut_size,
+		       (long unsigned)ih_item_len(ih));
+
+		/* shift item body to left if cut is from the head of item */
+		if (pos_in_item == 0) {
+			memmove(bh->b_data + ih_location(ih),
+				bh->b_data + ih_location(ih) + cut_size,
+				ih_item_len(ih) - cut_size);
+
+			/* change key of item */
+			if (is_direct_le_ih(ih))
+				set_le_ih_k_offset(ih,
+						   le_ih_k_offset(ih) +
+						   cut_size);
+			else {
+				set_le_ih_k_offset(ih,
+						   le_ih_k_offset(ih) +
+						   (cut_size / UNFM_P_SIZE) *
+						   bh->b_size);
+				RFALSE(ih_item_len(ih) == cut_size
+				       && get_ih_free_space(ih),
+				       "10205: invalid ih_free_space (%h)", ih);
+			}
+		}
+	}
+
+	/* location of the last item */
+	last_loc = ih_location(&(ih[nr - cut_item_num - 1]));
+
+	/* location of the item which remains at the same place */
+	unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size;
+
+	/* shift */
+	memmove(bh->b_data + last_loc + cut_size, bh->b_data + last_loc,
+		unmoved_loc - last_loc - cut_size);
+
+	/* change item length */
+	put_ih_item_len(ih, ih_item_len(ih) - cut_size);
+
+	if (is_indirect_le_ih(ih)) {
+		if (pos_in_item)
+			set_ih_free_space(ih, 0);
+	}
+
+	/* change locations */
+	for (i = cut_item_num; i < nr; i++)
+		put_ih_location(&(ih[i - cut_item_num]),
+				ih_location(&ih[i - cut_item_num]) + cut_size);
+
+	/* size, free space */
+	set_blkh_free_space(blkh, blkh_free_space(blkh) + cut_size);
+
+	do_balance_mark_leaf_dirty(bi->tb, bh, 0);
+
+	if (bi->bi_parent) {
+		struct disk_child *t_dc;
+		t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position);
+		put_dc_size(t_dc, dc_size(t_dc) - cut_size);
+		do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
+	}
+}
+
+/* delete del_num items from buffer starting from the first'th item */
+static void leaf_delete_items_entirely(struct buffer_info *bi,
+				       int first, int del_num)
+{
+	struct buffer_head *bh = bi->bi_bh;
+	int nr;
+	int i, j;
+	int last_loc, last_removed_loc;
+	struct block_head *blkh;
+	struct item_head *ih;
+
+	RFALSE(bh == NULL, "10210: buffer is 0");
+	RFALSE(del_num < 0, "10215: del_num less than 0 (%d)", del_num);
+
+	if (del_num == 0)
+		return;
+
+	blkh = B_BLK_HEAD(bh);
+	nr = blkh_nr_item(blkh);
+
+	RFALSE(first < 0 || first + del_num > nr,
+	       "10220: first=%d, number=%d, there is %d items", first, del_num,
+	       nr);
+
+	if (first == 0 && del_num == nr) {
+		/* this does not work */
+		make_empty_node(bi);
+
+		do_balance_mark_leaf_dirty(bi->tb, bh, 0);
+		return;
+	}
+
+	ih = B_N_PITEM_HEAD(bh, first);
+
+	/* location of unmovable item */
+	j = (first == 0) ? bh->b_size : ih_location(ih - 1);
+
+	/* delete items */
+	last_loc = ih_location(&(ih[nr - 1 - first]));
+	last_removed_loc = ih_location(&(ih[del_num - 1]));
+
+	memmove(bh->b_data + last_loc + j - last_removed_loc,
+		bh->b_data + last_loc, last_removed_loc - last_loc);
+
+	/* delete item headers */
+	memmove(ih, ih + del_num, (nr - first - del_num) * IH_SIZE);
+
+	/* change item location */
+	for (i = first; i < nr - del_num; i++)
+		put_ih_location(&(ih[i - first]),
+				ih_location(&(ih[i - first])) + (j -
+								 last_removed_loc));
+
+	/* sizes, item number */
+	set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
+	set_blkh_free_space(blkh,
+			    blkh_free_space(blkh) + (j - last_removed_loc +
+						     IH_SIZE * del_num));
+
+	do_balance_mark_leaf_dirty(bi->tb, bh, 0);
+
+	if (bi->bi_parent) {
+		struct disk_child *t_dc =
+		    B_N_CHILD(bi->bi_parent, bi->bi_position);
+		put_dc_size(t_dc,
+			    dc_size(t_dc) - (j - last_removed_loc +
+					     IH_SIZE * del_num));
+		do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
+	}
+}
+
+/* paste new_entry_count entries (new_dehs, records) into position 'before' in the item_num-th item */
+void leaf_paste_entries(struct buffer_info *bi,
+			int item_num,
+			int before,
+			int new_entry_count,
+			struct reiserfs_de_head *new_dehs,
+			const char *records, int paste_size)
+{
+	struct item_head *ih;
+	char *item;
+	struct reiserfs_de_head *deh;
+	char *insert_point;
+	int i, old_entry_num;
+	struct buffer_head *bh = bi->bi_bh;
+
+	if (new_entry_count == 0)
+		return;
+
+	ih = B_N_PITEM_HEAD(bh, item_num);
+
+	/* make sure, that item is directory, and there are enough records in it */
+	RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item");
+	RFALSE(I_ENTRY_COUNT(ih) < before,
+	       "10230: there are no entry we paste entries before. entry_count = %d, before = %d",
+	       I_ENTRY_COUNT(ih), before);
+
+	/* first byte of dest item */
+	item = bh->b_data + ih_location(ih);
+
+	/* entry head array */
+	deh = B_I_DEH(bh, ih);
+
+	/* new records will be pasted at this point */
+	insert_point =
+	    item +
+	    (before ? deh_location(&(deh[before - 1]))
+	     : (ih_item_len(ih) - paste_size));
+
+	/* adjust locations of records that will be AFTER new records */
+	for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--)
+		put_deh_location(&(deh[i]),
+				 deh_location(&(deh[i])) +
+				 (DEH_SIZE * new_entry_count));
+
+	/* adjust locations of records that will be BEFORE new records */
+	for (i = 0; i < before; i++)
+		put_deh_location(&(deh[i]),
+				 deh_location(&(deh[i])) + paste_size);
+
+	old_entry_num = I_ENTRY_COUNT(ih);
+	put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
+
+	/* prepare space for pasted records */
+	memmove(insert_point + paste_size, insert_point,
+		item + (ih_item_len(ih) - paste_size) - insert_point);
+
+	/* copy new records */
+	memcpy(insert_point + DEH_SIZE * new_entry_count, records,
+	       paste_size - DEH_SIZE * new_entry_count);
+
+	/* prepare space for new entry heads */
+	deh += before;
+	memmove((char *)(deh + new_entry_count), deh,
+		insert_point - (char *)deh);
+
+	/* copy new entry heads */
+	memcpy(deh, new_dehs, DEH_SIZE * new_entry_count);
+
+	/* set locations of new records */
+	for (i = 0; i < new_entry_count; i++) {
+		put_deh_location(&(deh[i]),
+				 deh_location(&(deh[i])) -
+				 deh_location(&(new_dehs[new_entry_count - 1])) +
+				 (insert_point + DEH_SIZE * new_entry_count -
+				  item));
+	}
+
+	/* change item key if necessary (when we paste before the 0-th entry) */
+	if (!before) {
+		set_le_ih_k_offset(ih, deh_offset(new_dehs));
+/*      memcpy (&ih->ih_key.k_offset,
+		       &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
+	}
+#ifdef CONFIG_REISERFS_CHECK
+	{
+		int prev, next;
+		/* check record locations */
+		deh = B_I_DEH(bh, ih);
+		for (i = 0; i < I_ENTRY_COUNT(ih); i++) {
+			next =
+			    (i <
+			     I_ENTRY_COUNT(ih) -
+			     1) ? deh_location(&(deh[i + 1])) : 0;
+			prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0;
+
+			if (prev && prev <= deh_location(&(deh[i])))
+				reiserfs_error(sb_from_bi(bi), "vs-10240",
+					       "directory item (%h) "
+					       "corrupted (prev %a, "
+					       "cur(%d) %a)",
+					       ih, deh + i - 1, i, deh + i);
+			if (next && next >= deh_location(&(deh[i])))
+				reiserfs_error(sb_from_bi(bi), "vs-10250",
+					       "directory item (%h) "
+					       "corrupted (cur(%d) %a, "
+					       "next %a)",
+					       ih, i, deh + i, deh + i + 1);
+		}
+	}
+#endif
+
+}
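+
+/* Note: paste_size is expected to be DEH_SIZE * new_entry_count plus the
+   total length of the new records; that is exactly what
+   leaf_copy_dir_entries above passes in. */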
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/lock.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/lock.c
new file mode 100644
index 0000000..d735bc8
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/lock.c
@@ -0,0 +1,97 @@
+#include "reiserfs.h"
+#include <linux/mutex.h>
+
+/*
+ * The previous reiserfs locking scheme was heavily based on
+ * the tricky properties of the Bkl:
+ *
+ * - it was acquired recursively by the same task
+ * - performance relied on the release-while-schedule() property
+ *
+ * Now that we replace it by a mutex, we still want to keep the same
+ * recursive property to avoid big changes in the code structure.
+ * We use our own lock_owner here because the owner field on a mutex
+ * is only available in SMP or mutex debugging builds; also, we only need
+ * this field for this mutex, so there is no need for a system-wide mutex
+ * facility.
+ *
+ * Also, this lock is often released before a call that could block, because
+ * reiserfs performance was partially based on the release-while-schedule()
+ * property of the Bkl.
+ */
+void reiserfs_write_lock(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	if (sb_i->lock_owner != current) {
+		mutex_lock(&sb_i->lock);
+		sb_i->lock_owner = current;
+	}
+
+	/* No need to protect it, only the current task touches it */
+	sb_i->lock_depth++;
+}
+
+void reiserfs_write_unlock(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	/*
+	 * Are we unlocking without even holding the lock?
+	 * Such a situation must raise a BUG() if we don't want
+	 * to corrupt the data.
+	 */
+	BUG_ON(sb_i->lock_owner != current);
+
+	if (--sb_i->lock_depth == -1) {
+		sb_i->lock_owner = NULL;
+		mutex_unlock(&sb_i->lock);
+	}
+}
+
+/*
+ * If we already own the lock, just exit and don't increase the depth.
+ * Useful when we don't want to lock more than once.
+ *
+ * We always return the lock_depth we had before calling
+ * this function.
+ */
+int reiserfs_write_lock_once(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	if (sb_i->lock_owner != current) {
+		mutex_lock(&sb_i->lock);
+		sb_i->lock_owner = current;
+		return sb_i->lock_depth++;
+	}
+
+	return sb_i->lock_depth;
+}
+
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
+{
+	if (lock_depth == -1)
+		reiserfs_write_unlock(s);
+}
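+
+/*
+ * Usage sketch (illustrative): a caller that may or may not already hold
+ * the write lock pairs the two helpers above as follows:
+ *
+ *	int depth = reiserfs_write_lock_once(s);
+ *	... code that must not retake the lock ...
+ *	reiserfs_write_unlock_once(s, depth);
+ *
+ * Only the call that actually acquired the mutex sees depth == -1, so
+ * only that call releases it.
+ */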
+
+/*
+ * Utility function to force a BUG if it is called without the superblock
+ * write lock held.  caller is the string printed just before calling BUG()
+ */
+void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	if (sb_i->lock_depth < 0)
+		reiserfs_panic(sb, "%s called without kernel lock held",
+			       caller);
+}
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
+}
+#endif
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/namei.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/namei.c
new file mode 100644
index 0000000..84e8a69
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/namei.c
@@ -0,0 +1,1564 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ *
+ * Trivial changes by Alan Cox to remove EHASHCOLLISION for compatibility
+ *
+ * Trivial Changes:
+ * Rights granted to Hans Reiser to redistribute under other terms providing
+ * he accepts all liability including but not limited to patent, fitness
+ * for purpose, and direct or indirect claims arising from failure to perform.
+ *
+ * NO WARRANTY
+ */
+
+#include <linux/time.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
+#include <linux/quotaops.h>
+
+#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
+#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
+
+// directory item contains array of entry headers. This performs
+// binary search through that array
+static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
+{
+	struct item_head *ih = de->de_ih;
+	struct reiserfs_de_head *deh = de->de_deh;
+	int rbound, lbound, j;
+
+	lbound = 0;
+	rbound = I_ENTRY_COUNT(ih) - 1;
+
+	for (j = (rbound + lbound) / 2; lbound <= rbound;
+	     j = (rbound + lbound) / 2) {
+		if (off < deh_offset(deh + j)) {
+			rbound = j - 1;
+			continue;
+		}
+		if (off > deh_offset(deh + j)) {
+			lbound = j + 1;
+			continue;
+		}
+		// this is not the name we are looking for, but the third key component matched
+		de->de_entry_num = j;
+		return NAME_FOUND;
+	}
+
+	de->de_entry_num = lbound;
+	return NAME_NOT_FOUND;
+}
+
+// set de so that it points to the entry the path points to
+static inline void set_de_item_location(struct reiserfs_dir_entry *de,
+					struct treepath *path)
+{
+	de->de_bh = get_last_bh(path);
+	de->de_ih = get_ih(path);
+	de->de_deh = B_I_DEH(de->de_bh, de->de_ih);
+	de->de_item_num = PATH_LAST_POSITION(path);
+}
+
+// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
+inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
+{
+	struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
+
+	BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
+
+	de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num);
+	de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0);
+	de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh);
+	if (de->de_name[de->de_namelen - 1] == 0)
+		de->de_namelen = strlen(de->de_name);
+}
+
+// what entry points to
+static inline void set_de_object_key(struct reiserfs_dir_entry *de)
+{
+	BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
+	de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num]));
+	de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num]));
+}
+
+static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
+{
+	struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
+
+	BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
+
+	/* store key of the found entry */
+	de->de_entry_key.version = KEY_FORMAT_3_5;
+	de->de_entry_key.on_disk_key.k_dir_id =
+	    le32_to_cpu(de->de_ih->ih_key.k_dir_id);
+	de->de_entry_key.on_disk_key.k_objectid =
+	    le32_to_cpu(de->de_ih->ih_key.k_objectid);
+	set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh));
+	set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY);
+}
+
+/* We assign a key to each directory item, and place multiple entries
+in a single directory item.  A directory item has a key equal to the
+key of the first directory entry in it.
+
+This function first calls search_by_key; then, if an item whose first
+entry matches is not found, it looks for the entry inside the directory
+item found by search_by_key.  It fills in the path to the entry, and
+the entry position in the item.
+*/
+
+/* The function is NOT SCHEDULE-SAFE! */
+int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
+			struct treepath *path, struct reiserfs_dir_entry *de)
+{
+	int retval;
+
+	retval = search_item(sb, key, path);
+	switch (retval) {
+	case ITEM_NOT_FOUND:
+		if (!PATH_LAST_POSITION(path)) {
+			reiserfs_error(sb, "vs-7000", "search_by_key "
+				       "returned item position == 0");
+			pathrelse(path);
+			return IO_ERROR;
+		}
+		PATH_LAST_POSITION(path)--;
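+		/* fall through */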
+
+	case ITEM_FOUND:
+		break;
+
+	case IO_ERROR:
+		return retval;
+
+	default:
+		pathrelse(path);
+		reiserfs_error(sb, "vs-7002", "no path to here");
+		return IO_ERROR;
+	}
+
+	set_de_item_location(de, path);
+
+#ifdef CONFIG_REISERFS_CHECK
+	if (!is_direntry_le_ih(de->de_ih) ||
+	    COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) {
+		print_block(de->de_bh, 0, -1, -1);
+		reiserfs_panic(sb, "vs-7005", "found item %h is not directory "
+			       "item or does not belong to the same directory "
+			       "as key %K", de->de_ih, key);
+	}
+#endif				/* CONFIG_REISERFS_CHECK */
+
+	/* binary search in directory item by the third component of the
+	   key. sets de->de_entry_num of de */
+	retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
+	path->pos_in_item = de->de_entry_num;
+	if (retval != NAME_NOT_FOUND) {
+		// ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set
+		set_de_name_and_namelen(de);
+		set_de_object_key(de);
+	}
+	return retval;
+}
+
+/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
+
+/* The third component is hashed, and you can choose from more than
+   one hash function.  Per directory hashes are not yet implemented
+   but are thought about. This function should be moved to hashes.c
+   Jedi, please do so.  -Hans */
+
+static __u32 get_third_component(struct super_block *s,
+				 const char *name, int len)
+{
+	__u32 res;
+
+	if (!len || (len == 1 && name[0] == '.'))
+		return DOT_OFFSET;
+	if (len == 2 && name[0] == '.' && name[1] == '.')
+		return DOT_DOT_OFFSET;
+
+	res = REISERFS_SB(s)->s_hash_function(name, len);
+
+	// take bits from 7-th to 30-th including both bounds
+	res = GET_HASH_VALUE(res);
+	if (res == 0)
+		// needed to have no names before "." and ".."; those have hash
+		// value == 0 and generation counters 1 and 2 respectively
+		res = 128;
+	return res + MAX_GENERATION_NUMBER;
+}
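+
+/* Sketch of the resulting offset layout (following the GET_HASH_VALUE and
+   GET_GENERATION_NUMBER split used in this file): bits 7..30 of the third
+   key component carry the hash value, and the low bits carry the
+   generation number that disambiguates hash collisions.  Returning
+   res + MAX_GENERATION_NUMBER points the search at the highest possible
+   generation for this hash, so that linear_search_in_dir_item below can
+   scan matching entries backwards. */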
+
+static int reiserfs_match(struct reiserfs_dir_entry *de,
+			  const char *name, int namelen)
+{
+	int retval = NAME_NOT_FOUND;
+
+	if ((namelen == de->de_namelen) &&
+	    !memcmp(de->de_name, name, de->de_namelen))
+		retval =
+		    (de_visible(de->de_deh + de->de_entry_num) ? NAME_FOUND :
+		     NAME_FOUND_INVISIBLE);
+
+	return retval;
+}
+
+/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already.
+   Used when hash collisions exist. */
+
+static int linear_search_in_dir_item(struct cpu_key *key,
+				     struct reiserfs_dir_entry *de,
+				     const char *name, int namelen)
+{
+	struct reiserfs_de_head *deh = de->de_deh;
+	int retval;
+	int i;
+
+	i = de->de_entry_num;
+
+	if (i == I_ENTRY_COUNT(de->de_ih) ||
+	    GET_HASH_VALUE(deh_offset(deh + i)) !=
+	    GET_HASH_VALUE(cpu_key_k_offset(key))) {
+		i--;
+	}
+
+	RFALSE(de->de_deh != B_I_DEH(de->de_bh, de->de_ih),
+	       "vs-7010: array of entry headers not found");
+
+	deh += i;
+
+	for (; i >= 0; i--, deh--) {
+		if (GET_HASH_VALUE(deh_offset(deh)) !=
+		    GET_HASH_VALUE(cpu_key_k_offset(key))) {
+			// hash value does not match, no need to check whole name
+			return NAME_NOT_FOUND;
+		}
+
+		/* mark, that this generation number is used */
+		if (de->de_gen_number_bit_string)
+			set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
+				de->de_gen_number_bit_string);
+
+		// calculate pointer to name and namelen
+		de->de_entry_num = i;
+		set_de_name_and_namelen(de);
+
+		if ((retval =
+		     reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) {
+			// de's de_name, de_namelen, de_recordlen are set. Fill the rest:
+
+			// key of pointed object
+			set_de_object_key(de);
+
+			store_de_entry_key(de);
+
+			// retval can be NAME_FOUND or NAME_FOUND_INVISIBLE
+			return retval;
+		}
+	}
+
+	if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
+		/* we have reached the leftmost entry in the node. Normally we
+		   would have to go to the left neighbor, but if the generation
+		   counter is already 0, we know for sure that there is no name
+		   with the same hash value */
+		// FIXME: this works correctly only because a hash value cannot
+		// be 0. Btw, with Yura's hash a zero value is probably
+		// possible, so this is a bug
+		return NAME_NOT_FOUND;
+
+	RFALSE(de->de_item_num,
+	       "vs-7015: two diritems of the same directory in one node?");
+
+	return GOTO_PREVIOUS_ITEM;
+}
+
+// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
+// FIXME: should add something like IOERROR
+static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
+			       struct treepath *path_to_entry,
+			       struct reiserfs_dir_entry *de)
+{
+	struct cpu_key key_to_search;
+	int retval;
+
+	if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize))
+		return NAME_NOT_FOUND;
+
+	/* we will search for this key in the tree */
+	make_cpu_key(&key_to_search, dir,
+		     get_third_component(dir->i_sb, name, namelen),
+		     TYPE_DIRENTRY, 3);
+
+	while (1) {
+		retval =
+		    search_by_entry_key(dir->i_sb, &key_to_search,
+					path_to_entry, de);
+		if (retval == IO_ERROR) {
+			reiserfs_error(dir->i_sb, "zam-7001", "io error");
+			return IO_ERROR;
+		}
+
+		/* compare names for all entries having given hash value */
+		retval =
+		    linear_search_in_dir_item(&key_to_search, de, name,
+					      namelen);
+		if (retval != GOTO_PREVIOUS_ITEM) {
+			/* no need to scan the directory anymore: the given
+			   entry was found or does not exist */
+			path_to_entry->pos_in_item = de->de_entry_num;
+			return retval;
+		}
+
+		/* a left neighboring item of this directory exists and the
+		   given entry may be there */
+		set_cpu_key_k_offset(&key_to_search,
+				     le_ih_k_offset(de->de_ih) - 1);
+		pathrelse(path_to_entry);
+
+	}			/* while (1) */
+}
+
+static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
+				      struct nameidata *nd)
+{
+	int retval;
+	int lock_depth;
+	struct inode *inode = NULL;
+	struct reiserfs_dir_entry de;
+	INITIALIZE_PATH(path_to_entry);
+
+	if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	/*
+	 * Might be called with or without the write lock, must be careful
+	 * to not recursively hold it in case we want to release the lock
+	 * before rescheduling.
+	 */
+	lock_depth = reiserfs_write_lock_once(dir->i_sb);
+
+	de.de_gen_number_bit_string = NULL;
+	retval =
+	    reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
+				&path_to_entry, &de);
+	pathrelse(&path_to_entry);
+	if (retval == NAME_FOUND) {
+		inode = reiserfs_iget(dir->i_sb,
+				      (struct cpu_key *)&(de.de_dir_id));
+		if (!inode || IS_ERR(inode)) {
+			reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+			return ERR_PTR(-EACCES);
+		}
+
+		/* Propagate the private flag so we know we're
+		 * in the priv tree */
+		if (IS_PRIVATE(dir))
+			inode->i_flags |= S_PRIVATE;
+	}
+	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+	if (retval == IO_ERROR) {
+		return ERR_PTR(-EIO);
+	}
+
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+** looks up the dentry of the parent directory for child.
+** taken from ext2_get_parent
+*/
+struct dentry *reiserfs_get_parent(struct dentry *child)
+{
+	int retval;
+	struct inode *inode = NULL;
+	struct reiserfs_dir_entry de;
+	INITIALIZE_PATH(path_to_entry);
+	struct inode *dir = child->d_inode;
+
+	if (dir->i_nlink == 0) {
+		return ERR_PTR(-ENOENT);
+	}
+	de.de_gen_number_bit_string = NULL;
+
+	reiserfs_write_lock(dir->i_sb);
+	retval = reiserfs_find_entry(dir, "..", 2, &path_to_entry, &de);
+	pathrelse(&path_to_entry);
+	if (retval != NAME_FOUND) {
+		reiserfs_write_unlock(dir->i_sb);
+		return ERR_PTR(-ENOENT);
+	}
+	inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
+	reiserfs_write_unlock(dir->i_sb);
+
+	return d_obtain_alias(inode);
+}
+
+/* add entry to the directory (entry can be hidden).
+
+insert definition of when hidden directories are used here -Hans
+
+   Does not mark the dir inode dirty; do that after a successful call. */
+
+static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
+			      struct inode *dir, const char *name, int namelen,
+			      struct inode *inode, int visible)
+{
+	struct cpu_key entry_key;
+	struct reiserfs_de_head *deh;
+	INITIALIZE_PATH(path);
+	struct reiserfs_dir_entry de;
+	DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1);
+	int gen_number;
+	char small_buf[32 + DEH_SIZE];	/* 48 bytes now; we avoid kmalloc
+					   if we create a file with a short name */
+	char *buffer;
+	int buflen, paste_size;
+	int retval;
+
+	BUG_ON(!th->t_trans_id);
+
+	/* cannot allow items to be added into a busy deleted directory */
+	if (!namelen)
+		return -EINVAL;
+
+	if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize))
+		return -ENAMETOOLONG;
+
+	/* each entry has unique key. compose it */
+	make_cpu_key(&entry_key, dir,
+		     get_third_component(dir->i_sb, name, namelen),
+		     TYPE_DIRENTRY, 3);
+
+	/* get memory for composing the entry */
+	buflen = DEH_SIZE + ROUND_UP(namelen);
+	if (buflen > sizeof(small_buf)) {
+		buffer = kmalloc(buflen, GFP_NOFS);
+		if (!buffer)
+			return -ENOMEM;
+	} else
+		buffer = small_buf;
+
+	paste_size =
+	    (get_inode_sd_version(dir) ==
+	     STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen;
+
+	/* fill buffer: directory entry head, name[, dir objectid | , stat data | , stat data, dir objectid] */
+	deh = (struct reiserfs_de_head *)buffer;
+	deh->deh_location = 0;	/* JDM Endian safe if 0 */
+	put_deh_offset(deh, cpu_key_k_offset(&entry_key));
+	deh->deh_state = 0;	/* JDM Endian safe if 0 */
+	/* put key (ino analog) to de */
+	deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id;	/* safe: k_dir_id is le */
+	deh->deh_objectid = INODE_PKEY(inode)->k_objectid;	/* safe: k_objectid is le */
+
+	/* copy name */
+	memcpy((char *)(deh + 1), name, namelen);
+	/* pad with 0s to the 4 byte boundary */
+	padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen);
+
+	/* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */
+	mark_de_without_sd(deh);
+	visible ? mark_de_visible(deh) : mark_de_hidden(deh);
+
+	/* find the proper place for the new entry */
+	memset(bit_string, 0, sizeof(bit_string));
+	de.de_gen_number_bit_string = bit_string;
+	retval = reiserfs_find_entry(dir, name, namelen, &path, &de);
+	if (retval != NAME_NOT_FOUND) {
+		if (buffer != small_buf)
+			kfree(buffer);
+		pathrelse(&path);
+
+		if (retval == IO_ERROR) {
+			return -EIO;
+		}
+
+		if (retval != NAME_FOUND) {
+			reiserfs_error(dir->i_sb, "zam-7002",
+				       "reiserfs_find_entry() returned "
+				       "unexpected value (%d)", retval);
+		}
+
+		return -EEXIST;
+	}
+
+	gen_number =
+	    find_first_zero_bit(bit_string,
+				MAX_GENERATION_NUMBER + 1);
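+	/* e.g. (hypothetical): if two colliding names already occupy
+	   generation numbers 0 and 1, reiserfs_find_entry set bits 0 and 1
+	   in bit_string above, and find_first_zero_bit yields 2 */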
+	if (gen_number > MAX_GENERATION_NUMBER) {
+		/* there is no free generation number */
+		reiserfs_warning(dir->i_sb, "reiserfs-7010",
+				 "Congratulations! we have got hash function "
+				 "screwed up");
+		if (buffer != small_buf)
+			kfree(buffer);
+		pathrelse(&path);
+		return -EBUSY;
+	}
+	/* adjust offset of directory entry */
+	put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number));
+	set_cpu_key_k_offset(&entry_key, deh_offset(deh));
+
+	/* update max-hash-collisions counter in reiserfs_sb_info */
+	PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number);
+
+	if (gen_number != 0) {	/* we need to re-search for the insertion point */
+		if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
+		    NAME_NOT_FOUND) {
+			reiserfs_warning(dir->i_sb, "vs-7032",
+					 "entry with this key (%K) already "
+					 "exists", &entry_key);
+
+			if (buffer != small_buf)
+				kfree(buffer);
+			pathrelse(&path);
+			return -EBUSY;
+		}
+	}
+
+	/* perform the insertion of the entry that we have prepared */
+	retval =
+	    reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer,
+				     paste_size);
+	if (buffer != small_buf)
+		kfree(buffer);
+	if (retval) {
+		reiserfs_check_path(&path);
+		return retval;
+	}
+
+	dir->i_size += paste_size;
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	if (!S_ISDIR(inode->i_mode) && visible)
+		// reiserfs_mkdir or reiserfs_rename will do that by itself
+		reiserfs_update_sd(th, dir);
+
+	reiserfs_check_path(&path);
+	return 0;
+}
+
+/* quota utility function, call if you've had to abort after calling
+** new_inode_init, and have not called reiserfs_new_inode yet.
+** This should only be called on inodes that do not have stat data
+** inserted into the tree yet.
+*/
+static int drop_new_inode(struct inode *inode)
+{
+	dquot_drop(inode);
+	make_bad_inode(inode);
+	inode->i_flags |= S_NOQUOTA;
+	iput(inode);
+	return 0;
+}
+
+/* utility function that does setup for reiserfs_new_inode.
+** dquot_initialize needs lots of credits, so it's better to have it
+** outside of a transaction; that is why some bits of
+** reiserfs_new_inode were pulled out into this func.
+*/
+static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
+{
+	/* Make inode invalid - just in case we are going to drop it before
+	 * the initialization happens */
+	INODE_PKEY(inode)->k_objectid = 0;
+	/* the quota init calls have to know who to charge the quota to, so
+	 ** we have to set uid and gid here
+	 */
+	inode_init_owner(inode, dir, mode);
+	dquot_initialize(inode);
+	return 0;
+}
+
+static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+			   struct nameidata *nd)
+{
+	int retval;
+	struct inode *inode;
+	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 2 +
+	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
+		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
+	struct reiserfs_transaction_handle th;
+	struct reiserfs_security_handle security;
+
+	dquot_initialize(dir);
+
+	if (!(inode = new_inode(dir->i_sb))) {
+		return -ENOMEM;
+	}
+	new_inode_init(inode, dir, mode);
+
+	jbegin_count += reiserfs_cache_default_acl(dir);
+	retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
+	if (retval < 0) {
+		drop_new_inode(inode);
+		return retval;
+	}
+	jbegin_count += retval;
+	reiserfs_write_lock(dir->i_sb);
+
+	retval = journal_begin(&th, dir->i_sb, jbegin_count);
+	if (retval) {
+		drop_new_inode(inode);
+		goto out_failed;
+	}
+
+	retval =
+	    reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
+			       inode, &security);
+	if (retval)
+		goto out_failed;
+
+	inode->i_op = &reiserfs_file_inode_operations;
+	inode->i_fop = &reiserfs_file_operations;
+	inode->i_mapping->a_ops = &reiserfs_address_space_operations;
+
+	retval =
+	    reiserfs_add_entry(&th, dir, dentry->d_name.name,
+			       dentry->d_name.len, inode, 1 /*visible */ );
+	if (retval) {
+		int err;
+		drop_nlink(inode);
+		reiserfs_update_sd(&th, inode);
+		err = journal_end(&th, dir->i_sb, jbegin_count);
+		if (err)
+			retval = err;
+		unlock_new_inode(inode);
+		iput(inode);
+		goto out_failed;
+	}
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(dir);
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+	retval = journal_end(&th, dir->i_sb, jbegin_count);
+
+      out_failed:
+	reiserfs_write_unlock(dir->i_sb);
+	return retval;
+}
+
+static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+			  dev_t rdev)
+{
+	int retval;
+	struct inode *inode;
+	struct reiserfs_transaction_handle th;
+	struct reiserfs_security_handle security;
+	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 3 +
+	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
+		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	dquot_initialize(dir);
+
+	if (!(inode = new_inode(dir->i_sb))) {
+		return -ENOMEM;
+	}
+	new_inode_init(inode, dir, mode);
+
+	jbegin_count += reiserfs_cache_default_acl(dir);
+	retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
+	if (retval < 0) {
+		drop_new_inode(inode);
+		return retval;
+	}
+	jbegin_count += retval;
+	reiserfs_write_lock(dir->i_sb);
+
+	retval = journal_begin(&th, dir->i_sb, jbegin_count);
+	if (retval) {
+		drop_new_inode(inode);
+		goto out_failed;
+	}
+
+	retval =
+	    reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
+			       inode, &security);
+	if (retval) {
+		goto out_failed;
+	}
+
+	inode->i_op = &reiserfs_special_inode_operations;
+	init_special_inode(inode, inode->i_mode, rdev);
+
+	//FIXME: needed for block and char devices only
+	reiserfs_update_sd(&th, inode);
+
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(dir);
+
+	retval =
+	    reiserfs_add_entry(&th, dir, dentry->d_name.name,
+			       dentry->d_name.len, inode, 1 /*visible */ );
+	if (retval) {
+		int err;
+		drop_nlink(inode);
+		reiserfs_update_sd(&th, inode);
+		err = journal_end(&th, dir->i_sb, jbegin_count);
+		if (err)
+			retval = err;
+		unlock_new_inode(inode);
+		iput(inode);
+		goto out_failed;
+	}
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+	retval = journal_end(&th, dir->i_sb, jbegin_count);
+
+      out_failed:
+	reiserfs_write_unlock(dir->i_sb);
+	return retval;
+}
+
+static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int retval;
+	struct inode *inode;
+	struct reiserfs_transaction_handle th;
+	struct reiserfs_security_handle security;
+	int lock_depth;
+	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 3 +
+	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
+		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
+
+	dquot_initialize(dir);
+
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	/* set flag that a new packing locality was created and new blocks
+	 * for the content of that directory are not displaced yet */
+	REISERFS_I(dir)->new_packing_locality = 1;
+#endif
+	mode = S_IFDIR | mode;
+	if (!(inode = new_inode(dir->i_sb))) {
+		return -ENOMEM;
+	}
+	new_inode_init(inode, dir, mode);
+
+	jbegin_count += reiserfs_cache_default_acl(dir);
+	retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
+	if (retval < 0) {
+		drop_new_inode(inode);
+		return retval;
+	}
+	jbegin_count += retval;
+	lock_depth = reiserfs_write_lock_once(dir->i_sb);
+
+	retval = journal_begin(&th, dir->i_sb, jbegin_count);
+	if (retval) {
+		drop_new_inode(inode);
+		goto out_failed;
+	}
+
+	/* inc the link count now, so another writer doesn't overflow it while
+	 ** we sleep later on.
+	 */
+	INC_DIR_INODE_NLINK(dir)
+
+	    retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ ,
+					old_format_only(dir->i_sb) ?
+					EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
+					dentry, inode, &security);
+	if (retval) {
+		DEC_DIR_INODE_NLINK(dir)
+		goto out_failed;
+	}
+
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(dir);
+
+	inode->i_op = &reiserfs_dir_inode_operations;
+	inode->i_fop = &reiserfs_dir_operations;
+
+	// note, _this_ add_entry will not update dir's stat data
+	retval =
+	    reiserfs_add_entry(&th, dir, dentry->d_name.name,
+			       dentry->d_name.len, inode, 1 /*visible */ );
+	if (retval) {
+		int err;
+		clear_nlink(inode);
+		DEC_DIR_INODE_NLINK(dir);
+		reiserfs_update_sd(&th, inode);
+		err = journal_end(&th, dir->i_sb, jbegin_count);
+		if (err)
+			retval = err;
+		unlock_new_inode(inode);
+		iput(inode);
+		goto out_failed;
+	}
+	// the above add_entry did not update dir's stat data
+	reiserfs_update_sd(&th, dir);
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+	retval = journal_end(&th, dir->i_sb, jbegin_count);
+out_failed:
+	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+	return retval;
+}
+
+static inline int reiserfs_empty_dir(struct inode *inode)
+{
+	/* we can cheat because an old format dir cannot have
+	 ** EMPTY_DIR_SIZE, and a new format dir cannot have
+	 ** EMPTY_DIR_SIZE_V1.  So, if the inode is either size,
+	 ** regardless of disk format version, the directory is empty.
+	 */
+	if (inode->i_size != EMPTY_DIR_SIZE &&
+	    inode->i_size != EMPTY_DIR_SIZE_V1) {
+		return 0;
+	}
+	return 1;
+}
+
+static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int retval, err;
+	struct inode *inode;
+	struct reiserfs_transaction_handle th;
+	int jbegin_count;
+	INITIALIZE_PATH(path);
+	struct reiserfs_dir_entry de;
+
+	/* we will be doing 2 balancings and updating 2 stat data; we change
+	 * quotas of the owner of the directory and of the owner of the parent
+	 * directory.  The quota structure is possibly deleted only on the last
+	 * iput => outside of this transaction */
+	jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
+	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
+
+	dquot_initialize(dir);
+
+	reiserfs_write_lock(dir->i_sb);
+	retval = journal_begin(&th, dir->i_sb, jbegin_count);
+	if (retval)
+		goto out_rmdir;
+
+	de.de_gen_number_bit_string = NULL;
+	if ((retval =
+	     reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
+				 &path, &de)) == NAME_NOT_FOUND) {
+		retval = -ENOENT;
+		goto end_rmdir;
+	} else if (retval == IO_ERROR) {
+		retval = -EIO;
+		goto end_rmdir;
+	}
+
+	inode = dentry->d_inode;
+
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(dir);
+
+	if (de.de_objectid != inode->i_ino) {
+		// FIXME: compare key of an object and a key found in the
+		// entry
+		retval = -EIO;
+		goto end_rmdir;
+	}
+	if (!reiserfs_empty_dir(inode)) {
+		retval = -ENOTEMPTY;
+		goto end_rmdir;
+	}
+
+	/* cut entry from dir directory */
+	retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL,	/* page */
+					0 /*new file size - not used here */ );
+	if (retval < 0)
+		goto end_rmdir;
+
+	if (inode->i_nlink != 2 && inode->i_nlink != 1)
+		reiserfs_error(inode->i_sb, "reiserfs-7040",
+			       "empty directory has nlink != 2 (%d)",
+			       inode->i_nlink);
+
+	clear_nlink(inode);
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+	reiserfs_update_sd(&th, inode);
+
+	DEC_DIR_INODE_NLINK(dir)
+	    dir->i_size -= (DEH_SIZE + de.de_entrylen);
+	reiserfs_update_sd(&th, dir);
+
+	/* prevent empty directory from getting lost */
+	add_save_link(&th, inode, 0 /* not truncate */ );
+
+	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	reiserfs_check_path(&path);
+      out_rmdir:
+	reiserfs_write_unlock(dir->i_sb);
+	return retval;
+
+      end_rmdir:
+	/* we must release the path: either we did not call
+	   reiserfs_cut_from_item, or reiserfs_cut_from_item did not
+	   release the path because the operation was not complete */
+	pathrelse(&path);
+	err = journal_end(&th, dir->i_sb, jbegin_count);
+	reiserfs_write_unlock(dir->i_sb);
+	return err ? err : retval;
+}
+
+static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int retval, err;
+	struct inode *inode;
+	struct reiserfs_dir_entry de;
+	INITIALIZE_PATH(path);
+	struct reiserfs_transaction_handle th;
+	int jbegin_count;
+	unsigned long savelink;
+	int depth;
+
+	dquot_initialize(dir);
+
+	inode = dentry->d_inode;
+
+	/* in this transaction we can be doing at most two balancings and
+	 * updating two stat data; we change quotas of the owner of the
+	 * directory and of the owner of the parent directory. The quota
+	 * structure is possibly deleted only on iput => outside of this
+	 * transaction */
+	jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
+	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
+
+	depth = reiserfs_write_lock_once(dir->i_sb);
+	retval = journal_begin(&th, dir->i_sb, jbegin_count);
+	if (retval)
+		goto out_unlink;
+
+	de.de_gen_number_bit_string = NULL;
+	if ((retval =
+	     reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
+				 &path, &de)) == NAME_NOT_FOUND) {
+		retval = -ENOENT;
+		goto end_unlink;
+	} else if (retval == IO_ERROR) {
+		retval = -EIO;
+		goto end_unlink;
+	}
+
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(dir);
+
+	if (de.de_objectid != inode->i_ino) {
+		// FIXME: compare key of an object and a key found in the
+		// entry
+		retval = -EIO;
+		goto end_unlink;
+	}
+
+	if (!inode->i_nlink) {
+		reiserfs_warning(inode->i_sb, "reiserfs-7042",
+				 "deleting nonexistent file (%lu), %d",
+				 inode->i_ino, inode->i_nlink);
+		set_nlink(inode, 1);
+	}
+
+	drop_nlink(inode);
+
+	/*
+	 * we schedule before doing the add_save_link call, save the link
+	 * count so we don't race
+	 */
+	savelink = inode->i_nlink;
+
+	retval =
+	    reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL,
+				   0);
+	if (retval < 0) {
+		inc_nlink(inode);
+		goto end_unlink;
+	}
+	inode->i_ctime = CURRENT_TIME_SEC;
+	reiserfs_update_sd(&th, inode);
+
+	dir->i_size -= (de.de_entrylen + DEH_SIZE);
+	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+	reiserfs_update_sd(&th, dir);
+
+	if (!savelink)
+		/* prevent file from getting lost */
+		add_save_link(&th, inode, 0 /* not truncate */ );
+
+	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	reiserfs_check_path(&path);
+	reiserfs_write_unlock_once(dir->i_sb, depth);
+	return retval;
+
+      end_unlink:
+	pathrelse(&path);
+	err = journal_end(&th, dir->i_sb, jbegin_count);
+	reiserfs_check_path(&path);
+	if (err)
+		retval = err;
+      out_unlink:
+	reiserfs_write_unlock_once(dir->i_sb, depth);
+	return retval;
+}
+
+static int reiserfs_symlink(struct inode *parent_dir,
+			    struct dentry *dentry, const char *symname)
+{
+	int retval;
+	struct inode *inode;
+	char *name;
+	int item_len;
+	struct reiserfs_transaction_handle th;
+	struct reiserfs_security_handle security;
+	int mode = S_IFLNK | S_IRWXUGO;
+	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 3 +
+	    2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
+		 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
+
+	dquot_initialize(parent_dir);
+
+	if (!(inode = new_inode(parent_dir->i_sb))) {
+		return -ENOMEM;
+	}
+	new_inode_init(inode, parent_dir, mode);
+
+	retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name,
+					&security);
+	if (retval < 0) {
+		drop_new_inode(inode);
+		return retval;
+	}
+	jbegin_count += retval;
+
+	reiserfs_write_lock(parent_dir->i_sb);
+	item_len = ROUND_UP(strlen(symname));
+	if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) {
+		retval = -ENAMETOOLONG;
+		drop_new_inode(inode);
+		goto out_failed;
+	}
+
+	name = kmalloc(item_len, GFP_NOFS);
+	if (!name) {
+		drop_new_inode(inode);
+		retval = -ENOMEM;
+		goto out_failed;
+	}
+	memcpy(name, symname, strlen(symname));
+	padd_item(name, item_len, strlen(symname));
+
+	retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
+	if (retval) {
+		drop_new_inode(inode);
+		kfree(name);
+		goto out_failed;
+	}
+
+	retval =
+	    reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
+			       dentry, inode, &security);
+	kfree(name);
+	if (retval) {		/* reiserfs_new_inode iputs for us */
+		goto out_failed;
+	}
+
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(parent_dir);
+
+	inode->i_op = &reiserfs_symlink_inode_operations;
+	inode->i_mapping->a_ops = &reiserfs_address_space_operations;
+
+	// must be sure this inode is written with this transaction
+	//
+	//reiserfs_update_sd (&th, inode, READ_BLOCKS);
+
+	retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
+				    dentry->d_name.len, inode, 1 /*visible */ );
+	if (retval) {
+		int err;
+		drop_nlink(inode);
+		reiserfs_update_sd(&th, inode);
+		err = journal_end(&th, parent_dir->i_sb, jbegin_count);
+		if (err)
+			retval = err;
+		unlock_new_inode(inode);
+		iput(inode);
+		goto out_failed;
+	}
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+	retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
+      out_failed:
+	reiserfs_write_unlock(parent_dir->i_sb);
+	return retval;
+}
+
+static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
+			 struct dentry *dentry)
+{
+	int retval;
+	struct inode *inode = old_dentry->d_inode;
+	struct reiserfs_transaction_handle th;
+	/* We need blocks for transaction + update of quotas for the owners of the directory */
+	int jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 3 +
+	    2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
+
+	dquot_initialize(dir);
+
+	reiserfs_write_lock(dir->i_sb);
+	if (inode->i_nlink >= REISERFS_LINK_MAX) {
+		//FIXME: sd_nlink is 32 bit for new files
+		reiserfs_write_unlock(dir->i_sb);
+		return -EMLINK;
+	}
+
+	/* inc before scheduling so reiserfs_unlink knows we are here */
+	inc_nlink(inode);
+
+	retval = journal_begin(&th, dir->i_sb, jbegin_count);
+	if (retval) {
+		drop_nlink(inode);
+		reiserfs_write_unlock(dir->i_sb);
+		return retval;
+	}
+
+	/* create new entry */
+	retval =
+	    reiserfs_add_entry(&th, dir, dentry->d_name.name,
+			       dentry->d_name.len, inode, 1 /*visible */ );
+
+	reiserfs_update_inode_transaction(inode);
+	reiserfs_update_inode_transaction(dir);
+
+	if (retval) {
+		int err;
+		drop_nlink(inode);
+		err = journal_end(&th, dir->i_sb, jbegin_count);
+		reiserfs_write_unlock(dir->i_sb);
+		return err ? err : retval;
+	}
+
+	inode->i_ctime = CURRENT_TIME_SEC;
+	reiserfs_update_sd(&th, inode);
+
+	ihold(inode);
+	d_instantiate(dentry, inode);
+	retval = journal_end(&th, dir->i_sb, jbegin_count);
+	reiserfs_write_unlock(dir->i_sb);
+	return retval;
+}
+
+/* de contains information pointing to an entry; check that the entry
+   still contains the name we are interested in */
+static int de_still_valid(const char *name, int len,
+			  struct reiserfs_dir_entry *de)
+{
+	struct reiserfs_dir_entry tmp = *de;
+
+	// recalculate pointer to name and name length
+	set_de_name_and_namelen(&tmp);
+	// FIXME: could check more
+	if (tmp.de_namelen != len || memcmp(name, de->de_name, len))
+		return 0;
+	return 1;
+}
+
+static int entry_points_to_object(const char *name, int len,
+				  struct reiserfs_dir_entry *de,
+				  struct inode *inode)
+{
+	if (!de_still_valid(name, len, de))
+		return 0;
+
+	if (inode) {
+		if (!de_visible(de->de_deh + de->de_entry_num))
+			reiserfs_panic(inode->i_sb, "vs-7042",
+				       "entry must be visible");
+		return (de->de_objectid == inode->i_ino) ? 1 : 0;
+	}
+
+	/* this must be the just-added hidden entry */
+	if (de_visible(de->de_deh + de->de_entry_num))
+		reiserfs_panic(NULL, "vs-7043", "entry must be visible");
+
+	return 1;
+}
+
+/* sets key of objectid the entry has to point to */
+static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
+				 struct reiserfs_key *key)
+{
+	/* JDM These operations are endian safe - both are le */
+	de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id;
+	de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
+}
+
+/*
+ * a process that is going to call fix_nodes/do_balance must hold only
+ * one path. If it holds 2 or more, it can get into endless waiting in
+ * get_empty_nodes or its clones
+ */
+static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			   struct inode *new_dir, struct dentry *new_dentry)
+{
+	int retval;
+	INITIALIZE_PATH(old_entry_path);
+	INITIALIZE_PATH(new_entry_path);
+	INITIALIZE_PATH(dot_dot_entry_path);
+	struct item_head new_entry_ih, old_entry_ih, dot_dot_ih;
+	struct reiserfs_dir_entry old_de, new_de, dot_dot_de;
+	struct inode *old_inode, *new_dentry_inode;
+	struct reiserfs_transaction_handle th;
+	int jbegin_count;
+	umode_t old_inode_mode;
+	unsigned long savelink = 1;
+	struct timespec ctime;
+
+	/* three balancings: (1) old name removal, (2) new name insertion
+	   and (3) maybe "save" link insertion
+	   stat data updates: (1) old directory,
+	   (2) new directory and (3) maybe old object stat data (when it is
+	   directory) and (4) maybe stat data of object to which new entry
+	   pointed initially and (5) maybe block containing ".." of
+	   renamed directory
+	   quota updates: two parent directories */
+	jbegin_count =
+	    JOURNAL_PER_BALANCE_CNT * 3 + 5 +
+	    4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
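+	/* our reading of the sum above: 3 balancings, 5 possible stat data
+	   updates, and quota updates for the two parent directories at
+	   2 * REISERFS_QUOTA_TRANS_BLOCKS each */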
+
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
+	old_inode = old_dentry->d_inode;
+	new_dentry_inode = new_dentry->d_inode;
+
+	// make sure that oldname still exists and points to an object we
+	// are going to rename
+	old_de.de_gen_number_bit_string = NULL;
+	reiserfs_write_lock(old_dir->i_sb);
+	retval =
+	    reiserfs_find_entry(old_dir, old_dentry->d_name.name,
+				old_dentry->d_name.len, &old_entry_path,
+				&old_de);
+	pathrelse(&old_entry_path);
+	if (retval == IO_ERROR) {
+		reiserfs_write_unlock(old_dir->i_sb);
+		return -EIO;
+	}
+
+	if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) {
+		reiserfs_write_unlock(old_dir->i_sb);
+		return -ENOENT;
+	}
+
+	old_inode_mode = old_inode->i_mode;
+	if (S_ISDIR(old_inode_mode)) {
+		// make sure that the directory being renamed has a correct ".."
+		// and that its new parent directory does not have too many
+		// links already
+
+		if (new_dentry_inode) {
+			if (!reiserfs_empty_dir(new_dentry_inode)) {
+				reiserfs_write_unlock(old_dir->i_sb);
+				return -ENOTEMPTY;
+			}
+		}
+
+		/* directory is renamed, its parent directory will be changed,
+		 ** so find ".." entry
+		 */
+		dot_dot_de.de_gen_number_bit_string = NULL;
+		retval =
+		    reiserfs_find_entry(old_inode, "..", 2, &dot_dot_entry_path,
+					&dot_dot_de);
+		pathrelse(&dot_dot_entry_path);
+		if (retval != NAME_FOUND) {
+			reiserfs_write_unlock(old_dir->i_sb);
+			return -EIO;
+		}
+
+		/* inode number of .. must equal old_dir->i_ino */
+		if (dot_dot_de.de_objectid != old_dir->i_ino) {
+			reiserfs_write_unlock(old_dir->i_sb);
+			return -EIO;
+		}
+	}
+
+	retval = journal_begin(&th, old_dir->i_sb, jbegin_count);
+	if (retval) {
+		reiserfs_write_unlock(old_dir->i_sb);
+		return retval;
+	}
+
+	/* add new entry (or find the existing one) */
+	retval =
+	    reiserfs_add_entry(&th, new_dir, new_dentry->d_name.name,
+			       new_dentry->d_name.len, old_inode, 0);
+	if (retval == -EEXIST) {
+		if (!new_dentry_inode) {
+			reiserfs_panic(old_dir->i_sb, "vs-7050",
+				       "new entry is found, new inode == 0");
+		}
+	} else if (retval) {
+		int err = journal_end(&th, old_dir->i_sb, jbegin_count);
+		reiserfs_write_unlock(old_dir->i_sb);
+		return err ? err : retval;
+	}
+
+	reiserfs_update_inode_transaction(old_dir);
+	reiserfs_update_inode_transaction(new_dir);
+
+	/* this makes it so an fsync on an open fd for the old name will
+	 ** commit the rename operation
+	 */
+	reiserfs_update_inode_transaction(old_inode);
+
+	if (new_dentry_inode)
+		reiserfs_update_inode_transaction(new_dentry_inode);
+
+	while (1) {
+		// look for old name using corresponding entry key (found by reiserfs_find_entry)
+		if ((retval =
+		     search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key,
+					 &old_entry_path,
+					 &old_de)) != NAME_FOUND) {
+			pathrelse(&old_entry_path);
+			journal_end(&th, old_dir->i_sb, jbegin_count);
+			reiserfs_write_unlock(old_dir->i_sb);
+			return -EIO;
+		}
+
+		copy_item_head(&old_entry_ih, get_ih(&old_entry_path));
+
+		reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1);
+
+		// look for new name by reiserfs_find_entry
+		new_de.de_gen_number_bit_string = NULL;
+		retval =
+		    reiserfs_find_entry(new_dir, new_dentry->d_name.name,
+					new_dentry->d_name.len, &new_entry_path,
+					&new_de);
+		// reiserfs_find_entry should not return IO_ERROR, because it
+		// is called with essentially the same parameters from
+		// reiserfs_add_entry above, and we'll catch any i/o errors
+		// before we get here.
+		if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) {
+			pathrelse(&new_entry_path);
+			pathrelse(&old_entry_path);
+			journal_end(&th, old_dir->i_sb, jbegin_count);
+			reiserfs_write_unlock(old_dir->i_sb);
+			return -EIO;
+		}
+
+		copy_item_head(&new_entry_ih, get_ih(&new_entry_path));
+
+		reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1);
+
+		if (S_ISDIR(old_inode->i_mode)) {
+			if ((retval =
+			     search_by_entry_key(new_dir->i_sb,
+						 &dot_dot_de.de_entry_key,
+						 &dot_dot_entry_path,
+						 &dot_dot_de)) != NAME_FOUND) {
+				pathrelse(&dot_dot_entry_path);
+				pathrelse(&new_entry_path);
+				pathrelse(&old_entry_path);
+				journal_end(&th, old_dir->i_sb, jbegin_count);
+				reiserfs_write_unlock(old_dir->i_sb);
+				return -EIO;
+			}
+			copy_item_head(&dot_dot_ih,
+				       get_ih(&dot_dot_entry_path));
+			// node containing ".." gets into transaction
+			reiserfs_prepare_for_journal(old_inode->i_sb,
+						     dot_dot_de.de_bh, 1);
+		}
+		/* we should check seals here, not do
+		   this stuff, yes? Then, having
+		   gathered everything into RAM we
+		   should lock the buffers, yes?  -Hans */
+		/* probably.  our rename needs to hold more
+		 ** than one path at once.  The seals would
+		 ** have to be written to deal with multi-path
+		 ** issues -chris
+		 */
+		/* sanity checking before doing the rename - avoid races many
+		 ** of the above checks could have scheduled.  We have to be
+		 ** sure our items haven't been shifted by another process.
+		 */
+		if (item_moved(&new_entry_ih, &new_entry_path) ||
+		    !entry_points_to_object(new_dentry->d_name.name,
+					    new_dentry->d_name.len,
+					    &new_de, new_dentry_inode) ||
+		    item_moved(&old_entry_ih, &old_entry_path) ||
+		    !entry_points_to_object(old_dentry->d_name.name,
+					    old_dentry->d_name.len,
+					    &old_de, old_inode)) {
+			reiserfs_restore_prepared_buffer(old_inode->i_sb,
+							 new_de.de_bh);
+			reiserfs_restore_prepared_buffer(old_inode->i_sb,
+							 old_de.de_bh);
+			if (S_ISDIR(old_inode_mode))
+				reiserfs_restore_prepared_buffer(old_inode->
+								 i_sb,
+								 dot_dot_de.
+								 de_bh);
+			continue;
+		}
+		if (S_ISDIR(old_inode_mode)) {
+			if (item_moved(&dot_dot_ih, &dot_dot_entry_path) ||
+			    !entry_points_to_object("..", 2, &dot_dot_de,
+						    old_dir)) {
+				reiserfs_restore_prepared_buffer(old_inode->
+								 i_sb,
+								 old_de.de_bh);
+				reiserfs_restore_prepared_buffer(old_inode->
+								 i_sb,
+								 new_de.de_bh);
+				reiserfs_restore_prepared_buffer(old_inode->
+								 i_sb,
+								 dot_dot_de.
+								 de_bh);
+				continue;
+			}
+		}
+
+		RFALSE(S_ISDIR(old_inode_mode) &&
+		       !buffer_journal_prepared(dot_dot_de.de_bh), "");
+
+		break;
+	}
+
+	/* ok, all the changes can be done in one fell swoop when we
+	   have claimed all the buffers needed. */
+
+	mark_de_visible(new_de.de_deh + new_de.de_entry_num);
+	set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode));
+	journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh);
+
+	mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
+	journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh);
+	ctime = CURRENT_TIME_SEC;
+	old_dir->i_ctime = old_dir->i_mtime = ctime;
+	new_dir->i_ctime = new_dir->i_mtime = ctime;
+	/* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of
+	   renamed object */
+	old_inode->i_ctime = ctime;
+
+	if (new_dentry_inode) {
+		// adjust link number of the victim
+		if (S_ISDIR(new_dentry_inode->i_mode)) {
+			clear_nlink(new_dentry_inode);
+		} else {
+			drop_nlink(new_dentry_inode);
+		}
+		new_dentry_inode->i_ctime = ctime;
+		savelink = new_dentry_inode->i_nlink;
+	}
+
+	if (S_ISDIR(old_inode_mode)) {
+		/* adjust ".." of renamed directory */
+		set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
+		journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
+
+		if (!new_dentry_inode)
+			/* there (in new_dir) was no directory, so it got new link
+			   (".."  of renamed directory) */
+			INC_DIR_INODE_NLINK(new_dir);
+
+		/* old directory lost one link - ".." of renamed directory */
+		DEC_DIR_INODE_NLINK(old_dir);
+	}
+	// looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
+	pathrelse(&new_entry_path);
+	pathrelse(&dot_dot_entry_path);
+
+	// FIXME: this reiserfs_cut_from_item's return value may screw up
+	// anybody, but it will panic if it is not able to find the
+	// entry. This needs one more cleanup
+	if (reiserfs_cut_from_item
+	    (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
+	     0) < 0)
+		reiserfs_error(old_dir->i_sb, "vs-7060",
+			       "couldn't not cut old name. Fsck later?");
+
+	old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
+
+	reiserfs_update_sd(&th, old_dir);
+	reiserfs_update_sd(&th, new_dir);
+	reiserfs_update_sd(&th, old_inode);
+
+	if (new_dentry_inode) {
+		if (savelink == 0)
+			add_save_link(&th, new_dentry_inode,
+				      0 /* not truncate */ );
+		reiserfs_update_sd(&th, new_dentry_inode);
+	}
+
+	retval = journal_end(&th, old_dir->i_sb, jbegin_count);
+	reiserfs_write_unlock(old_dir->i_sb);
+	return retval;
+}
+
+/*
+ * directories can handle most operations...
+ */
+const struct inode_operations reiserfs_dir_inode_operations = {
+	//&reiserfs_dir_operations,   /* default_file_ops */
+	.create = reiserfs_create,
+	.lookup = reiserfs_lookup,
+	.link = reiserfs_link,
+	.unlink = reiserfs_unlink,
+	.symlink = reiserfs_symlink,
+	.mkdir = reiserfs_mkdir,
+	.rmdir = reiserfs_rmdir,
+	.mknod = reiserfs_mknod,
+	.rename = reiserfs_rename,
+	.setattr = reiserfs_setattr,
+	.setxattr = reiserfs_setxattr,
+	.getxattr = reiserfs_getxattr,
+	.listxattr = reiserfs_listxattr,
+	.removexattr = reiserfs_removexattr,
+	.permission = reiserfs_permission,
+	.get_acl = reiserfs_get_acl,
+};
+
+/*
+ * symlink operations.. same as page_symlink_inode_operations, with xattr
+ * stuff added
+ */
+const struct inode_operations reiserfs_symlink_inode_operations = {
+	.readlink = generic_readlink,
+	.follow_link = page_follow_link_light,
+	.put_link = page_put_link,
+	.setattr = reiserfs_setattr,
+	.setxattr = reiserfs_setxattr,
+	.getxattr = reiserfs_getxattr,
+	.listxattr = reiserfs_listxattr,
+	.removexattr = reiserfs_removexattr,
+	.permission = reiserfs_permission,
+	.get_acl = reiserfs_get_acl,
+};
+
+/*
+ * special file operations.. just xattr/acl stuff
+ */
+const struct inode_operations reiserfs_special_inode_operations = {
+	.setattr = reiserfs_setattr,
+	.setxattr = reiserfs_setxattr,
+	.getxattr = reiserfs_getxattr,
+	.listxattr = reiserfs_listxattr,
+	.removexattr = reiserfs_removexattr,
+	.permission = reiserfs_permission,
+	.get_acl = reiserfs_get_acl,
+};
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/objectid.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/objectid.c
new file mode 100644
index 0000000..f732d6a
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/objectid.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/time.h>
+#include "reiserfs.h"
+
+// find where objectid map starts
+#define objectid_map(s,rs) (old_format_only (s) ? \
+                         (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
+			 (__le32 *)((rs) + 1))
+
+#ifdef CONFIG_REISERFS_CHECK
+
+static void check_objectid_map(struct super_block *s, __le32 * map)
+{
+	if (le32_to_cpu(map[0]) != 1)
+		reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
+			       (long unsigned int)le32_to_cpu(map[0]));
+
+	// FIXME: add something else here
+}
+
+#else
+static void check_objectid_map(struct super_block *s, __le32 * map)
+{;
+}
+#endif
+
+/* When we allocate objectids we allocate the first unused objectid.
+   Each sequence of objectids in use (the odd sequences) is followed
+   by a sequence of objectids not in use (the even sequences).  We
+   only need to record the last objectid in each of these sequences
+   (both the odd and even sequences) in order to fully define the
+   boundaries of the sequences.  A consequence of allocating the first
+   objectid not in use is that under most conditions this scheme is
+   extremely compact.  The exception is immediately after a sequence
+   of operations which deletes a large number of objects of
+   non-sequential objectids, and even then it will become compact
+   again as soon as more objects are created.  Note that many
+   interesting optimizations of layout could result from complicating
+   objectid assignment, but we have deferred making them for now. */
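+/* Illustration (made-up numbers): a map of [ 1, 5, 9, 12 ] with cursize 4
+   describes ids 1..4 in use, 5..8 free, 9..11 in use, and everything from
+   12 up free.  Allocating returns 5 and bumps the map to [ 1, 6, 9, 12 ];
+   once allocation reaches [ 1, 9, 9, 12 ] the two equal boundaries are
+   collapsed and the map shrinks back to [ 1, 12 ]. */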
+
+/* get unique object identifier */
+__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
+{
+	struct super_block *s = th->t_super;
+	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
+	__le32 *map = objectid_map(s, rs);
+	__u32 unused_objectid;
+
+	BUG_ON(!th->t_trans_id);
+
+	check_objectid_map(s, map);
+
+	reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
+	/* comment needed -Hans */
+	unused_objectid = le32_to_cpu(map[1]);
+	if (unused_objectid == U32_MAX) {
+		reiserfs_warning(s, "reiserfs-15100", "no more object ids");
+		reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
+		return 0;
+	}
+
+	/* This incrementation allocates the first unused objectid. That
+	   is to say, map[1] holds the first unused objectid, and by
+	   incrementing it we use it.  See below where we check whether
+	   we eliminated a sequence of unused objectids.... */
+	map[1] = cpu_to_le32(unused_objectid + 1);
+
+	/* Now we check to see if we eliminated the last remaining member of
+	   the first even sequence (and can eliminate the sequence by
+	   eliminating its last objectid from oids), and can collapse the
+	   first two odd sequences into one sequence.  If so, then the net
+	   result is to eliminate a pair of objectids from oids.  We do this
+	   by shifting the entire map to the left. */
+	if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
+		memmove(map + 1, map + 3,
+			(sb_oid_cursize(rs) - 3) * sizeof(__u32));
+		set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
+	}
+
+	journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+	return unused_objectid;
+}
+
+/* makes object identifier unused */
+void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
+			       __u32 objectid_to_release)
+{
+	struct super_block *s = th->t_super;
+	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
+	__le32 *map = objectid_map(s, rs);
+	int i = 0;
+
+	BUG_ON(!th->t_trans_id);
+	//return;
+	check_objectid_map(s, map);
+
+	reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
+	journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+
+	/* start at the beginning of the objectid map (i = 0) and go to
+	   the end of it (i = disk_sb->s_oid_cursize).  Linear search is
+	   what we use, though it is possible that binary search would be
+	   more efficient after performing lots of deletions (which is
+	   when oids is large.)  We only check even i's. */
+	while (i < sb_oid_cursize(rs)) {
+		if (objectid_to_release == le32_to_cpu(map[i])) {
+			/* This incrementation unallocates the objectid. */
+			//map[i]++;
+			le32_add_cpu(&map[i], 1);
+
+			/* Did we unallocate the last member of an odd sequence, and can shrink oids? */
+			if (map[i] == map[i + 1]) {
+				/* shrink objectid map */
+				memmove(map + i, map + i + 2,
+					(sb_oid_cursize(rs) - i -
+					 2) * sizeof(__u32));
+				//disk_sb->s_oid_cursize -= 2;
+				set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
+
+				RFALSE(sb_oid_cursize(rs) < 2 ||
+				       sb_oid_cursize(rs) > sb_oid_maxsize(rs),
+				       "vs-15005: objectid map corrupted cur_size == %d (max == %d)",
+				       sb_oid_cursize(rs), sb_oid_maxsize(rs));
+			}
+			return;
+		}
+
+		if (objectid_to_release > le32_to_cpu(map[i]) &&
+		    objectid_to_release < le32_to_cpu(map[i + 1])) {
+			/* size of objectid map is not changed */
+			if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
+				//objectid_map[i+1]--;
+				le32_add_cpu(&map[i + 1], -1);
+				return;
+			}
+
+			/* JDM comparing two little-endian values for equality -- safe */
+			if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
+				/* objectid map must be expanded, but there is no space */
+				PROC_INFO_INC(s, leaked_oid);
+				return;
+			}
+
+			/* expand the objectid map */
+			memmove(map + i + 3, map + i + 1,
+				(sb_oid_cursize(rs) - i - 1) * sizeof(__u32));
+			map[i + 1] = cpu_to_le32(objectid_to_release);
+			map[i + 2] = cpu_to_le32(objectid_to_release + 1);
+			set_sb_oid_cursize(rs, sb_oid_cursize(rs) + 2);
+			return;
+		}
+		i += 2;
+	}
+
+	reiserfs_error(s, "vs-15011", "tried to free free object id (%lu)",
+		       (long unsigned)objectid_to_release);
+}
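+/* Continuing the illustration above (made-up numbers): releasing id 7
+   from a map of [ 1, 12 ] splits the used run and expands the map to
+   [ 1, 7, 8, 12 ]; releasing id 11 from [ 1, 12 ] merely decrements the
+   boundary to [ 1, 11 ]. */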
+
+int reiserfs_convert_objectid_map_v1(struct super_block *s)
+{
+	struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK(s);
+	int cur_size = sb_oid_cursize(disk_sb);
+	int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2;
+	int old_max = sb_oid_maxsize(disk_sb);
+	struct reiserfs_super_block_v1 *disk_sb_v1;
+	__le32 *objectid_map, *new_objectid_map;
+	int i;
+
+	disk_sb_v1 =
+	    (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
+	objectid_map = (__le32 *) (disk_sb_v1 + 1);
+	new_objectid_map = (__le32 *) (disk_sb + 1);
+
+	if (cur_size > new_size) {
+		/* mark as used everything that was listed as free at the end
+		 ** of the objectid map
+		 */
+		objectid_map[new_size - 1] = objectid_map[cur_size - 1];
+		set_sb_oid_cursize(disk_sb, new_size);
+	}
+	/* move the smaller objectid map past the end of the new super */
+	for (i = new_size - 1; i >= 0; i--) {
+		objectid_map[i + (old_max - new_size)] = objectid_map[i];
+	}
+
+	/* set the max size so we don't overflow later */
+	set_sb_oid_maxsize(disk_sb, new_size);
+
+	/* Zero out label and generate random UUID */
+	memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label));
+	generate_random_uuid(disk_sb->s_uuid);
+
+	/* finally, zero out the unused chunk of the new super */
+	memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused));
+	return 0;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/prints.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/prints.c
new file mode 100644
index 0000000..c0b1112
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/prints.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include "reiserfs.h"
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+
+#include <stdarg.h>
+
+static char error_buf[1024];
+static char fmt_buf[1024];
+static char off_buf[80];
+
+static char *reiserfs_cpu_offset(struct cpu_key *key)
+{
+	if (cpu_key_k_type(key) == TYPE_DIRENTRY)
+		sprintf(off_buf, "%Lu(%Lu)",
+			(unsigned long long)
+			GET_HASH_VALUE(cpu_key_k_offset(key)),
+			(unsigned long long)
+			GET_GENERATION_NUMBER(cpu_key_k_offset(key)));
+	else
+		sprintf(off_buf, "0x%Lx",
+			(unsigned long long)cpu_key_k_offset(key));
+	return off_buf;
+}
+
+static char *le_offset(struct reiserfs_key *key)
+{
+	int version;
+
+	version = le_key_version(key);
+	if (le_key_k_type(version, key) == TYPE_DIRENTRY)
+		sprintf(off_buf, "%Lu(%Lu)",
+			(unsigned long long)
+			GET_HASH_VALUE(le_key_k_offset(version, key)),
+			(unsigned long long)
+			GET_GENERATION_NUMBER(le_key_k_offset(version, key)));
+	else
+		sprintf(off_buf, "0x%Lx",
+			(unsigned long long)le_key_k_offset(version, key));
+	return off_buf;
+}
+
+static char *cpu_type(struct cpu_key *key)
+{
+	if (cpu_key_k_type(key) == TYPE_STAT_DATA)
+		return "SD";
+	if (cpu_key_k_type(key) == TYPE_DIRENTRY)
+		return "DIR";
+	if (cpu_key_k_type(key) == TYPE_DIRECT)
+		return "DIRECT";
+	if (cpu_key_k_type(key) == TYPE_INDIRECT)
+		return "IND";
+	return "UNKNOWN";
+}
+
+static char *le_type(struct reiserfs_key *key)
+{
+	int version;
+
+	version = le_key_version(key);
+
+	if (le_key_k_type(version, key) == TYPE_STAT_DATA)
+		return "SD";
+	if (le_key_k_type(version, key) == TYPE_DIRENTRY)
+		return "DIR";
+	if (le_key_k_type(version, key) == TYPE_DIRECT)
+		return "DIRECT";
+	if (le_key_k_type(version, key) == TYPE_INDIRECT)
+		return "IND";
+	return "UNKNOWN";
+}
+
+/* %k */
+static void sprintf_le_key(char *buf, struct reiserfs_key *key)
+{
+	if (key)
+		sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
+			le32_to_cpu(key->k_objectid), le_offset(key),
+			le_type(key));
+	else
+		sprintf(buf, "[NULL]");
+}
+
+/* %K */
+static void sprintf_cpu_key(char *buf, struct cpu_key *key)
+{
+	if (key)
+		sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
+			key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
+			cpu_type(key));
+	else
+		sprintf(buf, "[NULL]");
+}
+
+static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
+{
+	if (deh)
+		sprintf(buf,
+			"[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
+			deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
+			deh_location(deh), deh_state(deh));
+	else
+		sprintf(buf, "[NULL]");
+
+}
+
+static void sprintf_item_head(char *buf, struct item_head *ih)
+{
+	if (ih) {
+		strcpy(buf,
+		       (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
+		sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
+		sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
+			"free_space(entry_count) %d",
+			ih_item_len(ih), ih_location(ih), ih_free_space(ih));
+	} else
+		sprintf(buf, "[NULL]");
+}
+
+static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
+{
+	char name[20];
+
+	memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
+	name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
+	sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
+}
+
+static void sprintf_block_head(char *buf, struct buffer_head *bh)
+{
+	sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
+		B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
+}
+
+static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
+{
+	char b[BDEVNAME_SIZE];
+
+	sprintf(buf,
+		"dev %s, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+		bdevname(bh->b_bdev, b), bh->b_size,
+		(unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
+		bh->b_state, bh->b_page,
+		buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
+		buffer_dirty(bh) ? "DIRTY" : "CLEAN",
+		buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
+}
+
+static void sprintf_disk_child(char *buf, struct disk_child *dc)
+{
+	sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
+		dc_size(dc));
+}
+
+static char *is_there_reiserfs_struct(char *fmt, int *what)
+{
+	char *k = fmt;
+
+	while ((k = strchr(k, '%')) != NULL) {
+		if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
+		    k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') {
+			*what = k[1];
+			break;
+		}
+		k++;
+	}
+	return k;
+}
+
+/* When debugging reiserfs we used to print out a lot of different
+   variables, like keys, item headers, buffer heads etc. Values of
+   most fields matter. So it took a long time just to write an
+   appropriate printk. With this reiserfs_warning you can use format
+   specifications for complex structures like you used to do with
+   printfs for integers, doubles and pointers. For instance, to print
+   out a key structure you just write:
+   reiserfs_warning ("bad key %k", key);
+   instead of
+   printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
+           key->k_offset, key->k_uniqueness);
+*/
+static DEFINE_SPINLOCK(error_lock);
+static void prepare_error_buf(const char *fmt, va_list args)
+{
+	char *fmt1 = fmt_buf;
+	char *k;
+	char *p = error_buf;
+	int what;
+
+	spin_lock(&error_lock);
+
+	strcpy(fmt1, fmt);
+
+	while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
+		*k = 0;
+
+		p += vsprintf(p, fmt1, args);
+
+		switch (what) {
+		case 'k':
+			sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
+			break;
+		case 'K':
+			sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
+			break;
+		case 'h':
+			sprintf_item_head(p, va_arg(args, struct item_head *));
+			break;
+		case 't':
+			sprintf_direntry(p,
+					 va_arg(args,
+						struct reiserfs_dir_entry *));
+			break;
+		case 'y':
+			sprintf_disk_child(p,
+					   va_arg(args, struct disk_child *));
+			break;
+		case 'z':
+			sprintf_block_head(p,
+					   va_arg(args, struct buffer_head *));
+			break;
+		case 'b':
+			sprintf_buffer_head(p,
+					    va_arg(args, struct buffer_head *));
+			break;
+		case 'a':
+			sprintf_de_head(p,
+					va_arg(args,
+					       struct reiserfs_de_head *));
+			break;
+		}
+
+		p += strlen(p);
+		fmt1 = k + 2;
+	}
+	vsprintf(p, fmt1, args);
+	spin_unlock(&error_lock);
+
+}
+
+/* in addition to the usual conversion specifiers this accepts
+   reiserfs-specific conversion specifiers:
+   %k to print a little endian key,
+   %K to print a cpu key,
+   %h to print an item_head,
+   %t to print a directory entry,
+   %y to print a disk_child,
+   %a to print a directory entry head,
+   %z to print a block head (arg must be struct buffer_head *),
+   %b to print a buffer_head
+*/
+
+#define do_reiserfs_warning(fmt)\
+{\
+    va_list args;\
+    va_start( args, fmt );\
+    prepare_error_buf( fmt, args );\
+    va_end( args );\
+}
+
+void __reiserfs_warning(struct super_block *sb, const char *id,
+			 const char *function, const char *fmt, ...)
+{
+	do_reiserfs_warning(fmt);
+	if (sb)
+		printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: "
+		       "%s\n", sb->s_id, id ? id : "", id ? " " : "",
+		       function, error_buf);
+	else
+		printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n",
+		       id ? id : "", id ? " " : "", function, error_buf);
+}
+
+/* No newline.. reiserfs_info calls can be followed by printk's */
+void reiserfs_info(struct super_block *sb, const char *fmt, ...)
+{
+	do_reiserfs_warning(fmt);
+	if (sb)
+		printk(KERN_NOTICE "REISERFS (device %s): %s",
+		       sb->s_id, error_buf);
+	else
+		printk(KERN_NOTICE "REISERFS %s:", error_buf);
+}
+
+/* No newline.. reiserfs_printk calls can be followed by printk's */
+static void reiserfs_printk(const char *fmt, ...)
+{
+	do_reiserfs_warning(fmt);
+	printk(error_buf);
+}
+
+void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
+{
+#ifdef CONFIG_REISERFS_CHECK
+	do_reiserfs_warning(fmt);
+	if (s)
+		printk(KERN_DEBUG "REISERFS debug (device %s): %s\n",
+		       s->s_id, error_buf);
+	else
+		printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf);
+#endif
+}
+
+/* The format:
+
+           maintainer-errorid: [function-name:] message
+
+    where errorid is unique to the maintainer and function-name is
+    optional, is recommended, so that anyone can easily find the bug
+    with a simple grep for the short, easy-to-type string
+    maintainer-errorid.  Don't bother with reusing errorids; there are
+    lots of numbers out there.
+
+    Example:
+
+    reiserfs_panic(
+	p_sb, "reiser-29: reiserfs_new_blocknrs: "
+	"one of search_start or rn(%d) is equal to MAX_B_NUM,"
+	"which means that we are optimizing location based on the bogus location of a temp buffer (%p).",
+	rn, bh
+    );
+
+    Regular panic()s sometimes clear the screen before the message can
+    be read, thus the need for the while loop.
+
+    Numbering scheme for panics used by Vladimir and Anatoly (Hans
+    completely ignores this scheme, and considers it pointless
+    complexity):
+
+    panics in reiserfs.h have numbers from 1000 to 1999
+    super.c                  2000 to 2999
+    preserve.c (unused)      3000 to 3999
+    bitmap.c                 4000 to 4999
+    stree.c                  5000 to 5999
+    prints.c                 6000 to 6999
+    namei.c                  7000 to 7999
+    fix_nodes.c              8000 to 8999
+    dir.c                    9000 to 9999
+    lbalance.c               10000 to 10999
+    ibalance.c               11000 to 11999 (not ready)
+    do_balan.c               12000 to 12999
+    inode.c                  13000 to 13999
+    file.c                   14000 to 14999
+    objectid.c               15000 to 15999
+    buffer.c                 16000 to 16999
+    symlink.c                17000 to 17999
+*/
+
+void __reiserfs_panic(struct super_block *sb, const char *id,
+		      const char *function, const char *fmt, ...)
+{
+	do_reiserfs_warning(fmt);
+
+#ifdef CONFIG_REISERFS_CHECK
+	dump_stack();
+#endif
+	if (sb)
+		panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
+		      sb->s_id, id ? id : "", id ? " " : "",
+		      function, error_buf);
+	else
+		panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
+		      id ? id : "", id ? " " : "", function, error_buf);
+}
+
+void __reiserfs_error(struct super_block *sb, const char *id,
+		      const char *function, const char *fmt, ...)
+{
+	do_reiserfs_warning(fmt);
+
+	BUG_ON(sb == NULL);
+
+	if (reiserfs_error_panic(sb))
+		__reiserfs_panic(sb, id, function, error_buf);
+
+	if (id && id[0])
+		printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n",
+		       sb->s_id, id, function, error_buf);
+	else
+		printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n",
+		       sb->s_id, function, error_buf);
+
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
+	reiserfs_info(sb, "Remounting filesystem read-only\n");
+	sb->s_flags |= MS_RDONLY;
+	reiserfs_abort_journal(sb, -EIO);
+}
+
+void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
+{
+	do_reiserfs_warning(fmt);
+
+	if (reiserfs_error_panic(sb)) {
+		panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id,
+		      error_buf);
+	}
+
+	if (reiserfs_is_journal_aborted(SB_JOURNAL(sb)))
+		return;
+
+	printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id,
+	       error_buf);
+
+	sb->s_flags |= MS_RDONLY;
+	reiserfs_abort_journal(sb, errno);
+}
+
+/* this prints internal nodes (4 keys/items in line) (dc_number,
+   dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
+   dc_size)...*/
+static int print_internal(struct buffer_head *bh, int first, int last)
+{
+	struct reiserfs_key *key;
+	struct disk_child *dc;
+	int i;
+	int from, to;
+
+	if (!B_IS_KEYS_LEVEL(bh))
+		return 1;
+
+	check_internal(bh);
+
+	if (first == -1) {
+		from = 0;
+		to = B_NR_ITEMS(bh);
+	} else {
+		from = first;
+		to = last < B_NR_ITEMS(bh) ? last : B_NR_ITEMS(bh);
+	}
+
+	reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh);
+
+	dc = B_N_CHILD(bh, from);
+	reiserfs_printk("PTR %d: %y ", from, dc);
+
+	for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to;
+	     i++, key++, dc++) {
+		reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
+		if (i && i % 4 == 0)
+			printk("\n");
+	}
+	printk("\n");
+	return 0;
+}
+
+static int print_leaf(struct buffer_head *bh, int print_mode, int first,
+		      int last)
+{
+	struct block_head *blkh;
+	struct item_head *ih;
+	int i, nr;
+	int from, to;
+
+	if (!B_IS_ITEMS_LEVEL(bh))
+		return 1;
+
+	check_leaf(bh);
+
+	blkh = B_BLK_HEAD(bh);
+	ih = B_N_PITEM_HEAD(bh, 0);
+	nr = blkh_nr_item(blkh);
+
+	printk
+	    ("\n===================================================================\n");
+	reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh);
+
+	if (!(print_mode & PRINT_LEAF_ITEMS)) {
+		reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n",
+				&(ih->ih_key), &((ih + nr - 1)->ih_key));
+		return 0;
+	}
+
+	if (first < 0 || first > nr - 1)
+		from = 0;
+	else
+		from = first;
+
+	if (last < 0 || last > nr)
+		to = nr;
+	else
+		to = last;
+
+	ih += from;
+	printk
+	    ("-------------------------------------------------------------------------------\n");
+	printk
+	    ("|##|   type    |           key           | ilen | free_space | version | loc  |\n");
+	for (i = from; i < to; i++, ih++) {
+		printk
+		    ("-------------------------------------------------------------------------------\n");
+		reiserfs_printk("|%2d| %h |\n", i, ih);
+		if (print_mode & PRINT_LEAF_ITEMS)
+			op_print_item(ih, B_I_PITEM(bh, ih));
+	}
+
+	printk
+	    ("===================================================================\n");
+
+	return 0;
+}
+
+char *reiserfs_hashname(int code)
+{
+	if (code == YURA_HASH)
+		return "rupasov";
+	if (code == TEA_HASH)
+		return "tea";
+	if (code == R5_HASH)
+		return "r5";
+
+	return "unknown";
+}
+
+/* return 1 if this is not super block */
+static int print_super_block(struct buffer_head *bh)
+{
+	struct reiserfs_super_block *rs =
+	    (struct reiserfs_super_block *)(bh->b_data);
+	int skipped, data_blocks;
+	char *version;
+	char b[BDEVNAME_SIZE];
+
+	if (is_reiserfs_3_5(rs)) {
+		version = "3.5";
+	} else if (is_reiserfs_3_6(rs)) {
+		version = "3.6";
+	} else if (is_reiserfs_jr(rs)) {
+		version = ((sb_version(rs) == REISERFS_VERSION_2) ?
+			   "3.6" : "3.5");
+	} else {
+		return 1;
+	}
+
+	printk("%s\'s super block is in block %llu\n", bdevname(bh->b_bdev, b),
+	       (unsigned long long)bh->b_blocknr);
+	printk("Reiserfs version %s\n", version);
+	printk("Block count %u\n", sb_block_count(rs));
+	printk("Blocksize %d\n", sb_blocksize(rs));
+	printk("Free blocks %u\n", sb_free_blocks(rs));
+	// FIXME: this would be confusing if
+	// someone stores reiserfs super block in some data block ;)
+//    skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs);
+	skipped = bh->b_blocknr;
+	data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) -
+	    (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) +
+	     1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs);
+	printk
+	    ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n"
+	     "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs),
+	     (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) :
+	      sb_reserved_for_journal(rs)), data_blocks);
+	printk("Root block %u\n", sb_root_block(rs));
+	printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs));
+	printk("Journal dev %d\n", sb_jp_journal_dev(rs));
+	printk("Journal orig size %d\n", sb_jp_journal_size(rs));
+	printk("FS state %d\n", sb_fs_state(rs));
+	printk("Hash function \"%s\"\n",
+	       reiserfs_hashname(sb_hash_function_code(rs)));
+
+	printk("Tree height %d\n", sb_tree_height(rs));
+	return 0;
+}
+
+static int print_desc_block(struct buffer_head *bh)
+{
+	struct reiserfs_journal_desc *desc;
+
+	if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8))
+		return 1;
+
+	desc = (struct reiserfs_journal_desc *)(bh->b_data);
+	printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)",
+	       (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc),
+	       get_desc_mount_id(desc), get_desc_trans_len(desc));
+
+	return 0;
+}
+
+void print_block(struct buffer_head *bh, ...)	//int print_mode, int first, int last)
+{
+	va_list args;
+	int mode, first, last;
+
+	if (!bh) {
+		printk("print_block: buffer is NULL\n");
+		return;
+	}
+
+	va_start(args, bh);
+
+	mode = va_arg(args, int);
+	first = va_arg(args, int);
+	last = va_arg(args, int);
+	if (print_leaf(bh, mode, first, last))
+		if (print_internal(bh, first, last))
+			if (print_super_block(bh))
+				if (print_desc_block(bh))
+					printk
+					    ("Block %llu contains unformatted data\n",
+					     (unsigned long long)bh->b_blocknr);
+
+	va_end(args);
+}
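+
+/* Usage sketch (hypothetical call, not from this file): print_block()
+   pulls its three optional ints from the va_list itself, so dumping all
+   items of a node could look like
+
+	print_block(bh, PRINT_LEAF_ITEMS, -1, -1);
+
+   where first == -1 makes print_leaf()/print_internal() cover the whole
+   node. */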
+
+static char print_tb_buf[2048];
+
+/* this stores initial state of tree balance in the print_tb_buf */
+void store_print_tb(struct tree_balance *tb)
+{
+	int h = 0;
+	int i;
+	struct buffer_head *tbSh, *tbFh;
+
+	if (!tb)
+		return;
+
+	sprintf(print_tb_buf, "\n"
+		"BALANCING %d\n"
+		"MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n"
+		"=====================================================================\n"
+		"* h *    S    *    L    *    R    *   F   *   FL  *   FR  *  CFL  *  CFR  *\n",
+		REISERFS_SB(tb->tb_sb)->s_do_balance,
+		tb->tb_mode, PATH_LAST_POSITION(tb->tb_path),
+		tb->tb_path->pos_in_item);
+
+	for (h = 0; h < ARRAY_SIZE(tb->insert_size); h++) {
+		if (PATH_H_PATH_OFFSET(tb->tb_path, h) <=
+		    tb->tb_path->path_length
+		    && PATH_H_PATH_OFFSET(tb->tb_path,
+					  h) > ILLEGAL_PATH_ELEMENT_OFFSET) {
+			tbSh = PATH_H_PBUFFER(tb->tb_path, h);
+			tbFh = PATH_H_PPARENT(tb->tb_path, h);
+		} else {
+			tbSh = NULL;
+			tbFh = NULL;
+		}
+		sprintf(print_tb_buf + strlen(print_tb_buf),
+			"* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
+			h,
+			(tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL),
+			(tbSh) ? atomic_read(&(tbSh->b_count)) : -1,
+			(tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL),
+			(tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1,
+			(tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL),
+			(tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1,
+			(tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL),
+			(tb->FL[h]) ? (long long)(tb->FL[h]->
+						  b_blocknr) : (-1LL),
+			(tb->FR[h]) ? (long long)(tb->FR[h]->
+						  b_blocknr) : (-1LL),
+			(tb->CFL[h]) ? (long long)(tb->CFL[h]->
+						   b_blocknr) : (-1LL),
+			(tb->CFR[h]) ? (long long)(tb->CFR[h]->
+						   b_blocknr) : (-1LL));
+	}
+
+	sprintf(print_tb_buf + strlen(print_tb_buf),
+		"=====================================================================\n"
+		"* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
+		"* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
+		tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],
+		tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes,
+		tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0],
+		tb->rkey[0]);
+
+	/* this prints balance parameters for non-leaf levels */
+	h = 0;
+	do {
+		h++;
+		sprintf(print_tb_buf + strlen(print_tb_buf),
+			"* %d * %4d * %2d *    * %2d *    * %2d *\n",
+			h, tb->insert_size[h], tb->lnum[h], tb->rnum[h],
+			tb->blknum[h]);
+	} while (tb->insert_size[h]);
+
+	sprintf(print_tb_buf + strlen(print_tb_buf),
+		"=====================================================================\n"
+		"FEB list: ");
+
+	/* print the FEB list: buffers, in the form (bh (b_blocknr, b_count)), that will be used for new nodes */
+	h = 0;
+	for (i = 0; i < ARRAY_SIZE(tb->FEB); i++)
+		sprintf(print_tb_buf + strlen(print_tb_buf),
+			"%p (%llu %d)%s", tb->FEB[i],
+			tb->FEB[i] ? (unsigned long long)tb->FEB[i]->
+			b_blocknr : 0ULL,
+			tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0,
+			(i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", ");
+
+	sprintf(print_tb_buf + strlen(print_tb_buf),
+		"======================== the end ====================================\n");
+}
+
+void print_cur_tb(char *mes)
+{
+	printk("%s\n%s", mes, print_tb_buf);
+}
+
+static void check_leaf_block_head(struct buffer_head *bh)
+{
+	struct block_head *blkh;
+	int nr;
+
+	blkh = B_BLK_HEAD(bh);
+	nr = blkh_nr_item(blkh);
+	if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
+		reiserfs_panic(NULL, "vs-6010", "invalid item number %z",
+			       bh);
+	if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr)
+		reiserfs_panic(NULL, "vs-6020", "invalid free space %z",
+			       bh);
+
+}
+
+static void check_internal_block_head(struct buffer_head *bh)
+{
+	struct block_head *blkh;
+
+	blkh = B_BLK_HEAD(bh);
+	if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
+		reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh);
+
+	if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE)
+		reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh);
+
+	if (B_FREE_SPACE(bh) !=
+	    bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) -
+	    DC_SIZE * (B_NR_ITEMS(bh) + 1))
+		reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh);
+
+}
+
+void check_leaf(struct buffer_head *bh)
+{
+	int i;
+	struct item_head *ih;
+
+	if (!bh)
+		return;
+	check_leaf_block_head(bh);
+	for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++)
+		op_check_item(ih, B_I_PITEM(bh, ih));
+}
+
+void check_internal(struct buffer_head *bh)
+{
+	if (!bh)
+		return;
+	check_internal_block_head(bh);
+}
+
+void print_statistics(struct super_block *s)
+{
+
+	/*
+	   printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \
+	   bmap with search %d, without %d, dir2ind %d, ind2dir %d\n",
+	   REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes,
+	   REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search,
+	   REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct);
+	 */
+
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/procfs.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/procfs.c
new file mode 100644
index 0000000..2c1ade6
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/procfs.c
@@ -0,0 +1,575 @@
+/* -*- linux-c -*- */
+
+/* fs/reiserfs/procfs.c */
+
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+/* proc info support a la one created by Sizif@Botik.RU for PGC */
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include "reiserfs.h"
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+
+/*
+ * LOCKING:
+ *
+ * We rely on Alexander Viro's new super-block locking.
+ *
+ */
+
+static int show_version(struct seq_file *m, struct super_block *sb)
+{
+	char *format;
+
+	if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
+		format = "3.6";
+	} else if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5)) {
+		format = "3.5";
+	} else {
+		format = "unknown";
+	}
+
+	seq_printf(m, "%s format\twith checks %s\n", format,
+#if defined( CONFIG_REISERFS_CHECK )
+		   "on"
+#else
+		   "off"
+#endif
+	    );
+	return 0;
+}
+
+#define SF( x ) ( r -> x )
+#define SFP( x ) SF( s_proc_info_data.x )
+#define SFPL( x ) SFP( x[ level ] )
+#define SFPF( x ) SFP( scan_bitmap.x )
+#define SFPJ( x ) SFP( journal.x )
+
+#define D2C( x ) le16_to_cpu( x )
+#define D4C( x ) le32_to_cpu( x )
+#define DF( x ) D2C( rs -> s_v1.x )
+#define DFL( x ) D4C( rs -> s_v1.x )
+
+#define objectid_map( s, rs ) (old_format_only (s) ?				\
+                         (__le32 *)((struct reiserfs_super_block_v1 *)rs + 1) :	\
+			 (__le32 *)(rs + 1))
+#define MAP( i ) D4C( objectid_map( sb, rs )[ i ] )
+
+#define DJF( x ) le32_to_cpu( rs -> x )
+#define DJV( x ) le32_to_cpu( s_v1 -> x )
+#define DJP( x ) le32_to_cpu( jp -> x )
+#define JF( x ) ( r -> s_journal -> x )
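+
+/* For example, with `r' being the reiserfs_sb_info pointer declared in
+   the show_* routines below, SFPL(balance_at) expands via SFP and SF to
+   r->s_proc_info_data.balance_at[level], and SFPJ(in_journal) to
+   r->s_proc_info_data.journal.in_journal. */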
+
+static int show_super(struct seq_file *m, struct super_block *sb)
+{
+	struct reiserfs_sb_info *r = REISERFS_SB(sb);
+
+	seq_printf(m, "state: \t%s\n"
+		   "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n"
+		   "gen. counter: \t%i\n"
+		   "s_disk_reads: \t%i\n"
+		   "s_disk_writes: \t%i\n"
+		   "s_fix_nodes: \t%i\n"
+		   "s_do_balance: \t%i\n"
+		   "s_unneeded_left_neighbor: \t%i\n"
+		   "s_good_search_by_key_reada: \t%i\n"
+		   "s_bmaps: \t%i\n"
+		   "s_bmaps_without_search: \t%i\n"
+		   "s_direct2indirect: \t%i\n"
+		   "s_indirect2direct: \t%i\n"
+		   "\n"
+		   "max_hash_collisions: \t%i\n"
+		   "breads: \t%lu\n"
+		   "bread_misses: \t%lu\n"
+		   "search_by_key: \t%lu\n"
+		   "search_by_key_fs_changed: \t%lu\n"
+		   "search_by_key_restarted: \t%lu\n"
+		   "insert_item_restarted: \t%lu\n"
+		   "paste_into_item_restarted: \t%lu\n"
+		   "cut_from_item_restarted: \t%lu\n"
+		   "delete_solid_item_restarted: \t%lu\n"
+		   "delete_item_restarted: \t%lu\n"
+		   "leaked_oid: \t%lu\n"
+		   "leaves_removable: \t%lu\n",
+		   SF(s_mount_state) == REISERFS_VALID_FS ?
+		   "REISERFS_VALID_FS" : "REISERFS_ERROR_FS",
+		   reiserfs_r5_hash(sb) ? "FORCE_R5 " : "",
+		   reiserfs_rupasov_hash(sb) ? "FORCE_RUPASOV " : "",
+		   reiserfs_tea_hash(sb) ? "FORCE_TEA " : "",
+		   reiserfs_hash_detect(sb) ? "DETECT_HASH " : "",
+		   reiserfs_no_border(sb) ? "NO_BORDER " : "BORDER ",
+		   reiserfs_no_unhashed_relocation(sb) ?
+		   "NO_UNHASHED_RELOCATION " : "",
+		   reiserfs_hashed_relocation(sb) ? "UNHASHED_RELOCATION " : "",
+		   reiserfs_test4(sb) ? "TEST4 " : "",
+		   have_large_tails(sb) ? "TAILS " : have_small_tails(sb) ?
+		   "SMALL_TAILS " : "NO_TAILS ",
+		   replay_only(sb) ? "REPLAY_ONLY " : "",
+		   convert_reiserfs(sb) ? "CONV " : "",
+		   atomic_read(&r->s_generation_counter),
+		   SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes),
+		   SF(s_do_balance), SF(s_unneeded_left_neighbor),
+		   SF(s_good_search_by_key_reada), SF(s_bmaps),
+		   SF(s_bmaps_without_search), SF(s_direct2indirect),
+		   SF(s_indirect2direct), SFP(max_hash_collisions), SFP(breads),
+		   SFP(bread_miss), SFP(search_by_key),
+		   SFP(search_by_key_fs_changed), SFP(search_by_key_restarted),
+		   SFP(insert_item_restarted), SFP(paste_into_item_restarted),
+		   SFP(cut_from_item_restarted),
+		   SFP(delete_solid_item_restarted), SFP(delete_item_restarted),
+		   SFP(leaked_oid), SFP(leaves_removable));
+
+	return 0;
+}
+
+static int show_per_level(struct seq_file *m, struct super_block *sb)
+{
+	struct reiserfs_sb_info *r = REISERFS_SB(sb);
+	int level;
+
+	seq_printf(m, "level\t"
+		   "     balances"
+		   " [sbk:  reads"
+		   "   fs_changed"
+		   "   restarted]"
+		   "   free space"
+		   "        items"
+		   "   can_remove"
+		   "         lnum"
+		   "         rnum"
+		   "       lbytes"
+		   "       rbytes"
+		   "     get_neig"
+		   " get_neig_res" "  need_l_neig" "  need_r_neig" "\n");
+
+	for (level = 0; level < MAX_HEIGHT; ++level) {
+		seq_printf(m, "%i\t"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   " %12li"
+			   " %12li"
+			   " %12li"
+			   " %12li"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   " %12lu"
+			   "\n",
+			   level,
+			   SFPL(balance_at),
+			   SFPL(sbk_read_at),
+			   SFPL(sbk_fs_changed),
+			   SFPL(sbk_restarted),
+			   SFPL(free_at),
+			   SFPL(items_at),
+			   SFPL(can_node_be_removed),
+			   SFPL(lnum),
+			   SFPL(rnum),
+			   SFPL(lbytes),
+			   SFPL(rbytes),
+			   SFPL(get_neighbors),
+			   SFPL(get_neighbors_restart),
+			   SFPL(need_l_neighbor), SFPL(need_r_neighbor)
+		    );
+	}
+	return 0;
+}
+
+static int show_bitmap(struct seq_file *m, struct super_block *sb)
+{
+	struct reiserfs_sb_info *r = REISERFS_SB(sb);
+
+	seq_printf(m, "free_block: %lu\n"
+		   "  scan_bitmap:"
+		   "          wait"
+		   "          bmap"
+		   "         retry"
+		   "        stolen"
+		   "  journal_hint"
+		   "journal_nohint"
+		   "\n"
+		   " %14lu"
+		   " %14lu"
+		   " %14lu"
+		   " %14lu"
+		   " %14lu"
+		   " %14lu"
+		   " %14lu"
+		   "\n",
+		   SFP(free_block),
+		   SFPF(call),
+		   SFPF(wait),
+		   SFPF(bmap),
+		   SFPF(retry),
+		   SFPF(stolen),
+		   SFPF(in_journal_hint), SFPF(in_journal_nohint));
+
+	return 0;
+}
+
+static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
+	struct reiserfs_super_block *rs = sb_info->s_rs;
+	int hash_code = DFL(s_hash_function_code);
+	__u32 flags = DJF(s_flags);
+
+	seq_printf(m, "block_count: \t%i\n"
+		   "free_blocks: \t%i\n"
+		   "root_block: \t%i\n"
+		   "blocksize: \t%i\n"
+		   "oid_maxsize: \t%i\n"
+		   "oid_cursize: \t%i\n"
+		   "umount_state: \t%i\n"
+		   "magic: \t%10.10s\n"
+		   "fs_state: \t%i\n"
+		   "hash: \t%s\n"
+		   "tree_height: \t%i\n"
+		   "bmap_nr: \t%i\n"
+		   "version: \t%i\n"
+		   "flags: \t%x[%s]\n"
+		   "reserved_for_journal: \t%i\n",
+		   DFL(s_block_count),
+		   DFL(s_free_blocks),
+		   DFL(s_root_block),
+		   DF(s_blocksize),
+		   DF(s_oid_maxsize),
+		   DF(s_oid_cursize),
+		   DF(s_umount_state),
+		   rs->s_v1.s_magic,
+		   DF(s_fs_state),
+		   hash_code == TEA_HASH ? "tea" :
+		   (hash_code == YURA_HASH) ? "rupasov" :
+		   (hash_code == R5_HASH) ? "r5" :
+		   (hash_code == UNSET_HASH) ? "unset" : "unknown",
+		   DF(s_tree_height),
+		   DF(s_bmap_nr),
+		   DF(s_version), flags, (flags & reiserfs_attrs_cleared)
+		   ? "attrs_cleared" : "", DF(s_reserved_for_journal));
+
+	return 0;
+}
+
+static int show_oidmap(struct seq_file *m, struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
+	struct reiserfs_super_block *rs = sb_info->s_rs;
+	unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
+	unsigned long total_used = 0;
+	int i;
+
+	for (i = 0; i < mapsize; ++i) {
+		__u32 right;
+
+		right = (i == mapsize - 1) ? MAX_KEY_OBJECTID : MAP(i + 1);
+		seq_printf(m, "%s: [ %x .. %x )\n",
+			   (i & 1) ? "free" : "used", MAP(i), right);
+		if (!(i & 1)) {
+			total_used += right - MAP(i);
+		}
+	}
+#if defined( REISERFS_USE_OIDMAPF )
+	if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
+		loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size;
+		total_used += size / sizeof(reiserfs_oidinterval_d_t);
+	}
+#endif
+	seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n",
+		   mapsize,
+		   mapsize, le16_to_cpu(rs->s_v1.s_oid_maxsize), total_used);
+	return 0;
+}
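+
+/* The objectid map stores interval boundaries: an even slot opens a used
+   interval, an odd slot opens a free one, and the last used interval is
+   closed by MAX_KEY_OBJECTID.  A hypothetical map { 1, 0x100, 0x105 }
+   with mapsize 3 would therefore print
+
+	used: [ 1 .. 100 )
+	free: [ 100 .. 105 )
+	used: [ 105 .. MAX_KEY_OBJECTID )
+ */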
+
+static int show_journal(struct seq_file *m, struct super_block *sb)
+{
+	struct reiserfs_sb_info *r = REISERFS_SB(sb);
+	struct reiserfs_super_block *rs = r->s_rs;
+	struct journal_params *jp = &rs->s_v1.s_journal;
+	char b[BDEVNAME_SIZE];
+
+	seq_printf(m,		/* on-disk fields */
+		   "jp_journal_1st_block: \t%i\n"
+		   "jp_journal_dev: \t%s[%x]\n"
+		   "jp_journal_size: \t%i\n"
+		   "jp_journal_trans_max: \t%i\n"
+		   "jp_journal_magic: \t%i\n"
+		   "jp_journal_max_batch: \t%i\n"
+		   "jp_journal_max_commit_age: \t%i\n"
+		   "jp_journal_max_trans_age: \t%i\n"
+		   /* incore fields */
+		   "j_1st_reserved_block: \t%i\n"
+		   "j_state: \t%li\n"
+		   "j_trans_id: \t%u\n"
+		   "j_mount_id: \t%lu\n"
+		   "j_start: \t%lu\n"
+		   "j_len: \t%lu\n"
+		   "j_len_alloc: \t%lu\n"
+		   "j_wcount: \t%i\n"
+		   "j_bcount: \t%lu\n"
+		   "j_first_unflushed_offset: \t%lu\n"
+		   "j_last_flush_trans_id: \t%u\n"
+		   "j_trans_start_time: \t%li\n"
+		   "j_list_bitmap_index: \t%i\n"
+		   "j_must_wait: \t%i\n"
+		   "j_next_full_flush: \t%i\n"
+		   "j_next_async_flush: \t%i\n"
+		   "j_cnode_used: \t%i\n" "j_cnode_free: \t%i\n" "\n"
+		   /* reiserfs_proc_info_data_t.journal fields */
+		   "in_journal: \t%12lu\n"
+		   "in_journal_bitmap: \t%12lu\n"
+		   "in_journal_reusable: \t%12lu\n"
+		   "lock_journal: \t%12lu\n"
+		   "lock_journal_wait: \t%12lu\n"
+		   "journal_begin: \t%12lu\n"
+		   "journal_relock_writers: \t%12lu\n"
+		   "journal_relock_wcount: \t%12lu\n"
+		   "mark_dirty: \t%12lu\n"
+		   "mark_dirty_already: \t%12lu\n"
+		   "mark_dirty_notjournal: \t%12lu\n"
+		   "restore_prepared: \t%12lu\n"
+		   "prepare: \t%12lu\n"
+		   "prepare_retry: \t%12lu\n",
+		   DJP(jp_journal_1st_block),
+		   bdevname(SB_JOURNAL(sb)->j_dev_bd, b),
+		   DJP(jp_journal_dev),
+		   DJP(jp_journal_size),
+		   DJP(jp_journal_trans_max),
+		   DJP(jp_journal_magic),
+		   DJP(jp_journal_max_batch),
+		   SB_JOURNAL(sb)->j_max_commit_age,
+		   DJP(jp_journal_max_trans_age),
+		   JF(j_1st_reserved_block),
+		   JF(j_state),
+		   JF(j_trans_id),
+		   JF(j_mount_id),
+		   JF(j_start),
+		   JF(j_len),
+		   JF(j_len_alloc),
+		   atomic_read(&r->s_journal->j_wcount),
+		   JF(j_bcount),
+		   JF(j_first_unflushed_offset),
+		   JF(j_last_flush_trans_id),
+		   JF(j_trans_start_time),
+		   JF(j_list_bitmap_index),
+		   JF(j_must_wait),
+		   JF(j_next_full_flush),
+		   JF(j_next_async_flush),
+		   JF(j_cnode_used),
+		   JF(j_cnode_free),
+		   SFPJ(in_journal),
+		   SFPJ(in_journal_bitmap),
+		   SFPJ(in_journal_reusable),
+		   SFPJ(lock_journal),
+		   SFPJ(lock_journal_wait),
+		   SFPJ(journal_being),
+		   SFPJ(journal_relock_writers),
+		   SFPJ(journal_relock_wcount),
+		   SFPJ(mark_dirty),
+		   SFPJ(mark_dirty_already),
+		   SFPJ(mark_dirty_notjournal),
+		   SFPJ(restore_prepared), SFPJ(prepare), SFPJ(prepare_retry)
+	    );
+	return 0;
+}
+
+/* iterator */
+static int test_sb(struct super_block *sb, void *data)
+{
+	return data == sb;
+}
+
+static int set_sb(struct super_block *sb, void *data)
+{
+	return -ENOENT;
+}
+
+static void *r_start(struct seq_file *m, loff_t * pos)
+{
+	struct proc_dir_entry *de = m->private;
+	struct super_block *s = de->parent->data;
+	loff_t l = *pos;
+
+	if (l)
+		return NULL;
+
+	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s)))
+		return NULL;
+
+	up_write(&s->s_umount);
+	return s;
+}
+
+static void *r_next(struct seq_file *m, void *v, loff_t * pos)
+{
+	++*pos;
+	if (v)
+		deactivate_super(v);
+	return NULL;
+}
+
+static void r_stop(struct seq_file *m, void *v)
+{
+	if (v)
+		deactivate_super(v);
+}
+
+static int r_show(struct seq_file *m, void *v)
+{
+	struct proc_dir_entry *de = m->private;
+	int (*show) (struct seq_file *, struct super_block *) = de->data;
+	return show(m, v);
+}
+
+static const struct seq_operations r_ops = {
+	.start = r_start,
+	.next = r_next,
+	.stop = r_stop,
+	.show = r_show,
+};
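+
+/* This iterator deliberately yields a single element: r_start() returns
+   the superblock for pos 0 (pinning it through sget() and dropping
+   s_umount) and NULL for any later pos, so r_show() runs exactly once
+   per read and r_next()/r_stop() release the reference with
+   deactivate_super(). */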
+
+static int r_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &r_ops);
+
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = PDE(inode);
+	}
+	return ret;
+}
+
+static const struct file_operations r_file_operations = {
+	.open = r_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+	.owner = THIS_MODULE,
+};
+
+static struct proc_dir_entry *proc_info_root = NULL;
+static const char proc_info_root_name[] = "fs/reiserfs";
+
+static void add_file(struct super_block *sb, char *name,
+		     int (*func) (struct seq_file *, struct super_block *))
+{
+	proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
+			 &r_file_operations, func);
+}
+
+int reiserfs_proc_info_init(struct super_block *sb)
+{
+	char b[BDEVNAME_SIZE];
+	char *s;
+
+	/* Some block devices use /'s */
+	strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+	s = strchr(b, '/');
+	if (s)
+		*s = '!';
+
+	spin_lock_init(&__PINFO(sb).lock);
+	REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
+	if (REISERFS_SB(sb)->procdir) {
+		REISERFS_SB(sb)->procdir->data = sb;
+		add_file(sb, "version", show_version);
+		add_file(sb, "super", show_super);
+		add_file(sb, "per-level", show_per_level);
+		add_file(sb, "bitmap", show_bitmap);
+		add_file(sb, "on-disk-super", show_on_disk_super);
+		add_file(sb, "oidmap", show_oidmap);
+		add_file(sb, "journal", show_journal);
+		return 0;
+	}
+	reiserfs_warning(sb, "cannot create /proc/%s/%s",
+			 proc_info_root_name, b);
+	return 1;
+}
+
+int reiserfs_proc_info_done(struct super_block *sb)
+{
+	struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
+	char b[BDEVNAME_SIZE];
+	char *s;
+
+	/* Some block devices use /'s */
+	strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+	s = strchr(b, '/');
+	if (s)
+		*s = '!';
+
+	if (de) {
+		remove_proc_entry("journal", de);
+		remove_proc_entry("oidmap", de);
+		remove_proc_entry("on-disk-super", de);
+		remove_proc_entry("bitmap", de);
+		remove_proc_entry("per-level", de);
+		remove_proc_entry("super", de);
+		remove_proc_entry("version", de);
+	}
+	spin_lock(&__PINFO(sb).lock);
+	__PINFO(sb).exiting = 1;
+	spin_unlock(&__PINFO(sb).lock);
+	if (proc_info_root) {
+		remove_proc_entry(b, proc_info_root);
+		REISERFS_SB(sb)->procdir = NULL;
+	}
+	return 0;
+}
+
+int reiserfs_proc_info_global_init(void)
+{
+	if (proc_info_root == NULL) {
+		proc_info_root = proc_mkdir(proc_info_root_name, NULL);
+		if (!proc_info_root) {
+			reiserfs_warning(NULL, "cannot create /proc/%s",
+					 proc_info_root_name);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int reiserfs_proc_info_global_done(void)
+{
+	if (proc_info_root != NULL) {
+		proc_info_root = NULL;
+		remove_proc_entry(proc_info_root_name, NULL);
+	}
+	return 0;
+}
+/*
+ * Revision 1.1.8.2  2001/07/15 17:08:42  god
+ *  . use get_super() in procfs.c
+ *  . remove remove_save_link() from reiserfs_do_truncate()
+ *
+ * I accept terms and conditions stated in the Legal Agreement
+ * (available at http://www.namesys.com/legalese.html)
+ *
+ * Revision 1.1.8.1  2001/07/11 16:48:50  god
+ * proc info support
+ *
+ * I accept terms and conditions stated in the Legal Agreement
+ * (available at http://www.namesys.com/legalese.html)
+ *
+ */
+
+/*
+ * Make Linus happy.
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * mode-name: "LC"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/reiserfs.h b/ap/os/linux/linux-3.4.x/fs/reiserfs/reiserfs.h
new file mode 100644
index 0000000..a59d271
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/reiserfs.h
@@ -0,0 +1,2923 @@
+/*
+ * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
+ */
+
+#include <linux/reiserfs_fs.h>
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/bug.h>
+#include <linux/workqueue.h>
+#include <asm/unaligned.h>
+#include <linux/bitops.h>
+#include <linux/proc_fs.h>
+#include <linux/buffer_head.h>
+
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK		_IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION
+
+struct reiserfs_journal_list;
+
+/** bitmasks for i_flags field in reiserfs-specific part of inode */
+typedef enum {
+    /** this says what format of key all items (but stat data) of an
+      object have.  If this is set, the format is 3.6; otherwise it is
+      3.5 */
+	i_item_key_version_mask = 0x0001,
+    /** If this is unset, object has 3.5 stat data, otherwise, it has
+      3.6 stat data with 64bit size, 32bit nlink etc. */
+	i_stat_data_version_mask = 0x0002,
+    /** file might need tail packing on close */
+	i_pack_on_close_mask = 0x0004,
+    /** don't pack tail of file */
+	i_nopack_mask = 0x0008,
+    /** If this is set, a "safe link" was created for this file during
+      truncate or unlink. Safe link is used to avoid leakage of disk
+      space on crash with some files open, but unlinked. */
+	i_link_saved_unlink_mask = 0x0010,
+	i_link_saved_truncate_mask = 0x0020,
+	i_has_xattr_dir = 0x0040,
+	i_data_log = 0x0080,
+} reiserfs_inode_flags;
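+
+/* These bits live in the transient i_flags word of the reiserfs part of
+   an in-core inode.  A hypothetical caller suppressing tail packing
+   (assuming the usual REISERFS_I() container accessor) would do
+
+	REISERFS_I(inode)->i_flags |= i_nopack_mask;
+ */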
+
+struct reiserfs_inode_info {
+	__u32 i_key[4];		/* key is still 4 32 bit integers */
+    /** transient inode flags that are never stored on disk. Bitmasks
+      for this field are defined above. */
+	__u32 i_flags;
+
+	__u32 i_first_direct_byte;	// offset of first byte stored in direct item.
+
+	/* copy of persistent inode flags read from sd_attrs. */
+	__u32 i_attrs;
+
+	int i_prealloc_block;	/* first unused block of a sequence of unused blocks */
+	int i_prealloc_count;	/* length of that sequence */
+	struct list_head i_prealloc_list;	/* per-transaction list of inodes which
+						 * have preallocated blocks */
+
+	unsigned new_packing_locality:1;	/* new_packing_locality is created; new blocks
+						 * for the contents of this directory should be
+						 * displaced */
+
+	/* we use these for fsync or O_SYNC to decide which transaction
+	 ** needs to be committed in order for this inode to be properly
+	 ** flushed */
+	unsigned int i_trans_id;
+	struct reiserfs_journal_list *i_jl;
+	atomic_t openers;
+	struct mutex tailpack;
+#ifdef CONFIG_REISERFS_FS_XATTR
+	struct rw_semaphore i_xattr_sem;
+#endif
+	struct inode vfs_inode;
+};
+
+typedef enum {
+	reiserfs_attrs_cleared = 0x00000001,
+} reiserfs_super_block_flags;
+
+/* struct reiserfs_super_block accessors/mutators
+ * since this is a disk structure, it will always be in
+ * little endian format. */
+#define sb_block_count(sbp)         (le32_to_cpu((sbp)->s_v1.s_block_count))
+#define set_sb_block_count(sbp,v)   ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
+#define sb_free_blocks(sbp)         (le32_to_cpu((sbp)->s_v1.s_free_blocks))
+#define set_sb_free_blocks(sbp,v)   ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
+#define sb_root_block(sbp)          (le32_to_cpu((sbp)->s_v1.s_root_block))
+#define set_sb_root_block(sbp,v)    ((sbp)->s_v1.s_root_block = cpu_to_le32(v))
+
+#define sb_jp_journal_1st_block(sbp)  \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
+#define set_sb_jp_journal_1st_block(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
+#define sb_jp_journal_dev(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
+#define set_sb_jp_journal_dev(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
+#define sb_jp_journal_size(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
+#define set_sb_jp_journal_size(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
+#define sb_jp_journal_trans_max(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
+#define set_sb_jp_journal_trans_max(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
+#define sb_jp_journal_magic(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
+#define set_sb_jp_journal_magic(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
+#define sb_jp_journal_max_batch(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
+#define set_sb_jp_journal_max_batch(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
+#define sb_jp_jourmal_max_commit_age(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
+#define set_sb_jp_journal_max_commit_age(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))
+
+#define sb_blocksize(sbp)          (le16_to_cpu((sbp)->s_v1.s_blocksize))
+#define set_sb_blocksize(sbp,v)    ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
+#define sb_oid_maxsize(sbp)        (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
+#define set_sb_oid_maxsize(sbp,v)  ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
+#define sb_oid_cursize(sbp)        (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
+#define set_sb_oid_cursize(sbp,v)  ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
+#define sb_umount_state(sbp)       (le16_to_cpu((sbp)->s_v1.s_umount_state))
+#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
+#define sb_fs_state(sbp)           (le16_to_cpu((sbp)->s_v1.s_fs_state))
+#define set_sb_fs_state(sbp,v)     ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
+#define sb_hash_function_code(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_hash_function_code))
+#define set_sb_hash_function_code(sbp,v) \
+              ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
+#define sb_tree_height(sbp)        (le16_to_cpu((sbp)->s_v1.s_tree_height))
+#define set_sb_tree_height(sbp,v)  ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
+#define sb_bmap_nr(sbp)            (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
+#define set_sb_bmap_nr(sbp,v)      ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
+#define sb_version(sbp)            (le16_to_cpu((sbp)->s_v1.s_version))
+#define set_sb_version(sbp,v)      ((sbp)->s_v1.s_version = cpu_to_le16(v))
+
+#define sb_mnt_count(sbp)	   (le16_to_cpu((sbp)->s_mnt_count))
+#define set_sb_mnt_count(sbp, v)   ((sbp)->s_mnt_count = cpu_to_le16(v))
+
+#define sb_reserved_for_journal(sbp) \
+              (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
+#define set_sb_reserved_for_journal(sbp,v) \
+              ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
+
+/* LOGGING -- */
+
+/* These all interrelate for performance.
+**
+** If the journal block count is smaller than n transactions, you lose speed.
+** I don't know what n is yet, I'm guessing 8-16.
+**
+** typical transaction size depends on the application, how often fsync is
+** called, and how many metadata blocks you dirty in a 30 second period.
+** The more small files (<16k) you use, the larger your transactions will
+** be.
+**
+** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
+** to wrap, which slows things down.  If you need high speed meta data updates, the journal should be big enough
+** to prevent wrapping before dirty meta blocks get to disk.
+**
+** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
+** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
+**
+** The larger the batch max age, the better the speed, and the more metadata changes you'll lose after a crash.
+**
+*/
+
+/* don't mess with these for a while */
+				/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
+#define JOURNAL_BLOCK_SIZE  4096	/* BUG gotta get rid of this */
+#define JOURNAL_MAX_CNODE   1500	/* max cnodes to allocate. */
+#define JOURNAL_HASH_SIZE 8192
+#define JOURNAL_NUM_BITMAPS 5	/* number of copies of the bitmaps to have floating.  Must be >= 2 */
+
+/* One of these for every block in every transaction
+** Each one is in two hash tables.  First, a hash of the current transaction, and after journal_end, a
+** hash of all the in memory transactions.
+** next and prev are used by the current transaction (journal_hash).
+** hnext and hprev are used by journal_list_hash.  If a block is in more than one transaction, the journal_list_hash
+** links it in multiple times.  This allows flush_journal_list to remove just the cnode belonging
+** to a given transaction.
+*/
+struct reiserfs_journal_cnode {
+	struct buffer_head *bh;	/* real buffer head */
+	struct super_block *sb;	/* dev of real buffer head */
+	__u32 blocknr;		/* block number of real buffer head, == 0 when buffer on disk */
+	unsigned long state;
+	struct reiserfs_journal_list *jlist;	/* journal list this cnode lives in */
+	struct reiserfs_journal_cnode *next;	/* next in transaction list */
+	struct reiserfs_journal_cnode *prev;	/* prev in transaction list */
+	struct reiserfs_journal_cnode *hprev;	/* prev in hash list */
+	struct reiserfs_journal_cnode *hnext;	/* next in hash list */
+};
+
+struct reiserfs_bitmap_node {
+	int id;
+	char *data;
+	struct list_head list;
+};
+
+struct reiserfs_list_bitmap {
+	struct reiserfs_journal_list *journal_list;
+	struct reiserfs_bitmap_node **bitmaps;
+};
+
+/*
+** one of these for each transaction.  The most important part here is the
+** j_realblock.  This list of cnodes is used to hash all the blocks in all
+** the commits, to mark all the real buffer heads dirty once all the commits
+** hit the disk, and to make sure every real block in a transaction is on
+** disk before allowing the log area to be overwritten */
+struct reiserfs_journal_list {
+	unsigned long j_start;
+	unsigned long j_state;
+	unsigned long j_len;
+	atomic_t j_nonzerolen;
+	atomic_t j_commit_left;
+	atomic_t j_older_commits_done;	/* all commits older than this on disk */
+	struct mutex j_commit_mutex;
+	unsigned int j_trans_id;
+	time_t j_timestamp;
+	struct reiserfs_list_bitmap *j_list_bitmap;
+	struct buffer_head *j_commit_bh;	/* commit buffer head */
+	struct reiserfs_journal_cnode *j_realblock;
+	struct reiserfs_journal_cnode *j_freedlist;	/* list of buffers that were freed during this trans.  free each of these on flush */
+	/* time ordered list of all active transactions */
+	struct list_head j_list;
+
+	/* time ordered list of all transactions we haven't tried to flush yet */
+	struct list_head j_working_list;
+
+	/* list of tail conversion targets in need of flush before commit */
+	struct list_head j_tail_bh_list;
+	/* list of data=ordered buffers in need of flush before commit */
+	struct list_head j_bh_list;
+	int j_refcount;
+};
+
+struct reiserfs_journal {
+	struct buffer_head **j_ap_blocks;	/* journal blocks on disk */
+	struct reiserfs_journal_cnode *j_last;	/* newest journal block */
+	struct reiserfs_journal_cnode *j_first;	/*  oldest journal block.  start here for traverse */
+
+	struct block_device *j_dev_bd;
+	fmode_t j_dev_mode;
+	int j_1st_reserved_block;	/* first block on s_dev of reserved area journal */
+
+	unsigned long j_state;
+	unsigned int j_trans_id;
+	unsigned long j_mount_id;
+	unsigned long j_start;	/* start of current waiting commit (index into j_ap_blocks) */
+	unsigned long j_len;	/* length of current waiting commit */
+	unsigned long j_len_alloc;	/* number of buffers requested by journal_begin() */
+	atomic_t j_wcount;	/* count of writers for current commit */
+	unsigned long j_bcount;	/* batch count. allows turning X transactions into 1 */
+	unsigned long j_first_unflushed_offset;	/* offset of the first unflushed transaction */
+	unsigned j_last_flush_trans_id;	/* last fully flushed journal timestamp */
+	struct buffer_head *j_header_bh;
+
+	time_t j_trans_start_time;	/* time this transaction started */
+	struct mutex j_mutex;
+	struct mutex j_flush_mutex;
+	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
+	atomic_t j_jlock;	/* lock for j_join_wait */
+	int j_list_bitmap_index;	/* number of next list bitmap to use */
+	int j_must_wait;	/* no more journal begins allowed. MUST sleep on j_join_wait */
+	int j_next_full_flush;	/* next journal_end will flush all journal list */
+	int j_next_async_flush;	/* next journal_end will flush all async commits */
+
+	int j_cnode_used;	/* number of cnodes on the used list */
+	int j_cnode_free;	/* number of cnodes on the free list */
+
+	unsigned int j_trans_max;	/* max number of blocks in a transaction.  */
+	unsigned int j_max_batch;	/* max number of blocks to batch into a trans */
+	unsigned int j_max_commit_age;	/* in seconds, how old can an async commit be */
+	unsigned int j_max_trans_age;	/* in seconds, how old can a transaction be */
+	unsigned int j_default_max_commit_age;	/* the default for the max commit age */
+
+	struct reiserfs_journal_cnode *j_cnode_free_list;
+	struct reiserfs_journal_cnode *j_cnode_free_orig;	/* orig pointer returned from vmalloc */
+
+	struct reiserfs_journal_list *j_current_jl;
+	int j_free_bitmap_nodes;
+	int j_used_bitmap_nodes;
+
+	int j_num_lists;	/* total number of active transactions */
+	int j_num_work_lists;	/* number that need attention from kreiserfsd */
+
+	/* debugging to make sure things are flushed in order */
+	unsigned int j_last_flush_id;
+
+	/* debugging to make sure things are committed in order */
+	unsigned int j_last_commit_id;
+
+	struct list_head j_bitmap_nodes;
+	struct list_head j_dirty_buffers;
+	spinlock_t j_dirty_buffers_lock;	/* protects j_dirty_buffers */
+
+	/* list of all active transactions */
+	struct list_head j_journal_list;
+	/* lists that haven't been touched by writeback attempts */
+	struct list_head j_working_list;
+
+	struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];	/* array of bitmaps to record the deleted blocks */
+	struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];	/* hash table for real buffer heads in current trans */
+	struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];	/* hash table for all the real buffer heads in all
+										   the transactions */
+	struct list_head j_prealloc_list;	/* list of inodes which have preallocated blocks */
+	int j_persistent_trans;
+	unsigned long j_max_trans_size;
+	unsigned long j_max_batch_size;
+
+	int j_errno;
+
+	/* when flushing ordered buffers, throttle new ordered writers */
+	struct delayed_work j_work;
+	struct super_block *j_work_sb;
+	atomic_t j_async_throttle;
+};
+
+enum journal_state_bits {
+	J_WRITERS_BLOCKED = 1,	/* set when new writers not allowed */
+	J_WRITERS_QUEUED,	/* set when log is full due to too many writers */
+	J_ABORTED,		/* set when log is aborted */
+};
+
+#define JOURNAL_DESC_MAGIC "ReIsErLB"	/* ick.  magic string to find desc blocks in the journal */
+
+typedef __u32(*hashf_t) (const signed char *, int);
+
+struct reiserfs_bitmap_info {
+	__u32 free_count;
+};
+
+struct proc_dir_entry;
+
+#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
+typedef unsigned long int stat_cnt_t;
+typedef struct reiserfs_proc_info_data {
+	spinlock_t lock;
+	int exiting;
+	int max_hash_collisions;
+
+	stat_cnt_t breads;
+	stat_cnt_t bread_miss;
+	stat_cnt_t search_by_key;
+	stat_cnt_t search_by_key_fs_changed;
+	stat_cnt_t search_by_key_restarted;
+
+	stat_cnt_t insert_item_restarted;
+	stat_cnt_t paste_into_item_restarted;
+	stat_cnt_t cut_from_item_restarted;
+	stat_cnt_t delete_solid_item_restarted;
+	stat_cnt_t delete_item_restarted;
+
+	stat_cnt_t leaked_oid;
+	stat_cnt_t leaves_removable;
+
+	/* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */
+	stat_cnt_t balance_at[5];	/* XXX */
+	/* sbk == search_by_key */
+	stat_cnt_t sbk_read_at[5];	/* XXX */
+	stat_cnt_t sbk_fs_changed[5];
+	stat_cnt_t sbk_restarted[5];
+	stat_cnt_t items_at[5];	/* XXX */
+	stat_cnt_t free_at[5];	/* XXX */
+	stat_cnt_t can_node_be_removed[5];	/* XXX */
+	long int lnum[5];	/* XXX */
+	long int rnum[5];	/* XXX */
+	long int lbytes[5];	/* XXX */
+	long int rbytes[5];	/* XXX */
+	stat_cnt_t get_neighbors[5];
+	stat_cnt_t get_neighbors_restart[5];
+	stat_cnt_t need_l_neighbor[5];
+	stat_cnt_t need_r_neighbor[5];
+
+	stat_cnt_t free_block;
+	struct __scan_bitmap_stats {
+		stat_cnt_t call;
+		stat_cnt_t wait;
+		stat_cnt_t bmap;
+		stat_cnt_t retry;
+		stat_cnt_t in_journal_hint;
+		stat_cnt_t in_journal_nohint;
+		stat_cnt_t stolen;
+	} scan_bitmap;
+	struct __journal_stats {
+		stat_cnt_t in_journal;
+		stat_cnt_t in_journal_bitmap;
+		stat_cnt_t in_journal_reusable;
+		stat_cnt_t lock_journal;
+		stat_cnt_t lock_journal_wait;
+		stat_cnt_t journal_being;
+		stat_cnt_t journal_relock_writers;
+		stat_cnt_t journal_relock_wcount;
+		stat_cnt_t mark_dirty;
+		stat_cnt_t mark_dirty_already;
+		stat_cnt_t mark_dirty_notjournal;
+		stat_cnt_t restore_prepared;
+		stat_cnt_t prepare;
+		stat_cnt_t prepare_retry;
+	} journal;
+} reiserfs_proc_info_data_t;
+#else
+typedef struct reiserfs_proc_info_data {
+} reiserfs_proc_info_data_t;
+#endif
+
+/* reiserfs union of in-core super block data */
+struct reiserfs_sb_info {
+	struct buffer_head *s_sbh;	/* Buffer containing the super block */
+	/* both the comment and the choice of
+	   name are unclear for s_rs -Hans */
+	struct reiserfs_super_block *s_rs;	/* Pointer to the super block in the buffer */
+	struct reiserfs_bitmap_info *s_ap_bitmap;
+	struct reiserfs_journal *s_journal;	/* pointer to journal information */
+	unsigned short s_mount_state;	/* reiserfs state (valid, invalid) */
+
+	/* Serialize writers access, replace the old bkl */
+	struct mutex lock;
+	/* Owner of the lock (can be recursive) */
+	struct task_struct *lock_owner;
+	/* Depth of the lock, start from -1 like the bkl */
+	int lock_depth;
+
+	/* Comment? -Hans */
+	void (*end_io_handler) (struct buffer_head *, int);
+	hashf_t s_hash_function;	/* pointer to function which is used
+					   to sort names in directory. Set on
+					   mount */
+	unsigned long s_mount_opt;	/* reiserfs's mount options are set
+					   here (currently - NOTAIL, NOLOG,
+					   REPLAYONLY) */
+
+	struct {		/* This is a structure that describes block allocator options */
+		unsigned long bits;	/* Bitfield for enable/disable kind of options */
+		unsigned long large_file_size;	/* size (in blocks) starting from which we consider a file to be large */
+		int border;	/* percentage of disk, border takes */
+		int preallocmin;	/* Minimal file size (in blocks) starting from which we do preallocations */
+		int preallocsize;	/* Number of blocks we try to prealloc when file
+					   reaches preallocmin size (in blocks) or
+					   prealloc_list is empty. */
+	} s_alloc_options;
+
+	/* Comment? -Hans */
+	wait_queue_head_t s_wait;
+	/* To be obsoleted soon by per buffer seals.. -Hans */
+	atomic_t s_generation_counter;	// increased by one every time the
+	// tree gets re-balanced
+	unsigned long s_properties;	/* File system properties. Currently holds
+					   on-disk FS format */
+
+	/* session statistics */
+	int s_disk_reads;
+	int s_disk_writes;
+	int s_fix_nodes;
+	int s_do_balance;
+	int s_unneeded_left_neighbor;
+	int s_good_search_by_key_reada;
+	int s_bmaps;
+	int s_bmaps_without_search;
+	int s_direct2indirect;
+	int s_indirect2direct;
+	/* set up when it's ok for reiserfs_read_inode2() to read from
+	   disk inode with nlink==0. Currently this is only used during
+	   finish_unfinished() processing at mount time */
+	int s_is_unlinked_ok;
+	reiserfs_proc_info_data_t s_proc_info_data;
+	struct proc_dir_entry *procdir;
+	int reserved_blocks;	/* amount of blocks reserved for further allocations */
+	spinlock_t bitmap_lock;	/* this lock on now only used to protect reserved_blocks variable */
+	struct dentry *priv_root;	/* root of /.reiserfs_priv */
+	struct dentry *xattr_root;	/* root of /.reiserfs_priv/xattrs */
+	int j_errno;
+#ifdef CONFIG_QUOTA
+	char *s_qf_names[MAXQUOTAS];
+	int s_jquota_fmt;
+#endif
+	char *s_jdev;		/* Stored jdev for mount option showing */
+#ifdef CONFIG_REISERFS_CHECK
+
+	struct tree_balance *cur_tb;	/*
+					 * Detects whether more than one
+					 * copy of tb exists per superblock
+					 * as a means of checking whether
+					 * do_balance is executing concurrently
+					 * against another tree reader/writer
+					 * on a same mount point.
+					 */
+#endif
+};
+
+/* Definitions of reiserfs on-disk properties: */
+#define REISERFS_3_5 0
+#define REISERFS_3_6 1
+#define REISERFS_OLD_FORMAT 2
+
+enum reiserfs_mount_options {
+/* Mount options */
+	REISERFS_LARGETAIL,	/* large tails will be created in a session */
+	REISERFS_SMALLTAIL,	/* small (for files less than block size) tails will be created in a session */
+	REPLAYONLY,		/* replay journal and return 0. Used by fsck */
+	REISERFS_CONVERT,	/* -o conv: causes conversion of old
+				   format super block to the new
+				   format. If not specified - old
+				   partition will be dealt with in a
+				   manner of 3.5.x */
+
+/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
+** reiserfs disks from 3.5.19 or earlier.  99% of the time, this option
+** is not required.  If the normal autodetection code can't determine which
+** hash to use (because both hashes had the same value for a file)
+** use this option to force a specific hash.  It won't allow you to override
+** the existing hash on the FS, so if you have a tea hash disk, and mount
+** with -o hash=rupasov, the mount will fail.
+*/
+	FORCE_TEA_HASH,		/* try to force tea hash on mount */
+	FORCE_RUPASOV_HASH,	/* try to force rupasov hash on mount */
+	FORCE_R5_HASH,		/* try to force r5 hash on mount */
+	FORCE_HASH_DETECT,	/* try to detect hash function on mount */
+
+	REISERFS_DATA_LOG,
+	REISERFS_DATA_ORDERED,
+	REISERFS_DATA_WRITEBACK,
+
+/* used for testing experimental features, makes benchmarking new
+   features with and without them more convenient, should never be used
+   by users in any code shipped to users (ideally) */
+
+	REISERFS_NO_BORDER,
+	REISERFS_NO_UNHASHED_RELOCATION,
+	REISERFS_HASHED_RELOCATION,
+	REISERFS_ATTRS,
+	REISERFS_XATTRS_USER,
+	REISERFS_POSIXACL,
+	REISERFS_EXPOSE_PRIVROOT,
+	REISERFS_BARRIER_NONE,
+	REISERFS_BARRIER_FLUSH,
+
+	/* Actions on error */
+	REISERFS_ERROR_PANIC,
+	REISERFS_ERROR_RO,
+	REISERFS_ERROR_CONTINUE,
+
+	REISERFS_USRQUOTA,	/* User quota option specified */
+	REISERFS_GRPQUOTA,	/* Group quota option specified */
+
+	REISERFS_TEST1,
+	REISERFS_TEST2,
+	REISERFS_TEST3,
+	REISERFS_TEST4,
+	REISERFS_UNSUPPORTED_OPT,
+};
+
+#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH))
+#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
+#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
+#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
+#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
+#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
+#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
+#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))
+
+#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
+#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
+#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
+#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
+#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
+#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
+#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
+#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
+#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
+#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
+#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
+#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
+#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
+#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
+#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
+
+#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
+#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))
+
+void reiserfs_file_buffer(struct buffer_head *bh, int list);
+extern struct file_system_type reiserfs_fs_type;
+int reiserfs_resize(struct super_block *, unsigned long);
+
+#define CARRY_ON                0
+#define SCHEDULE_OCCURRED       1
+
+#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
+#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
+#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
+#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
+#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)
+
+#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh)
+
+/* A safe version of the "bdevname", which returns the "s_id" field of
+ * a superblock or else "Null superblock" if the super block is NULL.
+ */
+static inline char *reiserfs_bdevname(struct super_block *s)
+{
+	return (s == NULL) ? "Null superblock" : s->s_id;
+}
+
+#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
+static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
+						*journal)
+{
+	return test_bit(J_ABORTED, &journal->j_state);
+}
+
+/*
+ * Locking primitives. The write lock is a per superblock
+ * special mutex that has properties close to the Big Kernel Lock
+ * which was used in the previous locking scheme.
+ */
+void reiserfs_write_lock(struct super_block *s);
+void reiserfs_write_unlock(struct super_block *s);
+int reiserfs_write_lock_once(struct super_block *s);
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *s);
+#else
+static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
+#endif
+
+/*
+ * Several mutexes depend on the write lock.
+ * However sometimes we want to relax the write lock while we hold
+ * these mutexes, according to the release/reacquire on schedule()
+ * properties of the Bkl that were used.
+ * Reiserfs performances and locking were based on this scheme.
+ * Now that the write lock is a mutex and not the bkl anymore, doing so
+ * may result in a deadlock:
+ *
+ * A acquire write_lock
+ * A acquire j_commit_mutex
+ * A release write_lock and wait for something
+ * B acquire write_lock
+ * B can't acquire j_commit_mutex and sleep
+ * A can't acquire write lock anymore
+ * deadlock
+ *
+ * What we do here is avoid such a deadlock by playing the same game
+ * as the Bkl: if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, wait a bit and then retry.
+ *
+ * The mutexes concerned by this hack are:
+ * - The commit mutex of a journal list
+ * - The flush mutex
+ * - The journal lock
+ * - The inode mutex
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+			       struct super_block *s)
+{
+	reiserfs_lock_check_recursive(s);
+	reiserfs_write_unlock(s);
+	mutex_lock(m);
+	reiserfs_write_lock(s);
+}
+
+static inline void
+reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
+			       struct super_block *s)
+{
+	reiserfs_lock_check_recursive(s);
+	reiserfs_write_unlock(s);
+	mutex_lock_nested(m, subclass);
+	reiserfs_write_lock(s);
+}
+
+static inline void
+reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
+{
+	reiserfs_lock_check_recursive(s);
+	reiserfs_write_unlock(s);
+	down_read(sem);
+	reiserfs_write_lock(s);
+}
+
+/*
+ * When we schedule, we usually want to also release the write lock,
+ * according to the previous bkl based locking scheme of reiserfs.
+ */
+static inline void reiserfs_cond_resched(struct super_block *s)
+{
+	if (need_resched()) {
+		reiserfs_write_unlock(s);
+		schedule();
+		reiserfs_write_lock(s);
+	}
+}
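+
+/*
+ * Illustrative sketch (editorial, kept out of the build with #if 0, not
+ * part of the original source): how a caller is expected to take one of
+ * the write-lock-dependent mutexes listed above.  The journal list
+ * argument "jl" is hypothetical here.
+ */
+#if 0
+static void example_take_commit_mutex(struct super_block *s,
+				      struct reiserfs_journal_list *jl)
+{
+	/* Wrong under the scheme above: can deadlock as described. */
+	/* mutex_lock(&jl->j_commit_mutex); */
+
+	/* Right: drop the write lock around the blocking mutex_lock(). */
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
+
+	/* ... work under both the write lock and the commit mutex ... */
+
+	mutex_unlock(&jl->j_commit_mutex);
+}
+#endif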
+
+struct fid;
+
+/* in reading the #defines, it may help to understand that they employ
+   the following abbreviations:
+
+   B = Buffer
+   I = Item header
+   H = Height within the tree (should be changed to LEV)
+   N = Number of the item in the node
+   STAT = stat data
+   DEH = Directory Entry Header
+   EC = Entry Count
+   E = Entry number
+   UL = Unsigned Long
+   BLKH = BLocK Header
+   UNFM = UNForMatted node
+   DC = Disk Child
+   P = Path
+
+   The #defines are named by concatenating these abbreviations: the
+   arguments come first and the return value last.
+*/
+
+#define USE_INODE_GENERATION_COUNTER
+
+#define REISERFS_PREALLOCATE
+#define DISPLACE_NEW_PACKING_LOCALITIES
+#define PREALLOCATION_SIZE 9
+
+/* n must be power of 2 */
+#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
+
+// to be OK for Alpha and others we have to align structures to an
+// 8 byte boundary.
+// FIXME: do not change this alignment constant: there is code which relies on it
+#define ROUND_UP(x) _ROUND_UP(x,8LL)
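+
+/*
+ * Worked example (editorial): with the 8-byte alignment above,
+ * ROUND_UP(1) == 8, ROUND_UP(8) == 8 and ROUND_UP(13) == 16.
+ */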
+
+/* debug levels.  Right now, CONFIG_REISERFS_CHECK means print all debug
+** messages.
+*/
+#define REISERFS_DEBUG_CODE 5	/* extra messages to help find/debug errors */
+
+void __reiserfs_warning(struct super_block *s, const char *id,
+			 const char *func, const char *fmt, ...);
+#define reiserfs_warning(s, id, fmt, args...) \
+	 __reiserfs_warning(s, id, __func__, fmt, ##args)
+/* assertions handling */
+
+/** always check a condition and panic if it's false. */
+#define __RASSERT(cond, scond, format, args...)			\
+do {									\
+	if (!(cond))							\
+		reiserfs_panic(NULL, "assertion failure",		\
+			       "(" #cond ") [pid %i] at "		\
+			       __FILE__ ":%i:%s: " format "\n",		\
+			       in_interrupt() ? -1 : task_pid_nr(current), \
+			       __LINE__, __func__ , ##args);		\
+} while (0)
+
+#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
+
+#if defined(CONFIG_REISERFS_CHECK)
+#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args)
+#else
+#define RFALSE(cond, format, args...) do { } while (0)
+#endif
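+
+/*
+ * Usage sketch (editorial, not from the original source): RASSERT panics
+ * when its condition is false; RFALSE panics when its condition is true
+ * and compiles away entirely without CONFIG_REISERFS_CHECK:
+ *
+ *	RASSERT(bh != NULL, "no buffer for block %lu", blocknr);
+ *	RFALSE(ih_item_len(ih) > MAX_ITEM_LEN(bh->b_size),
+ *	       "item is longer than the block allows");
+ */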
+
+#define CONSTF __attribute_const__
+/*
+ * Disk Data Structures
+ */
+
+/***************************************************************************/
+/*                             SUPER BLOCK                                 */
+/***************************************************************************/
+
+/*
+ * Structure of the super block on disk.  A version of it in RAM is often
+ * accessed as REISERFS_SB(s)->s_rs; the RAM version is part of a larger
+ * structure containing fields never written to disk.
+ */
+#define UNSET_HASH 0		// read_super will guess which hash was
+		     // used to sort the names in directories
+#define TEA_HASH  1
+#define YURA_HASH 2
+#define R5_HASH   3
+#define DEFAULT_HASH R5_HASH
+
+struct journal_params {
+	__le32 jp_journal_1st_block;	/* where does journal start from on its
+					 * device */
+	__le32 jp_journal_dev;	/* journal device st_rdev */
+	__le32 jp_journal_size;	/* size of the journal */
+	__le32 jp_journal_trans_max;	/* max number of blocks in a transaction. */
+	__le32 jp_journal_magic;	/* random value made on fs creation (this
+					 * was sb_journal_block_count) */
+	__le32 jp_journal_max_batch;	/* max number of blocks to batch into a
+					 * trans */
+	__le32 jp_journal_max_commit_age;	/* in seconds, how old can an async
+						 * commit be */
+	__le32 jp_journal_max_trans_age;	/* in seconds, how old can a transaction
+						 * be */
+};
+
+/* this is the super from 3.5.X, where X >= 10 */
+struct reiserfs_super_block_v1 {
+	__le32 s_block_count;	/* blocks count         */
+	__le32 s_free_blocks;	/* free blocks count    */
+	__le32 s_root_block;	/* root block number    */
+	struct journal_params s_journal;
+	__le16 s_blocksize;	/* block size */
+	__le16 s_oid_maxsize;	/* max size of object id array, see
+				 * get_objectid() commentary  */
+	__le16 s_oid_cursize;	/* current size of object id array */
+	__le16 s_umount_state;	/* set to 1 when the filesystem was cleanly
+				 * unmounted, to 2 when it was not */
+	char s_magic[10];	/* reiserfs magic string indicates that
+				 * file system is reiserfs:
+				 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
+	__le16 s_fs_state;	/* used by fsck to mark which phase of
+				 * rebuilding is done */
+	__le32 s_hash_function_code;	/* indicates which hash function is
+					 * used to sort names in directories */
+	__le16 s_tree_height;	/* height of disk tree */
+	__le16 s_bmap_nr;	/* number of bitmap blocks needed to address
+				 * each block of the file system */
+	__le16 s_version;	/* this field is only reliable on filesystems
+				 * with a non-standard journal */
+	__le16 s_reserved_for_journal;	/* size in blocks of the journal area
+					 * on the main device; we need to keep
+					 * it after making an fs with a
+					 * non-standard journal */
+} __attribute__ ((__packed__));
+
+#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
+
+/* this is the on disk super block */
+struct reiserfs_super_block {
+	struct reiserfs_super_block_v1 s_v1;
+	__le32 s_inode_generation;
+	__le32 s_flags;		/* Right now used only by inode-attributes, if enabled */
+	unsigned char s_uuid[16];	/* filesystem unique identifier */
+	unsigned char s_label[16];	/* filesystem volume label */
+	__le16 s_mnt_count;		/* Count of mounts since last fsck */
+	__le16 s_max_mnt_count;		/* Maximum mounts before check */
+	__le32 s_lastcheck;		/* Timestamp of last fsck */
+	__le32 s_check_interval;	/* Interval between checks */
+	char s_unused[76];	/* zero filled by mkreiserfs and
+				 * reiserfs_convert_objectid_map_v1()
+				 * so any additions must be updated
+				 * there as well. */
+} __attribute__ ((__packed__));
+
+#define SB_SIZE (sizeof(struct reiserfs_super_block))
+
+#define REISERFS_VERSION_1 0
+#define REISERFS_VERSION_2 2
+
+// on-disk super block fields converted to cpu form
+#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
+#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
+#define SB_BLOCKSIZE(s) \
+        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
+#define SB_BLOCK_COUNT(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
+#define SB_FREE_BLOCKS(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks))
+#define SB_REISERFS_MAGIC(s) \
+        (SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
+#define SB_ROOT_BLOCK(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block))
+#define SB_TREE_HEIGHT(s) \
+        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height))
+#define SB_REISERFS_STATE(s) \
+        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state))
+#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version))
+#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr))
+
+#define PUT_SB_BLOCK_COUNT(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
+#define PUT_SB_FREE_BLOCKS(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
+#define PUT_SB_ROOT_BLOCK(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
+#define PUT_SB_TREE_HEIGHT(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
+#define PUT_SB_REISERFS_STATE(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0)
+#define PUT_SB_VERSION(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
+#define PUT_SB_BMAP_NR(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
+
+#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal)
+#define SB_ONDISK_JOURNAL_SIZE(s) \
+         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size))
+#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \
+         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block))
+#define SB_ONDISK_JOURNAL_DEVICE(s) \
+         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
+#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
+         le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
+
+#define is_block_in_log_or_reserved_area(s, block) \
+         ((block) >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \
+         && (block) < SB_JOURNAL_1st_RESERVED_BLOCK(s) +  \
+         ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \
+         SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))))
+
+int is_reiserfs_3_5(struct reiserfs_super_block *rs);
+int is_reiserfs_3_6(struct reiserfs_super_block *rs);
+int is_reiserfs_jr(struct reiserfs_super_block *rs);
+
+/* ReiserFS leaves the first 64k unused, so that partition labels have
+   enough space.  If someone wants to write a fancy bootloader that
+   needs more than 64k, let us know, and this will be increased in size.
+   This number must be larger than the largest block size on any
+   platform, or code will break.  -Hans */
+#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
+#define REISERFS_FIRST_BLOCK unused_define
+#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
+
+/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
+#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
+
+/* reiserfs internal error code (used by search_by_key and fix_nodes) */
+#define CARRY_ON      0
+#define REPEAT_SEARCH -1
+#define IO_ERROR      -2
+#define NO_DISK_SPACE -3
+#define NO_BALANCING_NEEDED  (-4)
+#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5)
+#define QUOTA_EXCEEDED -6
+
+typedef __u32 b_blocknr_t;
+typedef __le32 unp_t;
+
+struct unfm_nodeinfo {
+	unp_t unfm_nodenum;
+	unsigned short unfm_freespace;
+};
+
+/* there are two formats of keys: 3.5 and 3.6
+ */
+#define KEY_FORMAT_3_5 0
+#define KEY_FORMAT_3_6 1
+
+/* there are two stat datas */
+#define STAT_DATA_V1 0
+#define STAT_DATA_V2 1
+
+static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode)
+{
+	return container_of(inode, struct reiserfs_inode_info, vfs_inode);
+}
+
+static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
+ * which overflows on large file systems. */
+static inline __u32 reiserfs_bmap_count(struct super_block *sb)
+{
+	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
+}
+
+static inline int bmap_would_wrap(unsigned bmap_nr)
+{
+	return bmap_nr > ((1LL << 16) - 1);
+}
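+
+/*
+ * Worked example (editorial): with a 4096-byte block size each bitmap
+ * block maps 4096 * 8 = 32768 blocks, so a 100000-block filesystem needs
+ * reiserfs_bmap_count() == (100000 - 1) / 32768 + 1 == 4 bitmap blocks,
+ * far below the 2^16 limit checked by bmap_would_wrap().
+ */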
+
+/** this gives the key version of all items (except the stat data) that
+    the object consists of */
+#define get_inode_item_key_version( inode )                                    \
+    ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
+
+#define set_inode_item_key_version( inode, version )                           \
+         ({ if((version)==KEY_FORMAT_3_6)                                      \
+                REISERFS_I(inode)->i_flags |= i_item_key_version_mask;      \
+            else                                                               \
+                REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; })
+
+#define get_inode_sd_version(inode)                                            \
+    ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1)
+
+#define set_inode_sd_version(inode, version)                                   \
+         ({ if((version)==STAT_DATA_V2)                                        \
+                REISERFS_I(inode)->i_flags |= i_stat_data_version_mask;     \
+            else                                                               \
+                REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
+
+/* This is an aggressive tail suppression policy, I am hoping it
+   improves our benchmarks. The principle behind it is that percentage
+   space saving is what matters, not absolute space saving.  This is
+   non-intuitive, but it helps to understand it if you consider that the
+   cost to access 4 blocks is not much more than the cost to access 1
+   block, if you have to do a seek and rotate.  A tail risks a
+   non-linear disk access that is significant as a percentage of total
+   time cost for a 4 block file and saves an amount of space that is
+   less significant as a percentage of space, or so goes the hypothesis.
+   -Hans */
+#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
+(\
+  (!(n_tail_size)) || \
+  (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \
+   ( (n_file_size) >= (n_block_size) * 4 ) || \
+   ( ( (n_file_size) >= (n_block_size) * 3 ) && \
+     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \
+   ( ( (n_file_size) >= (n_block_size) * 2 ) && \
+     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \
+   ( ( (n_file_size) >= (n_block_size) ) && \
+     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
+)
+
+/* Another strategy for tails, this one means only create a tail if all the
+   file would fit into one DIRECT item.
+   Primary intention for this one is to increase performance by decreasing
+   seeking.
+*/
+#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
+(\
+  (!(n_tail_size)) || \
+  (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \
+)
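+
+/*
+ * Worked example (editorial, assuming a 4096-byte block size, where
+ * MAX_DIRECT_ITEM_LEN(4096) is a little under 4k): for a 5000-byte file
+ * with a 904-byte tail, S1 evaluates false and the tail stays packed in a
+ * direct item; for a 17000-byte file (>= 4 blocks) S1 evaluates true and
+ * the tail goes to an unformatted node.  S2 is stricter: it keeps a tail
+ * packed only when the whole file fits in one direct item.
+ */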
+
+/*
+ * values for s_umount_state field
+ */
+#define REISERFS_VALID_FS    1
+#define REISERFS_ERROR_FS    2
+
+//
+// there are 4 item types currently, plus TYPE_ANY as a wildcard
+//
+#define TYPE_STAT_DATA 0
+#define TYPE_INDIRECT 1
+#define TYPE_DIRECT 2
+#define TYPE_DIRENTRY 3
+#define TYPE_MAXTYPE 3
+#define TYPE_ANY 15		// FIXME: comment is required
+
+/***************************************************************************/
+/*                       KEY & ITEM HEAD                                   */
+/***************************************************************************/
+
+//
+// directories use this key as well as old files
+//
+struct offset_v1 {
+	__le32 k_offset;
+	__le32 k_uniqueness;
+} __attribute__ ((__packed__));
+
+struct offset_v2 {
+	__le64 v;
+} __attribute__ ((__packed__));
+
+static inline __u16 offset_v2_k_type(const struct offset_v2 *v2)
+{
+	__u8 type = le64_to_cpu(v2->v) >> 60;
+	return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY;
+}
+
+static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type)
+{
+	v2->v =
+	    (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60);
+}
+
+static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2)
+{
+	return le64_to_cpu(v2->v) & (~0ULL >> 4);
+}
+
+static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
+{
+	offset &= (~0ULL >> 4);
+	v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
+}
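+
+/*
+ * Round-trip sketch (editorial, kept out of the build): the v2 offset
+ * packs a 4-bit item type into the top bits of a 64-bit little-endian
+ * word and a 60-bit byte offset into the remaining bits.
+ */
+#if 0
+static void example_offset_v2_roundtrip(void)
+{
+	struct offset_v2 v2 = { .v = 0 };
+
+	set_offset_v2_k_type(&v2, TYPE_INDIRECT);
+	set_offset_v2_k_offset(&v2, 4097);
+
+	/* The two halves survive independently: */
+	BUG_ON(offset_v2_k_type(&v2) != TYPE_INDIRECT);
+	BUG_ON(offset_v2_k_offset(&v2) != 4097);
+}
+#endif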
+
+/* Key of an item determines its location in the S+tree, and
+   is composed of 4 components */
+struct reiserfs_key {
+	__le32 k_dir_id;	/* packing locality: by default parent
+				   directory object id */
+	__le32 k_objectid;	/* object identifier */
+	union {
+		struct offset_v1 k_offset_v1;
+		struct offset_v2 k_offset_v2;
+	} __attribute__ ((__packed__)) u;
+} __attribute__ ((__packed__));
+
+struct in_core_key {
+	__u32 k_dir_id;		/* packing locality: by default parent
+				   directory object id */
+	__u32 k_objectid;	/* object identifier */
+	__u64 k_offset;
+	__u8 k_type;
+};
+
+struct cpu_key {
+	struct in_core_key on_disk_key;
+	int version;
+	int key_length;		/* 3 in all cases but direct2indirect and
+				   indirect2direct conversion */
+};
+
+/* Our function for comparing keys can compare keys of different
+   lengths.  It takes as a parameter the length of the keys it is to
+   compare.  These defines are used in determining what is to be passed
+   to it as that parameter. */
+#define REISERFS_FULL_KEY_LEN     4
+#define REISERFS_SHORT_KEY_LEN    2
+
+/* The result of the key compare */
+#define FIRST_GREATER 1
+#define SECOND_GREATER -1
+#define KEYS_IDENTICAL 0
+#define KEY_FOUND 1
+#define KEY_NOT_FOUND 0
+
+#define KEY_SIZE (sizeof(struct reiserfs_key))
+#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32))
+
+/* return values for search_by_key and clones */
+#define ITEM_FOUND 1
+#define ITEM_NOT_FOUND 0
+#define ENTRY_FOUND 1
+#define ENTRY_NOT_FOUND 0
+#define DIRECTORY_NOT_FOUND -1
+#define REGULAR_FILE_FOUND -2
+#define DIRECTORY_FOUND -3
+#define BYTE_FOUND 1
+#define BYTE_NOT_FOUND 0
+#define FILE_NOT_FOUND -1
+
+#define POSITION_FOUND 1
+#define POSITION_NOT_FOUND 0
+
+// return values for reiserfs_find_entry and search_by_entry_key
+#define NAME_FOUND 1
+#define NAME_NOT_FOUND 0
+#define GOTO_PREVIOUS_ITEM 2
+#define NAME_FOUND_INVISIBLE 3
+
+/*  Everything in the filesystem is stored as a set of items.  The
+    item head contains the key of the item, its free space (for
+    indirect items) and specifies the location of the item itself
+    within the block.  */
+
+struct item_head {
+	/* Everything in the tree is found by searching for it based on
+	 * its key.*/
+	struct reiserfs_key ih_key;
+	union {
+		/* The free space in the last unformatted node of an
+		   indirect item if this is an indirect item.  This
+		   equals 0xFFFF iff this is a direct item or stat data
+		   item. Note that the key, not this field, is used to
+		   determine the item type, and thus which field this
+		   union contains. */
+		__le16 ih_free_space_reserved;
+		/* Iff this is a directory item, this field equals the
+		   number of directory entries in the directory item. */
+		__le16 ih_entry_count;
+	} __attribute__ ((__packed__)) u;
+	__le16 ih_item_len;	/* total size of the item body */
+	__le16 ih_item_location;	/* an offset to the item body
+					 * within the block */
+	__le16 ih_version;	/* 0 for all old items, 2 for new ones.
+				   The highest bit is set temporarily by
+				   fsck and cleared when it is done */
+} __attribute__ ((__packed__));
+/* size of item header     */
+#define IH_SIZE (sizeof(struct item_head))
+
+#define ih_free_space(ih)            le16_to_cpu((ih)->u.ih_free_space_reserved)
+#define ih_version(ih)               le16_to_cpu((ih)->ih_version)
+#define ih_entry_count(ih)           le16_to_cpu((ih)->u.ih_entry_count)
+#define ih_location(ih)              le16_to_cpu((ih)->ih_item_location)
+#define ih_item_len(ih)              le16_to_cpu((ih)->ih_item_len)
+
+#define put_ih_free_space(ih, val)   do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0)
+#define put_ih_version(ih, val)      do { (ih)->ih_version = cpu_to_le16(val); } while (0)
+#define put_ih_entry_count(ih, val)  do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0)
+#define put_ih_location(ih, val)     do { (ih)->ih_item_location = cpu_to_le16(val); } while (0)
+#define put_ih_item_len(ih, val)     do { (ih)->ih_item_len = cpu_to_le16(val); } while (0)
+
+#define unreachable_item(ih) (ih_version(ih) & (1 << 15))
+
+#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
+#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
+
+/* these operate on indirect items, where you've got an array of ints
+** at a possibly unaligned location.  These are a no-op on ia32.
+**
+** p is the array of __u32, i is the index into the array, v is the value
+** to store there.
+*/
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
+
+//
+// in the old (v1) key format the uniqueness field encodes the key type
+//
+#define V1_SD_UNIQUENESS 0
+#define V1_INDIRECT_UNIQUENESS 0xfffffffe
+#define V1_DIRECT_UNIQUENESS 0xffffffff
+#define V1_DIRENTRY_UNIQUENESS 500
+#define V1_ANY_UNIQUENESS 555	// FIXME: comment is required
+
+//
+// here are conversion routines
+//
+static inline int uniqueness2type(__u32 uniqueness) CONSTF;
+static inline int uniqueness2type(__u32 uniqueness)
+{
+	switch ((int)uniqueness) {
+	case V1_SD_UNIQUENESS:
+		return TYPE_STAT_DATA;
+	case V1_INDIRECT_UNIQUENESS:
+		return TYPE_INDIRECT;
+	case V1_DIRECT_UNIQUENESS:
+		return TYPE_DIRECT;
+	case V1_DIRENTRY_UNIQUENESS:
+		return TYPE_DIRENTRY;
+	case V1_ANY_UNIQUENESS:
+	default:
+		return TYPE_ANY;
+	}
+}
+
+static inline __u32 type2uniqueness(int type) CONSTF;
+static inline __u32 type2uniqueness(int type)
+{
+	switch (type) {
+	case TYPE_STAT_DATA:
+		return V1_SD_UNIQUENESS;
+	case TYPE_INDIRECT:
+		return V1_INDIRECT_UNIQUENESS;
+	case TYPE_DIRECT:
+		return V1_DIRECT_UNIQUENESS;
+	case TYPE_DIRENTRY:
+		return V1_DIRENTRY_UNIQUENESS;
+	case TYPE_ANY:
+	default:
+		return V1_ANY_UNIQUENESS;
+	}
+}
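+
+/*
+ * Sanity note (editorial): the two conversions invert each other for the
+ * four real item types, e.g.
+ *
+ *	uniqueness2type(type2uniqueness(TYPE_DIRENTRY)) == TYPE_DIRENTRY
+ *
+ * while any unrecognized uniqueness value collapses to TYPE_ANY.
+ */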
+
+//
+// key is a pointer to an on-disk key, stored little-endian; the result
+// is in cpu form.  There is no way to derive the version of an object
+// from its key, so the version must be passed to these helpers.
+//
+static inline loff_t le_key_k_offset(int version,
+				     const struct reiserfs_key *key)
+{
+	return (version == KEY_FORMAT_3_5) ?
+	    le32_to_cpu(key->u.k_offset_v1.k_offset) :
+	    offset_v2_k_offset(&(key->u.k_offset_v2));
+}
+
+static inline loff_t le_ih_k_offset(const struct item_head *ih)
+{
+	return le_key_k_offset(ih_version(ih), &(ih->ih_key));
+}
+
+static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
+{
+	return (version == KEY_FORMAT_3_5) ?
+	    uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) :
+	    offset_v2_k_type(&(key->u.k_offset_v2));
+}
+
+static inline loff_t le_ih_k_type(const struct item_head *ih)
+{
+	return le_key_k_type(ih_version(ih), &(ih->ih_key));
+}
+
+static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
+				       loff_t offset)
+{
+	if (version == KEY_FORMAT_3_5)
+		key->u.k_offset_v1.k_offset = cpu_to_le32(offset); /* jdm check */
+	else
+		set_offset_v2_k_offset(&(key->u.k_offset_v2), offset);
+}
+
+static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
+{
+	set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
+}
+
+static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
+				     int type)
+{
+	if (version == KEY_FORMAT_3_5)
+		key->u.k_offset_v1.k_uniqueness =
+		    cpu_to_le32(type2uniqueness(type));
+	else
+		set_offset_v2_k_type(&(key->u.k_offset_v2), type);
+}
+
+static inline void set_le_ih_k_type(struct item_head *ih, int type)
+{
+	set_le_key_k_type(ih_version(ih), &(ih->ih_key), type);
+}
+
+static inline int is_direntry_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_DIRENTRY;
+}
+
+static inline int is_direct_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_DIRECT;
+}
+
+static inline int is_indirect_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_INDIRECT;
+}
+
+static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_STAT_DATA;
+}
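+
+/*
+ * Usage sketch (editorial, kept out of the build): classifying an on-disk
+ * key.  The same bits decode differently for 3.5 and 3.6 keys, which is
+ * why the caller must supply the version explicitly.
+ */
+#if 0
+static void example_classify_key(int version, struct reiserfs_key *key)
+{
+	if (is_direntry_le_key(version, key))
+		printk(KERN_DEBUG "dir entry, hash offset %llu\n",
+		       (unsigned long long)le_key_k_offset(version, key));
+	else if (is_statdata_le_key(version, key))
+		printk(KERN_DEBUG "stat data item\n");
+}
+#endif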
+
+//
+// item header has version.
+//
+static inline int is_direntry_le_ih(struct item_head *ih)
+{
+	return is_direntry_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_direct_le_ih(struct item_head *ih)
+{
+	return is_direct_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_indirect_le_ih(struct item_head *ih)
+{
+	return is_indirect_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_statdata_le_ih(struct item_head *ih)
+{
+	return is_statdata_le_key(ih_version(ih), &ih->ih_key);
+}
+
+//
+// key is pointer to cpu key, result is cpu
+//
+static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
+{
+	return key->on_disk_key.k_offset;
+}
+
+static inline loff_t cpu_key_k_type(const struct cpu_key *key)
+{
+	return key->on_disk_key.k_type;
+}
+
+static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset)
+{
+	key->on_disk_key.k_offset = offset;
+}
+
+static inline void set_cpu_key_k_type(struct cpu_key *key, int type)
+{
+	key->on_disk_key.k_type = type;
+}
+
+static inline void cpu_key_k_offset_dec(struct cpu_key *key)
+{
+	key->on_disk_key.k_offset--;
+}
+
+#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY)
+#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT)
+#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT)
+#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA)
+
+/* are these used ? */
+#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key)))
+#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key)))
+#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key)))
+#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key)))
+
+#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \
+    (!COMP_SHORT_KEYS(ih, key) && \
+	  I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize))
+
+/* maximal length of item */
+#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE)
+#define MIN_ITEM_LEN 1
+
+/* object identifier for root dir */
+#define REISERFS_ROOT_OBJECTID 2
+#define REISERFS_ROOT_PARENT_OBJECTID 1
+
+extern struct reiserfs_key root_key;
+
+/* 
+ * Picture represents a leaf of the S+tree
+ *  ______________________________________________________
+ * |      |  Array of     |                   |           |
+ * |Block |  Object-Item  |      F r e e      |  Objects- |
+ * | head |  Headers      |     S p a c e     |   Items   |
+ * |______|_______________|___________________|___________|
+ */
+
+/* Header of a disk block.  More precisely, header of a formatted leaf
+   or internal node, and not the header of an unformatted node. */
+struct block_head {
+	__le16 blk_level;	/* Level of a block in the tree. */
+	__le16 blk_nr_item;	/* Number of keys/items in a block. */
+	__le16 blk_free_space;	/* Block free space in bytes. */
+	__le16 blk_reserved;
+	/* dump this in v4/planA */
+	struct reiserfs_key blk_right_delim_key;	/* kept only for compatibility */
+};
+
+#define BLKH_SIZE                     (sizeof(struct block_head))
+#define blkh_level(p_blkh)            (le16_to_cpu((p_blkh)->blk_level))
+#define blkh_nr_item(p_blkh)          (le16_to_cpu((p_blkh)->blk_nr_item))
+#define blkh_free_space(p_blkh)       (le16_to_cpu((p_blkh)->blk_free_space))
+#define blkh_reserved(p_blkh)         (le16_to_cpu((p_blkh)->blk_reserved))
+#define set_blkh_level(p_blkh,val)    ((p_blkh)->blk_level = cpu_to_le16(val))
+#define set_blkh_nr_item(p_blkh,val)  ((p_blkh)->blk_nr_item = cpu_to_le16(val))
+#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val))
+#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val))
+#define blkh_right_delim_key(p_blkh)  ((p_blkh)->blk_right_delim_key)
+#define set_blkh_right_delim_key(p_blkh,val)  ((p_blkh)->blk_right_delim_key = val)
+
+/*
+ * values for blk_level field of the struct block_head
+ */
+
+#define FREE_LEVEL 0		/* when node gets removed from the tree its
+				   blk_level is set to FREE_LEVEL. It is then
+				   used to see whether the node is still in the
+				   tree */
+
+#define DISK_LEAF_NODE_LEVEL  1	/* Leaf node level. */
+
+/* Given the buffer head of a formatted node, resolve to the block head of that node. */
+#define B_BLK_HEAD(bh)			((struct block_head *)((bh)->b_data))
+/* Number of items that are in buffer. */
+#define B_NR_ITEMS(bh)			(blkh_nr_item(B_BLK_HEAD(bh)))
+#define B_LEVEL(bh)			(blkh_level(B_BLK_HEAD(bh)))
+#define B_FREE_SPACE(bh)		(blkh_free_space(B_BLK_HEAD(bh)))
+
+#define PUT_B_NR_ITEMS(bh, val)		do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0)
+#define PUT_B_LEVEL(bh, val)		do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0)
+#define PUT_B_FREE_SPACE(bh, val)	do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0)
+
+/* Get right delimiting key. -- little endian */
+#define B_PRIGHT_DELIM_KEY(bh)		(&(blkh_right_delim_key(B_BLK_HEAD(bh))))
+
+/* Does the buffer contain a disk leaf. */
+#define B_IS_ITEMS_LEVEL(bh)		(B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL)
+
+/* Does the buffer contain a disk internal node */
+#define B_IS_KEYS_LEVEL(bh)      (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
+					    && B_LEVEL(bh) <= MAX_HEIGHT)
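+
+/*
+ * Usage sketch (editorial, kept out of the build): reading a formatted
+ * node's header through the accessors above.
+ */
+#if 0
+static void example_dump_node(struct buffer_head *bh)
+{
+	if (B_IS_ITEMS_LEVEL(bh))
+		printk(KERN_DEBUG "leaf: %d items, %d bytes free\n",
+		       B_NR_ITEMS(bh), B_FREE_SPACE(bh));
+	else if (B_IS_KEYS_LEVEL(bh))
+		printk(KERN_DEBUG "internal node, level %d\n", B_LEVEL(bh));
+}
+#endif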
+
+/***************************************************************************/
+/*                             STAT DATA                                   */
+/***************************************************************************/
+
+//
+// the old stat data is 32 bytes long; we distinguish the new one by its
+// different size
+//
+struct stat_data_v1 {
+	__le16 sd_mode;		/* file type, permissions */
+	__le16 sd_nlink;	/* number of hard links */
+	__le16 sd_uid;		/* owner */
+	__le16 sd_gid;		/* group */
+	__le32 sd_size;		/* file size */
+	__le32 sd_atime;	/* time of last access */
+	__le32 sd_mtime;	/* time file was last modified  */
+	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+	union {
+		__le32 sd_rdev;
+		__le32 sd_blocks;	/* number of blocks file uses */
+	} __attribute__ ((__packed__)) u;
+	__le32 sd_first_direct_byte;	/* first byte of file which is stored
+					   in a direct item: except that if it
+					   equals 1 it is a symlink and if it
+					   equals ~(__u32)0 there is no
+					   direct item.  The existence of this
+					   field really grates on me. Let's
+					   replace it with a macro based on
+					   sd_size and our tail suppression
+					   policy.  Someday.  -Hans */
+} __attribute__ ((__packed__));
+
+#define SD_V1_SIZE              (sizeof(struct stat_data_v1))
+#define stat_data_v1(ih)        (ih_version (ih) == KEY_FORMAT_3_5)
+#define sd_v1_mode(sdp)         (le16_to_cpu((sdp)->sd_mode))
+#define set_sd_v1_mode(sdp,v)   ((sdp)->sd_mode = cpu_to_le16(v))
+#define sd_v1_nlink(sdp)        (le16_to_cpu((sdp)->sd_nlink))
+#define set_sd_v1_nlink(sdp,v)  ((sdp)->sd_nlink = cpu_to_le16(v))
+#define sd_v1_uid(sdp)          (le16_to_cpu((sdp)->sd_uid))
+#define set_sd_v1_uid(sdp,v)    ((sdp)->sd_uid = cpu_to_le16(v))
+#define sd_v1_gid(sdp)          (le16_to_cpu((sdp)->sd_gid))
+#define set_sd_v1_gid(sdp,v)    ((sdp)->sd_gid = cpu_to_le16(v))
+#define sd_v1_size(sdp)         (le32_to_cpu((sdp)->sd_size))
+#define set_sd_v1_size(sdp,v)   ((sdp)->sd_size = cpu_to_le32(v))
+#define sd_v1_atime(sdp)        (le32_to_cpu((sdp)->sd_atime))
+#define set_sd_v1_atime(sdp,v)  ((sdp)->sd_atime = cpu_to_le32(v))
+#define sd_v1_mtime(sdp)        (le32_to_cpu((sdp)->sd_mtime))
+#define set_sd_v1_mtime(sdp,v)  ((sdp)->sd_mtime = cpu_to_le32(v))
+#define sd_v1_ctime(sdp)        (le32_to_cpu((sdp)->sd_ctime))
+#define set_sd_v1_ctime(sdp,v)  ((sdp)->sd_ctime = cpu_to_le32(v))
+#define sd_v1_rdev(sdp)         (le32_to_cpu((sdp)->u.sd_rdev))
+#define set_sd_v1_rdev(sdp,v)   ((sdp)->u.sd_rdev = cpu_to_le32(v))
+#define sd_v1_blocks(sdp)       (le32_to_cpu((sdp)->u.sd_blocks))
+#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v))
+#define sd_v1_first_direct_byte(sdp) \
+                                (le32_to_cpu((sdp)->sd_first_direct_byte))
+#define set_sd_v1_first_direct_byte(sdp,v) \
+                                ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
+
+/* inode flags stored in sd_attrs (nee sd_reserved) */
+
+/* we want common flags to have the same values as in ext2,
+   so chattr(1) will work without problems */
+#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define REISERFS_APPEND_FL    FS_APPEND_FL
+#define REISERFS_SYNC_FL      FS_SYNC_FL
+#define REISERFS_NOATIME_FL   FS_NOATIME_FL
+#define REISERFS_NODUMP_FL    FS_NODUMP_FL
+#define REISERFS_SECRM_FL     FS_SECRM_FL
+#define REISERFS_UNRM_FL      FS_UNRM_FL
+#define REISERFS_COMPR_FL     FS_COMPR_FL
+#define REISERFS_NOTAIL_FL    FS_NOTAIL_FL
+
+/* persistent flags that file inherits from the parent directory */
+#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |	\
+				REISERFS_SYNC_FL |	\
+				REISERFS_NOATIME_FL |	\
+				REISERFS_NODUMP_FL |	\
+				REISERFS_SECRM_FL |	\
+				REISERFS_COMPR_FL |	\
+				REISERFS_NOTAIL_FL )
+
+/* Stat Data on disk (reiserfs version of UFS disk inode minus the
+   address blocks) */
+struct stat_data {
+	__le16 sd_mode;		/* file type, permissions */
+	__le16 sd_attrs;	/* persistent inode flags */
+	__le32 sd_nlink;	/* number of hard links */
+	__le64 sd_size;		/* file size */
+	__le32 sd_uid;		/* owner */
+	__le32 sd_gid;		/* group */
+	__le32 sd_atime;	/* time of last access */
+	__le32 sd_mtime;	/* time file was last modified  */
+	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+	__le32 sd_blocks;
+	union {
+		__le32 sd_rdev;
+		__le32 sd_generation;
+		//__le32 sd_first_direct_byte;
+		/* first byte of file which is stored in a
+		   direct item: except that if it equals 1
+		   it is a symlink and if it equals
+		   ~(__u32)0 there is no direct item.  The
+		   existence of this field really grates
+		   on me. Let's replace it with a macro
+		   based on sd_size and our tail
+		   suppression policy? */
+	} __attribute__ ((__packed__)) u;
+} __attribute__ ((__packed__));
+//
+// this is 44 bytes long
+//
+#define SD_SIZE (sizeof(struct stat_data))
+#define SD_V2_SIZE              SD_SIZE
+#define stat_data_v2(ih)        (ih_version (ih) == KEY_FORMAT_3_6)
+#define sd_v2_mode(sdp)         (le16_to_cpu((sdp)->sd_mode))
+#define set_sd_v2_mode(sdp,v)   ((sdp)->sd_mode = cpu_to_le16(v))
+/* sd_reserved */
+/* set_sd_reserved */
+#define sd_v2_nlink(sdp)        (le32_to_cpu((sdp)->sd_nlink))
+#define set_sd_v2_nlink(sdp,v)  ((sdp)->sd_nlink = cpu_to_le32(v))
+#define sd_v2_size(sdp)         (le64_to_cpu((sdp)->sd_size))
+#define set_sd_v2_size(sdp,v)   ((sdp)->sd_size = cpu_to_le64(v))
+#define sd_v2_uid(sdp)          (le32_to_cpu((sdp)->sd_uid))
+#define set_sd_v2_uid(sdp,v)    ((sdp)->sd_uid = cpu_to_le32(v))
+#define sd_v2_gid(sdp)          (le32_to_cpu((sdp)->sd_gid))
+#define set_sd_v2_gid(sdp,v)    ((sdp)->sd_gid = cpu_to_le32(v))
+#define sd_v2_atime(sdp)        (le32_to_cpu((sdp)->sd_atime))
+#define set_sd_v2_atime(sdp,v)  ((sdp)->sd_atime = cpu_to_le32(v))
+#define sd_v2_mtime(sdp)        (le32_to_cpu((sdp)->sd_mtime))
+#define set_sd_v2_mtime(sdp,v)  ((sdp)->sd_mtime = cpu_to_le32(v))
+#define sd_v2_ctime(sdp)        (le32_to_cpu((sdp)->sd_ctime))
+#define set_sd_v2_ctime(sdp,v)  ((sdp)->sd_ctime = cpu_to_le32(v))
+#define sd_v2_blocks(sdp)       (le32_to_cpu((sdp)->sd_blocks))
+#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v))
+#define sd_v2_rdev(sdp)         (le32_to_cpu((sdp)->u.sd_rdev))
+#define set_sd_v2_rdev(sdp,v)   ((sdp)->u.sd_rdev = cpu_to_le32(v))
+#define sd_v2_generation(sdp)   (le32_to_cpu((sdp)->u.sd_generation))
+#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v))
+#define sd_v2_attrs(sdp)         (le16_to_cpu((sdp)->sd_attrs))
+#define set_sd_v2_attrs(sdp,v)   ((sdp)->sd_attrs = cpu_to_le16(v))
+
+/***************************************************************************/
+/*                      DIRECTORY STRUCTURE                                */
+/***************************************************************************/
+/*
+   Picture represents the structure of directory items
+   ________________________________________________
+   |  Array of     |   |     |        |       |   |
+   | directory     |N-1| N-2 | ....   |   1st |0th|
+   | entry headers |   |     |        |       |   |
+   |_______________|___|_____|________|_______|___|
+                    <-------- directory entries ------->
+
+ The first directory item has k_offset component 1.  We always store "."
+ and ".." in one item and never split them into different items.  This
+ makes, among other things, the code for removing directories simpler. */
+#define SD_OFFSET  0
+#define SD_UNIQUENESS 0
+#define DOT_OFFSET 1
+#define DOT_DOT_OFFSET 2
+#define DIRENTRY_UNIQUENESS 500
+
+/* */
+#define FIRST_ITEM_OFFSET 1
+
+/*
+   Q: How do you get the key of the object an entry points to?
+
+   A: Each directory entry has a header with deh_dir_id and deh_objectid
+      fields; together they form the key of the object the entry points to */
+
+/* NOT IMPLEMENTED:   
+   Directory will someday contain stat data of object */
+
+struct reiserfs_de_head {
+	__le32 deh_offset;	/* third component of the directory entry key */
+	__le32 deh_dir_id;	/* objectid of the parent directory of the
+				   object referenced by this directory entry */
+	__le32 deh_objectid;	/* objectid of the object referenced by this
+				   directory entry */
+	__le16 deh_location;	/* offset of the name in the whole item */
+	__le16 deh_state;	/* whether 1) the entry contains stat data (for
+				   the future), and 2) whether the entry is
+				   hidden (unlinked) */
+} __attribute__ ((__packed__));
+#define DEH_SIZE                  sizeof(struct reiserfs_de_head)
+#define deh_offset(p_deh)         (le32_to_cpu((p_deh)->deh_offset))
+#define deh_dir_id(p_deh)         (le32_to_cpu((p_deh)->deh_dir_id))
+#define deh_objectid(p_deh)       (le32_to_cpu((p_deh)->deh_objectid))
+#define deh_location(p_deh)       (le16_to_cpu((p_deh)->deh_location))
+#define deh_state(p_deh)          (le16_to_cpu((p_deh)->deh_state))
+
+#define put_deh_offset(p_deh,v)   ((p_deh)->deh_offset = cpu_to_le32((v)))
+#define put_deh_dir_id(p_deh,v)   ((p_deh)->deh_dir_id = cpu_to_le32((v)))
+#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v)))
+#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v)))
+#define put_deh_state(p_deh,v)    ((p_deh)->deh_state = cpu_to_le16((v)))
+
+/* empty directory contains two entries "." and ".." and their headers */
+#define EMPTY_DIR_SIZE \
+(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen ("..")))
+
+/* old format directories have this size when empty */
+#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)
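+
+/*
+ * Worked numbers (editorial): DEH_SIZE is 16 bytes and ROUND_UP pads each
+ * name to 8 bytes, so EMPTY_DIR_SIZE = 2*16 + 8 + 8 = 48 bytes.  The old
+ * format stored "." and ".." unpadded, hence EMPTY_DIR_SIZE_V1 =
+ * 2*16 + 3 = 35 bytes.
+ */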
+
+#define DEH_Statdata 0		/* not used now */
+#define DEH_Visible 2
+
+/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */
+#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__)
+#   define ADDR_UNALIGNED_BITS  (3)
+#endif
+
+/* These are only used to manipulate deh_state.
+ * Because of this we use the little-endian bit routines. */
+#ifdef ADDR_UNALIGNED_BITS
+
+#   define aligned_address(addr)           ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
+#   define unaligned_offset(addr)          (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3)
+
+#   define set_bit_unaligned(nr, addr)	\
+	__test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
+#   define clear_bit_unaligned(nr, addr)	\
+	__test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
+#   define test_bit_unaligned(nr, addr)	\
+	test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
+
+#else
+
+#   define set_bit_unaligned(nr, addr)	__test_and_set_bit_le(nr, addr)
+#   define clear_bit_unaligned(nr, addr)	__test_and_clear_bit_le(nr, addr)
+#   define test_bit_unaligned(nr, addr)	test_bit_le(nr, addr)
+
+#endif
+
+#define mark_de_with_sd(deh)        set_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
+#define mark_de_without_sd(deh)     clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
+#define mark_de_visible(deh)	    set_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+#define mark_de_hidden(deh)	    clear_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+
+#define de_with_sd(deh)		    test_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
+#define de_visible(deh)	    	    test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+#define de_hidden(deh)	    	    (!test_bit_unaligned (DEH_Visible, &((deh)->deh_state)))
+
+extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
+				   __le32 par_dirid, __le32 par_objid);
+extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
+				__le32 par_dirid, __le32 par_objid);
+
+/* array of the entry headers */
+ /* get item body */
+#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
+#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
+
+/* length of a directory entry in a directory item.  This define
+   calculates the length of the i-th directory entry using the entry
+   locations stored in the directory entry heads.  When it calculates
+   the length of the 0th entry, it uses the length of the whole item in
+   place of the entry location of the (non-existent) following entry.
+   See the picture above. */
+/*
+#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
+((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
+*/
+static inline int entry_length(const struct buffer_head *bh,
+			       const struct item_head *ih, int pos_in_item)
+{
+	struct reiserfs_de_head *deh;
+
+	deh = B_I_DEH(bh, ih) + pos_in_item;
+	if (pos_in_item)
+		return deh_location(deh - 1) - deh_location(deh);
+
+	return ih_item_len(ih) - deh_location(deh);
+}
+
+/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
+#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))
+
+/* name by bh, ih and entry_num */
+#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))
+
+// two entries per block (at least)
+#define REISERFS_MAX_NAME(block_size) 255
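+
+/*
+ * Iteration sketch (editorial, kept out of the build): walking all
+ * entries of one directory item with the helpers above.  entry_length()
+ * is an upper bound on the name length, since stored names may be padded.
+ */
+#if 0
+static void example_walk_dir_item(struct buffer_head *bh,
+				  struct item_head *ih)
+{
+	int i;
+
+	for (i = 0; i < I_ENTRY_COUNT(ih); i++) {
+		struct reiserfs_de_head *deh = B_I_DEH(bh, ih) + i;
+
+		printk(KERN_DEBUG "entry %d: objectid %u, name %.*s\n",
+		       i, deh_objectid(deh), entry_length(bh, ih, i),
+		       B_I_E_NAME(bh, ih, i));
+	}
+}
+#endif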
+
+/* this structure is used for operations on directory entries. It is
+   not a disk structure. */
+/* When reiserfs_find_entry or search_by_entry_key find directory
+   entry, they return filled reiserfs_dir_entry structure */
+struct reiserfs_dir_entry {
+	struct buffer_head *de_bh;
+	int de_item_num;
+	struct item_head *de_ih;
+	int de_entry_num;
+	struct reiserfs_de_head *de_deh;
+	int de_entrylen;
+	int de_namelen;
+	char *de_name;
+	unsigned long *de_gen_number_bit_string;
+
+	__u32 de_dir_id;
+	__u32 de_objectid;
+
+	struct cpu_key de_entry_key;
+};
+
+/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */
+
+/* pointer to file name, stored in entry */
+#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh))
+
+/* length of name */
+#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
+(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0))
+
+/* hash value occupies bits from 7 up to 30 */
+#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL)
+/* generation number occupies 7 bits starting from 0 up to 6 */
+#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL)
+#define MAX_GENERATION_NUMBER  127
+
+#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number))
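+
+/*
+ * Worked example (editorial): a third key component of 0x1280 decomposes
+ * into GET_HASH_VALUE() == 0x1280 and GET_GENERATION_NUMBER() == 0, i.e.
+ * the first name that hashed to this value; a colliding name gets the
+ * same hash with generation 1 via SET_GENERATION_NUMBER(offset, 1).
+ */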
+
+/*
+ * Picture represents an internal node of the reiserfs tree
+ *  ______________________________________________________
+ * |      |  Array of     |  Array of         |  Free     |
+ * |block |    keys       |  pointers         | space     |
+ * | head |      N        |      N+1          |           |
+ * |______|_______________|___________________|___________|
+ */
+
+/***************************************************************************/
+/*                      DISK CHILD                                         */
+/***************************************************************************/
+/* Disk child pointer: The pointer from an internal node of the tree
+   to a node that is on disk. */
+struct disk_child {
+	__le32 dc_block_number;	/* Disk child's block number. */
+	__le16 dc_size;		/* Disk child's used space.   */
+	__le16 dc_reserved;
+};
+
+#define DC_SIZE (sizeof(struct disk_child))
+#define dc_block_number(dc_p)	(le32_to_cpu((dc_p)->dc_block_number))
+#define dc_size(dc_p)		(le16_to_cpu((dc_p)->dc_size))
+#define put_dc_block_number(dc_p, val)   do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0)
+#define put_dc_size(dc_p, val)   do { (dc_p)->dc_size = cpu_to_le16(val); } while(0)
+
+/* Get disk child by buffer header and position in the tree node. */
+#define B_N_CHILD(bh, n_pos)  ((struct disk_child *)\
+((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos)))
+
+/* Get disk child number by buffer header and position in the tree node. */
+#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos)))
+#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \
+				(put_dc_block_number(B_N_CHILD(bh, n_pos), val))
+
+ /* maximal value of the dc_size field in struct disk_child */
+ /* child size is the combined size of all items and their headers */
+
+/* amount of used space in buffer (not including block head) */
+#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur)))
+
+/* max and min number of keys in internal node */
+#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
+#define MIN_NR_KEY(bh)    (MAX_NR_KEY(bh)/2)
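+
+/*
+ * Worked numbers (editorial, assuming a 4096-byte block): BLKH_SIZE is
+ * 24, KEY_SIZE is 16 and DC_SIZE is 8, so MAX_CHILD_SIZE == 4072 and an
+ * internal node holds at most MAX_NR_KEY == (4072 - 8) / 24 == 169 keys
+ * (thus 170 disk-child pointers) and, per MIN_NR_KEY, at least 84.
+ */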
+
+/***************************************************************************/
+/*                      PATH STRUCTURES AND DEFINES                        */
+/***************************************************************************/
+
+/* Search_by_key fills up the path from the root to the leaf as it descends
+   the tree looking for the key.  It uses reiserfs_bread to try to find
+   buffers in the cache given their block number, and reads them from disk
+   if they are not cached.  For each node found this way, search_by_key
+   then uses bin_search to look through the node.  When looking through an
+   internal node, bin_search finds the position of the block number of the
+   next node to descend into.  When looking through a leaf node, it finds
+   the position of the item whose key is either equal to the given key or
+   is the maximal key less than the given key. */
+
+struct path_element {
+	struct buffer_head *pe_buffer;	/* Pointer to the buffer at this
+					   element of the path in the tree. */
+	int pe_position;	/* Position in the tree node held in the
+				   buffer above. */
+};
+
+#define MAX_HEIGHT 5		/* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */
+#define EXTENDED_MAX_HEIGHT         7	/* Must equal MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
+#define FIRST_PATH_ELEMENT_OFFSET   2	/* Must be equal to at least 2. */
+
+#define ILLEGAL_PATH_ELEMENT_OFFSET 1	/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
+#define MAX_FEB_SIZE 6		/* this MUST be MAX_HEIGHT + 1. See about FEB below */
+
+/* We need to keep track of who the ancestors of nodes are.  When we
+   perform a search we record which nodes were visited while
+   descending the tree looking for the node we searched for. This list
+   of nodes is called the path.  This information is used while
+   performing balancing.  Note that this path information may become
+   invalid, and this means we must check it when using it to see if it
+   is still valid. You'll need to read search_by_key and the comments
+   in it, especially about decrement_counters_in_path(), to understand
+   this structure.  
+
+Paths make the code so much harder to work with and debug.... An
+enormous number of bugs are due to them, and trying to write or modify
+code that uses them just makes my head hurt.  They are based on an
+excessive effort to avoid disturbing the precious VFS code.:-( The
+gods only know how we are going to SMP the code that uses them.
+znodes are the way! */
+
+#define PATH_READA	0x1	/* do read ahead */
+#define PATH_READA_BACK 0x2	/* read backwards */
+
+struct treepath {
+	int path_length;	/* Length of the array below.   */
+	int reada;
+	struct path_element path_elements[EXTENDED_MAX_HEIGHT];	/* Array of the path elements.  */
+	int pos_in_item;
+};
+
+#define pos_in_item(path) ((path)->pos_in_item)
+
+#define INITIALIZE_PATH(var) \
+struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
+
+/* Get path element by path and path position. */
+#define PATH_OFFSET_PELEMENT(path, n_offset)  ((path)->path_elements + (n_offset))
+
+/* Get buffer header at the path by path and path position. */
+#define PATH_OFFSET_PBUFFER(path, n_offset)   (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer)
+
+/* Get position in the element at the path by path and path position. */
+#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
+
+#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
+				/* you know, to the person who didn't
+				   write this the macro name does not
+				   at first suggest what it does.
+				   Maybe POSITION_FROM_PATH_END? Or
+				   maybe we should just focus on
+				   dumping paths... -Hans */
+#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
+
+#define PATH_PITEM_HEAD(path)    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path))
+
+/* in do_balance leaf has h == 0 in contrast with path structure,
+   where root has level == 0. That is why we need these defines */
+#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h))	/* tb->S[h] */
+#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1)	/* tb->F[h] or tb->S[0]->b_parent */
+#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h))
+#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1)	/* tb->S[h]->b_item_order */
+
+#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
+
+#define get_last_bh(path) PATH_PLAST_BUFFER(path)
+#define get_ih(path) PATH_PITEM_HEAD(path)
+#define get_item_pos(path) PATH_LAST_POSITION(path)
+#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
+#define item_moved(ih,path) comp_items(ih, path)
+#define path_changed(ih,path) comp_items (ih, path)
+
+/***************************************************************************/
+/*                       MISC                                              */
+/***************************************************************************/
+
+/* Size of pointer to the unformatted node. */
+#define UNFM_P_SIZE (sizeof(unp_t))
+#define UNFM_P_SHIFT 2
+
+// in the in-core inode the key is stored in little-endian form
+#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
+
+#define MAX_UL_INT 0xffffffff
+#define MAX_INT    0x7fffffff
+#define MAX_US_INT 0xffff
+
+// reiserfs key format 3.6 ("v2") has a 60-bit max offset; format 3.5
+// ("v1") has a 32-bit offset
+#define U32_MAX (~(__u32)0)
+
+static inline loff_t max_reiserfs_offset(struct inode *inode)
+{
+	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
+		return (loff_t) U32_MAX;
+
+	return (loff_t) ((~(__u64) 0) >> 4);
+}
+
+/*#define MAX_KEY_UNIQUENESS	MAX_UL_INT*/
+#define MAX_KEY_OBJECTID	MAX_UL_INT
+
+#define MAX_B_NUM  MAX_UL_INT
+#define MAX_FC_NUM MAX_US_INT
+
+/* the purpose is to detect overflow of an unsigned short */
+#define REISERFS_LINK_MAX (MAX_US_INT - 1000)
+
+/* The following defines are used in reiserfs_insert_item and reiserfs_append_item  */
+#define REISERFS_KERNEL_MEM		0	/* reiserfs kernel memory mode  */
+#define REISERFS_USER_MEM		1	/* reiserfs user memory mode            */
+
+#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
+#define get_generation(s) atomic_read (&fs_generation(s))
+#define FILESYSTEM_CHANGED_TB(tb)  (get_generation((tb)->tb_sb) != (tb)->fs_gen)
+#define __fs_changed(gen,s) ((gen) != get_generation (s))
+#define fs_changed(gen,s)		\
+({					\
+	reiserfs_cond_resched(s);	\
+	__fs_changed(gen, s);		\
+})
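+
+/*
+ * Usage sketch (editorial, kept out of the build): the generation counter
+ * lets a caller detect that the tree changed while it slept, in which
+ * case a previously found path is stale and the search must be redone.
+ */
+#if 0
+static void example_retry_search(struct super_block *s)
+{
+	int gen;
+
+	do {
+		gen = get_generation(s);
+		/* ... search_by_key() and friends, possibly scheduling ... */
+	} while (fs_changed(gen, s));	/* tree changed: search again */
+}
+#endif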
+
+/***************************************************************************/
+/*                  FIXATE NODES                                           */
+/***************************************************************************/
+
+#define VI_TYPE_LEFT_MERGEABLE 1
+#define VI_TYPE_RIGHT_MERGEABLE 2
+
+/* To make any changes in the tree we always first find the node that
+   contains the item to be changed/deleted, or the place to insert a new
+   item.  We call this node S.  To do balancing we need to decide what
+   we will shift to the left/right neighbor, or to a new node where the
+   new item will go, etc.  To make this analysis simpler we build a
+   virtual node.  A virtual node is an array of items that will replace
+   the items of node S (for instance, if we are going to delete an item,
+   the virtual node does not contain it).  The virtual node keeps
+   information about item sizes and types, mergeability of the first and
+   last items, and the sizes of all entries in a directory item.  We use
+   this array of items when calculating what we can shift to neighbors
+   and how many nodes we need if we do no shifting, if we shift to the
+   left/right neighbor, or to both. */
+struct virtual_item {
+	int vi_index;		// index in the array of item operations
+	unsigned short vi_type;	// left/right mergeability
+	unsigned short vi_item_len;	/* length the item will have after balancing */
+	struct item_head *vi_ih;
+	const char *vi_item;	// body of item (old or new)
+	const void *vi_new_data;	// 0 always, except in paste mode
+	void *vi_uarea;		// item specific area
+};
+
+struct virtual_node {
+	char *vn_free_ptr;	/* pointer to the free space in the buffer */
+	unsigned short vn_nr_item;	/* number of items in the virtual node */
+	short vn_size;		/* size the node would have if it were of
+				   unlimited size and no balancing were
+				   performed */
+	short vn_mode;		/* mode of balancing (paste, insert, delete, cut) */
+	short vn_affected_item_num;
+	short vn_pos_in_item;
+	struct item_head *vn_ins_ih;	/* item header of inserted item, 0 for other modes */
+	const void *vn_data;
+	struct virtual_item *vn_vi;	/* array of items (including a new one, excluding the item to be deleted) */
+};
+
+/* used by directory items when creating virtual nodes */
+struct direntry_uarea {
+	int flags;
+	__u16 entry_count;
+	__u16 entry_sizes[1];
+} __attribute__ ((__packed__));
+
+/***************************************************************************/
+/*                  TREE BALANCE                                           */
+/***************************************************************************/
+
+/* This temporary structure is used in tree balance algorithms, and
+   constructed as we go to the extent that its various parts are
+   needed.  It contains arrays of nodes that can potentially be
+   involved in the balancing of node S, and parameters that define how
+   each of the nodes must be balanced.  Note that in these algorithms
+   for balancing the worst case is to need to balance the current node
+   S and the left and right neighbors and all of their parents plus
+   create a new node.  We implement S1 balancing for the leaf nodes
+   and S0 balancing for the internal nodes (S1 and S0 are defined in
+   our papers.)*/
+
+#define MAX_FREE_BLOCK 7	/* size of the array of buffers to free at end of do_balance */
+
+/* maximum number of FEB blocknrs on a single level */
+#define MAX_AMOUNT_NEEDED 2
+
+/* someday somebody will prefix every field in this struct with tb_ */
+struct tree_balance {
+	int tb_mode;
+	int need_balance_dirty;
+	struct super_block *tb_sb;
+	struct reiserfs_transaction_handle *transaction_handle;
+	struct treepath *tb_path;
+	struct buffer_head *L[MAX_HEIGHT];	/* array of left neighbors of nodes in the path */
+	struct buffer_head *R[MAX_HEIGHT];	/* array of right neighbors of nodes in the path */
+	struct buffer_head *FL[MAX_HEIGHT];	/* array of fathers of the left  neighbors      */
+	struct buffer_head *FR[MAX_HEIGHT];	/* array of fathers of the right neighbors      */
+	struct buffer_head *CFL[MAX_HEIGHT];	/* array of common parents of center node and its left neighbor  */
+	struct buffer_head *CFR[MAX_HEIGHT];	/* array of common parents of center node and its right neighbor */
+
+	struct buffer_head *FEB[MAX_FEB_SIZE];	/* array of empty buffers. Number of buffers in array equals
+						   cur_blknum. */
+	struct buffer_head *used[MAX_FEB_SIZE];
+	struct buffer_head *thrown[MAX_FEB_SIZE];
+	int lnum[MAX_HEIGHT];	/* array of number of items which must be
+				   shifted to the left in order to balance the
+				   current node; for leaves includes item that
+				   will be partially shifted; for internal
+				   nodes, it is the number of child pointers
+				   rather than items. It includes the new item
+				   being created. The code sometimes subtracts
+				   one to get the number of wholly shifted
+				   items for other purposes. */
+	int rnum[MAX_HEIGHT];	/* substitute right for left in comment above */
+	int lkey[MAX_HEIGHT];	/* array indexed by height h mapping the key delimiting L[h] and
+				   S[h] to its item number within the node CFL[h] */
+	int rkey[MAX_HEIGHT];	/* substitute r for l in comment above */
+	int insert_size[MAX_HEIGHT];	/* the number of bytes by which we are trying to grow
+					   or shrink S[h]. A negative value means removing. */
+	int blknum[MAX_HEIGHT];	/* number of nodes that will replace node S[h] after
+				   balancing on the level h of the tree.  If 0 then S is
+				   being deleted, if 1 then S is remaining and no new nodes
+				   are being created, if 2 or 3 then 1 or 2 new nodes are
+				   being created */
+
+	/* fields that are used only for balancing leaves of the tree */
+	int cur_blknum;		/* number of empty blocks having been already allocated                 */
+	int s0num;		/* number of items that fall into the leftmost node when S[0] splits */
+	int s1num;		/* number of items that fall into the first new node when S[0] splits */
+	int s2num;		/* number of items that fall into the second new node when S[0] splits */
+	int lbytes;		/* number of bytes which can flow to the left neighbor
+				   from the leftmost liquid item that cannot be shifted
+				   from S[0] entirely; if -1 then nothing will be
+				   partially shifted */
+	int rbytes;		/* number of bytes which will flow to the right neighbor
+				   from the rightmost liquid item that cannot be shifted
+				   from S[0] entirely; if -1 then nothing will be
+				   partially shifted */
+	int s1bytes;		/* number of bytes which flow to the first  new node when S[0] splits   */
+	/* note: if S[0] splits into 3 nodes, then items do not need to be cut  */
+	int s2bytes;
+	struct buffer_head *buf_to_free[MAX_FREE_BLOCK];	/* buffers which are to be freed after do_balance finishes by unfix_nodes */
+	char *vn_buf;		/* kmalloced memory. Used to create
+				   virtual node and keep map of
+				   dirtied bitmap blocks */
+	int vn_buf_size;	/* size of the vn_buf */
+	struct virtual_node *tb_vn;	/* VN starts after bitmap of bitmap blocks */
+
+	int fs_gen;		/* saved value of `reiserfs_generation' counter
+				   see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	struct in_core_key key;	/* key pointer, to pass to block allocator or
+				   another low-level subsystem */
+#endif
+};
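+
+/* For example, lnum[0] == 3 with lbytes == 200 would mean that two whole
+   items plus 200 bytes of a third flow to the left neighbor; with
+   lbytes == -1 the same lnum would mean three wholly shifted items. */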
+
+/* These are modes of balancing */
+
+/* When inserting an item. */
+#define M_INSERT	'i'
+/* When inserting an entry into a directory item, or appending onto an
+   already existing item. */
+#define M_PASTE		'p'
+/* When deleting an item. */
+#define M_DELETE	'd'
+/* When truncating an item or removing an entry from a (directory) item. */
+#define M_CUT 		'c'
+
+/* used when balancing on the leaf level is skipped (in reiserfsck) */
+#define M_INTERNAL	'n'
+
+/* When further balancing is not needed, then do_balance does not need
+   to be called. */
+#define M_SKIP_BALANCING 		's'
+#define M_CONVERT	'v'
+
+/* modes of leaf_move_items */
+#define LEAF_FROM_S_TO_L 0
+#define LEAF_FROM_S_TO_R 1
+#define LEAF_FROM_R_TO_L 2
+#define LEAF_FROM_L_TO_R 3
+#define LEAF_FROM_S_TO_SNEW 4
+
+#define FIRST_TO_LAST 0
+#define LAST_TO_FIRST 1
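+
+/* e.g. leaf_shift_left() in lbalance.c is built on these as roughly
+ *
+ *	leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);
+ */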
+
+/* used in do_balance to pass parent-of-node information that has
+   been obtained from the tb struct */
+struct buffer_info {
+	struct tree_balance *tb;
+	struct buffer_head *bi_bh;
+	struct buffer_head *bi_parent;
+	int bi_position;
+};
+
+static inline struct super_block *sb_from_tb(struct tree_balance *tb)
+{
+	return tb ? tb->tb_sb : NULL;
+}
+
+static inline struct super_block *sb_from_bi(struct buffer_info *bi)
+{
+	return bi ? sb_from_tb(bi->tb) : NULL;
+}
+
+/* there are 4 types of items: stat data, directory item, indirect, direct.
++-------------------+------------+--------------------+----------------------+
+|                   |  k_offset  |    k_uniqueness    |      mergeable?      |
++-------------------+------------+--------------------+----------------------+
+|     stat data     |      0     |         0          |          no          |
++-------------------+------------+--------------------+----------------------+
+| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS |          no          |
+| non 1st directory | hash value |DIRENTRY_UNIQUENESS |          yes         |
+|     item          |            |                    |                      |
++-------------------+------------+--------------------+----------------------+
+| indirect item     | offset + 1 |   TYPE_INDIRECT    | if this is not the   |
+|                   |            |                    | first indirect item  |
+|                   |            |                    | of the object        |
++-------------------+------------+--------------------+----------------------+
+| direct item       | offset + 1 |    TYPE_DIRECT     | if this is not the   |
+|                   |            |                    | first direct item    |
+|                   |            |                    | of the object        |
++-------------------+------------+--------------------+----------------------+
+*/
+
+struct item_operations {
+	int (*bytes_number) (struct item_head * ih, int block_size);
+	void (*decrement_key) (struct cpu_key *);
+	int (*is_left_mergeable) (struct reiserfs_key * ih,
+				  unsigned long bsize);
+	void (*print_item) (struct item_head *, char *item);
+	void (*check_item) (struct item_head *, char *item);
+
+	int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi,
+			  int is_affected, int insert_size);
+	int (*check_left) (struct virtual_item * vi, int free,
+			   int start_skip, int end_skip);
+	int (*check_right) (struct virtual_item * vi, int free);
+	int (*part_size) (struct virtual_item * vi, int from, int to);
+	int (*unit_num) (struct virtual_item * vi);
+	void (*print_vi) (struct virtual_item * vi);
+};
+
+extern struct item_operations *item_ops[TYPE_ANY + 1];
+
+#define op_bytes_number(ih,bsize)                    item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
+#define op_is_left_mergeable(key,bsize)              item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
+#define op_print_item(ih,item)                       item_ops[le_ih_k_type (ih)]->print_item (ih, item)
+#define op_check_item(ih,item)                       item_ops[le_ih_k_type (ih)]->check_item (ih, item)
+#define op_create_vi(vn,vi,is_affected,insert_size)  item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size)
+#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip)
+#define op_check_right(vi,free)                      item_ops[(vi)->vi_index]->check_right (vi, free)
+#define op_part_size(vi,from,to)                     item_ops[(vi)->vi_index]->part_size (vi, from, to)
+#define op_unit_num(vi)				     item_ops[(vi)->vi_index]->unit_num (vi)
+#define op_print_vi(vi)                              item_ops[(vi)->vi_index]->print_vi (vi)
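+
+/* For example, to find the number of bytes an item covers without knowing
+ * its type, a caller can simply dispatch through the table:
+ *
+ *	int bytes = op_bytes_number(ih, sb->s_blocksize);
+ *
+ * for an indirect item this counts the bytes addressed by its unformatted
+ * node pointers, not the length of the item body itself. */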
+
+#define COMP_SHORT_KEYS comp_short_keys
+
+/* number of blocks pointed to by the indirect item */
+#define I_UNFM_NUM(ih)	(ih_item_len(ih) / UNFM_P_SIZE)
+
+/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */
+#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
+
+/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */
+
+/* get the item header */
+#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) )
+
+/* get the delimiting key from an internal node */
+#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
+
+/* get the key out of the item header */
+#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
+
+/* get item body */
+#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
+
+/* get the stat data by the buffer header and the item order */
+#define B_N_STAT_DATA(bh,nr) \
+( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
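+
+/* A minimal sketch of walking every item in a leaf node with these
+ * accessors, assuming bh holds a formatted leaf:
+ *
+ *	int i;
+ *	for (i = 0; i < B_NR_ITEMS(bh); i++) {
+ *		struct item_head *ih = B_N_PITEM_HEAD(bh, i);
+ *		char *body = B_N_PITEM(bh, i);
+ *		...
+ *	}
+ */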
+
+/* the following defines use a reiserfs buffer header and an item header */
+
+/* get stat-data */
+#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
+
+// this is 3976 for size==4096
+#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
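+/* i.e. 4096 - 24 (BLKH_SIZE) - 48 (2 * IH_SIZE) - 44 (SD_SIZE) - 4 (UNFM_P_SIZE) */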
+
+/* indirect items consist of entries which contain blocknrs, pos
+   indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
+   blocknr contained by the entry pos points to */
+#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)))
+#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0)
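+
+/* e.g. reading the block number held in entry pos of an indirect item and
+ * then repointing that entry at a newly allocated block:
+ *
+ *	b_blocknr_t blk = B_I_POS_UNFM_POINTER(bh, ih, pos);
+ *	PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, new_blocknr);
+ *
+ * both forms take care of the little-endian conversion. */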
+
+struct reiserfs_iget_args {
+	__u32 objectid;
+	__u32 dirid;
+};
+
+/***************************************************************************/
+/*                    FUNCTION DECLARATIONS                                */
+/***************************************************************************/
+
+#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
+
+#define journal_trans_half(blocksize) \
+	((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
+
+/* journal.c -- see journal.c for detailed comments on everything here */
+
+/* first block written in a commit.  */
+struct reiserfs_journal_desc {
+	__le32 j_trans_id;	/* id of commit */
+	__le32 j_len;		/* length of commit. len +1 is the commit block */
+	__le32 j_mount_id;	/* mount id of this trans */
+	__le32 j_realblock[1];	/* real locations for each block */
+};
+
+#define get_desc_trans_id(d)   le32_to_cpu((d)->j_trans_id)
+#define get_desc_trans_len(d)  le32_to_cpu((d)->j_len)
+#define get_desc_mount_id(d)   le32_to_cpu((d)->j_mount_id)
+
+#define set_desc_trans_id(d,val)       do { (d)->j_trans_id = cpu_to_le32 (val); } while (0)
+#define set_desc_trans_len(d,val)      do { (d)->j_len = cpu_to_le32 (val); } while (0)
+#define set_desc_mount_id(d,val)       do { (d)->j_mount_id = cpu_to_le32 (val); } while (0)
+
+/* last block written in a commit */
+struct reiserfs_journal_commit {
+	__le32 j_trans_id;	/* must match j_trans_id from the desc block */
+	__le32 j_len;		/* ditto */
+	__le32 j_realblock[1];	/* real locations for each block */
+};
+
+#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
+#define get_commit_trans_len(c)        le32_to_cpu((c)->j_len)
+#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id)
+
+#define set_commit_trans_id(c,val)     do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
+#define set_commit_trans_len(c,val)    do { (c)->j_len = cpu_to_le32 (val); } while (0)
+
+/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the
+** last fully flushed transaction.  Fully flushed means all the log blocks and all the real blocks are on disk,
+** and this transaction does not need to be replayed.
+*/
+struct reiserfs_journal_header {
+	__le32 j_last_flush_trans_id;	/* id of last fully flushed transaction */
+	__le32 j_first_unflushed_offset;	/* offset in the log of where to start replay after a crash */
+	__le32 j_mount_id;
+	/* 12 */ struct journal_params jh_journal;
+};
+
+/* biggest tunable defines are right here */
+#define JOURNAL_BLOCK_COUNT 8192	/* number of blocks in the journal */
+#define JOURNAL_TRANS_MAX_DEFAULT 1024	/* biggest possible single transaction, don't change for now (8/3/99) */
+#define JOURNAL_TRANS_MIN_DEFAULT 256
+#define JOURNAL_MAX_BATCH_DEFAULT   900	/* max blocks to batch into one transaction, don't make this any bigger than 900 */
+#define JOURNAL_MIN_RATIO 2
+#define JOURNAL_MAX_COMMIT_AGE 30
+#define JOURNAL_MAX_TRANS_AGE 30
+#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
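+/* with MAX_HEIGHT == 5 this works out to 3 * 3 + 9 = 18 blocks per balance */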
+#define JOURNAL_BLOCKS_PER_OBJECT(sb)  (JOURNAL_PER_BALANCE_CNT * 3 + \
+					 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \
+					      REISERFS_QUOTA_TRANS_BLOCKS(sb)))
+
+#ifdef CONFIG_QUOTA
+#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA))
+/* We need to update data and inode (atime) */
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0)
+/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
+#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
+(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
+/* same as with INIT */
+#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
+(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
+#else
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
+#define REISERFS_QUOTA_INIT_BLOCKS(s) 0
+#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
+#endif
+
+/* both of these can be as low as 1, or as high as you want.  The min is the
+** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
+** as needed, and released when transactions are committed.  On release, if 
+** the current number of nodes is > max, the node is freed, otherwise, 
+** it is put on a free list for faster use later.
+*/
+#define REISERFS_MIN_BITMAP_NODES 10
+#define REISERFS_MAX_BITMAP_NODES 100
+
+#define JBH_HASH_SHIFT 13	/* these are based on journal hash size of 8192 */
+#define JBH_HASH_MASK 8191
+
+#define _jhashfn(sb,block)	\
+	(((unsigned long)sb>>L1_CACHE_SHIFT) ^ \
+	 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
+#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
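+
+/* e.g. journal.c looks a block up in one of its hash tables roughly as
+ *
+ *	struct reiserfs_journal_cnode *cn = journal_hash(table, sb, bl);
+ *	while (cn && (cn->sb != sb || cn->blocknr != bl))
+ *		cn = cn->hnext;
+ */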
+
+// We need these to make journal.c code more readable
+#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
+#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
+#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
+
+enum reiserfs_bh_state_bits {
+	BH_JDirty = BH_PrivateStart,	/* buffer is in current transaction */
+	BH_JDirty_wait,
+	BH_JNew,		/* disk block was taken off the free list before
+				 * being in a finished transaction or
+				 * written to disk. Can be reused immediately. */
+	BH_JPrepared,
+	BH_JRestore_dirty,
+	BH_JTest,		// debugging only, will go away
+};
+
+BUFFER_FNS(JDirty, journaled);
+TAS_BUFFER_FNS(JDirty, journaled);
+BUFFER_FNS(JDirty_wait, journal_dirty);
+TAS_BUFFER_FNS(JDirty_wait, journal_dirty);
+BUFFER_FNS(JNew, journal_new);
+TAS_BUFFER_FNS(JNew, journal_new);
+BUFFER_FNS(JPrepared, journal_prepared);
+TAS_BUFFER_FNS(JPrepared, journal_prepared);
+BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
+TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
+BUFFER_FNS(JTest, journal_test);
+TAS_BUFFER_FNS(JTest, journal_test);
+
+/*
+** transaction handle which is passed around for all journal calls
+*/
+struct reiserfs_transaction_handle {
+	struct super_block *t_super;	/* super for this FS when journal_begin was
+					   called. Saves calls to reiserfs_get_super;
+					   also used by nested transactions to make
+					   sure they are nesting on the right FS.
+					   _must_ be first in the handle
+					 */
+	int t_refcount;
+	int t_blocks_logged;	/* number of blocks this writer has logged */
+	int t_blocks_allocated;	/* number of blocks this writer allocated */
+	unsigned int t_trans_id;	/* sanity check, equals the current trans id */
+	void *t_handle_save;	/* save existing current->journal_info */
+	unsigned displace_new_blocks:1;	/* if new block allocation occurs, that block
+					   should be displaced from others */
+	struct list_head t_list;
+};
+
+/* used to keep track of ordered and tail writes, attached to the buffer
+ * head through b_journal_head.
+ */
+struct reiserfs_jh {
+	struct reiserfs_journal_list *jl;
+	struct buffer_head *bh;
+	struct list_head list;
+};
+
+void reiserfs_free_jh(struct buffer_head *bh);
+int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
+int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
+int journal_mark_dirty(struct reiserfs_transaction_handle *,
+		       struct super_block *, struct buffer_head *bh);
+
+static inline int reiserfs_file_data_log(struct inode *inode)
+{
+	if (reiserfs_data_log(inode->i_sb) ||
+	    (REISERFS_I(inode)->i_flags & i_data_log))
+		return 1;
+	return 0;
+}
+
+static inline int reiserfs_transaction_running(struct super_block *s)
+{
+	struct reiserfs_transaction_handle *th = current->journal_info;
+	if (th && th->t_super == s)
+		return 1;
+	if (th && th->t_super == NULL)
+		BUG();
+	return 0;
+}
+
+static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th)
+{
+	return th->t_blocks_allocated - th->t_blocks_logged;
+}
+
+struct reiserfs_transaction_handle *
+reiserfs_persistent_transaction(struct super_block *, int count);
+int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *);
+int reiserfs_commit_page(struct inode *inode, struct page *page,
+			 unsigned from, unsigned to);
+int reiserfs_flush_old_commits(struct super_block *);
+int reiserfs_commit_for_inode(struct inode *);
+int reiserfs_inode_needs_commit(struct inode *);
+void reiserfs_update_inode_transaction(struct inode *);
+void reiserfs_wait_on_write_block(struct super_block *s);
+void reiserfs_block_writes(struct reiserfs_transaction_handle *th);
+void reiserfs_allow_writes(struct super_block *s);
+void reiserfs_check_lock_depth(struct super_block *s, char *caller);
+int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh,
+				 int wait);
+void reiserfs_restore_prepared_buffer(struct super_block *,
+				      struct buffer_head *bh);
+int journal_init(struct super_block *, const char *j_dev_name, int old_format,
+		 unsigned int);
+int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
+int journal_release_error(struct reiserfs_transaction_handle *,
+			  struct super_block *);
+int journal_end(struct reiserfs_transaction_handle *, struct super_block *,
+		unsigned long);
+int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
+		     unsigned long);
+int journal_mark_freed(struct reiserfs_transaction_handle *,
+		       struct super_block *, b_blocknr_t blocknr);
+int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
+int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr,
+			 int bit_nr, int searchall, b_blocknr_t *next);
+int journal_begin(struct reiserfs_transaction_handle *,
+		  struct super_block *sb, unsigned long);
+int journal_join_abort(struct reiserfs_transaction_handle *,
+		       struct super_block *sb, unsigned long);
+void reiserfs_abort_journal(struct super_block *sb, int errno);
+void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
+int reiserfs_allocate_list_bitmaps(struct super_block *s,
+				   struct reiserfs_list_bitmap *, unsigned int);
+
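+/* The usual shape of a journaled operation, as seen e.g. in resize.c below:
+ *
+ *	struct reiserfs_transaction_handle th;
+ *	err = journal_begin(&th, s, jblocks);
+ *	if (err)
+ *		return err;
+ *	reiserfs_prepare_for_journal(s, bh, 1);
+ *	... modify bh ...
+ *	journal_mark_dirty(&th, s, bh);
+ *	err = journal_end(&th, s, jblocks);
+ *
+ * where jblocks is the number of blocks the caller expects to log. */
+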
+void add_save_link(struct reiserfs_transaction_handle *th,
+		   struct inode *inode, int truncate);
+int remove_save_link(struct inode *inode, int truncate);
+
+/* objectid.c */
+__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th);
+void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
+			       __u32 objectid_to_release);
+int reiserfs_convert_objectid_map_v1(struct super_block *);
+
+/* stree.c */
+int B_IS_IN_TREE(const struct buffer_head *);
+extern void copy_item_head(struct item_head *to,
+			   const struct item_head *from);
+
+// first key is in le form, second is in cpu form
+extern int comp_short_keys(const struct reiserfs_key *le_key,
+			   const struct cpu_key *cpu_key);
+extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
+
+// both are in le form
+extern int comp_le_keys(const struct reiserfs_key *,
+			const struct reiserfs_key *);
+extern int comp_short_le_keys(const struct reiserfs_key *,
+			      const struct reiserfs_key *);
+
+//
+// get key version from on disk key - kludge
+//
+static inline int le_key_version(const struct reiserfs_key *key)
+{
+	int type;
+
+	type = offset_v2_k_type(&(key->u.k_offset_v2));
+	if (type != TYPE_DIRECT && type != TYPE_INDIRECT
+	    && type != TYPE_DIRENTRY)
+		return KEY_FORMAT_3_5;
+
+	return KEY_FORMAT_3_6;
+
+}
+
+static inline void copy_key(struct reiserfs_key *to,
+			    const struct reiserfs_key *from)
+{
+	memcpy(to, from, KEY_SIZE);
+}
+
+int comp_items(const struct item_head *stored_ih, const struct treepath *path);
+const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
+				    const struct super_block *sb);
+int search_by_key(struct super_block *, const struct cpu_key *,
+		  struct treepath *, int);
+#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL)
+int search_for_position_by_key(struct super_block *sb,
+			       const struct cpu_key *cpu_key,
+			       struct treepath *search_path);
+extern void decrement_bcount(struct buffer_head *bh);
+void decrement_counters_in_path(struct treepath *search_path);
+void pathrelse(struct treepath *search_path);
+int reiserfs_check_path(struct treepath *p);
+void pathrelse_and_restore(struct super_block *s, struct treepath *search_path);
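+
+/* A typical lookup: build a cpu key, search, inspect the leaf the path
+ * ends at, and always release the path afterwards, e.g.:
+ *
+ *	INITIALIZE_PATH(path);
+ *	struct cpu_key key;
+ *
+ *	make_cpu_key(&key, inode, offset, TYPE_DIRECT, 3);
+ *	if (search_item(sb, &key, &path) == ITEM_FOUND)
+ *		... use PATH_PLAST_BUFFER(&path) ...
+ *	pathrelse(&path);
+ */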
+
+int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
+			 struct treepath *path,
+			 const struct cpu_key *key,
+			 struct item_head *ih,
+			 struct inode *inode, const char *body);
+
+int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
+			     struct treepath *path,
+			     const struct cpu_key *key,
+			     struct inode *inode,
+			     const char *body, int paste_size);
+
+int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
+			   struct treepath *path,
+			   struct cpu_key *key,
+			   struct inode *inode,
+			   struct page *page, loff_t new_file_size);
+
+int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
+			 struct treepath *path,
+			 const struct cpu_key *key,
+			 struct inode *inode, struct buffer_head *un_bh);
+
+void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
+				struct inode *inode, struct reiserfs_key *key);
+int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
+			   struct inode *inode);
+int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
+			 struct inode *inode, struct page *,
+			 int update_timestamps);
+
+#define i_block_size(inode) ((inode)->i_sb->s_blocksize)
+#define file_size(inode) ((inode)->i_size)
+#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1))
+
+#define tail_has_to_be_packed(inode)					\
+	(have_large_tails((inode)->i_sb) ?				\
+	 !STORE_TAIL_IN_UNFM_S1(file_size(inode), tail_size(inode),	\
+				(inode)->i_sb->s_blocksize) :		\
+	 have_small_tails((inode)->i_sb) ?				\
+	 !STORE_TAIL_IN_UNFM_S2(file_size(inode), tail_size(inode),	\
+				(inode)->i_sb->s_blocksize) : 0)
+
+void padd_item(char *item, int total_length, int length);
+
+/* inode.c */
+/* args for the create parameter of reiserfs_get_block */
+#define GET_BLOCK_NO_CREATE 0	/* don't create new blocks or convert tails */
+#define GET_BLOCK_CREATE 1	/* add anything you need to find block */
+#define GET_BLOCK_NO_HOLE 2	/* return -ENOENT for file holes */
+#define GET_BLOCK_READ_DIRECT 4	/* read the tail if indirect item not found */
+#define GET_BLOCK_NO_IMUX     8	/* i_mutex is not held, don't preallocate */
+#define GET_BLOCK_NO_DANGLE   16	/* don't leave any transactions running */
+
+void reiserfs_read_locked_inode(struct inode *inode,
+				struct reiserfs_iget_args *args);
+int reiserfs_find_actor(struct inode *inode, void *p);
+int reiserfs_init_locked_inode(struct inode *inode, void *p);
+void reiserfs_evict_inode(struct inode *inode);
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
+int reiserfs_get_block(struct inode *inode, sector_t block,
+		       struct buffer_head *bh_result, int create);
+struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+				     int fh_len, int fh_type);
+struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+				     int fh_len, int fh_type);
+int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
+		       int connectable);
+
+int reiserfs_truncate_file(struct inode *, int update_timestamps);
+void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
+		  int type, int key_length);
+void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
+		       int version,
+		       loff_t offset, int type, int length, int entry_count);
+struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key);
+
+struct reiserfs_security_handle;
+int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
+		       struct inode *dir, umode_t mode,
+		       const char *symname, loff_t i_size,
+		       struct dentry *dentry, struct inode *inode,
+		       struct reiserfs_security_handle *security);
+
+void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
+			     struct inode *inode, loff_t size);
+
+static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
+				      struct inode *inode)
+{
+	reiserfs_update_sd_size(th, inode, inode->i_size);
+}
+
+void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
+void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
+int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
+
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
+
+/* namei.c */
+void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
+int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
+			struct treepath *path, struct reiserfs_dir_entry *de);
+struct dentry *reiserfs_get_parent(struct dentry *);
+
+#ifdef CONFIG_REISERFS_PROC_INFO
+int reiserfs_proc_info_init(struct super_block *sb);
+int reiserfs_proc_info_done(struct super_block *sb);
+int reiserfs_proc_info_global_init(void);
+int reiserfs_proc_info_global_done(void);
+
+#define PROC_EXP( e )   e
+
+#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data
+#define PROC_INFO_MAX( sb, field, value )								\
+    __PINFO( sb ).field =												\
+        max( REISERFS_SB( sb ) -> s_proc_info_data.field, value )
+#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) )
+#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) )
+#define PROC_INFO_BH_STAT( sb, bh, level )							\
+    PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] );						\
+    PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) );	\
+    PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) )
+#else
+static inline int reiserfs_proc_info_init(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_done(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_global_init(void)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_global_done(void)
+{
+	return 0;
+}
+
+#define PROC_EXP( e )
+#define VOID_V ( ( void ) 0 )
+#define PROC_INFO_MAX( sb, field, value ) VOID_V
+#define PROC_INFO_INC( sb, field ) VOID_V
+#define PROC_INFO_ADD( sb, field, val ) VOID_V
+#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V
+#endif
+
+/* dir.c */
+extern const struct inode_operations reiserfs_dir_inode_operations;
+extern const struct inode_operations reiserfs_symlink_inode_operations;
+extern const struct inode_operations reiserfs_special_inode_operations;
+extern const struct file_operations reiserfs_dir_operations;
+int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *);
+
+/* tail_conversion.c */
+int direct2indirect(struct reiserfs_transaction_handle *, struct inode *,
+		    struct treepath *, struct buffer_head *, loff_t);
+int indirect2direct(struct reiserfs_transaction_handle *, struct inode *,
+		    struct page *, struct treepath *, const struct cpu_key *,
+		    loff_t, char *);
+void reiserfs_unmap_buffer(struct buffer_head *);
+
+/* file.c */
+extern const struct inode_operations reiserfs_file_inode_operations;
+extern const struct file_operations reiserfs_file_operations;
+extern const struct address_space_operations reiserfs_address_space_operations;
+
+/* fix_nodes.c */
+
+int fix_nodes(int n_op_mode, struct tree_balance *tb,
+	      struct item_head *ins_ih, const void *);
+void unfix_nodes(struct tree_balance *);
+
+/* prints.c */
+void __reiserfs_panic(struct super_block *s, const char *id,
+		      const char *function, const char *fmt, ...)
+    __attribute__ ((noreturn));
+#define reiserfs_panic(s, id, fmt, args...) \
+	__reiserfs_panic(s, id, __func__, fmt, ##args)
+void __reiserfs_error(struct super_block *s, const char *id,
+		      const char *function, const char *fmt, ...);
+#define reiserfs_error(s, id, fmt, args...) \
+	 __reiserfs_error(s, id, __func__, fmt, ##args)
+void reiserfs_info(struct super_block *s, const char *fmt, ...);
+void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...);
+void print_indirect_item(struct buffer_head *bh, int item_num);
+void store_print_tb(struct tree_balance *tb);
+void print_cur_tb(char *mes);
+void print_de(struct reiserfs_dir_entry *de);
+void print_bi(struct buffer_info *bi, char *mes);
+#define PRINT_LEAF_ITEMS 1	/* print all items */
+#define PRINT_DIRECTORY_ITEMS 2	/* print directory items */
+#define PRINT_DIRECT_ITEMS 4	/* print contents of direct items */
+void print_block(struct buffer_head *bh, ...);
+void print_bmap(struct super_block *s, int silent);
+void print_bmap_block(int i, char *data, int size, int silent);
+/*void print_super_block (struct super_block * s, char * mes);*/
+void print_objectid_map(struct super_block *s);
+void print_block_head(struct buffer_head *bh, char *mes);
+void check_leaf(struct buffer_head *bh);
+void check_internal(struct buffer_head *bh);
+void print_statistics(struct super_block *s);
+char *reiserfs_hashname(int code);
+
+/* lbalance.c */
+int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
+		    int mov_bytes, struct buffer_head *Snew);
+int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes);
+int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
+void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
+		       int del_num, int del_bytes);
+void leaf_insert_into_buf(struct buffer_info *bi, int before,
+			  struct item_head *inserted_item_ih,
+			  const char *inserted_item_body, int zeros_number);
+void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
+			  int pos_in_item, int paste_size, const char *body,
+			  int zeros_number);
+void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
+			  int pos_in_item, int cut_size);
+void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
+			int new_entry_count, struct reiserfs_de_head *new_dehs,
+			const char *records, int paste_size);
+/* ibalance.c */
+int balance_internal(struct tree_balance *, int, int, struct item_head *,
+		     struct buffer_head **);
+
+/* do_balance.c */
+void do_balance_mark_leaf_dirty(struct tree_balance *tb,
+				struct buffer_head *bh, int flag);
+#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
+#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
+
+void do_balance(struct tree_balance *tb, struct item_head *ih,
+		const char *body, int flag);
+void reiserfs_invalidate_buffer(struct tree_balance *tb,
+				struct buffer_head *bh);
+
+int get_left_neighbor_position(struct tree_balance *tb, int h);
+int get_right_neighbor_position(struct tree_balance *tb, int h);
+void replace_key(struct tree_balance *tb, struct buffer_head *, int,
+		 struct buffer_head *, int);
+void make_empty_node(struct buffer_info *);
+struct buffer_head *get_FEB(struct tree_balance *);
+
+/* bitmap.c */
+
+/* structure contains hints for block allocator, and it is a container for
+ * arguments, such as node, search path, transaction_handle, etc. */
+struct __reiserfs_blocknr_hint {
+	struct inode *inode;	/* inode passed to allocator, if we allocate unf. nodes */
+	sector_t block;		/* file offset, in blocks */
+	struct in_core_key key;
+	struct treepath *path;	/* search path, used by the allocator to determine
+				 * search_start in various ways */
+	struct reiserfs_transaction_handle *th;	/* transaction handle is needed to log super blocks and
+						 * bitmap blocks changes  */
+	b_blocknr_t beg, end;
+	b_blocknr_t search_start;	/* a field used to transfer search start value (block number)
+					 * between different block allocator procedures
+					 * (determine_search_start() and others) */
+	int prealloc_size;	/* set in the determine_prealloc_size() function, used by
+				 * the underlying functions that do the actual allocation */
+
+	unsigned formatted_node:1;	/* the allocator uses different policies for getting disk space for
+					 * formatted/unformatted blocks with/without preallocation */
+	unsigned preallocate:1;
+};
+
+typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
+
+int reiserfs_parse_alloc_options(struct super_block *, char *);
+void reiserfs_init_alloc_options(struct super_block *s);
+
+/*
+ * given a directory, this will tell you what packing locality
+ * to use for a new object underneath it.  The locality is returned
+ * in disk byte order (le).
+ */
+__le32 reiserfs_choose_packing(struct inode *dir);
+
+int reiserfs_init_bitmap_cache(struct super_block *sb);
+void reiserfs_free_bitmap_cache(struct super_block *sb);
+void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info);
+struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap);
+int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
+void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *,
+			 b_blocknr_t, int for_unformatted);
+int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int,
+			       int);
+static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
+					     b_blocknr_t * new_blocknrs,
+					     int amount_needed)
+{
+	reiserfs_blocknr_hint_t hint = {
+		.th = tb->transaction_handle,
+		.path = tb->tb_path,
+		.inode = NULL,
+		.key = tb->key,
+		.block = 0,
+		.formatted_node = 1
+	};
+	return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed,
+					  0);
+}
+
+static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
+					    *th, struct inode *inode,
+					    b_blocknr_t * new_blocknrs,
+					    struct treepath *path,
+					    sector_t block)
+{
+	reiserfs_blocknr_hint_t hint = {
+		.th = th,
+		.path = path,
+		.inode = inode,
+		.block = block,
+		.formatted_node = 0,
+		.preallocate = 0
+	};
+	return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
+}
+
+#ifdef REISERFS_PREALLOCATE
+static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
+					     *th, struct inode *inode,
+					     b_blocknr_t * new_blocknrs,
+					     struct treepath *path,
+					     sector_t block)
+{
+	reiserfs_blocknr_hint_t hint = {
+		.th = th,
+		.path = path,
+		.inode = inode,
+		.block = block,
+		.formatted_node = 0,
+		.preallocate = 1
+	};
+	return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
+}
+
+void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
+			       struct inode *inode);
+void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th);
+#endif
+
+/* hashes.c */
+__u32 keyed_hash(const signed char *msg, int len);
+__u32 yura_hash(const signed char *msg, int len);
+__u32 r5_hash(const signed char *msg, int len);
+
+#define reiserfs_set_le_bit		__set_bit_le
+#define reiserfs_test_and_set_le_bit	__test_and_set_bit_le
+#define reiserfs_clear_le_bit		__clear_bit_le
+#define reiserfs_test_and_clear_le_bit	__test_and_clear_bit_le
+#define reiserfs_test_le_bit		test_bit_le
+#define reiserfs_find_next_zero_le_bit	find_next_zero_bit_le
+
+/* sometimes reiserfs_truncate may have to allocate a few new blocks
+   to perform an indirect2direct conversion. People probably expect
+   truncate to work without problems on a filesystem with no free disk
+   space, and may complain when it fails for lack of space. This spare
+   space allows us not to worry about that. 500 is probably too much,
+   but it should be absolutely safe */
+#define SPARE_SPACE 500
+
+/* prototypes from ioctl.c */
+long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+long reiserfs_compat_ioctl(struct file *filp,
+		   unsigned int cmd, unsigned long arg);
+int reiserfs_unpack(struct inode *inode, struct file *filp);
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/resize.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/resize.c
new file mode 100644
index 0000000..9a17f63
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/resize.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+/*
+ * Written by Alexander Zarochentcev.
+ *
+ * The kernel part of the (on-line) reiserfs resizer.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include "reiserfs.h"
+#include <linux/buffer_head.h>
+
+int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
+{
+	int err = 0;
+	struct reiserfs_super_block *sb;
+	struct reiserfs_bitmap_info *bitmap;
+	struct reiserfs_bitmap_info *info;
+	struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s);
+	struct buffer_head *bh;
+	struct reiserfs_transaction_handle th;
+	unsigned int bmap_nr_new, bmap_nr;
+	unsigned int block_r_new, block_r;
+
+	struct reiserfs_list_bitmap *jb;
+	struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS];
+
+	unsigned long int block_count, free_blocks;
+	int i;
+	int copy_size;
+
+	sb = SB_DISK_SUPER_BLOCK(s);
+
+	if (SB_BLOCK_COUNT(s) >= block_count_new) {
+		printk("can\'t shrink filesystem on-line\n");
+		return -EINVAL;
+	}
+
+	/* check the device size */
+	bh = sb_bread(s, block_count_new - 1);
+	if (!bh) {
+		printk("reiserfs_resize: can\'t read last block\n");
+		return -EINVAL;
+	}
+	bforget(bh);
+
+	/* old disk layout detection; those partitions can be mounted, but
+	 * cannot be resized */
+	if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size
+	    != REISERFS_DISK_OFFSET_IN_BYTES) {
+		printk
+		    ("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n");
+		return -ENOTSUPP;
+	}
+
+	/* count used bits in last bitmap block */
+	block_r = SB_BLOCK_COUNT(s) -
+			(reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8;
+
+	/* count bitmap blocks in new fs */
+	bmap_nr_new = block_count_new / (s->s_blocksize * 8);
+	block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8;
+	if (block_r_new)
+		bmap_nr_new++;
+	else
+		block_r_new = s->s_blocksize * 8;
+
+	/* save old values */
+	block_count = SB_BLOCK_COUNT(s);
+	bmap_nr = reiserfs_bmap_count(s);
+
+	/* resizing of reiserfs bitmaps (journal and real), if needed */
+	if (bmap_nr_new > bmap_nr) {
+		/* reallocate journal bitmaps */
+		if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
+			printk
+			    ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
+			return -ENOMEM;
+		}
+		/* the new journal bitmaps are zero filled, now we copy in the bitmap
+		 ** node pointers from the old journal bitmap structs, and then
+		 ** transfer the new data structures into the journal struct.
+		 **
+		 ** using the copy_size var below allows this code to work for
+		 ** both shrinking and expanding the FS.
+		 */
+		copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
+		copy_size =
+		    copy_size * sizeof(struct reiserfs_list_bitmap_node *);
+		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
+			struct reiserfs_bitmap_node **node_tmp;
+			jb = SB_JOURNAL(s)->j_list_bitmap + i;
+			memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
+
+			/* just in case vfree schedules on us, copy the new
+			 ** pointer into the journal struct before freeing the
+			 ** old one
+			 */
+			node_tmp = jb->bitmaps;
+			jb->bitmaps = jbitmap[i].bitmaps;
+			vfree(node_tmp);
+		}
+
+		/* allocate additional bitmap blocks, reallocate array of bitmap
+		 * block pointers */
+		bitmap =
+		    vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
+		if (!bitmap) {
+			/* Journal bitmaps are still supersized, but the memory isn't
+			 * leaked, so I guess it's ok */
+			printk("reiserfs_resize: unable to allocate memory.\n");
+			return -ENOMEM;
+		}
+		for (i = 0; i < bmap_nr; i++)
+			bitmap[i] = old_bitmap[i];
+
+		/* This doesn't go through the journal, but it doesn't have to.
+		 * The changes are still atomic: We're synced up when the journal
+		 * transaction begins, and the new bitmaps don't matter if the
+		 * transaction fails. */
+		for (i = bmap_nr; i < bmap_nr_new; i++) {
+			/* don't use read_bitmap_block since it will cache
+			 * the uninitialized bitmap */
+			bh = sb_bread(s, i * s->s_blocksize * 8);
+			if (!bh) {
+				vfree(bitmap);
+				return -EIO;
+			}
+			memset(bh->b_data, 0, sb_blocksize(sb));
+			reiserfs_set_le_bit(0, bh->b_data);
+			reiserfs_cache_bitmap_metadata(s, bh, bitmap + i);
+
+			set_buffer_uptodate(bh);
+			mark_buffer_dirty(bh);
+			reiserfs_write_unlock(s);
+			sync_dirty_buffer(bh);
+			reiserfs_write_lock(s);
+			// update bitmap_info stuff
+			bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
+			brelse(bh);
+		}
+		/* free old bitmap blocks array */
+		SB_AP_BITMAP(s) = bitmap;
+		vfree(old_bitmap);
+	}
+
+	/* begin transaction; if that fails, bailing out here is fine. Yes, we
+	 * have incorrect bitmaps now, but none of it is ever going to touch
+	 * the disk anyway. */
+	err = journal_begin(&th, s, 10);
+	if (err)
+		return err;
+
+	/* Extend old last bitmap block - new blocks have been made available */
+	info = SB_AP_BITMAP(s) + bmap_nr - 1;
+	bh = reiserfs_read_bitmap_block(s, bmap_nr - 1);
+	if (!bh) {
+		int jerr = journal_end(&th, s, 10);
+		if (jerr)
+			return jerr;
+		return -EIO;
+	}
+
+	reiserfs_prepare_for_journal(s, bh, 1);
+	for (i = block_r; i < s->s_blocksize * 8; i++)
+		reiserfs_clear_le_bit(i, bh->b_data);
+	info->free_count += s->s_blocksize * 8 - block_r;
+
+	journal_mark_dirty(&th, s, bh);
+	brelse(bh);
+
+	/* Correct new last bitmap block - it may not be full */
+	info = SB_AP_BITMAP(s) + bmap_nr_new - 1;
+	bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1);
+	if (!bh) {
+		int jerr = journal_end(&th, s, 10);
+		if (jerr)
+			return jerr;
+		return -EIO;
+	}
+
+	reiserfs_prepare_for_journal(s, bh, 1);
+	for (i = block_r_new; i < s->s_blocksize * 8; i++)
+		reiserfs_set_le_bit(i, bh->b_data);
+	journal_mark_dirty(&th, s, bh);
+	brelse(bh);
+
+	info->free_count -= s->s_blocksize * 8 - block_r_new;
+	/* update super */
+	reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
+	free_blocks = SB_FREE_BLOCKS(s);
+	PUT_SB_FREE_BLOCKS(s,
+			   free_blocks + (block_count_new - block_count -
+					  (bmap_nr_new - bmap_nr)));
+	PUT_SB_BLOCK_COUNT(s, block_count_new);
+	PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
+	s->s_dirt = 1;
+
+	journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+
+	SB_JOURNAL(s)->j_must_wait = 1;
+	return journal_end(&th, s, 10);
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/stree.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/stree.c
new file mode 100644
index 0000000..2f40a4c
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/stree.c
@@ -0,0 +1,2124 @@
+/*
+ *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+/*
+ *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
+ *  Programm System Institute
+ *  Pereslavl-Zalessky Russia
+ */
+
+/*
+ *  This file contains functions dealing with S+tree
+ *
+ * B_IS_IN_TREE
+ * copy_item_head
+ * comp_short_keys
+ * comp_keys
+ * comp_short_le_keys
+ * le_key2cpu_key
+ * comp_le_keys
+ * bin_search
+ * get_lkey
+ * get_rkey
+ * key_in_buffer
+ * decrement_bcount
+ * reiserfs_check_path
+ * pathrelse_and_restore
+ * pathrelse
+ * search_by_key_reada
+ * search_by_key
+ * search_for_position_by_key
+ * comp_items
+ * prepare_for_direct_item
+ * prepare_for_direntry_item
+ * prepare_for_delete_or_cut
+ * calc_deleted_bytes_number
+ * init_tb_struct
+ * padd_item
+ * reiserfs_delete_item
+ * reiserfs_delete_solid_item
+ * reiserfs_delete_object
+ * maybe_indirect_to_direct
+ * indirect_to_direct_roll_back
+ * reiserfs_cut_from_item
+ * truncate_directory
+ * reiserfs_do_truncate
+ * reiserfs_paste_into_item
+ * reiserfs_insert_item
+ */
+
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include "reiserfs.h"
+#include <linux/buffer_head.h>
+#include <linux/quotaops.h>
+
+/* Does the buffer contain a disk block which is in the tree? */
+inline int B_IS_IN_TREE(const struct buffer_head *bh)
+{
+
+	RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
+	       "PAP-1010: block (%b) has too big level (%z)", bh, bh);
+
+	return (B_LEVEL(bh) != FREE_LEVEL);
+}
+
+//
+// to get item head in le form
+//
+inline void copy_item_head(struct item_head *to,
+			   const struct item_head *from)
+{
+	memcpy(to, from, IH_SIZE);
+}
+
+/* k1 is a pointer to an on-disk structure which is stored in
+   little-endian form. k2 is a pointer to a cpu variable. For keys of
+   items of the same object this returns 0.
+   Returns: -1 if key1 < key2
+             0 if key1 == key2
+             1 if key1 > key2 */
+inline int comp_short_keys(const struct reiserfs_key *le_key,
+			   const struct cpu_key *cpu_key)
+{
+	__u32 n;
+	n = le32_to_cpu(le_key->k_dir_id);
+	if (n < cpu_key->on_disk_key.k_dir_id)
+		return -1;
+	if (n > cpu_key->on_disk_key.k_dir_id)
+		return 1;
+	n = le32_to_cpu(le_key->k_objectid);
+	if (n < cpu_key->on_disk_key.k_objectid)
+		return -1;
+	if (n > cpu_key->on_disk_key.k_objectid)
+		return 1;
+	return 0;
+}
+
+/* k1 is a pointer to an on-disk structure which is stored in
+   little-endian form. k2 is a pointer to a cpu variable.
+   Compare keys using all 4 key fields.
+   Returns: -1 if key1 < key2
+             0 if key1 == key2
+             1 if key1 > key2 */
+static inline int comp_keys(const struct reiserfs_key *le_key,
+			    const struct cpu_key *cpu_key)
+{
+	int retval;
+
+	retval = comp_short_keys(le_key, cpu_key);
+	if (retval)
+		return retval;
+	if (le_key_k_offset(le_key_version(le_key), le_key) <
+	    cpu_key_k_offset(cpu_key))
+		return -1;
+	if (le_key_k_offset(le_key_version(le_key), le_key) >
+	    cpu_key_k_offset(cpu_key))
+		return 1;
+
+	if (cpu_key->key_length == 3)
+		return 0;
+
+	/* this part is needed only when tail conversion is in progress */
+	if (le_key_k_type(le_key_version(le_key), le_key) <
+	    cpu_key_k_type(cpu_key))
+		return -1;
+
+	if (le_key_k_type(le_key_version(le_key), le_key) >
+	    cpu_key_k_type(cpu_key))
+		return 1;
+
+	return 0;
+}
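+
+/* The net effect: keys order first by (k_dir_id, k_objectid), then by
+   offset, and only for 4-field cpu keys by type, so all items of one
+   object sort together in offset order. */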
+
+inline int comp_short_le_keys(const struct reiserfs_key *key1,
+			      const struct reiserfs_key *key2)
+{
+	__u32 *k1_u32, *k2_u32;
+	int key_length = REISERFS_SHORT_KEY_LEN;
+
+	k1_u32 = (__u32 *) key1;
+	k2_u32 = (__u32 *) key2;
+	for (; key_length--; ++k1_u32, ++k2_u32) {
+		if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
+			return -1;
+		if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
+			return 1;
+	}
+	return 0;
+}
+
+inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
+{
+	int version;
+	to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
+	to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
+
+	// find out version of the key
+	version = le_key_version(from);
+	to->version = version;
+	to->on_disk_key.k_offset = le_key_k_offset(version, from);
+	to->on_disk_key.k_type = le_key_k_type(version, from);
+}
+
+// this does not say which one is bigger, it only returns nonzero if the
+// keys are not equal and 0 if they are
+inline int comp_le_keys(const struct reiserfs_key *k1,
+			const struct reiserfs_key *k2)
+{
+	return memcmp(k1, k2, sizeof(struct reiserfs_key));
+}
+
+/**************************************************************************
+ *  Binary search toolkit function                                        *
+ *  Search for an item in the array by the item key                       *
+ *  Returns:    1 if found,  0 if not found;                              *
+ *        *pos = number of the searched element if found, else the        *
+ *        number of the first element that is larger than key.            *
+ **************************************************************************/
+/* For those not familiar with binary search: lbound is the leftmost item that it
+ could be, rbound the rightmost item that it could be.  We examine the item
+ halfway between lbound and rbound, and that tells us either that we can increase
+ lbound, or decrease rbound, or that we have found it, or if lbound > rbound that
+ there are no possible items, and we have not found it. With each examination we
+ cut the number of possible items it could be by one more than half rounded down,
+ or we find it. */
+static inline int bin_search(const void *key,	/* Key to search for. */
+			     const void *base,	/* First item in the array. */
+			     int num,	/* Number of items in the array. */
+			     int width,	/* Item size in the array.
+					   Lest the reader be
+					   confused, note that this is crafted
+					   as a general function, and when it
+					   is applied specifically to the array
+					   of item headers in a node, width
+					   is actually the item header size, not
+					   the item size. */
+			     int *pos /* Number of the searched for element. */
+    )
+{
+	int rbound, lbound, j;
+
+	for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
+	     lbound <= rbound; j = (rbound + lbound) / 2)
+		switch (comp_keys
+			((struct reiserfs_key *)((char *)base + j * width),
+			 (struct cpu_key *)key)) {
+		case -1:
+			lbound = j + 1;
+			continue;
+		case 1:
+			rbound = j - 1;
+			continue;
+		case 0:
+			*pos = j;
+			return ITEM_FOUND;	/* Key found in the array.  */
+		}
+
+	/* bin_search did not find the given key; it returns the position of
+	   the minimal key that is greater than the given one. */
+	*pos = lbound;
+	return ITEM_NOT_FOUND;
+}
+
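+/* search_by_key() applies this to the array of item headers of a leaf
+ * node (hence the note about width above), roughly:
+ *
+ *	bin_search(key, B_N_PITEM_HEAD(bh, 0), B_NR_ITEMS(bh),
+ *		   IH_SIZE, &position);
+ */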
+
+/* Minimal possible key. It is never in the tree. */
+const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
+
+/* Maximal possible key. It is never in the tree. */
+static const struct reiserfs_key MAX_KEY = {
+	__constant_cpu_to_le32(0xffffffff),
+	__constant_cpu_to_le32(0xffffffff),
+	{{__constant_cpu_to_le32(0xffffffff),
+	  __constant_cpu_to_le32(0xffffffff)},}
+};
+
+/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
+   of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
+   the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
+   case we return a special key, either MIN_KEY or MAX_KEY. */
+static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
+						  const struct super_block *sb)
+{
+	int position, path_offset = chk_path->path_length;
+	struct buffer_head *parent;
+
+	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
+	       "PAP-5010: invalid offset in the path");
+
+	/* While not higher in path than first element. */
+	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
+
+		RFALSE(!buffer_uptodate
+		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
+		       "PAP-5020: parent is not uptodate");
+
+		/* Parent at the path is not in the tree now. */
+		if (!B_IS_IN_TREE
+		    (parent =
+		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
+			return &MAX_KEY;
+		/* Check whether position in the parent is correct. */
+		if ((position =
+		     PATH_OFFSET_POSITION(chk_path,
+					  path_offset)) >
+		    B_NR_ITEMS(parent))
+			return &MAX_KEY;
+		/* Check whether parent at the path really points to the child. */
+		if (B_N_CHILD_NUM(parent, position) !=
+		    PATH_OFFSET_PBUFFER(chk_path,
+					path_offset + 1)->b_blocknr)
+			return &MAX_KEY;
+		/* Return delimiting key if position in the parent is not equal to zero. */
+		if (position)
+			return B_N_PDELIM_KEY(parent, position - 1);
+	}
+	/* Return MIN_KEY if we are in the root of the buffer tree. */
+	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
+	    b_blocknr == SB_ROOT_BLOCK(sb))
+		return &MIN_KEY;
+	return &MAX_KEY;
+}
+
+/* Get delimiting key of the buffer at the path and its right neighbor. */
+inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
+					   const struct super_block *sb)
+{
+	int position, path_offset = chk_path->path_length;
+	struct buffer_head *parent;
+
+	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
+	       "PAP-5030: invalid offset in the path");
+
+	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
+
+		RFALSE(!buffer_uptodate
+		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
+		       "PAP-5040: parent is not uptodate");
+
+		/* Parent at the path is not in the tree now. */
+		if (!B_IS_IN_TREE
+		    (parent =
+		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
+			return &MIN_KEY;
+		/* Check whether position in the parent is correct. */
+		if ((position =
+		     PATH_OFFSET_POSITION(chk_path,
+					  path_offset)) >
+		    B_NR_ITEMS(parent))
+			return &MIN_KEY;
+		/* Check whether parent at the path really points to the child. */
+		if (B_N_CHILD_NUM(parent, position) !=
+		    PATH_OFFSET_PBUFFER(chk_path,
+					path_offset + 1)->b_blocknr)
+			return &MIN_KEY;
+		/* Return delimiting key if position in the parent is not the last one. */
+		if (position != B_NR_ITEMS(parent))
+			return B_N_PDELIM_KEY(parent, position);
+	}
+	/* Return MAX_KEY if we are in the root of the buffer tree. */
+	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
+	    b_blocknr == SB_ROOT_BLOCK(sb))
+		return &MAX_KEY;
+	return &MIN_KEY;
+}
+
+/* Check whether a key is contained in the tree rooted from a buffer at a path. */
+/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
+   the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
+   buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
+   this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
+static inline int key_in_buffer(struct treepath *chk_path,	/* Path which should be checked.  */
+				const struct cpu_key *key,	/* Key which should be checked.   */
+				struct super_block *sb
+    )
+{
+
+	RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
+	       || chk_path->path_length > MAX_HEIGHT,
+	       "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
+	       key, chk_path->path_length);
+	RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
+	       "PAP-5060: device must not be NODEV");
+
+	if (comp_keys(get_lkey(chk_path, sb), key) == 1)
+		/* left delimiting key is bigger than the key we look for */
+		return 0;
+	/*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
+	if (comp_keys(get_rkey(chk_path, sb), key) != 1)
+		/* key must be less than the right delimiting key */
+		return 0;
+	return 1;
+}
+
+int reiserfs_check_path(struct treepath *p)
+{
+	RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
+	       "path not properly relsed");
+	return 0;
+}
+
+/* Drop the reference to each buffer in a path and restore the dirty
+ * bits that were cleared when the buffers were prepared for the log.
+ * This version should only be called from fix_nodes() */
+void pathrelse_and_restore(struct super_block *sb,
+			   struct treepath *search_path)
+{
+	int path_offset = search_path->path_length;
+
+	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
+	       "clm-4000: invalid path offset");
+
+	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
+		struct buffer_head *bh;
+		bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
+		reiserfs_restore_prepared_buffer(sb, bh);
+		brelse(bh);
+	}
+	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
+}
+
+/* Drop the reference to each buffer in a path */
+void pathrelse(struct treepath *search_path)
+{
+	int path_offset = search_path->path_length;
+
+	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
+	       "PAP-5090: invalid path offset");
+
+	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
+		brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
+
+	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
+}
+
+static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
+{
+	struct block_head *blkh;
+	struct item_head *ih;
+	int used_space;
+	int prev_location;
+	int i;
+	int nr;
+
+	blkh = (struct block_head *)buf;
+	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
+		reiserfs_warning(NULL, "reiserfs-5080",
+				 "this should be caught earlier");
+		return 0;
+	}
+
+	nr = blkh_nr_item(blkh);
+	if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
+		/* item number is too big or too small */
+		reiserfs_warning(NULL, "reiserfs-5081",
+				 "nr_item seems wrong: %z", bh);
+		return 0;
+	}
+	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
+	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
+	if (used_space != blocksize - blkh_free_space(blkh)) {
+		/* free space does not match the calculated amount of used space */
+		reiserfs_warning(NULL, "reiserfs-5082",
+				 "free space seems wrong: %z", bh);
+		return 0;
+	}
+	// FIXME: is_leaf will hit performance too much - we may have to
+	// return 1 here
+
+	/* check tables of item heads */
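+	/* Item heads grow up from the front of the block while item bodies
+	   are packed down from its end, so, walking the heads in order,
+	   each body must start exactly ih_item_len(ih) bytes below the
+	   previous one (prev_location starts at blocksize). */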
+	ih = (struct item_head *)(buf + BLKH_SIZE);
+	prev_location = blocksize;
+	for (i = 0; i < nr; i++, ih++) {
+		if (le_ih_k_type(ih) == TYPE_ANY) {
+			reiserfs_warning(NULL, "reiserfs-5083",
+					 "wrong item type for item %h",
+					 ih);
+			return 0;
+		}
+		if (ih_location(ih) >= blocksize
+		    || ih_location(ih) < IH_SIZE * nr) {
+			reiserfs_warning(NULL, "reiserfs-5084",
+					 "item location seems wrong: %h",
+					 ih);
+			return 0;
+		}
+		if (ih_item_len(ih) < 1
+		    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
+			reiserfs_warning(NULL, "reiserfs-5085",
+					 "item length seems wrong: %h",
+					 ih);
+			return 0;
+		}
+		if (prev_location - ih_location(ih) != ih_item_len(ih)) {
+			reiserfs_warning(NULL, "reiserfs-5086",
+					 "item location seems wrong "
+					 "(second one): %h", ih);
+			return 0;
+		}
+		prev_location = ih_location(ih);
+	}
+
+	// one may imagine much more checks
+	return 1;
+}
+
+/* returns 1 if buf looks like an internal node, 0 otherwise */
+static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
+{
+	struct block_head *blkh;
+	int nr;
+	int used_space;
+
+	blkh = (struct block_head *)buf;
+	nr = blkh_level(blkh);
+	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
+		/* this level is not possible for internal nodes */
+		reiserfs_warning(NULL, "reiserfs-5087",
+				 "this should be caught earlier");
+		return 0;
+	}
+
+	nr = blkh_nr_item(blkh);
+	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
+		/* for an internal node which is not the root we could also
+		   check the minimum number of keys */
+		reiserfs_warning(NULL, "reiserfs-5088",
+				 "number of keys seems wrong: %z", bh);
+		return 0;
+	}
+
+	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
+	if (used_space != blocksize - blkh_free_space(blkh)) {
+		reiserfs_warning(NULL, "reiserfs-5089",
+				 "free space seems wrong: %z", bh);
+		return 0;
+	}
+	// one may imagine much more checks
+	return 1;
+}
+
+// make sure that bh contains formatted node of reiserfs tree of
+// 'level'-th level
+static int is_tree_node(struct buffer_head *bh, int level)
+{
+	if (B_LEVEL(bh) != level) {
+		reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
+				 "not match the expected one %d",
+				 B_LEVEL(bh), level);
+		return 0;
+	}
+	if (level == DISK_LEAF_NODE_LEVEL)
+		return is_leaf(bh->b_data, bh->b_size, bh);
+
+	return is_internal(bh->b_data, bh->b_size, bh);
+}
+
+#define SEARCH_BY_KEY_READA 16
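+/* read ahead at most this many sibling leaf blocks per pass; the
+ * reada_blocks/reada_bh arrays in search_by_key() are sized by this */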
+
+/*
+ * The function is NOT SCHEDULE-SAFE!
+ * It might unlock the write lock if we needed to wait for a block
+ * to be read. Note that in this case it won't recover the lock to avoid
+ * high contention resulting from too many lock requests, especially
+ * as the caller (search_by_key) will perform other schedule-unsafe
+ * operations just after calling this function.
+ *
+ * @return true if we have unlocked
+ */
+static bool search_by_key_reada(struct super_block *s,
+				struct buffer_head **bh,
+				b_blocknr_t *b, int num)
+{
+	int i, j;
+	bool unlocked = false;
+
+	for (i = 0; i < num; i++) {
+		bh[i] = sb_getblk(s, b[i]);
+	}
+	/*
+	 * We are going to read some blocks on which we
+	 * have a reference. It's safe, though we might be
+	 * reading blocks that are concurrently changed once we
+	 * release the lock. That is still fine because we check
+	 * later whether the tree changed
+	 */
+	for (j = 0; j < i; j++) {
+		/*
+		 * note, this needs attention if we are getting rid of the BKL
+		 * you have to make sure the prepared bit isn't set on this buffer
+		 */
+		if (!buffer_uptodate(bh[j])) {
+			if (!unlocked) {
+				reiserfs_write_unlock(s);
+				unlocked = true;
+			}
+			ll_rw_block(READA, 1, bh + j);
+		}
+		brelse(bh[j]);
+	}
+	return unlocked;
+}
+
+/**************************************************************************
+ * Algorithm   SearchByKey                                                *
+ *             look for item in the Disk S+Tree by its key                *
+ * Input:  sb   -  super block                                            *
+ *         key  - pointer to the key to search                            *
+ * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
+ *         search_path - path from the root to the needed leaf            *
+ **************************************************************************/
+
+/* This function fills up the path from the root to the leaf as it
+   descends the tree looking for the key.  It uses reiserfs_bread to
+   try to find buffers in the cache given their block number.  If it
+   does not find them in the cache it reads them from disk.  For each
+   node search_by_key finds using reiserfs_bread it then uses
+   bin_search to look through that node.  bin_search will find the
+   position of the block_number of the next node if it is looking
+   through an internal node.  If it is looking through a leaf node
+   bin_search will find the position of the item which has key either
+   equal to given key, or which is the maximal key less than the given
+   key.  search_by_key returns a path that must be checked for the
+   correctness of the top of the path but need not be checked for the
+   correctness of the bottom of the path */
+/* The function is NOT SCHEDULE-SAFE! */
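+/* A typical call sequence (an illustrative sketch only; error handling
+ * and the actual use of the result are elided):
+ *
+ *	INITIALIZE_PATH(path);
+ *	if (search_by_key(sb, &key, &path, DISK_LEAF_NODE_LEVEL) == IO_ERROR)
+ *		...;
+ *	bh = PATH_PLAST_BUFFER(&path);
+ *	item_pos = PATH_LAST_POSITION(&path);
+ *	...
+ *	pathrelse(&path);
+ */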
+int search_by_key(struct super_block *sb, const struct cpu_key *key,	/* Key to search. */
+		  struct treepath *search_path,/* This structure was
+						   allocated and initialized
+						   by the calling
+						   function. It is filled up
+						   by this function.  */
+		  int stop_level	/* How far down the tree to search. To
+					   stop at leaf level - set to
+					   DISK_LEAF_NODE_LEVEL */
+    )
+{
+	b_blocknr_t block_number;
+	int expected_level;
+	struct buffer_head *bh;
+	struct path_element *last_element;
+	int node_level, retval;
+	int right_neighbor_of_leaf_node;
+	int fs_gen;
+	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
+	b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
+	int reada_count = 0;
+
+#ifdef CONFIG_REISERFS_CHECK
+	int repeat_counter = 0;
+#endif
+
+	PROC_INFO_INC(sb, search_by_key);
+
+	/* As we add each node to a path we increase its count.  This means that
+	   we must be careful to release all nodes in a path before we either
+	   discard the path struct or re-use the path struct, as we do here. */
+
+	pathrelse(search_path);
+
+	right_neighbor_of_leaf_node = 0;
+
+	/* With each iteration of this loop we search through the items in the
+	   current node, and calculate the next current node (next path element)
+	   for the next iteration of this loop. */
+	block_number = SB_ROOT_BLOCK(sb);
+	expected_level = -1;
+	while (1) {
+
+#ifdef CONFIG_REISERFS_CHECK
+		if (!(++repeat_counter % 50000))
+			reiserfs_warning(sb, "PAP-5100",
+					 "%s: there were %d iterations of "
+					 "while loop looking for key %K",
+					 current->comm, repeat_counter,
+					 key);
+#endif
+
+		/* prep path to have another element added to it. */
+		last_element =
+		    PATH_OFFSET_PELEMENT(search_path,
+					 ++search_path->path_length);
+		fs_gen = get_generation(sb);
+
+		/* Read the next tree node, and set the last element in the path to
+		   have a pointer to it. */
+		if ((bh = last_element->pe_buffer =
+		     sb_getblk(sb, block_number))) {
+			bool unlocked = false;
+
+			if (!buffer_uptodate(bh) && reada_count > 1)
+				/* may unlock the write lock */
+				unlocked = search_by_key_reada(sb, reada_bh,
+						    reada_blocks, reada_count);
+			/*
+			 * If we haven't already unlocked the write lock,
+			 * then we need to do that here before reading
+			 * the current block
+			 */
+			if (!buffer_uptodate(bh) && !unlocked) {
+				reiserfs_write_unlock(sb);
+				unlocked = true;
+			}
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+
+			if (unlocked)
+				reiserfs_write_lock(sb);
+			if (!buffer_uptodate(bh))
+				goto io_error;
+		} else {
+		      io_error:
+			search_path->path_length--;
+			pathrelse(search_path);
+			return IO_ERROR;
+		}
+		reada_count = 0;
+		if (expected_level == -1)
+			expected_level = SB_TREE_HEIGHT(sb);
+		expected_level--;
+
+		/* It is possible that a schedule occurred. We must check whether the key
+		   to search is still in the tree rooted from the current buffer. If
+		   not then repeat search from the root. */
+		if (fs_changed(fs_gen, sb) &&
+		    (!B_IS_IN_TREE(bh) ||
+		     B_LEVEL(bh) != expected_level ||
+		     !key_in_buffer(search_path, key, sb))) {
+			PROC_INFO_INC(sb, search_by_key_fs_changed);
+			PROC_INFO_INC(sb, search_by_key_restarted);
+			PROC_INFO_INC(sb,
+				      sbk_restarted[expected_level - 1]);
+			pathrelse(search_path);
+
+			/* Get the root block number so that we can repeat the search
+			   starting from the root. */
+			block_number = SB_ROOT_BLOCK(sb);
+			expected_level = -1;
+			right_neighbor_of_leaf_node = 0;
+
+			/* repeat search from the root */
+			continue;
+		}
+
+		/* only check that the key is in the buffer if key is not
+		   equal to the MAX_KEY. The latter case is only possible
+		   during "finish_unfinished()" processing at mount time. */
+		RFALSE(comp_keys(&MAX_KEY, key) &&
+		       !key_in_buffer(search_path, key, sb),
+		       "PAP-5130: key is not in the buffer");
+#ifdef CONFIG_REISERFS_CHECK
+		if (REISERFS_SB(sb)->cur_tb) {
+			print_cur_tb("5140");
+			reiserfs_panic(sb, "PAP-5140",
+				       "schedule occurred in do_balance!");
+		}
+#endif
+
+		// make sure that the node contents look like a node of
+		// the expected level
+		if (!is_tree_node(bh, expected_level)) {
+			reiserfs_error(sb, "vs-5150",
+				       "invalid format found in block %ld. "
+				       "Fsck?", bh->b_blocknr);
+			pathrelse(search_path);
+			return IO_ERROR;
+		}
+
+		/* ok, we have acquired next formatted node in the tree */
+		node_level = B_LEVEL(bh);
+
+		PROC_INFO_BH_STAT(sb, bh, node_level - 1);
+
+		RFALSE(node_level < stop_level,
+		       "vs-5152: tree level (%d) is less than stop level (%d)",
+		       node_level, stop_level);
+
+		retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
+				      B_NR_ITEMS(bh),
+				      (node_level ==
+				       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
+				      KEY_SIZE,
+				      &(last_element->pe_position));
+		if (node_level == stop_level) {
+			return retval;
+		}
+
+		/* we are not in the stop level */
+		if (retval == ITEM_FOUND)
+			/* item has been found, so we choose the pointer which is to the right of the found one */
+			last_element->pe_position++;
+
+		/* if item was not found we choose the position which is to
+		   the left of the found item. This requires no code,
+		   bin_search did it already. */
+
+		/* So we have chosen a position in the current node which is
+		   an internal node.  Now we calculate child block number by
+		   position in the node. */
+		block_number =
+		    B_N_CHILD_NUM(bh, last_element->pe_position);
+
+		/* if we are going to read leaf nodes, try for read ahead as well */
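+		/* collect up to SEARCH_BY_KEY_READA child block numbers that
+		   belong to the same object; they are handed to
+		   search_by_key_reada() on the next pass of the loop if the
+		   next node has to be read from disk */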
+		if ((search_path->reada & PATH_READA) &&
+		    node_level == DISK_LEAF_NODE_LEVEL + 1) {
+			int pos = last_element->pe_position;
+			int limit = B_NR_ITEMS(bh);
+			struct reiserfs_key *le_key;
+
+			if (search_path->reada & PATH_READA_BACK)
+				limit = 0;
+			while (reada_count < SEARCH_BY_KEY_READA) {
+				if (pos == limit)
+					break;
+				reada_blocks[reada_count++] =
+				    B_N_CHILD_NUM(bh, pos);
+				if (search_path->reada & PATH_READA_BACK)
+					pos--;
+				else
+					pos++;
+
+				/*
+				 * check to make sure we're in the same object
+				 */
+				le_key = B_N_PDELIM_KEY(bh, pos);
+				if (le32_to_cpu(le_key->k_objectid) !=
+				    key->on_disk_key.k_objectid) {
+					break;
+				}
+			}
+		}
+	}
+}
+
+/* Form the path to an item and position in this item which contains
+   file byte defined by key. If there is no such item
+   corresponding to the key, we point the path to the item with
+   maximal key less than key, and *pos_in_item is set to one
+   past the last entry/byte in the item.  If searching for entry in a
+   directory item, and it is not found, *pos_in_item is set to one
+   entry more than the entry with maximal key which is less than the
+   sought key.
+
+   Note that if there is no entry in this same node which is one more,
+   then we point to an imaginary entry.  For direct items, the
+   position is in units of bytes, for indirect items the position is
+   in units of blocknr entries, for directory items the position is in
+   units of directory entries.  */
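+/* For example, with a 4KB block size, looking up byte offset 5000 in a
+   file whose indirect item starts at key offset 1 sets
+   pos_in_item = (5000 - 1) / 4096 = 1, i.e. the second unformatted node
+   pointer (an illustrative walk through the arithmetic done below). */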
+
+/* The function is NOT SCHEDULE-SAFE! */
+int search_for_position_by_key(struct super_block *sb,	/* Pointer to the super block.          */
+			       const struct cpu_key *p_cpu_key,	/* Key to search (cpu variable)         */
+			       struct treepath *search_path	/* Filled up by this function.          */
+    )
+{
+	struct item_head *p_le_ih;	/* pointer to on-disk structure */
+	int blk_size;
+	loff_t item_offset, offset;
+	struct reiserfs_dir_entry de;
+	int retval;
+
+	/* If searching for directory entry. */
+	if (is_direntry_cpu_key(p_cpu_key))
+		return search_by_entry_key(sb, p_cpu_key, search_path,
+					   &de);
+
+	/* If not searching for directory entry. */
+
+	/* If item is found. */
+	retval = search_item(sb, p_cpu_key, search_path);
+	if (retval == IO_ERROR)
+		return retval;
+	if (retval == ITEM_FOUND) {
+
+		RFALSE(!ih_item_len
+		       (B_N_PITEM_HEAD
+			(PATH_PLAST_BUFFER(search_path),
+			 PATH_LAST_POSITION(search_path))),
+		       "PAP-5165: item length equals zero");
+
+		pos_in_item(search_path) = 0;
+		return POSITION_FOUND;
+	}
+
+	RFALSE(!PATH_LAST_POSITION(search_path),
+	       "PAP-5170: position equals zero");
+
+	/* Item is not found. Set path to the previous item. */
+	p_le_ih =
+	    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
+			   --PATH_LAST_POSITION(search_path));
+	blk_size = sb->s_blocksize;
+
+	if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
+		return FILE_NOT_FOUND;
+	}
+	// FIXME: quite ugly this far
+
+	item_offset = le_ih_k_offset(p_le_ih);
+	offset = cpu_key_k_offset(p_cpu_key);
+
+	/* Needed byte is contained in the item pointed to by the path. */
+	if (item_offset <= offset &&
+	    item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
+		pos_in_item(search_path) = offset - item_offset;
+		if (is_indirect_le_ih(p_le_ih)) {
+			pos_in_item(search_path) /= blk_size;
+		}
+		return POSITION_FOUND;
+	}
+
+	/* Needed byte is not contained in the item pointed to by the
+	   path. Set pos_in_item out of the item. */
+	if (is_indirect_le_ih(p_le_ih))
+		pos_in_item(search_path) =
+		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
+	else
+		pos_in_item(search_path) = ih_item_len(p_le_ih);
+
+	return POSITION_NOT_FOUND;
+}
+
+/* Compare given item and item pointed to by the path. */
+int comp_items(const struct item_head *stored_ih, const struct treepath *path)
+{
+	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
+	struct item_head *ih;
+
+	/* Last buffer at the path is not in the tree. */
+	if (!B_IS_IN_TREE(bh))
+		return 1;
+
+	/* Last path position is invalid. */
+	if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
+		return 1;
+
+	/* we only need to know whether it is the same item */
+	ih = get_ih(path);
+	return memcmp(stored_ih, ih, IH_SIZE);
+}
+
+/* unformatted nodes are not logged anymore, ever.  This is safe now. */
+#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
+
+// block can not be forgotten as it is in I/O or held by someone
+#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
+
+// prepare for delete or cut of direct item
+static inline int prepare_for_direct_item(struct treepath *path,
+					  struct item_head *le_ih,
+					  struct inode *inode,
+					  loff_t new_file_length, int *cut_size)
+{
+	loff_t round_len;
+
+	if (new_file_length == max_reiserfs_offset(inode)) {
+		/* item has to be deleted */
+		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
+		return M_DELETE;
+	}
+	// new file gets truncated
+	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
+		round_len = ROUND_UP(new_file_length);
+		/* this was new_file_length < le_ih ... */
+		if (round_len < le_ih_k_offset(le_ih)) {
+			*cut_size = -(IH_SIZE + ih_item_len(le_ih));
+			return M_DELETE;	/* Delete this item. */
+		}
+		/* Calculate first position and size for cutting from item. */
+		pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
+		*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
+
+		return M_CUT;	/* Cut from this item. */
+	}
+
+	// old file: items may have any length
+
+	if (new_file_length < le_ih_k_offset(le_ih)) {
+		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
+		return M_DELETE;	/* Delete this item. */
+	}
+	/* Calculate first position and size for cutting from item. */
+	*cut_size = -(ih_item_len(le_ih) -
+		      (pos_in_item(path) =
+		       new_file_length + 1 - le_ih_k_offset(le_ih)));
+	return M_CUT;		/* Cut from this item. */
+}
+
+static inline int prepare_for_direntry_item(struct treepath *path,
+					    struct item_head *le_ih,
+					    struct inode *inode,
+					    loff_t new_file_length,
+					    int *cut_size)
+{
+	if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
+	    new_file_length == max_reiserfs_offset(inode)) {
+		RFALSE(ih_entry_count(le_ih) != 2,
+		       "PAP-5220: incorrect empty directory item (%h)", le_ih);
+		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
+		return M_DELETE;	/* Delete the directory item containing "." and ".." entry. */
+	}
+
+	if (ih_entry_count(le_ih) == 1) {
+		/* Delete the directory item if only one record is left
+		   in it */
+		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
+		return M_DELETE;
+	}
+
+	/* Cut one record from the directory item. */
+	*cut_size =
+	    -(DEH_SIZE +
+	      entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
+	return M_CUT;
+}
+
+#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
+
+/*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
+    If the path points to an indirect item, remove some number of its unformatted nodes.
+    In case of file truncate calculate whether this item must be deleted/truncated or last
+    unformatted node of this item will be converted to a direct item.
+    This function returns a determination of what balance mode the calling function should employ. */
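+/* The mode returned is one of: M_DELETE (remove the whole item;
+   *cut_size is set to -(IH_SIZE + item length)), M_CUT (cut -*cut_size
+   bytes from the end of the item), or, for indirect items only,
+   M_CONVERT (nothing was cut; the last unformatted node may be
+   converted to a direct item). */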
+static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed,	/* Number of unformatted nodes which were removed
+																						   from end of the file. */
+				      int *cut_size, unsigned long long new_file_length	/* MAX_KEY_OFFSET in case of delete. */
+    )
+{
+	struct super_block *sb = inode->i_sb;
+	struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
+	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
+
+	BUG_ON(!th->t_trans_id);
+
+	/* Stat_data item. */
+	if (is_statdata_le_ih(p_le_ih)) {
+
+		RFALSE(new_file_length != max_reiserfs_offset(inode),
+		       "PAP-5210: mode must be M_DELETE");
+
+		*cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
+		return M_DELETE;
+	}
+
+	/* Directory item. */
+	if (is_direntry_le_ih(p_le_ih))
+		return prepare_for_direntry_item(path, p_le_ih, inode,
+						 new_file_length,
+						 cut_size);
+
+	/* Direct item. */
+	if (is_direct_le_ih(p_le_ih))
+		return prepare_for_direct_item(path, p_le_ih, inode,
+					       new_file_length, cut_size);
+
+	/* Case of an indirect item. */
+	{
+	    int blk_size = sb->s_blocksize;
+	    struct item_head s_ih;
+	    int need_re_search;
+	    int delete = 0;
+	    int result = M_CUT;
+	    int pos = 0;
+
+	    if ( new_file_length == max_reiserfs_offset (inode) ) {
+		/* prepare_for_delete_or_cut() is called by
+		 * reiserfs_delete_item() */
+		new_file_length = 0;
+		delete = 1;
+	    }
+
+	    do {
+		need_re_search = 0;
+		*cut_size = 0;
+		bh = PATH_PLAST_BUFFER(path);
+		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+		pos = I_UNFM_NUM(&s_ih);
+
+		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
+		    __le32 *unfm;
+		    __u32 block;
+
+		    /* Each unformatted block deletion may bring one additional
+		     * bitmap block into the transaction, so the initial
+		     * journal space reservation might not be enough. */
+		    if (!delete && (*cut_size) != 0 &&
+			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
+			break;
+
+		    unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
+		    block = get_block_num(unfm, 0);
+
+		    if (block != 0) {
+			reiserfs_prepare_for_journal(sb, bh, 1);
+			put_block_num(unfm, 0, 0);
+			journal_mark_dirty(th, sb, bh);
+			reiserfs_free_block(th, inode, block, 1);
+		    }
+
+		    reiserfs_write_unlock(sb);
+		    cond_resched();
+		    reiserfs_write_lock(sb);
+
+		    if (item_moved (&s_ih, path))  {
+			need_re_search = 1;
+			break;
+		    }
+
+		    pos --;
+		    (*removed)++;
+		    (*cut_size) -= UNFM_P_SIZE;
+
+		    if (pos == 0) {
+			(*cut_size) -= IH_SIZE;
+			result = M_DELETE;
+			break;
+		    }
+		}
+		/* a trick.  If the buffer has been logged, this will do nothing.  If
+		** we've broken the loop without logging it, it will restore the
+		** buffer */
+		reiserfs_restore_prepared_buffer(sb, bh);
+	    } while (need_re_search &&
+		     search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
+	    pos_in_item(path) = pos * UNFM_P_SIZE;
+
+	    if (*cut_size == 0) {
+		/* Nothing was cut. Maybe convert the last unformatted node
+		 * to a direct item? */
+		result = M_CONVERT;
+	    }
+	    return result;
+	}
+}
+
+/* Calculate number of bytes which will be deleted or cut during balance */
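+/* For an indirect item the result is expressed in file bytes, e.g.
+   cutting two unformatted node pointers (insert_size[0] of
+   -2 * UNFM_P_SIZE) on a 4KB-block fs counts as 2 * 4096 = 8192
+   deleted bytes (illustrative numbers). */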
+static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
+{
+	int del_size;
+	struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
+
+	if (is_statdata_le_ih(p_le_ih))
+		return 0;
+
+	del_size =
+	    (mode ==
+	     M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
+	if (is_direntry_le_ih(p_le_ih)) {
+		/* return EMPTY_DIR_SIZE; We delete empty directories only.
+		 * We can't use EMPTY_DIR_SIZE, as old format dirs have a different
+		 * empty size.  ick. FIXME, is this right? */
+		return del_size;
+	}
+
+	if (is_indirect_le_ih(p_le_ih))
+		del_size = (del_size / UNFM_P_SIZE) *
+				(PATH_PLAST_BUFFER(tb->tb_path)->b_size);
+	return del_size;
+}
+
+static void init_tb_struct(struct reiserfs_transaction_handle *th,
+			   struct tree_balance *tb,
+			   struct super_block *sb,
+			   struct treepath *path, int size)
+{
+
+	BUG_ON(!th->t_trans_id);
+
+	memset(tb, '\0', sizeof(struct tree_balance));
+	tb->transaction_handle = th;
+	tb->tb_sb = sb;
+	tb->tb_path = path;
+	PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
+	PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
+	tb->insert_size[0] = size;
+}
+
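+/* Zero the unused tail of an item body: bytes length..total_length-1. */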
+void padd_item(char *item, int total_length, int length)
+{
+	int i;
+
+	for (i = total_length; i > length;)
+		item[--i] = 0;
+}
+
+#ifdef REISERQUOTA_DEBUG
+char key2type(struct reiserfs_key *ih)
+{
+	if (is_direntry_le_key(2, ih))
+		return 'd';
+	if (is_direct_le_key(2, ih))
+		return 'D';
+	if (is_indirect_le_key(2, ih))
+		return 'i';
+	if (is_statdata_le_key(2, ih))
+		return 's';
+	return 'u';
+}
+
+char head2type(struct item_head *ih)
+{
+	if (is_direntry_le_ih(ih))
+		return 'd';
+	if (is_direct_le_ih(ih))
+		return 'D';
+	if (is_indirect_le_ih(ih))
+		return 'i';
+	if (is_statdata_le_ih(ih))
+		return 's';
+	return 'u';
+}
+#endif
+
+/* Delete object item.
+ * th       - active transaction handle
+ * path     - path to the deleted item
+ * item_key - key to search for the deleted item
+ * inode    - used for updating i_blocks and quotas
+ * un_bh    - NULL or unformatted node pointer
+ */
+int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
+			 struct treepath *path, const struct cpu_key *item_key,
+			 struct inode *inode, struct buffer_head *un_bh)
+{
+	struct super_block *sb = inode->i_sb;
+	struct tree_balance s_del_balance;
+	struct item_head s_ih;
+	struct item_head *q_ih;
+	int quota_cut_bytes;
+	int ret_value, del_size, removed;
+
+#ifdef CONFIG_REISERFS_CHECK
+	char mode;
+	int iter = 0;
+#endif
+
+	BUG_ON(!th->t_trans_id);
+
+	init_tb_struct(th, &s_del_balance, sb, path,
+		       0 /*size is unknown */ );
+
+	while (1) {
+		removed = 0;
+
+#ifdef CONFIG_REISERFS_CHECK
+		iter++;
+		mode =
+#endif
+		    prepare_for_delete_or_cut(th, inode, path,
+					      item_key, &removed,
+					      &del_size,
+					      max_reiserfs_offset(inode));
+
+		RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
+
+		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+		s_del_balance.insert_size[0] = del_size;
+
+		ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
+		if (ret_value != REPEAT_SEARCH)
+			break;
+
+		PROC_INFO_INC(sb, delete_item_restarted);
+
+		// file system changed, repeat search
+		ret_value =
+		    search_for_position_by_key(sb, item_key, path);
+		if (ret_value == IO_ERROR)
+			break;
+		if (ret_value == FILE_NOT_FOUND) {
+			reiserfs_warning(sb, "vs-5340",
+					 "no items of the file %K found",
+					 item_key);
+			break;
+		}
+	}			/* while (1) */
+
+	if (ret_value != CARRY_ON) {
+		unfix_nodes(&s_del_balance);
+		return 0;
+	}
+	// reiserfs_delete_item returns the item length on success
+	ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
+	q_ih = get_ih(path);
+	quota_cut_bytes = ih_item_len(q_ih);
+
+	/* hack so the quota code doesn't have to guess if the file
+	 ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
+	 ** We test the offset because the tail might have been
+	 ** split into multiple items, and we only want to decrement for
+	 ** the unfm node once
+	 */
+	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
+		if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
+			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
+		} else {
+			quota_cut_bytes = 0;
+		}
+	}
+
+	if (un_bh) {
+		int off;
+		char *data;
+
+		/* We are in direct2indirect conversion, so move tail contents
+		   to the unformatted node */
+		/* note, we do the copy before preparing the buffer because we
+		 ** don't care about the contents of the unformatted node yet.
+		 ** the only thing we really care about is that the direct
+		 ** item's data is in the unformatted node.
+		 **
+		 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
+		 ** the unformatted node, which might schedule, meaning we'd have to
+		 ** loop all the way back up to the start of the while loop.
+		 **
+		 ** The unformatted node must be dirtied later on.  We can't be
+		 ** sure here if the entire tail has been deleted yet.
+		 **
+		 ** un_bh is from the page cache (all unformatted nodes are
+		 ** from the page cache) and might be a highmem page.  So, we
+		 ** can't use un_bh->b_data.
+		 ** -clm
+		 */
+
+		data = kmap_atomic(un_bh->b_page);
+		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
+		memcpy(data + off,
+		       B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
+		       ret_value);
+		kunmap_atomic(data);
+	}
+	/* Perform balancing after all resources have been collected at once. */
+	do_balance(&s_del_balance, NULL, NULL, M_DELETE);
+
+#ifdef REISERQUOTA_DEBUG
+	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
+		       "reiserquota delete_item(): freeing %u, id=%u type=%c",
+		       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
+#endif
+	dquot_free_space_nodirty(inode, quota_cut_bytes);
+
+	/* Return deleted body length */
+	return ret_value;
+}
+
+/* Summary Of Mechanisms For Handling Collisions Between Processes:
+
+ deletion of the body of the object is performed by iput(), with the
+ result that if multiple processes are operating on a file, the
+ deletion of the body of the file is deferred until the last process
+ that has an open inode performs its iput().
+
+ writes and truncates are protected from collisions by use of
+ semaphores.
+
+ creates, linking, and mknod are protected from collisions with other
+ processes by making the reiserfs_add_entry() the last step in the
+ creation, and then rolling back all changes if there was a collision.
+ - Hans
+*/
+
+/* this deletes an item which never gets split */
+void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
+				struct inode *inode, struct reiserfs_key *key)
+{
+	struct tree_balance tb;
+	INITIALIZE_PATH(path);
+	int item_len = 0;
+	int tb_init = 0;
+	struct cpu_key cpu_key;
+	int retval;
+	int quota_cut_bytes = 0;
+
+	BUG_ON(!th->t_trans_id);
+
+	le_key2cpu_key(&cpu_key, key);
+
+	while (1) {
+		retval = search_item(th->t_super, &cpu_key, &path);
+		if (retval == IO_ERROR) {
+			reiserfs_error(th->t_super, "vs-5350",
+				       "i/o failure occurred trying "
+				       "to delete %K", &cpu_key);
+			break;
+		}
+		if (retval != ITEM_FOUND) {
+			pathrelse(&path);
+			// No need for a warning if there is just no free space
+			// to insert the '..' item into the newly-created subdir
+			if (!
+			    ((unsigned long long)
+			     GET_HASH_VALUE(le_key_k_offset
+					    (le_key_version(key), key)) == 0
+			     && (unsigned long long)
+			     GET_GENERATION_NUMBER(le_key_k_offset
+						   (le_key_version(key),
+						    key)) == 1))
+				reiserfs_warning(th->t_super, "vs-5355",
+						 "%k not found", key);
+			break;
+		}
+		if (!tb_init) {
+			tb_init = 1;
+			item_len = ih_item_len(PATH_PITEM_HEAD(&path));
+			init_tb_struct(th, &tb, th->t_super, &path,
+				       -(IH_SIZE + item_len));
+		}
+		quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
+
+		retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
+		if (retval == REPEAT_SEARCH) {
+			PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
+			continue;
+		}
+
+		if (retval == CARRY_ON) {
+			do_balance(&tb, NULL, NULL, M_DELETE);
+			if (inode) {	/* Should we count quota for item? (we don't count quotas for save-links) */
+#ifdef REISERQUOTA_DEBUG
+				reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
+					       "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
+					       quota_cut_bytes, inode->i_uid,
+					       key2type(key));
+#endif
+				dquot_free_space_nodirty(inode,
+							 quota_cut_bytes);
+			}
+			break;
+		}
+		// IO_ERROR, NO_DISK_SPACE, etc
+		reiserfs_warning(th->t_super, "vs-5360",
+				 "could not delete %K due to fix_nodes failure",
+				 &cpu_key);
+		unfix_nodes(&tb);
+		break;
+	}
+
+	reiserfs_check_path(&path);
+}
+
+int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
+			   struct inode *inode)
+{
+	int err;
+	inode->i_size = 0;
+	BUG_ON(!th->t_trans_id);
+
+	/* for directory this deletes item containing "." and ".." */
+	err =
+	    reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
+	if (err)
+		return err;
+
+#if defined( USE_INODE_GENERATION_COUNTER )
+	if (!old_format_only(th->t_super)) {
+		__le32 *inode_generation;
+
+		inode_generation =
+		    &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
+		le32_add_cpu(inode_generation, 1);
+	}
+/* USE_INODE_GENERATION_COUNTER */
+#endif
+	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
+
+	return err;
+}
+
+static void unmap_buffers(struct page *page, loff_t pos)
+{
+	struct buffer_head *bh;
+	struct buffer_head *head;
+	struct buffer_head *next;
+	unsigned long tail_index;
+	unsigned long cur_index;
+
+	if (page) {
+		if (page_has_buffers(page)) {
+			tail_index = pos & (PAGE_CACHE_SIZE - 1);
+			cur_index = 0;
+			head = page_buffers(page);
+			bh = head;
+			do {
+				next = bh->b_this_page;
+
+				/* we want to unmap the buffers that contain the tail, and
+				 ** all the buffers after it (since the tail must be at the
+				 ** end of the file).  We don't want to unmap file data
+				 ** before the tail, since it might be dirty and waiting to
+				 ** reach disk
+				 */
+				cur_index += bh->b_size;
+				if (cur_index > tail_index) {
+					reiserfs_unmap_buffer(bh);
+				}
+				bh = next;
+			} while (bh != head);
+		}
+	}
+}
+
+static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
+				    struct inode *inode,
+				    struct page *page,
+				    struct treepath *path,
+				    const struct cpu_key *item_key,
+				    loff_t new_file_size, char *mode)
+{
+	struct super_block *sb = inode->i_sb;
+	int block_size = sb->s_blocksize;
+	int cut_bytes;
+	BUG_ON(!th->t_trans_id);
+	BUG_ON(new_file_size != inode->i_size);
+
+	/* the page being sent in could be NULL if there was an i/o error
+	 ** reading in the last block.  The user will hit problems trying to
+	 ** read the file, but for now we just skip the indirect2direct
+	 ** conversion.
+	 */
+	if (atomic_read(&inode->i_count) > 1 ||
+	    !tail_has_to_be_packed(inode) ||
+	    !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
+		/* leave tail in an unformatted node */
+		*mode = M_SKIP_BALANCING;
+		cut_bytes =
+		    block_size - (new_file_size & (block_size - 1));
+		pathrelse(path);
+		return cut_bytes;
+	}
+	/* Perform the conversion to a direct_item. */
+	/* return indirect_to_direct(inode, path, item_key,
+				  new_file_size, mode); */
+	return indirect2direct(th, inode, page, path, item_key,
+			       new_file_size, mode);
+}
+
+/* We did an indirect_to_direct conversion and inserted the direct
+   item successfully, but there was no disk space to cut the unfm
+   pointer being converted. Therefore we have to delete the inserted
+   direct item(s). */
+static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
+					 struct inode *inode, struct treepath *path)
+{
+	struct cpu_key tail_key;
+	int tail_len;
+	int removed;
+	BUG_ON(!th->t_trans_id);
+
+	make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);	// !!!!
+	tail_key.key_length = 4;
+
+	tail_len =
+	    (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
+	while (tail_len) {
+		/* look for the last byte of the tail */
+		if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
+		    POSITION_NOT_FOUND)
+			reiserfs_panic(inode->i_sb, "vs-5615",
+				       "found invalid item");
+		RFALSE(path->pos_in_item !=
+		       ih_item_len(PATH_PITEM_HEAD(path)) - 1,
+		       "vs-5616: appended bytes found");
+		PATH_LAST_POSITION(path)--;
+
+		removed =
+		    reiserfs_delete_item(th, path, &tail_key, inode,
+					 NULL /*unbh not needed */ );
+		RFALSE(removed <= 0
+		       || removed > tail_len,
+		       "vs-5617: there was tail %d bytes, removed item length %d bytes",
+		       tail_len, removed);
+		tail_len -= removed;
+		set_cpu_key_k_offset(&tail_key,
+				     cpu_key_k_offset(&tail_key) - removed);
+	}
+	reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
+			 "conversion has been rolled back due to "
+			 "lack of disk space");
+	//mark_file_without_tail (inode);
+	mark_inode_dirty(inode);
+}
+
+/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
+int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
+			   struct treepath *path,
+			   struct cpu_key *item_key,
+			   struct inode *inode,
+			   struct page *page, loff_t new_file_size)
+{
+	struct super_block *sb = inode->i_sb;
+	/* Every function which is going to call do_balance must first
+	   create a tree_balance structure.  Then it must fill up this
+	   structure by using the init_tb_struct and fix_nodes functions.
+	   After that we can perform the tree balancing. */
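+	/* In sketch form, the sequence used in this function is:
+	 *
+	 *	init_tb_struct(th, &s_cut_balance, ...);
+	 *	while (fix_nodes(mode, &s_cut_balance, ...) == REPEAT_SEARCH)
+	 *		<re-search the key>;
+	 *	do_balance(&s_cut_balance, ..., mode);
+	 *
+	 * with unfix_nodes() releasing everything if fix_nodes() fails. */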
+	struct tree_balance s_cut_balance;
+	struct item_head *p_le_ih;
+	int cut_size = 0,	/* Amount to be cut. */
+	    ret_value = CARRY_ON, removed = 0,	/* Number of the removed unformatted nodes. */
+	    is_inode_locked = 0;
+	char mode;		/* Mode of the balance. */
+	int retval2 = -1;
+	int quota_cut_bytes;
+	loff_t tail_pos = 0;
+
+	BUG_ON(!th->t_trans_id);
+
+	init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
+		       cut_size);
+
+	/* Repeat this loop until we either cut the item without needing
+	   to balance, or we get through fix_nodes without a schedule occurring */
+	while (1) {
+		/* Determine the balance mode, position of the first byte to
+		   be cut, and size to be cut.  In case of the indirect item
+		   free unformatted nodes which are pointed to by the cut
+		   pointers. */
+
+		mode =
+		    prepare_for_delete_or_cut(th, inode, path,
+					      item_key, &removed,
+					      &cut_size, new_file_size);
+		if (mode == M_CONVERT) {
+			/* convert last unformatted node to direct item or leave
+			   tail in the unformatted node */
+			RFALSE(ret_value != CARRY_ON,
+			       "PAP-5570: can not convert twice");
+
+			ret_value =
+			    maybe_indirect_to_direct(th, inode, page,
+						     path, item_key,
+						     new_file_size, &mode);
+			if (mode == M_SKIP_BALANCING)
+				/* tail has been left in the unformatted node */
+				return ret_value;
+
+			is_inode_locked = 1;
+
+			/* removing the last unformatted node will change the
+			   value we have to return to truncate. Save it */
+			retval2 = ret_value;
+			/*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
+
+			/* So, we have performed the first part of the conversion:
+			   inserting the new direct item.  Now we are removing the
+			   last unformatted node pointer. Set key to search for
+			   it. */
+			set_cpu_key_k_type(item_key, TYPE_INDIRECT);
+			item_key->key_length = 4;
+			new_file_size -=
+			    (new_file_size & (sb->s_blocksize - 1));
+			tail_pos = new_file_size;
+			set_cpu_key_k_offset(item_key, new_file_size + 1);
+			if (search_for_position_by_key
+			    (sb, item_key,
+			     path) == POSITION_NOT_FOUND) {
+				print_block(PATH_PLAST_BUFFER(path), 3,
+					    PATH_LAST_POSITION(path) - 1,
+					    PATH_LAST_POSITION(path) + 1);
+				reiserfs_panic(sb, "PAP-5580", "item to "
+					       "convert does not exist (%K)",
+					       item_key);
+			}
+			continue;
+		}
+		if (cut_size == 0) {
+			pathrelse(path);
+			return 0;
+		}
+
+		s_cut_balance.insert_size[0] = cut_size;
+
+		ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
+		if (ret_value != REPEAT_SEARCH)
+			break;
+
+		PROC_INFO_INC(sb, cut_from_item_restarted);
+
+		ret_value =
+		    search_for_position_by_key(sb, item_key, path);
+		if (ret_value == POSITION_FOUND)
+			continue;
+
+		reiserfs_warning(sb, "PAP-5610", "item %K not found",
+				 item_key);
+		unfix_nodes(&s_cut_balance);
+		return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
+	}			/* while */
+
+	// check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
+	if (ret_value != CARRY_ON) {
+		if (is_inode_locked) {
+			// FIXME: this seems not to be needed: we are always
+			// able to cut the item
+			indirect_to_direct_roll_back(th, inode, path);
+		}
+		if (ret_value == NO_DISK_SPACE)
+			reiserfs_warning(sb, "reiserfs-5092",
+					 "NO_DISK_SPACE");
+		unfix_nodes(&s_cut_balance);
+		return -EIO;
+	}
+
+	/* go ahead and perform balancing */
+
+	RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
+
+	/* Calculate number of bytes that need to be cut from the item. */
+	quota_cut_bytes =
+	    (mode ==
+	     M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
+	    insert_size[0];
+	if (retval2 == -1)
+		ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
+	else
+		ret_value = retval2;
+
+	/* For direct items, we only change the quota when deleting the last
+	 ** item.
+	 */
+	p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
+	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
+		if (mode == M_DELETE &&
+		    (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
+		    1) {
+			// FIXME: this is to keep 3.5 happy
+			REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
+			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
+		} else {
+			quota_cut_bytes = 0;
+		}
+	}
+#ifdef CONFIG_REISERFS_CHECK
+	if (is_inode_locked) {
+		struct item_head *le_ih =
+		    PATH_PITEM_HEAD(s_cut_balance.tb_path);
+		/* we are going to complete the indirect2direct conversion.
+		   Make sure that we remove exactly the last unformatted node
+		   pointer of the item */
+		if (!is_indirect_le_ih(le_ih))
+			reiserfs_panic(sb, "vs-5652",
+				       "item must be indirect %h", le_ih);
+
+		if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
+			reiserfs_panic(sb, "vs-5653", "completing "
+				       "indirect2direct conversion indirect "
+				       "item %h being deleted must be "
+				       "4 bytes long", le_ih);
+
+		if (mode == M_CUT
+		    && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
+			reiserfs_panic(sb, "vs-5654", "can not complete "
+				       "indirect2direct conversion of %h "
+				       "(CUT, insert_size==%d)",
+				       le_ih, s_cut_balance.insert_size[0]);
+		}
+		/* it would be useful to make sure that the right neighboring
+		   item is a direct item of this file */
+	}
+#endif
+
+	do_balance(&s_cut_balance, NULL, NULL, mode);
+	if (is_inode_locked) {
+		/* we've done an indirect->direct conversion.  when the data block
+		 ** was freed, it was removed from the list of blocks that must
+		 ** be flushed before the transaction commits, so make sure
+		 ** to unmap and invalidate it
+		 */
+		unmap_buffers(page, tail_pos);
+		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+	}
+#ifdef REISERQUOTA_DEBUG
+	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
+		       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
+		       quota_cut_bytes, inode->i_uid, '?');
+#endif
+	dquot_free_space_nodirty(inode, quota_cut_bytes);
+	return ret_value;
+}
+
+static void truncate_directory(struct reiserfs_transaction_handle *th,
+			       struct inode *inode)
+{
+	BUG_ON(!th->t_trans_id);
+	if (inode->i_nlink)
+		reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
+
+	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
+	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
+	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
+	reiserfs_update_sd(th, inode);
+	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
+	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
+}
+
+/* Truncate file to the new size. Note, this must be called with a transaction
+   already started */
+int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
+			  struct inode *inode,	/* ->i_size contains new size */
+			 struct page *page,	/* up to date for last block */
+			 int update_timestamps	/* when it is called by
+						   file_release to convert
+						   the tail - no timestamps
+						   should be updated */
+    )
+{
+	INITIALIZE_PATH(s_search_path);	/* Path to the current object item. */
+	struct item_head *p_le_ih;	/* Pointer to an item header. */
+	struct cpu_key s_item_key;	/* Key to search for a previous file item. */
+	loff_t file_size,	/* Old file size. */
+	 new_file_size;	/* New file size. */
+	int deleted;		/* Number of deleted or truncated bytes. */
+	int retval;
+	int err = 0;
+
+	BUG_ON(!th->t_trans_id);
+	if (!
+	    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
+	     || S_ISLNK(inode->i_mode)))
+		return 0;
+
+	if (S_ISDIR(inode->i_mode)) {
+		// deletion of directory - no need to update timestamps
+		truncate_directory(th, inode);
+		return 0;
+	}
+
+	/* Get new file size. */
+	new_file_size = inode->i_size;
+
+	// FIXME: note that the key type is unimportant here
+	make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
+		     TYPE_DIRECT, 3);
+
+	retval =
+	    search_for_position_by_key(inode->i_sb, &s_item_key,
+				       &s_search_path);
+	if (retval == IO_ERROR) {
+		reiserfs_error(inode->i_sb, "vs-5657",
+			       "i/o failure occurred trying to truncate %K",
+			       &s_item_key);
+		err = -EIO;
+		goto out;
+	}
+	if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
+		reiserfs_error(inode->i_sb, "PAP-5660",
+			       "wrong result %d of search for %K", retval,
+			       &s_item_key);
+
+		err = -EIO;
+		goto out;
+	}
+
+	s_search_path.pos_in_item--;
+
+	/* Get real file size (total length of all file items) */
+	p_le_ih = PATH_PITEM_HEAD(&s_search_path);
+	if (is_statdata_le_ih(p_le_ih))
+		file_size = 0;
+	else {
+		loff_t offset = le_ih_k_offset(p_le_ih);
+		int bytes =
+		    op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
+
+		/* this may not match the real file size: the file would have
+		   exactly this size only if the last direct item had no padding
+		   zeros and the last unformatted node had no free space */
+		file_size = offset + bytes - 1;
+	}
+	/*
+	 * are we doing a full truncate or delete, if so
+	 * kick in the reada code
+	 */
+	if (new_file_size == 0)
+		s_search_path.reada = PATH_READA | PATH_READA_BACK;
+
+	if (file_size == 0 || file_size < new_file_size) {
+		goto update_and_out;
+	}
+
+	/* Update key to search for the last file item. */
+	set_cpu_key_k_offset(&s_item_key, file_size);
+
+	do {
+		/* Cut or delete file item. */
+		deleted =
+		    reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
+					   inode, page, new_file_size);
+		if (deleted < 0) {
+			reiserfs_warning(inode->i_sb, "vs-5665",
+					 "reiserfs_cut_from_item failed");
+			reiserfs_check_path(&s_search_path);
+			return 0;
+		}
+
+		RFALSE(deleted > file_size,
+		       "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
+		       deleted, file_size, &s_item_key);
+
+		/* Change key to search the last file item. */
+		file_size -= deleted;
+
+		set_cpu_key_k_offset(&s_item_key, file_size);
+
+		/* While there are bytes to truncate and the previous file item
+		   is present in the tree. */
+
+		/*
+		 ** This loop could take a really long time, and could log
+		 ** many more blocks than a transaction can hold.  So, we do a polite
+		 ** journal end here, and if the transaction needs ending, we make
+		 ** sure the file is consistent before ending the current trans
+		 ** and starting a new one
+		 */
+		if (journal_transaction_should_end(th, 0) ||
+		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
+			int orig_len_alloc = th->t_blocks_allocated;
+			pathrelse(&s_search_path);
+
+			if (update_timestamps) {
+				inode->i_mtime = CURRENT_TIME_SEC;
+				inode->i_ctime = CURRENT_TIME_SEC;
+			}
+			reiserfs_update_sd(th, inode);
+
+			err = journal_end(th, inode->i_sb, orig_len_alloc);
+			if (err)
+				goto out;
+			err = journal_begin(th, inode->i_sb,
+					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
+			if (err)
+				goto out;
+			reiserfs_update_inode_transaction(inode);
+		}
+	} while (file_size > ROUND_UP(new_file_size) &&
+		 search_for_position_by_key(inode->i_sb, &s_item_key,
+					    &s_search_path) == POSITION_FOUND);
+
+	RFALSE(file_size > ROUND_UP(new_file_size),
+	       "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
+	       new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
+
+      update_and_out:
+	if (update_timestamps) {
+		// this is truncate, not file closing
+		inode->i_mtime = CURRENT_TIME_SEC;
+		inode->i_ctime = CURRENT_TIME_SEC;
+	}
+	reiserfs_update_sd(th, inode);
+
+      out:
+	pathrelse(&s_search_path);
+	return err;
+}
+
+#ifdef CONFIG_REISERFS_CHECK
+// this makes sure that we __append__, not overwrite or add holes
+static void check_research_for_paste(struct treepath *path,
+				     const struct cpu_key *key)
+{
+	struct item_head *found_ih = get_ih(path);
+
+	if (is_direct_le_ih(found_ih)) {
+		if (le_ih_k_offset(found_ih) +
+		    op_bytes_number(found_ih,
+				    get_last_bh(path)->b_size) !=
+		    cpu_key_k_offset(key)
+		    || op_bytes_number(found_ih,
+				       get_last_bh(path)->b_size) !=
+		    pos_in_item(path))
+			reiserfs_panic(NULL, "PAP-5720", "found direct item "
+				       "%h or position (%d) does not match "
+				       "to key %K", found_ih,
+				       pos_in_item(path), key);
+	}
+	if (is_indirect_le_ih(found_ih)) {
+		if (le_ih_k_offset(found_ih) +
+		    op_bytes_number(found_ih,
+				    get_last_bh(path)->b_size) !=
+		    cpu_key_k_offset(key)
+		    || I_UNFM_NUM(found_ih) != pos_in_item(path)
+		    || get_ih_free_space(found_ih) != 0)
+			reiserfs_panic(NULL, "PAP-5730", "found indirect "
+				       "item (%h) or position (%d) does not "
+				       "match to key (%K)",
+				       found_ih, pos_in_item(path), key);
+	}
+}
+#endif				/* CONFIG_REISERFS_CHECK */
+
+/* Paste bytes into an existing item. Returns the number of bytes pasted into the item. */
+int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path,	/* Path to the pasted item.	  */
+			     const struct cpu_key *key,	/* Key to search for the needed item. */
+			     struct inode *inode,	/* Inode item belongs to */
+			     const char *body,	/* Pointer to the bytes to paste.    */
+			     int pasted_size)
+{				/* Size of pasted bytes.             */
+	struct tree_balance s_paste_balance;
+	int retval;
+	int fs_gen;
+
+	BUG_ON(!th->t_trans_id);
+
+	fs_gen = get_generation(inode->i_sb);
+
+#ifdef REISERQUOTA_DEBUG
+	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
+		       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
+		       pasted_size, inode->i_uid,
+		       key2type(&(key->on_disk_key)));
+#endif
+
+	reiserfs_write_unlock(inode->i_sb);
+	retval = dquot_alloc_space_nodirty(inode, pasted_size);
+	reiserfs_write_lock(inode->i_sb);
+	if (retval) {
+		pathrelse(search_path);
+		return retval;
+	}
+	init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
+		       pasted_size);
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	s_paste_balance.key = key->on_disk_key;
+#endif
+
+	/* DQUOT_* can schedule, must check before the fix_nodes */
+	if (fs_changed(fs_gen, inode->i_sb)) {
+		goto search_again;
+	}
+
+	while ((retval =
+		fix_nodes(M_PASTE, &s_paste_balance, NULL,
+			  body)) == REPEAT_SEARCH) {
+	      search_again:
+		/* file system changed while we were in the fix_nodes */
+		PROC_INFO_INC(th->t_super, paste_into_item_restarted);
+		retval =
+		    search_for_position_by_key(th->t_super, key,
+					       search_path);
+		if (retval == IO_ERROR) {
+			retval = -EIO;
+			goto error_out;
+		}
+		if (retval == POSITION_FOUND) {
+			reiserfs_warning(inode->i_sb, "PAP-5710",
+					 "entry or pasted byte (%K) exists",
+					 key);
+			retval = -EEXIST;
+			goto error_out;
+		}
+#ifdef CONFIG_REISERFS_CHECK
+		check_research_for_paste(search_path, key);
+#endif
+	}
+
+	/* Perform balancing after all resources are collected by fix_nodes, and
+	   accessing them will not risk triggering schedule. */
+	if (retval == CARRY_ON) {
+		do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
+		return 0;
+	}
+	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
+      error_out:
+	/* this also releases the path */
+	unfix_nodes(&s_paste_balance);
+#ifdef REISERQUOTA_DEBUG
+	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
+		       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
+		       pasted_size, inode->i_uid,
+		       key2type(&(key->on_disk_key)));
+#endif
+	dquot_free_space_nodirty(inode, pasted_size);
+	return retval;
+}
+
+/* Insert new item into the buffer at the path.
+ * th   - active transaction handle
+ * path - path to the inserted item
+ * ih   - pointer to the item header to insert
+ * body - pointer to the bytes to insert
+ */
+int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
+			 struct treepath *path, const struct cpu_key *key,
+			 struct item_head *ih, struct inode *inode,
+			 const char *body)
+{
+	struct tree_balance s_ins_balance;
+	int retval;
+	int fs_gen = 0;
+	int quota_bytes = 0;
+
+	BUG_ON(!th->t_trans_id);
+
+	if (inode) {		/* Do we count quotas for item? */
+		fs_gen = get_generation(inode->i_sb);
+		quota_bytes = ih_item_len(ih);
+
+		/* hack so the quota code doesn't have to guess if the file has
+		 ** a tail, links are always tails, so there's no guessing needed
+		 */
+		if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
+			quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
+#ifdef REISERQUOTA_DEBUG
+		reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
+			       "reiserquota insert_item(): allocating %u id=%u type=%c",
+			       quota_bytes, inode->i_uid, head2type(ih));
+#endif
+		reiserfs_write_unlock(inode->i_sb);
+		/* We can't dirty the inode here. It would be written immediately,
+		 * but the appropriate stat item isn't inserted yet... */
+		retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+		reiserfs_write_lock(inode->i_sb);
+		if (retval) {
+			pathrelse(path);
+			return retval;
+		}
+	}
+	init_tb_struct(th, &s_ins_balance, th->t_super, path,
+		       IH_SIZE + ih_item_len(ih));
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	s_ins_balance.key = key->on_disk_key;
+#endif
+	/* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
+	if (inode && fs_changed(fs_gen, inode->i_sb)) {
+		goto search_again;
+	}
+
+	while ((retval =
+		fix_nodes(M_INSERT, &s_ins_balance, ih,
+			  body)) == REPEAT_SEARCH) {
+	      search_again:
+		/* file system changed while we were in the fix_nodes */
+		PROC_INFO_INC(th->t_super, insert_item_restarted);
+		retval = search_item(th->t_super, key, path);
+		if (retval == IO_ERROR) {
+			retval = -EIO;
+			goto error_out;
+		}
+		if (retval == ITEM_FOUND) {
+			reiserfs_warning(th->t_super, "PAP-5760",
+					 "key %K already exists in the tree",
+					 key);
+			retval = -EEXIST;
+			goto error_out;
+		}
+	}
+
+	/* Perform balancing after all resources have been collected at once. */
+	if (retval == CARRY_ON) {
+		do_balance(&s_ins_balance, ih, body, M_INSERT);
+		return 0;
+	}
+
+	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
+      error_out:
+	/* also releases the path */
+	unfix_nodes(&s_ins_balance);
+#ifdef REISERQUOTA_DEBUG
+	reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
+		       "reiserquota insert_item(): freeing %u id=%u type=%c",
+		       quota_bytes, inode->i_uid, head2type(ih));
+#endif
+	if (inode)
+		dquot_free_space_nodirty(inode, quota_bytes);
+	return retval;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/super.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/super.c
new file mode 100644
index 0000000..8169be9
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/super.c
@@ -0,0 +1,2401 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ *
+ * Trivial changes by Alan Cox to add the LFS fixes
+ *
+ * Trivial Changes:
+ * Rights granted to Hans Reiser to redistribute under other terms providing
+ * he accepts all liability including but not limited to patent, fitness
+ * for purpose, and direct or indirect claims arising from failure to perform.
+ *
+ * NO WARRANTY
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/time.h>
+#include <asm/uaccess.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/quotaops.h>
+#include <linux/vfs.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/crc32.h>
+#include <linux/seq_file.h>
+
+struct file_system_type reiserfs_fs_type;
+
+static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING;
+static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING;
+static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING;
+
+int is_reiserfs_3_5(struct reiserfs_super_block *rs)
+{
+	return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string,
+			strlen(reiserfs_3_5_magic_string));
+}
+
+int is_reiserfs_3_6(struct reiserfs_super_block *rs)
+{
+	return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string,
+			strlen(reiserfs_3_6_magic_string));
+}
+
+int is_reiserfs_jr(struct reiserfs_super_block *rs)
+{
+	return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string,
+			strlen(reiserfs_jr_magic_string));
+}
+
+static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
+{
+	return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) ||
+		is_reiserfs_jr(rs));
+}
+
+static int reiserfs_remount(struct super_block *s, int *flags, char *data);
+static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
+void show_alloc_options(struct seq_file *seq, struct super_block *s);
+
+static int reiserfs_sync_fs(struct super_block *s, int wait)
+{
+	struct reiserfs_transaction_handle th;
+
+	reiserfs_write_lock(s);
+	if (!journal_begin(&th, s, 1))
+		if (!journal_end_sync(&th, s, 1))
+			reiserfs_flush_old_commits(s);
+	s->s_dirt = 0;	/* Even if it's not true -
+			 * we'd loop forever in sync_supers otherwise */
+	reiserfs_write_unlock(s);
+	return 0;
+}
+
+static void reiserfs_write_super(struct super_block *s)
+{
+	reiserfs_sync_fs(s, 1);
+}
+
+static int reiserfs_freeze(struct super_block *s)
+{
+	struct reiserfs_transaction_handle th;
+	reiserfs_write_lock(s);
+	if (!(s->s_flags & MS_RDONLY)) {
+		int err = journal_begin(&th, s, 1);
+		if (err) {
+			reiserfs_block_writes(&th);
+		} else {
+			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
+						     1);
+			journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+			reiserfs_block_writes(&th);
+			journal_end_sync(&th, s, 1);
+		}
+	}
+	s->s_dirt = 0;
+	reiserfs_write_unlock(s);
+	return 0;
+}
+
+static int reiserfs_unfreeze(struct super_block *s)
+{
+	reiserfs_allow_writes(s);
+	return 0;
+}
+
+extern const struct in_core_key MAX_IN_CORE_KEY;
+
+/* This is used to delete a "save link" when there are no items left of the
+   file it points to. That can happen either when the unlink completed but
+   the "save link" removal did not, or when the file had both an unlink and
+   a truncate pending and the unlink completed first (the key of the "save
+   link" protecting the unlink is bigger than the key of the "save link"
+   protecting the truncate), leaving no items for the truncate completion
+   to work on */
+static int remove_save_link_only(struct super_block *s,
+				 struct reiserfs_key *key, int oid_free)
+{
+	struct reiserfs_transaction_handle th;
+	int err;
+
+	/* we are going to do one balancing */
+	err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT);
+	if (err)
+		return err;
+
+	reiserfs_delete_solid_item(&th, NULL, key);
+	if (oid_free)
+		/* removals are protected by direct items */
+		reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid));
+
+	return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT);
+}
+
+#ifdef CONFIG_QUOTA
+static int reiserfs_quota_on_mount(struct super_block *, int);
+#endif
+
+/* look for uncompleted unlinks and truncates and complete them */
+static int finish_unfinished(struct super_block *s)
+{
+	INITIALIZE_PATH(path);
+	struct cpu_key max_cpu_key, obj_key;
+	struct reiserfs_key save_link_key, last_inode_key;
+	int retval = 0;
+	struct item_head *ih;
+	struct buffer_head *bh;
+	int item_pos;
+	char *item;
+	int done;
+	struct inode *inode;
+	int truncate;
+#ifdef CONFIG_QUOTA
+	int i;
+	int ms_active_set;
+	int quota_enabled[MAXQUOTAS];
+#endif
+
+	/* compose key to look for "save" links */
+	max_cpu_key.version = KEY_FORMAT_3_5;
+	max_cpu_key.on_disk_key.k_dir_id = ~0U;
+	max_cpu_key.on_disk_key.k_objectid = ~0U;
+	set_cpu_key_k_offset(&max_cpu_key, ~0U);
+	max_cpu_key.key_length = 3;
+
+	memset(&last_inode_key, 0, sizeof(last_inode_key));
+
+#ifdef CONFIG_QUOTA
+	/* Needed for iput() to work correctly and not trash data */
+	if (s->s_flags & MS_ACTIVE) {
+		ms_active_set = 0;
+	} else {
+		ms_active_set = 1;
+		s->s_flags |= MS_ACTIVE;
+	}
+	/* Turn on quotas so that they are updated correctly */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		quota_enabled[i] = 1;
+		if (REISERFS_SB(s)->s_qf_names[i]) {
+			int ret;
+
+			if (sb_has_quota_active(s, i)) {
+				quota_enabled[i] = 0;
+				continue;
+			}
+			ret = reiserfs_quota_on_mount(s, i);
+			if (ret < 0)
+				reiserfs_warning(s, "reiserfs-2500",
+						 "cannot turn on journaled "
+						 "quota: error %d", ret);
+		}
+	}
+#endif
+
+	done = 0;
+	REISERFS_SB(s)->s_is_unlinked_ok = 1;
+	while (!retval) {
+		retval = search_item(s, &max_cpu_key, &path);
+		if (retval != ITEM_NOT_FOUND) {
+			reiserfs_error(s, "vs-2140",
+				       "search_by_key returned %d", retval);
+			break;
+		}
+
+		bh = get_last_bh(&path);
+		item_pos = get_item_pos(&path);
+		if (item_pos != B_NR_ITEMS(bh)) {
+			reiserfs_warning(s, "vs-2060",
+					 "wrong position found");
+			break;
+		}
+		item_pos--;
+		ih = B_N_PITEM_HEAD(bh, item_pos);
+
+		if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID)
+			/* there are no "save" links anymore */
+			break;
+
+		save_link_key = ih->ih_key;
+		if (is_indirect_le_ih(ih))
+			truncate = 1;
+		else
+			truncate = 0;
+
+		/* reiserfs_iget needs k_dir_id and k_objectid only */
+		item = B_I_PITEM(bh, ih);
+		obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item);
+		obj_key.on_disk_key.k_objectid =
+		    le32_to_cpu(ih->ih_key.k_objectid);
+		obj_key.on_disk_key.k_offset = 0;
+		obj_key.on_disk_key.k_type = 0;
+
+		pathrelse(&path);
+
+		inode = reiserfs_iget(s, &obj_key);
+		if (!inode) {
+			/* the unlink almost completed; it just did not manage to
+			   remove the "save" link and release the objectid */
+			reiserfs_warning(s, "vs-2180", "iget failed for %K",
+					 &obj_key);
+			retval = remove_save_link_only(s, &save_link_key, 1);
+			continue;
+		}
+
+		if (!truncate && inode->i_nlink) {
+			/* file is not unlinked */
+			reiserfs_warning(s, "vs-2185",
+					 "file %K is not unlinked",
+					 &obj_key);
+			retval = remove_save_link_only(s, &save_link_key, 0);
+			continue;
+		}
+		reiserfs_write_unlock(s);
+		dquot_initialize(inode);
+		reiserfs_write_lock(s);
+
+		if (truncate && S_ISDIR(inode->i_mode)) {
+			/* We got a truncate request for a dir, which is impossible.
+			   The only imaginable way is to execute the unfinished truncate
+			   request, then boot into an old kernel, remove the file and
+			   create a dir with the same key. */
+			reiserfs_warning(s, "green-2101",
+					 "impossible truncate on a "
+					 "directory %k. Please report",
+					 INODE_PKEY(inode));
+			retval = remove_save_link_only(s, &save_link_key, 0);
+			truncate = 0;
+			iput(inode);
+			continue;
+		}
+
+		if (truncate) {
+			REISERFS_I(inode)->i_flags |=
+			    i_link_saved_truncate_mask;
+			/* uncompleted truncate found. The new size was committed
+			   together with the "save" link */
+			reiserfs_info(s, "Truncating %k to %Ld ..",
+				      INODE_PKEY(inode), inode->i_size);
+			reiserfs_truncate_file(inode,
+					       0
+					       /*don't update modification time */
+					       );
+			retval = remove_save_link(inode, truncate);
+		} else {
+			REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
+			/* uncompleted unlink (rmdir) found */
+			reiserfs_info(s, "Removing %k..", INODE_PKEY(inode));
+			if (memcmp(&last_inode_key, INODE_PKEY(inode),
+					sizeof(last_inode_key))){
+				last_inode_key = *INODE_PKEY(inode);
+				/* removal gets completed in iput */
+				retval = 0;
+			} else {
+				reiserfs_warning(s, "super-2189", "Dead loop "
+						 "in finish_unfinished "
+						 "detected, just remove "
+						 "save link\n");
+				retval = remove_save_link_only(s,
+							&save_link_key, 0);
+			}
+		}
+
+		iput(inode);
+		printk("done\n");
+		done++;
+	}
+	REISERFS_SB(s)->s_is_unlinked_ok = 0;
+
+#ifdef CONFIG_QUOTA
+	/* Turn quotas off */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (sb_dqopt(s)->files[i] && quota_enabled[i])
+			dquot_quota_off(s, i);
+	}
+	if (ms_active_set)
+		/* Restore the flag back */
+		s->s_flags &= ~MS_ACTIVE;
+#endif
+	pathrelse(&path);
+	if (done)
+		reiserfs_info(s, "There were %d uncompleted unlinks/truncates. "
+			      "Completed\n", done);
+	return retval;
+}
+
+/* To protect a file being unlinked from getting lost, we "save"-link files
+   being unlinked. This link is deleted in the same transaction as the last
+   item of the file. When mounting the filesystem we scan all these links and
+   remove the files which almost got lost. */
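+/* Key layout sketch (derived from the code below): for an unlinked file with
+   objectid N the "save link" key is (MAX_KEY_OBJECTID, N, 1 + blocksize,
+   TYPE_DIRECT); for a pending truncate it is (MAX_KEY_OBJECTID, N, 1,
+   TYPE_INDIRECT). The 4-byte body holds the inode's k_dir_id. */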
+void add_save_link(struct reiserfs_transaction_handle *th,
+		   struct inode *inode, int truncate)
+{
+	INITIALIZE_PATH(path);
+	int retval;
+	struct cpu_key key;
+	struct item_head ih;
+	__le32 link;
+
+	BUG_ON(!th->t_trans_id);
+
+	/* file can only get one "save link" of each kind */
+	RFALSE(truncate &&
+	       (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask),
+	       "saved link already exists for truncated inode %lx",
+	       (long)inode->i_ino);
+	RFALSE(!truncate &&
+	       (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask),
+	       "saved link already exists for unlinked inode %lx",
+	       (long)inode->i_ino);
+
+	/* setup key of "save" link */
+	key.version = KEY_FORMAT_3_5;
+	key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID;
+	key.on_disk_key.k_objectid = inode->i_ino;
+	if (!truncate) {
+		/* unlink, rmdir, rename */
+		set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize);
+		set_cpu_key_k_type(&key, TYPE_DIRECT);
+
+		/* item head of "safe" link */
+		make_le_item_head(&ih, &key, key.version,
+				  1 + inode->i_sb->s_blocksize, TYPE_DIRECT,
+				  4 /*length */ , 0xffff /*free space */ );
+	} else {
+		/* truncate */
+		if (S_ISDIR(inode->i_mode))
+			reiserfs_warning(inode->i_sb, "green-2102",
+					 "Adding a truncate savelink for "
+					 "a directory %k! Please report",
+					 INODE_PKEY(inode));
+		set_cpu_key_k_offset(&key, 1);
+		set_cpu_key_k_type(&key, TYPE_INDIRECT);
+
+		/* item head of "safe" link */
+		make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT,
+				  4 /*length */ , 0 /*free space */ );
+	}
+	key.key_length = 3;
+
+	/* look for its place in the tree */
+	retval = search_item(inode->i_sb, &key, &path);
+	if (retval != ITEM_NOT_FOUND) {
+		if (retval != -ENOSPC)
+			reiserfs_error(inode->i_sb, "vs-2100",
+				       "search_by_key (%K) returned %d", &key,
+				       retval);
+		pathrelse(&path);
+		return;
+	}
+
+	/* body of "save" link */
+	link = INODE_PKEY(inode)->k_dir_id;
+
+	/* put "save" link into tree, don't charge quota to anyone */
+	retval =
+	    reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
+	if (retval) {
+		if (retval != -ENOSPC)
+			reiserfs_error(inode->i_sb, "vs-2120",
+				       "insert_item returned %d", retval);
+	} else {
+		if (truncate)
+			REISERFS_I(inode)->i_flags |=
+			    i_link_saved_truncate_mask;
+		else
+			REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
+	}
+}
+
+/* this opens transaction unlike add_save_link */
+int remove_save_link(struct inode *inode, int truncate)
+{
+	struct reiserfs_transaction_handle th;
+	struct reiserfs_key key;
+	int err;
+
+	/* we are going to do one balancing only */
+	err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
+	if (err)
+		return err;
+
+	/* setup key of "save" link */
+	key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID);
+	key.k_objectid = INODE_PKEY(inode)->k_objectid;
+	if (!truncate) {
+		/* unlink, rmdir, rename */
+		set_le_key_k_offset(KEY_FORMAT_3_5, &key,
+				    1 + inode->i_sb->s_blocksize);
+		set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT);
+	} else {
+		/* truncate */
+		set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1);
+		set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT);
+	}
+
+	if ((truncate &&
+	     (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) ||
+	    (!truncate &&
+	     (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask)))
+		/* don't take quota bytes from anywhere */
+		reiserfs_delete_solid_item(&th, NULL, &key);
+	if (!truncate) {
+		reiserfs_release_objectid(&th, inode->i_ino);
+		REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask;
+	} else
+		REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask;
+
+	return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
+}
+
+static void reiserfs_kill_sb(struct super_block *s)
+{
+	if (REISERFS_SB(s)) {
+		/*
+		 * Force any pending inode evictions to occur now. Any
+		 * inodes to be removed that have extended attributes
+		 * associated with them need to clean them up before
+		 * we can release the extended attribute root dentries.
+		 * shrink_dcache_for_umount will BUG if we don't release
+		 * those before it's called so ->put_super is too late.
+		 */
+		shrink_dcache_sb(s);
+
+		dput(REISERFS_SB(s)->xattr_root);
+		REISERFS_SB(s)->xattr_root = NULL;
+		dput(REISERFS_SB(s)->priv_root);
+		REISERFS_SB(s)->priv_root = NULL;
+	}
+
+	kill_block_super(s);
+}
+
+static void reiserfs_put_super(struct super_block *s)
+{
+	struct reiserfs_transaction_handle th;
+	th.t_trans_id = 0;
+
+	dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
+	reiserfs_write_lock(s);
+
+	if (s->s_dirt)
+		reiserfs_write_super(s);
+
+	/* change file system state to current state if it was mounted with read-write permissions */
+	if (!(s->s_flags & MS_RDONLY)) {
+		if (!journal_begin(&th, s, 10)) {
+			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
+						     1);
+			set_sb_umount_state(SB_DISK_SUPER_BLOCK(s),
+					    REISERFS_SB(s)->s_mount_state);
+			journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+		}
+	}
+
+	/* note, journal_release checks for readonly mount, and can decide not
+	 ** to do a journal_end
+	 */
+	journal_release(&th, s);
+
+	reiserfs_free_bitmap_cache(s);
+
+	brelse(SB_BUFFER_WITH_SB(s));
+
+	print_statistics(s);
+
+	if (REISERFS_SB(s)->reserved_blocks != 0) {
+		reiserfs_warning(s, "green-2005", "reserved blocks left %d",
+				 REISERFS_SB(s)->reserved_blocks);
+	}
+
+	reiserfs_proc_info_done(s);
+
+	reiserfs_write_unlock(s);
+	mutex_destroy(&REISERFS_SB(s)->lock);
+	kfree(s->s_fs_info);
+	s->s_fs_info = NULL;
+}
+
+static struct kmem_cache *reiserfs_inode_cachep;
+
+static struct inode *reiserfs_alloc_inode(struct super_block *sb)
+{
+	struct reiserfs_inode_info *ei;
+	ei = (struct reiserfs_inode_info *)
+	    kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
+	if (!ei)
+		return NULL;
+	atomic_set(&ei->openers, 0);
+	mutex_init(&ei->tailpack);
+	return &ei->vfs_inode;
+}
+
+static void reiserfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
+}
+
+static void reiserfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, reiserfs_i_callback);
+}
+
+static void init_once(void *foo)
+{
+	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
+
+	INIT_LIST_HEAD(&ei->i_prealloc_list);
+	inode_init_once(&ei->vfs_inode);
+}
+
+static int init_inodecache(void)
+{
+	reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache",
+						  sizeof(struct
+							 reiserfs_inode_info),
+						  0, (SLAB_RECLAIM_ACCOUNT|
+							SLAB_MEM_SPREAD),
+						  init_once);
+	if (reiserfs_inode_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void destroy_inodecache(void)
+{
+	kmem_cache_destroy(reiserfs_inode_cachep);
+}
+
+/* we don't mark inodes dirty, we just log them */
+static void reiserfs_dirty_inode(struct inode *inode, int flags)
+{
+	struct reiserfs_transaction_handle th;
+
+	int err = 0;
+	int lock_depth;
+
+	if (inode->i_sb->s_flags & MS_RDONLY) {
+		reiserfs_warning(inode->i_sb, "clm-6006",
+				 "writing inode %lu on readonly FS",
+				 inode->i_ino);
+		return;
+	}
+	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+
+	/* this is really only used for atime updates, so they don't have
+	 ** to be included in O_SYNC or fsync
+	 */
+	err = journal_begin(&th, inode->i_sb, 1);
+	if (err)
+		goto out;
+
+	reiserfs_update_sd(&th, inode);
+	journal_end(&th, inode->i_sb, 1);
+
+out:
+	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+}
+
+static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct super_block *s = root->d_sb;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	long opts = REISERFS_SB(s)->s_mount_opt;
+
+	if (opts & (1 << REISERFS_LARGETAIL))
+		seq_puts(seq, ",tails=on");
+	else if (!(opts & (1 << REISERFS_SMALLTAIL)))
+		seq_puts(seq, ",notail");
+	/* tails=small is default so we don't show it */
+
+	if (!(opts & (1 << REISERFS_BARRIER_FLUSH)))
+		seq_puts(seq, ",barrier=none");
+	/* barrier=flush is default so we don't show it */
+
+	if (opts & (1 << REISERFS_ERROR_CONTINUE))
+		seq_puts(seq, ",errors=continue");
+	else if (opts & (1 << REISERFS_ERROR_PANIC))
+		seq_puts(seq, ",errors=panic");
+	/* errors=ro is default so we don't show it */
+
+	if (opts & (1 << REISERFS_DATA_LOG))
+		seq_puts(seq, ",data=journal");
+	else if (opts & (1 << REISERFS_DATA_WRITEBACK))
+		seq_puts(seq, ",data=writeback");
+	/* data=ordered is default so we don't show it */
+
+	if (opts & (1 << REISERFS_ATTRS))
+		seq_puts(seq, ",attrs");
+
+	if (opts & (1 << REISERFS_XATTRS_USER))
+		seq_puts(seq, ",user_xattr");
+
+	if (opts & (1 << REISERFS_EXPOSE_PRIVROOT))
+		seq_puts(seq, ",expose_privroot");
+
+	if (opts & (1 << REISERFS_POSIXACL))
+		seq_puts(seq, ",acl");
+
+	if (REISERFS_SB(s)->s_jdev)
+		seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
+
+	if (journal->j_max_commit_age != journal->j_default_max_commit_age)
+		seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
+
+#ifdef CONFIG_QUOTA
+	if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
+		seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
+	else if (opts & (1 << REISERFS_USRQUOTA))
+		seq_puts(seq, ",usrquota");
+	if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
+		seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
+	else if (opts & (1 << REISERFS_GRPQUOTA))
+		seq_puts(seq, ",grpquota");
+	if (REISERFS_SB(s)->s_jquota_fmt) {
+		if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD)
+			seq_puts(seq, ",jqfmt=vfsold");
+		else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0)
+			seq_puts(seq, ",jqfmt=vfsv0");
+	}
+#endif
+
+	/* Block allocator options */
+	if (opts & (1 << REISERFS_NO_BORDER))
+		seq_puts(seq, ",block-allocator=noborder");
+	if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION))
+		seq_puts(seq, ",block-allocator=no_unhashed_relocation");
+	if (opts & (1 << REISERFS_HASHED_RELOCATION))
+		seq_puts(seq, ",block-allocator=hashed_relocation");
+	if (opts & (1 << REISERFS_TEST4))
+		seq_puts(seq, ",block-allocator=test4");
+	show_alloc_options(seq, s);
+	return 0;
+}
+
+#ifdef CONFIG_QUOTA
+static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
+				    size_t, loff_t);
+static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
+				   loff_t);
+#endif
+
+static const struct super_operations reiserfs_sops = {
+	.alloc_inode = reiserfs_alloc_inode,
+	.destroy_inode = reiserfs_destroy_inode,
+	.write_inode = reiserfs_write_inode,
+	.dirty_inode = reiserfs_dirty_inode,
+	.evict_inode = reiserfs_evict_inode,
+	.put_super = reiserfs_put_super,
+	.write_super = reiserfs_write_super,
+	.sync_fs = reiserfs_sync_fs,
+	.freeze_fs = reiserfs_freeze,
+	.unfreeze_fs = reiserfs_unfreeze,
+	.statfs = reiserfs_statfs,
+	.remount_fs = reiserfs_remount,
+	.show_options = reiserfs_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read = reiserfs_quota_read,
+	.quota_write = reiserfs_quota_write,
+#endif
+};
+
+#ifdef CONFIG_QUOTA
+#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
+
+static int reiserfs_write_dquot(struct dquot *);
+static int reiserfs_acquire_dquot(struct dquot *);
+static int reiserfs_release_dquot(struct dquot *);
+static int reiserfs_mark_dquot_dirty(struct dquot *);
+static int reiserfs_write_info(struct super_block *, int);
+static int reiserfs_quota_on(struct super_block *, int, int, struct path *);
+
+static const struct dquot_operations reiserfs_quota_operations = {
+	.write_dquot = reiserfs_write_dquot,
+	.acquire_dquot = reiserfs_acquire_dquot,
+	.release_dquot = reiserfs_release_dquot,
+	.mark_dirty = reiserfs_mark_dquot_dirty,
+	.write_info = reiserfs_write_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
+};
+
+static const struct quotactl_ops reiserfs_qctl_operations = {
+	.quota_on = reiserfs_quota_on,
+	.quota_off = dquot_quota_off,
+	.quota_sync = dquot_quota_sync,
+	.get_info = dquot_get_dqinfo,
+	.set_info = dquot_set_dqinfo,
+	.get_dqblk = dquot_get_dqblk,
+	.set_dqblk = dquot_set_dqblk,
+};
+#endif
+
+static const struct export_operations reiserfs_export_ops = {
+	.encode_fh = reiserfs_encode_fh,
+	.fh_to_dentry = reiserfs_fh_to_dentry,
+	.fh_to_parent = reiserfs_fh_to_parent,
+	.get_parent = reiserfs_get_parent,
+};
+
+/* this struct is used in reiserfs_getopt() to hold the value of those
+   mount options that take values rather than being toggles. */
+typedef struct {
+	char *value;
+	int setmask;		/* bitmask to set in the mount_options bitmask
+				   when this value is found; 0 if no bits are
+				   to be changed. */
+	int clrmask;		/* bitmask to clear in the mount_options bitmask
+				   when this value is found; 0 if no bits are
+				   to be changed. This is applied BEFORE
+				   setmask. */
+} arg_desc_t;
+
+/* Set this bit in arg_required to allow empty arguments */
+#define REISERFS_OPT_ALLOWEMPTY 31
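+/* Example (see the usrjquota/grpjquota entries below): an arg_required of
+   'u' | (1 << REISERFS_OPT_ALLOWEMPTY) lets "usrjquota=" be passed with an
+   empty argument, which clears the stored quota file name. */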
+
+/* this struct is used in reiserfs_getopt() to describe the set of reiserfs
+   mount options */
+typedef struct {
+	char *option_name;
+	int arg_required;	/* 0 if no argument is required, non-zero otherwise */
+	const arg_desc_t *values;	/* list of values accepted by an option */
+	int setmask;		/* bitmask to set in the mount_options bitmask
+				   when this value is found; 0 if no bits are
+				   to be changed. */
+	int clrmask;		/* bitmask to clear in the mount_options bitmask
+				   when this value is found; 0 if no bits are
+				   to be changed. This is applied BEFORE
+				   setmask. */
+} opt_desc_t;
+
+/* possible values for -o data= */
+static const arg_desc_t logging_mode[] = {
+	{"ordered", 1 << REISERFS_DATA_ORDERED,
+	 (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)},
+	{"journal", 1 << REISERFS_DATA_LOG,
+	 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)},
+	{"writeback", 1 << REISERFS_DATA_WRITEBACK,
+	 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)},
+	{.value = NULL}
+};
+
+/* possible values for -o barrier= */
+static const arg_desc_t barrier_mode[] = {
+	{"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH},
+	{"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE},
+	{.value = NULL}
+};
+
+/* possible values for "-o block-allocator=" and bits which are to be set in
+   s_mount_opt of reiserfs specific part of in-core super block */
+static const arg_desc_t balloc[] = {
+	{"noborder", 1 << REISERFS_NO_BORDER, 0},
+	{"border", 0, 1 << REISERFS_NO_BORDER},
+	{"no_unhashed_relocation", 1 << REISERFS_NO_UNHASHED_RELOCATION, 0},
+	{"hashed_relocation", 1 << REISERFS_HASHED_RELOCATION, 0},
+	{"test4", 1 << REISERFS_TEST4, 0},
+	{"notest4", 0, 1 << REISERFS_TEST4},
+	{NULL, 0, 0}
+};
+
+static const arg_desc_t tails[] = {
+	{"on", 1 << REISERFS_LARGETAIL, 1 << REISERFS_SMALLTAIL},
+	{"off", 0, (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
+	{"small", 1 << REISERFS_SMALLTAIL, 1 << REISERFS_LARGETAIL},
+	{NULL, 0, 0}
+};
+
+static const arg_desc_t error_actions[] = {
+	{"panic", 1 << REISERFS_ERROR_PANIC,
+	 (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)},
+	{"ro-remount", 1 << REISERFS_ERROR_RO,
+	 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)},
+#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG
+	{"continue", 1 << REISERFS_ERROR_CONTINUE,
+	 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)},
+#endif
+	{NULL, 0, 0},
+};
+
+/* process exactly one option from the list.
+   *cur - string containing the mount options
+   opts - array of options which are accepted
+   opt_arg - if the option is found, requires an argument and the argument is
+   specified in the input, a pointer to the argument is stored here
+   bit_flags - if the option requires a certain bit to be set, it is set here
+   returns -1 if an unknown option is found, opt->arg_required otherwise */
+static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
+			   char **opt_arg, unsigned long *bit_flags)
+{
+	char *p;
+	/* foo=bar,
+	   ^   ^  ^
+	   |   |  +-- option_end
+	   |   +-- arg_start
+	   +-- option_start
+	 */
+	const opt_desc_t *opt;
+	const arg_desc_t *arg;
+
+	p = *cur;
+
+	/* assume argument cannot contain commas */
+	*cur = strchr(p, ',');
+	if (*cur) {
+		*(*cur) = '\0';
+		(*cur)++;
+	}
+
+	if (!strncmp(p, "alloc=", 6)) {
+		/* Ugly special case; we should probably redo the options parser
+		   so that it can understand several arguments for some options,
+		   and also fill several bitfields with option values. */
+		if (reiserfs_parse_alloc_options(s, p + 6)) {
+			return -1;
+		} else {
+			return 0;
+		}
+	}
+
+	/* for every option in the list */
+	for (opt = opts; opt->option_name; opt++) {
+		if (!strncmp(p, opt->option_name, strlen(opt->option_name))) {
+			if (bit_flags) {
+				if (opt->clrmask ==
+				    (1 << REISERFS_UNSUPPORTED_OPT))
+					reiserfs_warning(s, "super-6500",
+							 "%s not supported.\n",
+							 p);
+				else
+					*bit_flags &= ~opt->clrmask;
+				if (opt->setmask ==
+				    (1 << REISERFS_UNSUPPORTED_OPT))
+					reiserfs_warning(s, "super-6501",
+							 "%s not supported.\n",
+							 p);
+				else
+					*bit_flags |= opt->setmask;
+			}
+			break;
+		}
+	}
+	if (!opt->option_name) {
+		reiserfs_warning(s, "super-6502",
+				 "unknown mount option \"%s\"", p);
+		return -1;
+	}
+
+	p += strlen(opt->option_name);
+	switch (*p) {
+	case '=':
+		if (!opt->arg_required) {
+			reiserfs_warning(s, "super-6503",
+					 "the option \"%s\" does not "
+					 "require an argument\n",
+					 opt->option_name);
+			return -1;
+		}
+		break;
+
+	case 0:
+		if (opt->arg_required) {
+			reiserfs_warning(s, "super-6504",
+					 "the option \"%s\" requires an "
+					 "argument\n", opt->option_name);
+			return -1;
+		}
+		break;
+	default:
+		reiserfs_warning(s, "super-6505",
+				 "head of option \"%s\" is only correct\n",
+				 opt->option_name);
+		return -1;
+	}
+
+	/* move to the argument, or to next option if argument is not required */
+	p++;
+
+	if (opt->arg_required
+	    && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY))
+	    && !strlen(p)) {
+		/* this catches "option=," if not allowed */
+		reiserfs_warning(s, "super-6506",
+				 "empty argument for \"%s\"\n",
+				 opt->option_name);
+		return -1;
+	}
+
+	if (!opt->values) {
+		/* opt->values is NULL; *opt_arg gets a pointer to the argument */
+		*opt_arg = p;
+		return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY);
+	}
+
+	/* values possible for this option are listed in opt->values */
+	for (arg = opt->values; arg->value; arg++) {
+		if (!strcmp(p, arg->value)) {
+			if (bit_flags) {
+				*bit_flags &= ~arg->clrmask;
+				*bit_flags |= arg->setmask;
+			}
+			return opt->arg_required;
+		}
+	}
+
+	reiserfs_warning(s, "super-6506",
+			 "bad value \"%s\" for option \"%s\"\n", p,
+			 opt->option_name);
+	return -1;
+}
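+/* Illustrative walk-through: given the string "data=journal", reiserfs_getopt
+   matches the "data" entry, finds the value "journal" in logging_mode[],
+   clears the REISERFS_DATA_ORDERED and REISERFS_DATA_WRITEBACK bits, sets
+   REISERFS_DATA_LOG in *bit_flags, and returns the arg_required code 'd'. */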
+
+/* returns 0 if something is wrong in the option string, 1 otherwise */
+static int reiserfs_parse_options(struct super_block *s, char *options,	/* string given via mount's -o */
+				  unsigned long *mount_options,
+				  /* after the parsing phase, contains the
+				     collection of bitflags defining what
+				     mount options were selected. */
+				  unsigned long *blocks,	/* strtol-ed from NNN of resize=NNN */
+				  char **jdev_name,
+				  unsigned int *commit_max_age,
+				  char **qf_names,
+				  unsigned int *qfmt)
+{
+	int c;
+	char *arg = NULL;
+	char *pos;
+	opt_desc_t opts[] = {
+		/* Compatibility stuff, so that -o notail for old setups still works */
+		{"tails",.arg_required = 't',.values = tails},
+		{"notail",.clrmask =
+		 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
+		{"conv",.setmask = 1 << REISERFS_CONVERT},
+		{"attrs",.setmask = 1 << REISERFS_ATTRS},
+		{"noattrs",.clrmask = 1 << REISERFS_ATTRS},
+		{"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT},
+#ifdef CONFIG_REISERFS_FS_XATTR
+		{"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
+		{"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
+#else
+		{"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
+		{"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
+#endif
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
+		{"acl",.setmask = 1 << REISERFS_POSIXACL},
+		{"noacl",.clrmask = 1 << REISERFS_POSIXACL},
+#else
+		{"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
+		{"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
+#endif
+		{.option_name = "nolog"},
+		{"replayonly",.setmask = 1 << REPLAYONLY},
+		{"block-allocator",.arg_required = 'a',.values = balloc},
+		{"data",.arg_required = 'd',.values = logging_mode},
+		{"barrier",.arg_required = 'b',.values = barrier_mode},
+		{"resize",.arg_required = 'r',.values = NULL},
+		{"jdev",.arg_required = 'j',.values = NULL},
+		{"nolargeio",.arg_required = 'w',.values = NULL},
+		{"commit",.arg_required = 'c',.values = NULL},
+		{"usrquota",.setmask = 1 << REISERFS_USRQUOTA},
+		{"grpquota",.setmask = 1 << REISERFS_GRPQUOTA},
+		{"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA},
+		{"errors",.arg_required = 'e',.values = error_actions},
+		{"usrjquota",.arg_required =
+		 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
+		{"grpjquota",.arg_required =
+		 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
+		{"jqfmt",.arg_required = 'f',.values = NULL},
+		{.option_name = NULL}
+	};
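+	/* an illustrative option string this table can parse:
+	 *   -o notail,data=writeback,barrier=none,commit=30,resize=auto */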
+
+	*blocks = 0;
+	if (!options || !*options)
+		/* use default configuration: create tails, journaling on, no
+		   conversion to newest format */
+		return 1;
+
+	for (pos = options; pos;) {
+		c = reiserfs_getopt(s, &pos, opts, &arg, mount_options);
+		if (c == -1)
+			/* wrong option is given */
+			return 0;
+
+		if (c == 'r') {
+			char *p;
+
+			p = NULL;
+			/* "resize=NNN" or "resize=auto" */
+
+			if (!strcmp(arg, "auto")) {
+				/* From JFS code, to auto-get the size. */
+				*blocks =
+				    s->s_bdev->bd_inode->i_size >> s->
+				    s_blocksize_bits;
+			} else {
+				*blocks = simple_strtoul(arg, &p, 0);
+				if (*p != '\0') {
+					/* NNN does not look like a number */
+					reiserfs_warning(s, "super-6507",
+							 "bad value %s for "
+							 "-oresize\n", arg);
+					return 0;
+				}
+			}
+		}
+
+		if (c == 'c') {
+			char *p = NULL;
+			unsigned long val = simple_strtoul(arg, &p, 0);
+			/* commit=NNN (time in seconds) */
+			if (*p != '\0' || val >= (unsigned int)-1) {
+				reiserfs_warning(s, "super-6508",
+						 "bad value %s for -ocommit\n",
+						 arg);
+				return 0;
+			}
+			*commit_max_age = (unsigned int)val;
+		}
+
+		if (c == 'w') {
+			reiserfs_warning(s, "super-6509", "nolargeio option "
+					 "is no longer supported");
+			return 0;
+		}
+
+		if (c == 'j') {
+			if (arg && *arg && jdev_name) {
+				if (*jdev_name) {	//Hm, already assigned?
+					reiserfs_warning(s, "super-6510",
+							 "journal device was "
+							 "already specified to "
+							 "be %s", *jdev_name);
+					return 0;
+				}
+				*jdev_name = arg;
+			}
+		}
+#ifdef CONFIG_QUOTA
+		if (c == 'u' || c == 'g') {
+			int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
+
+			if (sb_any_quota_loaded(s) &&
+			    (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
+				reiserfs_warning(s, "super-6511",
+						 "cannot change journaled "
+						 "quota options when quota "
+						 "turned on.");
+				return 0;
+			}
+			if (*arg) {	/* Some filename specified? */
+				if (REISERFS_SB(s)->s_qf_names[qtype]
+				    && strcmp(REISERFS_SB(s)->s_qf_names[qtype],
+					      arg)) {
+					reiserfs_warning(s, "super-6512",
+							 "%s quota file "
+							 "already specified.",
+							 QTYPE2NAME(qtype));
+					return 0;
+				}
+				if (strchr(arg, '/')) {
+					reiserfs_warning(s, "super-6513",
+							 "quotafile must be "
+							 "on filesystem root.");
+					return 0;
+				}
+				qf_names[qtype] =
+				    kmalloc(strlen(arg) + 1, GFP_KERNEL);
+				if (!qf_names[qtype]) {
+					reiserfs_warning(s, "reiserfs-2502",
+							 "not enough memory "
+							 "for storing "
+							 "quotafile name.");
+					return 0;
+				}
+				strcpy(qf_names[qtype], arg);
+				if (qtype == USRQUOTA)
+					*mount_options |= 1 << REISERFS_USRQUOTA;
+				else
+					*mount_options |= 1 << REISERFS_GRPQUOTA;
+			} else {
+				if (qf_names[qtype] !=
+				    REISERFS_SB(s)->s_qf_names[qtype])
+					kfree(qf_names[qtype]);
+				qf_names[qtype] = NULL;
+				if (qtype == USRQUOTA)
+					*mount_options &= ~(1 << REISERFS_USRQUOTA);
+				else
+					*mount_options &= ~(1 << REISERFS_GRPQUOTA);
+			}
+		}
+		if (c == 'f') {
+			if (!strcmp(arg, "vfsold"))
+				*qfmt = QFMT_VFS_OLD;
+			else if (!strcmp(arg, "vfsv0"))
+				*qfmt = QFMT_VFS_V0;
+			else {
+				reiserfs_warning(s, "super-6514",
+						 "unknown quota format "
+						 "specified.");
+				return 0;
+			}
+			if (sb_any_quota_loaded(s) &&
+			    *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+				reiserfs_warning(s, "super-6515",
+						 "cannot change journaled "
+						 "quota options when quota "
+						 "turned on.");
+				return 0;
+			}
+		}
+#else
+		if (c == 'u' || c == 'g' || c == 'f') {
+			reiserfs_warning(s, "reiserfs-2503", "journaled "
+					 "quota options not supported.");
+			return 0;
+		}
+#endif
+	}
+
+#ifdef CONFIG_QUOTA
+	if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
+	    && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
+		reiserfs_warning(s, "super-6515",
+				 "journaled quota format not specified.");
+		return 0;
+	}
+	if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) &&
+	       sb_has_quota_loaded(s, USRQUOTA)) ||
+	    (!(*mount_options & (1 << REISERFS_GRPQUOTA)) &&
+	       sb_has_quota_loaded(s, GRPQUOTA))) {
+		reiserfs_warning(s, "super-6516", "quota options must "
+				 "be present when quota is turned on.");
+		return 0;
+	}
+#endif
+
+	return 1;
+}
+
+static void switch_data_mode(struct super_block *s, unsigned long mode)
+{
+	REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) |
+					 (1 << REISERFS_DATA_ORDERED) |
+					 (1 << REISERFS_DATA_WRITEBACK));
+	REISERFS_SB(s)->s_mount_opt |= (1 << mode);
+}
+
+static void handle_data_mode(struct super_block *s, unsigned long mount_options)
+{
+	if (mount_options & (1 << REISERFS_DATA_LOG)) {
+		if (!reiserfs_data_log(s)) {
+			switch_data_mode(s, REISERFS_DATA_LOG);
+			reiserfs_info(s, "switching to journaled data mode\n");
+		}
+	} else if (mount_options & (1 << REISERFS_DATA_ORDERED)) {
+		if (!reiserfs_data_ordered(s)) {
+			switch_data_mode(s, REISERFS_DATA_ORDERED);
+			reiserfs_info(s, "switching to ordered data mode\n");
+		}
+	} else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) {
+		if (!reiserfs_data_writeback(s)) {
+			switch_data_mode(s, REISERFS_DATA_WRITEBACK);
+			reiserfs_info(s, "switching to writeback data mode\n");
+		}
+	}
+}
+
+static void handle_barrier_mode(struct super_block *s, unsigned long bits)
+{
+	int flush = (1 << REISERFS_BARRIER_FLUSH);
+	int none = (1 << REISERFS_BARRIER_NONE);
+	int all_barrier = flush | none;
+
+	if (bits & all_barrier) {
+		REISERFS_SB(s)->s_mount_opt &= ~all_barrier;
+		if (bits & flush) {
+			REISERFS_SB(s)->s_mount_opt |= flush;
+			printk("reiserfs: enabling write barrier flush mode\n");
+		} else if (bits & none) {
+			REISERFS_SB(s)->s_mount_opt |= none;
+			printk("reiserfs: write barriers turned off\n");
+		}
+	}
+}
+
+static void handle_attrs(struct super_block *s)
+{
+	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
+
+	if (reiserfs_attrs(s)) {
+		if (old_format_only(s)) {
+			reiserfs_warning(s, "super-6517", "cannot support "
+					 "attributes on 3.5.x disk format");
+			REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
+			return;
+		}
+		if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) {
+			reiserfs_warning(s, "super-6518", "cannot support "
+					 "attributes until flag is set in "
+					 "super-block");
+			REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
+		}
+	}
+}
+
+#ifdef CONFIG_QUOTA
+static void handle_quota_files(struct super_block *s, char **qf_names,
+			       unsigned int *qfmt)
+{
+	int i;
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+			kfree(REISERFS_SB(s)->s_qf_names[i]);
+		REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
+	}
+	if (*qfmt)
+		REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+}
+#endif
+
+static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
+{
+	struct reiserfs_super_block *rs;
+	struct reiserfs_transaction_handle th;
+	unsigned long blocks;
+	unsigned long mount_options = REISERFS_SB(s)->s_mount_opt;
+	unsigned long safe_mask = 0;
+	unsigned int commit_max_age = (unsigned int)-1;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	char *new_opts = kstrdup(arg, GFP_KERNEL);
+	int err;
+	char *qf_names[MAXQUOTAS];
+	unsigned int qfmt = 0;
+#ifdef CONFIG_QUOTA
+	int i;
+#endif
+
+	reiserfs_write_lock(s);
+
+#ifdef CONFIG_QUOTA
+	memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
+#endif
+
+	rs = SB_DISK_SUPER_BLOCK(s);
+
+	if (!reiserfs_parse_options
+	    (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+	    qf_names, &qfmt)) {
+#ifdef CONFIG_QUOTA
+		for (i = 0; i < MAXQUOTAS; i++)
+			if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+				kfree(qf_names[i]);
+#endif
+		err = -EINVAL;
+		goto out_unlock;
+	}
+#ifdef CONFIG_QUOTA
+	handle_quota_files(s, qf_names, &qfmt);
+#endif
+
+	handle_attrs(s);
+
+	/* Add options that are safe here */
+	safe_mask |= 1 << REISERFS_SMALLTAIL;
+	safe_mask |= 1 << REISERFS_LARGETAIL;
+	safe_mask |= 1 << REISERFS_NO_BORDER;
+	safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION;
+	safe_mask |= 1 << REISERFS_HASHED_RELOCATION;
+	safe_mask |= 1 << REISERFS_TEST4;
+	safe_mask |= 1 << REISERFS_ATTRS;
+	safe_mask |= 1 << REISERFS_XATTRS_USER;
+	safe_mask |= 1 << REISERFS_POSIXACL;
+	safe_mask |= 1 << REISERFS_BARRIER_FLUSH;
+	safe_mask |= 1 << REISERFS_BARRIER_NONE;
+	safe_mask |= 1 << REISERFS_ERROR_RO;
+	safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
+	safe_mask |= 1 << REISERFS_ERROR_PANIC;
+	safe_mask |= 1 << REISERFS_USRQUOTA;
+	safe_mask |= 1 << REISERFS_GRPQUOTA;
+
+	/* Update the bitmask, taking care to keep
+	 * the bits we're not allowed to change here */
+	REISERFS_SB(s)->s_mount_opt =
+	    (REISERFS_SB(s)->
+	     s_mount_opt & ~safe_mask) | (mount_options & safe_mask);
+
+	if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) {
+		journal->j_max_commit_age = commit_max_age;
+		journal->j_max_trans_age = commit_max_age;
+	} else if (commit_max_age == 0) {
+		/* 0 means restore defaults. */
+		journal->j_max_commit_age = journal->j_default_max_commit_age;
+		journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
+	}
+
+	if (blocks) {
+		err = reiserfs_resize(s, blocks);
+		if (err != 0)
+			goto out_unlock;
+	}
+
+	if (*mount_flags & MS_RDONLY) {
+		reiserfs_xattr_init(s, *mount_flags);
+		/* remount read-only */
+		if (s->s_flags & MS_RDONLY)
+			/* it is read-only already */
+			goto out_ok;
+
+		/*
+		 * Drop write lock. Quota will retake it when needed and lock
+		 * ordering requires calling dquot_suspend() without it.
+		 */
+		reiserfs_write_unlock(s);
+		err = dquot_suspend(s, -1);
+		if (err < 0)
+			goto out_err;
+		reiserfs_write_lock(s);
+
+		/* try to remount file system with read-only permissions */
+		if (sb_umount_state(rs) == REISERFS_VALID_FS
+		    || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) {
+			goto out_ok;
+		}
+
+		err = journal_begin(&th, s, 10);
+		if (err)
+			goto out_unlock;
+
+		/* Mounting a rw partition read-only. */
+		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
+		set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state);
+		journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+	} else {
+		/* remount read-write */
+		if (!(s->s_flags & MS_RDONLY)) {
+			reiserfs_xattr_init(s, *mount_flags);
+			goto out_ok;	/* We are read-write already */
+		}
+
+		if (reiserfs_is_journal_aborted(journal)) {
+			err = journal->j_errno;
+			goto out_unlock;
+		}
+
+		handle_data_mode(s, mount_options);
+		handle_barrier_mode(s, mount_options);
+		REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
+		s->s_flags &= ~MS_RDONLY;	/* now it is safe to call journal_begin */
+		err = journal_begin(&th, s, 10);
+		if (err)
+			goto out_unlock;
+
+		/* Remount a read-only partition read-write */
+		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
+		REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
+		s->s_flags &= ~MS_RDONLY;
+		set_sb_umount_state(rs, REISERFS_ERROR_FS);
+		if (!old_format_only(s))
+			set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
+		/* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
+		journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+		REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
+	}
+	/* this will force a full flush of all journal lists */
+	SB_JOURNAL(s)->j_must_wait = 1;
+	err = journal_end(&th, s, 10);
+	if (err)
+		goto out_unlock;
+	s->s_dirt = 0;
+
+	if (!(*mount_flags & MS_RDONLY)) {
+		/*
+		 * Drop write lock. Quota will retake it when needed and lock
+		 * ordering requires calling dquot_resume() without it.
+		 */
+		reiserfs_write_unlock(s);
+		dquot_resume(s, -1);
+		reiserfs_write_lock(s);
+		finish_unfinished(s);
+		reiserfs_xattr_init(s, *mount_flags);
+	}
+
+out_ok:
+	replace_mount_options(s, new_opts);
+	reiserfs_write_unlock(s);
+	return 0;
+
+out_unlock:
+	reiserfs_write_unlock(s);
+out_err:
+	kfree(new_opts);
+	return err;
+}
+
+static int read_super_block(struct super_block *s, int offset)
+{
+	struct buffer_head *bh;
+	struct reiserfs_super_block *rs;
+	int fs_blocksize;
+
+	bh = sb_bread(s, offset / s->s_blocksize);
+	if (!bh) {
+		reiserfs_warning(s, "sh-2006",
+				 "bread failed (dev %s, block %lu, size %lu)",
+				 reiserfs_bdevname(s), offset / s->s_blocksize,
+				 s->s_blocksize);
+		return 1;
+	}
+
+	rs = (struct reiserfs_super_block *)bh->b_data;
+	if (!is_any_reiserfs_magic_string(rs)) {
+		brelse(bh);
+		return 1;
+	}
+	//
+	// ok, a reiserfs signature (old or new) was found at the given offset
+	//
+	fs_blocksize = sb_blocksize(rs);
+	brelse(bh);
+	sb_set_blocksize(s, fs_blocksize);
+
+	bh = sb_bread(s, offset / s->s_blocksize);
+	if (!bh) {
+		reiserfs_warning(s, "sh-2007",
+				 "bread failed (dev %s, block %lu, size %lu)",
+				 reiserfs_bdevname(s), offset / s->s_blocksize,
+				 s->s_blocksize);
+		return 1;
+	}
+
+	rs = (struct reiserfs_super_block *)bh->b_data;
+	if (sb_blocksize(rs) != s->s_blocksize) {
+		reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
+				 "filesystem on (dev %s, block %Lu, size %lu)",
+				 reiserfs_bdevname(s),
+				 (unsigned long long)bh->b_blocknr,
+				 s->s_blocksize);
+		brelse(bh);
+		return 1;
+	}
+
+	if (rs->s_v1.s_root_block == cpu_to_le32(-1)) {
+		brelse(bh);
+		reiserfs_warning(s, "super-6519", "Unfinished reiserfsck "
+				 "--rebuild-tree run detected. Please run\n"
+				 "reiserfsck --rebuild-tree and wait for a "
+				 "completion. If that fails\n"
+				 "get newer reiserfsprogs package");
+		return 1;
+	}
+
+	SB_BUFFER_WITH_SB(s) = bh;
+	SB_DISK_SUPER_BLOCK(s) = rs;
+
+	if (is_reiserfs_jr(rs)) {
+		/* magic is of non-standard journal filesystem, look at s_version to
+		   find which format is in use */
+		if (sb_version(rs) == REISERFS_VERSION_2)
+			reiserfs_info(s, "found reiserfs format \"3.6\""
+				      " with non-standard journal\n");
+		else if (sb_version(rs) == REISERFS_VERSION_1)
+			reiserfs_info(s, "found reiserfs format \"3.5\""
+				      " with non-standard journal\n");
+		else {
+			reiserfs_warning(s, "sh-2012", "found unknown "
+					 "format \"%u\" of reiserfs with "
+					 "non-standard magic", sb_version(rs));
+			return 1;
+		}
+	} else
+		/* s_version of standard format may contain incorrect information,
+		   so we just look at the magic string */
+		reiserfs_info(s,
+			      "found reiserfs format \"%s\" with standard journal\n",
+			      is_reiserfs_3_5(rs) ? "3.5" : "3.6");
+
+	s->s_op = &reiserfs_sops;
+	s->s_export_op = &reiserfs_export_ops;
+#ifdef CONFIG_QUOTA
+	s->s_qcop = &reiserfs_qctl_operations;
+	s->dq_op = &reiserfs_quota_operations;
+#endif
+
+	/* the new format is limited by the 32-bit-wide i_blocks field; we want
+	 ** to be one full block below that.
+	 */
+	s->s_maxbytes = (512LL << 32) - s->s_blocksize;
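+	/* e.g. with a 4 KiB block size: (512LL << 32) = 2^41 bytes = 2 TiB,
+	 * so s_maxbytes = 2 TiB - 4096 */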
+	return 0;
+}
+
+/* after journal replay, reread all bitmap and super blocks */
+static int reread_meta_blocks(struct super_block *s)
+{
+	ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
+	wait_on_buffer(SB_BUFFER_WITH_SB(s));
+	if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
+		reiserfs_warning(s, "reiserfs-2504", "error reading the super");
+		return 1;
+	}
+
+	return 0;
+}
+
+/////////////////////////////////////////////////////
+// hash detection stuff
+
+// if the root directory is empty - we set the default - Yura's - hash and
+// warn about it
+// FIXME: we look for only one name in a directory. If tea and yura
+// both have the same value - we ask the user to send a report to the
+// mailing list
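+// The strategy (see the code below): take one real directory entry, hash its
+// name with each of the three candidate functions, and compare each result
+// against the hash value stored in the entry's offset; a unique match
+// identifies the hash the filesystem was created with.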
+static __u32 find_hash_out(struct super_block *s)
+{
+	int retval;
+	struct inode *inode;
+	struct cpu_key key;
+	INITIALIZE_PATH(path);
+	struct reiserfs_dir_entry de;
+	__u32 hash = DEFAULT_HASH;
+
+	inode = s->s_root->d_inode;
+
+	do {			// Some serious "goto"-hater was there ;)
+		u32 teahash, r5hash, yurahash, deh_hashval;
+
+		make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
+		retval = search_by_entry_key(s, &key, &path, &de);
+		if (retval == IO_ERROR) {
+			pathrelse(&path);
+			return UNSET_HASH;
+		}
+		if (retval == NAME_NOT_FOUND)
+			de.de_entry_num--;
+		set_de_name_and_namelen(&de);
+		if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) {
+			/* allow override in this case */
+			if (reiserfs_rupasov_hash(s)) {
+				hash = YURA_HASH;
+			}
+			reiserfs_info(s, "FS seems to be empty, autodetect "
+					 "is using the default hash\n");
+			break;
+		}
+		r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
+		teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
+		yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
+		deh_hashval =
+		    GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])));
+		/* ambiguous: two candidate hashes agree on the stored value */
+		if ((teahash == r5hash && deh_hashval == r5hash) ||
+		    (teahash == yurahash && deh_hashval == yurahash) ||
+		    (r5hash == yurahash && deh_hashval == yurahash)) {
+			reiserfs_warning(s, "reiserfs-2506", "Unable to "
+					 "automatically detect hash function. "
+					 "Please mount with -o "
+					 "hash={tea,rupasov,r5}");
+			hash = UNSET_HASH;
+			break;
+		}
+		if (deh_hashval == yurahash)
+			hash = YURA_HASH;
+		else if (deh_hashval == teahash)
+			hash = TEA_HASH;
+		else if (deh_hashval == r5hash)
+			hash = R5_HASH;
+		else {
+			reiserfs_warning(s, "reiserfs-2506",
+					 "Unrecognised hash function");
+			hash = UNSET_HASH;
+		}
+	} while (0);
+
+	pathrelse(&path);
+	return hash;
+}
+
+// finds out which hash the names are sorted with
+static int what_hash(struct super_block *s)
+{
+	__u32 code;
+
+	code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s));
+
+	/* reiserfs_hash_detect() == true if any of the hash mount options
+	 ** were used.  We must check them to make sure the user isn't
+	 ** using a bad hash value
+	 */
+	if (code == UNSET_HASH || reiserfs_hash_detect(s))
+		code = find_hash_out(s);
+
+	if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
+		/* detection has found the hash, and we must check against the
+		 ** mount options
+		 */
+		if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
+			reiserfs_warning(s, "reiserfs-2507",
+					 "Error, %s hash detected, "
+					 "unable to force rupasov hash",
+					 reiserfs_hashname(code));
+			code = UNSET_HASH;
+		} else if (reiserfs_tea_hash(s) && code != TEA_HASH) {
+			reiserfs_warning(s, "reiserfs-2508",
+					 "Error, %s hash detected, "
+					 "unable to force tea hash",
+					 reiserfs_hashname(code));
+			code = UNSET_HASH;
+		} else if (reiserfs_r5_hash(s) && code != R5_HASH) {
+			reiserfs_warning(s, "reiserfs-2509",
+					 "Error, %s hash detected, "
+					 "unable to force r5 hash",
+					 reiserfs_hashname(code));
+			code = UNSET_HASH;
+		}
+	} else {
+		/* find_hash_out was not called or could not determine the hash */
+		if (reiserfs_rupasov_hash(s)) {
+			code = YURA_HASH;
+		} else if (reiserfs_tea_hash(s)) {
+			code = TEA_HASH;
+		} else if (reiserfs_r5_hash(s)) {
+			code = R5_HASH;
+		}
+	}
+
+	/* if we are mounted RW, and we have a new valid hash code, update
+	 ** the super
+	 */
+	if (code != UNSET_HASH &&
+	    !(s->s_flags & MS_RDONLY) &&
+	    code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) {
+		set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code);
+	}
+	return code;
+}
+
+// return pointer to appropriate function
+static hashf_t hash_function(struct super_block *s)
+{
+	switch (what_hash(s)) {
+	case TEA_HASH:
+		reiserfs_info(s, "Using tea hash to sort names\n");
+		return keyed_hash;
+	case YURA_HASH:
+		reiserfs_info(s, "Using rupasov hash to sort names\n");
+		return yura_hash;
+	case R5_HASH:
+		reiserfs_info(s, "Using r5 hash to sort names\n");
+		return r5_hash;
+	}
+	return NULL;
+}
+
+// this is used to set up the correct value for old partitions
+static int function2code(hashf_t func)
+{
+	if (func == keyed_hash)
+		return TEA_HASH;
+	if (func == yura_hash)
+		return YURA_HASH;
+	if (func == r5_hash)
+		return R5_HASH;
+
+	BUG();			// should never happen
+
+	return 0;
+}
+
+#define SWARN(silent, s, id, ...)			\
+	if (!(silent))				\
+		reiserfs_warning(s, id, __VA_ARGS__)
+
+static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
+{
+	struct inode *root_inode;
+	struct reiserfs_transaction_handle th;
+	int old_format = 0;
+	unsigned long blocks;
+	unsigned int commit_max_age = 0;
+	int jinit_done = 0;
+	struct reiserfs_iget_args args;
+	struct reiserfs_super_block *rs;
+	char *jdev_name;
+	struct reiserfs_sb_info *sbi;
+	int errval = -EINVAL;
+	char *qf_names[MAXQUOTAS] = {};
+	unsigned int qfmt = 0;
+
+	save_mount_options(s, data);
+
+	sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+	s->s_fs_info = sbi;
+	/* Set default values for options: non-aggressive tails, RO on errors */
+	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
+	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
+	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
+	/* no preallocation minimum, be smart in
+	   reiserfs_file_write instead */
+	REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
+	/* Preallocate by 16 blocks (17-1) at once */
+	REISERFS_SB(s)->s_alloc_options.preallocsize = 17;
+	/* setup default block allocator options */
+	reiserfs_init_alloc_options(s);
+
+	mutex_init(&REISERFS_SB(s)->lock);
+	REISERFS_SB(s)->lock_depth = -1;
+
+	jdev_name = NULL;
+	if (reiserfs_parse_options
+	    (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
+	     &commit_max_age, qf_names, &qfmt) == 0) {
+		goto error_unlocked;
+	}
+	if (jdev_name && jdev_name[0]) {
+		REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
+		if (!REISERFS_SB(s)->s_jdev) {
+			SWARN(silent, s, "", "Cannot allocate memory for "
+				"journal device name");
+			goto error;
+		}
+	}
+#ifdef CONFIG_QUOTA
+	handle_quota_files(s, qf_names, &qfmt);
+#endif
+
+	if (blocks) {
+		SWARN(silent, s, "jmacd-7", "resize option for remount only");
+		goto error_unlocked;
+	}
+
+	/* try the old format (undistributed bitmap, super block in the 8th 1k block of the device) */
+	if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
+		old_format = 1;
+	/* try the new format (64th 1k block), which can contain the reiserfs super block */
+	else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
+		SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
+		      reiserfs_bdevname(s));
+		goto error_unlocked;
+	}
+
+	rs = SB_DISK_SUPER_BLOCK(s);
+	/* Let's do a basic sanity check to verify that the underlying device
+	   is not smaller than the filesystem. If the check fails, abort and
+	   scream, because bad stuff will happen otherwise. */
+	if (s->s_bdev && s->s_bdev->bd_inode
+	    && i_size_read(s->s_bdev->bd_inode) <
+	    sb_block_count(rs) * sb_blocksize(rs)) {
+		SWARN(silent, s, "", "Filesystem cannot be "
+		      "mounted because it is bigger than the device");
+		SWARN(silent, s, "", "You may need to run fsck "
+		      "or increase size of your LVM partition");
+		SWARN(silent, s, "", "Or may be you forgot to "
+		      "reboot after fdisk when it told you to");
+		goto error_unlocked;
+	}
+
+	sbi->s_mount_state = SB_REISERFS_STATE(s);
+	sbi->s_mount_state = REISERFS_VALID_FS;
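+	/* note: the state just read from disk is immediately overridden; the
+	 * in-core mount state always starts out as REISERFS_VALID_FS */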
+
+	if ((errval = reiserfs_init_bitmap_cache(s))) {
+		SWARN(silent, s, "jmacd-8", "unable to read bitmap");
+		goto error_unlocked;
+	}
+
+	errval = -EINVAL;
+#ifdef CONFIG_REISERFS_CHECK
+	SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");
+	SWARN(silent, s, "", "- it is slow mode for debugging.");
+#endif
+
+	/* make data=ordered the default */
+	if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) &&
+	    !reiserfs_data_writeback(s)) {
+		REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED);
+	}
+
+	if (reiserfs_data_log(s)) {
+		reiserfs_info(s, "using journaled data mode\n");
+	} else if (reiserfs_data_ordered(s)) {
+		reiserfs_info(s, "using ordered data mode\n");
+	} else {
+		reiserfs_info(s, "using writeback data mode\n");
+	}
+	if (reiserfs_barrier_flush(s)) {
+		printk("reiserfs: using flush barriers\n");
+	}
+
+	// set_device_ro(s->s_dev, 1) ;
+	if (journal_init(s, jdev_name, old_format, commit_max_age)) {
+		SWARN(silent, s, "sh-2022",
+		      "unable to initialize journal space");
+		goto error_unlocked;
+	} else {
+		jinit_done = 1;	/* once this is set, journal_release must be called
+				 ** if we error out of the mount
+				 */
+	}
+
+	if (reread_meta_blocks(s)) {
+		SWARN(silent, s, "jmacd-9",
+		      "unable to reread meta blocks after journal init");
+		goto error_unlocked;
+	}
+
+	if (replay_only(s))
+		goto error_unlocked;
+
+	if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
+		SWARN(silent, s, "clm-7000",
+		      "Detected readonly device, marking FS readonly");
+		s->s_flags |= MS_RDONLY;
+	}
+	args.objectid = REISERFS_ROOT_OBJECTID;
+	args.dirid = REISERFS_ROOT_PARENT_OBJECTID;
+	root_inode =
+	    iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
+			 reiserfs_init_locked_inode, (void *)(&args));
+	if (!root_inode) {
+		SWARN(silent, s, "jmacd-10", "get root inode failed");
+		goto error_unlocked;
+	}
+
+	/*
+	 * This path was assumed to be called with the BKL held in old times.
+	 * Now we have inherited the big reiserfs lock from it and many
+	 * reiserfs helpers called in the mount path and elsewhere require
+	 * this lock to be held even if it's not always necessary. Let's be
+	 * conservative and hold it early. The window can be reduced after
+	 * careful review of the code.
+	 */
+	reiserfs_write_lock(s);
+
+	if (root_inode->i_state & I_NEW) {
+		reiserfs_read_locked_inode(root_inode, &args);
+		unlock_new_inode(root_inode);
+	}
+
+	s->s_root = d_make_root(root_inode);
+	if (!s->s_root)
+		goto error;
+	// define and initialize hash function
+	sbi->s_hash_function = hash_function(s);
+	if (sbi->s_hash_function == NULL) {
+		dput(s->s_root);
+		s->s_root = NULL;
+		goto error;
+	}
+
+	if (is_reiserfs_3_5(rs)
+	    || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1))
+		set_bit(REISERFS_3_5, &(sbi->s_properties));
+	else if (old_format)
+		set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties));
+	else
+		set_bit(REISERFS_3_6, &(sbi->s_properties));
+
+	if (!(s->s_flags & MS_RDONLY)) {
+
+		errval = journal_begin(&th, s, 1);
+		if (errval) {
+			dput(s->s_root);
+			s->s_root = NULL;
+			goto error;
+		}
+		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
+
+		set_sb_umount_state(rs, REISERFS_ERROR_FS);
+		set_sb_fs_state(rs, 0);
+
+		/* Clear out s_bmap_nr if it would wrap. We can handle this
+		 * case, but older revisions can't. This will cause the
+		 * file system to fail mount on those older implementations,
+		 * avoiding corruption. -jeffm */
+		if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
+		    sb_bmap_nr(rs) != 0) {
+			reiserfs_warning(s, "super-2030", "This file system "
+					"claims to use %u bitmap blocks in "
+					"its super block, but requires %u. "
+					"Clearing to zero.", sb_bmap_nr(rs),
+					reiserfs_bmap_count(s));
+
+			set_sb_bmap_nr(rs, 0);
+		}
+
+		if (old_format_only(s)) {
+			/* filesystem of format 3.5 either with standard or non-standard
+			   journal */
+			if (convert_reiserfs(s)) {
+				/* and -o conv is given */
+				if (!silent)
+					reiserfs_info(s,
+						      "converting 3.5 filesystem to the 3.6 format");
+
+				if (is_reiserfs_3_5(rs))
+					/* put magic string of 3.6 format. 2.2 will not be able to
+					   mount this filesystem anymore */
+					memcpy(rs->s_v1.s_magic,
+					       reiserfs_3_6_magic_string,
+					       sizeof
+					       (reiserfs_3_6_magic_string));
+
+				set_sb_version(rs, REISERFS_VERSION_2);
+				reiserfs_convert_objectid_map_v1(s);
+				set_bit(REISERFS_3_6, &(sbi->s_properties));
+				clear_bit(REISERFS_3_5, &(sbi->s_properties));
+			} else if (!silent) {
+				reiserfs_info(s, "using 3.5.x disk format\n");
+			}
+		} else
+			set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
+
+
+		journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
+		errval = journal_end(&th, s, 1);
+		if (errval) {
+			dput(s->s_root);
+			s->s_root = NULL;
+			goto error;
+		}
+
+		if ((errval = reiserfs_lookup_privroot(s)) ||
+		    (errval = reiserfs_xattr_init(s, s->s_flags))) {
+			dput(s->s_root);
+			s->s_root = NULL;
+			goto error;
+		}
+
+		/* look for files which were to be removed in previous session */
+		finish_unfinished(s);
+	} else {
+		if (old_format_only(s) && !silent) {
+			reiserfs_info(s, "using 3.5.x disk format\n");
+		}
+
+		if ((errval = reiserfs_lookup_privroot(s)) ||
+		    (errval = reiserfs_xattr_init(s, s->s_flags))) {
+			dput(s->s_root);
+			s->s_root = NULL;
+			goto error;
+		}
+	}
+	// mark hash in super block: it could be unset. overwrite should be ok
+	set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
+
+	handle_attrs(s);
+
+	reiserfs_proc_info_init(s);
+
+	init_waitqueue_head(&(sbi->s_wait));
+	spin_lock_init(&sbi->bitmap_lock);
+
+	reiserfs_write_unlock(s);
+
+	return 0;
+
+error:
+	reiserfs_write_unlock(s);
+
+error_unlocked:
+	/* kill the commit thread, free journal ram */
+	if (jinit_done) {
+		reiserfs_write_lock(s);
+		journal_release_error(NULL, s);
+		reiserfs_write_unlock(s);
+	}
+
+	reiserfs_free_bitmap_cache(s);
+	if (SB_BUFFER_WITH_SB(s))
+		brelse(SB_BUFFER_WITH_SB(s));
+#ifdef CONFIG_QUOTA
+	{
+		int j;
+		for (j = 0; j < MAXQUOTAS; j++)
+			kfree(qf_names[j]);
+	}
+#endif
+	kfree(sbi);
+
+	s->s_fs_info = NULL;
+	return errval;
+}
+
+static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb);
+
+	buf->f_namelen = (REISERFS_MAX_NAME(dentry->d_sb->s_blocksize));
+	buf->f_bfree = sb_free_blocks(rs);
+	buf->f_bavail = buf->f_bfree;
+	buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	/* changed to accommodate gcc folks. */
+	buf->f_type = REISERFS_SUPER_MAGIC;
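+	/* Derive a stable fsid by crc32-ing each half of the on-disk UUID. */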
+	buf->f_fsid.val[0] = (u32)crc32_le(0, rs->s_uuid, sizeof(rs->s_uuid)/2);
+	buf->f_fsid.val[1] = (u32)crc32_le(0, rs->s_uuid + sizeof(rs->s_uuid)/2,
+				sizeof(rs->s_uuid)/2);
+
+	return 0;
+}
+
+#ifdef CONFIG_QUOTA
+static int reiserfs_write_dquot(struct dquot *dquot)
+{
+	struct reiserfs_transaction_handle th;
+	int ret, err;
+
+	reiserfs_write_lock(dquot->dq_sb);
+	ret =
+	    journal_begin(&th, dquot->dq_sb,
+			  REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+	if (ret)
+		goto out;
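+	/* Drop the reiserfs write lock around dquot_commit(): the quota code
+	 * takes its own locks (e.g. dqio_mutex) and holding ours across the
+	 * call could invert the lock order. Retake it for journal_end(). */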
+	reiserfs_write_unlock(dquot->dq_sb);
+	ret = dquot_commit(dquot);
+	reiserfs_write_lock(dquot->dq_sb);
+	err =
+	    journal_end(&th, dquot->dq_sb,
+			REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+	if (!ret && err)
+		ret = err;
+out:
+	reiserfs_write_unlock(dquot->dq_sb);
+	return ret;
+}
+
+static int reiserfs_acquire_dquot(struct dquot *dquot)
+{
+	struct reiserfs_transaction_handle th;
+	int ret, err;
+
+	reiserfs_write_lock(dquot->dq_sb);
+	ret =
+	    journal_begin(&th, dquot->dq_sb,
+			  REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+	if (ret)
+		goto out;
+	reiserfs_write_unlock(dquot->dq_sb);
+	ret = dquot_acquire(dquot);
+	reiserfs_write_lock(dquot->dq_sb);
+	err =
+	    journal_end(&th, dquot->dq_sb,
+			REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+	if (!ret && err)
+		ret = err;
+out:
+	reiserfs_write_unlock(dquot->dq_sb);
+	return ret;
+}
+
+static int reiserfs_release_dquot(struct dquot *dquot)
+{
+	struct reiserfs_transaction_handle th;
+	int ret, err;
+
+	reiserfs_write_lock(dquot->dq_sb);
+	ret =
+	    journal_begin(&th, dquot->dq_sb,
+			  REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+	reiserfs_write_unlock(dquot->dq_sb);
+	if (ret) {
+		/* Release dquot anyway to avoid endless cycle in dqput() */
+		dquot_release(dquot);
+		goto out;
+	}
+	ret = dquot_release(dquot);
+	reiserfs_write_lock(dquot->dq_sb);
+	err =
+	    journal_end(&th, dquot->dq_sb,
+			REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+	if (!ret && err)
+		ret = err;
+	reiserfs_write_unlock(dquot->dq_sb);
+out:
+	return ret;
+}
+
+static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
+{
+	/* Are we journaling quotas? */
+	if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
+	    REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
+		dquot_mark_dquot_dirty(dquot);
+		return reiserfs_write_dquot(dquot);
+	} else
+		return dquot_mark_dquot_dirty(dquot);
+}
+
+static int reiserfs_write_info(struct super_block *sb, int type)
+{
+	struct reiserfs_transaction_handle th;
+	int ret, err;
+
+	/* Data block + inode block */
+	reiserfs_write_lock(sb);
+	ret = journal_begin(&th, sb, 2);
+	if (ret)
+		goto out;
+	reiserfs_write_unlock(sb);
+	ret = dquot_commit_info(sb, type);
+	reiserfs_write_lock(sb);
+	err = journal_end(&th, sb, 2);
+	if (!ret && err)
+		ret = err;
+out:
+	reiserfs_write_unlock(sb);
+	return ret;
+}
+
+/*
+ * Turn on quotas during mount time - we need to find the quota file and such...
+ */
+static int reiserfs_quota_on_mount(struct super_block *sb, int type)
+{
+	return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
+					REISERFS_SB(sb)->s_jquota_fmt, type);
+}
+
+/*
+ * Standard function to be called on quota_on
+ */
+static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
+			     struct path *path)
+{
+	int err;
+	struct inode *inode;
+	struct reiserfs_transaction_handle th;
+	int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
+
+	reiserfs_write_lock(sb);
+	if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Quotafile not on the same filesystem? */
+	if (path->dentry->d_sb != sb) {
+		err = -EXDEV;
+		goto out;
+	}
+	inode = path->dentry->d_inode;
+	/* We must not pack tails for quota files on reiserfs for quota IO to work */
+	if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
+		err = reiserfs_unpack(inode, NULL);
+		if (err) {
+			reiserfs_warning(sb, "super-6520",
+				"Unpacking tail of quota file failed"
+				" (%d). Cannot turn on quotas.", err);
+			err = -EINVAL;
+			goto out;
+		}
+		mark_inode_dirty(inode);
+	}
+	/* Journaling quota? */
+	if (REISERFS_SB(sb)->s_qf_names[type]) {
+		/* Quotafile not of fs root? */
+		if (path->dentry->d_parent != sb->s_root)
+			reiserfs_warning(sb, "super-6521",
+				 "Quota file not on filesystem root. "
+				 "Journalled quota will not work.");
+	}
+
+	/*
+	 * When we journal data on quota file, we have to flush journal to see
+	 * all updates to the file when we bypass pagecache...
+	 */
+	if (reiserfs_file_data_log(inode)) {
+		/* Just start temporary transaction and finish it */
+		err = journal_begin(&th, sb, 1);
+		if (err)
+			goto out;
+		err = journal_end_sync(&th, sb, 1);
+		if (err)
+			goto out;
+	}
+	reiserfs_write_unlock(sb);
+	return dquot_quota_on(sb, type, format_id, path);
+out:
+	reiserfs_write_unlock(sb);
+	return err;
+}
+
+/* Read data from quotafile - avoid pagecache and such because we cannot afford
+ * acquiring the locks... As quota files are never truncated and quota code
+ * itself serializes the operations (and no one else should touch the files)
+ * we don't have to be afraid of races */
+static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
+				   size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	unsigned long blk = off >> sb->s_blocksize_bits;
+	int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
+	size_t toread;
+	struct buffer_head tmp_bh, *bh;
+	loff_t i_size = i_size_read(inode);
+
+	if (off > i_size)
+		return 0;
+	if (off + len > i_size)
+		len = i_size - off;
+	toread = len;
+	while (toread > 0) {
+		tocopy = sb->s_blocksize - offset < toread ?
+		    sb->s_blocksize - offset : toread;
+		tmp_bh.b_state = 0;
+		/* Quota files are without tails so we can safely use this function */
+		reiserfs_write_lock(sb);
+		err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
+		reiserfs_write_unlock(sb);
+		if (err)
+			return err;
+		if (!buffer_mapped(&tmp_bh))	/* A hole? */
+			memset(data, 0, tocopy);
+		else {
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+			if (!bh)
+				return -EIO;
+			memcpy(data, bh->b_data + offset, tocopy);
+			brelse(bh);
+		}
+		offset = 0;
+		toread -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+	return len;
+}
+
+/* Write to quotafile (we know the transaction is already started and has
+ * enough credits) */
+static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
+				    const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	unsigned long blk = off >> sb->s_blocksize_bits;
+	int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
+	int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL;
+	size_t towrite = len;
+	struct buffer_head tmp_bh, *bh;
+
+	if (!current->journal_info) {
+		printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
+			" cancelled because transaction is not started.\n",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
+	}
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
+	while (towrite > 0) {
+		tocopy = sb->s_blocksize - offset < towrite ?
+		    sb->s_blocksize - offset : towrite;
+		tmp_bh.b_state = 0;
+		reiserfs_write_lock(sb);
+		err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+		reiserfs_write_unlock(sb);
+		if (err)
+			goto out;
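+		/* Partial block: read it first so the existing bytes survive
+		 * the read-modify-write; a full block can use sb_getblk()
+		 * since we overwrite it entirely. */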
+		if (offset || tocopy != sb->s_blocksize)
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+		else
+			bh = sb_getblk(sb, tmp_bh.b_blocknr);
+		if (!bh) {
+			err = -EIO;
+			goto out;
+		}
+		lock_buffer(bh);
+		memcpy(bh->b_data + offset, data, tocopy);
+		flush_dcache_page(bh->b_page);
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+		reiserfs_write_lock(sb);
+		reiserfs_prepare_for_journal(sb, bh, 1);
+		journal_mark_dirty(current->journal_info, sb, bh);
+		if (!journal_quota)
+			reiserfs_add_ordered_list(inode, bh);
+		reiserfs_write_unlock(sb);
+		brelse(bh);
+		offset = 0;
+		towrite -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+out:
+	if (len == towrite) {
+		mutex_unlock(&inode->i_mutex);
+		return err;
+	}
+	if (inode->i_size < off + len - towrite)
+		i_size_write(inode, off + len - towrite);
+	inode->i_version++;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(inode);
+	mutex_unlock(&inode->i_mutex);
+	return len - towrite;
+}
+
+#endif
+
+static struct dentry *get_super_block(struct file_system_type *fs_type,
+			   int flags, const char *dev_name,
+			   void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
+}
+
+static int __init init_reiserfs_fs(void)
+{
+	int ret;
+
+	if ((ret = init_inodecache())) {
+		return ret;
+	}
+
+	reiserfs_proc_info_global_init();
+
+	ret = register_filesystem(&reiserfs_fs_type);
+
+	if (ret == 0) {
+		return 0;
+	}
+
+	reiserfs_proc_info_global_done();
+	destroy_inodecache();
+
+	return ret;
+}
+
+static void __exit exit_reiserfs_fs(void)
+{
+	reiserfs_proc_info_global_done();
+	unregister_filesystem(&reiserfs_fs_type);
+	destroy_inodecache();
+}
+
+struct file_system_type reiserfs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "reiserfs",
+	.mount = get_super_block,
+	.kill_sb = reiserfs_kill_sb,
+	.fs_flags = FS_REQUIRES_DEV,
+};
+
+MODULE_DESCRIPTION("ReiserFS journaled filesystem");
+MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
+MODULE_LICENSE("GPL");
+
+module_init(init_reiserfs_fs);
+module_exit(exit_reiserfs_fs);
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/tail_conversion.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/tail_conversion.c
new file mode 100644
index 0000000..5e2624d
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/tail_conversion.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details
+ */
+
+#include <linux/time.h>
+#include <linux/pagemap.h>
+#include <linux/buffer_head.h>
+#include "reiserfs.h"
+
+/* Access to the tail: anyone about to read the tail must make sure no
+   conversion is running; direct2indirect and indirect2direct cannot run
+   concurrently. */
+
+/* Converts direct items to an unformatted node. Panics if file has no
+   tail. -ENOSPC if no disk space for conversion */
+/* path points to the first direct item of the file, regardless of how many
+   of them are there */
+int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
+		    struct treepath *path, struct buffer_head *unbh,
+		    loff_t tail_offset)
+{
+	struct super_block *sb = inode->i_sb;
+	struct buffer_head *up_to_date_bh;
+	struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
+	unsigned long total_tail = 0;
+	struct cpu_key end_key;	/* Key to search for the last byte of the
+				   converted item. */
+	struct item_head ind_ih;	/* new indirect item to be inserted or
+					   key of unfm pointer to be pasted */
+	int blk_size, retval;	/* returned value for reiserfs_insert_item and clones */
+	unp_t unfm_ptr;		/* Handle on an unformatted node
+				   that will be inserted in the
+				   tree. */
+
+	BUG_ON(!th->t_trans_id);
+
+	REISERFS_SB(sb)->s_direct2indirect++;
+
+	blk_size = sb->s_blocksize;
+
+	/* Build the new indirect item head and the key used to append or
+	   insert the pointer to the new unformatted node. */
+	copy_item_head(&ind_ih, p_le_ih);
+	set_le_ih_k_offset(&ind_ih, tail_offset);
+	set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);
+
+	/* Set the key to search for the place for new unfm pointer */
+	make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4);
+
+	/* FIXME: we could avoid this */
+	if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) {
+		reiserfs_error(sb, "PAP-14030",
+			       "pasted or inserted byte exists in "
+			       "the tree %K. Use fsck to repair.", &end_key);
+		pathrelse(path);
+		return -EIO;
+	}
+
+	p_le_ih = PATH_PITEM_HEAD(path);
+
+	unfm_ptr = cpu_to_le32(unbh->b_blocknr);
+
+	if (is_statdata_le_ih(p_le_ih)) {
+		/* Insert new indirect item. */
+		set_ih_free_space(&ind_ih, 0);	/* delete at nearest future */
+		put_ih_item_len(&ind_ih, UNFM_P_SIZE);
+		PATH_LAST_POSITION(path)++;
+		retval =
+		    reiserfs_insert_item(th, path, &end_key, &ind_ih, inode,
+					 (char *)&unfm_ptr);
+	} else {
+		/* Paste into last indirect item of an object. */
+		retval = reiserfs_paste_into_item(th, path, &end_key, inode,
+						    (char *)&unfm_ptr,
+						    UNFM_P_SIZE);
+	}
+	if (retval) {
+		return retval;
+	}
+	// note: from here there are two keys which have matching first
+	// three key components. They only differ by the fourth one.
+
+	/* Set the key to search for the direct items of the file */
+	make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT,
+		     4);
+
+	/* Move bytes from the direct items to the new unformatted node
+	   and delete them. */
+	while (1) {
+		int tail_size;
+
+		/* end_key.k_offset is set so that we will always find the
+		   last item of the file */
+		if (search_for_position_by_key(sb, &end_key, path) ==
+		    POSITION_FOUND)
+			reiserfs_panic(sb, "PAP-14050",
+				       "direct item (%K) not found", &end_key);
+		p_le_ih = PATH_PITEM_HEAD(path);
+		RFALSE(!is_direct_le_ih(p_le_ih),
+		       "vs-14055: direct item expected(%K), found %h",
+		       &end_key, p_le_ih);
+		tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1))
+		    + ih_item_len(p_le_ih) - 1;
+
+		/* we only send the unbh pointer if the buffer is not up to date.
+		 ** this avoids overwriting good data from writepage() with old data
+		 ** from the disk or buffer cache
+		 ** Special case: unbh->b_page will be NULL if we are coming through
+		 ** DIRECT_IO handler here.
+		 */
+		if (!unbh->b_page || buffer_uptodate(unbh)
+		    || PageUptodate(unbh->b_page)) {
+			up_to_date_bh = NULL;
+		} else {
+			up_to_date_bh = unbh;
+		}
+		retval = reiserfs_delete_item(th, path, &end_key, inode,
+						up_to_date_bh);
+
+		total_tail += retval;
+		if (tail_size == retval)
+			// done: file does not have direct items anymore
+			break;
+
+	}
+	/* if we've copied bytes from disk into the page, we need to zero
+	 ** out the unused part of the block (it was not up to date before)
+	 */
+	if (up_to_date_bh) {
+		unsigned pgoff =
+		    (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
+		char *kaddr = kmap_atomic(up_to_date_bh->b_page);
+		memset(kaddr + pgoff, 0, blk_size - total_tail);
+		kunmap_atomic(kaddr);
+	}
+
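+	/* The file no longer has a tail; U32_MAX means "no first direct byte". */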
+	REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
+
+	return 0;
+}
+
+/* stolen from fs/buffer.c */
+void reiserfs_unmap_buffer(struct buffer_head *bh)
+{
+	lock_buffer(bh);
+	if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
+		BUG();
+	}
+	clear_buffer_dirty(bh);
+	/* Remove the buffer from whatever list it belongs to. We are mostly
+	   interested in removing it from per-sb j_dirty_buffers list, to avoid
+	   BUG() on attempt to write not mapped buffer */
+	if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
+		struct inode *inode = bh->b_page->mapping->host;
+		struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
+		spin_lock(&j->j_dirty_buffers_lock);
+		list_del_init(&bh->b_assoc_buffers);
+		reiserfs_free_jh(bh);
+		spin_unlock(&j->j_dirty_buffers_lock);
+	}
+	clear_buffer_mapped(bh);
+	clear_buffer_req(bh);
+	clear_buffer_new(bh);
+	bh->b_bdev = NULL;
+	unlock_buffer(bh);
+}
+
+/* This first locks the inode (neither reads nor sync are permitted),
+   reads the tail through the page cache and inserts a direct item. When
+   the direct item has been inserted successfully, the inode is left
+   locked. The return value is always what we expect from it (the number
+   of cut bytes). But when the tail remains in the unformatted node, we
+   set mode to SKIP_BALANCING and unlock the inode. */
+int indirect2direct(struct reiserfs_transaction_handle *th,
+		    struct inode *inode, struct page *page,
+		    struct treepath *path,	/* path to the indirect item. */
+		    const struct cpu_key *item_key,	/* Key to look for
+							 * unformatted node
+							 * pointer to be cut. */
+		    loff_t n_new_file_size,	/* New file size. */
+		    char *mode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct item_head s_ih;
+	unsigned long block_size = sb->s_blocksize;
+	char *tail;
+	int tail_len, round_tail_len;
+	loff_t pos, pos1;	/* position of first byte of the tail */
+	struct cpu_key key;
+
+	BUG_ON(!th->t_trans_id);
+
+	REISERFS_SB(sb)->s_indirect2direct++;
+
+	*mode = M_SKIP_BALANCING;
+
+	/* store item head path points to. */
+	copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+
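+	/* 3.6-format (STAT_DATA_V2) direct items are padded to 8-byte
+	   boundaries, hence the round-up of the tail length. */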
+	tail_len = (n_new_file_size & (block_size - 1));
+	if (get_inode_sd_version(inode) == STAT_DATA_V2)
+		round_tail_len = ROUND_UP(tail_len);
+	else
+		round_tail_len = tail_len;
+
+	pos =
+	    le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE -
+					 1) * sb->s_blocksize;
+	pos1 = pos;
+
+	// we are protected by i_mutex: the tail cannot disappear, nor can an
+	// append be done. We are in truncate or in packing the tail in
+	// file_release.
+
+	tail = (char *)kmap(page);	/* this can schedule */
+
+	if (path_changed(&s_ih, path)) {
+		/* re-search indirect item */
+		if (search_for_position_by_key(sb, item_key, path)
+		    == POSITION_NOT_FOUND)
+			reiserfs_panic(sb, "PAP-5520",
+				       "item to be converted %K does not exist",
+				       item_key);
+		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
+#ifdef CONFIG_REISERFS_CHECK
+		pos = le_ih_k_offset(&s_ih) - 1 +
+		    (ih_item_len(&s_ih) / UNFM_P_SIZE -
+		     1) * sb->s_blocksize;
+		if (pos != pos1)
+			reiserfs_panic(sb, "vs-5530", "tail position "
+				       "changed while we were reading it");
+#endif
+	}
+
+	/* Set direct item header to insert. */
+	make_le_item_head(&s_ih, NULL, get_inode_item_key_version(inode),
+			  pos1 + 1, TYPE_DIRECT, round_tail_len,
+			  0xffff /*ih_free_space */ );
+
+	/* we want a pointer to the first byte of the tail in the page.
+	 ** the page was locked and this part of the page was up to date when
+	 ** indirect2direct was called, so we know the bytes are still valid
+	 */
+	tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
+
+	PATH_LAST_POSITION(path)++;
+
+	key = *item_key;
+	set_cpu_key_k_type(&key, TYPE_DIRECT);
+	key.key_length = 4;
+	/* Insert tail as new direct item in the tree */
+	if (reiserfs_insert_item(th, path, &key, &s_ih, inode,
+				 tail ? tail : NULL) < 0) {
+		/* No disk space, so we cannot convert the last unformatted
+		   node to a direct item. In this case we used to adjust the
+		   indirect item's ih_free_space. Now ih_free_space is not
+		   used; it would be ideal to write zeros to the corresponding
+		   unformatted node. For now, i_size serves as the guard
+		   against going past the end of the file. */
+		kunmap(page);
+		return block_size - round_tail_len;
+	}
+	kunmap(page);
+
+	/* make sure to get the i_blocks changes from reiserfs_insert_item */
+	reiserfs_update_sd(th, inode);
+
+	// note: we now have the same situation as in the direct2indirect
+	// conversion above: two keys which share the first three key
+	// components and differ only in the fourth one.
+
+	/* We have inserted new direct item and must remove last
+	   unformatted node. */
+	*mode = M_CUT;
+
+	/* we store position of first direct item in the in-core inode */
+	/* mark_file_with_tail (inode, pos1 + 1); */
+	REISERFS_I(inode)->i_first_direct_byte = pos1 + 1;
+
+	return block_size - round_tail_len;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr.c
new file mode 100644
index 0000000..048d990
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr.c
@@ -0,0 +1,1021 @@
+/*
+ * linux/fs/reiserfs/xattr.c
+ *
+ * Copyright (c) 2002 by Jeff Mahoney, <jeffm@suse.com>
+ *
+ */
+
+/*
+ * In order to implement EA/ACLs in a clean, backwards compatible manner,
+ * they are implemented as files in a "private" directory.
+ * Each EA is in its own file, with the directory layout like so (/ is assumed
+ * to be relative to fs root). Inside the /.reiserfs_priv/xattrs directory,
+ * directories named using the capital-hex form of the objectid and
+ * generation number are used. Inside each directory are individual files
+ * named with the name of the extended attribute.
+ *
+ * So, for objectid 12648430, we could have:
+ * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_access
+ * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_default
+ * /.reiserfs_priv/xattrs/C0FFEE.0/user.Content-Type
+ * .. or similar.
+ *
+ * The file contents are the text of the EA. The size is known based on the
+ * stat data describing the file.
+ *
+ * In the case of system.posix_acl_access and system.posix_acl_default, since
+ * these are special cases for filesystem ACLs, they are interpreted by the
+ * kernel, in addition, they are negatively and positively cached and attached
+ * to the inode so that unnecessary lookups are avoided.
+ *
+ * Locking works like so:
+ * Directory components (xattr root, xattr dir) are protected by their i_mutex.
+ * The xattrs themselves are protected by the xattr_sem.
+ */
+
+#include "reiserfs.h"
+#include <linux/capability.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/xattr.h>
+#include "xattr.h"
+#include "acl.h"
+#include <asm/uaccess.h>
+#include <net/checksum.h>
+#include <linux/stat.h>
+#include <linux/quotaops.h>
+#include <linux/security.h>
+
+#define PRIVROOT_NAME ".reiserfs_priv"
+#define XAROOT_NAME   "xattrs"
+
+
+/* Helpers for inode ops. We do this so that we don't have all the VFS
+ * overhead and also for proper i_mutex annotation.
+ * dir->i_mutex must be held for all of them. */
+#ifdef CONFIG_REISERFS_FS_XATTR
+static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
+{
+	BUG_ON(!mutex_is_locked(&dir->i_mutex));
+	return dir->i_op->create(dir, dentry, mode, NULL);
+}
+#endif
+
+static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	BUG_ON(!mutex_is_locked(&dir->i_mutex));
+	return dir->i_op->mkdir(dir, dentry, mode);
+}
+
+/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
+ * mutation ops aren't called during rename or splice, which are the
+ * only other users of I_MUTEX_CHILD. It violates the ordering, but that's
+ * better than allocating another subclass just for this code. */
+static int xattr_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int error;
+	BUG_ON(!mutex_is_locked(&dir->i_mutex));
+
+	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
+					I_MUTEX_CHILD, dir->i_sb);
+	error = dir->i_op->unlink(dir, dentry);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+
+	if (!error)
+		d_delete(dentry);
+	return error;
+}
+
+static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int error;
+	BUG_ON(!mutex_is_locked(&dir->i_mutex));
+
+	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
+					I_MUTEX_CHILD, dir->i_sb);
+	error = dir->i_op->rmdir(dir, dentry);
+	if (!error)
+		dentry->d_inode->i_flags |= S_DEAD;
+	mutex_unlock(&dentry->d_inode->i_mutex);
+	if (!error)
+		d_delete(dentry);
+
+	return error;
+}
+
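+/* Creation is allowed when no flags are given or XATTR_CREATE is set;
+ * XATTR_REPLACE alone means the attribute must already exist. */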
+#define xattr_may_create(flags)	(!flags || flags & XATTR_CREATE)
+
+static struct dentry *open_xa_root(struct super_block *sb, int flags)
+{
+	struct dentry *privroot = REISERFS_SB(sb)->priv_root;
+	struct dentry *xaroot;
+	if (!privroot->d_inode)
+		return ERR_PTR(-ENODATA);
+
+	mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
+
+	xaroot = dget(REISERFS_SB(sb)->xattr_root);
+	if (!xaroot)
+		xaroot = ERR_PTR(-ENODATA);
+	else if (!xaroot->d_inode) {
+		int err = -ENODATA;
+		if (xattr_may_create(flags))
+			err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
+		if (err) {
+			dput(xaroot);
+			xaroot = ERR_PTR(err);
+		}
+	}
+
+	mutex_unlock(&privroot->d_inode->i_mutex);
+	return xaroot;
+}
+
+static struct dentry *open_xa_dir(const struct inode *inode, int flags)
+{
+	struct dentry *xaroot, *xadir;
+	char namebuf[17];
+
+	xaroot = open_xa_root(inode->i_sb, flags);
+	if (IS_ERR(xaroot))
+		return xaroot;
+
+	snprintf(namebuf, sizeof(namebuf), "%X.%X",
+		 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
+		 inode->i_generation);
+
+	mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR);
+
+	xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
+	if (!IS_ERR(xadir) && !xadir->d_inode) {
+		int err = -ENODATA;
+		if (xattr_may_create(flags))
+			err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
+		if (err) {
+			dput(xadir);
+			xadir = ERR_PTR(err);
+		}
+	}
+
+	mutex_unlock(&xaroot->d_inode->i_mutex);
+	dput(xaroot);
+	return xadir;
+}
+
+/* The following are side effects of other operations that aren't explicitly
+ * modifying extended attributes. This includes operations such as permissions
+ * or ownership changes, object deletions, etc. */
+struct reiserfs_dentry_buf {
+	struct dentry *xadir;
+	int count;
+	struct dentry *dentries[8];
+};
+
+static int
+fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
+		    u64 ino, unsigned int d_type)
+{
+	struct reiserfs_dentry_buf *dbuf = buf;
+	struct dentry *dentry;
+	WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
+
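+	/* Buffer full: returning -ENOSPC makes reiserfs_for_each_xattr()
+	 * process this batch and then resume the readdir from the saved
+	 * position. */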
+	if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
+		return -ENOSPC;
+
+	if (name[0] == '.' && (namelen < 2 ||
+			       (namelen == 2 && name[1] == '.')))
+		return 0;
+
+	dentry = lookup_one_len(name, dbuf->xadir, namelen);
+	if (IS_ERR(dentry)) {
+		return PTR_ERR(dentry);
+	} else if (!dentry->d_inode) {
+		/* A directory entry exists, but no file? */
+		reiserfs_error(dentry->d_sb, "xattr-20003",
+			       "Corrupted directory: xattr %s listed but "
+			       "not found for file %s.\n",
+			       dentry->d_name.name, dbuf->xadir->d_name.name);
+		dput(dentry);
+		return -EIO;
+	}
+
+	dbuf->dentries[dbuf->count++] = dentry;
+	return 0;
+}
+
+static void
+cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
+{
+	int i;
+	for (i = 0; i < buf->count; i++)
+		if (buf->dentries[i])
+			dput(buf->dentries[i]);
+}
+
+static int reiserfs_for_each_xattr(struct inode *inode,
+				   int (*action)(struct dentry *, void *),
+				   void *data)
+{
+	struct dentry *dir;
+	int i, err = 0;
+	loff_t pos = 0;
+	struct reiserfs_dentry_buf buf = {
+		.count = 0,
+	};
+
+	/* Skip out, an xattr has no xattrs associated with it */
+	if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
+		return 0;
+
+	reiserfs_write_unlock(inode->i_sb);
+	dir = open_xa_dir(inode, XATTR_REPLACE);
+	if (IS_ERR(dir)) {
+		err = PTR_ERR(dir);
+		reiserfs_write_lock(inode->i_sb);
+		goto out;
+	} else if (!dir->d_inode) {
+		err = 0;
+		reiserfs_write_lock(inode->i_sb);
+		goto out_dir;
+	}
+
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+
+	reiserfs_write_lock(inode->i_sb);
+
+	buf.xadir = dir;
+	err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
+	while ((err == 0 || err == -ENOSPC) && buf.count) {
+		err = 0;
+
+		for (i = 0; i < buf.count && buf.dentries[i]; i++) {
+			int lerr = 0;
+			struct dentry *dentry = buf.dentries[i];
+
+			if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode))
+				lerr = action(dentry, data);
+
+			dput(dentry);
+			buf.dentries[i] = NULL;
+			err = lerr ?: err;
+		}
+		buf.count = 0;
+		if (!err)
+			err = reiserfs_readdir_dentry(dir, &buf,
+						      fill_with_dentries, &pos);
+	}
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+	/* Clean up after a failed readdir */
+	cleanup_dentry_buf(&buf);
+
+	if (!err) {
+		/* We start a transaction here to avoid an ABBA situation
+		 * between the xattr root's i_mutex and the journal lock.
+		 * This doesn't incur much additional overhead since the
+		 * new transaction will just nest inside the
+		 * outer transaction. */
+		int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
+			     4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
+		struct reiserfs_transaction_handle th;
+		err = journal_begin(&th, inode->i_sb, blocks);
+		if (!err) {
+			int jerror;
+			reiserfs_mutex_lock_nested_safe(
+					  &dir->d_parent->d_inode->i_mutex,
+					  I_MUTEX_XATTR, inode->i_sb);
+			err = action(dir, data);
+			jerror = journal_end(&th, inode->i_sb, blocks);
+			mutex_unlock(&dir->d_parent->d_inode->i_mutex);
+			err = jerror ?: err;
+		}
+	}
+out_dir:
+	dput(dir);
+out:
+	/* -ENODATA isn't an error */
+	if (err == -ENODATA)
+		err = 0;
+	return err;
+}
+
+static int delete_one_xattr(struct dentry *dentry, void *data)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+
+	/* This is the xattr dir, handle specially. */
+	if (S_ISDIR(dentry->d_inode->i_mode))
+		return xattr_rmdir(dir, dentry);
+
+	return xattr_unlink(dir, dentry);
+}
+
+static int chown_one_xattr(struct dentry *dentry, void *data)
+{
+	struct iattr *attrs = data;
+	return reiserfs_setattr(dentry, attrs);
+}
+
+/* No i_mutex, but the inode is unconnected. */
+int reiserfs_delete_xattrs(struct inode *inode)
+{
+	int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
+	if (err)
+		reiserfs_warning(inode->i_sb, "jdm-20004",
+				 "Couldn't delete all xattrs (%d)\n", err);
+	return err;
+}
+
+/* inode->i_mutex: down */
+int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
+{
+	int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
+	if (err)
+		reiserfs_warning(inode->i_sb, "jdm-20007",
+				 "Couldn't chown all xattrs (%d)\n", err);
+	return err;
+}
+
+#ifdef CONFIG_REISERFS_FS_XATTR
+/* Returns a dentry corresponding to a specific extended attribute file
+ * for the inode. If flags allow, the file is created. Otherwise, a
+ * valid or negative dentry, or an error is returned. */
+static struct dentry *xattr_lookup(struct inode *inode, const char *name,
+				    int flags)
+{
+	struct dentry *xadir, *xafile;
+	int err = 0;
+
+	xadir = open_xa_dir(inode, flags);
+	if (IS_ERR(xadir))
+		return ERR_CAST(xadir);
+
+	mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
+	xafile = lookup_one_len(name, xadir, strlen(name));
+	if (IS_ERR(xafile)) {
+		err = PTR_ERR(xafile);
+		goto out;
+	}
+
+	if (xafile->d_inode && (flags & XATTR_CREATE))
+		err = -EEXIST;
+
+	if (!xafile->d_inode) {
+		err = -ENODATA;
+		if (xattr_may_create(flags))
+			err = xattr_create(xadir->d_inode, xafile,
+					      0700|S_IFREG);
+	}
+
+	if (err)
+		dput(xafile);
+out:
+	mutex_unlock(&xadir->d_inode->i_mutex);
+	dput(xadir);
+	if (err)
+		return ERR_PTR(err);
+	return xafile;
+}
+
+/* Internal operations on file data */
+static inline void reiserfs_put_page(struct page *page)
+{
+	kunmap(page);
+	page_cache_release(page);
+}
+
+static struct page *reiserfs_get_page(struct inode *dir, size_t n)
+{
+	struct address_space *mapping = dir->i_mapping;
+	struct page *page;
+	/* We can deadlock if we try to free dentries,
+	   and an unlink/rmdir has just occurred - GFP_NOFS avoids this */
+	mapping_set_gfp_mask(mapping, GFP_NOFS);
+	page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
+	if (!IS_ERR(page)) {
+		kmap(page);
+		if (PageError(page))
+			goto fail;
+	}
+	return page;
+
+      fail:
+	reiserfs_put_page(page);
+	return ERR_PTR(-EIO);
+}
+
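+/* Cheap integrity hash over the xattr value, reusing the IP checksum
+ * helper; it is stored in the on-disk xattr header and verified on read. */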
+static inline __u32 xattr_hash(const char *msg, int len)
+{
+	return csum_partial(msg, len, 0);
+}
+
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to);
+
+static void update_ctime(struct inode *inode)
+{
+	struct timespec now = current_fs_time(inode->i_sb);
+	if (inode_unhashed(inode) || !inode->i_nlink ||
+	    timespec_equal(&inode->i_ctime, &now))
+		return;
+
+	inode->i_ctime = CURRENT_TIME_SEC;
+	mark_inode_dirty(inode);
+}
+
+static int lookup_and_delete_xattr(struct inode *inode, const char *name)
+{
+	int err = 0;
+	struct dentry *dentry, *xadir;
+
+	xadir = open_xa_dir(inode, XATTR_REPLACE);
+	if (IS_ERR(xadir))
+		return PTR_ERR(xadir);
+
+	mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
+	dentry = lookup_one_len(name, xadir, strlen(name));
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto out_dput;
+	}
+
+	if (dentry->d_inode) {
+		reiserfs_write_lock(inode->i_sb);
+		err = xattr_unlink(xadir->d_inode, dentry);
+		reiserfs_write_unlock(inode->i_sb);
+		update_ctime(inode);
+	}
+
+	dput(dentry);
+out_dput:
+	mutex_unlock(&xadir->d_inode->i_mutex);
+	dput(xadir);
+	return err;
+}
+
+
+/* Generic extended attribute operations that can be used by xa plugins */
+
+/*
+ * inode->i_mutex: down
+ */
+int
+reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
+			  struct inode *inode, const char *name,
+			  const void *buffer, size_t buffer_size, int flags)
+{
+	int err = 0;
+	struct dentry *dentry;
+	struct page *page;
+	char *data;
+	size_t file_pos = 0;
+	size_t buffer_pos = 0;
+	size_t new_size;
+	__u32 xahash = 0;
+
+	if (get_inode_sd_version(inode) == STAT_DATA_V1)
+		return -EOPNOTSUPP;
+
+	reiserfs_write_unlock(inode->i_sb);
+
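+	/* A NULL buffer means removal: delete the backing xattr file. */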
+	if (!buffer) {
+		err = lookup_and_delete_xattr(inode, name);
+		reiserfs_write_lock(inode->i_sb);
+		return err;
+	}
+
+	dentry = xattr_lookup(inode, name, flags);
+	if (IS_ERR(dentry)) {
+		reiserfs_write_lock(inode->i_sb);
+		return PTR_ERR(dentry);
+	}
+
+	down_write(&REISERFS_I(inode)->i_xattr_sem);
+
+	reiserfs_write_lock(inode->i_sb);
+
+	xahash = xattr_hash(buffer, buffer_size);
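+	/* The "|| buffer_pos == 0" clause forces one pass even for a
+	 * zero-length value, so the xattr header itself still gets written. */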
+	while (buffer_pos < buffer_size || buffer_pos == 0) {
+		size_t chunk;
+		size_t skip = 0;
+		size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+		if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
+			chunk = PAGE_CACHE_SIZE;
+		else
+			chunk = buffer_size - buffer_pos;
+
+		page = reiserfs_get_page(dentry->d_inode, file_pos);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out_unlock;
+		}
+
+		lock_page(page);
+		data = page_address(page);
+
+		if (file_pos == 0) {
+			struct reiserfs_xattr_header *rxh;
+			skip = file_pos = sizeof(struct reiserfs_xattr_header);
+			if (chunk + skip > PAGE_CACHE_SIZE)
+				chunk = PAGE_CACHE_SIZE - skip;
+			rxh = (struct reiserfs_xattr_header *)data;
+			rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
+			rxh->h_hash = cpu_to_le32(xahash);
+		}
+
+		err = __reiserfs_write_begin(page, page_offset, chunk + skip);
+		if (!err) {
+			if (buffer)
+				memcpy(data + skip, buffer + buffer_pos, chunk);
+			err = reiserfs_commit_write(NULL, page, page_offset,
+						    page_offset + chunk +
+						    skip);
+		}
+		unlock_page(page);
+		reiserfs_put_page(page);
+		buffer_pos += chunk;
+		file_pos += chunk;
+		skip = 0;
+		if (err || buffer_size == 0 || !buffer)
+			break;
+	}
+
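+	/* If the new value is shorter than the previous one, truncate the
+	 * backing file down to the new size. */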
+	new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
+	if (!err && new_size < i_size_read(dentry->d_inode)) {
+		struct iattr newattrs = {
+			.ia_ctime = current_fs_time(inode->i_sb),
+			.ia_size = new_size,
+			.ia_valid = ATTR_SIZE | ATTR_CTIME,
+		};
+
+		reiserfs_write_unlock(inode->i_sb);
+		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
+		inode_dio_wait(dentry->d_inode);
+		reiserfs_write_lock(inode->i_sb);
+
+		err = reiserfs_setattr(dentry, &newattrs);
+		mutex_unlock(&dentry->d_inode->i_mutex);
+	} else
+		update_ctime(inode);
+out_unlock:
+	up_write(&REISERFS_I(inode)->i_xattr_sem);
+	dput(dentry);
+	return err;
+}
+
+/* We need to start a transaction to maintain lock ordering */
+int reiserfs_xattr_set(struct inode *inode, const char *name,
+		       const void *buffer, size_t buffer_size, int flags)
+{
+
+	struct reiserfs_transaction_handle th;
+	int error, error2;
+	size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size);
+
+	if (!(flags & XATTR_REPLACE))
+		jbegin_count += reiserfs_xattr_jcreate_nblocks(inode);
+
+	reiserfs_write_lock(inode->i_sb);
+	error = journal_begin(&th, inode->i_sb, jbegin_count);
+	if (error) {
+		reiserfs_write_unlock(inode->i_sb);
+		return error;
+	}
+
+	error = reiserfs_xattr_set_handle(&th, inode, name,
+					  buffer, buffer_size, flags);
+
+	error2 = journal_end(&th, inode->i_sb, jbegin_count);
+	if (error == 0)
+		error = error2;
+	reiserfs_write_unlock(inode->i_sb);
+
+	return error;
+}
+
+/*
+ * inode->i_mutex: down
+ */
+int
+reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
+		   size_t buffer_size)
+{
+	ssize_t err = 0;
+	struct dentry *dentry;
+	size_t isize;
+	size_t file_pos = 0;
+	size_t buffer_pos = 0;
+	struct page *page;
+	__u32 hash = 0;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	/* We can't have xattrs attached to v1 items since they don't have
+	 * generation numbers */
+	if (get_inode_sd_version(inode) == STAT_DATA_V1)
+		return -EOPNOTSUPP;
+
+	dentry = xattr_lookup(inode, name, XATTR_REPLACE);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto out;
+	}
+
+	down_read(&REISERFS_I(inode)->i_xattr_sem);
+
+	isize = i_size_read(dentry->d_inode);
+
+	/* Just return the size needed */
+	if (buffer == NULL) {
+		err = isize - sizeof(struct reiserfs_xattr_header);
+		goto out_unlock;
+	}
+
+	if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
+		err = -ERANGE;
+		goto out_unlock;
+	}
+
+	while (file_pos < isize) {
+		size_t chunk;
+		char *data;
+		size_t skip = 0;
+		if (isize - file_pos > PAGE_CACHE_SIZE)
+			chunk = PAGE_CACHE_SIZE;
+		else
+			chunk = isize - file_pos;
+
+		page = reiserfs_get_page(dentry->d_inode, file_pos);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out_unlock;
+		}
+
+		lock_page(page);
+		data = page_address(page);
+		if (file_pos == 0) {
+			struct reiserfs_xattr_header *rxh =
+			    (struct reiserfs_xattr_header *)data;
+			skip = file_pos = sizeof(struct reiserfs_xattr_header);
+			chunk -= skip;
+			/* Magic doesn't match up. */
+			if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
+				unlock_page(page);
+				reiserfs_put_page(page);
+				reiserfs_warning(inode->i_sb, "jdm-20001",
+						 "Invalid magic for xattr (%s) "
+						 "associated with %k", name,
+						 INODE_PKEY(inode));
+				err = -EIO;
+				goto out_unlock;
+			}
+			hash = le32_to_cpu(rxh->h_hash);
+		}
+		memcpy(buffer + buffer_pos, data + skip, chunk);
+		unlock_page(page);
+		reiserfs_put_page(page);
+		file_pos += chunk;
+		buffer_pos += chunk;
+		skip = 0;
+	}
+	err = isize - sizeof(struct reiserfs_xattr_header);
+
+	if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
+	    hash) {
+		reiserfs_warning(inode->i_sb, "jdm-20002",
+				 "Invalid hash for xattr (%s) associated "
+				 "with %k", name, INODE_PKEY(inode));
+		err = -EIO;
+	}
+
+out_unlock:
+	up_read(&REISERFS_I(inode)->i_xattr_sem);
+	dput(dentry);
+
+out:
+	return err;
+}
+
+/*
+ * In order to implement different sets of xattr operations for each xattr
+ * prefix with the generic xattr API, a filesystem should create a
+ * null-terminated array of struct xattr_handler (one for each prefix) and
+ * hang a pointer to it off of the s_xattr field of the superblock.
+ *
+ * The generic_fooxattr() functions will use this list to dispatch xattr
+ * operations to the correct xattr_handler.
+ */
+#define for_each_xattr_handler(handlers, handler)		\
+		for ((handler) = *(handlers)++;			\
+			(handler) != NULL;			\
+			(handler) = *(handlers)++)
+
+/* This is the implementation for the xattr plugin infrastructure */
+static inline const struct xattr_handler *
+find_xattr_handler_prefix(const struct xattr_handler **handlers,
+			   const char *name)
+{
+	const struct xattr_handler *xah;
+
+	if (!handlers)
+		return NULL;
+
+	for_each_xattr_handler(handlers, xah) {
+		if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
+			break;
+	}
+
+	return xah;
+}
+
+
+/*
+ * Inode operation getxattr()
+ */
+ssize_t
+reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
+		  size_t size)
+{
+	const struct xattr_handler *handler;
+
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
+
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+		return -EOPNOTSUPP;
+
+	return handler->get(dentry, name, buffer, size, handler->flags);
+}
+
+/*
+ * Inode operation setxattr()
+ *
+ * dentry->d_inode->i_mutex down
+ */
+int
+reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+		  size_t size, int flags)
+{
+	const struct xattr_handler *handler;
+
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
+
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+		return -EOPNOTSUPP;
+
+	return handler->set(dentry, name, value, size, flags, handler->flags);
+}
+
+/*
+ * Inode operation removexattr()
+ *
+ * dentry->d_inode->i_mutex down
+ */
+int reiserfs_removexattr(struct dentry *dentry, const char *name)
+{
+	const struct xattr_handler *handler;
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
+
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+		return -EOPNOTSUPP;
+
+	return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
+}
+
+struct listxattr_buf {
+	size_t size;
+	size_t pos;
+	char *buf;
+	struct dentry *dentry;
+};
+
+static int listxattr_filler(void *buf, const char *name, int namelen,
+			    loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct listxattr_buf *b = (struct listxattr_buf *)buf;
+	size_t size;
+	if (name[0] != '.' ||
+	    (namelen != 1 && (name[1] != '.' || namelen != 2))) {
+		const struct xattr_handler *handler;
+		handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
+						    name);
+		if (!handler)	/* Unsupported xattr name */
+			return 0;
+		if (b->buf) {
+			size = handler->list(b->dentry, b->buf + b->pos,
+					 b->size, name, namelen,
+					 handler->flags);
+			if (size > b->size)
+				return -ERANGE;
+		} else {
+			size = handler->list(b->dentry, NULL, 0, name,
+					     namelen, handler->flags);
+		}
+
+		b->pos += size;
+	}
+	return 0;
+}
+
+/*
+ * Inode operation listxattr()
+ *
+ * We totally ignore the generic listxattr here because it would be stupid
+ * not to. Since the xattrs are organized in a directory, we can just
+ * readdir to find them.
+ */
+ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
+{
+	struct dentry *dir;
+	int err = 0;
+	loff_t pos = 0;
+	struct listxattr_buf buf = {
+		.dentry = dentry,
+		.buf = buffer,
+		.size = buffer ? size : 0,
+	};
+
+	if (!dentry->d_inode)
+		return -EINVAL;
+
+	if (!dentry->d_sb->s_xattr ||
+	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+		return -EOPNOTSUPP;
+
+	dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE);
+	if (IS_ERR(dir)) {
+		err = PTR_ERR(dir);
+		if (err == -ENODATA)
+			err = 0;  /* Not an error if there aren't any xattrs */
+		goto out;
+	}
+
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+	err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos);
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+	if (!err)
+		err = buf.pos;
+
+	dput(dir);
+out:
+	return err;
+}
+
+static int create_privroot(struct dentry *dentry)
+{
+	int err;
+	struct inode *inode = dentry->d_parent->d_inode;
+	WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+
+	err = xattr_mkdir(inode, dentry, 0700);
+	if (err || !dentry->d_inode) {
+		reiserfs_warning(dentry->d_sb, "jdm-20006",
+				 "xattrs/ACLs enabled and couldn't "
+				 "find/create .reiserfs_priv. "
+				 "Failing mount.");
+		return -EOPNOTSUPP;
+	}
+
+	dentry->d_inode->i_flags |= S_PRIVATE;
+	reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
+		      "storage.\n", PRIVROOT_NAME);
+
+	return 0;
+}
+
+#else
+int __init reiserfs_xattr_register_handlers(void) { return 0; }
+void reiserfs_xattr_unregister_handlers(void) {}
+static int create_privroot(struct dentry *dentry) { return 0; }
+#endif
+
+/* Actual operations that are exported to VFS-land */
+const struct xattr_handler *reiserfs_xattr_handlers[] = {
+#ifdef CONFIG_REISERFS_FS_XATTR
+	&reiserfs_xattr_user_handler,
+	&reiserfs_xattr_trusted_handler,
+#endif
+#ifdef CONFIG_REISERFS_FS_SECURITY
+	&reiserfs_xattr_security_handler,
+#endif
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
+	&reiserfs_posix_acl_access_handler,
+	&reiserfs_posix_acl_default_handler,
+#endif
+	NULL
+};
+
+static int xattr_mount_check(struct super_block *s)
+{
+	/* We need generation numbers to ensure that the oid mapping is
+	 * correct; v3.5 filesystems don't have them. */
+	if (old_format_only(s)) {
+		if (reiserfs_xattrs_optional(s)) {
+			/* Old format filesystem, but optional xattrs have
+			 * been enabled. Error out. */
+			reiserfs_warning(s, "jdm-2005",
+					 "xattrs/ACLs not supported "
+					 "on pre-v3.6 format filesystems. "
+					 "Failing mount.");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+int reiserfs_permission(struct inode *inode, int mask)
+{
+	/*
+	 * We don't do permission checks on the internal objects.
+	 * Permissions are determined by the "owning" object.
+	 */
+	if (IS_PRIVATE(inode))
+		return 0;
+
+	return generic_permission(inode, mask);
+}
+
+static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	return -EPERM;
+}
+
+static const struct dentry_operations xattr_lookup_poison_ops = {
+	.d_revalidate = xattr_hide_revalidate,
+};
+
+int reiserfs_lookup_privroot(struct super_block *s)
+{
+	struct dentry *dentry;
+	int err = 0;
+
+	/* If we don't have the privroot located yet - go find it */
+	reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
+	dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
+				strlen(PRIVROOT_NAME));
+	if (!IS_ERR(dentry)) {
+		REISERFS_SB(s)->priv_root = dentry;
+		d_set_d_op(dentry, &xattr_lookup_poison_ops);
+		if (dentry->d_inode)
+			dentry->d_inode->i_flags |= S_PRIVATE;
+	} else
+		err = PTR_ERR(dentry);
+	mutex_unlock(&s->s_root->d_inode->i_mutex);
+
+	return err;
+}
+
+/* We need to take a copy of the mount flags since things like
+ * MS_RDONLY don't get set until *after* we're called.
+ * mount_flags != mount_options */
+int reiserfs_xattr_init(struct super_block *s, int mount_flags)
+{
+	int err = 0;
+	struct dentry *privroot = REISERFS_SB(s)->priv_root;
+
+	err = xattr_mount_check(s);
+	if (err)
+		goto error;
+
+	if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
+		reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
+		err = create_privroot(REISERFS_SB(s)->priv_root);
+		mutex_unlock(&s->s_root->d_inode->i_mutex);
+	}
+
+	if (privroot->d_inode) {
+		s->s_xattr = reiserfs_xattr_handlers;
+		reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
+		if (!REISERFS_SB(s)->xattr_root) {
+			struct dentry *dentry;
+			dentry = lookup_one_len(XAROOT_NAME, privroot,
+						strlen(XAROOT_NAME));
+			if (!IS_ERR(dentry))
+				REISERFS_SB(s)->xattr_root = dentry;
+			else
+				err = PTR_ERR(dentry);
+		}
+		mutex_unlock(&privroot->d_inode->i_mutex);
+	}
+
+error:
+	if (err) {
+		clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
+		clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
+	}
+
+	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
+	if (reiserfs_posixacl(s))
+		s->s_flags |= MS_POSIXACL;
+	else
+		s->s_flags &= ~MS_POSIXACL;
+
+	return err;
+}
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr.h b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr.h
new file mode 100644
index 0000000..f59626c
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr.h
@@ -0,0 +1,122 @@
+#include <linux/reiserfs_xattr.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+
+struct inode;
+struct dentry;
+struct iattr;
+struct super_block;
+struct nameidata;
+
+int reiserfs_xattr_register_handlers(void) __init;
+void reiserfs_xattr_unregister_handlers(void);
+int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
+int reiserfs_lookup_privroot(struct super_block *sb);
+int reiserfs_delete_xattrs(struct inode *inode);
+int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
+int reiserfs_permission(struct inode *inode, int mask);
+
+#ifdef CONFIG_REISERFS_FS_XATTR
+#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
+ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
+			  void *buffer, size_t size);
+int reiserfs_setxattr(struct dentry *dentry, const char *name,
+		      const void *value, size_t size, int flags);
+ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+int reiserfs_removexattr(struct dentry *dentry, const char *name);
+
+int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
+int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
+int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *,
+			      struct inode *, const char *, const void *,
+			      size_t, int);
+
+extern const struct xattr_handler reiserfs_xattr_user_handler;
+extern const struct xattr_handler reiserfs_xattr_trusted_handler;
+extern const struct xattr_handler reiserfs_xattr_security_handler;
+#ifdef CONFIG_REISERFS_FS_SECURITY
+int reiserfs_security_init(struct inode *dir, struct inode *inode,
+			   const struct qstr *qstr,
+			   struct reiserfs_security_handle *sec);
+int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			    struct inode *inode,
+			    struct reiserfs_security_handle *sec);
+void reiserfs_security_free(struct reiserfs_security_handle *sec);
+#endif
+
+static inline int reiserfs_xattrs_initialized(struct super_block *sb)
+{
+	return REISERFS_SB(sb)->priv_root != NULL;
+}
+
+#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
+static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
+{
+	loff_t ret = 0;
+	if (reiserfs_file_data_log(inode)) {
+		ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize);
+		ret >>= inode->i_sb->s_blocksize_bits;
+	}
+	return ret;
+}
+
+/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
+ * Let's try to be smart about it.
+ * xattr root: We cache it. If it's not cached, we may need to create it.
+ * xattr dir: If anything has been loaded for this inode, we can set a flag
+ *            saying so.
+ * xattr file: Since we don't cache xattrs, we can't tell. We always include
+ *             blocks for it.
+ *
+ * However, since root and dir can be created between calls - YOU MUST SAVE
+ * THIS VALUE.
+ */
+static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
+{
+	size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+
+	if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
+		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+		if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
+			nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+	}
+
+	return nblocks;
+}
+
+static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
+{
+	init_rwsem(&REISERFS_I(inode)->i_xattr_sem);
+}
+
+#else
+
+#define reiserfs_getxattr NULL
+#define reiserfs_setxattr NULL
+#define reiserfs_listxattr NULL
+#define reiserfs_removexattr NULL
+
+static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
+{
+}
+#endif  /*  CONFIG_REISERFS_FS_XATTR  */
+
+#ifndef CONFIG_REISERFS_FS_SECURITY
+static inline int reiserfs_security_init(struct inode *dir,
+					 struct inode *inode,
+					 const struct qstr *qstr,
+					 struct reiserfs_security_handle *sec)
+{
+	return 0;
+}
+static inline int
+reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			struct inode *inode,
+			struct reiserfs_security_handle *sec)
+{
+	return 0;
+}
+static inline void reiserfs_security_free(struct reiserfs_security_handle *sec)
+{}
+#endif
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_acl.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_acl.c
new file mode 100644
index 0000000..44474f9
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_acl.c
@@ -0,0 +1,504 @@
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/posix_acl.h>
+#include "reiserfs.h"
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/xattr.h>
+#include <linux/slab.h>
+#include <linux/posix_acl_xattr.h>
+#include "xattr.h"
+#include "acl.h"
+#include <asm/uaccess.h>
+
+static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
+			    struct inode *inode, int type,
+			    struct posix_acl *acl);
+
+static int
+posix_acl_set(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags, int type)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int error, error2;
+	struct reiserfs_transaction_handle th;
+	size_t jcreate_blocks;
+	if (!reiserfs_posixacl(inode->i_sb))
+		return -EOPNOTSUPP;
+	if (!inode_owner_or_capable(inode))
+		return -EPERM;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl)) {
+			return PTR_ERR(acl);
+		} else if (acl) {
+			error = posix_acl_valid(acl);
+			if (error)
+				goto release_and_out;
+		}
+	} else
+		acl = NULL;
+
+	/* Pessimism: We can't assume that anything from the xattr root up
+	 * has been created. */
+
+	jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+			 reiserfs_xattr_nblocks(inode, size) * 2;
+
+	reiserfs_write_lock(inode->i_sb);
+	error = journal_begin(&th, inode->i_sb, jcreate_blocks);
+	if (error == 0) {
+		error = reiserfs_set_acl(&th, inode, type, acl);
+		error2 = journal_end(&th, inode->i_sb, jcreate_blocks);
+		if (error2)
+			error = error2;
+	}
+	reiserfs_write_unlock(inode->i_sb);
+
+      release_and_out:
+	posix_acl_release(acl);
+	return error;
+}
+
+static int
+posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
+{
+	struct posix_acl *acl;
+	int error;
+
+	if (!reiserfs_posixacl(dentry->d_sb))
+		return -EOPNOTSUPP;
+
+	acl = reiserfs_get_acl(dentry->d_inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	error = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+/*
+ * Convert from filesystem to in-memory representation.
+ */
+static struct posix_acl *posix_acl_from_disk(const void *value, size_t size)
+{
+	const char *end = (char *)value + size;
+	int n, count;
+	struct posix_acl *acl;
+
+	if (!value)
+		return NULL;
+	if (size < sizeof(reiserfs_acl_header))
+		return ERR_PTR(-EINVAL);
+	if (((reiserfs_acl_header *) value)->a_version !=
+	    cpu_to_le32(REISERFS_ACL_VERSION))
+		return ERR_PTR(-EINVAL);
+	value = (char *)value + sizeof(reiserfs_acl_header);
+	count = reiserfs_acl_count(size);
+	if (count < 0)
+		return ERR_PTR(-EINVAL);
+	if (count == 0)
+		return NULL;
+	acl = posix_acl_alloc(count, GFP_NOFS);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+	for (n = 0; n < count; n++) {
+		reiserfs_acl_entry *entry = (reiserfs_acl_entry *) value;
+		if ((char *)value + sizeof(reiserfs_acl_entry_short) > end)
+			goto fail;
+		acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
+		acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
+		switch (acl->a_entries[n].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			value = (char *)value +
+			    sizeof(reiserfs_acl_entry_short);
+			acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			value = (char *)value + sizeof(reiserfs_acl_entry);
+			if ((char *)value > end)
+				goto fail;
+			acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
+			break;
+
+		default:
+			goto fail;
+		}
+	}
+	if (value != end)
+		goto fail;
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Convert from in-memory to filesystem representation.
+ */
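+/*
+ * Note that *size is the exact on-disk size from reiserfs_acl_size(),
+ * while the allocation below is a worst case that treats every entry as
+ * a full reiserfs_acl_entry.
+ */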
+static void *posix_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+	reiserfs_acl_header *ext_acl;
+	char *e;
+	int n;
+
+	*size = reiserfs_acl_size(acl->a_count);
+	ext_acl = kmalloc(sizeof(reiserfs_acl_header) +
+						  acl->a_count *
+						  sizeof(reiserfs_acl_entry),
+						  GFP_NOFS);
+	if (!ext_acl)
+		return ERR_PTR(-ENOMEM);
+	ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION);
+	e = (char *)ext_acl + sizeof(reiserfs_acl_header);
+	for (n = 0; n < acl->a_count; n++) {
+		reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e;
+		entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
+		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
+		switch (acl->a_entries[n].e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+			e += sizeof(reiserfs_acl_entry);
+			break;
+
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			e += sizeof(reiserfs_acl_entry_short);
+			break;
+
+		default:
+			goto fail;
+		}
+	}
+	return (char *)ext_acl;
+
+fail:
+	kfree(ext_acl);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Inode operation get_posix_acl().
+ *
+ * inode->i_mutex: down
+ * BKL held [before 2.5.x]
+ */
+struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
+{
+	char *name, *value;
+	struct posix_acl *acl;
+	int size;
+	int retval;
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+
+	size = reiserfs_xattr_get(inode, name, NULL, 0);
+	if (size < 0) {
+		if (size == -ENODATA || size == -ENOSYS) {
+			set_cached_acl(inode, type, NULL);
+			return NULL;
+		}
+		return ERR_PTR(size);
+	}
+
+	value = kmalloc(size, GFP_NOFS);
+	if (!value)
+		return ERR_PTR(-ENOMEM);
+
+	retval = reiserfs_xattr_get(inode, name, value, size);
+	if (retval == -ENODATA || retval == -ENOSYS) {
+		/* This shouldn't actually happen, as it would have
+		   been caught above, but just in case... */
+		acl = NULL;
+	} else if (retval < 0) {
+		acl = ERR_PTR(retval);
+	} else {
+		acl = posix_acl_from_disk(value, retval);
+	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
+
+	kfree(value);
+	return acl;
+}
+
+/*
+ * Inode operation set_posix_acl().
+ *
+ * inode->i_mutex: down
+ * BKL held [before 2.5.x]
+ */
+static int
+reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
+		 int type, struct posix_acl *acl)
+{
+	char *name;
+	void *value = NULL;
+	size_t size = 0;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		if (acl) {
+			error = posix_acl_equiv_mode(acl, &inode->i_mode);
+			if (error < 0)
+				return error;
+			if (error == 0)
+				acl = NULL;
+		}
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		value = posix_acl_to_disk(acl, &size);
+		if (IS_ERR(value))
+			return (int)PTR_ERR(value);
+	}
+
+	error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0);
+
+	/*
+	 * Ensure that the inode gets dirtied if we're only using
+	 * the mode bits and an old ACL didn't exist. We don't need
+	 * to check if the inode is hashed here since we won't get
+	 * called by reiserfs_inherit_default_acl().
+	 */
+	if (error == -ENODATA) {
+		error = 0;
+		if (type == ACL_TYPE_ACCESS) {
+			inode->i_ctime = CURRENT_TIME_SEC;
+			mark_inode_dirty(inode);
+		}
+	}
+
+	kfree(value);
+
+	if (!error)
+		set_cached_acl(inode, type, acl);
+
+	return error;
+}
+
+/* dir->i_mutex: locked,
+ * inode is new and not released into the wild yet */
+int
+reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+			     struct inode *dir, struct dentry *dentry,
+			     struct inode *inode)
+{
+	struct posix_acl *acl;
+	int err = 0;
+
+	/* ACLs only get applied to files and directories */
+	if (S_ISLNK(inode->i_mode))
+		return 0;
+
+	/* ACLs can only be used on "new" objects, so if it's an old object
+	 * there is nothing to inherit from */
+	if (get_inode_sd_version(dir) == STAT_DATA_V1)
+		goto apply_umask;
+
+	/* Don't apply ACLs to objects in the .reiserfs_priv tree.  This
+	 * would be useless since permissions are ignored, and a pain because
+	 * it introduces locking cycles. */
+	if (IS_PRIVATE(dir)) {
+		inode->i_flags |= S_PRIVATE;
+		goto apply_umask;
+	}
+
+	acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+
+	if (acl) {
+		/* Copy the default ACL to the default ACL of a new directory */
+		if (S_ISDIR(inode->i_mode)) {
+			err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT,
+					       acl);
+			if (err)
+				goto cleanup;
+		}
+
+		/* Now we reconcile the new ACL and the mode,
+		   potentially modifying both */
+		err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+		if (err < 0)
+			return err;
+
+		/* If we need an ACL.. */
+		if (err > 0)
+			err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl);
+cleanup:
+		posix_acl_release(acl);
+	} else {
+apply_umask:
+		/* no ACL, apply umask */
+		inode->i_mode &= ~current_umask();
+	}
+
+	return err;
+}
+
+/* This is used to cache the default ACL before a new object is created.
+ * The biggest reason for this is to get an idea of how many blocks will
+ * actually be required for the create operation if we must inherit an ACL.
+ * An ACL write can add up to 3 object creations and an additional file
+ * write, so we'd prefer not to reserve that many blocks in the journal
+ * if we can avoid it.  It also has the advantage of not loading the ACL
+ * with a transaction open; this may seem silly, but if the owner of the
+ * directory is doing the creation, the ACL may not be loaded since the
+ * permissions wouldn't require it.
+ * Returns the number of blocks required for the transaction.
+ */
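+/*
+ * Illustrative call sequence for a create path (a sketch only; the
+ * caller sizes its journal reservation before starting the transaction):
+ *
+ *	jbegin_count += reiserfs_cache_default_acl(dir);
+ *	error = journal_begin(&th, dir->i_sb, jbegin_count);
+ *	...
+ *	error = reiserfs_inherit_default_acl(&th, dir, dentry, inode);
+ */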
+int reiserfs_cache_default_acl(struct inode *inode)
+{
+	struct posix_acl *acl;
+	int nblocks = 0;
+
+	if (IS_PRIVATE(inode))
+		return 0;
+
+	acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
+
+	if (acl && !IS_ERR(acl)) {
+		int size = reiserfs_acl_size(acl->a_count);
+
+		/* Other xattrs can be created during inode creation.  We don't
+		 * want to claim too many blocks, so we check to see whether
+		 * we need to create the xattr tree, and beyond that we only
+		 * need blocks for two files. */
+		nblocks = reiserfs_xattr_jcreate_nblocks(inode);
+		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+
+		/* We need to account for writes + bitmaps for two files */
+		nblocks += reiserfs_xattr_nblocks(inode, size) * 4;
+		posix_acl_release(acl);
+	}
+
+	return nblocks;
+}
+
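+/*
+ * Propagate a chmod() into the access ACL: reread the ACL with the
+ * superblock write lock dropped, recompute it for the new mode, and
+ * rewrite it in a small transaction of its own.
+ */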
+int reiserfs_acl_chmod(struct inode *inode)
+{
+	struct reiserfs_transaction_handle th;
+	struct posix_acl *acl;
+	size_t size;
+	int depth;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (get_inode_sd_version(inode) == STAT_DATA_V1 ||
+	    !reiserfs_posixacl(inode->i_sb)) {
+		return 0;
+	}
+
+	reiserfs_write_unlock(inode->i_sb);
+	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
+	reiserfs_write_lock(inode->i_sb);
+	if (!acl)
+		return 0;
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	error = posix_acl_chmod(&acl, GFP_NOFS, inode->i_mode);
+	if (error)
+		return error;
+
+	size = reiserfs_xattr_nblocks(inode, reiserfs_acl_size(acl->a_count));
+	depth = reiserfs_write_lock_once(inode->i_sb);
+	error = journal_begin(&th, inode->i_sb, size * 2);
+	if (!error) {
+		int error2;
+		error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, acl);
+		error2 = journal_end(&th, inode->i_sb, size * 2);
+		if (error2)
+			error = error2;
+	}
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+	posix_acl_release(acl);
+	return error;
+}
+
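+/*
+ * ->list handlers: advertise the ACL attribute names in listxattr()
+ * output, but only on filesystems mounted with ACL support.
+ */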
+static size_t posix_acl_access_list(struct dentry *dentry, char *list,
+				    size_t list_size, const char *name,
+				    size_t name_len, int type)
+{
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
+	if (!reiserfs_posixacl(dentry->d_sb))
+		return 0;
+	if (list && size <= list_size)
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
+	return size;
+}
+
+const struct xattr_handler reiserfs_posix_acl_access_handler = {
+	.prefix = POSIX_ACL_XATTR_ACCESS,
+	.flags = ACL_TYPE_ACCESS,
+	.get = posix_acl_get,
+	.set = posix_acl_set,
+	.list = posix_acl_access_list,
+};
+
+static size_t posix_acl_default_list(struct dentry *dentry, char *list,
+				     size_t list_size, const char *name,
+				     size_t name_len, int type)
+{
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
+	if (!reiserfs_posixacl(dentry->d_sb))
+		return 0;
+	if (list && size <= list_size)
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
+	return size;
+}
+
+const struct xattr_handler reiserfs_posix_acl_default_handler = {
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
+	.flags = ACL_TYPE_DEFAULT,
+	.get = posix_acl_get,
+	.set = posix_acl_set,
+	.list = posix_acl_default_list,
+};
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_security.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_security.c
new file mode 100644
index 0000000..800a3ce
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_security.c
@@ -0,0 +1,120 @@
+#include "reiserfs.h"
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/xattr.h>
+#include <linux/slab.h>
+#include "xattr.h"
+#include <linux/security.h>
+#include <asm/uaccess.h>
+
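+/*
+ * Handlers for the "security." namespace.  They receive the full
+ * attribute name, so names with nothing after the prefix are rejected,
+ * as are objects in the private xattr tree.
+ */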
+static int
+security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+		int handler_flags)
+{
+	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
+		return -EINVAL;
+
+	if (IS_PRIVATE(dentry->d_inode))
+		return -EPERM;
+
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+}
+
+static int
+security_set(struct dentry *dentry, const char *name, const void *buffer,
+	     size_t size, int flags, int handler_flags)
+{
+	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
+		return -EINVAL;
+
+	if (IS_PRIVATE(dentry->d_inode))
+		return -EPERM;
+
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+}
+
+static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t namelen, int handler_flags)
+{
+	const size_t len = namelen + 1;
+
+	if (IS_PRIVATE(dentry->d_inode))
+		return 0;
+
+	if (list && len <= list_len) {
+		memcpy(list, name, namelen);
+		list[namelen] = '\0';
+	}
+
+	return len;
+}
+
+/* Initializes the security context for a new inode and returns the number
+ * of blocks needed for the transaction.  On success, the handle must be
+ * released with reiserfs_security_free() when the caller is done. */
+int reiserfs_security_init(struct inode *dir, struct inode *inode,
+			   const struct qstr *qstr,
+			   struct reiserfs_security_handle *sec)
+{
+	int blocks = 0;
+	int error;
+
+	sec->name = NULL;
+
+	/* Don't add selinux attributes to objects in the private xattr
+	 * tree - they'll never get used */
+	if (IS_PRIVATE(dir))
+		return 0;
+
+	error = security_old_inode_init_security(inode, dir, qstr, &sec->name,
+						 &sec->value, &sec->length);
+	if (error) {
+		if (error == -EOPNOTSUPP)
+			error = 0;
+
+		sec->name = NULL;
+		sec->value = NULL;
+		sec->length = 0;
+		return error;
+	}
+
+	if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) {
+		blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+			 reiserfs_xattr_nblocks(inode, sec->length);
+		/* We don't want to count the directories twice if we have
+		 * a default ACL. */
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+	}
+	return blocks;
+}
+
+int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			    struct inode *inode,
+			    struct reiserfs_security_handle *sec)
+{
+	int error;
+	if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX))
+		return -EINVAL;
+
+	error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value,
+					  sec->length, XATTR_CREATE);
+	if (error == -ENODATA || error == -EOPNOTSUPP)
+		error = 0;
+
+	return error;
+}
+
+void reiserfs_security_free(struct reiserfs_security_handle *sec)
+{
+	kfree(sec->name);
+	kfree(sec->value);
+	sec->name = NULL;
+	sec->value = NULL;
+}
+
+const struct xattr_handler reiserfs_xattr_security_handler = {
+	.prefix = XATTR_SECURITY_PREFIX,
+	.get = security_get,
+	.set = security_set,
+	.list = security_list,
+};
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_trusted.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_trusted.c
new file mode 100644
index 0000000..a003571
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_trusted.c
@@ -0,0 +1,56 @@
+#include "reiserfs.h"
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/xattr.h>
+#include "xattr.h"
+#include <asm/uaccess.h>
+
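+/*
+ * Handlers for the "trusted." namespace: the same shape as the security
+ * handlers, but additionally restricted to CAP_SYS_ADMIN.
+ */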
+static int
+trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+	    int handler_flags)
+{
+	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+		return -EPERM;
+
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+}
+
+static int
+trusted_set(struct dentry *dentry, const char *name, const void *buffer,
+	    size_t size, int flags, int handler_flags)
+{
+	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+		return -EPERM;
+
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+}
+
+static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
+			   const char *name, size_t name_len, int handler_flags)
+{
+	const size_t len = name_len + 1;
+
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+		return 0;
+
+	if (list && len <= list_size) {
+		memcpy(list, name, name_len);
+		list[name_len] = '\0';
+	}
+	return len;
+}
+
+const struct xattr_handler reiserfs_xattr_trusted_handler = {
+	.prefix = XATTR_TRUSTED_PREFIX,
+	.get = trusted_get,
+	.set = trusted_set,
+	.list = trusted_list,
+};
diff --git a/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_user.c b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_user.c
new file mode 100644
index 0000000..8667491
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/fs/reiserfs/xattr_user.c
@@ -0,0 +1,52 @@
+#include "reiserfs.h"
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/xattr.h>
+#include "xattr.h"
+#include <asm/uaccess.h>
+
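+/*
+ * Handlers for the "user." namespace, gated on the user_xattr mount
+ * option via reiserfs_xattrs_user().
+ */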
+static int
+user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+	 int handler_flags)
+{
+	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
+		return -EINVAL;
+	if (!reiserfs_xattrs_user(dentry->d_sb))
+		return -EOPNOTSUPP;
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+}
+
+static int
+user_set(struct dentry *dentry, const char *name, const void *buffer,
+	 size_t size, int flags, int handler_flags)
+{
+	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
+		return -EINVAL;
+
+	if (!reiserfs_xattrs_user(dentry->d_sb))
+		return -EOPNOTSUPP;
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+}
+
+static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
+			const char *name, size_t name_len, int handler_flags)
+{
+	const size_t len = name_len + 1;
+
+	if (!reiserfs_xattrs_user(dentry->d_sb))
+		return 0;
+	if (list && len <= list_size) {
+		memcpy(list, name, name_len);
+		list[name_len] = '\0';
+	}
+	return len;
+}
+
+const struct xattr_handler reiserfs_xattr_user_handler = {
+	.prefix = XATTR_USER_PREFIX,
+	.get = user_get,
+	.set = user_set,
+	.list = user_list,
+};
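+
+/*
+ * All of the handlers above are collected into reiserfs_xattr_handlers[]
+ * in xattr.c and hung off the superblock at mount time, roughly:
+ *
+ *	s->s_xattr = reiserfs_xattr_handlers;
+ */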