// SPDX-License-Identifier: GPL-2.0
/*
 * Write ahead logging implementation copyright Chris Mason 2000
 *
 * The background commits make this code very interrelated, and
 * overly complex.  I need to rethink things a bit.... The major players:
 *
 * journal_begin -- call with the number of blocks you expect to log.
 *                  If the current transaction is too
 *                  old, it will block until the current transaction is
 *                  finished, and then start a new one.
 *                  Usually, your transaction will get joined in with
 *                  previous ones for speed.
 *
 * journal_join  -- same as journal_begin, but won't block on the current
 *                  transaction regardless of age.  Don't ever call
 *                  this.  Ever.  There are only two places it should be
 *                  called from, and they are both inside this file.
 *
 * journal_mark_dirty -- adds blocks into this transaction.  clears any flags
 *                       that might make them get sent to disk
 *                       and then marks them BH_JDirty.  Puts the buffer head
 *                       into the current transaction hash.
 *
 * journal_end -- if the current transaction is batchable, it does nothing.
 *                   Otherwise, it could do an async/synchronous commit, or
 *                   a full flush of all log and real blocks in the
 *                   transaction.
 *
 * flush_old_commits -- if the current transaction is too old, it is ended and
 *                      commit blocks are sent to disk.  Forces commit blocks
 *                      to disk for all backgrounded commits that have been
 *                      around too long.
 *                   -- Note, if you call this as an immediate flush from
 *                      within kupdate, it will ignore the immediate flag
 */
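
/*
 * A minimal sketch of the calling pattern described above (error handling
 * trimmed; sb, bh and the use of JOURNAL_PER_BALANCE_CNT as the block
 * estimate are illustrative, not taken from a real caller):
 *
 *	struct reiserfs_transaction_handle th;
 *	int err;
 *
 *	err = journal_begin(&th, sb, JOURNAL_PER_BALANCE_CNT);
 *	if (err)
 *		return err;
 *	reiserfs_prepare_for_journal(sb, bh, 1);
 *	... modify bh->b_data ...
 *	journal_mark_dirty(&th, bh);
 *	err = journal_end(&th);
 */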

#include <linux/time.h>
#include <linux/semaphore.h>
#include <linux/vmalloc.h>
#include "reiserfs.h"
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>

/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                               j_list))
#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                               j_working_list))

/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018
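/*
 * Sanity sketch of where 1018 comes from, assuming the on-disk desc
 * layout in reiserfs.h (three __le32 header fields, the block number
 * array, then a 12-byte magic) and 4k journal blocks:
 *
 *	3 * 4 + JOURNAL_TRANS_HALF * 4 + 12 = 12 + 4072 + 12 = 4096
 */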
#define BUFNR 64		/* read ahead */

/* cnode stat bits.  Move these into reiserfs_fs.h */

/* this block was freed, and can't be written.  */
#define BLOCK_FREED 2
/* this block was freed during this transaction, and can't be written */
#define BLOCK_FREED_HOLDER 3

/* used in flush_journal_list */
#define BLOCK_NEEDS_FLUSH 4
#define BLOCK_DIRTIED 5

/* journal list state bits */
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
#define LIST_COMMIT_PENDING  4	/* someone will commit this list */

/* flags for do_journal_end */
#define FLUSH_ALL   1		/* flush commit and real blocks */
#define COMMIT_NOW  2		/* end and commit this transaction */
#define WAIT        4		/* wait for the log blocks to hit the disk */

static int do_journal_end(struct reiserfs_transaction_handle *, int flags);
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl, int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl, int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
			struct super_block *sb);
static void release_journal_dev(struct super_block *super,
				struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);

/* values for join in do_journal_begin_r */
enum {
	JBEGIN_REG = 0,		/* regular journal begin */
	/* join the running transaction if at all possible */
	JBEGIN_JOIN = 1,
	/* called from cleanup code, ignores aborted flag */
	JBEGIN_ABORT = 2,
};

static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *sb,
			      unsigned long nblocks, int join);

static void init_journal_hash(struct super_block *sb)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	memset(journal->j_hash_table, 0,
	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
}

/*
 * clears BH_Dirty and sticks the buffer on the clean list.  Called because
 * I can't allow refile_buffer to make schedule happen after I've freed a
 * block.  Look at remove_from_transaction and journal_mark_freed for
 * more details.
 */
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (bh) {
		clear_buffer_dirty(bh);
		clear_buffer_journal_test(bh);
	}
	return 0;
}

static struct reiserfs_bitmap_node *
allocate_bitmap_node(struct super_block *sb)
{
	struct reiserfs_bitmap_node *bn;
	static int id;

	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
	if (!bn) {
		return NULL;
	}
	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
	if (!bn->data) {
		kfree(bn);
		return NULL;
	}
	bn->id = id++;
	INIT_LIST_HEAD(&bn->list);
	return bn;
}

static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_bitmap_node *bn = NULL;
	struct list_head *entry = journal->j_bitmap_nodes.next;

	journal->j_used_bitmap_nodes++;
repeat:

	if (entry != &journal->j_bitmap_nodes) {
		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
		list_del(entry);
		memset(bn->data, 0, sb->s_blocksize);
		journal->j_free_bitmap_nodes--;
		return bn;
	}
	bn = allocate_bitmap_node(sb);
	if (!bn) {
		yield();
		goto repeat;
	}
	return bn;
}
static inline void free_bitmap_node(struct super_block *sb,
				    struct reiserfs_bitmap_node *bn)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	journal->j_used_bitmap_nodes--;
	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
		kfree(bn->data);
		kfree(bn);
	} else {
		list_add(&bn->list, &journal->j_bitmap_nodes);
		journal->j_free_bitmap_nodes++;
	}
}

static void allocate_bitmap_nodes(struct super_block *sb)
{
	int i;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_bitmap_node *bn = NULL;
	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
		bn = allocate_bitmap_node(sb);
		if (bn) {
			list_add(&bn->list, &journal->j_bitmap_nodes);
			journal->j_free_bitmap_nodes++;
		} else {
			/* this is ok, we'll try again when more are needed */
			break;
		}
	}
}

static int set_bit_in_list_bitmap(struct super_block *sb,
				  b_blocknr_t block,
				  struct reiserfs_list_bitmap *jb)
{
	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
	unsigned int bit_nr = block % (sb->s_blocksize << 3);

	if (!jb->bitmaps[bmap_nr]) {
		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
	}
	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
	return 0;
}
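
/*
 * Worked example of the arithmetic above, assuming a 4k block size:
 * each bitmap node covers s_blocksize << 3 = 32768 blocks, so block
 * 40000 lands in node 40000 / 32768 = 1, at bit 40000 % 32768 = 7232.
 */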

static void cleanup_bitmap_list(struct super_block *sb,
				struct reiserfs_list_bitmap *jb)
{
	int i;
	if (jb->bitmaps == NULL)
		return;

	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
		if (jb->bitmaps[i]) {
			free_bitmap_node(sb, jb->bitmaps[i]);
			jb->bitmaps[i] = NULL;
		}
	}
}

/*
 * only call this on FS unmount.
 */
static int free_list_bitmaps(struct super_block *sb,
			     struct reiserfs_list_bitmap *jb_array)
{
	int i;
	struct reiserfs_list_bitmap *jb;
	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
		jb = jb_array + i;
		jb->journal_list = NULL;
		cleanup_bitmap_list(sb, jb);
		vfree(jb->bitmaps);
		jb->bitmaps = NULL;
	}
	return 0;
}

static int free_bitmap_nodes(struct super_block *sb)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct list_head *next = journal->j_bitmap_nodes.next;
	struct reiserfs_bitmap_node *bn;

	while (next != &journal->j_bitmap_nodes) {
		bn = list_entry(next, struct reiserfs_bitmap_node, list);
		list_del(next);
		kfree(bn->data);
		kfree(bn);
		next = journal->j_bitmap_nodes.next;
		journal->j_free_bitmap_nodes--;
	}

	return 0;
}

/*
 * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
 * jb_array is the array to be filled in.
 */
int reiserfs_allocate_list_bitmaps(struct super_block *sb,
				   struct reiserfs_list_bitmap *jb_array,
				   unsigned int bmap_nr)
{
	int i;
	int failed = 0;
	struct reiserfs_list_bitmap *jb;
	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);

	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
		jb = jb_array + i;
		jb->journal_list = NULL;
		jb->bitmaps = vzalloc(mem);
		if (!jb->bitmaps) {
			reiserfs_warning(sb, "clm-2000", "unable to "
					 "allocate bitmaps for journal lists");
			failed = 1;
			break;
		}
	}
	if (failed) {
		free_list_bitmaps(sb, jb_array);
		return -1;
	}
	return 0;
}

/*
 * find an available list bitmap.  If you can't find one, flush a commit list
 * and try again
 */
static struct reiserfs_list_bitmap *
get_list_bitmap(struct super_block *sb, struct reiserfs_journal_list *jl)
{
	int i, j;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_list_bitmap *jb = NULL;

	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
		i = journal->j_list_bitmap_index;
		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
		jb = journal->j_list_bitmap + i;
		if (journal->j_list_bitmap[i].journal_list) {
			flush_commit_list(sb,
					  journal->j_list_bitmap[i].
					  journal_list, 1);
			if (!journal->j_list_bitmap[i].journal_list) {
				break;
			}
		} else {
			break;
		}
	}
	/* double check to make sure it flushed correctly */
	if (jb->journal_list)
		return NULL;
	jb->journal_list = jl;
	return jb;
}

/*
 * allocates a new chunk of X nodes, and links them all together as a list.
 * Uses the cnode->next and cnode->prev pointers
 * returns NULL on failure
 */
static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
{
	struct reiserfs_journal_cnode *head;
	int i;
	if (num_cnodes <= 0) {
		return NULL;
	}
	head = vzalloc(array_size(num_cnodes,
				  sizeof(struct reiserfs_journal_cnode)));
	if (!head) {
		return NULL;
	}
	head[0].prev = NULL;
	head[0].next = head + 1;
	for (i = 1; i < num_cnodes; i++) {
		head[i].prev = head + (i - 1);
		/* for the last one, next is overwritten after the loop */
		head[i].next = head + (i + 1);
	}
	head[num_cnodes - 1].next = NULL;
	return head;
}

/* pulls a cnode off the free list, or returns NULL on failure */
static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
{
	struct reiserfs_journal_cnode *cn;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);

	reiserfs_check_lock_depth(sb, "get_cnode");

	if (journal->j_cnode_free <= 0) {
		return NULL;
	}
	journal->j_cnode_used++;
	journal->j_cnode_free--;
	cn = journal->j_cnode_free_list;
	if (!cn) {
		return cn;
	}
	if (cn->next) {
		cn->next->prev = NULL;
	}
	journal->j_cnode_free_list = cn->next;
	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
	return cn;
}

/*
 * returns a cnode to the free list
 */
static void free_cnode(struct super_block *sb,
		       struct reiserfs_journal_cnode *cn)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);

	reiserfs_check_lock_depth(sb, "free_cnode");

	journal->j_cnode_used--;
	journal->j_cnode_free++;
	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); */
	cn->next = journal->j_cnode_free_list;
	if (journal->j_cnode_free_list) {
		journal->j_cnode_free_list->prev = cn;
	}
	/*
	 * not needed with the memset, but I might kill the memset,
	 * and forget to do this
	 */
	cn->prev = NULL;
	journal->j_cnode_free_list = cn;
}

static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);
	clear_buffer_journal_restore_dirty(bh);
}

/*
 * return a cnode with same dev, block number and size in table,
 * or null if not found
 */
static inline struct reiserfs_journal_cnode *
get_journal_hash_dev(struct super_block *sb,
		     struct reiserfs_journal_cnode **table, long bl)
{
	struct reiserfs_journal_cnode *cn;
	cn = journal_hash(table, sb, bl);
	while (cn) {
		if (cn->blocknr == bl && cn->sb == sb)
			return cn;
		cn = cn->hnext;
	}
	return NULL;
}

/*
 * this actually means 'can this block be reallocated yet?'.  If you set
 * search_all, a block can only be allocated if it is not in the current
 * transaction, was not freed by the current transaction, and has no chance
 * of ever being overwritten by a replay after crashing.
 *
 * If you don't set search_all, a block can only be allocated if it is not
 * in the current transaction.  Since deleting a block removes it from the
 * current transaction, this case should never happen.  If you don't set
 * search_all, make sure you never write the block without logging it.
 *
 * next_zero_bit is a suggestion about the next block to try for find_forward.
 * when bl is rejected because it is set in a journal list bitmap, we search
 * for the next zero bit in the bitmap that rejected bl.  Then, we return
 * that through next_zero_bit for find_forward to try.
 *
 * Just because we return something in next_zero_bit does not mean we won't
 * reject it on the next call to reiserfs_in_journal
 */
int reiserfs_in_journal(struct super_block *sb,
			unsigned int bmap_nr, int bit_nr, int search_all,
			b_blocknr_t *next_zero_bit)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_journal_cnode *cn;
	struct reiserfs_list_bitmap *jb;
	int i;
	unsigned long bl;

	*next_zero_bit = 0;	/* always start this at zero. */

	PROC_INFO_INC(sb, journal.in_journal);
	/*
	 * If we aren't doing a search_all, this is a metablock, and it
	 * will be logged before use.  If we crash before the transaction
	 * that freed it commits, this transaction won't have committed
	 * either, and the block will never be written
	 */
	if (search_all) {
		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
			PROC_INFO_INC(sb, journal.in_journal_bitmap);
			jb = journal->j_list_bitmap + i;
			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
			    test_bit(bit_nr,
				     (unsigned long *)jb->bitmaps[bmap_nr]->
				     data)) {
				*next_zero_bit =
				    find_next_zero_bit((unsigned long *)
						       (jb->bitmaps[bmap_nr]->
							data),
						       sb->s_blocksize << 3,
						       bit_nr + 1);
				return 1;
			}
		}
	}

	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
	/* is it in any old transactions? */
	if (search_all
	    && (cn =
		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
		return 1;
	}

	/* is it in the current transaction?  This should never happen */
	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
		BUG();
		return 1;
	}

	PROC_INFO_INC(sb, journal.in_journal_reusable);
	/* safe for reuse */
	return 0;
}
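
/*
 * Sketch of the caller side, patterned after the scan loop in the block
 * allocator (bitmap.c); the names s, bmap_n, offset and next_zero are
 * illustrative:
 *
 *	b_blocknr_t next_zero;
 *
 *	if (reiserfs_in_journal(s, bmap_n, offset, 1, &next_zero))
 *		offset = next_zero;	(bit busy, resume scanning there)
 *	else
 *		... the block is safe to reallocate ...
 */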

/* insert cn into table */
static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
				       struct reiserfs_journal_cnode *cn)
{
	struct reiserfs_journal_cnode *cn_orig;

	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
	cn->hnext = cn_orig;
	cn->hprev = NULL;
	if (cn_orig) {
		cn_orig->hprev = cn;
	}
	journal_hash(table, cn->sb, cn->blocknr) = cn;
}

/* lock the current transaction */
static inline void lock_journal(struct super_block *sb)
{
	PROC_INFO_INC(sb, journal.lock_journal);

	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
}

/* unlock the current transaction */
static inline void unlock_journal(struct super_block *sb)
{
	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
}

static inline void get_journal_list(struct reiserfs_journal_list *jl)
{
	jl->j_refcount++;
}

static inline void put_journal_list(struct super_block *s,
				    struct reiserfs_journal_list *jl)
{
	if (jl->j_refcount < 1) {
		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
			       jl->j_trans_id, jl->j_refcount);
	}
	if (--jl->j_refcount == 0)
		kfree(jl);
}

/*
 * this used to be much more involved, and I'm keeping it just in case
 * things get ugly again.  it gets called by flush_commit_list, and
 * cleans up any data stored about blocks freed during a transaction.
 */
static void cleanup_freed_for_journal_list(struct super_block *sb,
					   struct reiserfs_journal_list *jl)
{

	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
	if (jb) {
		cleanup_bitmap_list(sb, jb);
	}
	jl->j_list_bitmap->journal_list = NULL;
	jl->j_list_bitmap = NULL;
}

static int journal_list_still_alive(struct super_block *s,
				    unsigned int trans_id)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	struct list_head *entry = &journal->j_journal_list;
	struct reiserfs_journal_list *jl;

	if (!list_empty(entry)) {
		jl = JOURNAL_LIST_ENTRY(entry->next);
		if (jl->j_trans_id <= trans_id) {
			return 1;
		}
	}
	return 0;
}

/*
 * If page->mapping was null, we failed to truncate this page for
 * some reason.  Most likely because it was truncated after being
 * logged via data=journal.
 *
 * This does a check to see if the buffer belongs to one of these
 * lost pages before doing the final put_bh.  If page->mapping was
 * null, it tries to free buffers on the page, which should make the
 * final put_page drop the page from the lru.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	if (!page->mapping && trylock_page(page)) {
		get_page(page);
		put_bh(bh);
		if (!page->mapping)
			try_to_free_buffers(page);
		unlock_page(page);
		put_page(page);
	} else {
		put_bh(bh);
	}
}

static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	if (buffer_journaled(bh)) {
		reiserfs_warning(NULL, "clm-2084",
				 "pinned buffer %lu:%pg sent to disk",
				 bh->b_blocknr, bh->b_bdev);
	}
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);

	unlock_buffer(bh);
	release_buffer_page(bh);
}

static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
	put_bh(bh);
}

static void submit_logged_buffer(struct buffer_head *bh)
{
	get_bh(bh);
	bh->b_end_io = reiserfs_end_buffer_io_sync;
	clear_buffer_journal_new(bh);
	clear_buffer_dirty(bh);
	if (!test_clear_buffer_journal_test(bh))
		BUG();
	if (!buffer_uptodate(bh))
		BUG();
	submit_bh(REQ_OP_WRITE, 0, bh);
}

static void submit_ordered_buffer(struct buffer_head *bh)
{
	get_bh(bh);
	bh->b_end_io = reiserfs_end_ordered_io;
	clear_buffer_dirty(bh);
	if (!buffer_uptodate(bh))
		BUG();
	submit_bh(REQ_OP_WRITE, 0, bh);
}

#define CHUNK_SIZE 32
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];
	int nr;
};

static void write_chunk(struct buffer_chunk *chunk)
{
	int i;
	for (i = 0; i < chunk->nr; i++) {
		submit_logged_buffer(chunk->bh[i]);
	}
	chunk->nr = 0;
}

static void write_ordered_chunk(struct buffer_chunk *chunk)
{
	int i;
	for (i = 0; i < chunk->nr; i++) {
		submit_ordered_buffer(chunk->bh[i]);
	}
	chunk->nr = 0;
}

static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
			spinlock_t *lock, void (fn) (struct buffer_chunk *))
{
	int ret = 0;
	BUG_ON(chunk->nr >= CHUNK_SIZE);
	chunk->bh[chunk->nr++] = bh;
	if (chunk->nr >= CHUNK_SIZE) {
		ret = 1;
		if (lock) {
			spin_unlock(lock);
			fn(chunk);
			spin_lock(lock);
		} else {
			fn(chunk);
		}
	}
	return ret;
}
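
/*
 * Usage sketch for the chunk helpers above (illustrative, mirroring
 * write_ordered_buffers below): batch buffers until add_to_chunk
 * flushes a full chunk itself, then drain any partial chunk at the end.
 *
 *	struct buffer_chunk chunk;
 *
 *	chunk.nr = 0;
 *	... for each dirty buffer bh ...
 *		add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
 *	...
 *	if (chunk.nr)
 *		write_ordered_chunk(&chunk);
 */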

static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
static struct reiserfs_jh *alloc_jh(void)
{
	struct reiserfs_jh *jh;
	while (1) {
		jh = kmalloc(sizeof(*jh), GFP_NOFS);
		if (jh) {
			atomic_inc(&nr_reiserfs_jh);
			return jh;
		}
		yield();
	}
}

/*
 * we want to free the jh when the buffer has been written
 * and waited on
 */
void reiserfs_free_jh(struct buffer_head *bh)
{
	struct reiserfs_jh *jh;

	jh = bh->b_private;
	if (jh) {
		bh->b_private = NULL;
		jh->bh = NULL;
		list_del_init(&jh->list);
		kfree(jh);
		if (atomic_read(&nr_reiserfs_jh) <= 0)
			BUG();
		atomic_dec(&nr_reiserfs_jh);
		put_bh(bh);
	}
}

static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
			   int tail)
{
	struct reiserfs_jh *jh;

	if (bh->b_private) {
		spin_lock(&j->j_dirty_buffers_lock);
		if (!bh->b_private) {
			spin_unlock(&j->j_dirty_buffers_lock);
			goto no_jh;
		}
		jh = bh->b_private;
		list_del_init(&jh->list);
	} else {
no_jh:
		get_bh(bh);
		jh = alloc_jh();
		spin_lock(&j->j_dirty_buffers_lock);
		/*
		 * buffer must be locked for __add_jh, should be able to have
		 * two adds at the same time
		 */
		BUG_ON(bh->b_private);
		jh->bh = bh;
		bh->b_private = jh;
	}
	jh->jl = j->j_current_jl;
	if (tail)
		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
	else {
		list_add_tail(&jh->list, &jh->jl->j_bh_list);
	}
	spin_unlock(&j->j_dirty_buffers_lock);
	return 0;
}

int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
{
	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
}
int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
{
	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
}

#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
static int write_ordered_buffers(spinlock_t *lock,
				 struct reiserfs_journal *j,
				 struct reiserfs_journal_list *jl,
				 struct list_head *list)
{
	struct buffer_head *bh;
	struct reiserfs_jh *jh;
	int ret = j->j_errno;
	struct buffer_chunk chunk;
	struct list_head tmp;
	INIT_LIST_HEAD(&tmp);

	chunk.nr = 0;
	spin_lock(lock);
	while (!list_empty(list)) {
		jh = JH_ENTRY(list->next);
		bh = jh->bh;
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			if (!buffer_dirty(bh)) {
				list_move(&jh->list, &tmp);
				goto loop_next;
			}
			spin_unlock(lock);
			if (chunk.nr)
				write_ordered_chunk(&chunk);
			wait_on_buffer(bh);
			cond_resched();
			spin_lock(lock);
			goto loop_next;
		}
		/*
		 * in theory, dirty non-uptodate buffers should never get here,
		 * but the upper layer io error paths still have a few quirks.
		 * Handle them here as gracefully as we can
		 */
		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
			clear_buffer_dirty(bh);
			ret = -EIO;
		}
		if (buffer_dirty(bh)) {
			list_move(&jh->list, &tmp);
			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
		} else {
			reiserfs_free_jh(bh);
			unlock_buffer(bh);
		}
loop_next:
		put_bh(bh);
		cond_resched_lock(lock);
	}
	if (chunk.nr) {
		spin_unlock(lock);
		write_ordered_chunk(&chunk);
		spin_lock(lock);
	}
	while (!list_empty(&tmp)) {
		jh = JH_ENTRY(tmp.prev);
		bh = jh->bh;
		get_bh(bh);
		reiserfs_free_jh(bh);

		if (buffer_locked(bh)) {
			spin_unlock(lock);
			wait_on_buffer(bh);
			spin_lock(lock);
		}
		if (!buffer_uptodate(bh)) {
			ret = -EIO;
		}
		/*
		 * ugly interaction with invalidatepage here.
		 * reiserfs_invalidate_page will pin any buffer that has a
		 * valid journal head from an older transaction.  If someone
		 * else sets our buffer dirty after we write it in the first
		 * loop, and then someone truncates the page away, nobody
		 * will ever write the buffer. We're safe if we write the
		 * page one last time after freeing the journal header.
		 */
		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
			spin_unlock(lock);
			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
			spin_lock(lock);
		}
		put_bh(bh);
		cond_resched_lock(lock);
	}
	spin_unlock(lock);
	return ret;
}

static int flush_older_commits(struct super_block *s,
			       struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	struct reiserfs_journal_list *other_jl;
	struct reiserfs_journal_list *first_jl;
	struct list_head *entry;
	unsigned int trans_id = jl->j_trans_id;
	unsigned int other_trans_id;
	unsigned int first_trans_id;

find_first:
	/*
	 * first we walk backwards to find the oldest uncommitted transaction
	 */
	first_jl = jl;
	entry = jl->j_list.prev;
	while (1) {
		other_jl = JOURNAL_LIST_ENTRY(entry);
		if (entry == &journal->j_journal_list ||
		    atomic_read(&other_jl->j_older_commits_done))
			break;

		first_jl = other_jl;
		entry = other_jl->j_list.prev;
	}

	/* if we didn't find any older uncommitted transactions, return now */
	if (first_jl == jl) {
		return 0;
	}

	first_trans_id = first_jl->j_trans_id;

	entry = &first_jl->j_list;
	while (1) {
		other_jl = JOURNAL_LIST_ENTRY(entry);
		other_trans_id = other_jl->j_trans_id;

		if (other_trans_id < trans_id) {
			if (atomic_read(&other_jl->j_commit_left) != 0) {
				flush_commit_list(s, other_jl, 0);

				/* list we were called with is gone, return */
				if (!journal_list_still_alive(s, trans_id))
					return 1;

				/*
				 * the one we just flushed is gone, this means
				 * all older lists are also gone, so first_jl
				 * is no longer valid either.  Go back to the
				 * beginning.
				 */
				if (!journal_list_still_alive
				    (s, other_trans_id)) {
					goto find_first;
				}
			}
			entry = entry->next;
			if (entry == &journal->j_journal_list)
				return 0;
		} else {
			return 0;
		}
	}
	return 0;
}

static int reiserfs_async_progress_wait(struct super_block *s)
{
	struct reiserfs_journal *j = SB_JOURNAL(s);

	if (atomic_read(&j->j_async_throttle)) {
		int depth;

		depth = reiserfs_write_unlock_nested(s);
		congestion_wait(BLK_RW_ASYNC, HZ / 10);
		reiserfs_write_lock_nested(s, depth);
	}

	return 0;
}

/*
 * if this journal list still has commit blocks unflushed, send them to disk.
 *
 * log areas must be flushed in order (transaction 2 can't commit before
 * transaction 1).  Before the commit block can be written, every other
 * log block must be safely on disk.
 */
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl, int flushall)
{
	int i;
	b_blocknr_t bn;
	struct buffer_head *tbh = NULL;
	unsigned int trans_id = jl->j_trans_id;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	int retval = 0;
	int write_len;
	int depth;

	reiserfs_check_lock_depth(s, "flush_commit_list");

	if (atomic_read(&jl->j_older_commits_done)) {
		return 0;
	}

	/*
	 * before we can put our commit blocks on disk, we have to make
	 * sure everyone older than us is on disk too
	 */
	BUG_ON(jl->j_len <= 0);
	BUG_ON(trans_id == journal->j_trans_id);

	get_journal_list(jl);
	if (flushall) {
		if (flush_older_commits(s, jl) == 1) {
			/*
			 * list disappeared during flush_older_commits.
			 * return
			 */
			goto put_jl;
		}
	}

	/* make sure nobody is trying to flush this one at the same time */
	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);

	if (!journal_list_still_alive(s, trans_id)) {
		mutex_unlock(&jl->j_commit_mutex);
		goto put_jl;
	}
	BUG_ON(jl->j_trans_id == 0);

	/* this commit is done, exit */
	if (atomic_read(&jl->j_commit_left) <= 0) {
		if (flushall) {
			atomic_set(&jl->j_older_commits_done, 1);
		}
		mutex_unlock(&jl->j_commit_mutex);
		goto put_jl;
	}

	if (!list_empty(&jl->j_bh_list)) {
		int ret;

		/*
		 * We might sleep in numerous places inside
		 * write_ordered_buffers. Relax the write lock.
		 */
		depth = reiserfs_write_unlock_nested(s);
		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
					    journal, jl, &jl->j_bh_list);
		if (ret < 0 && retval == 0)
			retval = ret;
		reiserfs_write_lock_nested(s, depth);
	}
	BUG_ON(!list_empty(&jl->j_bh_list));
	/*
	 * for the description block and all the log blocks, submit any buffers
	 * that haven't already reached the disk.  Try to write at least 256
	 * log blocks. later on, we will only wait on blocks that correspond
	 * to this transaction, but while we're unplugging we might as well
	 * get a chunk of data on there.
	 */
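	/*
	 * Wraparound sketch for the modulo below (numbers illustrative):
	 * with an on-disk journal of 8192 blocks, j_start = 8190 and
	 * i = 3 give (8190 + 3) % 8192 = 1, so the write continues at
	 * the start of the log area.
	 */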
	atomic_inc(&journal->j_async_throttle);
	write_len = jl->j_len + 1;
	if (write_len < 256)
		write_len = 256;
	for (i = 0; i < write_len; i++) {
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
		    SB_ONDISK_JOURNAL_SIZE(s);
		tbh = journal_find_get_block(s, bn);
		if (tbh) {
			if (buffer_dirty(tbh)) {
				depth = reiserfs_write_unlock_nested(s);
				ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh);
				reiserfs_write_lock_nested(s, depth);
			}
			put_bh(tbh);
		}
	}
	atomic_dec(&journal->j_async_throttle);

	for (i = 0; i < (jl->j_len + 1); i++) {
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
		tbh = journal_find_get_block(s, bn);

		depth = reiserfs_write_unlock_nested(s);
		__wait_on_buffer(tbh);
		reiserfs_write_lock_nested(s, depth);
		/*
		 * since we're using ll_rw_block above, it might have skipped
		 * over a locked buffer.  Double check here
		 */
		/* redundant, sync_dirty_buffer() checks */
		if (buffer_dirty(tbh)) {
			depth = reiserfs_write_unlock_nested(s);
			sync_dirty_buffer(tbh);
			reiserfs_write_lock_nested(s, depth);
		}
		if (unlikely(!buffer_uptodate(tbh))) {
#ifdef CONFIG_REISERFS_CHECK
			reiserfs_warning(s, "journal-601",
					 "buffer write failed");
#endif
			retval = -EIO;
		}
		/* once for journal_find_get_block */
		put_bh(tbh);
		/* once due to original getblk in do_journal_end */
		put_bh(tbh);
		atomic_dec(&jl->j_commit_left);
	}

	BUG_ON(atomic_read(&jl->j_commit_left) != 1);

	/*
	 * If there was a write error in the journal - we can't commit
	 * this transaction - it will be invalid and, if successful,
	 * will just end up propagating the write error out to
	 * the file system.
	 */
	if (likely(!retval && !reiserfs_is_journal_aborted(journal))) {
		if (buffer_dirty(jl->j_commit_bh))
			BUG();
		mark_buffer_dirty(jl->j_commit_bh);
		depth = reiserfs_write_unlock_nested(s);
		if (reiserfs_barrier_flush(s))
			__sync_dirty_buffer(jl->j_commit_bh,
					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
		else
			sync_dirty_buffer(jl->j_commit_bh);
		reiserfs_write_lock_nested(s, depth);
	}

	/*
	 * If there was a write error in the journal - we can't commit this
	 * transaction - it will be invalid and, if successful, will just end
	 * up propagating the write error out to the filesystem.
	 */
	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
#ifdef CONFIG_REISERFS_CHECK
		reiserfs_warning(s, "journal-615", "buffer write failed");
#endif
		retval = -EIO;
	}
	bforget(jl->j_commit_bh);
	if (journal->j_last_commit_id != 0 &&
	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
				 journal->j_last_commit_id, jl->j_trans_id);
	}
	journal->j_last_commit_id = jl->j_trans_id;

	/*
	 * now, every commit block is on the disk.  It is safe to allow
	 * blocks freed during this transaction to be reallocated
	 */
	cleanup_freed_for_journal_list(s, jl);

	retval = retval ? retval : journal->j_errno;

	/* mark the metadata dirty */
	if (!retval)
		dirty_one_transaction(s, jl);
	atomic_dec(&jl->j_commit_left);

	if (flushall) {
		atomic_set(&jl->j_older_commits_done, 1);
	}
	mutex_unlock(&jl->j_commit_mutex);
put_jl:
	put_journal_list(s, jl);

	if (retval)
		reiserfs_abort(s, retval, "Journal write error in %s",
			       __func__);
	return retval;
}

/*
 * flush_journal_list frequently needs to find a newer transaction for a
 * given block.  This does that, or returns NULL if it can't find anything
 */
static struct reiserfs_journal_list *
find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn)
{
	struct super_block *sb = cn->sb;
	b_blocknr_t blocknr = cn->blocknr;

	cn = cn->hprev;
	while (cn) {
		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
			return cn->jlist;
		}
		cn = cn->hprev;
	}
	return NULL;
}

static void remove_journal_hash(struct super_block *,
				struct reiserfs_journal_cnode **,
				struct reiserfs_journal_list *, unsigned long,
				int);

/*
 * once all the real blocks have been flushed, it is safe to remove them
 * from the journal list for this transaction.  Aside from freeing the
 * cnode, this also allows the block to be reallocated for data blocks
 * if it had been deleted.
 */
static void remove_all_from_journal_list(struct super_block *sb,
					 struct reiserfs_journal_list *jl,
					 int debug)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_journal_cnode *cn, *last;
	cn = jl->j_realblock;

	/*
	 * which is better, to lock once around the whole loop, or
	 * to lock for each call to remove_journal_hash?
	 */
	while (cn) {
		if (cn->blocknr != 0) {
			if (debug) {
				reiserfs_warning(sb, "reiserfs-2201",
						 "block %u, bh is %d, state %ld",
						 cn->blocknr, cn->bh ? 1 : 0,
						 cn->state);
			}
			cn->state = 0;
			remove_journal_hash(sb, journal->j_list_hash_table,
					    jl, cn->blocknr, 1);
		}
		last = cn;
		cn = cn->next;
		free_cnode(sb, last);
	}
	jl->j_realblock = NULL;
}

/*
 * if this timestamp is greater than the timestamp we wrote last to the
 * header block, write it to the header block.  once this is done, I can
 * safely say the log area for this transaction won't ever be replayed,
 * and I can start releasing blocks in this transaction for reuse as data
 * blocks.  called by flush_journal_list, before it calls
 * remove_all_from_journal_list
 */
static int _update_journal_header_block(struct super_block *sb,
					unsigned long offset,
					unsigned int trans_id)
{
	struct reiserfs_journal_header *jh;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	int depth;

	if (reiserfs_is_journal_aborted(journal))
		return -EIO;

	if (trans_id >= journal->j_last_flush_trans_id) {
		if (buffer_locked((journal->j_header_bh))) {
			depth = reiserfs_write_unlock_nested(sb);
			__wait_on_buffer(journal->j_header_bh);
			reiserfs_write_lock_nested(sb, depth);
			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK
				reiserfs_warning(sb, "journal-699",
						 "buffer write failed");
#endif
				return -EIO;
			}
		}
		journal->j_last_flush_trans_id = trans_id;
		journal->j_first_unflushed_offset = offset;
		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
							b_data);
		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
		jh->j_first_unflushed_offset = cpu_to_le32(offset);
		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);

		set_buffer_dirty(journal->j_header_bh);
		depth = reiserfs_write_unlock_nested(sb);

		if (reiserfs_barrier_flush(sb))
			__sync_dirty_buffer(journal->j_header_bh,
					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
		else
			sync_dirty_buffer(journal->j_header_bh);

		reiserfs_write_lock_nested(sb, depth);
		if (!buffer_uptodate(journal->j_header_bh)) {
			reiserfs_warning(sb, "journal-837",
					 "IO error during journal replay");
			return -EIO;
		}
	}
	return 0;
}

static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	return _update_journal_header_block(sb, offset, trans_id);
}

/*
 * flush any and all journal lists older than you are
 * can only be called from flush_journal_list
 */
static int flush_older_journal_lists(struct super_block *sb,
				     struct reiserfs_journal_list *jl)
{
	struct list_head *entry;
	struct reiserfs_journal_list *other_jl;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	unsigned int trans_id = jl->j_trans_id;

	/*
	 * we know we are the only ones flushing things, no extra race
	 * protection is required.
	 */
restart:
	entry = journal->j_journal_list.next;
	/* Did we wrap? */
	if (entry == &journal->j_journal_list)
		return 0;
	other_jl = JOURNAL_LIST_ENTRY(entry);
	if (other_jl->j_trans_id < trans_id) {
		BUG_ON(other_jl->j_refcount <= 0);
		/* do not flush all */
		flush_journal_list(sb, other_jl, 0);

		/* other_jl is now deleted from the list */
		goto restart;
	}
	return 0;
}

static void del_from_work_list(struct super_block *s,
			       struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	if (!list_empty(&jl->j_working_list)) {
		list_del_init(&jl->j_working_list);
		journal->j_num_work_lists--;
	}
}

/*
 * flush a journal list, both commit and real blocks
 *
 * always set flushall to 1, unless you are calling from inside
 * flush_journal_list
 *
 * IMPORTANT.  This can only be called while there are no journal writers,
 * and the journal is locked.  That means it can only be called from
 * do_journal_end, or by journal_release
 */
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl, int flushall)
{
	struct reiserfs_journal_list *pjl;
	struct reiserfs_journal_cnode *cn, *last;
	int count;
	int was_jwait = 0;
	int was_dirty = 0;
	struct buffer_head *saved_bh;
	unsigned long j_len_saved = jl->j_len;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	int err = 0;
	int depth;

	BUG_ON(j_len_saved <= 0);

	if (atomic_read(&journal->j_wcount) != 0) {
		reiserfs_warning(s, "clm-2048", "called with wcount %d",
				 atomic_read(&journal->j_wcount));
	}

	/* if flushall == 0, the lock is already held */
	if (flushall) {
		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
	} else if (mutex_trylock(&journal->j_flush_mutex)) {
		BUG();
	}

	count = 0;
	if (j_len_saved > journal->j_trans_max) {
		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
			       j_len_saved, jl->j_trans_id);
		return 0;
	}

	/* if all the work is already done, get out of here */
	if (atomic_read(&jl->j_nonzerolen) <= 0 &&
	    atomic_read(&jl->j_commit_left) <= 0) {
		goto flush_older_and_return;
	}

	/*
	 * start by putting the commit list on disk.  This will also flush
	 * the commit lists of any older transactions
	 */
 | 1395 | 	flush_commit_list(s, jl, 1); | 
 | 1396 |  | 
 | 1397 | 	if (!(jl->j_state & LIST_DIRTY) | 
 | 1398 | 	    && !reiserfs_is_journal_aborted(journal)) | 
 | 1399 | 		BUG(); | 
 | 1400 |  | 
 | 1401 | 	/* are we done now? */ | 
 | 1402 | 	if (atomic_read(&jl->j_nonzerolen) <= 0 && | 
 | 1403 | 	    atomic_read(&jl->j_commit_left) <= 0) { | 
 | 1404 | 		goto flush_older_and_return; | 
 | 1405 | 	} | 
 | 1406 |  | 
 | 1407 | 	/* | 
 | 1408 | 	 * loop through each cnode, see if we need to write it, | 
 | 1409 | 	 * or wait on a more recent transaction, or just ignore it | 
 | 1410 | 	 */ | 
 | 1411 | 	if (atomic_read(&journal->j_wcount) != 0) { | 
 | 1412 | 		reiserfs_panic(s, "journal-844", "journal list is flushing, " | 
 | 1413 | 			       "wcount is not 0"); | 
 | 1414 | 	} | 
 | 1415 | 	cn = jl->j_realblock; | 
 | 1416 | 	while (cn) { | 
 | 1417 | 		was_jwait = 0; | 
 | 1418 | 		was_dirty = 0; | 
 | 1419 | 		saved_bh = NULL; | 
 | 1420 | 		/* blocknr of 0 is no longer in the hash, ignore it */ | 
 | 1421 | 		if (cn->blocknr == 0) { | 
 | 1422 | 			goto free_cnode; | 
 | 1423 | 		} | 
 | 1424 |  | 
 | 1425 | 		/* | 
 | 1426 | 		 * This transaction failed commit. | 
 | 1427 | 		 * Don't write out to the disk | 
 | 1428 | 		 */ | 
 | 1429 | 		if (!(jl->j_state & LIST_DIRTY)) | 
 | 1430 | 			goto free_cnode; | 
 | 1431 |  | 
 | 1432 | 		pjl = find_newer_jl_for_cn(cn); | 
 | 1433 | 		/* | 
 | 1434 | 		 * the order is important here.  We check pjl to make sure we | 
 | 1435 | 		 * don't clear BH_JDirty_wait if we aren't the one writing this | 
 | 1436 | 		 * block to disk | 
 | 1437 | 		 */ | 
 | 1438 | 		if (!pjl && cn->bh) { | 
 | 1439 | 			saved_bh = cn->bh; | 
 | 1440 |  | 
 | 1441 | 			/* | 
 | 1442 | 			 * we do this to make sure nobody releases the | 
 | 1443 | 			 * buffer while we are working with it | 
 | 1444 | 			 */ | 
 | 1445 | 			get_bh(saved_bh); | 
 | 1446 |  | 
 | 1447 | 			if (buffer_journal_dirty(saved_bh)) { | 
 | 1448 | 				BUG_ON(!can_dirty(cn)); | 
 | 1449 | 				was_jwait = 1; | 
 | 1450 | 				was_dirty = 1; | 
 | 1451 | 			} else if (can_dirty(cn)) { | 
 | 1452 | 				/* | 
 | 1453 | 				 * everything with !pjl && jwait | 
 | 1454 | 				 * should be writable | 
 | 1455 | 				 */ | 
 | 1456 | 				BUG(); | 
 | 1457 | 			} | 
 | 1458 | 		} | 
 | 1459 |  | 
 | 1460 | 		/* | 
 | 1461 | 		 * if someone has this block in a newer transaction, just make | 
 | 1462 | 		 * sure they are committed, and don't try writing it to disk | 
 | 1463 | 		 */ | 
 | 1464 | 		if (pjl) { | 
 | 1465 | 			if (atomic_read(&pjl->j_commit_left)) | 
 | 1466 | 				flush_commit_list(s, pjl, 1); | 
 | 1467 | 			goto free_cnode; | 
 | 1468 | 		} | 
 | 1469 |  | 
 | 1470 | 		/* | 
		 * bh == NULL when the block already made it to disk on its
		 * own, or when the block was freed in a later transaction
 | 1473 | 		 */ | 
 | 1474 | 		if (saved_bh == NULL) { | 
 | 1475 | 			goto free_cnode; | 
 | 1476 | 		} | 
 | 1477 |  | 
 | 1478 | 		/* | 
 | 1479 | 		 * this should never happen.  kupdate_one_transaction has | 
 | 1480 | 		 * this list locked while it works, so we should never see a | 
 | 1481 | 		 * buffer here that is not marked JDirty_wait | 
 | 1482 | 		 */ | 
 | 1483 | 		if ((!was_jwait) && !buffer_locked(saved_bh)) { | 
 | 1484 | 			reiserfs_warning(s, "journal-813", | 
 | 1485 | 					 "BAD! buffer %llu %cdirty %cjwait, " | 
 | 1486 | 					 "not in a newer transaction", | 
 | 1487 | 					 (unsigned long long)saved_bh-> | 
 | 1488 | 					 b_blocknr, was_dirty ? ' ' : '!', | 
 | 1489 | 					 was_jwait ? ' ' : '!'); | 
 | 1490 | 		} | 
 | 1491 | 		if (was_dirty) { | 
 | 1492 | 			/* | 
 | 1493 | 			 * we inc again because saved_bh gets decremented | 
 | 1494 | 			 * at free_cnode | 
 | 1495 | 			 */ | 
 | 1496 | 			get_bh(saved_bh); | 
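			/* flag it for the wait pass over j_realblock below */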
 | 1497 | 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state); | 
 | 1498 | 			lock_buffer(saved_bh); | 
 | 1499 | 			BUG_ON(cn->blocknr != saved_bh->b_blocknr); | 
 | 1500 | 			if (buffer_dirty(saved_bh)) | 
 | 1501 | 				submit_logged_buffer(saved_bh); | 
 | 1502 | 			else | 
 | 1503 | 				unlock_buffer(saved_bh); | 
 | 1504 | 			count++; | 
 | 1505 | 		} else { | 
 | 1506 | 			reiserfs_warning(s, "clm-2082", | 
 | 1507 | 					 "Unable to flush buffer %llu in %s", | 
 | 1508 | 					 (unsigned long long)saved_bh-> | 
 | 1509 | 					 b_blocknr, __func__); | 
 | 1510 | 		} | 
 | 1511 | free_cnode: | 
 | 1512 | 		last = cn; | 
 | 1513 | 		cn = cn->next; | 
 | 1514 | 		if (saved_bh) { | 
 | 1515 | 			/* | 
 | 1516 | 			 * we incremented this to keep others from | 
 | 1517 | 			 * taking the buffer head away | 
 | 1518 | 			 */ | 
 | 1519 | 			put_bh(saved_bh); | 
 | 1520 | 			if (atomic_read(&saved_bh->b_count) < 0) { | 
 | 1521 | 				reiserfs_warning(s, "journal-945", | 
 | 1522 | 						 "saved_bh->b_count < 0"); | 
 | 1523 | 			} | 
 | 1524 | 		} | 
 | 1525 | 	} | 
 | 1526 | 	if (count > 0) { | 
 | 1527 | 		cn = jl->j_realblock; | 
 | 1528 | 		while (cn) { | 
 | 1529 | 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { | 
 | 1530 | 				if (!cn->bh) { | 
 | 1531 | 					reiserfs_panic(s, "journal-1011", | 
 | 1532 | 						       "cn->bh is NULL"); | 
 | 1533 | 				} | 
 | 1534 |  | 
 | 1535 | 				depth = reiserfs_write_unlock_nested(s); | 
 | 1536 | 				__wait_on_buffer(cn->bh); | 
 | 1537 | 				reiserfs_write_lock_nested(s, depth); | 
 | 1538 |  | 
 | 1539 | 				if (!cn->bh) { | 
 | 1540 | 					reiserfs_panic(s, "journal-1012", | 
 | 1541 | 						       "cn->bh is NULL"); | 
 | 1542 | 				} | 
 | 1543 | 				if (unlikely(!buffer_uptodate(cn->bh))) { | 
 | 1544 | #ifdef CONFIG_REISERFS_CHECK | 
 | 1545 | 					reiserfs_warning(s, "journal-949", | 
 | 1546 | 							 "buffer write failed"); | 
 | 1547 | #endif | 
 | 1548 | 					err = -EIO; | 
 | 1549 | 				} | 
 | 1550 | 				/* | 
 | 1551 | 				 * note, we must clear the JDirty_wait bit | 
 | 1552 | 				 * after the up to date check, otherwise we | 
 | 1553 | 				 * race against our flushpage routine | 
 | 1554 | 				 */ | 
 | 1555 | 				BUG_ON(!test_clear_buffer_journal_dirty | 
 | 1556 | 				       (cn->bh)); | 
 | 1557 |  | 
 | 1558 | 				/* drop one ref for us */ | 
 | 1559 | 				put_bh(cn->bh); | 
 | 1560 | 				/* drop one ref for journal_mark_dirty */ | 
 | 1561 | 				release_buffer_page(cn->bh); | 
 | 1562 | 			} | 
 | 1563 | 			cn = cn->next; | 
 | 1564 | 		} | 
 | 1565 | 	} | 
 | 1566 |  | 
 | 1567 | 	if (err) | 
 | 1568 | 		reiserfs_abort(s, -EIO, | 
 | 1569 | 			       "Write error while pushing transaction to disk in %s", | 
 | 1570 | 			       __func__); | 
 | 1571 | flush_older_and_return: | 
 | 1572 |  | 
 | 1573 | 	/* | 
 | 1574 | 	 * before we can update the journal header block, we _must_ flush all | 
 | 1575 | 	 * real blocks from all older transactions to disk.  This is because | 
 | 1576 | 	 * once the header block is updated, this transaction will not be | 
 | 1577 | 	 * replayed after a crash | 
 | 1578 | 	 */ | 
 | 1579 | 	if (flushall) { | 
 | 1580 | 		flush_older_journal_lists(s, jl); | 
 | 1581 | 	} | 
 | 1582 |  | 
 | 1583 | 	err = journal->j_errno; | 
 | 1584 | 	/* | 
 | 1585 | 	 * before we can remove everything from the hash tables for this | 
 | 1586 | 	 * transaction, we must make sure it can never be replayed | 
 | 1587 | 	 * | 
 | 1588 | 	 * since we are only called from do_journal_end, we know for sure there | 
 | 1589 | 	 * are no allocations going on while we are flushing journal lists.  So, | 
 | 1590 | 	 * we only need to update the journal header block for the last list | 
 | 1591 | 	 * being flushed | 
 | 1592 | 	 */ | 
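	/*
	 * on disk a transaction is one description block, j_len data
	 * blocks, and one commit block, so the next transaction (the new
	 * journal start) begins at j_start + j_len + 2, modulo the size
	 * of the circular log
	 */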
 | 1593 | 	if (!err && flushall) { | 
 | 1594 | 		err = | 
 | 1595 | 		    update_journal_header_block(s, | 
 | 1596 | 						(jl->j_start + jl->j_len + | 
 | 1597 | 						 2) % SB_ONDISK_JOURNAL_SIZE(s), | 
 | 1598 | 						jl->j_trans_id); | 
 | 1599 | 		if (err) | 
 | 1600 | 			reiserfs_abort(s, -EIO, | 
 | 1601 | 				       "Write error while updating journal header in %s", | 
 | 1602 | 				       __func__); | 
 | 1603 | 	} | 
 | 1604 | 	remove_all_from_journal_list(s, jl, 0); | 
 | 1605 | 	list_del_init(&jl->j_list); | 
 | 1606 | 	journal->j_num_lists--; | 
 | 1607 | 	del_from_work_list(s, jl); | 
 | 1608 |  | 
 | 1609 | 	if (journal->j_last_flush_id != 0 && | 
 | 1610 | 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) { | 
 | 1611 | 		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu", | 
 | 1612 | 				 journal->j_last_flush_id, jl->j_trans_id); | 
 | 1613 | 	} | 
 | 1614 | 	journal->j_last_flush_id = jl->j_trans_id; | 
 | 1615 |  | 
 | 1616 | 	/* | 
 | 1617 | 	 * not strictly required since we are freeing the list, but it should | 
 | 1618 | 	 * help find code using dead lists later on | 
 | 1619 | 	 */ | 
 | 1620 | 	jl->j_len = 0; | 
 | 1621 | 	atomic_set(&jl->j_nonzerolen, 0); | 
 | 1622 | 	jl->j_start = 0; | 
 | 1623 | 	jl->j_realblock = NULL; | 
 | 1624 | 	jl->j_commit_bh = NULL; | 
 | 1625 | 	jl->j_trans_id = 0; | 
 | 1626 | 	jl->j_state = 0; | 
 | 1627 | 	put_journal_list(s, jl); | 
 | 1628 | 	if (flushall) | 
 | 1629 | 		mutex_unlock(&journal->j_flush_mutex); | 
 | 1630 | 	return err; | 
 | 1631 | } | 
 | 1632 |  | 
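/*
 * queue any dirty, writable real blocks from this transaction into the
 * caller's buffer chunk; returns the number of buffers queued
 */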
 | 1633 | static int write_one_transaction(struct super_block *s, | 
 | 1634 | 				 struct reiserfs_journal_list *jl, | 
 | 1635 | 				 struct buffer_chunk *chunk) | 
 | 1636 | { | 
 | 1637 | 	struct reiserfs_journal_cnode *cn; | 
 | 1638 | 	int ret = 0; | 
 | 1639 |  | 
 | 1640 | 	jl->j_state |= LIST_TOUCHED; | 
 | 1641 | 	del_from_work_list(s, jl); | 
 | 1642 | 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { | 
 | 1643 | 		return 0; | 
 | 1644 | 	} | 
 | 1645 |  | 
 | 1646 | 	cn = jl->j_realblock; | 
 | 1647 | 	while (cn) { | 
 | 1648 | 		/* | 
 | 1649 | 		 * if the blocknr == 0, this has been cleared from the hash, | 
 | 1650 | 		 * skip it | 
 | 1651 | 		 */ | 
 | 1652 | 		if (cn->blocknr == 0) { | 
 | 1653 | 			goto next; | 
 | 1654 | 		} | 
 | 1655 | 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { | 
 | 1656 | 			struct buffer_head *tmp_bh; | 
 | 1657 | 			/* | 
 | 1658 | 			 * we can race against journal_mark_freed when we try | 
 | 1659 | 			 * to lock_buffer(cn->bh), so we have to inc the buffer | 
 | 1660 | 			 * count, and recheck things after locking | 
 | 1661 | 			 */ | 
 | 1662 | 			tmp_bh = cn->bh; | 
 | 1663 | 			get_bh(tmp_bh); | 
 | 1664 | 			lock_buffer(tmp_bh); | 
 | 1665 | 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { | 
 | 1666 | 				if (!buffer_journal_dirty(tmp_bh) || | 
 | 1667 | 				    buffer_journal_prepared(tmp_bh)) | 
 | 1668 | 					BUG(); | 
 | 1669 | 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk); | 
 | 1670 | 				ret++; | 
 | 1671 | 			} else { | 
 | 1672 | 				/* note, cn->bh might be null now */ | 
 | 1673 | 				unlock_buffer(tmp_bh); | 
 | 1674 | 			} | 
 | 1675 | 			put_bh(tmp_bh); | 
 | 1676 | 		} | 
 | 1677 | next: | 
 | 1678 | 		cn = cn->next; | 
 | 1679 | 		cond_resched(); | 
 | 1680 | 	} | 
 | 1681 | 	return ret; | 
 | 1682 | } | 
 | 1683 |  | 
 | 1684 | /* used by flush_commit_list */ | 
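/*
 * mark the list LIST_DIRTY and make sure every logged buffer it is
 * responsible for ends up marked dirty (or restore-dirty) again
 */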
 | 1685 | static int dirty_one_transaction(struct super_block *s, | 
 | 1686 | 				 struct reiserfs_journal_list *jl) | 
 | 1687 | { | 
 | 1688 | 	struct reiserfs_journal_cnode *cn; | 
 | 1689 | 	struct reiserfs_journal_list *pjl; | 
 | 1690 | 	int ret = 0; | 
 | 1691 |  | 
 | 1692 | 	jl->j_state |= LIST_DIRTY; | 
 | 1693 | 	cn = jl->j_realblock; | 
 | 1694 | 	while (cn) { | 
 | 1695 | 		/* | 
 | 1696 | 		 * look for a more recent transaction that logged this | 
 | 1697 | 		 * buffer.  Only the most recent transaction with a buffer in | 
 | 1698 | 		 * it is allowed to send that buffer to disk | 
 | 1699 | 		 */ | 
 | 1700 | 		pjl = find_newer_jl_for_cn(cn); | 
 | 1701 | 		if (!pjl && cn->blocknr && cn->bh | 
 | 1702 | 		    && buffer_journal_dirty(cn->bh)) { | 
 | 1703 | 			BUG_ON(!can_dirty(cn)); | 
 | 1704 | 			/* | 
 | 1705 | 			 * if the buffer is prepared, it will either be logged | 
 | 1706 | 			 * or restored.  If restored, we need to make sure | 
 | 1707 | 			 * it actually gets marked dirty | 
 | 1708 | 			 */ | 
 | 1709 | 			clear_buffer_journal_new(cn->bh); | 
 | 1710 | 			if (buffer_journal_prepared(cn->bh)) { | 
 | 1711 | 				set_buffer_journal_restore_dirty(cn->bh); | 
 | 1712 | 			} else { | 
 | 1713 | 				set_buffer_journal_test(cn->bh); | 
 | 1714 | 				mark_buffer_dirty(cn->bh); | 
 | 1715 | 			} | 
 | 1716 | 		} | 
 | 1717 | 		cn = cn->next; | 
 | 1718 | 	} | 
 | 1719 | 	return ret; | 
 | 1720 | } | 
 | 1721 |  | 
 | 1722 | static int kupdate_transactions(struct super_block *s, | 
 | 1723 | 				struct reiserfs_journal_list *jl, | 
 | 1724 | 				struct reiserfs_journal_list **next_jl, | 
 | 1725 | 				unsigned int *next_trans_id, | 
 | 1726 | 				int num_blocks, int num_trans) | 
 | 1727 | { | 
 | 1728 | 	int ret = 0; | 
 | 1729 | 	int written = 0; | 
 | 1730 | 	int transactions_flushed = 0; | 
 | 1731 | 	unsigned int orig_trans_id = jl->j_trans_id; | 
 | 1732 | 	struct buffer_chunk chunk; | 
 | 1733 | 	struct list_head *entry; | 
 | 1734 | 	struct reiserfs_journal *journal = SB_JOURNAL(s); | 
 | 1735 | 	chunk.nr = 0; | 
 | 1736 |  | 
 | 1737 | 	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); | 
 | 1738 | 	if (!journal_list_still_alive(s, orig_trans_id)) { | 
 | 1739 | 		goto done; | 
 | 1740 | 	} | 
 | 1741 |  | 
 | 1742 | 	/* | 
 | 1743 | 	 * we've got j_flush_mutex held, nobody is going to delete any | 
 | 1744 | 	 * of these lists out from underneath us | 
 | 1745 | 	 */ | 
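	/*
	 * flush num_trans transactions when num_trans is set; otherwise
	 * keep going until num_blocks blocks have been written
	 */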
 | 1746 | 	while ((num_trans && transactions_flushed < num_trans) || | 
 | 1747 | 	       (!num_trans && written < num_blocks)) { | 
 | 1748 |  | 
 | 1749 | 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || | 
 | 1750 | 		    atomic_read(&jl->j_commit_left) | 
 | 1751 | 		    || !(jl->j_state & LIST_DIRTY)) { | 
 | 1752 | 			del_from_work_list(s, jl); | 
 | 1753 | 			break; | 
 | 1754 | 		} | 
 | 1755 | 		ret = write_one_transaction(s, jl, &chunk); | 
 | 1756 |  | 
 | 1757 | 		if (ret < 0) | 
 | 1758 | 			goto done; | 
 | 1759 | 		transactions_flushed++; | 
 | 1760 | 		written += ret; | 
 | 1761 | 		entry = jl->j_list.next; | 
 | 1762 |  | 
 | 1763 | 		/* did we wrap? */ | 
 | 1764 | 		if (entry == &journal->j_journal_list) { | 
 | 1765 | 			break; | 
 | 1766 | 		} | 
 | 1767 | 		jl = JOURNAL_LIST_ENTRY(entry); | 
 | 1768 |  | 
 | 1769 | 		/* don't bother with older transactions */ | 
 | 1770 | 		if (jl->j_trans_id <= orig_trans_id) | 
 | 1771 | 			break; | 
 | 1772 | 	} | 
 | 1773 | 	if (chunk.nr) { | 
 | 1774 | 		write_chunk(&chunk); | 
 | 1775 | 	} | 
 | 1776 |  | 
 | 1777 | done: | 
 | 1778 | 	mutex_unlock(&journal->j_flush_mutex); | 
 | 1779 | 	return ret; | 
 | 1780 | } | 
 | 1781 |  | 
 | 1782 | /* | 
 * O_SYNC- and fsync-heavy applications tend to use up
 * all the journal list slots with tiny transactions.  These
 | 1785 |  * trigger lots and lots of calls to update the header block, which | 
 | 1786 |  * adds seeks and slows things down. | 
 | 1787 |  * | 
 | 1788 |  * This function tries to clear out a large chunk of the journal lists | 
 | 1789 |  * at once, which makes everything faster since only the newest journal | 
 | 1790 |  * list updates the header block | 
 | 1791 |  */ | 
 | 1792 | static int flush_used_journal_lists(struct super_block *s, | 
 | 1793 | 				    struct reiserfs_journal_list *jl) | 
 | 1794 | { | 
 | 1795 | 	unsigned long len = 0; | 
 | 1796 | 	unsigned long cur_len; | 
 | 1797 | 	int ret; | 
 | 1798 | 	int i; | 
 | 1799 | 	int limit = 256; | 
 | 1800 | 	struct reiserfs_journal_list *tjl; | 
 | 1801 | 	struct reiserfs_journal_list *flush_jl; | 
 | 1802 | 	unsigned int trans_id; | 
 | 1803 | 	struct reiserfs_journal *journal = SB_JOURNAL(s); | 
 | 1804 |  | 
 | 1805 | 	flush_jl = tjl = jl; | 
 | 1806 |  | 
 | 1807 | 	/* in data logging mode, try harder to flush a lot of blocks */ | 
 | 1808 | 	if (reiserfs_data_log(s)) | 
 | 1809 | 		limit = 1024; | 
 | 1810 | 	/* flush for 256 transactions or limit blocks, whichever comes first */ | 
 | 1811 | 	for (i = 0; i < 256 && len < limit; i++) { | 
 | 1812 | 		if (atomic_read(&tjl->j_commit_left) || | 
 | 1813 | 		    tjl->j_trans_id < jl->j_trans_id) { | 
 | 1814 | 			break; | 
 | 1815 | 		} | 
 | 1816 | 		cur_len = atomic_read(&tjl->j_nonzerolen); | 
 | 1817 | 		if (cur_len > 0) { | 
 | 1818 | 			tjl->j_state &= ~LIST_TOUCHED; | 
 | 1819 | 		} | 
 | 1820 | 		len += cur_len; | 
 | 1821 | 		flush_jl = tjl; | 
 | 1822 | 		if (tjl->j_list.next == &journal->j_journal_list) | 
 | 1823 | 			break; | 
 | 1824 | 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); | 
 | 1825 | 	} | 
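	/* hold refs so neither list can be freed out from under the flush */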
 | 1826 | 	get_journal_list(jl); | 
 | 1827 | 	get_journal_list(flush_jl); | 
 | 1828 |  | 
 | 1829 | 	/* | 
 | 1830 | 	 * try to find a group of blocks we can flush across all the | 
 | 1831 | 	 * transactions, but only bother if we've actually spanned | 
 | 1832 | 	 * across multiple lists | 
 | 1833 | 	 */ | 
 | 1834 | 	if (flush_jl != jl) { | 
 | 1835 | 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); | 
 | 1836 | 	} | 
 | 1837 | 	flush_journal_list(s, flush_jl, 1); | 
 | 1838 | 	put_journal_list(s, flush_jl); | 
 | 1839 | 	put_journal_list(s, jl); | 
 | 1840 | 	return 0; | 
 | 1841 | } | 
 | 1842 |  | 
 | 1843 | /* | 
 * removes any nodes in the table that match the given block and super
 * block.  Only touches the hnext and hprev pointers.
 | 1846 |  */ | 
 | 1847 | void remove_journal_hash(struct super_block *sb, | 
 | 1848 | 			 struct reiserfs_journal_cnode **table, | 
 | 1849 | 			 struct reiserfs_journal_list *jl, | 
 | 1850 | 			 unsigned long block, int remove_freed) | 
 | 1851 | { | 
 | 1852 | 	struct reiserfs_journal_cnode *cur; | 
 | 1853 | 	struct reiserfs_journal_cnode **head; | 
 | 1854 |  | 
 | 1855 | 	head = &(journal_hash(table, sb, block)); | 
 | 1856 | 	if (!head) { | 
 | 1857 | 		return; | 
 | 1858 | 	} | 
 | 1859 | 	cur = *head; | 
 | 1860 | 	while (cur) { | 
 | 1861 | 		if (cur->blocknr == block && cur->sb == sb | 
 | 1862 | 		    && (jl == NULL || jl == cur->jlist) | 
 | 1863 | 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { | 
 | 1864 | 			if (cur->hnext) { | 
 | 1865 | 				cur->hnext->hprev = cur->hprev; | 
 | 1866 | 			} | 
 | 1867 | 			if (cur->hprev) { | 
 | 1868 | 				cur->hprev->hnext = cur->hnext; | 
 | 1869 | 			} else { | 
 | 1870 | 				*head = cur->hnext; | 
 | 1871 | 			} | 
 | 1872 | 			cur->blocknr = 0; | 
 | 1873 | 			cur->sb = NULL; | 
 | 1874 | 			cur->state = 0; | 
 | 1875 | 			/* | 
 | 1876 | 			 * anybody who clears the cur->bh will also | 
 | 1877 | 			 * dec the nonzerolen | 
 | 1878 | 			 */ | 
 | 1879 | 			if (cur->bh && cur->jlist) | 
 | 1880 | 				atomic_dec(&cur->jlist->j_nonzerolen); | 
 | 1881 | 			cur->bh = NULL; | 
 | 1882 | 			cur->jlist = NULL; | 
 | 1883 | 		} | 
 | 1884 | 		cur = cur->hnext; | 
 | 1885 | 	} | 
 | 1886 | } | 
 | 1887 |  | 
 | 1888 | static void free_journal_ram(struct super_block *sb) | 
 | 1889 | { | 
 | 1890 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 1891 | 	kfree(journal->j_current_jl); | 
 | 1892 | 	journal->j_num_lists--; | 
 | 1893 |  | 
 | 1894 | 	vfree(journal->j_cnode_free_orig); | 
 | 1895 | 	free_list_bitmaps(sb, journal->j_list_bitmap); | 
 | 1896 | 	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */ | 
 | 1897 | 	if (journal->j_header_bh) { | 
 | 1898 | 		brelse(journal->j_header_bh); | 
 | 1899 | 	} | 
 | 1900 | 	/* | 
 | 1901 | 	 * j_header_bh is on the journal dev, make sure | 
 | 1902 | 	 * not to release the journal dev until we brelse j_header_bh | 
 | 1903 | 	 */ | 
 | 1904 | 	release_journal_dev(sb, journal); | 
 | 1905 | 	vfree(journal); | 
 | 1906 | } | 
 | 1907 |  | 
 | 1908 | /* | 
 | 1909 |  * call on unmount.  Only set error to 1 if you haven't made your way out | 
 | 1910 |  * of read_super() yet.  Any other caller must keep error at 0. | 
 | 1911 |  */ | 
 | 1912 | static int do_journal_release(struct reiserfs_transaction_handle *th, | 
 | 1913 | 			      struct super_block *sb, int error) | 
 | 1914 | { | 
 | 1915 | 	struct reiserfs_transaction_handle myth; | 
 | 1916 | 	int flushed = 0; | 
 | 1917 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 1918 |  | 
 | 1919 | 	/* | 
 | 1920 | 	 * we only want to flush out transactions if we were | 
 | 1921 | 	 * called with error == 0 | 
 | 1922 | 	 */ | 
 | 1923 | 	if (!error && !sb_rdonly(sb)) { | 
 | 1924 | 		/* end the current trans */ | 
 | 1925 | 		BUG_ON(!th->t_trans_id); | 
 | 1926 | 		do_journal_end(th, FLUSH_ALL); | 
 | 1927 |  | 
 | 1928 | 		/* | 
 | 1929 | 		 * make sure something gets logged to force | 
 | 1930 | 		 * our way into the flush code | 
 | 1931 | 		 */ | 
 | 1932 | 		if (!journal_join(&myth, sb)) { | 
 | 1933 | 			reiserfs_prepare_for_journal(sb, | 
 | 1934 | 						     SB_BUFFER_WITH_SB(sb), | 
 | 1935 | 						     1); | 
 | 1936 | 			journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb)); | 
 | 1937 | 			do_journal_end(&myth, FLUSH_ALL); | 
 | 1938 | 			flushed = 1; | 
 | 1939 | 		} | 
 | 1940 | 	} | 
 | 1941 |  | 
 | 1942 | 	/* this also catches errors during the do_journal_end above */ | 
 | 1943 | 	if (!error && reiserfs_is_journal_aborted(journal)) { | 
 | 1944 | 		memset(&myth, 0, sizeof(myth)); | 
 | 1945 | 		if (!journal_join_abort(&myth, sb)) { | 
 | 1946 | 			reiserfs_prepare_for_journal(sb, | 
 | 1947 | 						     SB_BUFFER_WITH_SB(sb), | 
 | 1948 | 						     1); | 
 | 1949 | 			journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb)); | 
 | 1950 | 			do_journal_end(&myth, FLUSH_ALL); | 
 | 1951 | 		} | 
 | 1952 | 	} | 
 | 1953 |  | 
 | 1954 |  | 
 | 1955 | 	/* | 
 | 1956 | 	 * We must release the write lock here because | 
 | 1957 | 	 * the workqueue job (flush_async_commit) needs this lock | 
 | 1958 | 	 */ | 
 | 1959 | 	reiserfs_write_unlock(sb); | 
 | 1960 |  | 
 | 1961 | 	/* | 
 | 1962 | 	 * Cancel flushing of old commits. Note that neither of these works | 
 | 1963 | 	 * will be requeued because superblock is being shutdown and doesn't | 
 | 1964 | 	 * have SB_ACTIVE set. | 
 | 1965 | 	 */ | 
 | 1966 | 	reiserfs_cancel_old_flush(sb); | 
 | 1967 | 	/* wait for all commits to finish */ | 
 | 1968 | 	cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); | 
 | 1969 |  | 
 | 1970 | 	free_journal_ram(sb); | 
 | 1971 |  | 
 | 1972 | 	reiserfs_write_lock(sb); | 
 | 1973 |  | 
 | 1974 | 	return 0; | 
 | 1975 | } | 
 | 1976 |  | 
/* call on unmount: flush all journal transactions, release all allocated ram */
 | 1978 | int journal_release(struct reiserfs_transaction_handle *th, | 
 | 1979 | 		    struct super_block *sb) | 
 | 1980 | { | 
 | 1981 | 	return do_journal_release(th, sb, 0); | 
 | 1982 | } | 
 | 1983 |  | 
 | 1984 | /* only call from an error condition inside reiserfs_read_super!  */ | 
 | 1985 | int journal_release_error(struct reiserfs_transaction_handle *th, | 
 | 1986 | 			  struct super_block *sb) | 
 | 1987 | { | 
 | 1988 | 	return do_journal_release(th, sb, 1); | 
 | 1989 | } | 
 | 1990 |  | 
 | 1991 | /* | 
 | 1992 |  * compares description block with commit block. | 
 | 1993 |  * returns 1 if they differ, 0 if they are the same | 
 | 1994 |  */ | 
 | 1995 | static int journal_compare_desc_commit(struct super_block *sb, | 
 | 1996 | 				       struct reiserfs_journal_desc *desc, | 
 | 1997 | 				       struct reiserfs_journal_commit *commit) | 
 | 1998 | { | 
 | 1999 | 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || | 
 | 2000 | 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) || | 
 | 2001 | 	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max || | 
 | 2002 | 	    get_commit_trans_len(commit) <= 0) { | 
 | 2003 | 		return 1; | 
 | 2004 | 	} | 
 | 2005 | 	return 0; | 
 | 2006 | } | 
 | 2007 |  | 
 | 2008 | /* | 
 | 2009 |  * returns 0 if it did not find a description block | 
 * returns -1 if it found a corrupt or stale transaction
 | 2011 |  * returns 1 if both desc and commit were valid | 
 | 2012 |  * NOTE: only called during fs mount | 
 | 2013 |  */ | 
 | 2014 | static int journal_transaction_is_valid(struct super_block *sb, | 
 | 2015 | 					struct buffer_head *d_bh, | 
 | 2016 | 					unsigned int *oldest_invalid_trans_id, | 
 | 2017 | 					unsigned long *newest_mount_id) | 
 | 2018 | { | 
 | 2019 | 	struct reiserfs_journal_desc *desc; | 
 | 2020 | 	struct reiserfs_journal_commit *commit; | 
 | 2021 | 	struct buffer_head *c_bh; | 
 | 2022 | 	unsigned long offset; | 
 | 2023 |  | 
 | 2024 | 	if (!d_bh) | 
 | 2025 | 		return 0; | 
 | 2026 |  | 
 | 2027 | 	desc = (struct reiserfs_journal_desc *)d_bh->b_data; | 
 | 2028 | 	if (get_desc_trans_len(desc) > 0 | 
 | 2029 | 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { | 
 | 2030 | 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id | 
 | 2031 | 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { | 
 | 2032 | 			reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2033 | 				       "journal-986: transaction " | 
 | 2034 | 				       "is valid returning because trans_id %d is greater than " | 
 | 2035 | 				       "oldest_invalid %lu", | 
 | 2036 | 				       get_desc_trans_id(desc), | 
 | 2037 | 				       *oldest_invalid_trans_id); | 
 | 2038 | 			return 0; | 
 | 2039 | 		} | 
 | 2040 | 		if (newest_mount_id | 
 | 2041 | 		    && *newest_mount_id > get_desc_mount_id(desc)) { | 
 | 2042 | 			reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2043 | 				       "journal-1087: transaction " | 
 | 2044 | 				       "is valid returning because mount_id %d is less than " | 
 | 2045 | 				       "newest_mount_id %lu", | 
 | 2046 | 				       get_desc_mount_id(desc), | 
 | 2047 | 				       *newest_mount_id); | 
 | 2048 | 			return -1; | 
 | 2049 | 		} | 
 | 2050 | 		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) { | 
 | 2051 | 			reiserfs_warning(sb, "journal-2018", | 
 | 2052 | 					 "Bad transaction length %d " | 
 | 2053 | 					 "encountered, ignoring transaction", | 
 | 2054 | 					 get_desc_trans_len(desc)); | 
 | 2055 | 			return -1; | 
 | 2056 | 		} | 
 | 2057 | 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 
 | 2058 |  | 
 | 2059 | 		/* | 
 | 2060 | 		 * ok, we have a journal description block, | 
 | 2061 | 		 * let's see if the transaction was valid | 
 | 2062 | 		 */ | 
 | 2063 | 		c_bh = | 
 | 2064 | 		    journal_bread(sb, | 
 | 2065 | 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2066 | 				  ((offset + get_desc_trans_len(desc) + | 
 | 2067 | 				    1) % SB_ONDISK_JOURNAL_SIZE(sb))); | 
 | 2068 | 		if (!c_bh) | 
 | 2069 | 			return 0; | 
 | 2070 | 		commit = (struct reiserfs_journal_commit *)c_bh->b_data; | 
 | 2071 | 		if (journal_compare_desc_commit(sb, desc, commit)) { | 
 | 2072 | 			reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2073 | 				       "journal_transaction_is_valid, commit offset %ld had bad " | 
 | 2074 | 				       "time %d or length %d", | 
 | 2075 | 				       c_bh->b_blocknr - | 
 | 2076 | 				       SB_ONDISK_JOURNAL_1st_BLOCK(sb), | 
 | 2077 | 				       get_commit_trans_id(commit), | 
 | 2078 | 				       get_commit_trans_len(commit)); | 
 | 2079 | 			brelse(c_bh); | 
 | 2080 | 			if (oldest_invalid_trans_id) { | 
 | 2081 | 				*oldest_invalid_trans_id = | 
 | 2082 | 				    get_desc_trans_id(desc); | 
 | 2083 | 				reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2084 | 					       "journal-1004: " | 
 | 2085 | 					       "transaction_is_valid setting oldest invalid trans_id " | 
 | 2086 | 					       "to %d", | 
 | 2087 | 					       get_desc_trans_id(desc)); | 
 | 2088 | 			} | 
 | 2089 | 			return -1; | 
 | 2090 | 		} | 
 | 2091 | 		brelse(c_bh); | 
 | 2092 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2093 | 			       "journal-1006: found valid " | 
 | 2094 | 			       "transaction start offset %llu, len %d id %d", | 
 | 2095 | 			       d_bh->b_blocknr - | 
 | 2096 | 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb), | 
 | 2097 | 			       get_desc_trans_len(desc), | 
 | 2098 | 			       get_desc_trans_id(desc)); | 
 | 2099 | 		return 1; | 
 | 2100 | 	} else { | 
 | 2101 | 		return 0; | 
 | 2102 | 	} | 
 | 2103 | } | 
 | 2104 |  | 
 | 2105 | static void brelse_array(struct buffer_head **heads, int num) | 
 | 2106 | { | 
 | 2107 | 	int i; | 
 | 2108 | 	for (i = 0; i < num; i++) { | 
 | 2109 | 		brelse(heads[i]); | 
 | 2110 | 	} | 
 | 2111 | } | 
 | 2112 |  | 
 | 2113 | /* | 
 | 2114 |  * given the start, and values for the oldest acceptable transactions, | 
 * this either reads in and replays a transaction, or returns because the
 | 2116 |  * transaction is invalid, or too old. | 
 | 2117 |  * NOTE: only called during fs mount | 
 | 2118 |  */ | 
 | 2119 | static int journal_read_transaction(struct super_block *sb, | 
 | 2120 | 				    unsigned long cur_dblock, | 
 | 2121 | 				    unsigned long oldest_start, | 
 | 2122 | 				    unsigned int oldest_trans_id, | 
 | 2123 | 				    unsigned long newest_mount_id) | 
 | 2124 | { | 
 | 2125 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 2126 | 	struct reiserfs_journal_desc *desc; | 
 | 2127 | 	struct reiserfs_journal_commit *commit; | 
 | 2128 | 	unsigned int trans_id = 0; | 
 | 2129 | 	struct buffer_head *c_bh; | 
 | 2130 | 	struct buffer_head *d_bh; | 
 | 2131 | 	struct buffer_head **log_blocks = NULL; | 
 | 2132 | 	struct buffer_head **real_blocks = NULL; | 
 | 2133 | 	unsigned int trans_offset; | 
 | 2134 | 	int i; | 
 | 2135 | 	int trans_half; | 
 | 2136 |  | 
 | 2137 | 	d_bh = journal_bread(sb, cur_dblock); | 
 | 2138 | 	if (!d_bh) | 
 | 2139 | 		return 1; | 
 | 2140 | 	desc = (struct reiserfs_journal_desc *)d_bh->b_data; | 
 | 2141 | 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 
 | 2142 | 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: " | 
 | 2143 | 		       "journal_read_transaction, offset %llu, len %d mount_id %d", | 
 | 2144 | 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb), | 
 | 2145 | 		       get_desc_trans_len(desc), get_desc_mount_id(desc)); | 
 | 2146 | 	if (get_desc_trans_id(desc) < oldest_trans_id) { | 
 | 2147 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: " | 
 | 2148 | 			       "journal_read_trans skipping because %lu is too old", | 
 | 2149 | 			       cur_dblock - | 
 | 2150 | 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb)); | 
 | 2151 | 		brelse(d_bh); | 
 | 2152 | 		return 1; | 
 | 2153 | 	} | 
 | 2154 | 	if (get_desc_mount_id(desc) != newest_mount_id) { | 
 | 2155 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: " | 
 | 2156 | 			       "journal_read_trans skipping because %d is != " | 
 | 2157 | 			       "newest_mount_id %lu", get_desc_mount_id(desc), | 
 | 2158 | 			       newest_mount_id); | 
 | 2159 | 		brelse(d_bh); | 
 | 2160 | 		return 1; | 
 | 2161 | 	} | 
 | 2162 | 	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2163 | 			     ((trans_offset + get_desc_trans_len(desc) + 1) % | 
 | 2164 | 			      SB_ONDISK_JOURNAL_SIZE(sb))); | 
 | 2165 | 	if (!c_bh) { | 
 | 2166 | 		brelse(d_bh); | 
 | 2167 | 		return 1; | 
 | 2168 | 	} | 
 | 2169 | 	commit = (struct reiserfs_journal_commit *)c_bh->b_data; | 
 | 2170 | 	if (journal_compare_desc_commit(sb, desc, commit)) { | 
 | 2171 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2172 | 			       "journal_read_transaction, " | 
 | 2173 | 			       "commit offset %llu had bad time %d or length %d", | 
 | 2174 | 			       c_bh->b_blocknr - | 
 | 2175 | 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb), | 
 | 2176 | 			       get_commit_trans_id(commit), | 
 | 2177 | 			       get_commit_trans_len(commit)); | 
 | 2178 | 		brelse(c_bh); | 
 | 2179 | 		brelse(d_bh); | 
 | 2180 | 		return 1; | 
 | 2181 | 	} | 
 | 2182 |  | 
 | 2183 | 	if (bdev_read_only(sb->s_bdev)) { | 
 | 2184 | 		reiserfs_warning(sb, "clm-2076", | 
 | 2185 | 				 "device is readonly, unable to replay log"); | 
 | 2186 | 		brelse(c_bh); | 
 | 2187 | 		brelse(d_bh); | 
 | 2188 | 		return -EROFS; | 
 | 2189 | 	} | 
 | 2190 |  | 
 | 2191 | 	trans_id = get_desc_trans_id(desc); | 
 | 2192 | 	/* | 
 | 2193 | 	 * now we know we've got a good transaction, and it was | 
 | 2194 | 	 * inside the valid time ranges | 
 | 2195 | 	 */ | 
 | 2196 | 	log_blocks = kmalloc_array(get_desc_trans_len(desc), | 
 | 2197 | 				   sizeof(struct buffer_head *), | 
 | 2198 | 				   GFP_NOFS); | 
 | 2199 | 	real_blocks = kmalloc_array(get_desc_trans_len(desc), | 
 | 2200 | 				    sizeof(struct buffer_head *), | 
 | 2201 | 				    GFP_NOFS); | 
 | 2202 | 	if (!log_blocks || !real_blocks) { | 
 | 2203 | 		brelse(c_bh); | 
 | 2204 | 		brelse(d_bh); | 
 | 2205 | 		kfree(log_blocks); | 
 | 2206 | 		kfree(real_blocks); | 
 | 2207 | 		reiserfs_warning(sb, "journal-1169", | 
 | 2208 | 				 "kmalloc failed, unable to mount FS"); | 
 | 2209 | 		return -1; | 
 | 2210 | 	} | 
 | 2211 | 	/* get all the buffer heads */ | 
 | 2212 | 	trans_half = journal_trans_half(sb->s_blocksize); | 
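	/*
	 * the first trans_half real block numbers live in the desc block;
	 * the remainder are stored in the commit block
	 */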
 | 2213 | 	for (i = 0; i < get_desc_trans_len(desc); i++) { | 
 | 2214 | 		log_blocks[i] = | 
 | 2215 | 		    journal_getblk(sb, | 
 | 2216 | 				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2217 | 				   (trans_offset + 1 + | 
 | 2218 | 				    i) % SB_ONDISK_JOURNAL_SIZE(sb)); | 
 | 2219 | 		if (i < trans_half) { | 
 | 2220 | 			real_blocks[i] = | 
 | 2221 | 			    sb_getblk(sb, | 
 | 2222 | 				      le32_to_cpu(desc->j_realblock[i])); | 
 | 2223 | 		} else { | 
 | 2224 | 			real_blocks[i] = | 
 | 2225 | 			    sb_getblk(sb, | 
 | 2226 | 				      le32_to_cpu(commit-> | 
 | 2227 | 						  j_realblock[i - trans_half])); | 
 | 2228 | 		} | 
 | 2229 | 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) { | 
 | 2230 | 			reiserfs_warning(sb, "journal-1207", | 
 | 2231 | 					 "REPLAY FAILURE fsck required! " | 
 | 2232 | 					 "Block to replay is outside of " | 
 | 2233 | 					 "filesystem"); | 
 | 2234 | 			goto abort_replay; | 
 | 2235 | 		} | 
 | 2236 | 		/* make sure we don't try to replay onto log or reserved area */ | 
 | 2237 | 		if (is_block_in_log_or_reserved_area | 
 | 2238 | 		    (sb, real_blocks[i]->b_blocknr)) { | 
 | 2239 | 			reiserfs_warning(sb, "journal-1204", | 
 | 2240 | 					 "REPLAY FAILURE fsck required! " | 
 | 2241 | 					 "Trying to replay onto a log block"); | 
 | 2242 | abort_replay: | 
 | 2243 | 			brelse_array(log_blocks, i); | 
 | 2244 | 			brelse_array(real_blocks, i); | 
 | 2245 | 			brelse(c_bh); | 
 | 2246 | 			brelse(d_bh); | 
 | 2247 | 			kfree(log_blocks); | 
 | 2248 | 			kfree(real_blocks); | 
 | 2249 | 			return -1; | 
 | 2250 | 		} | 
 | 2251 | 	} | 
 | 2252 | 	/* read in the log blocks, memcpy to the corresponding real block */ | 
 | 2253 | 	ll_rw_block(REQ_OP_READ, 0, get_desc_trans_len(desc), log_blocks); | 
 | 2254 | 	for (i = 0; i < get_desc_trans_len(desc); i++) { | 
 | 2255 |  | 
 | 2256 | 		wait_on_buffer(log_blocks[i]); | 
 | 2257 | 		if (!buffer_uptodate(log_blocks[i])) { | 
 | 2258 | 			reiserfs_warning(sb, "journal-1212", | 
 | 2259 | 					 "REPLAY FAILURE fsck required! " | 
 | 2260 | 					 "buffer write failed"); | 
 | 2261 | 			brelse_array(log_blocks + i, | 
 | 2262 | 				     get_desc_trans_len(desc) - i); | 
 | 2263 | 			brelse_array(real_blocks, get_desc_trans_len(desc)); | 
 | 2264 | 			brelse(c_bh); | 
 | 2265 | 			brelse(d_bh); | 
 | 2266 | 			kfree(log_blocks); | 
 | 2267 | 			kfree(real_blocks); | 
 | 2268 | 			return -1; | 
 | 2269 | 		} | 
 | 2270 | 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, | 
 | 2271 | 		       real_blocks[i]->b_size); | 
 | 2272 | 		set_buffer_uptodate(real_blocks[i]); | 
 | 2273 | 		brelse(log_blocks[i]); | 
 | 2274 | 	} | 
 | 2275 | 	/* flush out the real blocks */ | 
 | 2276 | 	for (i = 0; i < get_desc_trans_len(desc); i++) { | 
 | 2277 | 		set_buffer_dirty(real_blocks[i]); | 
 | 2278 | 		write_dirty_buffer(real_blocks[i], 0); | 
 | 2279 | 	} | 
 | 2280 | 	for (i = 0; i < get_desc_trans_len(desc); i++) { | 
 | 2281 | 		wait_on_buffer(real_blocks[i]); | 
 | 2282 | 		if (!buffer_uptodate(real_blocks[i])) { | 
 | 2283 | 			reiserfs_warning(sb, "journal-1226", | 
 | 2284 | 					 "REPLAY FAILURE, fsck required! " | 
 | 2285 | 					 "buffer write failed"); | 
 | 2286 | 			brelse_array(real_blocks + i, | 
 | 2287 | 				     get_desc_trans_len(desc) - i); | 
 | 2288 | 			brelse(c_bh); | 
 | 2289 | 			brelse(d_bh); | 
 | 2290 | 			kfree(log_blocks); | 
 | 2291 | 			kfree(real_blocks); | 
 | 2292 | 			return -1; | 
 | 2293 | 		} | 
 | 2294 | 		brelse(real_blocks[i]); | 
 | 2295 | 	} | 
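	/*
	 * the desc block, j_len data blocks, and commit block span
	 * len + 2 journal blocks; the next transaction starts past them
	 */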
 | 2296 | 	cur_dblock = | 
 | 2297 | 	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2298 | 	    ((trans_offset + get_desc_trans_len(desc) + | 
 | 2299 | 	      2) % SB_ONDISK_JOURNAL_SIZE(sb)); | 
 | 2300 | 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
		       "journal-1095: setting journal start to offset %ld",
 | 2302 | 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); | 
 | 2303 |  | 
 | 2304 | 	/* | 
 | 2305 | 	 * init starting values for the first transaction, in case | 
 | 2306 | 	 * this is the last transaction to be replayed. | 
 | 2307 | 	 */ | 
 | 2308 | 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 
 | 2309 | 	journal->j_last_flush_trans_id = trans_id; | 
 | 2310 | 	journal->j_trans_id = trans_id + 1; | 
 | 2311 | 	/* check for trans_id overflow */ | 
 | 2312 | 	if (journal->j_trans_id == 0) | 
 | 2313 | 		journal->j_trans_id = 10; | 
 | 2314 | 	brelse(c_bh); | 
 | 2315 | 	brelse(d_bh); | 
 | 2316 | 	kfree(log_blocks); | 
 | 2317 | 	kfree(real_blocks); | 
 | 2318 | 	return 0; | 
 | 2319 | } | 
 | 2320 |  | 
 | 2321 | /* | 
 * This function reads blocks starting at block, up to max_block, of bufsize
 * size (but no more than BUFNR blocks at a time).  This proved to improve
 | 2324 |  * mounting speed on self-rebuilding raid5 arrays at least. | 
 | 2325 |  * Right now it is only used from journal code. But later we might use it | 
 | 2326 |  * from other places. | 
 | 2327 |  * Note: Do not use journal_getblk/sb_getblk functions here! | 
 | 2328 |  */ | 
 | 2329 | static struct buffer_head *reiserfs_breada(struct block_device *dev, | 
 | 2330 | 					   b_blocknr_t block, int bufsize, | 
 | 2331 | 					   b_blocknr_t max_block) | 
 | 2332 | { | 
 | 2333 | 	struct buffer_head *bhlist[BUFNR]; | 
 | 2334 | 	unsigned int blocks = BUFNR; | 
 | 2335 | 	struct buffer_head *bh; | 
 | 2336 | 	int i, j; | 
 | 2337 |  | 
 | 2338 | 	bh = __getblk(dev, block, bufsize); | 
 | 2339 | 	if (buffer_uptodate(bh)) | 
 | 2340 | 		return (bh); | 
 | 2341 |  | 
 | 2342 | 	if (block + BUFNR > max_block) { | 
 | 2343 | 		blocks = max_block - block; | 
 | 2344 | 	} | 
 | 2345 | 	bhlist[0] = bh; | 
 | 2346 | 	j = 1; | 
 | 2347 | 	for (i = 1; i < blocks; i++) { | 
 | 2348 | 		bh = __getblk(dev, block + i, bufsize); | 
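		/* stop growing the read-ahead batch at the first cached buffer */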
 | 2349 | 		if (buffer_uptodate(bh)) { | 
 | 2350 | 			brelse(bh); | 
 | 2351 | 			break; | 
 | 2352 | 		} else | 
 | 2353 | 			bhlist[j++] = bh; | 
 | 2354 | 	} | 
 | 2355 | 	ll_rw_block(REQ_OP_READ, 0, j, bhlist); | 
 | 2356 | 	for (i = 1; i < j; i++) | 
 | 2357 | 		brelse(bhlist[i]); | 
 | 2358 | 	bh = bhlist[0]; | 
 | 2359 | 	wait_on_buffer(bh); | 
 | 2360 | 	if (buffer_uptodate(bh)) | 
 | 2361 | 		return bh; | 
 | 2362 | 	brelse(bh); | 
 | 2363 | 	return NULL; | 
 | 2364 | } | 
 | 2365 |  | 
 | 2366 | /* | 
 | 2367 |  * read and replay the log | 
 * on a clean unmount, the journal header's next unflushed pointer will
 * point to an invalid transaction.  This tests that before finding all the
 | 2370 |  * transactions in the log, which makes normal mount times fast. | 
 | 2371 |  * | 
 | 2372 |  * After a crash, this starts with the next unflushed transaction, and | 
 | 2373 |  * replays until it finds one too old, or invalid. | 
 | 2374 |  * | 
 | 2375 |  * On exit, it sets things up so the first transaction will work correctly. | 
 | 2376 |  * NOTE: only called during fs mount | 
 | 2377 |  */ | 
 | 2378 | static int journal_read(struct super_block *sb) | 
 | 2379 | { | 
 | 2380 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 2381 | 	struct reiserfs_journal_desc *desc; | 
 | 2382 | 	unsigned int oldest_trans_id = 0; | 
 | 2383 | 	unsigned int oldest_invalid_trans_id = 0; | 
 | 2384 | 	time64_t start; | 
 | 2385 | 	unsigned long oldest_start = 0; | 
 | 2386 | 	unsigned long cur_dblock = 0; | 
 | 2387 | 	unsigned long newest_mount_id = 9; | 
 | 2388 | 	struct buffer_head *d_bh; | 
 | 2389 | 	struct reiserfs_journal_header *jh; | 
 | 2390 | 	int valid_journal_header = 0; | 
 | 2391 | 	int replay_count = 0; | 
 | 2392 | 	int continue_replay = 1; | 
 | 2393 | 	int ret; | 
 | 2394 |  | 
 | 2395 | 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 
 | 2396 | 	reiserfs_info(sb, "checking transaction log (%pg)\n", | 
 | 2397 | 		      journal->j_dev_bd); | 
 | 2398 | 	start = ktime_get_seconds(); | 
 | 2399 |  | 
 | 2400 | 	/* | 
 | 2401 | 	 * step 1, read in the journal header block.  Check the transaction | 
 | 2402 | 	 * it says is the first unflushed, and if that transaction is not | 
 | 2403 | 	 * valid, replay is done | 
 | 2404 | 	 */ | 
 | 2405 | 	journal->j_header_bh = journal_bread(sb, | 
 | 2406 | 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb) | 
 | 2407 | 					     + SB_ONDISK_JOURNAL_SIZE(sb)); | 
 | 2408 | 	if (!journal->j_header_bh) { | 
 | 2409 | 		return 1; | 
 | 2410 | 	} | 
 | 2411 | 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); | 
 | 2412 | 	if (le32_to_cpu(jh->j_first_unflushed_offset) < | 
 | 2413 | 	    SB_ONDISK_JOURNAL_SIZE(sb) | 
 | 2414 | 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { | 
 | 2415 | 		oldest_start = | 
 | 2416 | 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2417 | 		    le32_to_cpu(jh->j_first_unflushed_offset); | 
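		/* replay begins with the transaction after the last flush */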
 | 2418 | 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; | 
 | 2419 | 		newest_mount_id = le32_to_cpu(jh->j_mount_id); | 
 | 2420 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2421 | 			       "journal-1153: found in " | 
 | 2422 | 			       "header: first_unflushed_offset %d, last_flushed_trans_id " | 
 | 2423 | 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset), | 
 | 2424 | 			       le32_to_cpu(jh->j_last_flush_trans_id)); | 
 | 2425 | 		valid_journal_header = 1; | 
 | 2426 |  | 
 | 2427 | 		/* | 
 | 2428 | 		 * now, we try to read the first unflushed offset.  If it | 
 | 2429 | 		 * is not valid, there is nothing more we can do, and it | 
 | 2430 | 		 * makes no sense to read through the whole log. | 
 | 2431 | 		 */ | 
 | 2432 | 		d_bh = | 
 | 2433 | 		    journal_bread(sb, | 
 | 2434 | 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2435 | 				  le32_to_cpu(jh->j_first_unflushed_offset)); | 
 | 2436 | 		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL); | 
 | 2437 | 		if (!ret) { | 
 | 2438 | 			continue_replay = 0; | 
 | 2439 | 		} | 
 | 2440 | 		brelse(d_bh); | 
 | 2441 | 		goto start_log_replay; | 
 | 2442 | 	} | 
 | 2443 |  | 
 | 2444 | 	/* | 
 | 2445 | 	 * ok, there are transactions that need to be replayed.  start | 
 | 2446 | 	 * with the first log block, find all the valid transactions, and | 
 | 2447 | 	 * pick out the oldest. | 
 | 2448 | 	 */ | 
 | 2449 | 	while (continue_replay | 
 | 2450 | 	       && cur_dblock < | 
 | 2451 | 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2452 | 		SB_ONDISK_JOURNAL_SIZE(sb))) { | 
 | 2453 | 		/* | 
		 * Note that the block sizes of the primary fs device and
		 * the journal device are required to be the same
 | 2456 | 		 */ | 
 | 2457 | 		d_bh = | 
 | 2458 | 		    reiserfs_breada(journal->j_dev_bd, cur_dblock, | 
 | 2459 | 				    sb->s_blocksize, | 
 | 2460 | 				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2461 | 				    SB_ONDISK_JOURNAL_SIZE(sb)); | 
 | 2462 | 		ret = | 
 | 2463 | 		    journal_transaction_is_valid(sb, d_bh, | 
 | 2464 | 						 &oldest_invalid_trans_id, | 
 | 2465 | 						 &newest_mount_id); | 
 | 2466 | 		if (ret == 1) { | 
 | 2467 | 			desc = (struct reiserfs_journal_desc *)d_bh->b_data; | 
 | 2468 | 			if (oldest_start == 0) {	/* init all oldest_ values */ | 
 | 2469 | 				oldest_trans_id = get_desc_trans_id(desc); | 
 | 2470 | 				oldest_start = d_bh->b_blocknr; | 
 | 2471 | 				newest_mount_id = get_desc_mount_id(desc); | 
 | 2472 | 				reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2473 | 					       "journal-1179: Setting " | 
 | 2474 | 					       "oldest_start to offset %llu, trans_id %lu", | 
 | 2475 | 					       oldest_start - | 
 | 2476 | 					       SB_ONDISK_JOURNAL_1st_BLOCK | 
 | 2477 | 					       (sb), oldest_trans_id); | 
 | 2478 | 			} else if (oldest_trans_id > get_desc_trans_id(desc)) { | 
 | 2479 | 				/* one we just read was older */ | 
 | 2480 | 				oldest_trans_id = get_desc_trans_id(desc); | 
 | 2481 | 				oldest_start = d_bh->b_blocknr; | 
 | 2482 | 				reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2483 | 					       "journal-1180: Resetting " | 
 | 2484 | 					       "oldest_start to offset %lu, trans_id %lu", | 
 | 2485 | 					       oldest_start - | 
 | 2486 | 					       SB_ONDISK_JOURNAL_1st_BLOCK | 
 | 2487 | 					       (sb), oldest_trans_id); | 
 | 2488 | 			} | 
 | 2489 | 			if (newest_mount_id < get_desc_mount_id(desc)) { | 
 | 2490 | 				newest_mount_id = get_desc_mount_id(desc); | 
 | 2491 | 				reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2492 | 					       "journal-1299: Setting " | 
 | 2493 | 					       "newest_mount_id to %d", | 
 | 2494 | 					       get_desc_mount_id(desc)); | 
 | 2495 | 			} | 
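			/* skip this transaction's desc, data, and commit blocks */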
 | 2496 | 			cur_dblock += get_desc_trans_len(desc) + 2; | 
 | 2497 | 		} else { | 
 | 2498 | 			cur_dblock++; | 
 | 2499 | 		} | 
 | 2500 | 		brelse(d_bh); | 
 | 2501 | 	} | 
 | 2502 |  | 
 | 2503 | start_log_replay: | 
 | 2504 | 	cur_dblock = oldest_start; | 
 | 2505 | 	if (oldest_trans_id) { | 
 | 2506 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
 | 2507 | 			       "journal-1206: Starting replay " | 
 | 2508 | 			       "from offset %llu, trans_id %lu", | 
 | 2509 | 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb), | 
 | 2510 | 			       oldest_trans_id); | 
 | 2511 |  | 
 | 2512 | 	} | 
 | 2513 | 	replay_count = 0; | 
 | 2514 | 	while (continue_replay && oldest_trans_id > 0) { | 
 | 2515 | 		ret = | 
 | 2516 | 		    journal_read_transaction(sb, cur_dblock, oldest_start, | 
 | 2517 | 					     oldest_trans_id, newest_mount_id); | 
 | 2518 | 		if (ret < 0) { | 
 | 2519 | 			return ret; | 
 | 2520 | 		} else if (ret != 0) { | 
 | 2521 | 			break; | 
 | 2522 | 		} | 
 | 2523 | 		cur_dblock = | 
 | 2524 | 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start; | 
 | 2525 | 		replay_count++; | 
 | 2526 | 		if (cur_dblock == oldest_start) | 
 | 2527 | 			break; | 
 | 2528 | 	} | 
 | 2529 |  | 
 | 2530 | 	if (oldest_trans_id == 0) { | 
 | 2531 | 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 
			       "journal-1225: No valid transactions found");
 | 2533 | 	} | 
 | 2534 | 	/* | 
 | 2535 | 	 * j_start does not get set correctly if we don't replay any | 
 | 2536 | 	 * transactions.  if we had a valid journal_header, set j_start | 
 | 2537 | 	 * to the first unflushed transaction value, copy the trans_id | 
 | 2538 | 	 * from the header | 
 | 2539 | 	 */ | 
 | 2540 | 	if (valid_journal_header && replay_count == 0) { | 
 | 2541 | 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); | 
 | 2542 | 		journal->j_trans_id = | 
 | 2543 | 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1; | 
 | 2544 | 		/* check for trans_id overflow */ | 
 | 2545 | 		if (journal->j_trans_id == 0) | 
 | 2546 | 			journal->j_trans_id = 10; | 
 | 2547 | 		journal->j_last_flush_trans_id = | 
 | 2548 | 		    le32_to_cpu(jh->j_last_flush_trans_id); | 
 | 2549 | 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; | 
 | 2550 | 	} else { | 
 | 2551 | 		journal->j_mount_id = newest_mount_id + 1; | 
 | 2552 | 	} | 
 | 2553 | 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " | 
 | 2554 | 		       "newest_mount_id to %lu", journal->j_mount_id); | 
 | 2555 | 	journal->j_first_unflushed_offset = journal->j_start; | 
 | 2556 | 	if (replay_count > 0) { | 
 | 2557 | 		reiserfs_info(sb, | 
 | 2558 | 			      "replayed %d transactions in %lu seconds\n", | 
 | 2559 | 			      replay_count, ktime_get_seconds() - start); | 
 | 2560 | 	} | 
 | 2561 | 	/* needed to satisfy the locking in _update_journal_header_block */ | 
 | 2562 | 	reiserfs_write_lock(sb); | 
 | 2563 | 	if (!bdev_read_only(sb->s_bdev) && | 
 | 2564 | 	    _update_journal_header_block(sb, journal->j_start, | 
 | 2565 | 					 journal->j_last_flush_trans_id)) { | 
 | 2566 | 		reiserfs_write_unlock(sb); | 
 | 2567 | 		/* | 
 | 2568 | 		 * replay failed, caller must call free_journal_ram and abort | 
 | 2569 | 		 * the mount | 
 | 2570 | 		 */ | 
 | 2571 | 		return -1; | 
 | 2572 | 	} | 
 | 2573 | 	reiserfs_write_unlock(sb); | 
 | 2574 | 	return 0; | 
 | 2575 | } | 
 | 2576 |  | 
 | 2577 | static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) | 
 | 2578 | { | 
 | 2579 | 	struct reiserfs_journal_list *jl; | 
 | 2580 | 	jl = kzalloc(sizeof(struct reiserfs_journal_list), | 
 | 2581 | 		     GFP_NOFS | __GFP_NOFAIL); | 
 | 2582 | 	INIT_LIST_HEAD(&jl->j_list); | 
 | 2583 | 	INIT_LIST_HEAD(&jl->j_working_list); | 
 | 2584 | 	INIT_LIST_HEAD(&jl->j_tail_bh_list); | 
 | 2585 | 	INIT_LIST_HEAD(&jl->j_bh_list); | 
 | 2586 | 	mutex_init(&jl->j_commit_mutex); | 
 | 2587 | 	SB_JOURNAL(s)->j_num_lists++; | 
 | 2588 | 	get_journal_list(jl); | 
 | 2589 | 	return jl; | 
 | 2590 | } | 
 | 2591 |  | 
 | 2592 | static void journal_list_init(struct super_block *sb) | 
 | 2593 | { | 
 | 2594 | 	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb); | 
 | 2595 | } | 
 | 2596 |  | 
 | 2597 | static void release_journal_dev(struct super_block *super, | 
 | 2598 | 			       struct reiserfs_journal *journal) | 
 | 2599 | { | 
 | 2600 | 	if (journal->j_dev_bd != NULL) { | 
 | 2601 | 		blkdev_put(journal->j_dev_bd, journal->j_dev_mode); | 
 | 2602 | 		journal->j_dev_bd = NULL; | 
 | 2603 | 	} | 
 | 2604 | } | 
 | 2605 |  | 
 | 2606 | static int journal_init_dev(struct super_block *super, | 
 | 2607 | 			    struct reiserfs_journal *journal, | 
 | 2608 | 			    const char *jdev_name) | 
 | 2609 | { | 
 | 2610 | 	int result; | 
 | 2611 | 	dev_t jdev; | 
 | 2612 | 	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; | 
 | 2613 | 	char b[BDEVNAME_SIZE]; | 
 | 2614 |  | 
 | 2615 | 	result = 0; | 
 | 2616 |  | 
 | 2617 | 	journal->j_dev_bd = NULL; | 
 | 2618 | 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? | 
 | 2619 | 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; | 
 | 2620 |  | 
 | 2621 | 	if (bdev_read_only(super->s_bdev)) | 
 | 2622 | 		blkdev_mode = FMODE_READ; | 
 | 2623 |  | 
	/*
	 * no "jdev" mount option: the journal is on the device recorded
	 * in the super block, or on the fs device itself
	 */
 | 2625 | 	if ((!jdev_name || !jdev_name[0])) { | 
 | 2626 | 		if (jdev == super->s_dev) | 
 | 2627 | 			blkdev_mode &= ~FMODE_EXCL; | 
 | 2628 | 		journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, | 
 | 2629 | 						      journal); | 
 | 2630 | 		journal->j_dev_mode = blkdev_mode; | 
 | 2631 | 		if (IS_ERR(journal->j_dev_bd)) { | 
 | 2632 | 			result = PTR_ERR(journal->j_dev_bd); | 
 | 2633 | 			journal->j_dev_bd = NULL; | 
 | 2634 | 			reiserfs_warning(super, "sh-458", | 
 | 2635 | 					 "cannot init journal device '%s': %i", | 
 | 2636 | 					 __bdevname(jdev, b), result); | 
 | 2637 | 			return result; | 
 | 2638 | 		} else if (jdev != super->s_dev) | 
 | 2639 | 			set_blocksize(journal->j_dev_bd, super->s_blocksize); | 
 | 2640 |  | 
 | 2641 | 		return 0; | 
 | 2642 | 	} | 
 | 2643 |  | 
 | 2644 | 	journal->j_dev_mode = blkdev_mode; | 
 | 2645 | 	journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); | 
 | 2646 | 	if (IS_ERR(journal->j_dev_bd)) { | 
 | 2647 | 		result = PTR_ERR(journal->j_dev_bd); | 
 | 2648 | 		journal->j_dev_bd = NULL; | 
 | 2649 | 		reiserfs_warning(super, "sh-457", | 
 | 2650 | 				 "journal_init_dev: Cannot open '%s': %i", | 
 | 2651 | 				 jdev_name, result); | 
 | 2652 | 		return result; | 
 | 2653 | 	} | 
 | 2654 |  | 
 | 2655 | 	set_blocksize(journal->j_dev_bd, super->s_blocksize); | 
 | 2656 | 	reiserfs_info(super, | 
 | 2657 | 		      "journal_init_dev: journal device: %pg\n", | 
 | 2658 | 		      journal->j_dev_bd); | 
 | 2659 | 	return 0; | 
 | 2660 | } | 
 | 2661 |  | 
 | 2662 | /* | 
 * When creating/tuning a file system, the user can assign some
 * journal params within boundaries which depend on the ratio
 * blocksize/standard_blocksize.
 *
 * For blocks >= standard_blocksize the transaction size should
 * be not less than JOURNAL_TRANS_MIN_DEFAULT, and not more
 * than JOURNAL_TRANS_MAX_DEFAULT.
 | 2670 |  * | 
 | 2671 |  * For blocks < standard_blocksize these boundaries should be | 
 | 2672 |  * decreased proportionally. | 
 | 2673 |  */ | 
 | 2674 | #define REISERFS_STANDARD_BLKSIZE (4096) | 
 | 2675 |  | 
 | 2676 | static int check_advise_trans_params(struct super_block *sb, | 
 | 2677 | 				     struct reiserfs_journal *journal) | 
 | 2678 | { | 
	if (journal->j_trans_max) {
		/* Non-default journal params.  Do sanity check for them. */
		int ratio = 1;
		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
			ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
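		/*
		 * e.g. with 1k blocks the ratio is 4, so the allowed
		 * j_trans_max bounds below shrink by a factor of 4
		 */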
 | 2684 |  | 
 | 2685 | 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || | 
 | 2686 | 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || | 
 | 2687 | 		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max < | 
 | 2688 | 		    JOURNAL_MIN_RATIO) { | 
 | 2689 | 			reiserfs_warning(sb, "sh-462", | 
 | 2690 | 					 "bad transaction max size (%u). " | 
 | 2691 | 					 "FSCK?", journal->j_trans_max); | 
 | 2692 | 			return 1; | 
 | 2693 | 		} | 
		if (journal->j_max_batch != (journal->j_trans_max) *
		    JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT) {
 | 2696 | 			reiserfs_warning(sb, "sh-463", | 
 | 2697 | 					 "bad transaction max batch (%u). " | 
 | 2698 | 					 "FSCK?", journal->j_max_batch); | 
 | 2699 | 			return 1; | 
 | 2700 | 		} | 
 | 2701 | 	} else { | 
 | 2702 | 		/* | 
 | 2703 | 		 * Default journal params. | 
 | 2704 | 		 * The file system was created by old version | 
 | 2705 | 		 * of mkreiserfs, so some fields contain zeros, | 
 | 2706 | 		 * and we need to advise proper values for them | 
 | 2707 | 		 */ | 
 | 2708 | 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { | 
 | 2709 | 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", | 
 | 2710 | 					 sb->s_blocksize); | 
 | 2711 | 			return 1; | 
 | 2712 | 		} | 
 | 2713 | 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; | 
 | 2714 | 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; | 
 | 2715 | 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; | 
 | 2716 | 	} | 
 | 2717 | 	return 0; | 
 | 2718 | } | 
 | 2719 |  | 
 | 2720 | /* must be called once on fs mount.  calls journal_read for you */ | 
 | 2721 | int journal_init(struct super_block *sb, const char *j_dev_name, | 
 | 2722 | 		 int old_format, unsigned int commit_max_age) | 
 | 2723 | { | 
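	/* allow up to two in-memory cnodes per on-disk journal block */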
 | 2724 | 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2; | 
 | 2725 | 	struct buffer_head *bhjh; | 
 | 2726 | 	struct reiserfs_super_block *rs; | 
 | 2727 | 	struct reiserfs_journal_header *jh; | 
 | 2728 | 	struct reiserfs_journal *journal; | 
 | 2729 | 	struct reiserfs_journal_list *jl; | 
 | 2730 | 	int ret; | 
 | 2731 |  | 
 | 2732 | 	journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); | 
 | 2733 | 	if (!journal) { | 
 | 2734 | 		reiserfs_warning(sb, "journal-1256", | 
 | 2735 | 				 "unable to get memory for journal structure"); | 
 | 2736 | 		return 1; | 
 | 2737 | 	} | 
 | 2738 | 	INIT_LIST_HEAD(&journal->j_bitmap_nodes); | 
 | 2739 | 	INIT_LIST_HEAD(&journal->j_prealloc_list); | 
 | 2740 | 	INIT_LIST_HEAD(&journal->j_working_list); | 
 | 2741 | 	INIT_LIST_HEAD(&journal->j_journal_list); | 
 | 2742 | 	journal->j_persistent_trans = 0; | 
 | 2743 | 	if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, | 
 | 2744 | 					   reiserfs_bmap_count(sb))) | 
 | 2745 | 		goto free_and_return; | 
 | 2746 |  | 
 | 2747 | 	allocate_bitmap_nodes(sb); | 
 | 2748 |  | 
 | 2749 | 	/* reserved for journal area support */ | 
 | 2750 | 	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ? | 
 | 2751 | 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES | 
 | 2752 | 						 / sb->s_blocksize + | 
 | 2753 | 						 reiserfs_bmap_count(sb) + | 
 | 2754 | 						 1 : | 
 | 2755 | 						 REISERFS_DISK_OFFSET_IN_BYTES / | 
 | 2756 | 						 sb->s_blocksize + 2); | 
 | 2757 |  | 
 | 2758 | 	/* | 
	 * Sanity check to see if the standard journal fits
	 * within the first bitmap (relevant for small block sizes)
 | 2761 | 	 */ | 
 | 2762 | 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) && | 
 | 2763 | 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + | 
 | 2764 | 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { | 
 | 2765 | 		reiserfs_warning(sb, "journal-1393", | 
 | 2766 | 				 "journal does not fit for area addressed " | 
 | 2767 | 				 "by first of bitmap blocks. It starts at " | 
 | 2768 | 				 "%u and its size is %u. Block size %ld", | 
 | 2769 | 				 SB_JOURNAL_1st_RESERVED_BLOCK(sb), | 
 | 2770 | 				 SB_ONDISK_JOURNAL_SIZE(sb), | 
 | 2771 | 				 sb->s_blocksize); | 
 | 2772 | 		goto free_and_return; | 
 | 2773 | 	} | 
 | 2774 |  | 
 | 2775 | 	if (journal_init_dev(sb, journal, j_dev_name) != 0) { | 
 | 2776 | 		reiserfs_warning(sb, "sh-462", | 
 | 2777 | 				 "unable to initialize journal device"); | 
 | 2778 | 		goto free_and_return; | 
 | 2779 | 	} | 
 | 2780 |  | 
 | 2781 | 	rs = SB_DISK_SUPER_BLOCK(sb); | 
 | 2782 |  | 
 | 2783 | 	/* read journal header */ | 
 | 2784 | 	bhjh = journal_bread(sb, | 
 | 2785 | 			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 2786 | 			     SB_ONDISK_JOURNAL_SIZE(sb)); | 
 | 2787 | 	if (!bhjh) { | 
 | 2788 | 		reiserfs_warning(sb, "sh-459", | 
 | 2789 | 				 "unable to read journal header"); | 
 | 2790 | 		goto free_and_return; | 
 | 2791 | 	} | 
 | 2792 | 	jh = (struct reiserfs_journal_header *)(bhjh->b_data); | 
 | 2793 |  | 
 | 2794 | 	/* make sure that journal matches to the super block */ | 
 | 2795 | 	if (is_reiserfs_jr(rs) | 
 | 2796 | 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != | 
 | 2797 | 		sb_jp_journal_magic(rs))) { | 
 | 2798 | 		reiserfs_warning(sb, "sh-460", | 
 | 2799 | 				 "journal header magic %x (device %pg) does " | 
 | 2800 | 				 "not match to magic found in super block %x", | 
 | 2801 | 				 jh->jh_journal.jp_journal_magic, | 
 | 2802 | 				 journal->j_dev_bd, | 
 | 2803 | 				 sb_jp_journal_magic(rs)); | 
 | 2804 | 		brelse(bhjh); | 
 | 2805 | 		goto free_and_return; | 
 | 2806 | 	} | 
 | 2807 |  | 
 | 2808 | 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); | 
 | 2809 | 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); | 
 | 2810 | 	journal->j_max_commit_age = | 
 | 2811 | 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); | 
 | 2812 | 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; | 
 | 2813 |  | 
	if (check_advise_trans_params(sb, journal) != 0)
		goto free_and_return;
 | 2816 | 	journal->j_default_max_commit_age = journal->j_max_commit_age; | 
 | 2817 |  | 
 | 2818 | 	if (commit_max_age != 0) { | 
 | 2819 | 		journal->j_max_commit_age = commit_max_age; | 
 | 2820 | 		journal->j_max_trans_age = commit_max_age; | 
 | 2821 | 	} | 
 | 2822 |  | 
 | 2823 | 	reiserfs_info(sb, "journal params: device %pg, size %u, " | 
 | 2824 | 		      "journal first block %u, max trans len %u, max batch %u, " | 
 | 2825 | 		      "max commit age %u, max trans age %u\n", | 
 | 2826 | 		      journal->j_dev_bd, | 
 | 2827 | 		      SB_ONDISK_JOURNAL_SIZE(sb), | 
 | 2828 | 		      SB_ONDISK_JOURNAL_1st_BLOCK(sb), | 
 | 2829 | 		      journal->j_trans_max, | 
 | 2830 | 		      journal->j_max_batch, | 
 | 2831 | 		      journal->j_max_commit_age, journal->j_max_trans_age); | 
 | 2832 |  | 
 | 2833 | 	brelse(bhjh); | 
 | 2834 |  | 
 | 2835 | 	journal->j_list_bitmap_index = 0; | 
 | 2836 | 	journal_list_init(sb); | 
 | 2837 |  | 
 | 2838 | 	memset(journal->j_list_hash_table, 0, | 
 | 2839 | 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); | 
 | 2840 |  | 
 | 2841 | 	INIT_LIST_HEAD(&journal->j_dirty_buffers); | 
 | 2842 | 	spin_lock_init(&journal->j_dirty_buffers_lock); | 
 | 2843 |  | 
 | 2844 | 	journal->j_start = 0; | 
 | 2845 | 	journal->j_len = 0; | 
 | 2846 | 	journal->j_len_alloc = 0; | 
 | 2847 | 	atomic_set(&journal->j_wcount, 0); | 
 | 2848 | 	atomic_set(&journal->j_async_throttle, 0); | 
 | 2849 | 	journal->j_bcount = 0; | 
 | 2850 | 	journal->j_trans_start_time = 0; | 
 | 2851 | 	journal->j_last = NULL; | 
 | 2852 | 	journal->j_first = NULL; | 
 | 2853 | 	init_waitqueue_head(&journal->j_join_wait); | 
 | 2854 | 	mutex_init(&journal->j_mutex); | 
 | 2855 | 	mutex_init(&journal->j_flush_mutex); | 
 | 2856 |  | 
 | 2857 | 	journal->j_trans_id = 10; | 
 | 2858 | 	journal->j_mount_id = 10; | 
 | 2859 | 	journal->j_state = 0; | 
 | 2860 | 	atomic_set(&journal->j_jlock, 0); | 
 | 2861 | 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes); | 
 | 2862 | 	journal->j_cnode_free_orig = journal->j_cnode_free_list; | 
 | 2863 | 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; | 
 | 2864 | 	journal->j_cnode_used = 0; | 
 | 2865 | 	journal->j_must_wait = 0; | 
 | 2866 |  | 
	if (journal->j_cnode_free == 0) {
		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
				 "allocation failed (%zu bytes). Journal is "
				 "too large for available memory.",
				 sizeof(struct reiserfs_journal_cnode) * num_cnodes);
		goto free_and_return;
	}
 | 2875 |  | 
 | 2876 | 	init_journal_hash(sb); | 
 | 2877 | 	jl = journal->j_current_jl; | 
 | 2878 |  | 
 | 2879 | 	/* | 
 | 2880 | 	 * get_list_bitmap() may call flush_commit_list() which | 
 | 2881 | 	 * requires the lock. Calling flush_commit_list() shouldn't happen | 
 | 2882 | 	 * this early but I like to be paranoid. | 
 | 2883 | 	 */ | 
 | 2884 | 	reiserfs_write_lock(sb); | 
 | 2885 | 	jl->j_list_bitmap = get_list_bitmap(sb, jl); | 
 | 2886 | 	reiserfs_write_unlock(sb); | 
 | 2887 | 	if (!jl->j_list_bitmap) { | 
 | 2888 | 		reiserfs_warning(sb, "journal-2005", | 
 | 2889 | 				 "get_list_bitmap failed for journal list 0"); | 
 | 2890 | 		goto free_and_return; | 
 | 2891 | 	} | 
 | 2892 |  | 
 | 2893 | 	ret = journal_read(sb); | 
 | 2894 | 	if (ret < 0) { | 
 | 2895 | 		reiserfs_warning(sb, "reiserfs-2006", | 
 | 2896 | 				 "Replay Failure, unable to mount"); | 
 | 2897 | 		goto free_and_return; | 
 | 2898 | 	} | 
 | 2899 |  | 
 | 2900 | 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); | 
 | 2901 | 	journal->j_work_sb = sb; | 
 | 2902 | 	return 0; | 
 | 2903 | free_and_return: | 
 | 2904 | 	free_journal_ram(sb); | 
 | 2905 | 	return 1; | 
 | 2906 | } | 
 | 2907 |  | 
 | 2908 | /* | 
 | 2909 |  * test for a polite end of the current transaction.  Used by file_write, | 
 | 2910 |  * and should be used by delete to make sure they don't write more than | 
 | 2911 |  * can fit inside a single transaction | 
 | 2912 |  */ | 
 | 2913 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | 
 | 2914 | 				   int new_alloc) | 
 | 2915 | { | 
 | 2916 | 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); | 
 | 2917 | 	time64_t now = ktime_get_seconds(); | 
 | 2918 | 	/* cannot restart while nested */ | 
 | 2919 | 	BUG_ON(!th->t_trans_id); | 
 | 2920 | 	if (th->t_refcount > 1) | 
 | 2921 | 		return 0; | 
 | 2922 | 	if (journal->j_must_wait > 0 || | 
 | 2923 | 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || | 
 | 2924 | 	    atomic_read(&journal->j_jlock) || | 
 | 2925 | 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age || | 
 | 2926 | 	    journal->j_cnode_free < (journal->j_trans_max * 3)) { | 
 | 2927 | 		return 1; | 
 | 2928 | 	} | 
 | 2929 |  | 
 | 2930 | 	journal->j_len_alloc += new_alloc; | 
	th->t_blocks_allocated += new_alloc;
 | 2932 | 	return 0; | 
 | 2933 | } | 
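
/*
 * Typical caller pattern (a sketch, not code from this file): long-running
 * writers poll this and restart the transaction when asked to, e.g.
 *
 *	if (journal_transaction_should_end(th, blocks_needed)) {
 *		... journal_end() the current handle and journal_begin()
 *		    a fresh one before logging more blocks ...
 *	}
 *
 * where blocks_needed is the caller's estimate of blocks still to log.
 */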
 | 2934 |  | 
 | 2935 | /* this must be called inside a transaction */ | 
 | 2936 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | 
 | 2937 | { | 
 | 2938 | 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); | 
 | 2939 | 	BUG_ON(!th->t_trans_id); | 
 | 2940 | 	journal->j_must_wait = 1; | 
 | 2941 | 	set_bit(J_WRITERS_BLOCKED, &journal->j_state); | 
 | 2943 | } | 
 | 2944 |  | 
 | 2945 | /* this must be called without a transaction started */ | 
 | 2946 | void reiserfs_allow_writes(struct super_block *s) | 
 | 2947 | { | 
 | 2948 | 	struct reiserfs_journal *journal = SB_JOURNAL(s); | 
 | 2949 | 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state); | 
 | 2950 | 	wake_up(&journal->j_join_wait); | 
 | 2951 | } | 
 | 2952 |  | 
 | 2953 | /* this must be called without a transaction started */ | 
 | 2954 | void reiserfs_wait_on_write_block(struct super_block *s) | 
 | 2955 | { | 
 | 2956 | 	struct reiserfs_journal *journal = SB_JOURNAL(s); | 
 | 2957 | 	wait_event(journal->j_join_wait, | 
 | 2958 | 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); | 
 | 2959 | } | 
 | 2960 |  | 
 | 2961 | static void queue_log_writer(struct super_block *s) | 
 | 2962 | { | 
 | 2963 | 	wait_queue_entry_t wait; | 
 | 2964 | 	struct reiserfs_journal *journal = SB_JOURNAL(s); | 
 | 2965 | 	set_bit(J_WRITERS_QUEUED, &journal->j_state); | 
 | 2966 |  | 
 | 2967 | 	/* | 
 | 2968 | 	 * we don't want to use wait_event here because | 
 | 2969 | 	 * we only want to wait once. | 
 | 2970 | 	 */ | 
 | 2971 | 	init_waitqueue_entry(&wait, current); | 
 | 2972 | 	add_wait_queue(&journal->j_join_wait, &wait); | 
 | 2973 | 	set_current_state(TASK_UNINTERRUPTIBLE); | 
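	/*
	 * Re-check the bit only after setting the task state: if
	 * wake_queued_writers() clears J_WRITERS_QUEUED and wakes the
	 * queue between this test and schedule(), the wakeup puts us
	 * back in TASK_RUNNING and schedule() returns promptly instead
	 * of sleeping forever (the usual lost-wakeup-avoidance pattern).
	 */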
 | 2974 | 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { | 
 | 2975 | 		int depth = reiserfs_write_unlock_nested(s); | 
 | 2976 | 		schedule(); | 
 | 2977 | 		reiserfs_write_lock_nested(s, depth); | 
 | 2978 | 	} | 
 | 2979 | 	__set_current_state(TASK_RUNNING); | 
 | 2980 | 	remove_wait_queue(&journal->j_join_wait, &wait); | 
 | 2981 | } | 
 | 2982 |  | 
 | 2983 | static void wake_queued_writers(struct super_block *s) | 
 | 2984 | { | 
 | 2985 | 	struct reiserfs_journal *journal = SB_JOURNAL(s); | 
 | 2986 | 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) | 
 | 2987 | 		wake_up(&journal->j_join_wait); | 
 | 2988 | } | 
 | 2989 |  | 
 | 2990 | static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) | 
 | 2991 | { | 
 | 2992 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 2993 | 	unsigned long bcount = journal->j_bcount; | 
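	/*
	 * Snapshot j_bcount so we can tell whether new writers are still
	 * joining: the loop below exits once either the transaction we
	 * were asked about has ended, or a full pass goes by with no
	 * change to j_bcount (i.e. the transaction stopped growing).
	 */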
 | 2994 | 	while (1) { | 
 | 2995 | 		int depth; | 
 | 2996 |  | 
 | 2997 | 		depth = reiserfs_write_unlock_nested(sb); | 
 | 2998 | 		schedule_timeout_uninterruptible(1); | 
 | 2999 | 		reiserfs_write_lock_nested(sb, depth); | 
 | 3000 |  | 
 | 3001 | 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; | 
 | 3002 | 		while ((atomic_read(&journal->j_wcount) > 0 || | 
 | 3003 | 			atomic_read(&journal->j_jlock)) && | 
 | 3004 | 		       journal->j_trans_id == trans_id) { | 
 | 3005 | 			queue_log_writer(sb); | 
 | 3006 | 		} | 
 | 3007 | 		if (journal->j_trans_id != trans_id) | 
 | 3008 | 			break; | 
 | 3009 | 		if (bcount == journal->j_bcount) | 
 | 3010 | 			break; | 
 | 3011 | 		bcount = journal->j_bcount; | 
 | 3012 | 	} | 
 | 3013 | } | 
 | 3014 |  | 
 | 3015 | /* | 
 | 3016 |  * join == true if you must join an existing transaction. | 
 | 3017 |  * join == false if you can deal with waiting for others to finish | 
 | 3018 |  * | 
 | 3019 |  * this will block until the transaction is joinable.  send the number of | 
 | 3020 |  * blocks you expect to use in nblocks. | 
 | 3021 | */ | 
 | 3022 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | 
 | 3023 | 			      struct super_block *sb, unsigned long nblocks, | 
 | 3024 | 			      int join) | 
 | 3025 | { | 
 | 3026 | 	time64_t now = ktime_get_seconds(); | 
 | 3027 | 	unsigned int old_trans_id; | 
 | 3028 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3029 | 	struct reiserfs_transaction_handle myth; | 
 | 3030 | 	int sched_count = 0; | 
 | 3031 | 	int retval; | 
 | 3032 | 	int depth; | 
 | 3033 |  | 
 | 3034 | 	reiserfs_check_lock_depth(sb, "journal_begin"); | 
 | 3035 | 	BUG_ON(nblocks > journal->j_trans_max); | 
 | 3036 |  | 
 | 3037 | 	PROC_INFO_INC(sb, journal.journal_being); | 
 | 3038 | 	/* set here for journal_join */ | 
 | 3039 | 	th->t_refcount = 1; | 
 | 3040 | 	th->t_super = sb; | 
 | 3041 |  | 
 | 3042 | relock: | 
 | 3043 | 	lock_journal(sb); | 
 | 3044 | 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { | 
 | 3045 | 		unlock_journal(sb); | 
 | 3046 | 		retval = journal->j_errno; | 
 | 3047 | 		goto out_fail; | 
 | 3048 | 	} | 
 | 3049 | 	journal->j_bcount++; | 
 | 3050 |  | 
 | 3051 | 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { | 
 | 3052 | 		unlock_journal(sb); | 
 | 3053 | 		depth = reiserfs_write_unlock_nested(sb); | 
 | 3054 | 		reiserfs_wait_on_write_block(sb); | 
 | 3055 | 		reiserfs_write_lock_nested(sb, depth); | 
 | 3056 | 		PROC_INFO_INC(sb, journal.journal_relock_writers); | 
 | 3057 | 		goto relock; | 
 | 3058 | 	} | 
 | 3059 | 	now = ktime_get_seconds(); | 
 | 3060 |  | 
	/*
	 * If there is no room in the journal, or if this transaction is
	 * too old and we weren't called joinable, wait for the current
	 * transaction to finish before beginning.  We don't sleep if
	 * there aren't other writers.
	 */
 | 3067 |  | 
	if (!join && (journal->j_must_wait > 0 ||
		      (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch ||
		      (atomic_read(&journal->j_wcount) > 0 &&
		       journal->j_trans_start_time > 0 &&
		       (now - journal->j_trans_start_time) >
		       journal->j_max_trans_age) ||
		      atomic_read(&journal->j_jlock) ||
		      journal->j_cnode_free < (journal->j_trans_max * 3))) {
 | 3077 |  | 
 | 3078 | 		old_trans_id = journal->j_trans_id; | 
 | 3079 | 		/* allow others to finish this transaction */ | 
 | 3080 | 		unlock_journal(sb); | 
 | 3081 |  | 
 | 3082 | 		if (!join && (journal->j_len_alloc + nblocks + 2) >= | 
 | 3083 | 		    journal->j_max_batch && | 
 | 3084 | 		    ((journal->j_len + nblocks + 2) * 100) < | 
 | 3085 | 		    (journal->j_len_alloc * 75)) { | 
 | 3086 | 			if (atomic_read(&journal->j_wcount) > 10) { | 
 | 3087 | 				sched_count++; | 
 | 3088 | 				queue_log_writer(sb); | 
 | 3089 | 				goto relock; | 
 | 3090 | 			} | 
 | 3091 | 		} | 
 | 3092 | 		/* | 
 | 3093 | 		 * don't mess with joining the transaction if all we | 
 | 3094 | 		 * have to do is wait for someone else to do a commit | 
 | 3095 | 		 */ | 
 | 3096 | 		if (atomic_read(&journal->j_jlock)) { | 
 | 3097 | 			while (journal->j_trans_id == old_trans_id && | 
 | 3098 | 			       atomic_read(&journal->j_jlock)) { | 
 | 3099 | 				queue_log_writer(sb); | 
 | 3100 | 			} | 
 | 3101 | 			goto relock; | 
 | 3102 | 		} | 
 | 3103 | 		retval = journal_join(&myth, sb); | 
 | 3104 | 		if (retval) | 
 | 3105 | 			goto out_fail; | 
 | 3106 |  | 
 | 3107 | 		/* someone might have ended the transaction while we joined */ | 
 | 3108 | 		if (old_trans_id != journal->j_trans_id) { | 
 | 3109 | 			retval = do_journal_end(&myth, 0); | 
 | 3110 | 		} else { | 
 | 3111 | 			retval = do_journal_end(&myth, COMMIT_NOW); | 
 | 3112 | 		} | 
 | 3113 |  | 
 | 3114 | 		if (retval) | 
 | 3115 | 			goto out_fail; | 
 | 3116 |  | 
 | 3117 | 		PROC_INFO_INC(sb, journal.journal_relock_wcount); | 
 | 3118 | 		goto relock; | 
 | 3119 | 	} | 
 | 3120 | 	/* we are the first writer, set trans_id */ | 
 | 3121 | 	if (journal->j_trans_start_time == 0) { | 
 | 3122 | 		journal->j_trans_start_time = ktime_get_seconds(); | 
 | 3123 | 	} | 
 | 3124 | 	atomic_inc(&journal->j_wcount); | 
 | 3125 | 	journal->j_len_alloc += nblocks; | 
 | 3126 | 	th->t_blocks_logged = 0; | 
 | 3127 | 	th->t_blocks_allocated = nblocks; | 
 | 3128 | 	th->t_trans_id = journal->j_trans_id; | 
 | 3129 | 	unlock_journal(sb); | 
 | 3130 | 	INIT_LIST_HEAD(&th->t_list); | 
 | 3131 | 	return 0; | 
 | 3132 |  | 
 | 3133 | out_fail: | 
 | 3134 | 	memset(th, 0, sizeof(*th)); | 
	/*
	 * Re-set th->t_super, so we can properly keep track of how many
	 * persistent transactions there are.  We need to do this so that
	 * if this call is part of a failed restart_transaction, we can
	 * free it later.
	 */
 | 3140 | 	th->t_super = sb; | 
 | 3141 | 	return retval; | 
 | 3142 | } | 
 | 3143 |  | 
struct reiserfs_transaction_handle *
reiserfs_persistent_transaction(struct super_block *s, int nblocks)
 | 3148 | { | 
 | 3149 | 	int ret; | 
 | 3150 | 	struct reiserfs_transaction_handle *th; | 
 | 3151 |  | 
	/*
	 * If we're nesting into an existing transaction, it will be
	 * persistent on its own.
	 */
 | 3156 | 	if (reiserfs_transaction_running(s)) { | 
 | 3157 | 		th = current->journal_info; | 
 | 3158 | 		th->t_refcount++; | 
 | 3159 | 		BUG_ON(th->t_refcount < 2); | 
 | 3160 |  | 
 | 3161 | 		return th; | 
 | 3162 | 	} | 
 | 3163 | 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); | 
 | 3164 | 	if (!th) | 
 | 3165 | 		return NULL; | 
 | 3166 | 	ret = journal_begin(th, s, nblocks); | 
 | 3167 | 	if (ret) { | 
 | 3168 | 		kfree(th); | 
 | 3169 | 		return NULL; | 
 | 3170 | 	} | 
 | 3171 |  | 
 | 3172 | 	SB_JOURNAL(s)->j_persistent_trans++; | 
 | 3173 | 	return th; | 
 | 3174 | } | 
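
/*
 * The matching teardown: a handle obtained from
 * reiserfs_persistent_transaction() must be released with
 * reiserfs_end_persistent_transaction() below, which drops the refcount
 * and frees the handle once the count reaches zero.
 */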
 | 3175 |  | 
 | 3176 | int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) | 
 | 3177 | { | 
 | 3178 | 	struct super_block *s = th->t_super; | 
 | 3179 | 	int ret = 0; | 
 | 3180 | 	if (th->t_trans_id) | 
 | 3181 | 		ret = journal_end(th); | 
 | 3182 | 	else | 
 | 3183 | 		ret = -EIO; | 
 | 3184 | 	if (th->t_refcount == 0) { | 
 | 3185 | 		SB_JOURNAL(s)->j_persistent_trans--; | 
 | 3186 | 		kfree(th); | 
 | 3187 | 	} | 
 | 3188 | 	return ret; | 
 | 3189 | } | 
 | 3190 |  | 
 | 3191 | static int journal_join(struct reiserfs_transaction_handle *th, | 
 | 3192 | 			struct super_block *sb) | 
 | 3193 | { | 
 | 3194 | 	struct reiserfs_transaction_handle *cur_th = current->journal_info; | 
 | 3195 |  | 
 | 3196 | 	/* | 
 | 3197 | 	 * this keeps do_journal_end from NULLing out the | 
 | 3198 | 	 * current->journal_info pointer | 
 | 3199 | 	 */ | 
 | 3200 | 	th->t_handle_save = cur_th; | 
 | 3201 | 	BUG_ON(cur_th && cur_th->t_refcount > 1); | 
 | 3202 | 	return do_journal_begin_r(th, sb, 1, JBEGIN_JOIN); | 
 | 3203 | } | 
 | 3204 |  | 
 | 3205 | int journal_join_abort(struct reiserfs_transaction_handle *th, | 
 | 3206 | 		       struct super_block *sb) | 
 | 3207 | { | 
 | 3208 | 	struct reiserfs_transaction_handle *cur_th = current->journal_info; | 
 | 3209 |  | 
 | 3210 | 	/* | 
 | 3211 | 	 * this keeps do_journal_end from NULLing out the | 
 | 3212 | 	 * current->journal_info pointer | 
 | 3213 | 	 */ | 
 | 3214 | 	th->t_handle_save = cur_th; | 
 | 3215 | 	BUG_ON(cur_th && cur_th->t_refcount > 1); | 
 | 3216 | 	return do_journal_begin_r(th, sb, 1, JBEGIN_ABORT); | 
 | 3217 | } | 
 | 3218 |  | 
 | 3219 | int journal_begin(struct reiserfs_transaction_handle *th, | 
 | 3220 | 		  struct super_block *sb, unsigned long nblocks) | 
 | 3221 | { | 
 | 3222 | 	struct reiserfs_transaction_handle *cur_th = current->journal_info; | 
 | 3223 | 	int ret; | 
 | 3224 |  | 
 | 3225 | 	th->t_handle_save = NULL; | 
 | 3226 | 	if (cur_th) { | 
 | 3227 | 		/* we are nesting into the current transaction */ | 
 | 3228 | 		if (cur_th->t_super == sb) { | 
 | 3229 | 			BUG_ON(!cur_th->t_refcount); | 
 | 3230 | 			cur_th->t_refcount++; | 
 | 3231 | 			memcpy(th, cur_th, sizeof(*th)); | 
 | 3232 | 			if (th->t_refcount <= 1) | 
 | 3233 | 				reiserfs_warning(sb, "reiserfs-2005", | 
 | 3234 | 						 "BAD: refcount <= 1, but " | 
 | 3235 | 						 "journal_info != 0"); | 
 | 3236 | 			return 0; | 
 | 3237 | 		} else { | 
 | 3238 | 			/* | 
 | 3239 | 			 * we've ended up with a handle from a different | 
 | 3240 | 			 * filesystem.  save it and restore on journal_end. | 
 | 3241 | 			 * This should never really happen... | 
 | 3242 | 			 */ | 
			reiserfs_warning(sb, "clm-2100",
					 "nesting into a different FS");
 | 3245 | 			th->t_handle_save = current->journal_info; | 
 | 3246 | 			current->journal_info = th; | 
 | 3247 | 		} | 
 | 3248 | 	} else { | 
 | 3249 | 		current->journal_info = th; | 
 | 3250 | 	} | 
 | 3251 | 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); | 
 | 3252 | 	BUG_ON(current->journal_info != th); | 
 | 3253 |  | 
	/*
	 * I guess this boils down to being the reciprocal of clm-2100 above.
	 * If do_journal_begin_r fails, we need to put it back, since
	 * journal_end won't be called to do it.
	 */
 | 3258 | 	if (ret) | 
 | 3259 | 		current->journal_info = th->t_handle_save; | 
 | 3260 | 	else | 
 | 3261 | 		BUG_ON(!th->t_refcount); | 
 | 3262 |  | 
 | 3263 | 	return ret; | 
 | 3264 | } | 
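
/*
 * Typical use of the pair above (a sketch under the usual calling
 * conventions, not code from this file; jbegin_count is a caller-chosen
 * estimate of blocks to log):
 *
 *	struct reiserfs_transaction_handle th;
 *	int err = journal_begin(&th, sb, jbegin_count);
 *	if (err)
 *		return err;
 *	reiserfs_prepare_for_journal(sb, bh, 1);
 *	... modify bh ...
 *	journal_mark_dirty(&th, bh);
 *	err = journal_end(&th);
 */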
 | 3265 |  | 
/*
 * puts bh into the current transaction.  If it was already there, it
 * removes the old pointers from the hash and puts new ones in (to make
 * sure replay happens in the right order).
 *
 * if it was dirty, cleans and files onto the clean list.  I can't let it
 * be dirty again until the transaction is committed.
 *
 * if j_len is bigger than j_len_alloc, it pushes j_len_alloc to
 * j_len + JOURNAL_PER_BALANCE_CNT.
 */
 | 3276 | int journal_mark_dirty(struct reiserfs_transaction_handle *th, | 
 | 3277 | 		       struct buffer_head *bh) | 
 | 3278 | { | 
 | 3279 | 	struct super_block *sb = th->t_super; | 
 | 3280 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3281 | 	struct reiserfs_journal_cnode *cn = NULL; | 
 | 3282 | 	int count_already_incd = 0; | 
 | 3283 | 	int prepared = 0; | 
 | 3284 | 	BUG_ON(!th->t_trans_id); | 
 | 3285 |  | 
 | 3286 | 	PROC_INFO_INC(sb, journal.mark_dirty); | 
	if (th->t_trans_id != journal->j_trans_id) {
		reiserfs_panic(th->t_super, "journal-1577",
			       "handle trans id %u != current trans id %u",
			       th->t_trans_id, journal->j_trans_id);
	}
 | 3292 |  | 
 | 3293 | 	prepared = test_clear_buffer_journal_prepared(bh); | 
 | 3294 | 	clear_buffer_journal_restore_dirty(bh); | 
 | 3295 | 	/* already in this transaction, we are done */ | 
 | 3296 | 	if (buffer_journaled(bh)) { | 
 | 3297 | 		PROC_INFO_INC(sb, journal.mark_dirty_already); | 
 | 3298 | 		return 0; | 
 | 3299 | 	} | 
 | 3300 |  | 
 | 3301 | 	/* | 
 | 3302 | 	 * this must be turned into a panic instead of a warning.  We can't | 
 | 3303 | 	 * allow a dirty or journal_dirty or locked buffer to be logged, as | 
 | 3304 | 	 * some changes could get to disk too early.  NOT GOOD. | 
 | 3305 | 	 */ | 
 | 3306 | 	if (!prepared || buffer_dirty(bh)) { | 
 | 3307 | 		reiserfs_warning(sb, "journal-1777", | 
 | 3308 | 				 "buffer %llu bad state " | 
 | 3309 | 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", | 
 | 3310 | 				 (unsigned long long)bh->b_blocknr, | 
 | 3311 | 				 prepared ? ' ' : '!', | 
 | 3312 | 				 buffer_locked(bh) ? ' ' : '!', | 
 | 3313 | 				 buffer_dirty(bh) ? ' ' : '!', | 
 | 3314 | 				 buffer_journal_dirty(bh) ? ' ' : '!'); | 
 | 3315 | 	} | 
 | 3316 |  | 
 | 3317 | 	if (atomic_read(&journal->j_wcount) <= 0) { | 
 | 3318 | 		reiserfs_warning(sb, "journal-1409", | 
 | 3319 | 				 "returning because j_wcount was %d", | 
 | 3320 | 				 atomic_read(&journal->j_wcount)); | 
 | 3321 | 		return 1; | 
 | 3322 | 	} | 
 | 3323 | 	/* | 
 | 3324 | 	 * this error means I've screwed up, and we've overflowed | 
 | 3325 | 	 * the transaction.  Nothing can be done here, except make the | 
 | 3326 | 	 * FS readonly or panic. | 
 | 3327 | 	 */ | 
 | 3328 | 	if (journal->j_len >= journal->j_trans_max) { | 
 | 3329 | 		reiserfs_panic(th->t_super, "journal-1413", | 
 | 3330 | 			       "j_len (%lu) is too big", | 
 | 3331 | 			       journal->j_len); | 
 | 3332 | 	} | 
 | 3333 |  | 
 | 3334 | 	if (buffer_journal_dirty(bh)) { | 
 | 3335 | 		count_already_incd = 1; | 
 | 3336 | 		PROC_INFO_INC(sb, journal.mark_dirty_notjournal); | 
 | 3337 | 		clear_buffer_journal_dirty(bh); | 
 | 3338 | 	} | 
 | 3339 |  | 
 | 3340 | 	if (journal->j_len > journal->j_len_alloc) { | 
 | 3341 | 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; | 
 | 3342 | 	} | 
 | 3343 |  | 
 | 3344 | 	set_buffer_journaled(bh); | 
 | 3345 |  | 
 | 3346 | 	/* now put this guy on the end */ | 
 | 3347 | 	if (!cn) { | 
 | 3348 | 		cn = get_cnode(sb); | 
 | 3349 | 		if (!cn) { | 
 | 3350 | 			reiserfs_panic(sb, "journal-4", "get_cnode failed!"); | 
 | 3351 | 		} | 
 | 3352 |  | 
 | 3353 | 		if (th->t_blocks_logged == th->t_blocks_allocated) { | 
 | 3354 | 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; | 
 | 3355 | 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; | 
 | 3356 | 		} | 
 | 3357 | 		th->t_blocks_logged++; | 
 | 3358 | 		journal->j_len++; | 
 | 3359 |  | 
 | 3360 | 		cn->bh = bh; | 
 | 3361 | 		cn->blocknr = bh->b_blocknr; | 
 | 3362 | 		cn->sb = sb; | 
 | 3363 | 		cn->jlist = NULL; | 
 | 3364 | 		insert_journal_hash(journal->j_hash_table, cn); | 
 | 3365 | 		if (!count_already_incd) { | 
 | 3366 | 			get_bh(bh); | 
 | 3367 | 		} | 
 | 3368 | 	} | 
 | 3369 | 	cn->next = NULL; | 
 | 3370 | 	cn->prev = journal->j_last; | 
 | 3371 | 	cn->bh = bh; | 
 | 3372 | 	if (journal->j_last) { | 
 | 3373 | 		journal->j_last->next = cn; | 
 | 3374 | 		journal->j_last = cn; | 
 | 3375 | 	} else { | 
 | 3376 | 		journal->j_first = cn; | 
 | 3377 | 		journal->j_last = cn; | 
 | 3378 | 	} | 
 | 3379 | 	reiserfs_schedule_old_flush(sb); | 
 | 3380 | 	return 0; | 
 | 3381 | } | 
 | 3382 |  | 
 | 3383 | int journal_end(struct reiserfs_transaction_handle *th) | 
 | 3384 | { | 
 | 3385 | 	struct super_block *sb = th->t_super; | 
 | 3386 | 	if (!current->journal_info && th->t_refcount > 1) | 
 | 3387 | 		reiserfs_warning(sb, "REISER-NESTING", | 
 | 3388 | 				 "th NULL, refcount %d", th->t_refcount); | 
 | 3389 |  | 
 | 3390 | 	if (!th->t_trans_id) { | 
 | 3391 | 		WARN_ON(1); | 
 | 3392 | 		return -EIO; | 
 | 3393 | 	} | 
 | 3394 |  | 
 | 3395 | 	th->t_refcount--; | 
 | 3396 | 	if (th->t_refcount > 0) { | 
 | 3397 | 		struct reiserfs_transaction_handle *cur_th = | 
 | 3398 | 		    current->journal_info; | 
 | 3399 |  | 
 | 3400 | 		/* | 
 | 3401 | 		 * we aren't allowed to close a nested transaction on a | 
 | 3402 | 		 * different filesystem from the one in the task struct | 
 | 3403 | 		 */ | 
 | 3404 | 		BUG_ON(cur_th->t_super != th->t_super); | 
 | 3405 |  | 
 | 3406 | 		if (th != cur_th) { | 
 | 3407 | 			memcpy(current->journal_info, th, sizeof(*th)); | 
 | 3408 | 			th->t_trans_id = 0; | 
 | 3409 | 		} | 
 | 3410 | 		return 0; | 
 | 3411 | 	} else { | 
 | 3412 | 		return do_journal_end(th, 0); | 
 | 3413 | 	} | 
 | 3414 | } | 
 | 3415 |  | 
/*
 * removes from the current transaction, relsing and decrementing any counters.
 * also files the removed buffer directly onto the clean list
 *
 * called by journal_mark_freed when a block has been deleted
 *
 * returns 1 if it cleaned and relsed the buffer. 0 otherwise
 */
 | 3424 | static int remove_from_transaction(struct super_block *sb, | 
 | 3425 | 				   b_blocknr_t blocknr, int already_cleaned) | 
 | 3426 | { | 
 | 3427 | 	struct buffer_head *bh; | 
 | 3428 | 	struct reiserfs_journal_cnode *cn; | 
 | 3429 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3430 | 	int ret = 0; | 
 | 3431 |  | 
 | 3432 | 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); | 
 | 3433 | 	if (!cn || !cn->bh) { | 
 | 3434 | 		return ret; | 
 | 3435 | 	} | 
 | 3436 | 	bh = cn->bh; | 
 | 3437 | 	if (cn->prev) { | 
 | 3438 | 		cn->prev->next = cn->next; | 
 | 3439 | 	} | 
 | 3440 | 	if (cn->next) { | 
 | 3441 | 		cn->next->prev = cn->prev; | 
 | 3442 | 	} | 
 | 3443 | 	if (cn == journal->j_first) { | 
 | 3444 | 		journal->j_first = cn->next; | 
 | 3445 | 	} | 
 | 3446 | 	if (cn == journal->j_last) { | 
 | 3447 | 		journal->j_last = cn->prev; | 
 | 3448 | 	} | 
 | 3449 | 	if (bh) | 
 | 3450 | 		remove_journal_hash(sb, journal->j_hash_table, NULL, | 
 | 3451 | 				    bh->b_blocknr, 0); | 
 | 3452 | 	clear_buffer_journaled(bh);	/* don't log this one */ | 
 | 3453 |  | 
 | 3454 | 	if (!already_cleaned) { | 
 | 3455 | 		clear_buffer_journal_dirty(bh); | 
 | 3456 | 		clear_buffer_dirty(bh); | 
 | 3457 | 		clear_buffer_journal_test(bh); | 
 | 3458 | 		put_bh(bh); | 
 | 3459 | 		if (atomic_read(&bh->b_count) < 0) { | 
 | 3460 | 			reiserfs_warning(sb, "journal-1752", | 
 | 3461 | 					 "b_count < 0"); | 
 | 3462 | 		} | 
 | 3463 | 		ret = 1; | 
 | 3464 | 	} | 
 | 3465 | 	journal->j_len--; | 
 | 3466 | 	journal->j_len_alloc--; | 
 | 3467 | 	free_cnode(sb, cn); | 
 | 3468 | 	return ret; | 
 | 3469 | } | 
 | 3470 |  | 
/*
 * for any cnode in a journal list, it can only be dirtied if all the
 * transactions that include it are committed to disk.
 * this checks through each transaction, and returns 1 if you are allowed
 * to dirty, and 0 if you aren't
 *
 * it is called by dirty_journal_list, which is called after
 * flush_commit_list has gotten all the log blocks for a given
 * transaction on disk
 */
 | 3482 | static int can_dirty(struct reiserfs_journal_cnode *cn) | 
 | 3483 | { | 
 | 3484 | 	struct super_block *sb = cn->sb; | 
 | 3485 | 	b_blocknr_t blocknr = cn->blocknr; | 
 | 3486 | 	struct reiserfs_journal_cnode *cur = cn->hprev; | 
 | 3487 | 	int can_dirty = 1; | 
 | 3488 |  | 
 | 3489 | 	/* | 
 | 3490 | 	 * first test hprev.  These are all newer than cn, so any node here | 
 | 3491 | 	 * with the same block number and dev means this node can't be sent | 
 | 3492 | 	 * to disk right now. | 
 | 3493 | 	 */ | 
 | 3494 | 	while (cur && can_dirty) { | 
 | 3495 | 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && | 
 | 3496 | 		    cur->blocknr == blocknr) { | 
 | 3497 | 			can_dirty = 0; | 
 | 3498 | 		} | 
 | 3499 | 		cur = cur->hprev; | 
 | 3500 | 	} | 
 | 3501 | 	/* | 
 | 3502 | 	 * then test hnext.  These are all older than cn.  As long as they | 
 | 3503 | 	 * are committed to the log, it is safe to write cn to disk | 
 | 3504 | 	 */ | 
 | 3505 | 	cur = cn->hnext; | 
 | 3506 | 	while (cur && can_dirty) { | 
 | 3507 | 		if (cur->jlist && cur->jlist->j_len > 0 && | 
 | 3508 | 		    atomic_read(&cur->jlist->j_commit_left) > 0 && cur->bh && | 
 | 3509 | 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { | 
 | 3510 | 			can_dirty = 0; | 
 | 3511 | 		} | 
 | 3512 | 		cur = cur->hnext; | 
 | 3513 | 	} | 
 | 3514 | 	return can_dirty; | 
 | 3515 | } | 
 | 3516 |  | 
 | 3517 | /* | 
 | 3518 |  * syncs the commit blocks, but does not force the real buffers to disk | 
 | 3519 |  * will wait until the current transaction is done/committed before returning | 
 | 3520 |  */ | 
 | 3521 | int journal_end_sync(struct reiserfs_transaction_handle *th) | 
 | 3522 | { | 
 | 3523 | 	struct super_block *sb = th->t_super; | 
 | 3524 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3525 |  | 
 | 3526 | 	BUG_ON(!th->t_trans_id); | 
 | 3527 | 	/* you can sync while nested, very, very bad */ | 
 | 3528 | 	BUG_ON(th->t_refcount > 1); | 
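	/*
	 * An empty transaction has nothing to commit; dirty the
	 * superblock buffer so do_journal_end() has at least one block
	 * to log and the commit really reaches disk.
	 */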
 | 3529 | 	if (journal->j_len == 0) { | 
 | 3530 | 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), | 
 | 3531 | 					     1); | 
 | 3532 | 		journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb)); | 
 | 3533 | 	} | 
 | 3534 | 	return do_journal_end(th, COMMIT_NOW | WAIT); | 
 | 3535 | } | 
 | 3536 |  | 
 | 3537 | /* writeback the pending async commits to disk */ | 
 | 3538 | static void flush_async_commits(struct work_struct *work) | 
 | 3539 | { | 
 | 3540 | 	struct reiserfs_journal *journal = | 
 | 3541 | 		container_of(work, struct reiserfs_journal, j_work.work); | 
 | 3542 | 	struct super_block *sb = journal->j_work_sb; | 
 | 3543 | 	struct reiserfs_journal_list *jl; | 
 | 3544 | 	struct list_head *entry; | 
 | 3545 |  | 
 | 3546 | 	reiserfs_write_lock(sb); | 
 | 3547 | 	if (!list_empty(&journal->j_journal_list)) { | 
 | 3548 | 		/* last entry is the youngest, commit it and you get everything */ | 
 | 3549 | 		entry = journal->j_journal_list.prev; | 
 | 3550 | 		jl = JOURNAL_LIST_ENTRY(entry); | 
 | 3551 | 		flush_commit_list(sb, jl, 1); | 
 | 3552 | 	} | 
 | 3553 | 	reiserfs_write_unlock(sb); | 
 | 3554 | } | 
 | 3555 |  | 
 | 3556 | /* | 
 | 3557 |  * flushes any old transactions to disk | 
 | 3558 |  * ends the current transaction if it is too old | 
 | 3559 |  */ | 
 | 3560 | void reiserfs_flush_old_commits(struct super_block *sb) | 
 | 3561 | { | 
 | 3562 | 	time64_t now; | 
 | 3563 | 	struct reiserfs_transaction_handle th; | 
 | 3564 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3565 |  | 
 | 3566 | 	now = ktime_get_seconds(); | 
 | 3567 | 	/* | 
 | 3568 | 	 * safety check so we don't flush while we are replaying the log during | 
 | 3569 | 	 * mount | 
 | 3570 | 	 */ | 
 | 3571 | 	if (list_empty(&journal->j_journal_list)) | 
 | 3572 | 		return; | 
 | 3573 |  | 
 | 3574 | 	/* | 
 | 3575 | 	 * check the current transaction.  If there are no writers, and it is | 
 | 3576 | 	 * too old, finish it, and force the commit blocks to disk | 
 | 3577 | 	 */ | 
 | 3578 | 	if (atomic_read(&journal->j_wcount) <= 0 && | 
 | 3579 | 	    journal->j_trans_start_time > 0 && | 
 | 3580 | 	    journal->j_len > 0 && | 
 | 3581 | 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) { | 
 | 3582 | 		if (!journal_join(&th, sb)) { | 
 | 3583 | 			reiserfs_prepare_for_journal(sb, | 
 | 3584 | 						     SB_BUFFER_WITH_SB(sb), | 
 | 3585 | 						     1); | 
 | 3586 | 			journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb)); | 
 | 3587 |  | 
			/*
			 * we're only being called from kreiserfsd, so it
			 * makes no sense to do an async commit just so
			 * kreiserfsd can pick it up later
			 */
 | 3593 | 			do_journal_end(&th, COMMIT_NOW | WAIT); | 
 | 3594 | 		} | 
 | 3595 | 	} | 
 | 3596 | } | 
 | 3597 |  | 
 | 3598 | /* | 
 | 3599 |  * returns 0 if do_journal_end should return right away, returns 1 if | 
 | 3600 |  * do_journal_end should finish the commit | 
 | 3601 |  * | 
 * if the current transaction is too old, but still has writers, this will
 * wait on j_join_wait until all the writers are done.  By the time it
 * wakes up, the transaction it was called on has already ended, so it just
 * flushes the commit list and returns 0.
 | 3606 |  * | 
 | 3607 |  * Won't batch when flush or commit_now is set.  Also won't batch when | 
 | 3608 |  * others are waiting on j_join_wait. | 
 | 3609 |  * | 
 | 3610 |  * Note, we can't allow the journal_end to proceed while there are still | 
 | 3611 |  * writers in the log. | 
 | 3612 |  */ | 
 | 3613 | static int check_journal_end(struct reiserfs_transaction_handle *th, int flags) | 
 | 3614 | { | 
 | 3615 |  | 
 | 3616 | 	time64_t now; | 
 | 3617 | 	int flush = flags & FLUSH_ALL; | 
 | 3618 | 	int commit_now = flags & COMMIT_NOW; | 
 | 3619 | 	int wait_on_commit = flags & WAIT; | 
 | 3620 | 	struct reiserfs_journal_list *jl; | 
 | 3621 | 	struct super_block *sb = th->t_super; | 
 | 3622 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3623 |  | 
 | 3624 | 	BUG_ON(!th->t_trans_id); | 
 | 3625 |  | 
	if (th->t_trans_id != journal->j_trans_id) {
		reiserfs_panic(th->t_super, "journal-1577",
			       "handle trans id %u != current trans id %u",
			       th->t_trans_id, journal->j_trans_id);
	}
 | 3631 |  | 
 | 3632 | 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); | 
 | 3633 | 	/* <= 0 is allowed.  unmounting might not call begin */ | 
 | 3634 | 	if (atomic_read(&journal->j_wcount) > 0) | 
 | 3635 | 		atomic_dec(&journal->j_wcount); | 
 | 3636 |  | 
	/*
	 * BUG: deal with the case where j_len is 0, but people previously
	 * freed blocks that need to be released.  This will be dealt with
	 * by the next transaction that actually writes something, but
	 * should be taken care of in this trans
	 */
 | 3643 | 	BUG_ON(journal->j_len == 0); | 
 | 3644 |  | 
	/*
	 * if wcount > 0, and we are called with flush or commit_now,
	 * we wait on j_join_wait.  We will wake up when the last writer has
	 * finished the transaction, and started it on its way to the disk.
	 * Then, we flush the commit or journal list, and just return 0
	 * because the rest of journal end was already done for this
	 * transaction.
	 */
 | 3653 | 	if (atomic_read(&journal->j_wcount) > 0) { | 
 | 3654 | 		if (flush || commit_now) { | 
 | 3655 | 			unsigned trans_id; | 
 | 3656 |  | 
 | 3657 | 			jl = journal->j_current_jl; | 
 | 3658 | 			trans_id = jl->j_trans_id; | 
 | 3659 | 			if (wait_on_commit) | 
 | 3660 | 				jl->j_state |= LIST_COMMIT_PENDING; | 
 | 3661 | 			atomic_set(&journal->j_jlock, 1); | 
 | 3662 | 			if (flush) { | 
 | 3663 | 				journal->j_next_full_flush = 1; | 
 | 3664 | 			} | 
 | 3665 | 			unlock_journal(sb); | 
 | 3666 |  | 
 | 3667 | 			/* | 
 | 3668 | 			 * sleep while the current transaction is | 
 | 3669 | 			 * still j_jlocked | 
 | 3670 | 			 */ | 
 | 3671 | 			while (journal->j_trans_id == trans_id) { | 
 | 3672 | 				if (atomic_read(&journal->j_jlock)) { | 
 | 3673 | 					queue_log_writer(sb); | 
 | 3674 | 				} else { | 
 | 3675 | 					lock_journal(sb); | 
 | 3676 | 					if (journal->j_trans_id == trans_id) { | 
 | 3677 | 						atomic_set(&journal->j_jlock, | 
 | 3678 | 							   1); | 
 | 3679 | 					} | 
 | 3680 | 					unlock_journal(sb); | 
 | 3681 | 				} | 
 | 3682 | 			} | 
 | 3683 | 			BUG_ON(journal->j_trans_id == trans_id); | 
 | 3684 |  | 
 | 3685 | 			if (commit_now | 
 | 3686 | 			    && journal_list_still_alive(sb, trans_id) | 
 | 3687 | 			    && wait_on_commit) { | 
 | 3688 | 				flush_commit_list(sb, jl, 1); | 
 | 3689 | 			} | 
 | 3690 | 			return 0; | 
 | 3691 | 		} | 
 | 3692 | 		unlock_journal(sb); | 
 | 3693 | 		return 0; | 
 | 3694 | 	} | 
 | 3695 |  | 
 | 3696 | 	/* deal with old transactions where we are the last writers */ | 
 | 3697 | 	now = ktime_get_seconds(); | 
 | 3698 | 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { | 
 | 3699 | 		commit_now = 1; | 
 | 3700 | 		journal->j_next_async_flush = 1; | 
 | 3701 | 	} | 
	/*
	 * don't batch when someone is waiting on j_join_wait, or when
	 * syncing the commit or flushing the whole trans
	 */
 | 3704 | 	if (!(journal->j_must_wait > 0) && !(atomic_read(&journal->j_jlock)) | 
 | 3705 | 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch) | 
 | 3706 | 	    && journal->j_len_alloc < journal->j_max_batch | 
 | 3707 | 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) { | 
 | 3708 | 		journal->j_bcount++; | 
 | 3709 | 		unlock_journal(sb); | 
 | 3710 | 		return 0; | 
 | 3711 | 	} | 
 | 3712 |  | 
	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
		reiserfs_panic(sb, "journal-003",
			       "j_start (%lu) is too high",
			       journal->j_start);
	}
 | 3718 | 	return 1; | 
 | 3719 | } | 
 | 3720 |  | 
/*
 * Does all the work that makes deleting blocks safe.
 * when deleting a block marked BH_JNew, just remove it from the current
 * transaction, clean its buffer_head and move on.
 *
 * otherwise:
 * set a bit for the block in the journal bitmap.  That will prevent it from
 * being allocated for unformatted nodes before this transaction has finished.
 *
 * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.
 * That will prevent any old transactions with this block from trying to flush
 * to the real location.  Since we aren't removing the cnode from the
 * journal_list_hash, the block can't be reallocated yet.
 *
 * Then remove it from the current transaction, decrementing any counters and
 * filing it on the clean list.
 */
 | 3738 | int journal_mark_freed(struct reiserfs_transaction_handle *th, | 
 | 3739 | 		       struct super_block *sb, b_blocknr_t blocknr) | 
 | 3740 | { | 
 | 3741 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3742 | 	struct reiserfs_journal_cnode *cn = NULL; | 
 | 3743 | 	struct buffer_head *bh = NULL; | 
 | 3744 | 	struct reiserfs_list_bitmap *jb = NULL; | 
 | 3745 | 	int cleaned = 0; | 
 | 3746 | 	BUG_ON(!th->t_trans_id); | 
 | 3747 |  | 
 | 3748 | 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); | 
 | 3749 | 	if (cn && cn->bh) { | 
 | 3750 | 		bh = cn->bh; | 
 | 3751 | 		get_bh(bh); | 
 | 3752 | 	} | 
 | 3753 | 	/* if it is journal new, we just remove it from this transaction */ | 
 | 3754 | 	if (bh && buffer_journal_new(bh)) { | 
 | 3755 | 		clear_buffer_journal_new(bh); | 
 | 3756 | 		clear_prepared_bits(bh); | 
 | 3757 | 		reiserfs_clean_and_file_buffer(bh); | 
 | 3758 | 		cleaned = remove_from_transaction(sb, blocknr, cleaned); | 
 | 3759 | 	} else { | 
 | 3760 | 		/* | 
 | 3761 | 		 * set the bit for this block in the journal bitmap | 
 | 3762 | 		 * for this transaction | 
 | 3763 | 		 */ | 
 | 3764 | 		jb = journal->j_current_jl->j_list_bitmap; | 
 | 3765 | 		if (!jb) { | 
 | 3766 | 			reiserfs_panic(sb, "journal-1702", | 
 | 3767 | 				       "journal_list_bitmap is NULL"); | 
 | 3768 | 		} | 
 | 3769 | 		set_bit_in_list_bitmap(sb, blocknr, jb); | 
 | 3770 |  | 
		/* Note, the while loop below is not allowed to schedule.  */
 | 3772 |  | 
 | 3773 | 		if (bh) { | 
 | 3774 | 			clear_prepared_bits(bh); | 
 | 3775 | 			reiserfs_clean_and_file_buffer(bh); | 
 | 3776 | 		} | 
 | 3777 | 		cleaned = remove_from_transaction(sb, blocknr, cleaned); | 
 | 3778 |  | 
 | 3779 | 		/* | 
 | 3780 | 		 * find all older transactions with this block, | 
 | 3781 | 		 * make sure they don't try to write it out | 
 | 3782 | 		 */ | 
 | 3783 | 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table, | 
 | 3784 | 					  blocknr); | 
 | 3785 | 		while (cn) { | 
 | 3786 | 			if (sb == cn->sb && blocknr == cn->blocknr) { | 
 | 3787 | 				set_bit(BLOCK_FREED, &cn->state); | 
 | 3788 | 				if (cn->bh) { | 
 | 3789 | 					/* | 
 | 3790 | 					 * remove_from_transaction will brelse | 
 | 3791 | 					 * the buffer if it was in the current | 
 | 3792 | 					 * trans | 
 | 3793 | 					 */ | 
					if (!cleaned) {
						clear_buffer_journal_dirty(cn->bh);
						clear_buffer_dirty(cn->bh);
						clear_buffer_journal_test(cn->bh);
						cleaned = 1;
						put_bh(cn->bh);
						if (atomic_read(&cn->bh->b_count) < 0) {
							reiserfs_warning(sb,
								"journal-2138",
								"cn->bh->b_count < 0");
						}
					}
					/*
					 * since we are clearing the bh,
					 * we MUST dec nonzerolen
					 */
					if (cn->jlist)
						atomic_dec(&cn->jlist->j_nonzerolen);
 | 3817 | 					cn->bh = NULL; | 
 | 3818 | 				} | 
 | 3819 | 			} | 
 | 3820 | 			cn = cn->hnext; | 
 | 3821 | 		} | 
 | 3822 | 	} | 
 | 3823 |  | 
 | 3824 | 	if (bh) | 
 | 3825 | 		release_buffer_page(bh); /* get_hash grabs the buffer */ | 
 | 3826 | 	return 0; | 
 | 3827 | } | 
 | 3828 |  | 
 | 3829 | void reiserfs_update_inode_transaction(struct inode *inode) | 
 | 3830 | { | 
 | 3831 | 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); | 
 | 3832 | 	REISERFS_I(inode)->i_jl = journal->j_current_jl; | 
 | 3833 | 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id; | 
 | 3834 | } | 
 | 3835 |  | 
 | 3836 | /* | 
 | 3837 |  * returns -1 on error, 0 if no commits/barriers were done and 1 | 
 | 3838 |  * if a transaction was actually committed and the barrier was done | 
 | 3839 |  */ | 
 | 3840 | static int __commit_trans_jl(struct inode *inode, unsigned long id, | 
 | 3841 | 			     struct reiserfs_journal_list *jl) | 
 | 3842 | { | 
 | 3843 | 	struct reiserfs_transaction_handle th; | 
 | 3844 | 	struct super_block *sb = inode->i_sb; | 
 | 3845 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3846 | 	int ret = 0; | 
 | 3847 |  | 
 | 3848 | 	/* | 
 | 3849 | 	 * is it from the current transaction, | 
 | 3850 | 	 * or from an unknown transaction? | 
 | 3851 | 	 */ | 
 | 3852 | 	if (id == journal->j_trans_id) { | 
 | 3853 | 		jl = journal->j_current_jl; | 
 | 3854 | 		/* | 
 | 3855 | 		 * try to let other writers come in and | 
 | 3856 | 		 * grow this transaction | 
 | 3857 | 		 */ | 
 | 3858 | 		let_transaction_grow(sb, id); | 
 | 3859 | 		if (journal->j_trans_id != id) { | 
 | 3860 | 			goto flush_commit_only; | 
 | 3861 | 		} | 
 | 3862 |  | 
 | 3863 | 		ret = journal_begin(&th, sb, 1); | 
 | 3864 | 		if (ret) | 
 | 3865 | 			return ret; | 
 | 3866 |  | 
 | 3867 | 		/* someone might have ended this transaction while we joined */ | 
 | 3868 | 		if (journal->j_trans_id != id) { | 
 | 3869 | 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), | 
 | 3870 | 						     1); | 
 | 3871 | 			journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb)); | 
 | 3872 | 			ret = journal_end(&th); | 
 | 3873 | 			goto flush_commit_only; | 
 | 3874 | 		} | 
 | 3875 |  | 
 | 3876 | 		ret = journal_end_sync(&th); | 
 | 3877 | 		if (!ret) | 
 | 3878 | 			ret = 1; | 
 | 3879 |  | 
 | 3880 | 	} else { | 
 | 3881 | 		/* | 
 | 3882 | 		 * this gets tricky, we have to make sure the journal list in | 
 | 3883 | 		 * the inode still exists.  We know the list is still around | 
 | 3884 | 		 * if we've got a larger transaction id than the oldest list | 
 | 3885 | 		 */ | 
 | 3886 | flush_commit_only: | 
 | 3887 | 		if (journal_list_still_alive(inode->i_sb, id)) { | 
 | 3888 | 			/* | 
 | 3889 | 			 * we only set ret to 1 when we know for sure | 
 | 3890 | 			 * the barrier hasn't been started yet on the commit | 
 | 3891 | 			 * block. | 
 | 3892 | 			 */ | 
 | 3893 | 			if (atomic_read(&jl->j_commit_left) > 1) | 
 | 3894 | 				ret = 1; | 
 | 3895 | 			flush_commit_list(sb, jl, 1); | 
 | 3896 | 			if (journal->j_errno) | 
 | 3897 | 				ret = journal->j_errno; | 
 | 3898 | 		} | 
 | 3899 | 	} | 
 | 3900 | 	/* otherwise the list is gone, and long since committed */ | 
 | 3901 | 	return ret; | 
 | 3902 | } | 
 | 3903 |  | 
 | 3904 | int reiserfs_commit_for_inode(struct inode *inode) | 
 | 3905 | { | 
 | 3906 | 	unsigned int id = REISERFS_I(inode)->i_trans_id; | 
 | 3907 | 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; | 
 | 3908 |  | 
	/*
	 * for the whole inode, assume an unset id means it was
	 * changed in the current transaction.  That is the more
	 * conservative choice.
	 */
 | 3913 | 	if (!id || !jl) { | 
 | 3914 | 		reiserfs_update_inode_transaction(inode); | 
 | 3915 | 		id = REISERFS_I(inode)->i_trans_id; | 
 | 3916 | 		/* jl will be updated in __commit_trans_jl */ | 
 | 3917 | 	} | 
 | 3918 |  | 
 | 3919 | 	return __commit_trans_jl(inode, id, jl); | 
 | 3920 | } | 
 | 3921 |  | 
 | 3922 | void reiserfs_restore_prepared_buffer(struct super_block *sb, | 
 | 3923 | 				      struct buffer_head *bh) | 
 | 3924 | { | 
 | 3925 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3926 | 	PROC_INFO_INC(sb, journal.restore_prepared); | 
 | 3927 | 	if (!bh) { | 
 | 3928 | 		return; | 
 | 3929 | 	} | 
 | 3930 | 	if (test_clear_buffer_journal_restore_dirty(bh) && | 
 | 3931 | 	    buffer_journal_dirty(bh)) { | 
 | 3932 | 		struct reiserfs_journal_cnode *cn; | 
 | 3933 | 		reiserfs_write_lock(sb); | 
 | 3934 | 		cn = get_journal_hash_dev(sb, | 
 | 3935 | 					  journal->j_list_hash_table, | 
 | 3936 | 					  bh->b_blocknr); | 
 | 3937 | 		if (cn && can_dirty(cn)) { | 
 | 3938 | 			set_buffer_journal_test(bh); | 
 | 3939 | 			mark_buffer_dirty(bh); | 
 | 3940 | 		} | 
 | 3941 | 		reiserfs_write_unlock(sb); | 
 | 3942 | 	} | 
 | 3943 | 	clear_buffer_journal_prepared(bh); | 
 | 3944 | } | 
 | 3945 |  | 
 | 3946 | extern struct tree_balance *cur_tb; | 
/*
 * before we can change a metadata block, we have to make sure it won't
 * be written to disk while we are altering it.  So, we must:
 * clean it,
 * then wait on it.
 */
 | 3953 | int reiserfs_prepare_for_journal(struct super_block *sb, | 
 | 3954 | 				 struct buffer_head *bh, int wait) | 
 | 3955 | { | 
 | 3956 | 	PROC_INFO_INC(sb, journal.prepare); | 
 | 3957 |  | 
 | 3958 | 	if (!trylock_buffer(bh)) { | 
 | 3959 | 		if (!wait) | 
 | 3960 | 			return 0; | 
 | 3961 | 		lock_buffer(bh); | 
 | 3962 | 	} | 
 | 3963 | 	set_buffer_journal_prepared(bh); | 
 | 3964 | 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { | 
 | 3965 | 		clear_buffer_journal_test(bh); | 
 | 3966 | 		set_buffer_journal_restore_dirty(bh); | 
 | 3967 | 	} | 
 | 3968 | 	unlock_buffer(bh); | 
 | 3969 | 	return 1; | 
 | 3970 | } | 
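
/*
 * Note the return value above: reiserfs_prepare_for_journal() returns 0
 * only when wait == 0 and the buffer lock could not be taken without
 * blocking; callers that pass wait == 1 always get 1 back.
 */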
 | 3971 |  | 
 | 3972 | /* | 
 | 3973 |  * long and ugly.  If flush, will not return until all commit | 
 | 3974 |  * blocks and all real buffers in the trans are on disk. | 
 | 3975 |  * If no_async, won't return until all commit blocks are on disk. | 
 | 3976 |  * | 
 | 3977 |  * keep reading, there are comments as you go along | 
 | 3978 |  * | 
 | 3979 |  * If the journal is aborted, we just clean up. Things like flushing | 
 | 3980 |  * journal lists, etc just won't happen. | 
 | 3981 |  */ | 
 | 3982 | static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) | 
 | 3983 | { | 
 | 3984 | 	struct super_block *sb = th->t_super; | 
 | 3985 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 3986 | 	struct reiserfs_journal_cnode *cn, *next, *jl_cn; | 
 | 3987 | 	struct reiserfs_journal_cnode *last_cn = NULL; | 
 | 3988 | 	struct reiserfs_journal_desc *desc; | 
 | 3989 | 	struct reiserfs_journal_commit *commit; | 
 | 3990 | 	struct buffer_head *c_bh;	/* commit bh */ | 
 | 3991 | 	struct buffer_head *d_bh;	/* desc bh */ | 
 | 3992 | 	int cur_write_start = 0;	/* start index of current log write */ | 
 | 3993 | 	int old_start; | 
 | 3994 | 	int i; | 
 | 3995 | 	int flush; | 
 | 3996 | 	int wait_on_commit; | 
 | 3997 | 	struct reiserfs_journal_list *jl, *temp_jl; | 
 | 3998 | 	struct list_head *entry, *safe; | 
 | 3999 | 	unsigned long jindex; | 
 | 4000 | 	unsigned int commit_trans_id; | 
 | 4001 | 	int trans_half; | 
 | 4002 | 	int depth; | 
 | 4003 |  | 
 | 4004 | 	BUG_ON(th->t_refcount > 1); | 
 | 4005 | 	BUG_ON(!th->t_trans_id); | 
 | 4006 | 	BUG_ON(!th->t_super); | 
 | 4007 |  | 
	/*
	 * protect flush_older_commits from making mistakes if the
	 * transaction ID counter overflows.
	 */
 | 4012 | 	if (th->t_trans_id == ~0U) | 
 | 4013 | 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT; | 
 | 4014 | 	flush = flags & FLUSH_ALL; | 
 | 4015 | 	wait_on_commit = flags & WAIT; | 
 | 4016 |  | 
 | 4017 | 	current->journal_info = th->t_handle_save; | 
 | 4018 | 	reiserfs_check_lock_depth(sb, "journal end"); | 
 | 4019 | 	if (journal->j_len == 0) { | 
 | 4020 | 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), | 
 | 4021 | 					     1); | 
 | 4022 | 		journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb)); | 
 | 4023 | 	} | 
 | 4024 |  | 
 | 4025 | 	lock_journal(sb); | 
 | 4026 | 	if (journal->j_next_full_flush) { | 
 | 4027 | 		flags |= FLUSH_ALL; | 
 | 4028 | 		flush = 1; | 
 | 4029 | 	} | 
 | 4030 | 	if (journal->j_next_async_flush) { | 
 | 4031 | 		flags |= COMMIT_NOW | WAIT; | 
 | 4032 | 		wait_on_commit = 1; | 
 | 4033 | 	} | 
 | 4034 |  | 
	/*
	 * check_journal_end locks the journal and unlocks it if it does
	 * not return 1.  It tells us if we should continue with the
	 * journal_end, or just return.
	 */
 | 4040 | 	if (!check_journal_end(th, flags)) { | 
 | 4041 | 		reiserfs_schedule_old_flush(sb); | 
 | 4042 | 		wake_queued_writers(sb); | 
 | 4043 | 		reiserfs_async_progress_wait(sb); | 
 | 4044 | 		goto out; | 
 | 4045 | 	} | 
 | 4046 |  | 
 | 4047 | 	/* check_journal_end might set these, check again */ | 
 | 4048 | 	if (journal->j_next_full_flush) { | 
 | 4049 | 		flush = 1; | 
 | 4050 | 	} | 
 | 4051 |  | 
	/*
	 * j_must_wait means we have to flush the log blocks, and the
	 * real blocks for this transaction
	 */
 | 4056 | 	if (journal->j_must_wait > 0) { | 
 | 4057 | 		flush = 1; | 
 | 4058 | 	} | 
 | 4059 | #ifdef REISERFS_PREALLOCATE | 
 | 4060 | 	/* | 
 | 4061 | 	 * quota ops might need to nest, setup the journal_info pointer | 
 | 4062 | 	 * for them and raise the refcount so that it is > 0. | 
 | 4063 | 	 */ | 
 | 4064 | 	current->journal_info = th; | 
 | 4065 | 	th->t_refcount++; | 
 | 4066 |  | 
 | 4067 | 	/* it should not involve new blocks into the transaction */ | 
 | 4068 | 	reiserfs_discard_all_prealloc(th); | 
 | 4069 |  | 
 | 4070 | 	th->t_refcount--; | 
 | 4071 | 	current->journal_info = th->t_handle_save; | 
 | 4072 | #endif | 
 | 4073 |  | 
 | 4074 | 	/* setup description block */ | 
	d_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
			      journal->j_start);
 | 4079 | 	set_buffer_uptodate(d_bh); | 
 | 4080 | 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; | 
 | 4081 | 	memset(d_bh->b_data, 0, d_bh->b_size); | 
 | 4082 | 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); | 
 | 4083 | 	set_desc_trans_id(desc, journal->j_trans_id); | 
 | 4084 |  | 
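	/*
	 * On-disk layout of a single transaction in the log, as implied
	 * by the arithmetic here and in the copy loop below (all offsets
	 * modulo the on-disk journal size):
	 *
	 *	[ desc | data 1 .. data j_len | commit ]
	 *
	 * occupying blocks j_start .. j_start + j_len + 1.
	 */
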
 | 4085 | 	/* | 
 | 4086 | 	 * setup commit block.  Don't write (keep it clean too) this one | 
 | 4087 | 	 * until after everyone else is written | 
 | 4088 | 	 */ | 
 | 4089 | 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 4090 | 			      ((journal->j_start + journal->j_len + | 
 | 4091 | 				1) % SB_ONDISK_JOURNAL_SIZE(sb))); | 
 | 4092 | 	commit = (struct reiserfs_journal_commit *)c_bh->b_data; | 
 | 4093 | 	memset(c_bh->b_data, 0, c_bh->b_size); | 
 | 4094 | 	set_commit_trans_id(commit, journal->j_trans_id); | 
 | 4095 | 	set_buffer_uptodate(c_bh); | 
 | 4096 |  | 
 | 4097 | 	/* init this journal list */ | 
 | 4098 | 	jl = journal->j_current_jl; | 
 | 4099 |  | 
 | 4100 | 	/* | 
 | 4101 | 	 * we lock the commit before doing anything because | 
 | 4102 | 	 * we want to make sure nobody tries to run flush_commit_list until | 
 | 4103 | 	 * the new transaction is fully setup, and we've already flushed the | 
 | 4104 | 	 * ordered bh list | 
 | 4105 | 	 */ | 
 | 4106 | 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); | 
 | 4107 |  | 
 | 4108 | 	/* save the transaction id in case we need to commit it later */ | 
 | 4109 | 	commit_trans_id = jl->j_trans_id; | 
 | 4110 |  | 
 | 4111 | 	atomic_set(&jl->j_older_commits_done, 0); | 
 | 4112 | 	jl->j_trans_id = journal->j_trans_id; | 
 | 4113 | 	jl->j_timestamp = journal->j_trans_start_time; | 
 | 4114 | 	jl->j_commit_bh = c_bh; | 
 | 4115 | 	jl->j_start = journal->j_start; | 
 | 4116 | 	jl->j_len = journal->j_len; | 
 | 4117 | 	atomic_set(&jl->j_nonzerolen, journal->j_len); | 
 | 4118 | 	atomic_set(&jl->j_commit_left, journal->j_len + 2); | 
 | 4119 | 	jl->j_realblock = NULL; | 
 | 4120 |  | 
	/*
	 * The ENTIRE FOR LOOP MUST not cause schedule to occur.
	 * For each real block, add it to the journal list hash, and
	 * copy it into the real block index array in the commit or
	 * desc block.
	 */
 | 4126 | 	trans_half = journal_trans_half(sb->s_blocksize); | 
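	/*
	 * The first trans_half block numbers are recorded in the desc
	 * block; any beyond that spill over into the commit block (the
	 * i < trans_half test below).
	 */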
 | 4127 | 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { | 
 | 4128 | 		if (buffer_journaled(cn->bh)) { | 
 | 4129 | 			jl_cn = get_cnode(sb); | 
 | 4130 | 			if (!jl_cn) { | 
 | 4131 | 				reiserfs_panic(sb, "journal-1676", | 
 | 4132 | 					       "get_cnode returned NULL"); | 
 | 4133 | 			} | 
 | 4134 | 			if (i == 0) { | 
 | 4135 | 				jl->j_realblock = jl_cn; | 
 | 4136 | 			} | 
 | 4137 | 			jl_cn->prev = last_cn; | 
 | 4138 | 			jl_cn->next = NULL; | 
 | 4139 | 			if (last_cn) { | 
 | 4140 | 				last_cn->next = jl_cn; | 
 | 4141 | 			} | 
 | 4142 | 			last_cn = jl_cn; | 
 | 4143 | 			/* | 
 | 4144 | 			 * make sure the block we are trying to log | 
 | 4145 | 			 * is not a block of journal or reserved area | 
 | 4146 | 			 */ | 
 | 4147 | 			if (is_block_in_log_or_reserved_area | 
 | 4148 | 			    (sb, cn->bh->b_blocknr)) { | 
 | 4149 | 				reiserfs_panic(sb, "journal-2332", | 
 | 4150 | 					       "Trying to log block %lu, " | 
 | 4151 | 					       "which is a log block", | 
 | 4152 | 					       cn->bh->b_blocknr); | 
 | 4153 | 			} | 
 | 4154 | 			jl_cn->blocknr = cn->bh->b_blocknr; | 
 | 4155 | 			jl_cn->state = 0; | 
 | 4156 | 			jl_cn->sb = sb; | 
 | 4157 | 			jl_cn->bh = cn->bh; | 
 | 4158 | 			jl_cn->jlist = jl; | 
 | 4159 | 			insert_journal_hash(journal->j_list_hash_table, jl_cn); | 
 | 4160 | 			if (i < trans_half) { | 
 | 4161 | 				desc->j_realblock[i] = | 
 | 4162 | 				    cpu_to_le32(cn->bh->b_blocknr); | 
 | 4163 | 			} else { | 
 | 4164 | 				commit->j_realblock[i - trans_half] = | 
 | 4165 | 				    cpu_to_le32(cn->bh->b_blocknr); | 
 | 4166 | 			} | 
 | 4167 | 		} else { | 
 | 4168 | 			i--; | 
 | 4169 | 		} | 
 | 4170 | 	} | 
 | 4171 | 	set_desc_trans_len(desc, journal->j_len); | 
 | 4172 | 	set_desc_mount_id(desc, journal->j_mount_id); | 
 | 4173 | 	set_desc_trans_id(desc, journal->j_trans_id); | 
 | 4174 | 	set_commit_trans_len(commit, journal->j_len); | 
 | 4175 |  | 
 | 4176 | 	/* | 
 | 4177 | 	 * special check in case all buffers in the journal | 
 | 4178 | 	 * were marked for not logging | 
 | 4179 | 	 */ | 
 | 4180 | 	BUG_ON(journal->j_len == 0); | 
 | 4181 |  | 
 | 4182 | 	/* | 
 | 4183 | 	 * we're about to dirty all the log blocks, mark the description block | 
 | 4184 | 	 * dirty now too.  Don't mark the commit block dirty until all the | 
 | 4185 | 	 * others are on disk | 
 | 4186 | 	 */ | 
 | 4187 | 	mark_buffer_dirty(d_bh); | 
 | 4188 |  | 
 | 4189 | 	/* | 
 | 4190 | 	 * first data block is j_start + 1, so add one to | 
 | 4191 | 	 * cur_write_start wherever you use it | 
 | 4192 | 	 */ | 
 | 4193 | 	cur_write_start = journal->j_start; | 
 | 4194 | 	cn = journal->j_first; | 
 | 4195 | 	jindex = 1;	/* start at one so we don't get the desc again */ | 
 | 4196 | 	while (cn) { | 
 | 4197 | 		clear_buffer_journal_new(cn->bh); | 
 | 4198 | 		/* copy all the real blocks into log area.  dirty log blocks */ | 
 | 4199 | 		if (buffer_journaled(cn->bh)) { | 
 | 4200 | 			struct buffer_head *tmp_bh; | 
 | 4201 | 			char *addr; | 
 | 4202 | 			struct page *page; | 
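			/*
			 * map this buffer to its slot in the log: log
			 * blocks start at SB_ONDISK_JOURNAL_1st_BLOCK()
			 * and (cur_write_start + jindex) wraps modulo
			 * the on-disk journal size
			 */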
 | 4203 | 			tmp_bh = | 
 | 4204 | 			    journal_getblk(sb, | 
 | 4205 | 					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 
 | 4206 | 					   ((cur_write_start + | 
 | 4207 | 					     jindex) % | 
 | 4208 | 					    SB_ONDISK_JOURNAL_SIZE(sb))); | 
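			/* we overwrite the whole buffer below; no need to read it */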
 | 4209 | 			set_buffer_uptodate(tmp_bh); | 
 | 4210 | 			page = cn->bh->b_page; | 
 | 4211 | 			addr = kmap(page); | 
 | 4212 | 			memcpy(tmp_bh->b_data, | 
 | 4213 | 			       addr + offset_in_page(cn->bh->b_data), | 
 | 4214 | 			       cn->bh->b_size); | 
 | 4215 | 			kunmap(page); | 
 | 4216 | 			mark_buffer_dirty(tmp_bh); | 
 | 4217 | 			jindex++; | 
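			/*
			 * the real block must now wait for this commit:
			 * JDirty -> JDirty_wait
			 */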
 | 4218 | 			set_buffer_journal_dirty(cn->bh); | 
 | 4219 | 			clear_buffer_journaled(cn->bh); | 
 | 4220 | 		} else { | 
			/*
			 * JDirty was cleared sometime during the
			 * transaction; don't log this one
			 */
 | 4225 | 			reiserfs_warning(sb, "journal-2048", | 
 | 4226 | 					 "BAD, buffer in journal hash, " | 
 | 4227 | 					 "but not JDirty!"); | 
 | 4228 | 			brelse(cn->bh); | 
 | 4229 | 		} | 
 | 4230 | 		next = cn->next; | 
 | 4231 | 		free_cnode(sb, cn); | 
 | 4232 | 		cn = next; | 
 | 4233 | 		reiserfs_cond_resched(sb); | 
 | 4234 | 	} | 
 | 4235 |  | 
	/*
	 * we are done with both the c_bh and d_bh, but
	 * c_bh must be written after all other commit blocks,
	 * so we dirty/brelse c_bh in flush_commit_list, with commit_left <= 1.
	 */
 | 4241 |  | 
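	/* this transaction is fully logged; give the journal a fresh list */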
 | 4242 | 	journal->j_current_jl = alloc_journal_list(sb); | 
 | 4243 |  | 
 | 4244 | 	/* now it is safe to insert this transaction on the main list */ | 
 | 4245 | 	list_add_tail(&jl->j_list, &journal->j_journal_list); | 
 | 4246 | 	list_add_tail(&jl->j_working_list, &journal->j_working_list); | 
 | 4247 | 	journal->j_num_work_lists++; | 
 | 4248 |  | 
 | 4249 | 	/* reset journal values for the next transaction */ | 
 | 4250 | 	old_start = journal->j_start; | 
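	/*
	 * advance j_start past this transaction: j_len real blocks plus
	 * the desc and commit blocks, wrapping modulo the on-disk journal
	 * size.  For example, with j_start = 8180, j_len = 30 and an
	 * 8192-block on-disk journal, the next transaction would start at
	 * (8180 + 30 + 2) % 8192 = 20.
	 */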
 | 4251 | 	journal->j_start = | 
 | 4252 | 	    (journal->j_start + journal->j_len + | 
 | 4253 | 	     2) % SB_ONDISK_JOURNAL_SIZE(sb); | 
 | 4254 | 	atomic_set(&journal->j_wcount, 0); | 
 | 4255 | 	journal->j_bcount = 0; | 
 | 4256 | 	journal->j_last = NULL; | 
 | 4257 | 	journal->j_first = NULL; | 
 | 4258 | 	journal->j_len = 0; | 
 | 4259 | 	journal->j_trans_start_time = 0; | 
 | 4260 | 	/* check for trans_id overflow */ | 
 | 4261 | 	if (++journal->j_trans_id == 0) | 
 | 4262 | 		journal->j_trans_id = 10; | 
 | 4263 | 	journal->j_current_jl->j_trans_id = journal->j_trans_id; | 
 | 4264 | 	journal->j_must_wait = 0; | 
 | 4265 | 	journal->j_len_alloc = 0; | 
 | 4266 | 	journal->j_next_full_flush = 0; | 
 | 4267 | 	journal->j_next_async_flush = 0; | 
 | 4268 | 	init_journal_hash(sb); | 
 | 4269 |  | 
 | 4270 | 	/* | 
 | 4271 | 	 * make sure reiserfs_add_jh sees the new current_jl before we | 
 | 4272 | 	 * write out the tails | 
 | 4273 | 	 */ | 
 | 4274 | 	smp_mb(); | 
 | 4275 |  | 
 | 4276 | 	/* | 
 | 4277 | 	 * tail conversion targets have to hit the disk before we end the | 
 | 4278 | 	 * transaction.  Otherwise a later transaction might repack the tail | 
 | 4279 | 	 * before this transaction commits, leaving the data block unflushed | 
 | 4280 | 	 * and clean, if we crash before the later transaction commits, the | 
 | 4281 | 	 * data block is lost. | 
 | 4282 | 	 */ | 
 | 4283 | 	if (!list_empty(&jl->j_tail_bh_list)) { | 
 | 4284 | 		depth = reiserfs_write_unlock_nested(sb); | 
 | 4285 | 		write_ordered_buffers(&journal->j_dirty_buffers_lock, | 
 | 4286 | 				      journal, jl, &jl->j_tail_bh_list); | 
 | 4287 | 		reiserfs_write_lock_nested(sb, depth); | 
 | 4288 | 	} | 
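	/* write_ordered_buffers must have drained the whole tail list */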
 | 4289 | 	BUG_ON(!list_empty(&jl->j_tail_bh_list)); | 
 | 4290 | 	mutex_unlock(&jl->j_commit_mutex); | 
 | 4291 |  | 
	/*
	 * Honor the flush wishes from the caller; simple commits can
	 * be done outside the journal lock and are done below.
	 *
	 * If we don't flush the commit list right now, we put it on
	 * the work queue so that anybody waiting on async progress
	 * doesn't have to wait for this process to flush journal
	 * lists and such.
	 */
 | 4300 | 	if (flush) { | 
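		/* full flush: commit block first, then all the real blocks */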
 | 4301 | 		flush_commit_list(sb, jl, 1); | 
 | 4302 | 		flush_journal_list(sb, jl, 1); | 
 | 4303 | 	} else if (!(jl->j_state & LIST_COMMIT_PENDING)) { | 
 | 4304 | 		/* | 
 | 4305 | 		 * Avoid queueing work when sb is being shut down. Transaction | 
 | 4306 | 		 * will be flushed on journal shutdown. | 
 | 4307 | 		 */ | 
 | 4308 | 		if (sb->s_flags & SB_ACTIVE) | 
 | 4309 | 			queue_delayed_work(REISERFS_SB(sb)->commit_wq, | 
 | 4310 | 					   &journal->j_work, HZ / 10); | 
 | 4311 | 	} | 
 | 4312 |  | 
	/*
	 * if the next transaction has any chance of wrapping, flush
	 * transactions that might get overwritten.  If any journal lists
	 * are very old, flush them as well.
	 */
 | 4318 | first_jl: | 
 | 4319 | 	list_for_each_safe(entry, safe, &journal->j_journal_list) { | 
 | 4320 | 		temp_jl = JOURNAL_LIST_ENTRY(entry); | 
 | 4321 | 		if (journal->j_start <= temp_jl->j_start) { | 
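			/* temp_jl lies at or after our new j_start, no wrap */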
 | 4322 | 			if ((journal->j_start + journal->j_trans_max + 1) >= | 
 | 4323 | 			    temp_jl->j_start) { | 
 | 4324 | 				flush_used_journal_lists(sb, temp_jl); | 
 | 4325 | 				goto first_jl; | 
 | 4326 | 			} else if ((journal->j_start + | 
 | 4327 | 				    journal->j_trans_max + 1) < | 
 | 4328 | 				   SB_ONDISK_JOURNAL_SIZE(sb)) { | 
				/*
				 * if we don't cross into the next
				 * transaction and we don't wrap, there is
				 * no way we can overlap any later
				 * transactions; break now
				 */
 | 4335 | 				break; | 
 | 4336 | 			} | 
 | 4337 | 		} else if ((journal->j_start + | 
 | 4338 | 			    journal->j_trans_max + 1) > | 
 | 4339 | 			   SB_ONDISK_JOURNAL_SIZE(sb)) { | 
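			/*
			 * our next transaction wraps past the end of the
			 * log; check its wrapped portion against temp_jl
			 */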
 | 4340 | 			if (((journal->j_start + journal->j_trans_max + 1) % | 
 | 4341 | 			     SB_ONDISK_JOURNAL_SIZE(sb)) >= | 
 | 4342 | 			    temp_jl->j_start) { | 
 | 4343 | 				flush_used_journal_lists(sb, temp_jl); | 
 | 4344 | 				goto first_jl; | 
 | 4345 | 			} else { | 
			/*
			 * we don't overlap anything from our start
			 * to the end of the log, and our wrapped
			 * portion doesn't overlap anything at
			 * the start of the log.  We can break.
			 */
 | 4352 | 				break; | 
 | 4353 | 			} | 
 | 4354 | 		} | 
 | 4355 | 	} | 
 | 4356 |  | 
 | 4357 | 	journal->j_current_jl->j_list_bitmap = | 
 | 4358 | 	    get_list_bitmap(sb, journal->j_current_jl); | 
 | 4359 |  | 
 | 4360 | 	if (!(journal->j_current_jl->j_list_bitmap)) { | 
 | 4361 | 		reiserfs_panic(sb, "journal-1996", | 
 | 4362 | 			       "could not get a list bitmap"); | 
 | 4363 | 	} | 
 | 4364 |  | 
 | 4365 | 	atomic_set(&journal->j_jlock, 0); | 
 | 4366 | 	unlock_journal(sb); | 
	/* wake up anybody waiting to join. */
 | 4368 | 	clear_bit(J_WRITERS_QUEUED, &journal->j_state); | 
 | 4369 | 	wake_up(&journal->j_join_wait); | 
 | 4370 |  | 
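	/*
	 * the caller wanted the commit on disk without a full flush;
	 * push (and wait for) the commit write now, unless another
	 * flusher has already freed this list
	 */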
 | 4371 | 	if (!flush && wait_on_commit && | 
 | 4372 | 	    journal_list_still_alive(sb, commit_trans_id)) { | 
 | 4373 | 		flush_commit_list(sb, jl, 1); | 
 | 4374 | 	} | 
 | 4375 | out: | 
 | 4376 | 	reiserfs_check_lock_depth(sb, "journal end2"); | 
 | 4377 |  | 
 | 4378 | 	memset(th, 0, sizeof(*th)); | 
	/*
	 * Re-set th->t_super, so we can properly keep track of how many
	 * persistent transactions there are.  We need to do this so that,
	 * if this call is part of a failed restart_transaction, we can
	 * free it later.
	 */
 | 4384 | 	th->t_super = sb; | 
 | 4385 |  | 
 | 4386 | 	return journal->j_errno; | 
 | 4387 | } | 
 | 4388 |  | 
/* Set the file system read-only and refuse new transactions */
 | 4390 | void reiserfs_abort_journal(struct super_block *sb, int errno) | 
 | 4391 | { | 
 | 4392 | 	struct reiserfs_journal *journal = SB_JOURNAL(sb); | 
 | 4393 | 	if (test_bit(J_ABORTED, &journal->j_state)) | 
 | 4394 | 		return; | 
 | 4395 |  | 
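	/* keep the first error; later aborts must not overwrite it */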
 | 4396 | 	if (!journal->j_errno) | 
 | 4397 | 		journal->j_errno = errno; | 
 | 4398 |  | 
 | 4399 | 	sb->s_flags |= SB_RDONLY; | 
 | 4400 | 	set_bit(J_ABORTED, &journal->j_state); | 
 | 4401 |  | 
 | 4402 | #ifdef CONFIG_REISERFS_CHECK | 
 | 4403 | 	dump_stack(); | 
 | 4404 | #endif | 
 | 4405 | } |