Blame - src/kernel/linux/v4.19/fs/jbd2/transaction.c - T800

blob: 22f9ffe03caf8bfa2de7f382211c957fd22a7333 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0+
				2	/*
				3	* linux/fs/jbd2/transaction.c
				4	*
				5	* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
				6	*
				7	* Copyright 1998 Red Hat corp --- All Rights Reserved
				8	*
				9	* Generic filesystem transaction handling code; part of the ext2fs
				10	* journaling system.
				11	*
				12	* This file manages transactions (compound commits managed by the
				13	* journaling code) and handles (individual atomic operations by the
				14	* filesystem).
				15	*/
				16
				17	#include <linux/time.h>
				18	#include <linux/fs.h>
				19	#include <linux/jbd2.h>
				20	#include <linux/errno.h>
				21	#include <linux/slab.h>
				22	#include <linux/timer.h>
				23	#include <linux/mm.h>
				24	#include <linux/highmem.h>
				25	#include <linux/hrtimer.h>
				26	#include <linux/backing-dev.h>
				27	#include <linux/bug.h>
				28	#include <linux/module.h>
				29	#include <linux/sched/mm.h>
				30
				31	#include <trace/events/jbd2.h>
				32
				33	static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
				34	static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
				35
				36	static struct kmem_cache *transaction_cache;
				37	int __init jbd2_journal_init_transaction_cache(void)
				38	{
				39	J_ASSERT(!transaction_cache);
				40	transaction_cache = kmem_cache_create("jbd2_transaction_s",
				41	sizeof(transaction_t),
				42	0,
				43	SLAB_HWCACHE_ALIGN\|SLAB_TEMPORARY,
				44	NULL);
				45	if (!transaction_cache) {
				46	pr_emerg("JBD2: failed to create transaction cache\n");
				47	return -ENOMEM;
				48	}
				49	return 0;
				50	}
				51
				52	void jbd2_journal_destroy_transaction_cache(void)
				53	{
				54	kmem_cache_destroy(transaction_cache);
				55	transaction_cache = NULL;
				56	}
				57
				58	void jbd2_journal_free_transaction(transaction_t *transaction)
				59	{
				60	if (unlikely(ZERO_OR_NULL_PTR(transaction)))
				61	return;
				62	kmem_cache_free(transaction_cache, transaction);
				63	}
				64
				65	/*
				66	* jbd2_get_transaction: obtain a new transaction_t object.
				67	*
				68	* Simply allocate and initialise a new transaction. Create it in
				69	* RUNNING state and add it to the current journal (which should not
				70	* have an existing running transaction: we only make a new transaction
				71	* once we have started to commit the old one).
				72	*
				73	* Preconditions:
				74	* The journal MUST be locked. We don't perform atomic mallocs on the
				75	* new transaction and we can't block without protecting against other
				76	* processes trying to touch the journal while it is in transition.
				77	*
				78	*/
				79
				80	static transaction_t *
				81	jbd2_get_transaction(journal_t journal, transaction_t transaction)
				82	{
				83	transaction->t_journal = journal;
				84	transaction->t_state = T_RUNNING;
				85	transaction->t_start_time = ktime_get();
				86	transaction->t_tid = journal->j_transaction_sequence++;
				87	transaction->t_expires = jiffies + journal->j_commit_interval;
				88	spin_lock_init(&transaction->t_handle_lock);
				89	atomic_set(&transaction->t_updates, 0);
				90	atomic_set(&transaction->t_outstanding_credits,
				91	atomic_read(&journal->j_reserved_credits));
				92	atomic_set(&transaction->t_handle_count, 0);
				93	INIT_LIST_HEAD(&transaction->t_inode_list);
				94	INIT_LIST_HEAD(&transaction->t_private_list);
				95
				96	/* Set up the commit timer for the new transaction. */
				97	journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
				98	add_timer(&journal->j_commit_timer);
				99
				100	J_ASSERT(journal->j_running_transaction == NULL);
				101	journal->j_running_transaction = transaction;
				102	transaction->t_max_wait = 0;
				103	transaction->t_start = jiffies;
				104	transaction->t_requested = 0;
				105
				106	return transaction;
				107	}
				108
				109	/*
				110	* Handle management.
				111	*
				112	* A handle_t is an object which represents a single atomic update to a
				113	* filesystem, and which tracks all of the modifications which form part
				114	* of that one update.
				115	*/
				116
				117	/*
				118	* Update transaction's maximum wait time, if debugging is enabled.
				119	*
				120	* In order for t_max_wait to be reliable, it must be protected by a
				121	* lock. But doing so will mean that start_this_handle() can not be
				122	* run in parallel on SMP systems, which limits our scalability. So
				123	* unless debugging is enabled, we no longer update t_max_wait, which
				124	* means that maximum wait time reported by the jbd2_run_stats
				125	* tracepoint will always be zero.
				126	*/
				127	static inline void update_t_max_wait(transaction_t *transaction,
				128	unsigned long ts)
				129	{
				130	#ifdef CONFIG_JBD2_DEBUG
				131	if (jbd2_journal_enable_debug &&
				132	time_after(transaction->t_start, ts)) {
				133	ts = jbd2_time_diff(ts, transaction->t_start);
				134	spin_lock(&transaction->t_handle_lock);
				135	if (ts > transaction->t_max_wait)
				136	transaction->t_max_wait = ts;
				137	spin_unlock(&transaction->t_handle_lock);
				138	}
				139	#endif
				140	}
				141
				142	/*
				143	* Wait until running transaction passes T_LOCKED state. Also starts the commit
				144	* if needed. The function expects running transaction to exist and releases
				145	* j_state_lock.
				146	*/
				147	static void wait_transaction_locked(journal_t *journal)
				148	__releases(journal->j_state_lock)
				149	{
				150	DEFINE_WAIT(wait);
				151	int need_to_start;
				152	tid_t tid = journal->j_running_transaction->t_tid;
				153
				154	prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
				155	TASK_UNINTERRUPTIBLE);
				156	need_to_start = !tid_geq(journal->j_commit_request, tid);
				157	read_unlock(&journal->j_state_lock);
				158	if (need_to_start)
				159	jbd2_log_start_commit(journal, tid);
				160	jbd2_might_wait_for_commit(journal);
				161	schedule();
				162	finish_wait(&journal->j_wait_transaction_locked, &wait);
				163	}
				164
				165	static void sub_reserved_credits(journal_t *journal, int blocks)
				166	{
				167	atomic_sub(blocks, &journal->j_reserved_credits);
				168	wake_up(&journal->j_wait_reserved);
				169	}
				170
				171	/*
				172	* Wait until we can add credits for handle to the running transaction. Called
				173	* with j_state_lock held for reading. Returns 0 if handle joined the running
				174	* transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
				175	* caller must retry.
				176	*/
				177	static int add_transaction_credits(journal_t *journal, int blocks,
				178	int rsv_blocks)
				179	{
				180	transaction_t *t = journal->j_running_transaction;
				181	int needed;
				182	int total = blocks + rsv_blocks;
				183
				184	/*
				185	* If the current transaction is locked down for commit, wait
				186	* for the lock to be released.
				187	*/
				188	if (t->t_state == T_LOCKED) {
				189	wait_transaction_locked(journal);
				190	return 1;
				191	}
				192
				193	/*
				194	* If there is not enough space left in the log to write all
				195	* potential buffers requested by this operation, we need to
				196	* stall pending a log checkpoint to free some more log space.
				197	*/
				198	needed = atomic_add_return(total, &t->t_outstanding_credits);
				199	if (needed > journal->j_max_transaction_buffers) {
				200	/*
				201	* If the current transaction is already too large,
				202	* then start to commit it: we can then go back and
				203	* attach this handle to a new transaction.
				204	*/
				205	atomic_sub(total, &t->t_outstanding_credits);
				206
				207	/*
				208	* Is the number of reserved credits in the current transaction too
				209	* big to fit this handle? Wait until reserved credits are freed.
				210	*/
				211	if (atomic_read(&journal->j_reserved_credits) + total >
				212	journal->j_max_transaction_buffers) {
				213	read_unlock(&journal->j_state_lock);
				214	jbd2_might_wait_for_commit(journal);
				215	wait_event(journal->j_wait_reserved,
				216	atomic_read(&journal->j_reserved_credits) + total <=
				217	journal->j_max_transaction_buffers);
				218	return 1;
				219	}
				220
				221	wait_transaction_locked(journal);
				222	return 1;
				223	}
				224
				225	/*
				226	* The commit code assumes that it can get enough log space
				227	* without forcing a checkpoint. This is critical for
				228	* correctness: a checkpoint of a buffer which is also
				229	* associated with a committing transaction creates a deadlock,
				230	* so commit simply cannot force through checkpoints.
				231	*
				232	* We must therefore ensure the necessary space in the journal
				233	* before starting to dirty potentially checkpointed buffers
				234	* in the new transaction.
				235	*/
				236	if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
				237	atomic_sub(total, &t->t_outstanding_credits);
				238	read_unlock(&journal->j_state_lock);
				239	jbd2_might_wait_for_commit(journal);
				240	write_lock(&journal->j_state_lock);
				241	if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
				242	__jbd2_log_wait_for_space(journal);
				243	write_unlock(&journal->j_state_lock);
				244	return 1;
				245	}
				246
				247	/* No reservation? We are done... */
				248	if (!rsv_blocks)
				249	return 0;
				250
				251	needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
				252	/* We allow at most half of a transaction to be reserved */
				253	if (needed > journal->j_max_transaction_buffers / 2) {
				254	sub_reserved_credits(journal, rsv_blocks);
				255	atomic_sub(total, &t->t_outstanding_credits);
				256	read_unlock(&journal->j_state_lock);
				257	jbd2_might_wait_for_commit(journal);
				258	wait_event(journal->j_wait_reserved,
				259	atomic_read(&journal->j_reserved_credits) + rsv_blocks
				260	<= journal->j_max_transaction_buffers / 2);
				261	return 1;
				262	}
				263	return 0;
				264	}
				265
				266	/*
				267	* start_this_handle: Given a handle, deal with any locking or stalling
				268	* needed to make sure that there is enough journal space for the handle
				269	* to begin. Attach the handle to a transaction and set up the
				270	* transaction's buffer credits.
				271	*/
				272
				273	static int start_this_handle(journal_t journal, handle_t handle,
				274	gfp_t gfp_mask)
				275	{
				276	transaction_t transaction, new_transaction = NULL;
				277	int blocks = handle->h_buffer_credits;
				278	int rsv_blocks = 0;
				279	unsigned long ts = jiffies;
				280
				281	if (handle->h_rsv_handle)
				282	rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
				283
				284	/*
				285	* Limit the number of reserved credits to 1/2 of maximum transaction
				286	* size and limit the number of total credits to not exceed maximum
				287	* transaction size per operation.
				288	*/
				289	if ((rsv_blocks > journal->j_max_transaction_buffers / 2) \|\|
				290	(rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
				291	printk(KERN_ERR "JBD2: %s wants too many credits "
				292	"credits:%d rsv_credits:%d max:%d\n",
				293	current->comm, blocks, rsv_blocks,
				294	journal->j_max_transaction_buffers);
				295	WARN_ON(1);
				296	return -ENOSPC;
				297	}
				298
				299	alloc_transaction:
				300	if (!journal->j_running_transaction) {
				301	/*
				302	* If __GFP_FS is not present, then we may be being called from
				303	* inside the fs writeback layer, so we MUST NOT fail.
				304	*/
				305	if ((gfp_mask & __GFP_FS) == 0)
				306	gfp_mask \|= __GFP_NOFAIL;
				307	new_transaction = kmem_cache_zalloc(transaction_cache,
				308	gfp_mask);
				309	if (!new_transaction)
				310	return -ENOMEM;
				311	}
				312
				313	jbd_debug(3, "New handle %p going live.\n", handle);
				314
				315	/*
				316	* We need to hold j_state_lock until t_updates has been incremented,
				317	* for proper journal barrier handling
				318	*/
				319	repeat:
				320	read_lock(&journal->j_state_lock);
				321	BUG_ON(journal->j_flags & JBD2_UNMOUNT);
				322	if (is_journal_aborted(journal) \|\|
				323	(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
				324	read_unlock(&journal->j_state_lock);
				325	jbd2_journal_free_transaction(new_transaction);
				326	return -EROFS;
				327	}
				328
				329	/*
				330	* Wait on the journal's transaction barrier if necessary. Specifically
				331	* we allow reserved handles to proceed because otherwise commit could
				332	* deadlock on page writeback not being able to complete.
				333	*/
				334	if (!handle->h_reserved && journal->j_barrier_count) {
				335	read_unlock(&journal->j_state_lock);
				336	wait_event(journal->j_wait_transaction_locked,
				337	journal->j_barrier_count == 0);
				338	goto repeat;
				339	}
				340
				341	if (!journal->j_running_transaction) {
				342	read_unlock(&journal->j_state_lock);
				343	if (!new_transaction)
				344	goto alloc_transaction;
				345	write_lock(&journal->j_state_lock);
				346	if (!journal->j_running_transaction &&
				347	(handle->h_reserved \|\| !journal->j_barrier_count)) {
				348	jbd2_get_transaction(journal, new_transaction);
				349	new_transaction = NULL;
				350	}
				351	write_unlock(&journal->j_state_lock);
				352	goto repeat;
				353	}
				354
				355	transaction = journal->j_running_transaction;
				356
				357	if (!handle->h_reserved) {
				358	/* We may have dropped j_state_lock - restart in that case */
				359	if (add_transaction_credits(journal, blocks, rsv_blocks))
				360	goto repeat;
				361	} else {
				362	/*
				363	* We have handle reserved so we are allowed to join T_LOCKED
				364	* transaction and we don't have to check for transaction size
				365	* and journal space.
				366	*/
				367	sub_reserved_credits(journal, blocks);
				368	handle->h_reserved = 0;
				369	}
				370
				371	/* OK, account for the buffers that this operation expects to
				372	* use and add the handle to the running transaction.
				373	*/
				374	update_t_max_wait(transaction, ts);
				375	handle->h_transaction = transaction;
				376	handle->h_requested_credits = blocks;
				377	handle->h_start_jiffies = jiffies;
				378	atomic_inc(&transaction->t_updates);
				379	atomic_inc(&transaction->t_handle_count);
				380	jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
				381	handle, blocks,
				382	atomic_read(&transaction->t_outstanding_credits),
				383	jbd2_log_space_left(journal));
				384	read_unlock(&journal->j_state_lock);
				385	current->journal_info = handle;
				386	jbd2_journal_free_transaction(new_transaction);
				387	/*
				388	* Ensure that no allocations done while the transaction is open are
				389	* going to recurse back to the fs layer.
				390	*/
				391	handle->saved_alloc_context = memalloc_nofs_save();
				392	return 0;
				393	}
				394
				395	/* Allocate a new handle. This should probably be in a slab... */
				396	static handle_t *new_handle(int nblocks)
				397	{
				398	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
				399	if (!handle)
				400	return NULL;
				401	handle->h_buffer_credits = nblocks;
				402	handle->h_ref = 1;
				403
				404	return handle;
				405	}
				406
				407	handle_t jbd2__journal_start(journal_t journal, int nblocks, int rsv_blocks,
				408	gfp_t gfp_mask, unsigned int type,
				409	unsigned int line_no)
				410	{
				411	handle_t *handle = journal_current_handle();
				412	int err;
				413
				414	if (!journal)
				415	return ERR_PTR(-EROFS);
				416
				417	if (handle) {
				418	J_ASSERT(handle->h_transaction->t_journal == journal);
				419	handle->h_ref++;
				420	return handle;
				421	}
				422
				423	handle = new_handle(nblocks);
				424	if (!handle)
				425	return ERR_PTR(-ENOMEM);
				426	if (rsv_blocks) {
				427	handle_t *rsv_handle;
				428
				429	rsv_handle = new_handle(rsv_blocks);
				430	if (!rsv_handle) {
				431	jbd2_free_handle(handle);
				432	return ERR_PTR(-ENOMEM);
				433	}
				434	rsv_handle->h_reserved = 1;
				435	rsv_handle->h_journal = journal;
				436	handle->h_rsv_handle = rsv_handle;
				437	}
				438
				439	err = start_this_handle(journal, handle, gfp_mask);
				440	if (err < 0) {
				441	if (handle->h_rsv_handle)
				442	jbd2_free_handle(handle->h_rsv_handle);
				443	jbd2_free_handle(handle);
				444	return ERR_PTR(err);
				445	}
				446	handle->h_type = type;
				447	handle->h_line_no = line_no;
				448	trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
				449	handle->h_transaction->t_tid, type,
				450	line_no, nblocks);
				451
				452	return handle;
				453	}
				454	EXPORT_SYMBOL(jbd2__journal_start);
				455
				456
				457	/**
				458	* handle_t *jbd2_journal_start() - Obtain a new handle.
				459	* @journal: Journal to start transaction on.
				460	* @nblocks: number of block buffer we might modify
				461	*
				462	* We make sure that the transaction can guarantee at least nblocks of
				463	* modified buffers in the log. We block until the log can guarantee
				464	* that much space. Additionally, if rsv_blocks > 0, we also create another
				465	* handle with rsv_blocks reserved blocks in the journal. This handle is
				466	* stored in h_rsv_handle. It is not attached to any particular transaction
				467	* and thus doesn't block transaction commit. If the caller uses this reserved
				468	* handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
				469	* on the parent handle will dispose the reserved one. Reserved handle has to
				470	* be converted to a normal handle using jbd2_journal_start_reserved() before
				471	* it can be used.
				472	*
				473	* Return a pointer to a newly allocated handle, or an ERR_PTR() value
				474	* on failure.
				475	*/
				476	handle_t jbd2_journal_start(journal_t journal, int nblocks)
				477	{
				478	return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
				479	}
				480	EXPORT_SYMBOL(jbd2_journal_start);
				481
				482	void jbd2_journal_free_reserved(handle_t *handle)
				483	{
				484	journal_t *journal = handle->h_journal;
				485
				486	WARN_ON(!handle->h_reserved);
				487	sub_reserved_credits(journal, handle->h_buffer_credits);
				488	jbd2_free_handle(handle);
				489	}
				490	EXPORT_SYMBOL(jbd2_journal_free_reserved);
				491
				492	/**
				493	* int jbd2_journal_start_reserved() - start reserved handle
				494	* @handle: handle to start
				495	* @type: for handle statistics
				496	* @line_no: for handle statistics
				497	*
				498	* Start handle that has been previously reserved with jbd2_journal_reserve().
				499	* This attaches @handle to the running transaction (or creates one if there's
				500	* no transaction running). Unlike jbd2_journal_start() this function cannot
				501	* block on journal commit, checkpointing, or similar stuff. It can block on
				502	* memory allocation or frozen journal though.
				503	*
				504	* Return 0 on success, non-zero on error - handle is freed in that case.
				505	*/
				506	int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
				507	unsigned int line_no)
				508	{
				509	journal_t *journal = handle->h_journal;
				510	int ret = -EIO;
				511
				512	if (WARN_ON(!handle->h_reserved)) {
				513	/* Someone passed in normal handle? Just stop it. */
				514	jbd2_journal_stop(handle);
				515	return ret;
				516	}
				517	/*
				518	* Usefulness of mixing of reserved and unreserved handles is
				519	* questionable. So far nobody seems to need it so just error out.
				520	*/
				521	if (WARN_ON(current->journal_info)) {
				522	jbd2_journal_free_reserved(handle);
				523	return ret;
				524	}
				525
				526	handle->h_journal = NULL;
				527	/*
				528	* GFP_NOFS is here because callers are likely from writeback or
				529	* similarly constrained call sites
				530	*/
				531	ret = start_this_handle(journal, handle, GFP_NOFS);
				532	if (ret < 0) {
				533	handle->h_journal = journal;
				534	jbd2_journal_free_reserved(handle);
				535	return ret;
				536	}
				537	handle->h_type = type;
				538	handle->h_line_no = line_no;
				539	return 0;
				540	}
				541	EXPORT_SYMBOL(jbd2_journal_start_reserved);
				542
				543	/**
				544	* int jbd2_journal_extend() - extend buffer credits.
				545	* @handle: handle to 'extend'
				546	* @nblocks: nr blocks to try to extend by.
				547	*
				548	* Some transactions, such as large extends and truncates, can be done
				549	* atomically all at once or in several stages. The operation requests
				550	* a credit for a number of buffer modifications in advance, but can
				551	* extend its credit if it needs more.
				552	*
				553	* jbd2_journal_extend tries to give the running handle more buffer credits.
				554	* It does not guarantee that allocation - this is a best-effort only.
				555	* The calling process MUST be able to deal cleanly with a failure to
				556	* extend here.
				557	*
				558	* Return 0 on success, non-zero on failure.
				559	*
				560	* return code < 0 implies an error
				561	* return code > 0 implies normal transaction-full status.
				562	*/
				563	int jbd2_journal_extend(handle_t *handle, int nblocks)
				564	{
				565	transaction_t *transaction = handle->h_transaction;
				566	journal_t *journal;
				567	int result;
				568	int wanted;
				569
				570	if (is_handle_aborted(handle))
				571	return -EROFS;
				572	journal = transaction->t_journal;
				573
				574	result = 1;
				575
				576	read_lock(&journal->j_state_lock);
				577
				578	/* Don't extend a locked-down transaction! */
				579	if (transaction->t_state != T_RUNNING) {
				580	jbd_debug(3, "denied handle %p %d blocks: "
				581	"transaction not running\n", handle, nblocks);
				582	goto error_out;
				583	}
				584
				585	spin_lock(&transaction->t_handle_lock);
				586	wanted = atomic_add_return(nblocks,
				587	&transaction->t_outstanding_credits);
				588
				589	if (wanted > journal->j_max_transaction_buffers) {
				590	jbd_debug(3, "denied handle %p %d blocks: "
				591	"transaction too large\n", handle, nblocks);
				592	atomic_sub(nblocks, &transaction->t_outstanding_credits);
				593	goto unlock;
				594	}
				595
				596	if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
				597	jbd2_log_space_left(journal)) {
				598	jbd_debug(3, "denied handle %p %d blocks: "
				599	"insufficient log space\n", handle, nblocks);
				600	atomic_sub(nblocks, &transaction->t_outstanding_credits);
				601	goto unlock;
				602	}
				603
				604	trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
				605	transaction->t_tid,
				606	handle->h_type, handle->h_line_no,
				607	handle->h_buffer_credits,
				608	nblocks);
				609
				610	handle->h_buffer_credits += nblocks;
				611	handle->h_requested_credits += nblocks;
				612	result = 0;
				613
				614	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
				615	unlock:
				616	spin_unlock(&transaction->t_handle_lock);
				617	error_out:
				618	read_unlock(&journal->j_state_lock);
				619	return result;
				620	}
				621
				622
				623	/**
				624	* int jbd2_journal_restart() - restart a handle .
				625	* @handle: handle to restart
				626	* @nblocks: nr credits requested
				627	* @gfp_mask: memory allocation flags (for start_this_handle)
				628	*
				629	* Restart a handle for a multi-transaction filesystem
				630	* operation.
				631	*
				632	* If the jbd2_journal_extend() call above fails to grant new buffer credits
				633	* to a running handle, a call to jbd2_journal_restart will commit the
				634	* handle's transaction so far and reattach the handle to a new
				635	* transaction capable of guaranteeing the requested number of
				636	* credits. We preserve reserved handle if there's any attached to the
				637	* passed in handle.
				638	*/
				639	int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
				640	{
				641	transaction_t *transaction = handle->h_transaction;
				642	journal_t *journal;
				643	tid_t tid;
				644	int need_to_start, ret;
				645
				646	/* If we've had an abort of any type, don't even think about
				647	* actually doing the restart! */
				648	if (is_handle_aborted(handle))
				649	return 0;
				650	journal = transaction->t_journal;
				651
				652	/*
				653	* First unlink the handle from its current transaction, and start the
				654	* commit on that.
				655	*/
				656	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
				657	J_ASSERT(journal_current_handle() == handle);
				658
				659	read_lock(&journal->j_state_lock);
				660	spin_lock(&transaction->t_handle_lock);
				661	atomic_sub(handle->h_buffer_credits,
				662	&transaction->t_outstanding_credits);
				663	if (handle->h_rsv_handle) {
				664	sub_reserved_credits(journal,
				665	handle->h_rsv_handle->h_buffer_credits);
				666	}
				667	if (atomic_dec_and_test(&transaction->t_updates))
				668	wake_up(&journal->j_wait_updates);
				669	tid = transaction->t_tid;
				670	spin_unlock(&transaction->t_handle_lock);
				671	handle->h_transaction = NULL;
				672	current->journal_info = NULL;
				673
				674	jbd_debug(2, "restarting handle %p\n", handle);
				675	need_to_start = !tid_geq(journal->j_commit_request, tid);
				676	read_unlock(&journal->j_state_lock);
				677	if (need_to_start)
				678	jbd2_log_start_commit(journal, tid);
				679	handle->h_buffer_credits = nblocks;
				680	/*
				681	* Restore the original nofs context because the journal restart
				682	* is basically the same thing as journal stop and start.
				683	* start_this_handle will start a new nofs context.
				684	*/
				685	memalloc_nofs_restore(handle->saved_alloc_context);
				686	ret = start_this_handle(journal, handle, gfp_mask);
				687	return ret;
				688	}
				689	EXPORT_SYMBOL(jbd2__journal_restart);
				690
				691
				692	int jbd2_journal_restart(handle_t *handle, int nblocks)
				693	{
				694	return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
				695	}
				696	EXPORT_SYMBOL(jbd2_journal_restart);
				697
				698	/**
				699	* void jbd2_journal_lock_updates () - establish a transaction barrier.
				700	* @journal: Journal to establish a barrier on.
				701	*
				702	* This locks out any further updates from being started, and blocks
				703	* until all existing updates have completed, returning only once the
				704	* journal is in a quiescent state with no updates running.
				705	*
				706	* The journal lock should not be held on entry.
				707	*/
				708	void jbd2_journal_lock_updates(journal_t *journal)
				709	{
				710	DEFINE_WAIT(wait);
				711
				712	jbd2_might_wait_for_commit(journal);
				713
				714	write_lock(&journal->j_state_lock);
				715	++journal->j_barrier_count;
				716
				717	/* Wait until there are no reserved handles */
				718	if (atomic_read(&journal->j_reserved_credits)) {
				719	write_unlock(&journal->j_state_lock);
				720	wait_event(journal->j_wait_reserved,
				721	atomic_read(&journal->j_reserved_credits) == 0);
				722	write_lock(&journal->j_state_lock);
				723	}
				724
				725	/* Wait until there are no running updates */
				726	while (1) {
				727	transaction_t *transaction = journal->j_running_transaction;
				728
				729	if (!transaction)
				730	break;
				731
				732	spin_lock(&transaction->t_handle_lock);
				733	prepare_to_wait(&journal->j_wait_updates, &wait,
				734	TASK_UNINTERRUPTIBLE);
				735	if (!atomic_read(&transaction->t_updates)) {
				736	spin_unlock(&transaction->t_handle_lock);
				737	finish_wait(&journal->j_wait_updates, &wait);
				738	break;
				739	}
				740	spin_unlock(&transaction->t_handle_lock);
				741	write_unlock(&journal->j_state_lock);
				742	schedule();
				743	finish_wait(&journal->j_wait_updates, &wait);
				744	write_lock(&journal->j_state_lock);
				745	}
				746	write_unlock(&journal->j_state_lock);
				747
				748	/*
				749	* We have now established a barrier against other normal updates, but
				750	* we also need to barrier against other jbd2_journal_lock_updates() calls
				751	* to make sure that we serialise special journal-locked operations
				752	* too.
				753	*/
				754	mutex_lock(&journal->j_barrier);
				755	}
				756
				757	/**
				758	* void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
				759	* @journal: Journal to release the barrier on.
				760	*
				761	* Release a transaction barrier obtained with jbd2_journal_lock_updates().
				762	*
				763	* Should be called without the journal lock held.
				764	*/
				765	void jbd2_journal_unlock_updates (journal_t *journal)
				766	{
				767	J_ASSERT(journal->j_barrier_count != 0);
				768
				769	mutex_unlock(&journal->j_barrier);
				770	write_lock(&journal->j_state_lock);
				771	--journal->j_barrier_count;
				772	write_unlock(&journal->j_state_lock);
				773	wake_up(&journal->j_wait_transaction_locked);
				774	}
				775
				776	static void warn_dirty_buffer(struct buffer_head *bh)
				777	{
				778	printk(KERN_WARNING
				779	"JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
				780	"There's a risk of filesystem corruption in case of system "
				781	"crash.\n",
				782	bh->b_bdev, (unsigned long long)bh->b_blocknr);
				783	}
				784
				785	/* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
				786	static void jbd2_freeze_jh_data(struct journal_head *jh)
				787	{
				788	struct page *page;
				789	int offset;
				790	char *source;
				791	struct buffer_head *bh = jh2bh(jh);
				792
				793	J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
				794	page = bh->b_page;
				795	offset = offset_in_page(bh->b_data);
				796	source = kmap_atomic(page);
				797	/* Fire data frozen trigger just before we copy the data */
				798	jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
				799	memcpy(jh->b_frozen_data, source + offset, bh->b_size);
				800	kunmap_atomic(source);
				801
				802	/*
				803	* Now that the frozen data is saved off, we need to store any matching
				804	* triggers.
				805	*/
				806	jh->b_frozen_triggers = jh->b_triggers;
				807	}
				808
				809	/*
				810	* If the buffer is already part of the current transaction, then there
				811	* is nothing we need to do. If it is already part of a prior
				812	* transaction which we are still committing to disk, then we need to
				813	* make sure that we do not overwrite the old copy: we do copy-out to
				814	* preserve the copy going to disk. We also account the buffer against
				815	* the handle's metadata buffer credits (unless the buffer is already
				816	* part of the transaction, that is).
				817	*
				818	*/
				819	static int
				820	do_get_write_access(handle_t handle, struct journal_head jh,
				821	int force_copy)
				822	{
				823	struct buffer_head *bh;
				824	transaction_t *transaction = handle->h_transaction;
				825	journal_t *journal;
				826	int error;
				827	char *frozen_buffer = NULL;
				828	unsigned long start_lock, time_lock;
				829
				830	if (is_handle_aborted(handle))
				831	return -EROFS;
				832	journal = transaction->t_journal;
				833
				834	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
				835
				836	JBUFFER_TRACE(jh, "entry");
				837	repeat:
				838	bh = jh2bh(jh);
				839
				840	/* @@@ Need to check for errors here at some point. */
				841
				842	start_lock = jiffies;
				843	lock_buffer(bh);
				844	jbd_lock_bh_state(bh);
				845
				846	/* If it takes too long to lock the buffer, trace it */
				847	time_lock = jbd2_time_diff(start_lock, jiffies);
				848	if (time_lock > HZ/10)
				849	trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
				850	jiffies_to_msecs(time_lock));
				851
				852	/* We now hold the buffer lock so it is safe to query the buffer
				853	* state. Is the buffer dirty?
				854	*
				855	* If so, there are two possibilities. The buffer may be
				856	* non-journaled, and undergoing a quite legitimate writeback.
				857	* Otherwise, it is journaled, and we don't expect dirty buffers
				858	* in that state (the buffers should be marked JBD_Dirty
				859	* instead.) So either the IO is being done under our own
				860	* control and this is a bug, or it's a third party IO such as
				861	* dump(8) (which may leave the buffer scheduled for read ---
				862	* ie. locked but not dirty) or tune2fs (which may actually have
				863	* the buffer dirtied, ugh.) */
				864
				865	if (buffer_dirty(bh)) {
				866	/*
				867	* First question: is this buffer already part of the current
				868	* transaction or the existing committing transaction?
				869	*/
				870	if (jh->b_transaction) {
				871	J_ASSERT_JH(jh,
				872	jh->b_transaction == transaction \|\|
				873	jh->b_transaction ==
				874	journal->j_committing_transaction);
				875	if (jh->b_next_transaction)
				876	J_ASSERT_JH(jh, jh->b_next_transaction ==
				877	transaction);
				878	warn_dirty_buffer(bh);
				879	}
				880	/*
				881	* In any case we need to clean the dirty flag and we must
				882	* do it under the buffer lock to be sure we don't race
				883	* with running write-out.
				884	*/
				885	JBUFFER_TRACE(jh, "Journalling dirty buffer");
				886	clear_buffer_dirty(bh);
				887	set_buffer_jbddirty(bh);
				888	}
				889
				890	unlock_buffer(bh);
				891
				892	error = -EROFS;
				893	if (is_handle_aborted(handle)) {
				894	jbd_unlock_bh_state(bh);
				895	goto out;
				896	}
				897	error = 0;
				898
				899	/*
				900	* The buffer is already part of this transaction if b_transaction or
				901	* b_next_transaction points to it
				902	*/
				903	if (jh->b_transaction == transaction \|\|
				904	jh->b_next_transaction == transaction)
				905	goto done;
				906
				907	/*
				908	* this is the first time this transaction is touching this buffer,
				909	* reset the modified flag
				910	*/
				911	jh->b_modified = 0;
				912
				913	/*
				914	* If the buffer is not journaled right now, we need to make sure it
				915	* doesn't get written to disk before the caller actually commits the
				916	* new data
				917	*/
				918	if (!jh->b_transaction) {
				919	JBUFFER_TRACE(jh, "no transaction");
				920	J_ASSERT_JH(jh, !jh->b_next_transaction);
				921	JBUFFER_TRACE(jh, "file as BJ_Reserved");
				922	/*
				923	* Make sure all stores to jh (b_modified, b_frozen_data) are
				924	* visible before attaching it to the running transaction.
				925	* Paired with barrier in jbd2_write_access_granted()
				926	*/
				927	smp_wmb();
				928	spin_lock(&journal->j_list_lock);
				929	__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
				930	spin_unlock(&journal->j_list_lock);
				931	goto done;
				932	}
				933	/*
				934	* If there is already a copy-out version of this buffer, then we don't
				935	* need to make another one
				936	*/
				937	if (jh->b_frozen_data) {
				938	JBUFFER_TRACE(jh, "has frozen data");
				939	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
				940	goto attach_next;
				941	}
				942
				943	JBUFFER_TRACE(jh, "owned by older transaction");
				944	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
				945	J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
				946
				947	/*
				948	* There is one case we have to be very careful about. If the
				949	* committing transaction is currently writing this buffer out to disk
				950	* and has NOT made a copy-out, then we cannot modify the buffer
				951	* contents at all right now. The essence of copy-out is that it is
				952	* the extra copy, not the primary copy, which gets journaled. If the
				953	* primary copy is already going to disk then we cannot do copy-out
				954	* here.
				955	*/
				956	if (buffer_shadow(bh)) {
				957	JBUFFER_TRACE(jh, "on shadow: sleep");
				958	jbd_unlock_bh_state(bh);
				959	wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
				960	goto repeat;
				961	}
				962
				963	/*
				964	* Only do the copy if the currently-owning transaction still needs it.
				965	* If buffer isn't on BJ_Metadata list, the committing transaction is
				966	* past that stage (here we use the fact that BH_Shadow is set under
				967	* bh_state lock together with refiling to BJ_Shadow list and at this
				968	* point we know the buffer doesn't have BH_Shadow set).
				969	*
				970	* Subtle point, though: if this is a get_undo_access, then we will be
				971	* relying on the frozen_data to contain the new value of the
				972	* committed_data record after the transaction, so we HAVE to force the
				973	* frozen_data copy in that case.
				974	*/
				975	if (jh->b_jlist == BJ_Metadata \|\| force_copy) {
				976	JBUFFER_TRACE(jh, "generate frozen data");
				977	if (!frozen_buffer) {
				978	JBUFFER_TRACE(jh, "allocate memory for buffer");
				979	jbd_unlock_bh_state(bh);
				980	frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
				981	GFP_NOFS \| __GFP_NOFAIL);
				982	goto repeat;
				983	}
				984	jh->b_frozen_data = frozen_buffer;
				985	frozen_buffer = NULL;
				986	jbd2_freeze_jh_data(jh);
				987	}
				988	attach_next:
				989	/*
				990	* Make sure all stores to jh (b_modified, b_frozen_data) are visible
				991	* before attaching it to the running transaction. Paired with barrier
				992	* in jbd2_write_access_granted()
				993	*/
				994	smp_wmb();
				995	jh->b_next_transaction = transaction;
				996
				997	done:
				998	jbd_unlock_bh_state(bh);
				999
				1000	/*
				1001	* If we are about to journal a buffer, then any revoke pending on it is
				1002	* no longer valid
				1003	*/
				1004	jbd2_journal_cancel_revoke(handle, jh);
				1005
				1006	out:
				1007	if (unlikely(frozen_buffer)) /* It's usually NULL */
				1008	jbd2_free(frozen_buffer, bh->b_size);
				1009
				1010	JBUFFER_TRACE(jh, "exit");
				1011	return error;
				1012	}
				1013
				1014	/* Fast check whether buffer is already attached to the required transaction */
				1015	static bool jbd2_write_access_granted(handle_t handle, struct buffer_head bh,
				1016	bool undo)
				1017	{
				1018	struct journal_head *jh;
				1019	bool ret = false;
				1020
				1021	/* Dirty buffers require special handling... */
				1022	if (buffer_dirty(bh))
				1023	return false;
				1024
				1025	/*
				1026	* RCU protects us from dereferencing freed pages. So the checks we do
				1027	* are guaranteed not to oops. However the jh slab object can get freed
				1028	* & reallocated while we work with it. So we have to be careful. When
				1029	* we see jh attached to the running transaction, we know it must stay
				1030	* so until the transaction is committed. Thus jh won't be freed and
				1031	* will be attached to the same bh while we run. However it can
				1032	* happen jh gets freed, reallocated, and attached to the transaction
				1033	* just after we get pointer to it from bh. So we have to be careful
				1034	* and recheck jh still belongs to our bh before we return success.
				1035	*/
				1036	rcu_read_lock();
				1037	if (!buffer_jbd(bh))
				1038	goto out;
				1039	/* This should be bh2jh() but that doesn't work with inline functions */
				1040	jh = READ_ONCE(bh->b_private);
				1041	if (!jh)
				1042	goto out;
				1043	/* For undo access buffer must have data copied */
				1044	if (undo && !jh->b_committed_data)
				1045	goto out;
				1046	if (jh->b_transaction != handle->h_transaction &&
				1047	jh->b_next_transaction != handle->h_transaction)
				1048	goto out;
				1049	/*
				1050	* There are two reasons for the barrier here:
				1051	* 1) Make sure to fetch b_bh after we did previous checks so that we
				1052	* detect when jh went through free, realloc, attach to transaction
				1053	* while we were checking. Paired with implicit barrier in that path.
				1054	* 2) So that access to bh done after jbd2_write_access_granted()
				1055	* doesn't get reordered and see inconsistent state of concurrent
				1056	* do_get_write_access().
				1057	*/
				1058	smp_mb();
				1059	if (unlikely(jh->b_bh != bh))
				1060	goto out;
				1061	ret = true;
				1062	out:
				1063	rcu_read_unlock();
				1064	return ret;
				1065	}
				1066
				1067	/**
				1068	* int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
				1069	* @handle: transaction to add buffer modifications to
				1070	* @bh: bh to be used for metadata writes
				1071	*
				1072	* Returns: error code or 0 on success.
				1073	*
				1074	* In full data journalling mode the buffer may be of type BJ_AsyncData,
				1075	* because we're ``write()ing`` a buffer which is also part of a shared mapping.
				1076	*/
				1077
				1078	int jbd2_journal_get_write_access(handle_t handle, struct buffer_head bh)
				1079	{
				1080	struct journal_head *jh;
				1081	int rc;
				1082
				1083	if (jbd2_write_access_granted(handle, bh, false))
				1084	return 0;
				1085
				1086	jh = jbd2_journal_add_journal_head(bh);
				1087	/* We do not want to get caught playing with fields which the
				1088	* log thread also manipulates. Make sure that the buffer
				1089	* completes any outstanding IO before proceeding. */
				1090	rc = do_get_write_access(handle, jh, 0);
				1091	jbd2_journal_put_journal_head(jh);
				1092	return rc;
				1093	}
				1094
				1095
				1096	/*
				1097	* When the user wants to journal a newly created buffer_head
				1098	* (ie. getblk() returned a new buffer and we are going to populate it
				1099	* manually rather than reading off disk), then we need to keep the
				1100	* buffer_head locked until it has been completely filled with new
				1101	* data. In this case, we should be able to make the assertion that
				1102	* the bh is not already part of an existing transaction.
				1103	*
				1104	* The buffer should already be locked by the caller by this point.
				1105	* There is no lock ranking violation: it was a newly created,
				1106	* unlocked buffer beforehand. */
				1107
				1108	/**
				1109	* int jbd2_journal_get_create_access () - notify intent to use newly created bh
				1110	* @handle: transaction to new buffer to
				1111	* @bh: new buffer.
				1112	*
				1113	* Call this if you create a new bh.
				1114	*/
				1115	int jbd2_journal_get_create_access(handle_t handle, struct buffer_head bh)
				1116	{
				1117	transaction_t *transaction = handle->h_transaction;
				1118	journal_t *journal;
				1119	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
				1120	int err;
				1121
				1122	jbd_debug(5, "journal_head %p\n", jh);
				1123	err = -EROFS;
				1124	if (is_handle_aborted(handle))
				1125	goto out;
				1126	journal = transaction->t_journal;
				1127	err = 0;
				1128
				1129	JBUFFER_TRACE(jh, "entry");
				1130	/*
				1131	* The buffer may already belong to this transaction due to pre-zeroing
				1132	* in the filesystem's new_block code. It may also be on the previous,
				1133	* committing transaction's lists, but it HAS to be in Forget state in
				1134	* that case: the transaction must have deleted the buffer for it to be
				1135	* reused here.
				1136	*/
				1137	jbd_lock_bh_state(bh);
				1138	J_ASSERT_JH(jh, (jh->b_transaction == transaction \|\|
				1139	jh->b_transaction == NULL \|\|
				1140	(jh->b_transaction == journal->j_committing_transaction &&
				1141	jh->b_jlist == BJ_Forget)));
				1142
				1143	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
				1144	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
				1145
				1146	if (jh->b_transaction == NULL) {
				1147	/*
				1148	* Previous jbd2_journal_forget() could have left the buffer
				1149	* with jbddirty bit set because it was being committed. When
				1150	* the commit finished, we've filed the buffer for
				1151	* checkpointing and marked it dirty. Now we are reallocating
				1152	* the buffer so the transaction freeing it must have
				1153	* committed and so it's safe to clear the dirty bit.
				1154	*/
				1155	clear_buffer_dirty(jh2bh(jh));
				1156	/* first access by this transaction */
				1157	jh->b_modified = 0;
				1158
				1159	JBUFFER_TRACE(jh, "file as BJ_Reserved");
				1160	spin_lock(&journal->j_list_lock);
				1161	__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
				1162	spin_unlock(&journal->j_list_lock);
				1163	} else if (jh->b_transaction == journal->j_committing_transaction) {
				1164	/* first access by this transaction */
				1165	jh->b_modified = 0;
				1166
				1167	JBUFFER_TRACE(jh, "set next transaction");
				1168	spin_lock(&journal->j_list_lock);
				1169	jh->b_next_transaction = transaction;
				1170	spin_unlock(&journal->j_list_lock);
				1171	}
				1172	jbd_unlock_bh_state(bh);
				1173
				1174	/*
				1175	* akpm: I added this. ext3_alloc_branch can pick up new indirect
				1176	* blocks which contain freed but then revoked metadata. We need
				1177	* to cancel the revoke in case we end up freeing it yet again
				1178	* and the reallocating as data - this would cause a second revoke,
				1179	* which hits an assertion error.
				1180	*/
				1181	JBUFFER_TRACE(jh, "cancelling revoke");
				1182	jbd2_journal_cancel_revoke(handle, jh);
				1183	out:
				1184	jbd2_journal_put_journal_head(jh);
				1185	return err;
				1186	}
				1187
				1188	/**
				1189	* int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
				1190	* non-rewindable consequences
				1191	* @handle: transaction
				1192	* @bh: buffer to undo
				1193	*
				1194	* Sometimes there is a need to distinguish between metadata which has
				1195	* been committed to disk and that which has not. The ext3fs code uses
				1196	* this for freeing and allocating space, we have to make sure that we
				1197	* do not reuse freed space until the deallocation has been committed,
				1198	* since if we overwrote that space we would make the delete
				1199	* un-rewindable in case of a crash.
				1200	*
				1201	* To deal with that, jbd2_journal_get_undo_access requests write access to a
				1202	* buffer for parts of non-rewindable operations such as delete
				1203	* operations on the bitmaps. The journaling code must keep a copy of
				1204	* the buffer's contents prior to the undo_access call until such time
				1205	* as we know that the buffer has definitely been committed to disk.
				1206	*
				1207	* We never need to know which transaction the committed data is part
				1208	* of, buffers touched here are guaranteed to be dirtied later and so
				1209	* will be committed to a new transaction in due course, at which point
				1210	* we can discard the old committed data pointer.
				1211	*
				1212	* Returns error number or 0 on success.
				1213	*/
				1214	int jbd2_journal_get_undo_access(handle_t handle, struct buffer_head bh)
				1215	{
				1216	int err;
				1217	struct journal_head *jh;
				1218	char *committed_data = NULL;
				1219
				1220	if (jbd2_write_access_granted(handle, bh, true))
				1221	return 0;
				1222
				1223	jh = jbd2_journal_add_journal_head(bh);
				1224	JBUFFER_TRACE(jh, "entry");
				1225
				1226	/*
				1227	* Do this first --- it can drop the journal lock, so we want to
				1228	* make sure that obtaining the committed_data is done
				1229	* atomically wrt. completion of any outstanding commits.
				1230	*/
				1231	err = do_get_write_access(handle, jh, 1);
				1232	if (err)
				1233	goto out;
				1234
				1235	repeat:
				1236	if (!jh->b_committed_data)
				1237	committed_data = jbd2_alloc(jh2bh(jh)->b_size,
				1238	GFP_NOFS\|__GFP_NOFAIL);
				1239
				1240	jbd_lock_bh_state(bh);
				1241	if (!jh->b_committed_data) {
				1242	/* Copy out the current buffer contents into the
				1243	* preserved, committed copy. */
				1244	JBUFFER_TRACE(jh, "generate b_committed data");
				1245	if (!committed_data) {
				1246	jbd_unlock_bh_state(bh);
				1247	goto repeat;
				1248	}
				1249
				1250	jh->b_committed_data = committed_data;
				1251	committed_data = NULL;
				1252	memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
				1253	}
				1254	jbd_unlock_bh_state(bh);
				1255	out:
				1256	jbd2_journal_put_journal_head(jh);
				1257	if (unlikely(committed_data))
				1258	jbd2_free(committed_data, bh->b_size);
				1259	return err;
				1260	}
				1261
				1262	/**
				1263	* void jbd2_journal_set_triggers() - Add triggers for commit writeout
				1264	* @bh: buffer to trigger on
				1265	* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
				1266	*
				1267	* Set any triggers on this journal_head. This is always safe, because
				1268	* triggers for a committing buffer will be saved off, and triggers for
				1269	* a running transaction will match the buffer in that transaction.
				1270	*
				1271	* Call with NULL to clear the triggers.
				1272	*/
				1273	void jbd2_journal_set_triggers(struct buffer_head *bh,
				1274	struct jbd2_buffer_trigger_type *type)
				1275	{
				1276	struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
				1277
				1278	if (WARN_ON(!jh))
				1279	return;
				1280	jh->b_triggers = type;
				1281	jbd2_journal_put_journal_head(jh);
				1282	}
				1283
				1284	void jbd2_buffer_frozen_trigger(struct journal_head jh, void mapped_data,
				1285	struct jbd2_buffer_trigger_type *triggers)
				1286	{
				1287	struct buffer_head *bh = jh2bh(jh);
				1288
				1289	if (!triggers \|\| !triggers->t_frozen)
				1290	return;
				1291
				1292	triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
				1293	}
				1294
				1295	void jbd2_buffer_abort_trigger(struct journal_head *jh,
				1296	struct jbd2_buffer_trigger_type *triggers)
				1297	{
				1298	if (!triggers \|\| !triggers->t_abort)
				1299	return;
				1300
				1301	triggers->t_abort(triggers, jh2bh(jh));
				1302	}
				1303
				1304	/**
				1305	* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
				1306	* @handle: transaction to add buffer to.
				1307	* @bh: buffer to mark
				1308	*
				1309	* mark dirty metadata which needs to be journaled as part of the current
				1310	* transaction.
				1311	*
				1312	* The buffer must have previously had jbd2_journal_get_write_access()
				1313	* called so that it has a valid journal_head attached to the buffer
				1314	* head.
				1315	*
				1316	* The buffer is placed on the transaction's metadata list and is marked
				1317	* as belonging to the transaction.
				1318	*
				1319	* Returns error number or 0 on success.
				1320	*
				1321	* Special care needs to be taken if the buffer already belongs to the
				1322	* current committing transaction (in which case we should have frozen
				1323	* data present for that commit). In that case, we don't relink the
				1324	* buffer: that only gets done when the old transaction finally
				1325	* completes its commit.
				1326	*/
				1327	int jbd2_journal_dirty_metadata(handle_t handle, struct buffer_head bh)
				1328	{
				1329	transaction_t *transaction = handle->h_transaction;
				1330	journal_t *journal;
				1331	struct journal_head *jh;
				1332	int ret = 0;
				1333
				1334	if (is_handle_aborted(handle))
				1335	return -EROFS;
				1336	if (!buffer_jbd(bh))
				1337	return -EUCLEAN;
				1338
				1339	/*
				1340	* We don't grab jh reference here since the buffer must be part
				1341	* of the running transaction.
				1342	*/
				1343	jh = bh2jh(bh);
				1344	jbd_debug(5, "journal_head %p\n", jh);
				1345	JBUFFER_TRACE(jh, "entry");
				1346
				1347	/*
				1348	* This and the following assertions are unreliable since we may see jh
				1349	* in inconsistent state unless we grab bh_state lock. But this is
				1350	* crucial to catch bugs so let's do a reliable check until the
				1351	* lockless handling is fully proven.
				1352	*/
				1353	if (jh->b_transaction != transaction &&
				1354	jh->b_next_transaction != transaction) {
				1355	jbd_lock_bh_state(bh);
				1356	J_ASSERT_JH(jh, jh->b_transaction == transaction \|\|
				1357	jh->b_next_transaction == transaction);
				1358	jbd_unlock_bh_state(bh);
				1359	}
				1360	if (jh->b_modified == 1) {
				1361	/* If it's in our transaction it must be in BJ_Metadata list. */
				1362	if (jh->b_transaction == transaction &&
				1363	jh->b_jlist != BJ_Metadata) {
				1364	jbd_lock_bh_state(bh);
				1365	if (jh->b_transaction == transaction &&
				1366	jh->b_jlist != BJ_Metadata)
				1367	pr_err("JBD2: assertion failure: h_type=%u "
				1368	"h_line_no=%u block_no=%llu jlist=%u\n",
				1369	handle->h_type, handle->h_line_no,
				1370	(unsigned long long) bh->b_blocknr,
				1371	jh->b_jlist);
				1372	J_ASSERT_JH(jh, jh->b_transaction != transaction \|\|
				1373	jh->b_jlist == BJ_Metadata);
				1374	jbd_unlock_bh_state(bh);
				1375	}
				1376	goto out;
				1377	}
				1378
				1379	journal = transaction->t_journal;
				1380	jbd_lock_bh_state(bh);
				1381
				1382	if (jh->b_modified == 0) {
				1383	/*
				1384	* This buffer's got modified and becoming part
				1385	* of the transaction. This needs to be done
				1386	* once a transaction -bzzz
				1387	*/
				1388	if (handle->h_buffer_credits <= 0) {
				1389	ret = -ENOSPC;
				1390	goto out_unlock_bh;
				1391	}
				1392	jh->b_modified = 1;
				1393	handle->h_buffer_credits--;
				1394	}
				1395
				1396	/*
				1397	* fastpath, to avoid expensive locking. If this buffer is already
				1398	* on the running transaction's metadata list there is nothing to do.
				1399	* Nobody can take it off again because there is a handle open.
				1400	* I _think_ we're OK here with SMP barriers - a mistaken decision will
				1401	* result in this test being false, so we go in and take the locks.
				1402	*/
				1403	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
				1404	JBUFFER_TRACE(jh, "fastpath");
				1405	if (unlikely(jh->b_transaction !=
				1406	journal->j_running_transaction)) {
				1407	printk(KERN_ERR "JBD2: %s: "
				1408	"jh->b_transaction (%llu, %p, %u) != "
				1409	"journal->j_running_transaction (%p, %u)\n",
				1410	journal->j_devname,
				1411	(unsigned long long) bh->b_blocknr,
				1412	jh->b_transaction,
				1413	jh->b_transaction ? jh->b_transaction->t_tid : 0,
				1414	journal->j_running_transaction,
				1415	journal->j_running_transaction ?
				1416	journal->j_running_transaction->t_tid : 0);
				1417	ret = -EINVAL;
				1418	}
				1419	goto out_unlock_bh;
				1420	}
				1421
				1422	set_buffer_jbddirty(bh);
				1423
				1424	/*
				1425	* Metadata already on the current transaction list doesn't
				1426	* need to be filed. Metadata on another transaction's list must
				1427	* be committing, and will be refiled once the commit completes:
				1428	* leave it alone for now.
				1429	*/
				1430	if (jh->b_transaction != transaction) {
				1431	JBUFFER_TRACE(jh, "already on other transaction");
				1432	if (unlikely(((jh->b_transaction !=
				1433	journal->j_committing_transaction)) \|\|
				1434	(jh->b_next_transaction != transaction))) {
				1435	printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
				1436	"bad jh for block %llu: "
				1437	"transaction (%p, %u), "
				1438	"jh->b_transaction (%p, %u), "
				1439	"jh->b_next_transaction (%p, %u), jlist %u\n",
				1440	journal->j_devname,
				1441	(unsigned long long) bh->b_blocknr,
				1442	transaction, transaction->t_tid,
				1443	jh->b_transaction,
				1444	jh->b_transaction ?
				1445	jh->b_transaction->t_tid : 0,
				1446	jh->b_next_transaction,
				1447	jh->b_next_transaction ?
				1448	jh->b_next_transaction->t_tid : 0,
				1449	jh->b_jlist);
				1450	WARN_ON(1);
				1451	ret = -EINVAL;
				1452	}
				1453	/* And this case is illegal: we can't reuse another
				1454	* transaction's data buffer, ever. */
				1455	goto out_unlock_bh;
				1456	}
				1457
				1458	/* That test should have eliminated the following case: */
				1459	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
				1460
				1461	JBUFFER_TRACE(jh, "file as BJ_Metadata");
				1462	spin_lock(&journal->j_list_lock);
				1463	__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
				1464	spin_unlock(&journal->j_list_lock);
				1465	out_unlock_bh:
				1466	jbd_unlock_bh_state(bh);
				1467	out:
				1468	JBUFFER_TRACE(jh, "exit");
				1469	return ret;
				1470	}
				1471
				1472	/**
				1473	* void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
				1474	* @handle: transaction handle
				1475	* @bh: bh to 'forget'
				1476	*
				1477	* We can only do the bforget if there are no commits pending against the
				1478	* buffer. If the buffer is dirty in the current running transaction we
				1479	* can safely unlink it.
				1480	*
				1481	* bh may not be a journalled buffer at all - it may be a non-JBD
				1482	* buffer which came off the hashtable. Check for this.
				1483	*
				1484	* Decrements bh->b_count by one.
				1485	*
				1486	* Allow this call even if the handle has aborted --- it may be part of
				1487	* the caller's cleanup after an abort.
				1488	*/
				1489	int jbd2_journal_forget (handle_t handle, struct buffer_head bh)
				1490	{
				1491	transaction_t *transaction = handle->h_transaction;
				1492	journal_t *journal;
				1493	struct journal_head *jh;
				1494	int drop_reserve = 0;
				1495	int err = 0;
				1496	int was_modified = 0;
				1497
				1498	if (is_handle_aborted(handle))
				1499	return -EROFS;
				1500	journal = transaction->t_journal;
				1501
				1502	BUFFER_TRACE(bh, "entry");
				1503
				1504	jbd_lock_bh_state(bh);
				1505
				1506	if (!buffer_jbd(bh))
				1507	goto not_jbd;
				1508	jh = bh2jh(bh);
				1509
				1510	/* Critical error: attempting to delete a bitmap buffer, maybe?
				1511	* Don't do any jbd operations, and return an error. */
				1512	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
				1513	"inconsistent data on disk")) {
				1514	err = -EIO;
				1515	goto not_jbd;
				1516	}
				1517
				1518	/* keep track of whether or not this transaction modified us */
				1519	was_modified = jh->b_modified;
				1520
				1521	/*
				1522	* The buffer's going from the transaction, we must drop
				1523	* all references -bzzz
				1524	*/
				1525	jh->b_modified = 0;
				1526
				1527	if (jh->b_transaction == transaction) {
				1528	J_ASSERT_JH(jh, !jh->b_frozen_data);
				1529
				1530	/* If we are forgetting a buffer which is already part
				1531	* of this transaction, then we can just drop it from
				1532	* the transaction immediately. */
				1533	clear_buffer_dirty(bh);
				1534	clear_buffer_jbddirty(bh);
				1535
				1536	JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
				1537
				1538	/*
				1539	* we only want to drop a reference if this transaction
				1540	* modified the buffer
				1541	*/
				1542	if (was_modified)
				1543	drop_reserve = 1;
				1544
				1545	/*
				1546	* We are no longer going to journal this buffer.
				1547	* However, the commit of this transaction is still
				1548	* important to the buffer: the delete that we are now
				1549	* processing might obsolete an old log entry, so by
				1550	* committing, we can satisfy the buffer's checkpoint.
				1551	*
				1552	* So, if we have a checkpoint on the buffer, we should
				1553	* now refile the buffer on our BJ_Forget list so that
				1554	* we know to remove the checkpoint after we commit.
				1555	*/
				1556
				1557	spin_lock(&journal->j_list_lock);
				1558	if (jh->b_cp_transaction) {
				1559	__jbd2_journal_temp_unlink_buffer(jh);
				1560	__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
				1561	} else {
				1562	__jbd2_journal_unfile_buffer(jh);
				1563	if (!buffer_jbd(bh)) {
				1564	spin_unlock(&journal->j_list_lock);
				1565	jbd_unlock_bh_state(bh);
				1566	__bforget(bh);
				1567	goto drop;
				1568	}
				1569	}
				1570	spin_unlock(&journal->j_list_lock);
				1571	} else if (jh->b_transaction) {
				1572	J_ASSERT_JH(jh, (jh->b_transaction ==
				1573	journal->j_committing_transaction));
				1574	/* However, if the buffer is still owned by a prior
				1575	* (committing) transaction, we can't drop it yet... */
				1576	JBUFFER_TRACE(jh, "belongs to older transaction");
				1577	/* ... but we CAN drop it from the new transaction through
				1578	* marking the buffer as freed and set j_next_transaction to
				1579	* the new transaction, so that not only the commit code
				1580	* knows it should clear dirty bits when it is done with the
				1581	* buffer, but also the buffer can be checkpointed only
				1582	* after the new transaction commits. */
				1583
				1584	set_buffer_freed(bh);
				1585
				1586	if (!jh->b_next_transaction) {
				1587	spin_lock(&journal->j_list_lock);
				1588	jh->b_next_transaction = transaction;
				1589	spin_unlock(&journal->j_list_lock);
				1590	} else {
				1591	J_ASSERT(jh->b_next_transaction == transaction);
				1592
				1593	/*
				1594	* only drop a reference if this transaction modified
				1595	* the buffer
				1596	*/
				1597	if (was_modified)
				1598	drop_reserve = 1;
				1599	}
				1600	}
				1601
				1602	not_jbd:
				1603	jbd_unlock_bh_state(bh);
				1604	__brelse(bh);
				1605	drop:
				1606	if (drop_reserve) {
				1607	/* no need to reserve log space for this block -bzzz */
				1608	handle->h_buffer_credits++;
				1609	}
				1610	return err;
				1611	}
				1612
				1613	/**
				1614	* int jbd2_journal_stop() - complete a transaction
				1615	* @handle: transaction to complete.
				1616	*
				1617	* All done for a particular handle.
				1618	*
				1619	* There is not much action needed here. We just return any remaining
				1620	* buffer credits to the transaction and remove the handle. The only
				1621	* complication is that we need to start a commit operation if the
				1622	* filesystem is marked for synchronous update.
				1623	*
				1624	* jbd2_journal_stop itself will not usually return an error, but it may
				1625	* do so in unusual circumstances. In particular, expect it to
				1626	* return -EIO if a jbd2_journal_abort has been executed since the
				1627	* transaction began.
				1628	*/
				1629	int jbd2_journal_stop(handle_t *handle)
				1630	{
				1631	transaction_t *transaction = handle->h_transaction;
				1632	journal_t *journal;
				1633	int err = 0, wait_for_commit = 0;
				1634	tid_t tid;
				1635	pid_t pid;
				1636
				1637	if (!transaction) {
				1638	/*
				1639	* Handle is already detached from the transaction so
				1640	* there is nothing to do other than decrease a refcount,
				1641	* or free the handle if refcount drops to zero
				1642	*/
				1643	if (--handle->h_ref > 0) {
				1644	jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
				1645	handle->h_ref);
				1646	return err;
				1647	} else {
				1648	if (handle->h_rsv_handle)
				1649	jbd2_free_handle(handle->h_rsv_handle);
				1650	goto free_and_exit;
				1651	}
				1652	}
				1653	journal = transaction->t_journal;
				1654
				1655	J_ASSERT(journal_current_handle() == handle);
				1656
				1657	if (is_handle_aborted(handle))
				1658	err = -EIO;
				1659	else
				1660	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
				1661
				1662	if (--handle->h_ref > 0) {
				1663	jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
				1664	handle->h_ref);
				1665	return err;
				1666	}
				1667
				1668	jbd_debug(4, "Handle %p going down\n", handle);
				1669	trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
				1670	transaction->t_tid,
				1671	handle->h_type, handle->h_line_no,
				1672	jiffies - handle->h_start_jiffies,
				1673	handle->h_sync, handle->h_requested_credits,
				1674	(handle->h_requested_credits -
				1675	handle->h_buffer_credits));
				1676
				1677	/*
				1678	* Implement synchronous transaction batching. If the handle
				1679	* was synchronous, don't force a commit immediately. Let's
				1680	* yield and let another thread piggyback onto this
				1681	* transaction. Keep doing that while new threads continue to
				1682	* arrive. It doesn't cost much - we're about to run a commit
				1683	* and sleep on IO anyway. Speeds up many-threaded, many-dir
				1684	* operations by 30x or more...
				1685	*
				1686	* We try and optimize the sleep time against what the
				1687	* underlying disk can do, instead of having a static sleep
				1688	* time. This is useful for the case where our storage is so
				1689	* fast that it is more optimal to go ahead and force a flush
				1690	* and wait for the transaction to be committed than it is to
				1691	* wait for an arbitrary amount of time for new writers to
				1692	* join the transaction. We achieve this by measuring how
				1693	* long it takes to commit a transaction, and compare it with
				1694	* how long this transaction has been running, and if run time
				1695	* < commit time then we sleep for the delta and commit. This
				1696	* greatly helps super fast disks that would see slowdowns as
				1697	* more threads started doing fsyncs.
				1698	*
				1699	* But don't do this if this process was the most recent one
				1700	* to perform a synchronous write. We do this to detect the
				1701	* case where a single process is doing a stream of sync
				1702	* writes. No point in waiting for joiners in that case.
				1703	*
				1704	* Setting max_batch_time to 0 disables this completely.
				1705	*/
				1706	pid = current->pid;
				1707	if (handle->h_sync && journal->j_last_sync_writer != pid &&
				1708	journal->j_max_batch_time) {
				1709	u64 commit_time, trans_time;
				1710
				1711	journal->j_last_sync_writer = pid;
				1712
				1713	read_lock(&journal->j_state_lock);
				1714	commit_time = journal->j_average_commit_time;
				1715	read_unlock(&journal->j_state_lock);
				1716
				1717	trans_time = ktime_to_ns(ktime_sub(ktime_get(),
				1718	transaction->t_start_time));
				1719
				1720	commit_time = max_t(u64, commit_time,
				1721	1000*journal->j_min_batch_time);
				1722	commit_time = min_t(u64, commit_time,
				1723	1000*journal->j_max_batch_time);
				1724
				1725	if (trans_time < commit_time) {
				1726	ktime_t expires = ktime_add_ns(ktime_get(),
				1727	commit_time);
				1728	set_current_state(TASK_UNINTERRUPTIBLE);
				1729	schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
				1730	}
				1731	}
				1732
				1733	if (handle->h_sync)
				1734	transaction->t_synchronous_commit = 1;
				1735	current->journal_info = NULL;
				1736	atomic_sub(handle->h_buffer_credits,
				1737	&transaction->t_outstanding_credits);
				1738
				1739	/*
				1740	* If the handle is marked SYNC, we need to set another commit
				1741	* going! We also want to force a commit if the current
				1742	* transaction is occupying too much of the log, or if the
				1743	* transaction is too old now.
				1744	*/
				1745	if (handle->h_sync \|\|
				1746	(atomic_read(&transaction->t_outstanding_credits) >
				1747	journal->j_max_transaction_buffers) \|\|
				1748	time_after_eq(jiffies, transaction->t_expires)) {
				1749	/* Do this even for aborted journals: an abort still
				1750	* completes the commit thread, it just doesn't write
				1751	* anything to disk. */
				1752
				1753	jbd_debug(2, "transaction too old, requesting commit for "
				1754	"handle %p\n", handle);
				1755	/* This is non-blocking */
				1756	jbd2_log_start_commit(journal, transaction->t_tid);
				1757
				1758	/*
				1759	* Special case: JBD2_SYNC synchronous updates require us
				1760	* to wait for the commit to complete.
				1761	*/
				1762	if (handle->h_sync && !(current->flags & PF_MEMALLOC))
				1763	wait_for_commit = 1;
				1764	}
				1765
				1766	/*
				1767	* Once we drop t_updates, if it goes to zero the transaction
				1768	* could start committing on us and eventually disappear. So
				1769	* once we do this, we must not dereference transaction
				1770	* pointer again.
				1771	*/
				1772	tid = transaction->t_tid;
				1773	if (atomic_dec_and_test(&transaction->t_updates)) {
				1774	wake_up(&journal->j_wait_updates);
				1775	if (journal->j_barrier_count)
				1776	wake_up(&journal->j_wait_transaction_locked);
				1777	}
				1778	if (wait_for_commit)
				1779	err = jbd2_log_wait_commit(journal, tid);
				1780
				1781	if (handle->h_rsv_handle)
				1782	jbd2_journal_free_reserved(handle->h_rsv_handle);
				1783	free_and_exit:
				1784	/*
				1785	* Scope of the GFP_NOFS context is over here and so we can restore the
				1786	* original alloc context.
				1787	*/
				1788	memalloc_nofs_restore(handle->saved_alloc_context);
				1789	jbd2_free_handle(handle);
				1790	return err;
				1791	}
				1792
				1793	/*
				1794	*
				1795	* List management code snippets: various functions for manipulating the
				1796	* transaction buffer lists.
				1797	*
				1798	*/
				1799
				1800	/*
				1801	* Append a buffer to a transaction list, given the transaction's list head
				1802	* pointer.
				1803	*
				1804	* j_list_lock is held.
				1805	*
				1806	* jbd_lock_bh_state(jh2bh(jh)) is held.
				1807	*/
				1808
				1809	static inline void
				1810	__blist_add_buffer(struct journal_head *list, struct journal_head jh)
				1811	{
				1812	if (!*list) {
				1813	jh->b_tnext = jh->b_tprev = jh;
				1814	*list = jh;
				1815	} else {
				1816	/* Insert at the tail of the list to preserve order */
				1817	struct journal_head first = list, *last = first->b_tprev;
				1818	jh->b_tprev = last;
				1819	jh->b_tnext = first;
				1820	last->b_tnext = first->b_tprev = jh;
				1821	}
				1822	}
				1823
				1824	/*
				1825	* Remove a buffer from a transaction list, given the transaction's list
				1826	* head pointer.
				1827	*
				1828	* Called with j_list_lock held, and the journal may not be locked.
				1829	*
				1830	* jbd_lock_bh_state(jh2bh(jh)) is held.
				1831	*/
				1832
				1833	static inline void
				1834	__blist_del_buffer(struct journal_head *list, struct journal_head jh)
				1835	{
				1836	if (*list == jh) {
				1837	*list = jh->b_tnext;
				1838	if (*list == jh)
				1839	*list = NULL;
				1840	}
				1841	jh->b_tprev->b_tnext = jh->b_tnext;
				1842	jh->b_tnext->b_tprev = jh->b_tprev;
				1843	}
				1844
				1845	/*
				1846	* Remove a buffer from the appropriate transaction list.
				1847	*
				1848	* Note that this function can change the value of
				1849	* bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
				1850	* t_reserved_list. If the caller is holding onto a copy of one of these
				1851	* pointers, it could go bad. Generally the caller needs to re-read the
				1852	* pointer from the transaction_t.
				1853	*
				1854	* Called under j_list_lock.
				1855	*/
				1856	static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
				1857	{
				1858	struct journal_head **list = NULL;
				1859	transaction_t *transaction;
				1860	struct buffer_head *bh = jh2bh(jh);
				1861
				1862	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
				1863	transaction = jh->b_transaction;
				1864	if (transaction)
				1865	assert_spin_locked(&transaction->t_journal->j_list_lock);
				1866
				1867	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
				1868	if (jh->b_jlist != BJ_None)
				1869	J_ASSERT_JH(jh, transaction != NULL);
				1870
				1871	switch (jh->b_jlist) {
				1872	case BJ_None:
				1873	return;
				1874	case BJ_Metadata:
				1875	transaction->t_nr_buffers--;
				1876	J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
				1877	list = &transaction->t_buffers;
				1878	break;
				1879	case BJ_Forget:
				1880	list = &transaction->t_forget;
				1881	break;
				1882	case BJ_Shadow:
				1883	list = &transaction->t_shadow_list;
				1884	break;
				1885	case BJ_Reserved:
				1886	list = &transaction->t_reserved_list;
				1887	break;
				1888	}
				1889
				1890	__blist_del_buffer(list, jh);
				1891	jh->b_jlist = BJ_None;
				1892	if (transaction && is_journal_aborted(transaction->t_journal))
				1893	clear_buffer_jbddirty(bh);
				1894	else if (test_clear_buffer_jbddirty(bh))
				1895	mark_buffer_dirty(bh); /* Expose it to the VM */
				1896	}
				1897
				1898	/*
				1899	* Remove buffer from all transactions.
				1900	*
				1901	* Called with bh_state lock and j_list_lock
				1902	*
				1903	* jh and bh may be already freed when this function returns.
				1904	*/
				1905	static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
				1906	{
				1907	__jbd2_journal_temp_unlink_buffer(jh);
				1908	jh->b_transaction = NULL;
				1909	jbd2_journal_put_journal_head(jh);
				1910	}
				1911
				1912	void jbd2_journal_unfile_buffer(journal_t journal, struct journal_head jh)
				1913	{
				1914	struct buffer_head *bh = jh2bh(jh);
				1915
				1916	/* Get reference so that buffer cannot be freed before we unlock it */
				1917	get_bh(bh);
				1918	jbd_lock_bh_state(bh);
				1919	spin_lock(&journal->j_list_lock);
				1920	__jbd2_journal_unfile_buffer(jh);
				1921	spin_unlock(&journal->j_list_lock);
				1922	jbd_unlock_bh_state(bh);
				1923	__brelse(bh);
				1924	}
				1925
				1926	/*
				1927	* Called from jbd2_journal_try_to_free_buffers().
				1928	*
				1929	* Called under jbd_lock_bh_state(bh)
				1930	*/
				1931	static void
				1932	__journal_try_to_free_buffer(journal_t journal, struct buffer_head bh)
				1933	{
				1934	struct journal_head *jh;
				1935
				1936	jh = bh2jh(bh);
				1937
				1938	if (buffer_locked(bh) \|\| buffer_dirty(bh))
				1939	goto out;
				1940
				1941	if (jh->b_next_transaction != NULL \|\| jh->b_transaction != NULL)
				1942	goto out;
				1943
				1944	spin_lock(&journal->j_list_lock);
				1945	if (jh->b_cp_transaction != NULL) {
				1946	/* written-back checkpointed metadata buffer */
				1947	JBUFFER_TRACE(jh, "remove from checkpoint list");
				1948	__jbd2_journal_remove_checkpoint(jh);
				1949	}
				1950	spin_unlock(&journal->j_list_lock);
				1951	out:
				1952	return;
				1953	}
				1954
				1955	/**
				1956	* int jbd2_journal_try_to_free_buffers() - try to free page buffers.
				1957	* @journal: journal for operation
				1958	* @page: to try and free
				1959	* @gfp_mask: we use the mask to detect how hard should we try to release
				1960	* buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
				1961	* code to release the buffers.
				1962	*
				1963	*
				1964	* For all the buffers on this page,
				1965	* if they are fully written out ordered data, move them onto BUF_CLEAN
				1966	* so try_to_free_buffers() can reap them.
				1967	*
				1968	* This function returns non-zero if we wish try_to_free_buffers()
				1969	* to be called. We do this if the page is releasable by try_to_free_buffers().
				1970	* We also do it if the page has locked or dirty buffers and the caller wants
				1971	* us to perform sync or async writeout.
				1972	*
				1973	* This complicates JBD locking somewhat. We aren't protected by the
				1974	* BKL here. We wish to remove the buffer from its committing or
				1975	* running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
				1976	*
				1977	* This may change the value of transaction_t->t_datalist, so anyone
				1978	* who looks at t_datalist needs to lock against this function.
				1979	*
				1980	* Even worse, someone may be doing a jbd2_journal_dirty_data on this
				1981	* buffer. So we need to lock against that. jbd2_journal_dirty_data()
				1982	* will come out of the lock with the buffer dirty, which makes it
				1983	* ineligible for release here.
				1984	*
				1985	* Who else is affected by this? hmm... Really the only contender
				1986	* is do_get_write_access() - it could be looking at the buffer while
				1987	* journal_try_to_free_buffer() is changing its state. But that
				1988	* cannot happen because we never reallocate freed data as metadata
				1989	* while the data is part of a transaction. Yes?
				1990	*
				1991	* Return 0 on failure, 1 on success
				1992	*/
				1993	int jbd2_journal_try_to_free_buffers(journal_t *journal,
				1994	struct page *page, gfp_t gfp_mask)
				1995	{
				1996	struct buffer_head *head;
				1997	struct buffer_head *bh;
				1998	int ret = 0;
				1999
				2000	J_ASSERT(PageLocked(page));
				2001
				2002	head = page_buffers(page);
				2003	bh = head;
				2004	do {
				2005	struct journal_head *jh;
				2006
				2007	/*
				2008	* We take our own ref against the journal_head here to avoid
				2009	* having to add tons of locking around each instance of
				2010	* jbd2_journal_put_journal_head().
				2011	*/
				2012	jh = jbd2_journal_grab_journal_head(bh);
				2013	if (!jh)
				2014	continue;
				2015
				2016	jbd_lock_bh_state(bh);
				2017	__journal_try_to_free_buffer(journal, bh);
				2018	jbd2_journal_put_journal_head(jh);
				2019	jbd_unlock_bh_state(bh);
				2020	if (buffer_jbd(bh))
				2021	goto busy;
				2022	} while ((bh = bh->b_this_page) != head);
				2023
				2024	ret = try_to_free_buffers(page);
				2025
				2026	busy:
				2027	return ret;
				2028	}
				2029
				2030	/*
				2031	* This buffer is no longer needed. If it is on an older transaction's
				2032	* checkpoint list we need to record it on this transaction's forget list
				2033	* to pin this buffer (and hence its checkpointing transaction) down until
				2034	* this transaction commits. If the buffer isn't on a checkpoint list, we
				2035	* release it.
				2036	* Returns non-zero if JBD no longer has an interest in the buffer.
				2037	*
				2038	* Called under j_list_lock.
				2039	*
				2040	* Called under jbd_lock_bh_state(bh).
				2041	*/
				2042	static int __dispose_buffer(struct journal_head jh, transaction_t transaction)
				2043	{
				2044	int may_free = 1;
				2045	struct buffer_head *bh = jh2bh(jh);
				2046
				2047	if (jh->b_cp_transaction) {
				2048	JBUFFER_TRACE(jh, "on running+cp transaction");
				2049	__jbd2_journal_temp_unlink_buffer(jh);
				2050	/*
				2051	* We don't want to write the buffer anymore, clear the
				2052	* bit so that we don't confuse checks in
				2053	* __journal_file_buffer
				2054	*/
				2055	clear_buffer_dirty(bh);
				2056	__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
				2057	may_free = 0;
				2058	} else {
				2059	JBUFFER_TRACE(jh, "on running transaction");
				2060	__jbd2_journal_unfile_buffer(jh);
				2061	}
				2062	return may_free;
				2063	}
				2064
				2065	/*
				2066	* jbd2_journal_invalidatepage
				2067	*
				2068	* This code is tricky. It has a number of cases to deal with.
				2069	*
				2070	* There are two invariants which this code relies on:
				2071	*
				2072	* i_size must be updated on disk before we start calling invalidatepage on the
				2073	* data.
				2074	*
				2075	* This is done in ext3 by defining an ext3_setattr method which
				2076	* updates i_size before truncate gets going. By maintaining this
				2077	* invariant, we can be sure that it is safe to throw away any buffers
				2078	* attached to the current transaction: once the transaction commits,
				2079	* we know that the data will not be needed.
				2080	*
				2081	* Note however that we can not throw away data belonging to the
				2082	* previous, committing transaction!
				2083	*
				2084	* Any disk blocks which are part of the previous, committing
				2085	* transaction (and which therefore cannot be discarded immediately) are
				2086	* not going to be reused in the new running transaction
				2087	*
				2088	* The bitmap committed_data images guarantee this: any block which is
				2089	* allocated in one transaction and removed in the next will be marked
				2090	* as in-use in the committed_data bitmap, so cannot be reused until
				2091	* the next transaction to delete the block commits. This means that
				2092	* leaving committing buffers dirty is quite safe: the disk blocks
				2093	* cannot be reallocated to a different file and so buffer aliasing is
				2094	* not possible.
				2095	*
				2096	*
				2097	* The above applies mainly to ordered data mode. In writeback mode we
				2098	* don't make guarantees about the order in which data hits disk --- in
				2099	* particular we don't guarantee that new dirty data is flushed before
				2100	* transaction commit --- so it is always safe just to discard data
				2101	* immediately in that mode. --sct
				2102	*/
				2103
				2104	/*
				2105	* The journal_unmap_buffer helper function returns zero if the buffer
				2106	* concerned remains pinned as an anonymous buffer belonging to an older
				2107	* transaction.
				2108	*
				2109	* We're outside-transaction here. Either or both of j_running_transaction
				2110	* and j_committing_transaction may be NULL.
				2111	*/
				2112	static int journal_unmap_buffer(journal_t journal, struct buffer_head bh,
				2113	int partial_page)
				2114	{
				2115	transaction_t *transaction;
				2116	struct journal_head *jh;
				2117	int may_free = 1;
				2118
				2119	BUFFER_TRACE(bh, "entry");
				2120
				2121	/*
				2122	* It is safe to proceed here without the j_list_lock because the
				2123	* buffers cannot be stolen by try_to_free_buffers as long as we are
				2124	* holding the page lock. --sct
				2125	*/
				2126
				2127	if (!buffer_jbd(bh))
				2128	goto zap_buffer_unlocked;
				2129
				2130	/* OK, we have data buffer in journaled mode */
				2131	write_lock(&journal->j_state_lock);
				2132	jbd_lock_bh_state(bh);
				2133	spin_lock(&journal->j_list_lock);
				2134
				2135	jh = jbd2_journal_grab_journal_head(bh);
				2136	if (!jh)
				2137	goto zap_buffer_no_jh;
				2138
				2139	/*
				2140	* We cannot remove the buffer from checkpoint lists until the
				2141	* transaction adding inode to orphan list (let's call it T)
				2142	* is committed. Otherwise if the transaction changing the
				2143	* buffer would be cleaned from the journal before T is
				2144	* committed, a crash will cause that the correct contents of
				2145	* the buffer will be lost. On the other hand we have to
				2146	* clear the buffer dirty bit at latest at the moment when the
				2147	* transaction marking the buffer as freed in the filesystem
				2148	* structures is committed because from that moment on the
				2149	* block can be reallocated and used by a different page.
				2150	* Since the block hasn't been freed yet but the inode has
				2151	* already been added to orphan list, it is safe for us to add
				2152	* the buffer to BJ_Forget list of the newest transaction.
				2153	*
				2154	* Also we have to clear buffer_mapped flag of a truncated buffer
				2155	* because the buffer_head may be attached to the page straddling
				2156	* i_size (can happen only when blocksize < pagesize) and thus the
				2157	* buffer_head can be reused when the file is extended again. So we end
				2158	* up keeping around invalidated buffers attached to transactions'
				2159	* BJ_Forget list just to stop checkpointing code from cleaning up
				2160	* the transaction this buffer was modified in.
				2161	*/
				2162	transaction = jh->b_transaction;
				2163	if (transaction == NULL) {
				2164	/* First case: not on any transaction. If it
				2165	* has no checkpoint link, then we can zap it:
				2166	* it's a writeback-mode buffer so we don't care
				2167	* if it hits disk safely. */
				2168	if (!jh->b_cp_transaction) {
				2169	JBUFFER_TRACE(jh, "not on any transaction: zap");
				2170	goto zap_buffer;
				2171	}
				2172
				2173	if (!buffer_dirty(bh)) {
				2174	/* bdflush has written it. We can drop it now */
				2175	__jbd2_journal_remove_checkpoint(jh);
				2176	goto zap_buffer;
				2177	}
				2178
				2179	/* OK, it must be in the journal but still not
				2180	* written fully to disk: it's metadata or
				2181	* journaled data... */
				2182
				2183	if (journal->j_running_transaction) {
				2184	/* ... and once the current transaction has
				2185	* committed, the buffer won't be needed any
				2186	* longer. */
				2187	JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
				2188	may_free = __dispose_buffer(jh,
				2189	journal->j_running_transaction);
				2190	goto zap_buffer;
				2191	} else {
				2192	/* There is no currently-running transaction. So the
				2193	* orphan record which we wrote for this file must have
				2194	* passed into commit. We must attach this buffer to
				2195	* the committing transaction, if it exists. */
				2196	if (journal->j_committing_transaction) {
				2197	JBUFFER_TRACE(jh, "give to committing trans");
				2198	may_free = __dispose_buffer(jh,
				2199	journal->j_committing_transaction);
				2200	goto zap_buffer;
				2201	} else {
				2202	/* The orphan record's transaction has
				2203	* committed. We can cleanse this buffer */
				2204	clear_buffer_jbddirty(bh);
				2205	__jbd2_journal_remove_checkpoint(jh);
				2206	goto zap_buffer;
				2207	}
				2208	}
				2209	} else if (transaction == journal->j_committing_transaction) {
				2210	JBUFFER_TRACE(jh, "on committing transaction");
				2211	/*
				2212	* The buffer is committing, we simply cannot touch
				2213	* it. If the page is straddling i_size we have to wait
				2214	* for commit and try again.
				2215	*/
				2216	if (partial_page) {
				2217	jbd2_journal_put_journal_head(jh);
				2218	spin_unlock(&journal->j_list_lock);
				2219	jbd_unlock_bh_state(bh);
				2220	write_unlock(&journal->j_state_lock);
				2221	return -EBUSY;
				2222	}
				2223	/*
				2224	* OK, buffer won't be reachable after truncate. We just set
				2225	* j_next_transaction to the running transaction (if there is
				2226	* one) and mark buffer as freed so that commit code knows it
				2227	* should clear dirty bits when it is done with the buffer.
				2228	*/
				2229	set_buffer_freed(bh);
				2230	if (journal->j_running_transaction && buffer_jbddirty(bh))
				2231	jh->b_next_transaction = journal->j_running_transaction;
				2232	jbd2_journal_put_journal_head(jh);
				2233	spin_unlock(&journal->j_list_lock);
				2234	jbd_unlock_bh_state(bh);
				2235	write_unlock(&journal->j_state_lock);
				2236	return 0;
				2237	} else {
				2238	/* Good, the buffer belongs to the running transaction.
				2239	* We are writing our own transaction's data, not any
				2240	* previous one's, so it is safe to throw it away
				2241	* (remember that we expect the filesystem to have set
				2242	* i_size already for this truncate so recovery will not
				2243	* expose the disk blocks we are discarding here.) */
				2244	J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
				2245	JBUFFER_TRACE(jh, "on running transaction");
				2246	may_free = __dispose_buffer(jh, transaction);
				2247	}
				2248
				2249	zap_buffer:
				2250	/*
				2251	* This is tricky. Although the buffer is truncated, it may be reused
				2252	* if blocksize < pagesize and it is attached to the page straddling
				2253	* EOF. Since the buffer might have been added to BJ_Forget list of the
				2254	* running transaction, journal_get_write_access() won't clear
				2255	* b_modified and credit accounting gets confused. So clear b_modified
				2256	* here.
				2257	*/
				2258	jh->b_modified = 0;
				2259	jbd2_journal_put_journal_head(jh);
				2260	zap_buffer_no_jh:
				2261	spin_unlock(&journal->j_list_lock);
				2262	jbd_unlock_bh_state(bh);
				2263	write_unlock(&journal->j_state_lock);
				2264	zap_buffer_unlocked:
				2265	clear_buffer_dirty(bh);
				2266	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
				2267	clear_buffer_mapped(bh);
				2268	clear_buffer_req(bh);
				2269	clear_buffer_new(bh);
				2270	clear_buffer_delay(bh);
				2271	clear_buffer_unwritten(bh);
				2272	bh->b_bdev = NULL;
				2273	return may_free;
				2274	}
				2275
				2276	/**
				2277	* void jbd2_journal_invalidatepage()
				2278	* @journal: journal to use for flush...
				2279	* @page: page to flush
				2280	* @offset: start of the range to invalidate
				2281	* @length: length of the range to invalidate
				2282	*
				2283	* Reap page buffers containing data after in the specified range in page.
				2284	* Can return -EBUSY if buffers are part of the committing transaction and
				2285	* the page is straddling i_size. Caller then has to wait for current commit
				2286	* and try again.
				2287	*/
				2288	int jbd2_journal_invalidatepage(journal_t *journal,
				2289	struct page *page,
				2290	unsigned int offset,
				2291	unsigned int length)
				2292	{
				2293	struct buffer_head head, bh, *next;
				2294	unsigned int stop = offset + length;
				2295	unsigned int curr_off = 0;
				2296	int partial_page = (offset \|\| length < PAGE_SIZE);
				2297	int may_free = 1;
				2298	int ret = 0;
				2299
				2300	if (!PageLocked(page))
				2301	BUG();
				2302	if (!page_has_buffers(page))
				2303	return 0;
				2304
				2305	BUG_ON(stop > PAGE_SIZE \|\| stop < length);
				2306
				2307	/* We will potentially be playing with lists other than just the
				2308	* data lists (especially for journaled data mode), so be
				2309	* cautious in our locking. */
				2310
				2311	head = bh = page_buffers(page);
				2312	do {
				2313	unsigned int next_off = curr_off + bh->b_size;
				2314	next = bh->b_this_page;
				2315
				2316	if (next_off > stop)
				2317	return 0;
				2318
				2319	if (offset <= curr_off) {
				2320	/* This block is wholly outside the truncation point */
				2321	lock_buffer(bh);
				2322	ret = journal_unmap_buffer(journal, bh, partial_page);
				2323	unlock_buffer(bh);
				2324	if (ret < 0)
				2325	return ret;
				2326	may_free &= ret;
				2327	}
				2328	curr_off = next_off;
				2329	bh = next;
				2330
				2331	} while (bh != head);
				2332
				2333	if (!partial_page) {
				2334	if (may_free && try_to_free_buffers(page))
				2335	J_ASSERT(!page_has_buffers(page));
				2336	}
				2337	return 0;
				2338	}
				2339
				2340	/*
				2341	* File a buffer on the given transaction list.
				2342	*/
				2343	void __jbd2_journal_file_buffer(struct journal_head *jh,
				2344	transaction_t *transaction, int jlist)
				2345	{
				2346	struct journal_head **list = NULL;
				2347	int was_dirty = 0;
				2348	struct buffer_head *bh = jh2bh(jh);
				2349
				2350	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
				2351	assert_spin_locked(&transaction->t_journal->j_list_lock);
				2352
				2353	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
				2354	J_ASSERT_JH(jh, jh->b_transaction == transaction \|\|
				2355	jh->b_transaction == NULL);
				2356
				2357	if (jh->b_transaction && jh->b_jlist == jlist)
				2358	return;
				2359
				2360	if (jlist == BJ_Metadata \|\| jlist == BJ_Reserved \|\|
				2361	jlist == BJ_Shadow \|\| jlist == BJ_Forget) {
				2362	/*
				2363	* For metadata buffers, we track dirty bit in buffer_jbddirty
				2364	* instead of buffer_dirty. We should not see a dirty bit set
				2365	* here because we clear it in do_get_write_access but e.g.
				2366	* tune2fs can modify the sb and set the dirty bit at any time
				2367	* so we try to gracefully handle that.
				2368	*/
				2369	if (buffer_dirty(bh))
				2370	warn_dirty_buffer(bh);
				2371	if (test_clear_buffer_dirty(bh) \|\|
				2372	test_clear_buffer_jbddirty(bh))
				2373	was_dirty = 1;
				2374	}
				2375
				2376	if (jh->b_transaction)
				2377	__jbd2_journal_temp_unlink_buffer(jh);
				2378	else
				2379	jbd2_journal_grab_journal_head(bh);
				2380	jh->b_transaction = transaction;
				2381
				2382	switch (jlist) {
				2383	case BJ_None:
				2384	J_ASSERT_JH(jh, !jh->b_committed_data);
				2385	J_ASSERT_JH(jh, !jh->b_frozen_data);
				2386	return;
				2387	case BJ_Metadata:
				2388	transaction->t_nr_buffers++;
				2389	list = &transaction->t_buffers;
				2390	break;
				2391	case BJ_Forget:
				2392	list = &transaction->t_forget;
				2393	break;
				2394	case BJ_Shadow:
				2395	list = &transaction->t_shadow_list;
				2396	break;
				2397	case BJ_Reserved:
				2398	list = &transaction->t_reserved_list;
				2399	break;
				2400	}
				2401
				2402	__blist_add_buffer(list, jh);
				2403	jh->b_jlist = jlist;
				2404
				2405	if (was_dirty)
				2406	set_buffer_jbddirty(bh);
				2407	}
				2408
				2409	void jbd2_journal_file_buffer(struct journal_head *jh,
				2410	transaction_t *transaction, int jlist)
				2411	{
				2412	jbd_lock_bh_state(jh2bh(jh));
				2413	spin_lock(&transaction->t_journal->j_list_lock);
				2414	__jbd2_journal_file_buffer(jh, transaction, jlist);
				2415	spin_unlock(&transaction->t_journal->j_list_lock);
				2416	jbd_unlock_bh_state(jh2bh(jh));
				2417	}
				2418
				2419	/*
				2420	* Remove a buffer from its current buffer list in preparation for
				2421	* dropping it from its current transaction entirely. If the buffer has
				2422	* already started to be used by a subsequent transaction, refile the
				2423	* buffer on that transaction's metadata list.
				2424	*
				2425	* Called under j_list_lock
				2426	* Called under jbd_lock_bh_state(jh2bh(jh))
				2427	*
				2428	* jh and bh may be already free when this function returns
				2429	*/
				2430	void __jbd2_journal_refile_buffer(struct journal_head *jh)
				2431	{
				2432	int was_dirty, jlist;
				2433	struct buffer_head *bh = jh2bh(jh);
				2434
				2435	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
				2436	if (jh->b_transaction)
				2437	assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
				2438
				2439	/* If the buffer is now unused, just drop it. */
				2440	if (jh->b_next_transaction == NULL) {
				2441	__jbd2_journal_unfile_buffer(jh);
				2442	return;
				2443	}
				2444
				2445	/*
				2446	* It has been modified by a later transaction: add it to the new
				2447	* transaction's metadata list.
				2448	*/
				2449
				2450	was_dirty = test_clear_buffer_jbddirty(bh);
				2451	__jbd2_journal_temp_unlink_buffer(jh);
				2452	/*
				2453	* We set b_transaction here because b_next_transaction will inherit
				2454	* our jh reference and thus __jbd2_journal_file_buffer() must not
				2455	* take a new one.
				2456	*/
				2457	jh->b_transaction = jh->b_next_transaction;
				2458	jh->b_next_transaction = NULL;
				2459	if (buffer_freed(bh))
				2460	jlist = BJ_Forget;
				2461	else if (jh->b_modified)
				2462	jlist = BJ_Metadata;
				2463	else
				2464	jlist = BJ_Reserved;
				2465	__jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
				2466	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
				2467
				2468	if (was_dirty)
				2469	set_buffer_jbddirty(bh);
				2470	}
				2471
				2472	/*
				2473	* __jbd2_journal_refile_buffer() with necessary locking added. We take our
				2474	* bh reference so that we can safely unlock bh.
				2475	*
				2476	* The jh and bh may be freed by this call.
				2477	*/
				2478	void jbd2_journal_refile_buffer(journal_t journal, struct journal_head jh)
				2479	{
				2480	struct buffer_head *bh = jh2bh(jh);
				2481
				2482	/* Get reference so that buffer cannot be freed before we unlock it */
				2483	get_bh(bh);
				2484	jbd_lock_bh_state(bh);
				2485	spin_lock(&journal->j_list_lock);
				2486	__jbd2_journal_refile_buffer(jh);
				2487	jbd_unlock_bh_state(bh);
				2488	spin_unlock(&journal->j_list_lock);
				2489	__brelse(bh);
				2490	}
				2491
				2492	/*
				2493	* File inode in the inode list of the handle's transaction
				2494	*/
				2495	static int jbd2_journal_file_inode(handle_t handle, struct jbd2_inode jinode,
				2496	unsigned long flags, loff_t start_byte, loff_t end_byte)
				2497	{
				2498	transaction_t *transaction = handle->h_transaction;
				2499	journal_t *journal;
				2500
				2501	if (is_handle_aborted(handle))
				2502	return -EROFS;
				2503	journal = transaction->t_journal;
				2504
				2505	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
				2506	transaction->t_tid);
				2507
				2508	spin_lock(&journal->j_list_lock);
				2509	jinode->i_flags \|= flags;
				2510
				2511	if (jinode->i_dirty_end) {
				2512	jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
				2513	jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
				2514	} else {
				2515	jinode->i_dirty_start = start_byte;
				2516	jinode->i_dirty_end = end_byte;
				2517	}
				2518
				2519	/* Is inode already attached where we need it? */
				2520	if (jinode->i_transaction == transaction \|\|
				2521	jinode->i_next_transaction == transaction)
				2522	goto done;
				2523
				2524	/*
				2525	* We only ever set this variable to 1 so the test is safe. Since
				2526	* t_need_data_flush is likely to be set, we do the test to save some
				2527	* cacheline bouncing
				2528	*/
				2529	if (!transaction->t_need_data_flush)
				2530	transaction->t_need_data_flush = 1;
				2531	/* On some different transaction's list - should be
				2532	* the committing one */
				2533	if (jinode->i_transaction) {
				2534	J_ASSERT(jinode->i_next_transaction == NULL);
				2535	J_ASSERT(jinode->i_transaction ==
				2536	journal->j_committing_transaction);
				2537	jinode->i_next_transaction = transaction;
				2538	goto done;
				2539	}
				2540	/* Not on any transaction list... */
				2541	J_ASSERT(!jinode->i_next_transaction);
				2542	jinode->i_transaction = transaction;
				2543	list_add(&jinode->i_list, &transaction->t_inode_list);
				2544	done:
				2545	spin_unlock(&journal->j_list_lock);
				2546
				2547	return 0;
				2548	}
				2549
				2550	int jbd2_journal_inode_add_write(handle_t handle, struct jbd2_inode jinode)
				2551	{
				2552	return jbd2_journal_file_inode(handle, jinode,
				2553	JI_WRITE_DATA \| JI_WAIT_DATA, 0, LLONG_MAX);
				2554	}
				2555
				2556	int jbd2_journal_inode_add_wait(handle_t handle, struct jbd2_inode jinode)
				2557	{
				2558	return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
				2559	LLONG_MAX);
				2560	}
				2561
				2562	int jbd2_journal_inode_ranged_write(handle_t *handle,
				2563	struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
				2564	{
				2565	return jbd2_journal_file_inode(handle, jinode,
				2566	JI_WRITE_DATA \| JI_WAIT_DATA, start_byte,
				2567	start_byte + length - 1);
				2568	}
				2569
				2570	int jbd2_journal_inode_ranged_wait(handle_t handle, struct jbd2_inode jinode,
				2571	loff_t start_byte, loff_t length)
				2572	{
				2573	return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
				2574	start_byte, start_byte + length - 1);
				2575	}
				2576
				2577	/*
				2578	* File truncate and transaction commit interact with each other in a
				2579	* non-trivial way. If a transaction writing data block A is
				2580	* committing, we cannot discard the data by truncate until we have
				2581	* written them. Otherwise if we crashed after the transaction with
				2582	* write has committed but before the transaction with truncate has
				2583	* committed, we could see stale data in block A. This function is a
				2584	* helper to solve this problem. It starts writeout of the truncated
				2585	* part in case it is in the committing transaction.
				2586	*
				2587	* Filesystem code must call this function when inode is journaled in
				2588	* ordered mode before truncation happens and after the inode has been
				2589	* placed on orphan list with the new inode size. The second condition
				2590	* avoids the race that someone writes new data and we start
				2591	* committing the transaction after this function has been called but
				2592	* before a transaction for truncate is started (and furthermore it
				2593	* allows us to optimize the case where the addition to orphan list
				2594	* happens in the same transaction as write --- we don't have to write
				2595	* any data in such case).
				2596	*/
				2597	int jbd2_journal_begin_ordered_truncate(journal_t *journal,
				2598	struct jbd2_inode *jinode,
				2599	loff_t new_size)
				2600	{
				2601	transaction_t inode_trans, commit_trans;
				2602	int ret = 0;
				2603
				2604	/* This is a quick check to avoid locking if not necessary */
				2605	if (!jinode->i_transaction)
				2606	goto out;
				2607	/* Locks are here just to force reading of recent values, it is
				2608	* enough that the transaction was not committing before we started
				2609	* a transaction adding the inode to orphan list */
				2610	read_lock(&journal->j_state_lock);
				2611	commit_trans = journal->j_committing_transaction;
				2612	read_unlock(&journal->j_state_lock);
				2613	spin_lock(&journal->j_list_lock);
				2614	inode_trans = jinode->i_transaction;
				2615	spin_unlock(&journal->j_list_lock);
				2616	if (inode_trans == commit_trans) {
				2617	ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
				2618	new_size, LLONG_MAX);
				2619	if (ret)
				2620	jbd2_journal_abort(journal, ret);
				2621	}
				2622	out:
				2623	return ret;
				2624	}