Blame - src/kernel/linux/v4.14/fs/gfs2/log.c - T103

blob: b05c0ed36b6fe77ca9b40a0807c737d16378c6eb [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
				3	* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
				4	*
				5	* This copyrighted material is made available to anyone wishing to use,
				6	* modify, copy, or redistribute it subject to the terms and conditions
				7	* of the GNU General Public License version 2.
				8	*/
				9
				10	#include <linux/sched.h>
				11	#include <linux/slab.h>
				12	#include <linux/spinlock.h>
				13	#include <linux/completion.h>
				14	#include <linux/buffer_head.h>
				15	#include <linux/gfs2_ondisk.h>
				16	#include <linux/crc32.h>
				17	#include <linux/delay.h>
				18	#include <linux/kthread.h>
				19	#include <linux/freezer.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
				22	#include <linux/writeback.h>
				23	#include <linux/list_sort.h>
				24
				25	#include "gfs2.h"
				26	#include "incore.h"
				27	#include "bmap.h"
				28	#include "glock.h"
				29	#include "log.h"
				30	#include "lops.h"
				31	#include "meta_io.h"
				32	#include "util.h"
				33	#include "dir.h"
				34	#include "trace_gfs2.h"
				35
				36	/**
				37	* gfs2_struct2blk - compute stuff
				38	* @sdp: the filesystem
				39	* @nstruct: the number of structures
				40	* @ssize: the size of the structures
				41	*
				42	* Compute the number of log descriptor blocks needed to hold a certain number
				43	* of structures of a certain size.
				44	*
				45	* Returns: the number of blocks needed (minimum is always 1)
				46	*/
				47
				48	unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
				49	unsigned int ssize)
				50	{
				51	unsigned int blks;
				52	unsigned int first, second;
				53
				54	blks = 1;
				55	first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
				56
				57	if (nstruct > first) {
				58	second = (sdp->sd_sb.sb_bsize -
				59	sizeof(struct gfs2_meta_header)) / ssize;
				60	blks += DIV_ROUND_UP(nstruct - first, second);
				61	}
				62
				63	return blks;
				64	}
				65
				66	/**
				67	* gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
				68	* @mapping: The associated mapping (maybe NULL)
				69	* @bd: The gfs2_bufdata to remove
				70	*
				71	* The ail lock _must_ be held when calling this function
				72	*
				73	*/
				74
				75	void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
				76	{
				77	bd->bd_tr = NULL;
				78	list_del_init(&bd->bd_ail_st_list);
				79	list_del_init(&bd->bd_ail_gl_list);
				80	atomic_dec(&bd->bd_gl->gl_ail_count);
				81	brelse(bd->bd_bh);
				82	}
				83
				84	/**
				85	* gfs2_ail1_start_one - Start I/O on a part of the AIL
				86	* @sdp: the filesystem
				87	* @wbc: The writeback control structure
				88	* @ai: The ail structure
				89	*
				90	*/
				91
				92	static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
				93	struct writeback_control *wbc,
				94	struct gfs2_trans *tr)
				95	__releases(&sdp->sd_ail_lock)
				96	__acquires(&sdp->sd_ail_lock)
				97	{
				98	struct gfs2_glock *gl = NULL;
				99	struct address_space *mapping;
				100	struct gfs2_bufdata bd, s;
				101	struct buffer_head *bh;
				102
				103	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
				104	bh = bd->bd_bh;
				105
				106	gfs2_assert(sdp, bd->bd_tr == tr);
				107
				108	if (!buffer_busy(bh)) {
				109	if (!buffer_uptodate(bh))
				110	gfs2_io_error_bh(sdp, bh);
				111	list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
				112	continue;
				113	}
				114
				115	if (!buffer_dirty(bh))
				116	continue;
				117	if (gl == bd->bd_gl)
				118	continue;
				119	gl = bd->bd_gl;
				120	list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
				121	mapping = bh->b_page->mapping;
				122	if (!mapping)
				123	continue;
				124	spin_unlock(&sdp->sd_ail_lock);
				125	generic_writepages(mapping, wbc);
				126	spin_lock(&sdp->sd_ail_lock);
				127	if (wbc->nr_to_write <= 0)
				128	break;
				129	return 1;
				130	}
				131
				132	return 0;
				133	}
				134
				135
				136	/**
				137	* gfs2_ail1_flush - start writeback of some ail1 entries
				138	* @sdp: The super block
				139	* @wbc: The writeback control structure
				140	*
				141	* Writes back some ail1 entries, according to the limits in the
				142	* writeback control structure
				143	*/
				144
				145	void gfs2_ail1_flush(struct gfs2_sbd sdp, struct writeback_control wbc)
				146	{
				147	struct list_head *head = &sdp->sd_ail1_list;
				148	struct gfs2_trans *tr;
				149	struct blk_plug plug;
				150
				151	trace_gfs2_ail_flush(sdp, wbc, 1);
				152	blk_start_plug(&plug);
				153	spin_lock(&sdp->sd_ail_lock);
				154	restart:
				155	list_for_each_entry_reverse(tr, head, tr_list) {
				156	if (wbc->nr_to_write <= 0)
				157	break;
				158	if (gfs2_ail1_start_one(sdp, wbc, tr))
				159	goto restart;
				160	}
				161	spin_unlock(&sdp->sd_ail_lock);
				162	blk_finish_plug(&plug);
				163	trace_gfs2_ail_flush(sdp, wbc, 0);
				164	}
				165
				166	/**
				167	* gfs2_ail1_start - start writeback of all ail1 entries
				168	* @sdp: The superblock
				169	*/
				170
				171	static void gfs2_ail1_start(struct gfs2_sbd *sdp)
				172	{
				173	struct writeback_control wbc = {
				174	.sync_mode = WB_SYNC_NONE,
				175	.nr_to_write = LONG_MAX,
				176	.range_start = 0,
				177	.range_end = LLONG_MAX,
				178	};
				179
				180	return gfs2_ail1_flush(sdp, &wbc);
				181	}
				182
				183	/**
				184	* gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
				185	* @sdp: the filesystem
				186	* @ai: the AIL entry
				187	*
				188	*/
				189
				190	static void gfs2_ail1_empty_one(struct gfs2_sbd sdp, struct gfs2_trans tr)
				191	{
				192	struct gfs2_bufdata bd, s;
				193	struct buffer_head *bh;
				194
				195	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
				196	bd_ail_st_list) {
				197	bh = bd->bd_bh;
				198	gfs2_assert(sdp, bd->bd_tr == tr);
				199	if (buffer_busy(bh))
				200	continue;
				201	if (!buffer_uptodate(bh))
				202	gfs2_io_error_bh(sdp, bh);
				203	list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
				204	}
				205
				206	}
				207
				208	/**
				209	* gfs2_ail1_empty - Try to empty the ail1 lists
				210	* @sdp: The superblock
				211	*
				212	* Tries to empty the ail1 lists, starting with the oldest first
				213	*/
				214
				215	static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
				216	{
				217	struct gfs2_trans tr, s;
				218	int oldest_tr = 1;
				219	int ret;
				220
				221	spin_lock(&sdp->sd_ail_lock);
				222	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
				223	gfs2_ail1_empty_one(sdp, tr);
				224	if (list_empty(&tr->tr_ail1_list) && oldest_tr)
				225	list_move(&tr->tr_list, &sdp->sd_ail2_list);
				226	else
				227	oldest_tr = 0;
				228	}
				229	ret = list_empty(&sdp->sd_ail1_list);
				230	spin_unlock(&sdp->sd_ail_lock);
				231
				232	return ret;
				233	}
				234
				235	static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
				236	{
				237	struct gfs2_trans *tr;
				238	struct gfs2_bufdata *bd;
				239	struct buffer_head *bh;
				240
				241	spin_lock(&sdp->sd_ail_lock);
				242	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
				243	list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
				244	bh = bd->bd_bh;
				245	if (!buffer_locked(bh))
				246	continue;
				247	get_bh(bh);
				248	spin_unlock(&sdp->sd_ail_lock);
				249	wait_on_buffer(bh);
				250	brelse(bh);
				251	return;
				252	}
				253	}
				254	spin_unlock(&sdp->sd_ail_lock);
				255	}
				256
				257	/**
				258	* gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
				259	* @sdp: the filesystem
				260	* @ai: the AIL entry
				261	*
				262	*/
				263
				264	static void gfs2_ail2_empty_one(struct gfs2_sbd sdp, struct gfs2_trans tr)
				265	{
				266	struct list_head *head = &tr->tr_ail2_list;
				267	struct gfs2_bufdata *bd;
				268
				269	while (!list_empty(head)) {
				270	bd = list_entry(head->prev, struct gfs2_bufdata,
				271	bd_ail_st_list);
				272	gfs2_assert(sdp, bd->bd_tr == tr);
				273	gfs2_remove_from_ail(bd);
				274	}
				275	}
				276
				277	static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
				278	{
				279	struct gfs2_trans tr, safe;
				280	unsigned int old_tail = sdp->sd_log_tail;
				281	int wrap = (new_tail < old_tail);
				282	int a, b, rm;
				283
				284	spin_lock(&sdp->sd_ail_lock);
				285
				286	list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
				287	a = (old_tail <= tr->tr_first);
				288	b = (tr->tr_first < new_tail);
				289	rm = (wrap) ? (a \|\| b) : (a && b);
				290	if (!rm)
				291	continue;
				292
				293	gfs2_ail2_empty_one(sdp, tr);
				294	list_del(&tr->tr_list);
				295	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
				296	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
				297	kfree(tr);
				298	}
				299
				300	spin_unlock(&sdp->sd_ail_lock);
				301	}
				302
				303	/**
				304	* gfs2_log_release - Release a given number of log blocks
				305	* @sdp: The GFS2 superblock
				306	* @blks: The number of blocks
				307	*
				308	*/
				309
				310	void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
				311	{
				312
				313	atomic_add(blks, &sdp->sd_log_blks_free);
				314	trace_gfs2_log_blocks(sdp, blks);
				315	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
				316	sdp->sd_jdesc->jd_blocks);
				317	up_read(&sdp->sd_log_flush_lock);
				318	}
				319
				320	/**
				321	* gfs2_log_reserve - Make a log reservation
				322	* @sdp: The GFS2 superblock
				323	* @blks: The number of blocks to reserve
				324	*
				325	* Note that we never give out the last few blocks of the journal. Thats
				326	* due to the fact that there is a small number of header blocks
				327	* associated with each log flush. The exact number can't be known until
				328	* flush time, so we ensure that we have just enough free blocks at all
				329	* times to avoid running out during a log flush.
				330	*
				331	* We no longer flush the log here, instead we wake up logd to do that
				332	* for us. To avoid the thundering herd and to ensure that we deal fairly
				333	* with queued waiters, we use an exclusive wait. This means that when we
				334	* get woken with enough journal space to get our reservation, we need to
				335	* wake the next waiter on the list.
				336	*
				337	* Returns: errno
				338	*/
				339
				340	int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
				341	{
				342	int ret = 0;
				343	unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
				344	unsigned wanted = blks + reserved_blks;
				345	DEFINE_WAIT(wait);
				346	int did_wait = 0;
				347	unsigned int free_blocks;
				348
				349	if (gfs2_assert_warn(sdp, blks) \|\|
				350	gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
				351	return -EINVAL;
				352	atomic_add(blks, &sdp->sd_log_blks_needed);
				353	retry:
				354	free_blocks = atomic_read(&sdp->sd_log_blks_free);
				355	if (unlikely(free_blocks <= wanted)) {
				356	do {
				357	prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
				358	TASK_UNINTERRUPTIBLE);
				359	wake_up(&sdp->sd_logd_waitq);
				360	did_wait = 1;
				361	if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
				362	io_schedule();
				363	free_blocks = atomic_read(&sdp->sd_log_blks_free);
				364	} while(free_blocks <= wanted);
				365	finish_wait(&sdp->sd_log_waitq, &wait);
				366	}
				367	atomic_inc(&sdp->sd_reserving_log);
				368	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
				369	free_blocks - blks) != free_blocks) {
				370	if (atomic_dec_and_test(&sdp->sd_reserving_log))
				371	wake_up(&sdp->sd_reserving_log_wait);
				372	goto retry;
				373	}
				374	atomic_sub(blks, &sdp->sd_log_blks_needed);
				375	trace_gfs2_log_blocks(sdp, -blks);
				376
				377	/*
				378	* If we waited, then so might others, wake them up _after_ we get
				379	* our share of the log.
				380	*/
				381	if (unlikely(did_wait))
				382	wake_up(&sdp->sd_log_waitq);
				383
				384	down_read(&sdp->sd_log_flush_lock);
				385	if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
				386	gfs2_log_release(sdp, blks);
				387	ret = -EROFS;
				388	}
				389	if (atomic_dec_and_test(&sdp->sd_reserving_log))
				390	wake_up(&sdp->sd_reserving_log_wait);
				391	return ret;
				392	}
				393
				394	/**
				395	* log_distance - Compute distance between two journal blocks
				396	* @sdp: The GFS2 superblock
				397	* @newer: The most recent journal block of the pair
				398	* @older: The older journal block of the pair
				399	*
				400	* Compute the distance (in the journal direction) between two
				401	* blocks in the journal
				402	*
				403	* Returns: the distance in blocks
				404	*/
				405
				406	static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
				407	unsigned int older)
				408	{
				409	int dist;
				410
				411	dist = newer - older;
				412	if (dist < 0)
				413	dist += sdp->sd_jdesc->jd_blocks;
				414
				415	return dist;
				416	}
				417
				418	/**
				419	* calc_reserved - Calculate the number of blocks to reserve when
				420	* refunding a transaction's unused buffers.
				421	* @sdp: The GFS2 superblock
				422	*
				423	* This is complex. We need to reserve room for all our currently used
				424	* metadata buffers (e.g. normal file I/O rewriting file time stamps) and
				425	* all our journaled data buffers for journaled files (e.g. files in the
				426	* meta_fs like rindex, or files for which chattr +j was done.)
				427	* If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
				428	* will count it as free space (sd_log_blks_free) and corruption will follow.
				429	*
				430	* We can have metadata bufs and jdata bufs in the same journal. So each
				431	* type gets its own log header, for which we need to reserve a block.
				432	* In fact, each type has the potential for needing more than one header
				433	* in cases where we have more buffers than will fit on a journal page.
				434	* Metadata journal entries take up half the space of journaled buffer entries.
				435	* Thus, metadata entries have buf_limit (502) and journaled buffers have
				436	* databuf_limit (251) before they cause a wrap around.
				437	*
				438	* Also, we need to reserve blocks for revoke journal entries and one for an
				439	* overall header for the lot.
				440	*
				441	* Returns: the number of blocks reserved
				442	*/
				443	static unsigned int calc_reserved(struct gfs2_sbd *sdp)
				444	{
				445	unsigned int reserved = 0;
				446	unsigned int mbuf;
				447	unsigned int dbuf;
				448	struct gfs2_trans *tr = sdp->sd_log_tr;
				449
				450	if (tr) {
				451	mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
				452	dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
				453	reserved = mbuf + dbuf;
				454	/* Account for header blocks */
				455	reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp));
				456	reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
				457	}
				458
				459	if (sdp->sd_log_commited_revoke > 0)
				460	reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
				461	sizeof(u64));
				462	/* One for the overall header */
				463	if (reserved)
				464	reserved++;
				465	return reserved;
				466	}
				467
				468	static unsigned int current_tail(struct gfs2_sbd *sdp)
				469	{
				470	struct gfs2_trans *tr;
				471	unsigned int tail;
				472
				473	spin_lock(&sdp->sd_ail_lock);
				474
				475	if (list_empty(&sdp->sd_ail1_list)) {
				476	tail = sdp->sd_log_head;
				477	} else {
				478	tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
				479	tr_list);
				480	tail = tr->tr_first;
				481	}
				482
				483	spin_unlock(&sdp->sd_ail_lock);
				484
				485	return tail;
				486	}
				487
				488	static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
				489	{
				490	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
				491
				492	ail2_empty(sdp, new_tail);
				493
				494	atomic_add(dist, &sdp->sd_log_blks_free);
				495	trace_gfs2_log_blocks(sdp, dist);
				496	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
				497	sdp->sd_jdesc->jd_blocks);
				498
				499	sdp->sd_log_tail = new_tail;
				500	}
				501
				502
				503	static void log_flush_wait(struct gfs2_sbd *sdp)
				504	{
				505	DEFINE_WAIT(wait);
				506
				507	if (atomic_read(&sdp->sd_log_in_flight)) {
				508	do {
				509	prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
				510	TASK_UNINTERRUPTIBLE);
				511	if (atomic_read(&sdp->sd_log_in_flight))
				512	io_schedule();
				513	} while(atomic_read(&sdp->sd_log_in_flight));
				514	finish_wait(&sdp->sd_log_flush_wait, &wait);
				515	}
				516	}
				517
				518	static int ip_cmp(void priv, struct list_head a, struct list_head *b)
				519	{
				520	struct gfs2_inode ipa, ipb;
				521
				522	ipa = list_entry(a, struct gfs2_inode, i_ordered);
				523	ipb = list_entry(b, struct gfs2_inode, i_ordered);
				524
				525	if (ipa->i_no_addr < ipb->i_no_addr)
				526	return -1;
				527	if (ipa->i_no_addr > ipb->i_no_addr)
				528	return 1;
				529	return 0;
				530	}
				531
				532	static void gfs2_ordered_write(struct gfs2_sbd *sdp)
				533	{
				534	struct gfs2_inode *ip;
				535	LIST_HEAD(written);
				536
				537	spin_lock(&sdp->sd_ordered_lock);
				538	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
				539	while (!list_empty(&sdp->sd_log_le_ordered)) {
				540	ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
				541	list_move(&ip->i_ordered, &written);
				542	if (ip->i_inode.i_mapping->nrpages == 0)
				543	continue;
				544	spin_unlock(&sdp->sd_ordered_lock);
				545	filemap_fdatawrite(ip->i_inode.i_mapping);
				546	spin_lock(&sdp->sd_ordered_lock);
				547	}
				548	list_splice(&written, &sdp->sd_log_le_ordered);
				549	spin_unlock(&sdp->sd_ordered_lock);
				550	}
				551
				552	static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
				553	{
				554	struct gfs2_inode *ip;
				555
				556	spin_lock(&sdp->sd_ordered_lock);
				557	while (!list_empty(&sdp->sd_log_le_ordered)) {
				558	ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
				559	list_del(&ip->i_ordered);
				560	WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
				561	if (ip->i_inode.i_mapping->nrpages == 0)
				562	continue;
				563	spin_unlock(&sdp->sd_ordered_lock);
				564	filemap_fdatawait(ip->i_inode.i_mapping);
				565	spin_lock(&sdp->sd_ordered_lock);
				566	}
				567	spin_unlock(&sdp->sd_ordered_lock);
				568	}
				569
				570	void gfs2_ordered_del_inode(struct gfs2_inode *ip)
				571	{
				572	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
				573
				574	spin_lock(&sdp->sd_ordered_lock);
				575	if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
				576	list_del(&ip->i_ordered);
				577	spin_unlock(&sdp->sd_ordered_lock);
				578	}
				579
				580	void gfs2_add_revoke(struct gfs2_sbd sdp, struct gfs2_bufdata bd)
				581	{
				582	struct buffer_head *bh = bd->bd_bh;
				583	struct gfs2_glock *gl = bd->bd_gl;
				584
				585	bh->b_private = NULL;
				586	bd->bd_blkno = bh->b_blocknr;
				587	gfs2_remove_from_ail(bd); /* drops ref on bh */
				588	bd->bd_bh = NULL;
				589	bd->bd_ops = &gfs2_revoke_lops;
				590	sdp->sd_log_num_revoke++;
				591	if (atomic_inc_return(&gl->gl_revokes) == 1)
				592	gfs2_glock_hold(gl);
				593	set_bit(GLF_LFLUSH, &gl->gl_flags);
				594	list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
				595	}
				596
				597	void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
				598	{
				599	if (atomic_dec_return(&gl->gl_revokes) == 0) {
				600	clear_bit(GLF_LFLUSH, &gl->gl_flags);
				601	gfs2_glock_queue_put(gl);
				602	}
				603	}
				604
				605	void gfs2_write_revokes(struct gfs2_sbd *sdp)
				606	{
				607	struct gfs2_trans *tr;
				608	struct gfs2_bufdata bd, tmp;
				609	int have_revokes = 0;
				610	int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
				611
				612	gfs2_ail1_empty(sdp);
				613	spin_lock(&sdp->sd_ail_lock);
				614	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
				615	list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
				616	if (list_empty(&bd->bd_list)) {
				617	have_revokes = 1;
				618	goto done;
				619	}
				620	}
				621	}
				622	done:
				623	spin_unlock(&sdp->sd_ail_lock);
				624	if (have_revokes == 0)
				625	return;
				626	while (sdp->sd_log_num_revoke > max_revokes)
				627	max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
				628	max_revokes -= sdp->sd_log_num_revoke;
				629	if (!sdp->sd_log_num_revoke) {
				630	atomic_dec(&sdp->sd_log_blks_free);
				631	/* If no blocks have been reserved, we need to also
				632	* reserve a block for the header */
				633	if (!sdp->sd_log_blks_reserved)
				634	atomic_dec(&sdp->sd_log_blks_free);
				635	}
				636	gfs2_log_lock(sdp);
				637	spin_lock(&sdp->sd_ail_lock);
				638	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
				639	list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
				640	if (max_revokes == 0)
				641	goto out_of_blocks;
				642	if (!list_empty(&bd->bd_list))
				643	continue;
				644	gfs2_add_revoke(sdp, bd);
				645	max_revokes--;
				646	}
				647	}
				648	out_of_blocks:
				649	spin_unlock(&sdp->sd_ail_lock);
				650	gfs2_log_unlock(sdp);
				651
				652	if (!sdp->sd_log_num_revoke) {
				653	atomic_inc(&sdp->sd_log_blks_free);
				654	if (!sdp->sd_log_blks_reserved)
				655	atomic_inc(&sdp->sd_log_blks_free);
				656	}
				657	}
				658
				659	/**
				660	* log_write_header - Get and initialize a journal header buffer
				661	* @sdp: The GFS2 superblock
				662	*
				663	* Returns: the initialized log buffer descriptor
				664	*/
				665
				666	static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
				667	{
				668	struct gfs2_log_header *lh;
				669	unsigned int tail;
				670	u32 hash;
				671	int op_flags = REQ_PREFLUSH \| REQ_FUA \| REQ_META \| REQ_SYNC;
				672	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
				673	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
				674	lh = page_address(page);
				675	clear_page(lh);
				676
				677	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
				678
				679	tail = current_tail(sdp);
				680
				681	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
				682	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
				683	lh->lh_header.__pad0 = cpu_to_be64(0);
				684	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
				685	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
				686	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
				687	lh->lh_flags = cpu_to_be32(flags);
				688	lh->lh_tail = cpu_to_be32(tail);
				689	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
				690	hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
				691	lh->lh_hash = cpu_to_be32(hash);
				692
				693	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
				694	gfs2_ordered_wait(sdp);
				695	log_flush_wait(sdp);
				696	op_flags = REQ_SYNC \| REQ_META \| REQ_PRIO;
				697	}
				698
				699	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
				700	gfs2_log_write_page(sdp, page);
				701	gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
				702	log_flush_wait(sdp);
				703
				704	if (sdp->sd_log_tail != tail)
				705	log_pull_tail(sdp, tail);
				706	}
				707
				708	/**
				709	* gfs2_log_flush - flush incore transaction(s)
				710	* @sdp: the filesystem
				711	* @gl: The glock structure to flush. If NULL, flush the whole incore log
				712	*
				713	*/
				714
				715	void gfs2_log_flush(struct gfs2_sbd sdp, struct gfs2_glock gl,
				716	enum gfs2_flush_type type)
				717	{
				718	struct gfs2_trans *tr;
				719	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
				720
				721	down_write(&sdp->sd_log_flush_lock);
				722
				723	/* Log might have been flushed while we waited for the flush lock */
				724	if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
				725	up_write(&sdp->sd_log_flush_lock);
				726	return;
				727	}
				728	trace_gfs2_log_flush(sdp, 1);
				729
				730	if (type == SHUTDOWN_FLUSH)
				731	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
				732
				733	sdp->sd_log_flush_head = sdp->sd_log_head;
				734	tr = sdp->sd_log_tr;
				735	if (tr) {
				736	sdp->sd_log_tr = NULL;
				737	tr->tr_first = sdp->sd_log_flush_head;
				738	if (unlikely (state == SFS_FROZEN))
				739	gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
				740	}
				741
				742	if (unlikely(state == SFS_FROZEN))
				743	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
				744	gfs2_assert_withdraw(sdp,
				745	sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
				746
				747	gfs2_ordered_write(sdp);
				748	lops_before_commit(sdp, tr);
				749	gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
				750
				751	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
				752	log_flush_wait(sdp);
				753	log_write_header(sdp, 0);
				754	} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
				755	atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
				756	trace_gfs2_log_blocks(sdp, -1);
				757	log_write_header(sdp, 0);
				758	}
				759	lops_after_commit(sdp, tr);
				760
				761	gfs2_log_lock(sdp);
				762	sdp->sd_log_head = sdp->sd_log_flush_head;
				763	sdp->sd_log_blks_reserved = 0;
				764	sdp->sd_log_commited_revoke = 0;
				765
				766	spin_lock(&sdp->sd_ail_lock);
				767	if (tr && !list_empty(&tr->tr_ail1_list)) {
				768	list_add(&tr->tr_list, &sdp->sd_ail1_list);
				769	tr = NULL;
				770	}
				771	spin_unlock(&sdp->sd_ail_lock);
				772	gfs2_log_unlock(sdp);
				773
				774	if (type != NORMAL_FLUSH) {
				775	if (!sdp->sd_log_idle) {
				776	for (;;) {
				777	gfs2_ail1_start(sdp);
				778	gfs2_ail1_wait(sdp);
				779	if (gfs2_ail1_empty(sdp))
				780	break;
				781	}
				782	atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
				783	trace_gfs2_log_blocks(sdp, -1);
				784	log_write_header(sdp, 0);
				785	sdp->sd_log_head = sdp->sd_log_flush_head;
				786	}
				787	if (type == SHUTDOWN_FLUSH \|\| type == FREEZE_FLUSH)
				788	gfs2_log_shutdown(sdp);
				789	if (type == FREEZE_FLUSH)
				790	atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
				791	}
				792
				793	trace_gfs2_log_flush(sdp, 0);
				794	up_write(&sdp->sd_log_flush_lock);
				795
				796	kfree(tr);
				797	}
				798
				799	/**
				800	* gfs2_merge_trans - Merge a new transaction into a cached transaction
				801	* @old: Original transaction to be expanded
				802	* @new: New transaction to be merged
				803	*/
				804
				805	static void gfs2_merge_trans(struct gfs2_sbd sdp, struct gfs2_trans new)
				806	{
				807	struct gfs2_trans *old = sdp->sd_log_tr;
				808
				809	WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
				810
				811	old->tr_num_buf_new += new->tr_num_buf_new;
				812	old->tr_num_databuf_new += new->tr_num_databuf_new;
				813	old->tr_num_buf_rm += new->tr_num_buf_rm;
				814	old->tr_num_databuf_rm += new->tr_num_databuf_rm;
				815	old->tr_num_revoke += new->tr_num_revoke;
				816	old->tr_num_revoke_rm += new->tr_num_revoke_rm;
				817
				818	list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
				819	list_splice_tail_init(&new->tr_buf, &old->tr_buf);
				820
				821	spin_lock(&sdp->sd_ail_lock);
				822	list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
				823	list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
				824	spin_unlock(&sdp->sd_ail_lock);
				825	}
				826
				827	static void log_refund(struct gfs2_sbd sdp, struct gfs2_trans tr)
				828	{
				829	unsigned int reserved;
				830	unsigned int unused;
				831	unsigned int maxres;
				832
				833	gfs2_log_lock(sdp);
				834
				835	if (sdp->sd_log_tr) {
				836	gfs2_merge_trans(sdp, tr);
				837	} else if (tr->tr_num_buf_new \|\| tr->tr_num_databuf_new) {
				838	gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
				839	sdp->sd_log_tr = tr;
				840	set_bit(TR_ATTACHED, &tr->tr_flags);
				841	}
				842
				843	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
				844	reserved = calc_reserved(sdp);
				845	maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
				846	gfs2_assert_withdraw(sdp, maxres >= reserved);
				847	unused = maxres - reserved;
				848	atomic_add(unused, &sdp->sd_log_blks_free);
				849	trace_gfs2_log_blocks(sdp, unused);
				850	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
				851	sdp->sd_jdesc->jd_blocks);
				852	sdp->sd_log_blks_reserved = reserved;
				853
				854	gfs2_log_unlock(sdp);
				855	}
				856
				857	/**
				858	* gfs2_log_commit - Commit a transaction to the log
				859	* @sdp: the filesystem
				860	* @tr: the transaction
				861	*
				862	* We wake up gfs2_logd if the number of pinned blocks exceed thresh1
				863	* or the total number of used blocks (pinned blocks plus AIL blocks)
				864	* is greater than thresh2.
				865	*
				866	* At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
				867	* journal size.
				868	*
				869	* Returns: errno
				870	*/
				871
				872	void gfs2_log_commit(struct gfs2_sbd sdp, struct gfs2_trans tr)
				873	{
				874	log_refund(sdp, tr);
				875
				876	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) \|\|
				877	((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
				878	atomic_read(&sdp->sd_log_thresh2)))
				879	wake_up(&sdp->sd_logd_waitq);
				880	}
				881
				882	/**
				883	* gfs2_log_shutdown - write a shutdown header into a journal
				884	* @sdp: the filesystem
				885	*
				886	*/
				887
				888	void gfs2_log_shutdown(struct gfs2_sbd *sdp)
				889	{
				890	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
				891	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
				892	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
				893
				894	sdp->sd_log_flush_head = sdp->sd_log_head;
				895
				896	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
				897
				898	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
				899	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
				900
				901	sdp->sd_log_head = sdp->sd_log_flush_head;
				902	sdp->sd_log_tail = sdp->sd_log_head;
				903	}
				904
				905	static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
				906	{
				907	return (atomic_read(&sdp->sd_log_pinned) +
				908	atomic_read(&sdp->sd_log_blks_needed) >=
				909	atomic_read(&sdp->sd_log_thresh1));
				910	}
				911
				912	static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
				913	{
				914	unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
				915
				916	if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
				917	return 1;
				918
				919	return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
				920	atomic_read(&sdp->sd_log_thresh2);
				921	}
				922
				923	/**
				924	* gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
				925	* @sdp: Pointer to GFS2 superblock
				926	*
				927	* Also, periodically check to make sure that we're using the most recent
				928	* journal index.
				929	*/
				930
				931	int gfs2_logd(void *data)
				932	{
				933	struct gfs2_sbd *sdp = data;
				934	unsigned long t = 1;
				935	DEFINE_WAIT(wait);
				936	bool did_flush;
				937
				938	while (!kthread_should_stop()) {
				939
				940	/* Check for errors writing to the journal */
				941	if (sdp->sd_log_error) {
				942	gfs2_lm_withdraw(sdp,
				943	"GFS2: fsid=%s: error %d: "
				944	"withdrawing the file system to "
				945	"prevent further damage.\n",
				946	sdp->sd_fsname, sdp->sd_log_error);
				947	}
				948
				949	did_flush = false;
				950	if (gfs2_jrnl_flush_reqd(sdp) \|\| t == 0) {
				951	gfs2_ail1_empty(sdp);
				952	gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
				953	did_flush = true;
				954	}
				955
				956	if (gfs2_ail_flush_reqd(sdp)) {
				957	gfs2_ail1_start(sdp);
				958	gfs2_ail1_wait(sdp);
				959	gfs2_ail1_empty(sdp);
				960	gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
				961	did_flush = true;
				962	}
				963
				964	if (!gfs2_ail_flush_reqd(sdp) \|\| did_flush)
				965	wake_up(&sdp->sd_log_waitq);
				966
				967	t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
				968
				969	try_to_freeze();
				970
				971	do {
				972	prepare_to_wait(&sdp->sd_logd_waitq, &wait,
				973	TASK_INTERRUPTIBLE);
				974	if (!gfs2_ail_flush_reqd(sdp) &&
				975	!gfs2_jrnl_flush_reqd(sdp) &&
				976	!kthread_should_stop())
				977	t = schedule_timeout(t);
				978	} while(t && !gfs2_ail_flush_reqd(sdp) &&
				979	!gfs2_jrnl_flush_reqd(sdp) &&
				980	!kthread_should_stop());
				981	finish_wait(&sdp->sd_logd_waitq, &wait);
				982	}
				983
				984	return 0;
				985	}
				986