/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "super.h"
#include "util.h"
#include "glops.h"

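/**
 * gfs2_page_add_databufs - Add a page's buffers to the current transaction
 * @ip: The GFS2 inode
 * @page: The (locked) page being written
 * @from: Offset of the first byte of interest within the page
 * @to: Offset of the end of the byte range of interest
 *
 * Walks the buffer heads attached to @page and adds each buffer that
 * overlaps the byte range to the current transaction as a data buffer.
 * For journaled data inodes the buffer is marked uptodate first.
 */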
void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
                            unsigned int from, unsigned int to)
{
        struct buffer_head *head = page_buffers(page);
        unsigned int bsize = head->b_size;
        struct buffer_head *bh;
        unsigned int start, end;

        for (bh = head, start = 0; bh != head || !start;
             bh = bh->b_this_page, start = end) {
                end = start + bsize;
                if (end <= from || start >= to)
                        continue;
                if (gfs2_is_jdata(ip))
                        set_buffer_uptodate(bh);
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
        }
}

/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
                                  struct buffer_head *bh_result, int create)
{
        int error;

        error = gfs2_block_map(inode, lblock, bh_result, 0);
        if (error)
                return error;
        if (!buffer_mapped(bh_result))
                return -EIO;
        return 0;
}

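/**
 * gfs2_get_block_noalloc - Block map function used for direct I/O
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Ignored; no allocation is ever performed on this path
 *
 * Returns: errno
 */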
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
                                 struct buffer_head *bh_result, int create)
{
        return gfs2_block_map(inode, lblock, bh_result, 0);
}

/**
 * gfs2_writepage_common - Common bits of writepage
 * @page: The page to be written
 * @wbc: The writeback control
 *
 * Returns: 1 if the page is ok to write, an error code on failure, or
 *          zero if the page has already been dealt with (redirtied or
 *          invalidated).
 */

static int gfs2_writepage_common(struct page *page,
                                 struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        loff_t i_size = i_size_read(inode);
        pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset;

        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
                goto out;
        if (current->journal_info)
                goto redirty;
        /* Is the page fully outside i_size? (truncate in progress) */
        offset = i_size & (PAGE_CACHE_SIZE-1);
        if (page->index > end_index || (page->index == end_index && !offset)) {
                page->mapping->a_ops->invalidatepage(page, 0);
                goto out;
        }
        return 1;
redirty:
        redirty_page_for_writepage(wbc, page);
out:
        unlock_page(page);
        return 0;
}

/**
 * gfs2_writeback_writepage - Write page for writeback mappings
 * @page: The page
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_writeback_writepage(struct page *page,
                                    struct writeback_control *wbc)
{
        int ret;

        ret = gfs2_writepage_common(page, wbc);
        if (ret <= 0)
                return ret;

        return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}

/**
 * gfs2_ordered_writepage - Write page for ordered data files
 * @page: The page to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_ordered_writepage(struct page *page,
                                  struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        int ret;

        ret = gfs2_writepage_common(page, wbc);
        if (ret <= 0)
                return ret;

        if (!page_has_buffers(page)) {
                create_empty_buffers(page, inode->i_sb->s_blocksize,
                                     (1 << BH_Dirty)|(1 << BH_Uptodate));
        }
        gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
        return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}

/**
 * __gfs2_jdata_writepage - The core of jdata writepage
 * @page: The page to write
 * @wbc: The writeback control
 *
 * This is shared between writepage and writepages and implements the
 * core of the writepage operation. If a transaction is required then
 * PageChecked will have been set and the transaction will have
 * already been started before this is called.
 *
 * Returns: errno
 */

static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);

        if (PageChecked(page)) {
                ClearPageChecked(page);
                if (!page_has_buffers(page)) {
                        create_empty_buffers(page, inode->i_sb->s_blocksize,
                                             (1 << BH_Dirty)|(1 << BH_Uptodate));
                }
                gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
        }
        return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}

/**
 * gfs2_jdata_writepage - Write complete page
 * @page: Page to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int ret;
        int done_trans = 0;

        if (PageChecked(page)) {
                if (wbc->sync_mode != WB_SYNC_ALL)
                        goto out_ignore;
                ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (ret)
                        goto out_ignore;
                done_trans = 1;
        }
        ret = gfs2_writepage_common(page, wbc);
        if (ret > 0)
                ret = __gfs2_jdata_writepage(page, wbc);
        if (done_trans)
                gfs2_trans_end(sdp);
        return ret;

out_ignore:
        redirty_page_for_writepage(wbc, page);
        unlock_page(page);
        return 0;
}

/**
 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * For the data=writeback case we can already ignore buffer heads
 * and write whole extents at once. This is a big reduction in the
 * number of I/O requests we send and the bmap calls we make in this case.
 */
static int gfs2_writeback_writepages(struct address_space *mapping,
                                     struct writeback_control *wbc)
{
        return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
}

/**
 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
 * @mapping: The mapping
 * @wbc: The writeback control
 * @pvec: The vector of pages
 * @nr_pages: The number of pages to write
 * @end: The last page index to write (for non-cyclic writeback)
 *
 * Returns: non-zero if the loop should terminate, zero otherwise
 */

static int gfs2_write_jdata_pagevec(struct address_space *mapping,
                                    struct writeback_control *wbc,
                                    struct pagevec *pvec,
                                    int nr_pages, pgoff_t end)
{
        struct inode *inode = mapping->host;
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        loff_t i_size = i_size_read(inode);
        pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
        unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
        int i;
        int ret;

        ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
        if (ret < 0)
                return ret;

        for(i = 0; i < nr_pages; i++) {
                struct page *page = pvec->pages[i];

                lock_page(page);

                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        continue;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        ret = 1;
                        unlock_page(page);
                        continue;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                    !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        continue;
                }

                /* Is the page fully outside i_size? (truncate in progress) */
                if (page->index > end_index || (page->index == end_index && !offset)) {
                        page->mapping->a_ops->invalidatepage(page, 0);
                        unlock_page(page);
                        continue;
                }

                ret = __gfs2_jdata_writepage(page, wbc);

                if (ret || (--(wbc->nr_to_write) <= 0))
                        ret = 1;
        }
        gfs2_trans_end(sdp);
        return ret;
}

/**
 * gfs2_write_cache_jdata - Like write_cache_pages but different
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * The reason that we use our own function here is that we need to
 * start transactions before we grab page locks. This allows us
 * to get the ordering right.
 *
 * Returns: errno
 */

static int gfs2_write_cache_jdata(struct address_space *mapping,
                                  struct writeback_control *wbc)
{
        int ret = 0;
        int done = 0;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t index;
        pgoff_t end;
        int scanned = 0;
        int range_whole = 0;

        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
                scanned = 1;
        }

retry:
        while (!done && (index <= end) &&
               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                                              PAGECACHE_TAG_DIRTY,
                                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
                scanned = 1;
                ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
                if (ret)
                        done = 1;
                if (ret > 0)
                        ret = 0;

                pagevec_release(&pvec);
                cond_resched();
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = 1;
                index = 0;
                goto retry;
        }

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;
        return ret;
}


/**
 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 */

static int gfs2_jdata_writepages(struct address_space *mapping,
                                 struct writeback_control *wbc)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
        int ret;

        ret = gfs2_write_cache_jdata(mapping, wbc);
        if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
                gfs2_log_flush(sdp, ip->i_gl);
                ret = gfs2_write_cache_jdata(mapping, wbc);
        }
        return ret;
}

/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
        struct buffer_head *dibh;
        u64 dsize = i_size_read(&ip->i_inode);
        void *kaddr;
        int error;

        /*
         * Due to the order of unstuffing files and ->fault(), we can be
         * asked for a zero page in the case of a stuffed file being extended,
         * so we need to supply one here. It doesn't happen often.
         */
        if (unlikely(page->index)) {
                zero_user(page, 0, PAGE_CACHE_SIZE);
                SetPageUptodate(page);
                return 0;
        }

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                return error;

        kaddr = kmap_atomic(page);
        if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
                dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
        memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
        memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
        kunmap_atomic(kaddr);
        flush_dcache_page(page);
        brelse(dibh);
        SetPageUptodate(page);

        return 0;
}


/**
 * __gfs2_readpage - readpage
 * @file: The file to read a page for
 * @page: The page to read
 *
 * This is the core of gfs2's readpage. It's used by the internal file
 * reading code as in that case we already hold the glock. It's also
 * called by gfs2_readpage() once the required lock has been granted.
 *
 * Returns: errno
 */

static int __gfs2_readpage(void *file, struct page *page)
{
        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        int error;

        if (gfs2_is_stuffed(ip)) {
                error = stuffed_readpage(ip, page);
                unlock_page(page);
        } else {
                error = mpage_readpage(page, gfs2_block_map);
        }

        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                return -EIO;

        return error;
}

/**
 * gfs2_readpage - read a page of a file
 * @file: The file to read
 * @page: The page of the file
 *
 * This deals with the locking required. We have to unlock and
 * relock the page in order to get the locking in the right
 * order.
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
        struct address_space *mapping = page->mapping;
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_holder gh;
        int error;

        unlock_page(page);
        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        error = gfs2_glock_nq(&gh);
        if (unlikely(error))
                goto out;
        error = AOP_TRUNCATED_PAGE;
        lock_page(page);
        if (page->mapping == mapping && !PageUptodate(page))
                error = __gfs2_readpage(file, page);
        else
                unlock_page(page);
        gfs2_glock_dq(&gh);
out:
        gfs2_holder_uninit(&gh);
        if (error && error != AOP_TRUNCATED_PAGE)
                lock_page(page);
        return error;
}

/**
 * gfs2_internal_read - read an internal file
 * @ip: The gfs2 inode
 * @ra_state: The readahead state (or NULL for no readahead)
 * @buf: The buffer to fill
 * @pos: The file position
 * @size: The amount to read
 *
 * Returns: The number of bytes read, or errno on failure
 */

int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
                       char *buf, loff_t *pos, unsigned size)
{
        struct address_space *mapping = ip->i_inode.i_mapping;
        unsigned long index = *pos / PAGE_CACHE_SIZE;
        unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
        unsigned copied = 0;
        unsigned amt;
        struct page *page;
        void *p;

        do {
                amt = size - copied;
                if (offset + size > PAGE_CACHE_SIZE)
                        amt = PAGE_CACHE_SIZE - offset;
                page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
                if (IS_ERR(page))
                        return PTR_ERR(page);
                p = kmap_atomic(page);
                memcpy(buf + copied, p + offset, amt);
                kunmap_atomic(p);
                mark_page_accessed(page);
                page_cache_release(page);
                copied += amt;
                index++;
                offset = 0;
        } while(copied < size);
        (*pos) += size;
        return size;
}

/**
 * gfs2_readpages - Read a bunch of pages at once
 * @file: The file being read
 * @mapping: The address space being read from
 * @pages: The list of pages to read
 * @nr_pages: The number of pages to read
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 *    which are slightly inconvenient (such as locking conflicts between
 *    the page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
 *
 * Returns: errno
 */

static int gfs2_readpages(struct file *file, struct address_space *mapping,
                          struct list_head *pages, unsigned nr_pages)
{
        struct inode *inode = mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder gh;
        int ret;

        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (unlikely(ret))
                goto out_uninit;
        if (!gfs2_is_stuffed(ip))
                ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
        gfs2_glock_dq(&gh);
out_uninit:
        gfs2_holder_uninit(&gh);
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                ret = -EIO;
        return ret;
}

/**
 * gfs2_write_begin - Begin to write to a file
 * @file: The file to write to
 * @mapping: The mapping in which to write
 * @pos: The file offset at which to start writing
 * @len: Length of the write
 * @flags: Various flags
 * @pagep: Pointer to return the page
 * @fsdata: Pointer to return fs data (unused by GFS2)
 *
 * Returns: errno
 */

static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned len, unsigned flags,
                            struct page **pagep, void **fsdata)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        int alloc_required;
        int error = 0;
        struct gfs2_qadata *qa = NULL;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        struct page *page;

        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
        if (unlikely(error))
                goto out_uninit;
        if (&ip->i_inode == sdp->sd_rindex) {
                error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
                                           GL_NOCACHE, &m_ip->i_gh);
                if (unlikely(error)) {
                        gfs2_glock_dq(&ip->i_gh);
                        goto out_uninit;
                }
        }

        alloc_required = gfs2_write_alloc_required(ip, pos, len);

        if (alloc_required || gfs2_is_jdata(ip))
                gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);

        if (alloc_required) {
                qa = gfs2_qadata_get(ip);
                if (!qa) {
                        error = -ENOMEM;
                        goto out_unlock;
                }

                error = gfs2_quota_lock_check(ip);
                if (error)
                        goto out_alloc_put;

                error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
                if (error)
                        goto out_qunlock;
        }

        rblocks = RES_DINODE + ind_blocks;
        if (gfs2_is_jdata(ip))
                rblocks += data_blocks ? data_blocks : 1;
        if (ind_blocks || data_blocks)
                rblocks += RES_STATFS + RES_QUOTA;
        if (&ip->i_inode == sdp->sd_rindex)
                rblocks += 2 * RES_STATFS;
        if (alloc_required)
                rblocks += gfs2_rg_blocks(ip);

        error = gfs2_trans_begin(sdp, rblocks,
                                 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
        if (error)
                goto out_trans_fail;

        error = -ENOMEM;
        flags |= AOP_FLAG_NOFS;
        page = grab_cache_page_write_begin(mapping, index, flags);
        *pagep = page;
        if (unlikely(!page))
                goto out_endtrans;

        if (gfs2_is_stuffed(ip)) {
                error = 0;
                if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
                        error = gfs2_unstuff_dinode(ip, page);
                        if (error == 0)
                                goto prepare_write;
                } else if (!PageUptodate(page)) {
                        error = stuffed_readpage(ip, page);
                }
                goto out;
        }

prepare_write:
        error = __block_write_begin(page, from, len, gfs2_block_map);
out:
        if (error == 0)
                return 0;

        unlock_page(page);
        page_cache_release(page);

        gfs2_trans_end(sdp);
        if (pos + len > ip->i_inode.i_size)
                gfs2_trim_blocks(&ip->i_inode);
        goto out_trans_fail;

out_endtrans:
        gfs2_trans_end(sdp);
out_trans_fail:
        if (alloc_required) {
                gfs2_inplace_release(ip);
out_qunlock:
                gfs2_quota_unlock(ip);
out_alloc_put:
                gfs2_qadata_put(ip);
        }
out_unlock:
        if (&ip->i_inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq(&ip->i_gh);
out_uninit:
        gfs2_holder_uninit(&ip->i_gh);
        return error;
}

/**
 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 * @inode: the rindex inode
 */
static void adjust_fs_space(struct inode *inode)
{
        struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct buffer_head *m_bh, *l_bh;
        u64 fs_total, new_free;

        /* Total up the file system space, according to the latest rindex. */
        fs_total = gfs2_ri_total(sdp);
        if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
                return;

        spin_lock(&sdp->sd_statfs_spin);
        gfs2_statfs_change_in(m_sc, m_bh->b_data +
                              sizeof(struct gfs2_dinode));
        if (fs_total > (m_sc->sc_total + l_sc->sc_total))
                new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
        else
                new_free = 0;
        spin_unlock(&sdp->sd_statfs_spin);
        fs_warn(sdp, "File system extended by %llu blocks.\n",
                (unsigned long long)new_free);
        gfs2_statfs_change(sdp, new_free, new_free, 0);

        if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
                goto out;
        update_statfs(sdp, m_bh, l_bh);
        brelse(l_bh);
out:
        brelse(m_bh);
}

/**
 * gfs2_stuffed_write_end - Write end for stuffed files
 * @inode: The inode
 * @dibh: The buffer_head containing the on-disk inode
 * @pos: The file position
 * @len: The length of the write
 * @copied: How much was actually copied by the VFS
 * @page: The page
 *
 * This copies the data from the page into the inode block after
 * the inode data structure itself.
 *
 * Returns: The number of bytes copied
 */
static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
                                  loff_t pos, unsigned len, unsigned copied,
                                  struct page *page)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        u64 to = pos + copied;
        void *kaddr;
        unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);

        BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
        kaddr = kmap_atomic(page);
        memcpy(buf + pos, kaddr + pos, copied);
        memset(kaddr + pos + copied, 0, len - copied);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);

        if (!PageUptodate(page))
                SetPageUptodate(page);
        unlock_page(page);
        page_cache_release(page);

        if (copied) {
                if (inode->i_size < to)
                        i_size_write(inode, to);
                mark_inode_dirty(inode);
        }

        if (inode == sdp->sd_rindex) {
                adjust_fs_space(inode);
                sdp->sd_rindex_uptodate = 0;
        }

        brelse(dibh);
        gfs2_trans_end(sdp);
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq(&ip->i_gh);
        gfs2_holder_uninit(&ip->i_gh);
        return copied;
}

/**
 * gfs2_write_end
 * @file: The file to write to
 * @mapping: The address space to write to
 * @pos: The file position
 * @len: The length of the data
 * @copied: How much was actually copied by the VFS
 * @page: The page that has been written
 * @fsdata: The fsdata (unused in GFS2)
 *
 * The main write_end function for GFS2. We have a separate one for
 * stuffed files as they are slightly different, otherwise we just
 * put our locking around the VFS provided functions.
 *
 * Returns: errno
 */

static int gfs2_write_end(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned copied,
                          struct page *page, void *fsdata)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct buffer_head *dibh;
        struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
        unsigned int to = from + len;
        int ret;

        BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);

        ret = gfs2_meta_inode_buffer(ip, &dibh);
        if (unlikely(ret)) {
                unlock_page(page);
                page_cache_release(page);
                goto failed;
        }

        gfs2_trans_add_bh(ip->i_gl, dibh, 1);

        if (gfs2_is_stuffed(ip))
                return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);

        if (!gfs2_is_writeback(ip))
                gfs2_page_add_databufs(ip, page, from, to);

        ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);

        if (inode == sdp->sd_rindex) {
                adjust_fs_space(inode);
                sdp->sd_rindex_uptodate = 0;
        }

        brelse(dibh);
failed:
        gfs2_trans_end(sdp);
        if (ip->i_res)
                gfs2_inplace_release(ip);
        if (qa) {
                gfs2_quota_unlock(ip);
                gfs2_qadata_put(ip);
        }
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq(&ip->i_gh);
        gfs2_holder_uninit(&ip->i_gh);
        return ret;
}

/**
 * gfs2_set_page_dirty - Page dirtying function
 * @page: The page to dirty
 *
 * Returns: 1 if it dirtied the page, or 0 otherwise
 */

static int gfs2_set_page_dirty(struct page *page)
{
        SetPageChecked(page);
        return __set_page_dirty_buffers(page);
}

/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_holder i_gh;
        sector_t dblock = 0;
        int error;

        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
        if (error)
                return 0;

        if (!gfs2_is_stuffed(ip))
                dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);

        gfs2_glock_dq_uninit(&i_gh);

        return dblock;
}

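/**
 * gfs2_discard - Detach a buffer from the log prior to invalidation
 * @sdp: The superblock
 * @bh: The buffer head to discard
 *
 * Called from gfs2_invalidatepage(). Clears the buffer's dirty state,
 * drops it from its log element list (or asks the journal to forget it
 * when that is not possible) and clears the mapped/req/new bits so the
 * buffer can be safely released.
 */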
static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
        struct gfs2_bufdata *bd;

        lock_buffer(bh);
        gfs2_log_lock(sdp);
        clear_buffer_dirty(bh);
        bd = bh->b_private;
        if (bd) {
                if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
                        list_del_init(&bd->bd_le.le_list);
                else
                        gfs2_remove_from_journal(bh, current->journal_info, 0);
        }
        bh->b_bdev = NULL;
        clear_buffer_mapped(bh);
        clear_buffer_req(bh);
        clear_buffer_new(bh);
        gfs2_log_unlock(sdp);
        unlock_buffer(bh);
}

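/**
 * gfs2_invalidatepage - Invalidate (part of) a page
 * @page: The page being invalidated
 * @offset: Offset within the page from which to invalidate
 *
 * Buffers which start at or after @offset within the page are discarded
 * from the log via gfs2_discard(). If the whole page is being invalidated
 * (offset == 0), the PageChecked flag is cleared and we also try to
 * release the page.
 */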
static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        struct buffer_head *bh, *head;
        unsigned long pos = 0;

        BUG_ON(!PageLocked(page));
        if (offset == 0)
                ClearPageChecked(page);
        if (!page_has_buffers(page))
                goto out;

        bh = head = page_buffers(page);
        do {
                if (offset <= pos)
                        gfs2_discard(sdp, bh);
                pos += bh->b_size;
                bh = bh->b_this_page;
        } while (bh != head);
out:
        if (offset == 0)
                try_to_release_page(page, 0);
}

/**
 * gfs2_ok_for_dio - check that dio is valid on this file
 * @ip: The inode
 * @rw: READ or WRITE
 * @offset: The offset at which we are reading or writing
 *
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 *          1 (to accept the i/o request)
 */
static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
{
        /*
         * Should we return an error here? I can't see that O_DIRECT for
         * a stuffed file makes any sense. For now we'll silently fall
         * back to buffered I/O.
         */
        if (gfs2_is_stuffed(ip))
                return 0;

        if (offset >= i_size_read(&ip->i_inode))
                return 0;
        return 1;
}

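/**
 * gfs2_direct_IO - Perform direct I/O to or from a file
 * @rw: READ or WRITE
 * @iocb: The I/O control block
 * @iov: The I/O vector
 * @offset: The file offset at which the I/O starts
 * @nr_segs: The number of segments in @iov
 *
 * Returns: The number of bytes transferred, or errno on failure
 */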
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
                              const struct iovec *iov, loff_t offset,
                              unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct address_space *mapping = inode->i_mapping;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int rv;

        /*
         * Deferred lock, even if it's a write, since we do no allocation
         * on this path. All we need to change is the atime, and this lock
         * mode ensures that other nodes have flushed their buffered read
         * caches (i.e. their page cache entries for this inode). We do not,
         * unfortunately, have the option of only flushing a range like
         * the VFS does.
         */
        gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
        rv = gfs2_glock_nq(&gh);
        if (rv)
                return rv;
        rv = gfs2_ok_for_dio(ip, rw, offset);
        if (rv != 1)
                goto out; /* dio not valid, fall back to buffered i/o */

        /*
         * Now since we are holding a deferred (CW) lock at this point, you
         * might be wondering why this is ever needed. There is a case however
         * where we've granted a deferred local lock against a cached exclusive
         * glock. That is ok provided all granted local locks are deferred, but
         * it also means that it is possible to encounter pages which are
         * cached and possibly also mapped. So here we check for that and sort
         * them out ahead of the dio. The glock state machine will take care of
         * everything else.
         *
         * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
         * the first place, mapping->nrpages will always be zero.
         */
        if (mapping->nrpages) {
                loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
                loff_t len = iov_length(iov, nr_segs);
                loff_t end = PAGE_ALIGN(offset + len) - 1;

                rv = 0;
                if (len == 0)
                        goto out;
                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
                        unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
                rv = filemap_write_and_wait_range(mapping, lstart, end);
                if (rv)
                        return rv;
                truncate_inode_pages_range(mapping, lstart, end);
        }

        rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
                                  offset, nr_segs, gfs2_get_block_direct,
                                  NULL, NULL, 0);
out:
        gfs2_glock_dq_m(1, &gh);
        gfs2_holder_uninit(&gh);
        return rv;
}

/**
 * gfs2_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 1 if the buffers were released, 0 otherwise
 */

int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
{
        struct address_space *mapping = page->mapping;
        struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
        struct buffer_head *bh, *head;
        struct gfs2_bufdata *bd;

        if (!page_has_buffers(page))
                return 0;

        gfs2_log_lock(sdp);
        spin_lock(&sdp->sd_ail_lock);
        head = bh = page_buffers(page);
        do {
                if (atomic_read(&bh->b_count))
                        goto cannot_release;
                bd = bh->b_private;
                if (bd && bd->bd_ail)
                        goto cannot_release;
                if (buffer_pinned(bh) || buffer_dirty(bh))
                        goto not_possible;
                bh = bh->b_this_page;
        } while(bh != head);
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);

        head = bh = page_buffers(page);
        do {
                gfs2_log_lock(sdp);
                bd = bh->b_private;
                if (bd) {
                        gfs2_assert_warn(sdp, bd->bd_bh == bh);
                        gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
                        if (!list_empty(&bd->bd_le.le_list)) {
                                if (!buffer_pinned(bh))
                                        list_del_init(&bd->bd_le.le_list);
                                else
                                        bd = NULL;
                        }
                        if (bd)
                                bd->bd_bh = NULL;
                        bh->b_private = NULL;
                }
                gfs2_log_unlock(sdp);
                if (bd)
                        kmem_cache_free(gfs2_bufdata_cachep, bd);

                bh = bh->b_this_page;
        } while (bh != head);

        return try_to_free_buffers(page);

not_possible: /* Should never happen */
        WARN_ON(buffer_dirty(bh));
        WARN_ON(buffer_pinned(bh));
cannot_release:
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);
        return 0;
}

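/*
 * The three address_space_operations tables below correspond to the
 * three GFS2 data journaling modes: "writeback" and "ordered" (selected
 * per mount via the data= option) and "jdata" (journaled data, selected
 * per inode). gfs2_set_aops() below picks the table to use for an inode.
 */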
static const struct address_space_operations gfs2_writeback_aops = {
        .writepage = gfs2_writeback_writepage,
        .writepages = gfs2_writeback_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_ordered_aops = {
        .writepage = gfs2_ordered_writepage,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_jdata_aops = {
        .writepage = gfs2_jdata_writepage,
        .writepages = gfs2_jdata_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
};

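/**
 * gfs2_set_aops - Set the address space operations for an inode
 * @inode: The inode
 *
 * Selects the writeback, ordered or jdata address_space_operations
 * table according to the inode's data journaling mode.
 */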
void gfs2_set_aops(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);

        if (gfs2_is_writeback(ip))
                inode->i_mapping->a_ops = &gfs2_writeback_aops;
        else if (gfs2_is_ordered(ip))
                inode->i_mapping->a_ops = &gfs2_ordered_aops;
        else if (gfs2_is_jdata(ip))
                inode->i_mapping->a_ops = &gfs2_jdata_aops;
        else
                BUG();
}
