Blame - ap/os/linux/linux-3.4.x/fs/ntfs/aops.c - T106_DC

blob: f5d45653bd273806e84b96c156da9415cf58955f [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/**
				2	* aops.c - NTFS kernel address space operations and page cache handling.
				3	* Part of the Linux-NTFS project.
				4	*
				5	* Copyright (c) 2001-2007 Anton Altaparmakov
				6	* Copyright (c) 2002 Richard Russon
				7	*
				8	* This program/include file is free software; you can redistribute it and/or
				9	* modify it under the terms of the GNU General Public License as published
				10	* by the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program/include file is distributed in the hope that it will be
				14	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
				15	* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program (in the main directory of the Linux-NTFS
				20	* distribution in the file COPYING); if not, write to the Free Software
				21	* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*/
				23
				24	#include <linux/errno.h>
				25	#include <linux/fs.h>
				26	#include <linux/gfp.h>
				27	#include <linux/mm.h>
				28	#include <linux/pagemap.h>
				29	#include <linux/swap.h>
				30	#include <linux/buffer_head.h>
				31	#include <linux/writeback.h>
				32	#include <linux/bit_spinlock.h>
				33
				34	#include "aops.h"
				35	#include "attrib.h"
				36	#include "debug.h"
				37	#include "inode.h"
				38	#include "mft.h"
				39	#include "runlist.h"
				40	#include "types.h"
				41	#include "ntfs.h"
				42
				43	/**
				44	* ntfs_end_buffer_async_read - async io completion for reading attributes
				45	* @bh: buffer head on which io is completed
				46	* @uptodate: whether @bh is now uptodate or not
				47	*
				48	* Asynchronous I/O completion handler for reading pages belonging to the
				49	* attribute address space of an inode. The inodes can either be files or
				50	* directories or they can be fake inodes describing some attribute.
				51	*
				52	* If NInoMstProtected(), perform the post read mst fixups when all IO on the
				53	* page has been completed and mark the page uptodate or set the error bit on
				54	* the page. To determine the size of the records that need fixing up, we
				55	* cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
				56	* record size, and index_block_size_bits, to the log(base 2) of the ntfs
				57	* record size.
				58	*/
				59	static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				60	{
				61	unsigned long flags;
				62	struct buffer_head first, tmp;
				63	struct page *page;
				64	struct inode *vi;
				65	ntfs_inode *ni;
				66	int page_uptodate = 1;
				67
				68	page = bh->b_page;
				69	vi = page->mapping->host;
				70	ni = NTFS_I(vi);
				71
				72	if (likely(uptodate)) {
				73	loff_t i_size;
				74	s64 file_ofs, init_size;
				75
				76	set_buffer_uptodate(bh);
				77
				78	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				79	bh_offset(bh);
				80	read_lock_irqsave(&ni->size_lock, flags);
				81	init_size = ni->initialized_size;
				82	i_size = i_size_read(vi);
				83	read_unlock_irqrestore(&ni->size_lock, flags);
				84	if (unlikely(init_size > i_size)) {
				85	/* Race with shrinking truncate. */
				86	init_size = i_size;
				87	}
				88	/* Check for the current buffer head overflowing. */
				89	if (unlikely(file_ofs + bh->b_size > init_size)) {
				90	int ofs;
				91	void *kaddr;
				92
				93	ofs = 0;
				94	if (file_ofs < init_size)
				95	ofs = init_size - file_ofs;
				96	local_irq_save(flags);
				97	kaddr = kmap_atomic(page);
				98	memset(kaddr + bh_offset(bh) + ofs, 0,
				99	bh->b_size - ofs);
				100	flush_dcache_page(page);
				101	kunmap_atomic(kaddr);
				102	local_irq_restore(flags);
				103	}
				104	} else {
				105	clear_buffer_uptodate(bh);
				106	SetPageError(page);
				107	ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
				108	"0x%llx.", (unsigned long long)bh->b_blocknr);
				109	}
				110	first = page_buffers(page);
				111	flags = bh_uptodate_lock_irqsave(first);
				112	clear_buffer_async_read(bh);
				113	unlock_buffer(bh);
				114	tmp = bh;
				115	do {
				116	if (!buffer_uptodate(tmp))
				117	page_uptodate = 0;
				118	if (buffer_async_read(tmp)) {
				119	if (likely(buffer_locked(tmp)))
				120	goto still_busy;
				121	/* Async buffers must be locked. */
				122	BUG();
				123	}
				124	tmp = tmp->b_this_page;
				125	} while (tmp != bh);
				126	bh_uptodate_unlock_irqrestore(first, flags);
				127	/*
				128	* If none of the buffers had errors then we can set the page uptodate,
				129	* but we first have to perform the post read mst fixups, if the
				130	* attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
				131	* Note we ignore fixup errors as those are detected when
				132	* map_mft_record() is called which gives us per record granularity
				133	* rather than per page granularity.
				134	*/
				135	if (!NInoMstProtected(ni)) {
				136	if (likely(page_uptodate && !PageError(page)))
				137	SetPageUptodate(page);
				138	} else {
				139	u8 *kaddr;
				140	unsigned int i, recs;
				141	u32 rec_size;
				142
				143	rec_size = ni->itype.index.block_size;
				144	recs = PAGE_CACHE_SIZE / rec_size;
				145	/* Should have been verified before we got here... */
				146	BUG_ON(!recs);
				147	local_irq_save_nort(flags);
				148	kaddr = kmap_atomic(page);
				149	for (i = 0; i < recs; i++)
				150	post_read_mst_fixup((NTFS_RECORD*)(kaddr +
				151	i * rec_size), rec_size);
				152	kunmap_atomic(kaddr);
				153	local_irq_restore_nort(flags);
				154	flush_dcache_page(page);
				155	if (likely(page_uptodate && !PageError(page)))
				156	SetPageUptodate(page);
				157	}
				158	unlock_page(page);
				159	return;
				160	still_busy:
				161	bh_uptodate_unlock_irqrestore(first, flags);
				162	}
				163
				164	/**
				165	* ntfs_read_block - fill a @page of an address space with data
				166	* @page: page cache page to fill with data
				167	*
				168	* Fill the page @page of the address space belonging to the @page->host inode.
				169	* We read each buffer asynchronously and when all buffers are read in, our io
				170	* completion handler ntfs_end_buffer_read_async(), if required, automatically
				171	* applies the mst fixups to the page before finally marking it uptodate and
				172	* unlocking it.
				173	*
				174	* We only enforce allocated_size limit because i_size is checked for in
				175	* generic_file_read().
				176	*
				177	* Return 0 on success and -errno on error.
				178	*
				179	* Contains an adapted version of fs/buffer.c::block_read_full_page().
				180	*/
				181	static int ntfs_read_block(struct page *page)
				182	{
				183	loff_t i_size;
				184	VCN vcn;
				185	LCN lcn;
				186	s64 init_size;
				187	struct inode *vi;
				188	ntfs_inode *ni;
				189	ntfs_volume *vol;
				190	runlist_element *rl;
				191	struct buffer_head bh, head, *arr[MAX_BUF_PER_PAGE];
				192	sector_t iblock, lblock, zblock;
				193	unsigned long flags;
				194	unsigned int blocksize, vcn_ofs;
				195	int i, nr;
				196	unsigned char blocksize_bits;
				197
				198	vi = page->mapping->host;
				199	ni = NTFS_I(vi);
				200	vol = ni->vol;
				201
				202	/* $MFT/$DATA must have its complete runlist in memory at all times. */
				203	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
				204
				205	blocksize = vol->sb->s_blocksize;
				206	blocksize_bits = vol->sb->s_blocksize_bits;
				207
				208	if (!page_has_buffers(page)) {
				209	create_empty_buffers(page, blocksize, 0);
				210	if (unlikely(!page_has_buffers(page))) {
				211	unlock_page(page);
				212	return -ENOMEM;
				213	}
				214	}
				215	bh = head = page_buffers(page);
				216	BUG_ON(!bh);
				217
				218	/*
				219	* We may be racing with truncate. To avoid some of the problems we
				220	* now take a snapshot of the various sizes and use those for the whole
				221	* of the function. In case of an extending truncate it just means we
				222	* may leave some buffers unmapped which are now allocated. This is
				223	* not a problem since these buffers will just get mapped when a write
				224	* occurs. In case of a shrinking truncate, we will detect this later
				225	* on due to the runlist being incomplete and if the page is being
				226	* fully truncated, truncate will throw it away as soon as we unlock
				227	* it so no need to worry what we do with it.
				228	*/
				229	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				230	read_lock_irqsave(&ni->size_lock, flags);
				231	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
				232	init_size = ni->initialized_size;
				233	i_size = i_size_read(vi);
				234	read_unlock_irqrestore(&ni->size_lock, flags);
				235	if (unlikely(init_size > i_size)) {
				236	/* Race with shrinking truncate. */
				237	init_size = i_size;
				238	}
				239	zblock = (init_size + blocksize - 1) >> blocksize_bits;
				240
				241	/* Loop through all the buffers in the page. */
				242	rl = NULL;
				243	nr = i = 0;
				244	do {
				245	int err = 0;
				246
				247	if (unlikely(buffer_uptodate(bh)))
				248	continue;
				249	if (unlikely(buffer_mapped(bh))) {
				250	arr[nr++] = bh;
				251	continue;
				252	}
				253	bh->b_bdev = vol->sb->s_bdev;
				254	/* Is the block within the allowed limits? */
				255	if (iblock < lblock) {
				256	bool is_retry = false;
				257
				258	/* Convert iblock into corresponding vcn and offset. */
				259	vcn = (VCN)iblock << blocksize_bits >>
				260	vol->cluster_size_bits;
				261	vcn_ofs = ((VCN)iblock << blocksize_bits) &
				262	vol->cluster_size_mask;
				263	if (!rl) {
				264	lock_retry_remap:
				265	down_read(&ni->runlist.lock);
				266	rl = ni->runlist.rl;
				267	}
				268	if (likely(rl != NULL)) {
				269	/* Seek to element containing target vcn. */
				270	while (rl->length && rl[1].vcn <= vcn)
				271	rl++;
				272	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				273	} else
				274	lcn = LCN_RL_NOT_MAPPED;
				275	/* Successful remap. */
				276	if (lcn >= 0) {
				277	/* Setup buffer head to correct block. */
				278	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				279	+ vcn_ofs) >> blocksize_bits;
				280	set_buffer_mapped(bh);
				281	/* Only read initialized data blocks. */
				282	if (iblock < zblock) {
				283	arr[nr++] = bh;
				284	continue;
				285	}
				286	/* Fully non-initialized data block, zero it. */
				287	goto handle_zblock;
				288	}
				289	/* It is a hole, need to zero it. */
				290	if (lcn == LCN_HOLE)
				291	goto handle_hole;
				292	/* If first try and runlist unmapped, map and retry. */
				293	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				294	is_retry = true;
				295	/*
				296	* Attempt to map runlist, dropping lock for
				297	* the duration.
				298	*/
				299	up_read(&ni->runlist.lock);
				300	err = ntfs_map_runlist(ni, vcn);
				301	if (likely(!err))
				302	goto lock_retry_remap;
				303	rl = NULL;
				304	} else if (!rl)
				305	up_read(&ni->runlist.lock);
				306	/*
				307	* If buffer is outside the runlist, treat it as a
				308	* hole. This can happen due to concurrent truncate
				309	* for example.
				310	*/
				311	if (err == -ENOENT \|\| lcn == LCN_ENOENT) {
				312	err = 0;
				313	goto handle_hole;
				314	}
				315	/* Hard error, zero out region. */
				316	if (!err)
				317	err = -EIO;
				318	bh->b_blocknr = -1;
				319	SetPageError(page);
				320	ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
				321	"attribute type 0x%x, vcn 0x%llx, "
				322	"offset 0x%x because its location on "
				323	"disk could not be determined%s "
				324	"(error code %i).", ni->mft_no,
				325	ni->type, (unsigned long long)vcn,
				326	vcn_ofs, is_retry ? " even after "
				327	"retrying" : "", err);
				328	}
				329	/*
				330	* Either iblock was outside lblock limits or
				331	* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
				332	* of the page and set the buffer uptodate.
				333	*/
				334	handle_hole:
				335	bh->b_blocknr = -1UL;
				336	clear_buffer_mapped(bh);
				337	handle_zblock:
				338	zero_user(page, i * blocksize, blocksize);
				339	if (likely(!err))
				340	set_buffer_uptodate(bh);
				341	} while (i++, iblock++, (bh = bh->b_this_page) != head);
				342
				343	/* Release the lock if we took it. */
				344	if (rl)
				345	up_read(&ni->runlist.lock);
				346
				347	/* Check we have at least one buffer ready for i/o. */
				348	if (nr) {
				349	struct buffer_head *tbh;
				350
				351	/* Lock the buffers. */
				352	for (i = 0; i < nr; i++) {
				353	tbh = arr[i];
				354	lock_buffer(tbh);
				355	tbh->b_end_io = ntfs_end_buffer_async_read;
				356	set_buffer_async_read(tbh);
				357	}
				358	/* Finally, start i/o on the buffers. */
				359	for (i = 0; i < nr; i++) {
				360	tbh = arr[i];
				361	if (likely(!buffer_uptodate(tbh)))
				362	submit_bh(READ, tbh);
				363	else
				364	ntfs_end_buffer_async_read(tbh, 1);
				365	}
				366	return 0;
				367	}
				368	/* No i/o was scheduled on any of the buffers. */
				369	if (likely(!PageError(page)))
				370	SetPageUptodate(page);
				371	else /* Signal synchronous i/o error. */
				372	nr = -EIO;
				373	unlock_page(page);
				374	return nr;
				375	}
				376
				377	/**
				378	* ntfs_readpage - fill a @page of a @file with data from the device
				379	* @file: open file to which the page @page belongs or NULL
				380	* @page: page cache page to fill with data
				381	*
				382	* For non-resident attributes, ntfs_readpage() fills the @page of the open
				383	* file @file by calling the ntfs version of the generic block_read_full_page()
				384	* function, ntfs_read_block(), which in turn creates and reads in the buffers
				385	* associated with the page asynchronously.
				386	*
				387	* For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
				388	* data from the mft record (which at this stage is most likely in memory) and
				389	* fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
				390	* even if the mft record is not cached at this point in time, we need to wait
				391	* for it to be read in before we can do the copy.
				392	*
				393	* Return 0 on success and -errno on error.
				394	*/
				395	static int ntfs_readpage(struct file file, struct page page)
				396	{
				397	loff_t i_size;
				398	struct inode *vi;
				399	ntfs_inode ni, base_ni;
				400	u8 *addr;
				401	ntfs_attr_search_ctx *ctx;
				402	MFT_RECORD *mrec;
				403	unsigned long flags;
				404	u32 attr_len;
				405	int err = 0;
				406
				407	retry_readpage:
				408	BUG_ON(!PageLocked(page));
				409	vi = page->mapping->host;
				410	i_size = i_size_read(vi);
				411	/* Is the page fully outside i_size? (truncate in progress) */
				412	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				413	PAGE_CACHE_SHIFT)) {
				414	zero_user(page, 0, PAGE_CACHE_SIZE);
				415	ntfs_debug("Read outside i_size - truncated?");
				416	goto done;
				417	}
				418	/*
				419	* This can potentially happen because we clear PageUptodate() during
				420	* ntfs_writepage() of MstProtected() attributes.
				421	*/
				422	if (PageUptodate(page)) {
				423	unlock_page(page);
				424	return 0;
				425	}
				426	ni = NTFS_I(vi);
				427	/*
				428	* Only $DATA attributes can be encrypted and only unnamed $DATA
				429	* attributes can be compressed. Index root can have the flags set but
				430	* this means to create compressed/encrypted files, not that the
				431	* attribute is compressed/encrypted. Note we need to check for
				432	* AT_INDEX_ALLOCATION since this is the type of both directory and
				433	* index inodes.
				434	*/
				435	if (ni->type != AT_INDEX_ALLOCATION) {
				436	/* If attribute is encrypted, deny access, just like NT4. */
				437	if (NInoEncrypted(ni)) {
				438	BUG_ON(ni->type != AT_DATA);
				439	err = -EACCES;
				440	goto err_out;
				441	}
				442	/* Compressed data streams are handled in compress.c. */
				443	if (NInoNonResident(ni) && NInoCompressed(ni)) {
				444	BUG_ON(ni->type != AT_DATA);
				445	BUG_ON(ni->name_len);
				446	return ntfs_read_compressed_block(page);
				447	}
				448	}
				449	/* NInoNonResident() == NInoIndexAllocPresent() */
				450	if (NInoNonResident(ni)) {
				451	/* Normal, non-resident data stream. */
				452	return ntfs_read_block(page);
				453	}
				454	/*
				455	* Attribute is resident, implying it is not compressed or encrypted.
				456	* This also means the attribute is smaller than an mft record and
				457	* hence smaller than a page, so can simply zero out any pages with
				458	* index above 0. Note the attribute can actually be marked compressed
				459	* but if it is resident the actual data is not compressed so we are
				460	* ok to ignore the compressed flag here.
				461	*/
				462	if (unlikely(page->index > 0)) {
				463	zero_user(page, 0, PAGE_CACHE_SIZE);
				464	goto done;
				465	}
				466	if (!NInoAttr(ni))
				467	base_ni = ni;
				468	else
				469	base_ni = ni->ext.base_ntfs_ino;
				470	/* Map, pin, and lock the mft record. */
				471	mrec = map_mft_record(base_ni);
				472	if (IS_ERR(mrec)) {
				473	err = PTR_ERR(mrec);
				474	goto err_out;
				475	}
				476	/*
				477	* If a parallel write made the attribute non-resident, drop the mft
				478	* record and retry the readpage.
				479	*/
				480	if (unlikely(NInoNonResident(ni))) {
				481	unmap_mft_record(base_ni);
				482	goto retry_readpage;
				483	}
				484	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
				485	if (unlikely(!ctx)) {
				486	err = -ENOMEM;
				487	goto unm_err_out;
				488	}
				489	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				490	CASE_SENSITIVE, 0, NULL, 0, ctx);
				491	if (unlikely(err))
				492	goto put_unm_err_out;
				493	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
				494	read_lock_irqsave(&ni->size_lock, flags);
				495	if (unlikely(attr_len > ni->initialized_size))
				496	attr_len = ni->initialized_size;
				497	i_size = i_size_read(vi);
				498	read_unlock_irqrestore(&ni->size_lock, flags);
				499	if (unlikely(attr_len > i_size)) {
				500	/* Race with shrinking truncate. */
				501	attr_len = i_size;
				502	}
				503	addr = kmap_atomic(page);
				504	/* Copy the data to the page. */
				505	memcpy(addr, (u8*)ctx->attr +
				506	le16_to_cpu(ctx->attr->data.resident.value_offset),
				507	attr_len);
				508	/* Zero the remainder of the page. */
				509	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				510	flush_dcache_page(page);
				511	kunmap_atomic(addr);
				512	put_unm_err_out:
				513	ntfs_attr_put_search_ctx(ctx);
				514	unm_err_out:
				515	unmap_mft_record(base_ni);
				516	done:
				517	SetPageUptodate(page);
				518	err_out:
				519	unlock_page(page);
				520	return err;
				521	}
				522
				523	#ifdef NTFS_RW
				524
				525	/**
				526	* ntfs_write_block - write a @page to the backing store
				527	* @page: page cache page to write out
				528	* @wbc: writeback control structure
				529	*
				530	* This function is for writing pages belonging to non-resident, non-mst
				531	* protected attributes to their backing store.
				532	*
				533	* For a page with buffers, map and write the dirty buffers asynchronously
				534	* under page writeback. For a page without buffers, create buffers for the
				535	* page, then proceed as above.
				536	*
				537	* If a page doesn't have buffers the page dirty state is definitive. If a page
				538	* does have buffers, the page dirty state is just a hint, and the buffer dirty
				539	* state is definitive. (A hint which has rules: dirty buffers against a clean
				540	* page is illegal. Other combinations are legal and need to be handled. In
				541	* particular a dirty page containing clean buffers for example.)
				542	*
				543	* Return 0 on success and -errno on error.
				544	*
				545	* Based on ntfs_read_block() and __block_write_full_page().
				546	*/
				547	static int ntfs_write_block(struct page page, struct writeback_control wbc)
				548	{
				549	VCN vcn;
				550	LCN lcn;
				551	s64 initialized_size;
				552	loff_t i_size;
				553	sector_t block, dblock, iblock;
				554	struct inode *vi;
				555	ntfs_inode *ni;
				556	ntfs_volume *vol;
				557	runlist_element *rl;
				558	struct buffer_head bh, head;
				559	unsigned long flags;
				560	unsigned int blocksize, vcn_ofs;
				561	int err;
				562	bool need_end_writeback;
				563	unsigned char blocksize_bits;
				564
				565	vi = page->mapping->host;
				566	ni = NTFS_I(vi);
				567	vol = ni->vol;
				568
				569	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				570	"0x%lx.", ni->mft_no, ni->type, page->index);
				571
				572	BUG_ON(!NInoNonResident(ni));
				573	BUG_ON(NInoMstProtected(ni));
				574	blocksize = vol->sb->s_blocksize;
				575	blocksize_bits = vol->sb->s_blocksize_bits;
				576	if (!page_has_buffers(page)) {
				577	BUG_ON(!PageUptodate(page));
				578	create_empty_buffers(page, blocksize,
				579	(1 << BH_Uptodate) \| (1 << BH_Dirty));
				580	if (unlikely(!page_has_buffers(page))) {
				581	ntfs_warning(vol->sb, "Error allocating page "
				582	"buffers. Redirtying page so we try "
				583	"again later.");
				584	/*
				585	* Put the page back on mapping->dirty_pages, but leave
				586	* its buffers' dirty state as-is.
				587	*/
				588	redirty_page_for_writepage(wbc, page);
				589	unlock_page(page);
				590	return 0;
				591	}
				592	}
				593	bh = head = page_buffers(page);
				594	BUG_ON(!bh);
				595
				596	/* NOTE: Different naming scheme to ntfs_read_block()! */
				597
				598	/* The first block in the page. */
				599	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				600
				601	read_lock_irqsave(&ni->size_lock, flags);
				602	i_size = i_size_read(vi);
				603	initialized_size = ni->initialized_size;
				604	read_unlock_irqrestore(&ni->size_lock, flags);
				605
				606	/* The first out of bounds block for the data size. */
				607	dblock = (i_size + blocksize - 1) >> blocksize_bits;
				608
				609	/* The last (fully or partially) initialized block. */
				610	iblock = initialized_size >> blocksize_bits;
				611
				612	/*
				613	* Be very careful. We have no exclusion from __set_page_dirty_buffers
				614	* here, and the (potentially unmapped) buffers may become dirty at
				615	* any time. If a buffer becomes dirty here after we've inspected it
				616	* then we just miss that fact, and the page stays dirty.
				617	*
				618	* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
				619	* handle that here by just cleaning them.
				620	*/
				621
				622	/*
				623	* Loop through all the buffers in the page, mapping all the dirty
				624	* buffers to disk addresses and handling any aliases from the
				625	* underlying block device's mapping.
				626	*/
				627	rl = NULL;
				628	err = 0;
				629	do {
				630	bool is_retry = false;
				631
				632	if (unlikely(block >= dblock)) {
				633	/*
				634	* Mapped buffers outside i_size will occur, because
				635	* this page can be outside i_size when there is a
				636	* truncate in progress. The contents of such buffers
				637	* were zeroed by ntfs_writepage().
				638	*
				639	* FIXME: What about the small race window where
				640	* ntfs_writepage() has not done any clearing because
				641	* the page was within i_size but before we get here,
				642	* vmtruncate() modifies i_size?
				643	*/
				644	clear_buffer_dirty(bh);
				645	set_buffer_uptodate(bh);
				646	continue;
				647	}
				648
				649	/* Clean buffers are not written out, so no need to map them. */
				650	if (!buffer_dirty(bh))
				651	continue;
				652
				653	/* Make sure we have enough initialized size. */
				654	if (unlikely((block >= iblock) &&
				655	(initialized_size < i_size))) {
				656	/*
				657	* If this page is fully outside initialized size, zero
				658	* out all pages between the current initialized size
				659	* and the current page. Just use ntfs_readpage() to do
				660	* the zeroing transparently.
				661	*/
				662	if (block > iblock) {
				663	// TODO:
				664	// For each page do:
				665	// - read_cache_page()
				666	// Again for each page do:
				667	// - wait_on_page_locked()
				668	// - Check (PageUptodate(page) &&
				669	// !PageError(page))
				670	// Update initialized size in the attribute and
				671	// in the inode.
				672	// Again, for each page do:
				673	// __set_page_dirty_buffers();
				674	// page_cache_release()
				675	// We don't need to wait on the writes.
				676	// Update iblock.
				677	}
				678	/*
				679	* The current page straddles initialized size. Zero
				680	* all non-uptodate buffers and set them uptodate (and
				681	* dirty?). Note, there aren't any non-uptodate buffers
				682	* if the page is uptodate.
				683	* FIXME: For an uptodate page, the buffers may need to
				684	* be written out because they were not initialized on
				685	* disk before.
				686	*/
				687	if (!PageUptodate(page)) {
				688	// TODO:
				689	// Zero any non-uptodate buffers up to i_size.
				690	// Set them uptodate and dirty.
				691	}
				692	// TODO:
				693	// Update initialized size in the attribute and in the
				694	// inode (up to i_size).
				695	// Update iblock.
				696	// FIXME: This is inefficient. Try to batch the two
				697	// size changes to happen in one go.
				698	ntfs_error(vol->sb, "Writing beyond initialized size "
				699	"is not supported yet. Sorry.");
				700	err = -EOPNOTSUPP;
				701	break;
				702	// Do NOT set_buffer_new() BUT DO clear buffer range
				703	// outside write request range.
				704	// set_buffer_uptodate() on complete buffers as well as
				705	// set_buffer_dirty().
				706	}
				707
				708	/* No need to map buffers that are already mapped. */
				709	if (buffer_mapped(bh))
				710	continue;
				711
				712	/* Unmapped, dirty buffer. Need to map it. */
				713	bh->b_bdev = vol->sb->s_bdev;
				714
				715	/* Convert block into corresponding vcn and offset. */
				716	vcn = (VCN)block << blocksize_bits;
				717	vcn_ofs = vcn & vol->cluster_size_mask;
				718	vcn >>= vol->cluster_size_bits;
				719	if (!rl) {
				720	lock_retry_remap:
				721	down_read(&ni->runlist.lock);
				722	rl = ni->runlist.rl;
				723	}
				724	if (likely(rl != NULL)) {
				725	/* Seek to element containing target vcn. */
				726	while (rl->length && rl[1].vcn <= vcn)
				727	rl++;
				728	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				729	} else
				730	lcn = LCN_RL_NOT_MAPPED;
				731	/* Successful remap. */
				732	if (lcn >= 0) {
				733	/* Setup buffer head to point to correct block. */
				734	bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
				735	vcn_ofs) >> blocksize_bits;
				736	set_buffer_mapped(bh);
				737	continue;
				738	}
				739	/* It is a hole, need to instantiate it. */
				740	if (lcn == LCN_HOLE) {
				741	u8 *kaddr;
				742	unsigned long bpos, bend;
				743
				744	/* Check if the buffer is zero. */
				745	kaddr = kmap_atomic(page);
				746	bpos = (unsigned long *)(kaddr + bh_offset(bh));
				747	bend = (unsigned long )((u8)bpos + blocksize);
				748	do {
				749	if (unlikely(*bpos))
				750	break;
				751	} while (likely(++bpos < bend));
				752	kunmap_atomic(kaddr);
				753	if (bpos == bend) {
				754	/*
				755	* Buffer is zero and sparse, no need to write
				756	* it.
				757	*/
				758	bh->b_blocknr = -1;
				759	clear_buffer_dirty(bh);
				760	continue;
				761	}
				762	// TODO: Instantiate the hole.
				763	// clear_buffer_new(bh);
				764	// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
				765	ntfs_error(vol->sb, "Writing into sparse regions is "
				766	"not supported yet. Sorry.");
				767	err = -EOPNOTSUPP;
				768	break;
				769	}
				770	/* If first try and runlist unmapped, map and retry. */
				771	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				772	is_retry = true;
				773	/*
				774	* Attempt to map runlist, dropping lock for
				775	* the duration.
				776	*/
				777	up_read(&ni->runlist.lock);
				778	err = ntfs_map_runlist(ni, vcn);
				779	if (likely(!err))
				780	goto lock_retry_remap;
				781	rl = NULL;
				782	} else if (!rl)
				783	up_read(&ni->runlist.lock);
				784	/*
				785	* If buffer is outside the runlist, truncate has cut it out
				786	* of the runlist. Just clean and clear the buffer and set it
				787	* uptodate so it can get discarded by the VM.
				788	*/
				789	if (err == -ENOENT \|\| lcn == LCN_ENOENT) {
				790	bh->b_blocknr = -1;
				791	clear_buffer_dirty(bh);
				792	zero_user(page, bh_offset(bh), blocksize);
				793	set_buffer_uptodate(bh);
				794	err = 0;
				795	continue;
				796	}
				797	/* Failed to map the buffer, even after retrying. */
				798	if (!err)
				799	err = -EIO;
				800	bh->b_blocknr = -1;
				801	ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				802	"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				803	"because its location on disk could not be "
				804	"determined%s (error code %i).", ni->mft_no,
				805	ni->type, (unsigned long long)vcn,
				806	vcn_ofs, is_retry ? " even after "
				807	"retrying" : "", err);
				808	break;
				809	} while (block++, (bh = bh->b_this_page) != head);
				810
				811	/* Release the lock if we took it. */
				812	if (rl)
				813	up_read(&ni->runlist.lock);
				814
				815	/* For the error case, need to reset bh to the beginning. */
				816	bh = head;
				817
				818	/* Just an optimization, so ->readpage() is not called later. */
				819	if (unlikely(!PageUptodate(page))) {
				820	int uptodate = 1;
				821	do {
				822	if (!buffer_uptodate(bh)) {
				823	uptodate = 0;
				824	bh = head;
				825	break;
				826	}
				827	} while ((bh = bh->b_this_page) != head);
				828	if (uptodate)
				829	SetPageUptodate(page);
				830	}
				831
				832	/* Setup all mapped, dirty buffers for async write i/o. */
				833	do {
				834	if (buffer_mapped(bh) && buffer_dirty(bh)) {
				835	lock_buffer(bh);
				836	if (test_clear_buffer_dirty(bh)) {
				837	BUG_ON(!buffer_uptodate(bh));
				838	mark_buffer_async_write(bh);
				839	} else
				840	unlock_buffer(bh);
				841	} else if (unlikely(err)) {
				842	/*
				843	* For the error case. The buffer may have been set
				844	* dirty during attachment to a dirty page.
				845	*/
				846	if (err != -ENOMEM)
				847	clear_buffer_dirty(bh);
				848	}
				849	} while ((bh = bh->b_this_page) != head);
				850
				851	if (unlikely(err)) {
				852	// TODO: Remove the -EOPNOTSUPP check later on...
				853	if (unlikely(err == -EOPNOTSUPP))
				854	err = 0;
				855	else if (err == -ENOMEM) {
				856	ntfs_warning(vol->sb, "Error allocating memory. "
				857	"Redirtying page so we try again "
				858	"later.");
				859	/*
				860	* Put the page back on mapping->dirty_pages, but
				861	* leave its buffer's dirty state as-is.
				862	*/
				863	redirty_page_for_writepage(wbc, page);
				864	err = 0;
				865	} else
				866	SetPageError(page);
				867	}
				868
				869	BUG_ON(PageWriteback(page));
				870	set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
				871
				872	/* Submit the prepared buffers for i/o. */
				873	need_end_writeback = true;
				874	do {
				875	struct buffer_head *next = bh->b_this_page;
				876	if (buffer_async_write(bh)) {
				877	submit_bh(WRITE, bh);
				878	need_end_writeback = false;
				879	}
				880	bh = next;
				881	} while (bh != head);
				882	unlock_page(page);
				883
				884	/* If no i/o was started, need to end_page_writeback(). */
				885	if (unlikely(need_end_writeback))
				886	end_page_writeback(page);
				887
				888	ntfs_debug("Done.");
				889	return err;
				890	}
				891
				892	/**
				893	* ntfs_write_mst_block - write a @page to the backing store
				894	* @page: page cache page to write out
				895	* @wbc: writeback control structure
				896	*
				897	* This function is for writing pages belonging to non-resident, mst protected
				898	* attributes to their backing store. The only supported attributes are index
				899	* allocation and $MFT/$DATA. Both directory inodes and index inodes are
				900	* supported for the index allocation case.
				901	*
				902	* The page must remain locked for the duration of the write because we apply
				903	* the mst fixups, write, and then undo the fixups, so if we were to unlock the
				904	* page before undoing the fixups, any other user of the page will see the
				905	* page contents as corrupt.
				906	*
				907	* We clear the page uptodate flag for the duration of the function to ensure
				908	* exclusion for the $MFT/$DATA case against someone mapping an mft record we
				909	* are about to apply the mst fixups to.
				910	*
				911	* Return 0 on success and -errno on error.
				912	*
				913	* Based on ntfs_write_block(), ntfs_mft_writepage(), and
				914	* write_mft_record_nolock().
				915	*/
				916	static int ntfs_write_mst_block(struct page *page,
				917	struct writeback_control *wbc)
				918	{
				919	sector_t block, dblock, rec_block;
				920	struct inode *vi = page->mapping->host;
				921	ntfs_inode *ni = NTFS_I(vi);
				922	ntfs_volume *vol = ni->vol;
				923	u8 *kaddr;
				924	unsigned int rec_size = ni->itype.index.block_size;
				925	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
				926	struct buffer_head bh, head, tbh, rec_start_bh;
				927	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
				928	runlist_element *rl;
				929	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
				930	unsigned bh_size, rec_size_bits;
				931	bool sync, is_mft, page_is_dirty, rec_is_dirty;
				932	unsigned char bh_size_bits;
				933
				934	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				935	"0x%lx.", vi->i_ino, ni->type, page->index);
				936	BUG_ON(!NInoNonResident(ni));
				937	BUG_ON(!NInoMstProtected(ni));
				938	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
				939	/*
				940	* NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
				941	* in its page cache were to be marked dirty. However this should
				942	* never happen with the current driver and considering we do not
				943	* handle this case here we do want to BUG(), at least for now.
				944	*/
				945	BUG_ON(!(is_mft \|\| S_ISDIR(vi->i_mode) \|\|
				946	(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
				947	bh_size = vol->sb->s_blocksize;
				948	bh_size_bits = vol->sb->s_blocksize_bits;
				949	max_bhs = PAGE_CACHE_SIZE / bh_size;
				950	BUG_ON(!max_bhs);
				951	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
				952
				953	/* Were we called for sync purposes? */
				954	sync = (wbc->sync_mode == WB_SYNC_ALL);
				955
				956	/* Make sure we have mapped buffers. */
				957	bh = head = page_buffers(page);
				958	BUG_ON(!bh);
				959
				960	rec_size_bits = ni->itype.index.block_size_bits;
				961	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
				962	bhs_per_rec = rec_size >> bh_size_bits;
				963	BUG_ON(!bhs_per_rec);
				964
				965	/* The first block in the page. */
				966	rec_block = block = (sector_t)page->index <<
				967	(PAGE_CACHE_SHIFT - bh_size_bits);
				968
				969	/* The first out of bounds block for the data size. */
				970	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
				971
				972	rl = NULL;
				973	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
				974	page_is_dirty = rec_is_dirty = false;
				975	rec_start_bh = NULL;
				976	do {
				977	bool is_retry = false;
				978
				979	if (likely(block < rec_block)) {
				980	if (unlikely(block >= dblock)) {
				981	clear_buffer_dirty(bh);
				982	set_buffer_uptodate(bh);
				983	continue;
				984	}
				985	/*
				986	* This block is not the first one in the record. We
				987	* ignore the buffer's dirty state because we could
				988	* have raced with a parallel mark_ntfs_record_dirty().
				989	*/
				990	if (!rec_is_dirty)
				991	continue;
				992	if (unlikely(err2)) {
				993	if (err2 != -ENOMEM)
				994	clear_buffer_dirty(bh);
				995	continue;
				996	}
				997	} else /* if (block == rec_block) */ {
				998	BUG_ON(block > rec_block);
				999	/* This block is the first one in the record. */
				1000	rec_block += bhs_per_rec;
				1001	err2 = 0;
				1002	if (unlikely(block >= dblock)) {
				1003	clear_buffer_dirty(bh);
				1004	continue;
				1005	}
				1006	if (!buffer_dirty(bh)) {
				1007	/* Clean records are not written out. */
				1008	rec_is_dirty = false;
				1009	continue;
				1010	}
				1011	rec_is_dirty = true;
				1012	rec_start_bh = bh;
				1013	}
				1014	/* Need to map the buffer if it is not mapped already. */
				1015	if (unlikely(!buffer_mapped(bh))) {
				1016	VCN vcn;
				1017	LCN lcn;
				1018	unsigned int vcn_ofs;
				1019
				1020	bh->b_bdev = vol->sb->s_bdev;
				1021	/* Obtain the vcn and offset of the current block. */
				1022	vcn = (VCN)block << bh_size_bits;
				1023	vcn_ofs = vcn & vol->cluster_size_mask;
				1024	vcn >>= vol->cluster_size_bits;
				1025	if (!rl) {
				1026	lock_retry_remap:
				1027	down_read(&ni->runlist.lock);
				1028	rl = ni->runlist.rl;
				1029	}
				1030	if (likely(rl != NULL)) {
				1031	/* Seek to element containing target vcn. */
				1032	while (rl->length && rl[1].vcn <= vcn)
				1033	rl++;
				1034	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				1035	} else
				1036	lcn = LCN_RL_NOT_MAPPED;
				1037	/* Successful remap. */
				1038	if (likely(lcn >= 0)) {
				1039	/* Setup buffer head to correct block. */
				1040	bh->b_blocknr = ((lcn <<
				1041	vol->cluster_size_bits) +
				1042	vcn_ofs) >> bh_size_bits;
				1043	set_buffer_mapped(bh);
				1044	} else {
				1045	/*
				1046	* Remap failed. Retry to map the runlist once
				1047	* unless we are working on $MFT which always
				1048	* has the whole of its runlist in memory.
				1049	*/
				1050	if (!is_mft && !is_retry &&
				1051	lcn == LCN_RL_NOT_MAPPED) {
				1052	is_retry = true;
				1053	/*
				1054	* Attempt to map runlist, dropping
				1055	* lock for the duration.
				1056	*/
				1057	up_read(&ni->runlist.lock);
				1058	err2 = ntfs_map_runlist(ni, vcn);
				1059	if (likely(!err2))
				1060	goto lock_retry_remap;
				1061	if (err2 == -ENOMEM)
				1062	page_is_dirty = true;
				1063	lcn = err2;
				1064	} else {
				1065	err2 = -EIO;
				1066	if (!rl)
				1067	up_read(&ni->runlist.lock);
				1068	}
				1069	/* Hard error. Abort writing this record. */
				1070	if (!err \|\| err == -ENOMEM)
				1071	err = err2;
				1072	bh->b_blocknr = -1;
				1073	ntfs_error(vol->sb, "Cannot write ntfs record "
				1074	"0x%llx (inode 0x%lx, "
				1075	"attribute type 0x%x) because "
				1076	"its location on disk could "
				1077	"not be determined (error "
				1078	"code %lli).",
				1079	(long long)block <<
				1080	bh_size_bits >>
				1081	vol->mft_record_size_bits,
				1082	ni->mft_no, ni->type,
				1083	(long long)lcn);
				1084	/*
				1085	* If this is not the first buffer, remove the
				1086	* buffers in this record from the list of
				1087	* buffers to write and clear their dirty bit
				1088	* if not error -ENOMEM.
				1089	*/
				1090	if (rec_start_bh != bh) {
				1091	while (bhs[--nr_bhs] != rec_start_bh)
				1092	;
				1093	if (err2 != -ENOMEM) {
				1094	do {
				1095	clear_buffer_dirty(
				1096	rec_start_bh);
				1097	} while ((rec_start_bh =
				1098	rec_start_bh->
				1099	b_this_page) !=
				1100	bh);
				1101	}
				1102	}
				1103	continue;
				1104	}
				1105	}
				1106	BUG_ON(!buffer_uptodate(bh));
				1107	BUG_ON(nr_bhs >= max_bhs);
				1108	bhs[nr_bhs++] = bh;
				1109	} while (block++, (bh = bh->b_this_page) != head);
				1110	if (unlikely(rl))
				1111	up_read(&ni->runlist.lock);
				1112	/* If there were no dirty buffers, we are done. */
				1113	if (!nr_bhs)
				1114	goto done;
				1115	/* Map the page so we can access its contents. */
				1116	kaddr = kmap(page);
				1117	/* Clear the page uptodate flag whilst the mst fixups are applied. */
				1118	BUG_ON(!PageUptodate(page));
				1119	ClearPageUptodate(page);
				1120	for (i = 0; i < nr_bhs; i++) {
				1121	unsigned int ofs;
				1122
				1123	/* Skip buffers which are not at the beginning of records. */
				1124	if (i % bhs_per_rec)
				1125	continue;
				1126	tbh = bhs[i];
				1127	ofs = bh_offset(tbh);
				1128	if (is_mft) {
				1129	ntfs_inode *tni;
				1130	unsigned long mft_no;
				1131
				1132	/* Get the mft record number. */
				1133	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1134	>> rec_size_bits;
				1135	/* Check whether to write this mft record. */
				1136	tni = NULL;
				1137	if (!ntfs_may_write_mft_record(vol, mft_no,
				1138	(MFT_RECORD*)(kaddr + ofs), &tni)) {
				1139	/*
				1140	* The record should not be written. This
				1141	* means we need to redirty the page before
				1142	* returning.
				1143	*/
				1144	page_is_dirty = true;
				1145	/*
				1146	* Remove the buffers in this mft record from
				1147	* the list of buffers to write.
				1148	*/
				1149	do {
				1150	bhs[i] = NULL;
				1151	} while (++i % bhs_per_rec);
				1152	continue;
				1153	}
				1154	/*
				1155	* The record should be written. If a locked ntfs
				1156	* inode was returned, add it to the array of locked
				1157	* ntfs inodes.
				1158	*/
				1159	if (tni)
				1160	locked_nis[nr_locked_nis++] = tni;
				1161	}
				1162	/* Apply the mst protection fixups. */
				1163	err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				1164	rec_size);
				1165	if (unlikely(err2)) {
				1166	if (!err \|\| err == -ENOMEM)
				1167	err = -EIO;
				1168	ntfs_error(vol->sb, "Failed to apply mst fixups "
				1169	"(inode 0x%lx, attribute type 0x%x, "
				1170	"page index 0x%lx, page offset 0x%x)!"
				1171	" Unmount and run chkdsk.", vi->i_ino,
				1172	ni->type, page->index, ofs);
				1173	/*
				1174	* Mark all the buffers in this record clean as we do
				1175	* not want to write corrupt data to disk.
				1176	*/
				1177	do {
				1178	clear_buffer_dirty(bhs[i]);
				1179	bhs[i] = NULL;
				1180	} while (++i % bhs_per_rec);
				1181	continue;
				1182	}
				1183	nr_recs++;
				1184	}
				1185	/* If no records are to be written out, we are done. */
				1186	if (!nr_recs)
				1187	goto unm_done;
				1188	flush_dcache_page(page);
				1189	/* Lock buffers and start synchronous write i/o on them. */
				1190	for (i = 0; i < nr_bhs; i++) {
				1191	tbh = bhs[i];
				1192	if (!tbh)
				1193	continue;
				1194	if (!trylock_buffer(tbh))
				1195	BUG();
				1196	/* The buffer dirty state is now irrelevant, just clean it. */
				1197	clear_buffer_dirty(tbh);
				1198	BUG_ON(!buffer_uptodate(tbh));
				1199	BUG_ON(!buffer_mapped(tbh));
				1200	get_bh(tbh);
				1201	tbh->b_end_io = end_buffer_write_sync;
				1202	submit_bh(WRITE, tbh);
				1203	}
				1204	/* Synchronize the mft mirror now if not @sync. */
				1205	if (is_mft && !sync)
				1206	goto do_mirror;
				1207	do_wait:
				1208	/* Wait on i/o completion of buffers. */
				1209	for (i = 0; i < nr_bhs; i++) {
				1210	tbh = bhs[i];
				1211	if (!tbh)
				1212	continue;
				1213	wait_on_buffer(tbh);
				1214	if (unlikely(!buffer_uptodate(tbh))) {
				1215	ntfs_error(vol->sb, "I/O error while writing ntfs "
				1216	"record buffer (inode 0x%lx, "
				1217	"attribute type 0x%x, page index "
				1218	"0x%lx, page offset 0x%lx)! Unmount "
				1219	"and run chkdsk.", vi->i_ino, ni->type,
				1220	page->index, bh_offset(tbh));
				1221	if (!err \|\| err == -ENOMEM)
				1222	err = -EIO;
				1223	/*
				1224	* Set the buffer uptodate so the page and buffer
				1225	* states do not become out of sync.
				1226	*/
				1227	set_buffer_uptodate(tbh);
				1228	}
				1229	}
				1230	/* If @sync, now synchronize the mft mirror. */
				1231	if (is_mft && sync) {
				1232	do_mirror:
				1233	for (i = 0; i < nr_bhs; i++) {
				1234	unsigned long mft_no;
				1235	unsigned int ofs;
				1236
				1237	/*
				1238	* Skip buffers which are not at the beginning of
				1239	* records.
				1240	*/
				1241	if (i % bhs_per_rec)
				1242	continue;
				1243	tbh = bhs[i];
				1244	/* Skip removed buffers (and hence records). */
				1245	if (!tbh)
				1246	continue;
				1247	ofs = bh_offset(tbh);
				1248	/* Get the mft record number. */
				1249	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1250	>> rec_size_bits;
				1251	if (mft_no < vol->mftmirr_size)
				1252	ntfs_sync_mft_mirror(vol, mft_no,
				1253	(MFT_RECORD*)(kaddr + ofs),
				1254	sync);
				1255	}
				1256	if (!sync)
				1257	goto do_wait;
				1258	}
				1259	/* Remove the mst protection fixups again. */
				1260	for (i = 0; i < nr_bhs; i++) {
				1261	if (!(i % bhs_per_rec)) {
				1262	tbh = bhs[i];
				1263	if (!tbh)
				1264	continue;
				1265	post_write_mst_fixup((NTFS_RECORD*)(kaddr +
				1266	bh_offset(tbh)));
				1267	}
				1268	}
				1269	flush_dcache_page(page);
				1270	unm_done:
				1271	/* Unlock any locked inodes. */
				1272	while (nr_locked_nis-- > 0) {
				1273	ntfs_inode tni, base_tni;
				1274
				1275	tni = locked_nis[nr_locked_nis];
				1276	/* Get the base inode. */
				1277	mutex_lock(&tni->extent_lock);
				1278	if (tni->nr_extents >= 0)
				1279	base_tni = tni;
				1280	else {
				1281	base_tni = tni->ext.base_ntfs_ino;
				1282	BUG_ON(!base_tni);
				1283	}
				1284	mutex_unlock(&tni->extent_lock);
				1285	ntfs_debug("Unlocking %s inode 0x%lx.",
				1286	tni == base_tni ? "base" : "extent",
				1287	tni->mft_no);
				1288	mutex_unlock(&tni->mrec_lock);
				1289	atomic_dec(&tni->count);
				1290	iput(VFS_I(base_tni));
				1291	}
				1292	SetPageUptodate(page);
				1293	kunmap(page);
				1294	done:
				1295	if (unlikely(err && err != -ENOMEM)) {
				1296	/*
				1297	* Set page error if there is only one ntfs record in the page.
				1298	* Otherwise we would loose per-record granularity.
				1299	*/
				1300	if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
				1301	SetPageError(page);
				1302	NVolSetErrors(vol);
				1303	}
				1304	if (page_is_dirty) {
				1305	ntfs_debug("Page still contains one or more dirty ntfs "
				1306	"records. Redirtying the page starting at "
				1307	"record 0x%lx.", page->index <<
				1308	(PAGE_CACHE_SHIFT - rec_size_bits));
				1309	redirty_page_for_writepage(wbc, page);
				1310	unlock_page(page);
				1311	} else {
				1312	/*
				1313	* Keep the VM happy. This must be done otherwise the
				1314	* radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
				1315	* the page is clean.
				1316	*/
				1317	BUG_ON(PageWriteback(page));
				1318	set_page_writeback(page);
				1319	unlock_page(page);
				1320	end_page_writeback(page);
				1321	}
				1322	if (likely(!err))
				1323	ntfs_debug("Done.");
				1324	return err;
				1325	}
				1326
				1327	/**
				1328	* ntfs_writepage - write a @page to the backing store
				1329	* @page: page cache page to write out
				1330	* @wbc: writeback control structure
				1331	*
				1332	* This is called from the VM when it wants to have a dirty ntfs page cache
				1333	* page cleaned. The VM has already locked the page and marked it clean.
				1334	*
				1335	* For non-resident attributes, ntfs_writepage() writes the @page by calling
				1336	* the ntfs version of the generic block_write_full_page() function,
				1337	* ntfs_write_block(), which in turn if necessary creates and writes the
				1338	* buffers associated with the page asynchronously.
				1339	*
				1340	* For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
				1341	* the data to the mft record (which at this stage is most likely in memory).
				1342	* The mft record is then marked dirty and written out asynchronously via the
				1343	* vfs inode dirty code path for the inode the mft record belongs to or via the
				1344	* vm page dirty code path for the page the mft record is in.
				1345	*
				1346	* Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
				1347	*
				1348	* Return 0 on success and -errno on error.
				1349	*/
				1350	static int ntfs_writepage(struct page page, struct writeback_control wbc)
				1351	{
				1352	loff_t i_size;
				1353	struct inode *vi = page->mapping->host;
				1354	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
				1355	char *addr;
				1356	ntfs_attr_search_ctx *ctx = NULL;
				1357	MFT_RECORD *m = NULL;
				1358	u32 attr_len;
				1359	int err;
				1360
				1361	retry_writepage:
				1362	BUG_ON(!PageLocked(page));
				1363	i_size = i_size_read(vi);
				1364	/* Is the page fully outside i_size? (truncate in progress) */
				1365	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				1366	PAGE_CACHE_SHIFT)) {
				1367	/*
				1368	* The page may have dirty, unmapped buffers. Make them
				1369	* freeable here, so the page does not leak.
				1370	*/
				1371	block_invalidatepage(page, 0);
				1372	unlock_page(page);
				1373	ntfs_debug("Write outside i_size - truncated?");
				1374	return 0;
				1375	}
				1376	/*
				1377	* Only $DATA attributes can be encrypted and only unnamed $DATA
				1378	* attributes can be compressed. Index root can have the flags set but
				1379	* this means to create compressed/encrypted files, not that the
				1380	* attribute is compressed/encrypted. Note we need to check for
				1381	* AT_INDEX_ALLOCATION since this is the type of both directory and
				1382	* index inodes.
				1383	*/
				1384	if (ni->type != AT_INDEX_ALLOCATION) {
				1385	/* If file is encrypted, deny access, just like NT4. */
				1386	if (NInoEncrypted(ni)) {
				1387	unlock_page(page);
				1388	BUG_ON(ni->type != AT_DATA);
				1389	ntfs_debug("Denying write access to encrypted file.");
				1390	return -EACCES;
				1391	}
				1392	/* Compressed data streams are handled in compress.c. */
				1393	if (NInoNonResident(ni) && NInoCompressed(ni)) {
				1394	BUG_ON(ni->type != AT_DATA);
				1395	BUG_ON(ni->name_len);
				1396	// TODO: Implement and replace this with
				1397	// return ntfs_write_compressed_block(page);
				1398	unlock_page(page);
				1399	ntfs_error(vi->i_sb, "Writing to compressed files is "
				1400	"not supported yet. Sorry.");
				1401	return -EOPNOTSUPP;
				1402	}
				1403	// TODO: Implement and remove this check.
				1404	if (NInoNonResident(ni) && NInoSparse(ni)) {
				1405	unlock_page(page);
				1406	ntfs_error(vi->i_sb, "Writing to sparse files is not "
				1407	"supported yet. Sorry.");
				1408	return -EOPNOTSUPP;
				1409	}
				1410	}
				1411	/* NInoNonResident() == NInoIndexAllocPresent() */
				1412	if (NInoNonResident(ni)) {
				1413	/* We have to zero every time due to mmap-at-end-of-file. */
				1414	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
				1415	/* The page straddles i_size. */
				1416	unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
				1417	zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
				1418	}
				1419	/* Handle mst protected attributes. */
				1420	if (NInoMstProtected(ni))
				1421	return ntfs_write_mst_block(page, wbc);
				1422	/* Normal, non-resident data stream. */
				1423	return ntfs_write_block(page, wbc);
				1424	}
				1425	/*
				1426	* Attribute is resident, implying it is not compressed, encrypted, or
				1427	* mst protected. This also means the attribute is smaller than an mft
				1428	* record and hence smaller than a page, so can simply return error on
				1429	* any pages with index above 0. Note the attribute can actually be
				1430	* marked compressed but if it is resident the actual data is not
				1431	* compressed so we are ok to ignore the compressed flag here.
				1432	*/
				1433	BUG_ON(page_has_buffers(page));
				1434	BUG_ON(!PageUptodate(page));
				1435	if (unlikely(page->index > 0)) {
				1436	ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				1437	"Aborting write.", page->index);
				1438	BUG_ON(PageWriteback(page));
				1439	set_page_writeback(page);
				1440	unlock_page(page);
				1441	end_page_writeback(page);
				1442	return -EIO;
				1443	}
				1444	if (!NInoAttr(ni))
				1445	base_ni = ni;
				1446	else
				1447	base_ni = ni->ext.base_ntfs_ino;
				1448	/* Map, pin, and lock the mft record. */
				1449	m = map_mft_record(base_ni);
				1450	if (IS_ERR(m)) {
				1451	err = PTR_ERR(m);
				1452	m = NULL;
				1453	ctx = NULL;
				1454	goto err_out;
				1455	}
				1456	/*
				1457	* If a parallel write made the attribute non-resident, drop the mft
				1458	* record and retry the writepage.
				1459	*/
				1460	if (unlikely(NInoNonResident(ni))) {
				1461	unmap_mft_record(base_ni);
				1462	goto retry_writepage;
				1463	}
				1464	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1465	if (unlikely(!ctx)) {
				1466	err = -ENOMEM;
				1467	goto err_out;
				1468	}
				1469	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1470	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1471	if (unlikely(err))
				1472	goto err_out;
				1473	/*
				1474	* Keep the VM happy. This must be done otherwise the radix-tree tag
				1475	* PAGECACHE_TAG_DIRTY remains set even though the page is clean.
				1476	*/
				1477	BUG_ON(PageWriteback(page));
				1478	set_page_writeback(page);
				1479	unlock_page(page);
				1480	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
				1481	i_size = i_size_read(vi);
				1482	if (unlikely(attr_len > i_size)) {
				1483	/* Race with shrinking truncate or a failed truncate. */
				1484	attr_len = i_size;
				1485	/*
				1486	* If the truncate failed, fix it up now. If a concurrent
				1487	* truncate, we do its job, so it does not have to do anything.
				1488	*/
				1489	err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
				1490	attr_len);
				1491	/* Shrinking cannot fail. */
				1492	BUG_ON(err);
				1493	}
				1494	addr = kmap_atomic(page);
				1495	/* Copy the data from the page to the mft record. */
				1496	memcpy((u8*)ctx->attr +
				1497	le16_to_cpu(ctx->attr->data.resident.value_offset),
				1498	addr, attr_len);
				1499	/* Zero out of bounds area in the page cache page. */
				1500	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				1501	kunmap_atomic(addr);
				1502	flush_dcache_page(page);
				1503	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1504	/* We are done with the page. */
				1505	end_page_writeback(page);
				1506	/* Finally, mark the mft record dirty, so it gets written back. */
				1507	mark_mft_record_dirty(ctx->ntfs_ino);
				1508	ntfs_attr_put_search_ctx(ctx);
				1509	unmap_mft_record(base_ni);
				1510	return 0;
				1511	err_out:
				1512	if (err == -ENOMEM) {
				1513	ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				1514	"page so we try again later.");
				1515	/*
				1516	* Put the page back on mapping->dirty_pages, but leave its
				1517	* buffers' dirty state as-is.
				1518	*/
				1519	redirty_page_for_writepage(wbc, page);
				1520	err = 0;
				1521	} else {
				1522	ntfs_error(vi->i_sb, "Resident attribute write failed with "
				1523	"error %i.", err);
				1524	SetPageError(page);
				1525	NVolSetErrors(ni->vol);
				1526	}
				1527	unlock_page(page);
				1528	if (ctx)
				1529	ntfs_attr_put_search_ctx(ctx);
				1530	if (m)
				1531	unmap_mft_record(base_ni);
				1532	return err;
				1533	}
				1534
				1535	#endif /* NTFS_RW */
				1536
				1537	/**
				1538	* ntfs_aops - general address space operations for inodes and attributes
				1539	*/
				1540	const struct address_space_operations ntfs_aops = {
				1541	.readpage = ntfs_readpage, /* Fill page with data. */
				1542	#ifdef NTFS_RW
				1543	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				1544	#endif /* NTFS_RW */
				1545	.migratepage = buffer_migrate_page, /* Move a page cache page from
				1546	one physical page to an
				1547	other. */
				1548	.error_remove_page = generic_error_remove_page,
				1549	};
				1550
				1551	/**
				1552	* ntfs_mst_aops - general address space operations for mst protecteed inodes
				1553	* and attributes
				1554	*/
				1555	const struct address_space_operations ntfs_mst_aops = {
				1556	.readpage = ntfs_readpage, /* Fill page with data. */
				1557	#ifdef NTFS_RW
				1558	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				1559	.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
				1560	without touching the buffers
				1561	belonging to the page. */
				1562	#endif /* NTFS_RW */
				1563	.migratepage = buffer_migrate_page, /* Move a page cache page from
				1564	one physical page to an
				1565	other. */
				1566	.error_remove_page = generic_error_remove_page,
				1567	};
				1568
				1569	#ifdef NTFS_RW
				1570
				1571	/**
				1572	* mark_ntfs_record_dirty - mark an ntfs record dirty
				1573	* @page: page containing the ntfs record to mark dirty
				1574	* @ofs: byte offset within @page at which the ntfs record begins
				1575	*
				1576	* Set the buffers and the page in which the ntfs record is located dirty.
				1577	*
				1578	* The latter also marks the vfs inode the ntfs record belongs to dirty
				1579	* (I_DIRTY_PAGES only).
				1580	*
				1581	* If the page does not have buffers, we create them and set them uptodate.
				1582	* The page may not be locked which is why we need to handle the buffers under
				1583	* the mapping->private_lock. Once the buffers are marked dirty we no longer
				1584	* need the lock since try_to_free_buffers() does not free dirty buffers.
				1585	*/
				1586	void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
				1587	struct address_space *mapping = page->mapping;
				1588	ntfs_inode *ni = NTFS_I(mapping->host);
				1589	struct buffer_head bh, head, *buffers_to_free = NULL;
				1590	unsigned int end, bh_size, bh_ofs;
				1591
				1592	BUG_ON(!PageUptodate(page));
				1593	end = ofs + ni->itype.index.block_size;
				1594	bh_size = VFS_I(ni)->i_sb->s_blocksize;
				1595	spin_lock(&mapping->private_lock);
				1596	if (unlikely(!page_has_buffers(page))) {
				1597	spin_unlock(&mapping->private_lock);
				1598	bh = head = alloc_page_buffers(page, bh_size, 1);
				1599	spin_lock(&mapping->private_lock);
				1600	if (likely(!page_has_buffers(page))) {
				1601	struct buffer_head *tail;
				1602
				1603	do {
				1604	set_buffer_uptodate(bh);
				1605	tail = bh;
				1606	bh = bh->b_this_page;
				1607	} while (bh);
				1608	tail->b_this_page = head;
				1609	attach_page_buffers(page, head);
				1610	} else
				1611	buffers_to_free = bh;
				1612	}
				1613	bh = head = page_buffers(page);
				1614	BUG_ON(!bh);
				1615	do {
				1616	bh_ofs = bh_offset(bh);
				1617	if (bh_ofs + bh_size <= ofs)
				1618	continue;
				1619	if (unlikely(bh_ofs >= end))
				1620	break;
				1621	set_buffer_dirty(bh);
				1622	} while ((bh = bh->b_this_page) != head);
				1623	spin_unlock(&mapping->private_lock);
				1624	__set_page_dirty_nobuffers(page);
				1625	if (unlikely(buffers_to_free)) {
				1626	do {
				1627	bh = buffers_to_free->b_this_page;
				1628	free_buffer_head(buffers_to_free);
				1629	buffers_to_free = bh;
				1630	} while (buffers_to_free);
				1631	}
				1632	}
				1633
				1634	#endif /* NTFS_RW */