Blame - ap/app/e2fsprogs/e2fsprogs-1.42.9/e2fsck/revoke.c - T106_DC

blob: 38c265e43d1a0511b6fa56bb20b5b0d402c06945 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1	/*
				2	* linux/fs/revoke.c
				3	*
				4	* Written by Stephen C. Tweedie <sct@redhat.com>, 2000
				5	*
				6	* Copyright 2000 Red Hat corp --- All Rights Reserved
				7	*
				8	* This file is part of the Linux kernel and is made available under
				9	* the terms of the GNU General Public License, version 2, or at your
				10	* option, any later version, incorporated herein by reference.
				11	*
				12	* Journal revoke routines for the generic filesystem journaling code;
				13	* part of the ext2fs journaling system.
				14	*
				15	* Revoke is the mechanism used to prevent old log records for deleted
				16	* metadata from being replayed on top of newer data using the same
				17	* blocks. The revoke mechanism is used in two separate places:
				18	*
				19	* + Commit: during commit we write the entire list of the current
				20	* transaction's revoked blocks to the journal
				21	*
				22	* + Recovery: during recovery we record the transaction ID of all
				23	* revoked blocks. If there are multiple revoke records in the log
				24	* for a single block, only the last one counts, and if there is a log
				25	* entry for a block beyond the last revoke, then that log entry still
				26	* gets replayed.
				27	*
				28	* We can get interactions between revokes and new log data within a
				29	* single transaction:
				30	*
				31	* Block is revoked and then journaled:
				32	* The desired end result is the journaling of the new block, so we
				33	* cancel the revoke before the transaction commits.
				34	*
				35	* Block is journaled and then revoked:
				36	* The revoke must take precedence over the write of the block, so we
				37	* need either to cancel the journal entry or to write the revoke
				38	* later in the log than the log block. In this case, we choose the
				39	* latter: journaling a block cancels any revoke record for that block
				40	* in the current transaction, so any revoke for that block in the
				41	* transaction must have happened after the block was journaled and so
				42	* the revoke must take precedence.
				43	*
				44	* Block is revoked and then written as data:
				45	* The data write is allowed to succeed, but the revoke is _not_
				46	* cancelled. We still need to prevent old log records from
				47	* overwriting the new data. We don't even need to clear the revoke
				48	* bit here.
				49	*
				50	* Revoke information on buffers is a tri-state value:
				51	*
				52	* RevokeValid clear: no cached revoke status, need to look it up
				53	* RevokeValid set, Revoked clear:
				54	* buffer has not been revoked, and cancel_revoke
				55	* need do nothing.
				56	* RevokeValid set, Revoked set:
				57	* buffer has been revoked.
				58	*/
				59
				60	#ifndef __KERNEL__
				61	#include "config.h"
				62	#include "jfs_user.h"
				63	#else
				64	#include <linux/sched.h>
				65	#include <linux/fs.h>
				66	#include <linux/jbd.h>
				67	#include <linux/errno.h>
				68	#include <linux/slab.h>
				69	#include <linux/locks.h>
				70	#include <linux/list.h>
				71	#include <linux/smp_lock.h>
				72	#include <linux/init.h>
				73	#endif
				74
				75	static lkmem_cache_t *revoke_record_cache;
				76	static lkmem_cache_t *revoke_table_cache;
				77
				78	/* Each revoke record represents one single revoked block. During
				79	journal replay, this involves recording the transaction ID of the
				80	last transaction to revoke this block. */
				81
				82	struct jbd_revoke_record_s
				83	{
				84	struct list_head hash;
				85	tid_t sequence; /* Used for recovery only */
				86	unsigned long blocknr;
				87	};
				88
				89
				/*
				 * The revoke table: a plain chained hash table whose buckets hold
				 * revoke records.
				 */
				struct jbd_revoke_table_s {
					/*
					 * Number of buckets.  Must be a power of two.  A larger table
					 * might conceivably be wanted for recovery.
					 */
					int hash_size;
					int hash_shift;			/* log2 of hash_size */
					struct list_head *hash_table;	/* array of hash_size chain heads */
				};
				99
				100
				101	#ifdef __KERNEL__
				102	static void write_one_revoke_record(journal_t , transaction_t ,
				103	struct journal_head *, int ,
				104	struct jbd_revoke_record_s *);
				105	static void flush_descriptor(journal_t , struct journal_head , int);
				106	#endif
				107
				108	/* Utility functions to maintain the revoke table */
				109
				110	/* Borrowed from buffer.c: this is a tried and tested block hash function */
				111	static inline int hash(journal_t *journal, unsigned long block)
				112	{
				113	struct jbd_revoke_table_s *table = journal->j_revoke;
				114	int hash_shift = table->hash_shift;
				115
				116	return ((block << (hash_shift - 6)) ^
				117	(block >> 13) ^
				118	(block << (hash_shift - 12))) & (table->hash_size - 1);
				119	}
				120
				121	static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
				122	tid_t seq)
				123	{
				124	struct list_head *hash_list;
				125	struct jbd_revoke_record_s *record;
				126
				127	#ifdef __KERNEL__
				128	repeat:
				129	#endif
				130	record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS);
				131	if (!record)
				132	goto oom;
				133
				134	record->sequence = seq;
				135	record->blocknr = blocknr;
				136	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
				137	list_add(&record->hash, hash_list);
				138	return 0;
				139
				140	oom:
				141	#ifdef __KERNEL__
				142	if (!journal_oom_retry)
				143	return -ENOMEM;
				144	jbd_debug(1, "ENOMEM in " __FUNCTION__ ", retrying.\n");
				145	current->policy \|= SCHED_YIELD;
				146	schedule();
				147	goto repeat;
				148	#else
				149	return -ENOMEM;
				150	#endif
				151	}
				152
				153	/* Find a revoke record in the journal's hash table. */
				154
				155	static struct jbd_revoke_record_s find_revoke_record(journal_t journal,
				156	unsigned long blocknr)
				157	{
				158	struct list_head *hash_list;
				159	struct jbd_revoke_record_s *record;
				160
				161	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
				162
				163	record = (struct jbd_revoke_record_s *) hash_list->next;
				164	while (&(record->hash) != hash_list) {
				165	if (record->blocknr == blocknr)
				166	return record;
				167	record = (struct jbd_revoke_record_s *) record->hash.next;
				168	}
				169	return NULL;
				170	}
				171
				172	int __init journal_init_revoke_caches(void)
				173	{
				174	revoke_record_cache = kmem_cache_create("revoke_record",
				175	sizeof(struct jbd_revoke_record_s),
				176	0, SLAB_HWCACHE_ALIGN, NULL, NULL);
				177	if (revoke_record_cache == 0)
				178	return -ENOMEM;
				179
				180	revoke_table_cache = kmem_cache_create("revoke_table",
				181	sizeof(struct jbd_revoke_table_s),
				182	0, 0, NULL, NULL);
				183	if (revoke_table_cache == 0) {
				184	kmem_cache_destroy(revoke_record_cache);
				185	revoke_record_cache = NULL;
				186	return -ENOMEM;
				187	}
				188	return 0;
				189	}
				190
				191	void journal_destroy_revoke_caches(void)
				192	{
				193	kmem_cache_destroy(revoke_record_cache);
				194	revoke_record_cache = 0;
				195	kmem_cache_destroy(revoke_table_cache);
				196	revoke_table_cache = 0;
				197	}
				198
				199	/* Initialise the revoke table for a given journal to a given size. */
				200
				201	int journal_init_revoke(journal_t *journal, int hash_size)
				202	{
				203	int shift, tmp;
				204
				205	J_ASSERT (journal->j_revoke == NULL);
				206
				207	journal->j_revoke = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
				208	if (!journal->j_revoke)
				209	return -ENOMEM;
				210
				211	/* Check that the hash_size is a power of two */
				212	J_ASSERT ((hash_size & (hash_size-1)) == 0);
				213
				214	journal->j_revoke->hash_size = hash_size;
				215
				216	shift = 0;
				217	tmp = hash_size;
				218	while((tmp >>= 1UL) != 0UL)
				219	shift++;
				220	journal->j_revoke->hash_shift = shift;
				221
				222	journal->j_revoke->hash_table =
				223	kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
				224	if (!journal->j_revoke->hash_table) {
				225	kmem_cache_free(revoke_table_cache, journal->j_revoke);
				226	journal->j_revoke = NULL;
				227	return -ENOMEM;
				228	}
				229
				230	for (tmp = 0; tmp < hash_size; tmp++)
				231	INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
				232
				233	return 0;
				234	}
				235
				236	/* Destoy a journal's revoke table. The table must already be empty! */
				237
				238	void journal_destroy_revoke(journal_t *journal)
				239	{
				240	struct jbd_revoke_table_s *table;
				241	struct list_head *hash_list;
				242	int i;
				243
				244	table = journal->j_revoke;
				245	if (!table)
				246	return;
				247
				248	for (i=0; i<table->hash_size; i++) {
				249	hash_list = &table->hash_table[i];
				250	J_ASSERT (list_empty(hash_list));
				251	}
				252
				253	kfree(table->hash_table);
				254	kmem_cache_free(revoke_table_cache, table);
				255	journal->j_revoke = NULL;
				256	}
				257
				258
				259	#ifdef __KERNEL__
				260
				261	/*
				262	* journal_revoke: revoke a given buffer_head from the journal. This
				263	* prevents the block from being replayed during recovery if we take a
				264	* crash after this current transaction commits. Any subsequent
				265	* metadata writes of the buffer in this transaction cancel the
				266	* revoke.
				267	*
				268	* Note that this call may block --- it is up to the caller to make
				269	* sure that there are no further calls to journal_write_metadata
				270	* before the revoke is complete. In ext3, this implies calling the
				271	* revoke before clearing the block bitmap when we are deleting
				272	* metadata.
				273	*
				274	* Revoke performs a journal_forget on any buffer_head passed in as a
				275	* parameter, but does _not_ forget the buffer_head if the bh was only
				276	* found implicitly.
				277	*
				278	* bh_in may not be a journalled buffer - it may have come off
				279	* the hash tables without an attached journal_head.
				280	*
				281	* If bh_in is non-zero, journal_revoke() will decrement its b_count
				282	* by one.
				283	*/
				284
				285	int journal_revoke(handle_t *handle, unsigned long blocknr,
				286	struct buffer_head *bh_in)
				287	{
				288	struct buffer_head *bh = NULL;
				289	journal_t *journal;
				290	kdev_t dev;
				291	int err;
				292
				293	if (bh_in)
				294	BUFFER_TRACE(bh_in, "enter");
				295
				296	journal = handle->h_transaction->t_journal;
				297	if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){
				298	J_ASSERT (!"Cannot set revoke feature!");
				299	return -EINVAL;
				300	}
				301
				302	dev = journal->j_fs_dev;
				303	bh = bh_in;
				304
				305	if (!bh) {
				306	bh = get_hash_table(dev, blocknr, journal->j_blocksize);
				307	if (bh)
				308	BUFFER_TRACE(bh, "found on hash");
				309	}
				310	#ifdef JBD_EXPENSIVE_CHECKING
				311	else {
				312	struct buffer_head *bh2;
				313
				314	/* If there is a different buffer_head lying around in
				315	* memory anywhere... */
				316	bh2 = get_hash_table(dev, blocknr, journal->j_blocksize);
				317	if (bh2) {
				318	/* ... and it has RevokeValid status... */
				319	if ((bh2 != bh) &&
				320	test_bit(BH_RevokeValid, &bh2->b_state))
				321	/* ...then it better be revoked too,
				322	* since it's illegal to create a revoke
				323	* record against a buffer_head which is
				324	* not marked revoked --- that would
				325	* risk missing a subsequent revoke
				326	* cancel. */
				327	J_ASSERT_BH(bh2, test_bit(BH_Revoked, &
				328	bh2->b_state));
				329	__brelse(bh2);
				330	}
				331	}
				332	#endif
				333
				334	/* We really ought not ever to revoke twice in a row without
				335	first having the revoke cancelled: it's illegal to free a
				336	block twice without allocating it in between! */
				337	if (bh) {
				338	J_ASSERT_BH(bh, !test_bit(BH_Revoked, &bh->b_state));
				339	set_bit(BH_Revoked, &bh->b_state);
				340	set_bit(BH_RevokeValid, &bh->b_state);
				341	if (bh_in) {
				342	BUFFER_TRACE(bh_in, "call journal_forget");
				343	journal_forget(handle, bh_in);
				344	} else {
				345	BUFFER_TRACE(bh, "call brelse");
				346	__brelse(bh);
				347	}
				348	}
				349
				350	lock_journal(journal);
				351	jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
				352	err = insert_revoke_hash(journal, blocknr,
				353	handle->h_transaction->t_tid);
				354	unlock_journal(journal);
				355	BUFFER_TRACE(bh_in, "exit");
				356	return err;
				357	}
				358
				359	/*
				360	* Cancel an outstanding revoke. For use only internally by the
				361	* journaling code (called from journal_get_write_access).
				362	*
				363	* We trust the BH_Revoked bit on the buffer if the buffer is already
				364	* being journaled: if there is no revoke pending on the buffer, then we
				365	* don't do anything here.
				366	*
				367	* This would break if it were possible for a buffer to be revoked and
				368	* discarded, and then reallocated within the same transaction. In such
				369	* a case we would have lost the revoked bit, but when we arrived here
				370	* the second time we would still have a pending revoke to cancel. So,
				371	* do not trust the Revoked bit on buffers unless RevokeValid is also
				372	* set.
				373	*
				374	* The caller must have the journal locked.
				375	*/
				376	int journal_cancel_revoke(handle_t handle, struct journal_head jh)
				377	{
				378	struct jbd_revoke_record_s *record;
				379	journal_t *journal = handle->h_transaction->t_journal;
				380	int need_cancel;
				381	int did_revoke = 0; /* akpm: debug */
				382	struct buffer_head *bh = jh2bh(jh);
				383
				384	jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);
				385
				386	/* Is the existing Revoke bit valid? If so, we trust it, and
				387	* only perform the full cancel if the revoke bit is set. If
				388	* not, we can't trust the revoke bit, and we need to do the
				389	* full search for a revoke record. */
				390	if (test_and_set_bit(BH_RevokeValid, &bh->b_state))
				391	need_cancel = (test_and_clear_bit(BH_Revoked, &bh->b_state));
				392	else {
				393	need_cancel = 1;
				394	clear_bit(BH_Revoked, &bh->b_state);
				395	}
				396
				397	if (need_cancel) {
				398	record = find_revoke_record(journal, bh->b_blocknr);
				399	if (record) {
				400	jbd_debug(4, "cancelled existing revoke on "
				401	"blocknr %lu\n", bh->b_blocknr);
				402	list_del(&record->hash);
				403	kmem_cache_free(revoke_record_cache, record);
				404	did_revoke = 1;
				405	}
				406	}
				407
				408	#ifdef JBD_EXPENSIVE_CHECKING
				409	/* There better not be one left behind by now! */
				410	record = find_revoke_record(journal, bh->b_blocknr);
				411	J_ASSERT_JH(jh, record == NULL);
				412	#endif
				413
				414	/* Finally, have we just cleared revoke on an unhashed
				415	* buffer_head? If so, we'd better make sure we clear the
				416	* revoked status on any hashed alias too, otherwise the revoke
				417	* state machine will get very upset later on. */
				418	if (need_cancel && !bh->b_pprev) {
				419	struct buffer_head *bh2;
				420	bh2 = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
				421	if (bh2) {
				422	clear_bit(BH_Revoked, &bh2->b_state);
				423	__brelse(bh2);
				424	}
				425	}
				426
				427	return did_revoke;
				428	}
				429
				430
				431	/*
				432	* Write revoke records to the journal for all entries in the current
				433	* revoke hash, deleting the entries as we go.
				434	*
				435	* Called with the journal lock held.
				436	*/
				437
				438	void journal_write_revoke_records(journal_t *journal,
				439	transaction_t *transaction)
				440	{
				441	struct journal_head *descriptor;
				442	struct jbd_revoke_record_s *record;
				443	struct jbd_revoke_table_s *revoke;
				444	struct list_head *hash_list;
				445	int i, offset, count;
				446
				447	descriptor = NULL;
				448	offset = 0;
				449	count = 0;
				450	revoke = journal->j_revoke;
				451
				452	for (i = 0; i < revoke->hash_size; i++) {
				453	hash_list = &revoke->hash_table[i];
				454
				455	while (!list_empty(hash_list)) {
				456	record = (struct jbd_revoke_record_s *)
				457	hash_list->next;
				458	write_one_revoke_record(journal, transaction,
				459	&descriptor, &offset,
				460	record);
				461	count++;
				462	list_del(&record->hash);
				463	kmem_cache_free(revoke_record_cache, record);
				464	}
				465	}
				466	if (descriptor)
				467	flush_descriptor(journal, descriptor, offset);
				468	jbd_debug(1, "Wrote %d revoke records\n", count);
				469	}
				470
				471	/*
				472	* Write out one revoke record. We need to create a new descriptor
				473	* block if the old one is full or if we have not already created one.
				474	*/
				475
				476	static void write_one_revoke_record(journal_t *journal,
				477	transaction_t *transaction,
				478	struct journal_head **descriptorp,
				479	int *offsetp,
				480	struct jbd_revoke_record_s *record)
				481	{
				482	struct journal_head *descriptor;
				483	int offset;
				484	journal_header_t *header;
				485
				486	/* If we are already aborting, this all becomes a noop. We
				487	still need to go round the loop in
				488	journal_write_revoke_records in order to free all of the
				489	revoke records: only the IO to the journal is omitted. */
				490	if (is_journal_aborted(journal))
				491	return;
				492
				493	descriptor = *descriptorp;
				494	offset = *offsetp;
				495
				496	/* Make sure we have a descriptor with space left for the record */
				497	if (descriptor) {
				498	if (offset == journal->j_blocksize) {
				499	flush_descriptor(journal, descriptor, offset);
				500	descriptor = NULL;
				501	}
				502	}
				503
				504	if (!descriptor) {
				505	descriptor = journal_get_descriptor_buffer(journal);
				506	if (!descriptor)
				507	return;
				508	header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
				509	header->h_magic = htonl(JFS_MAGIC_NUMBER);
				510	header->h_blocktype = htonl(JFS_REVOKE_BLOCK);
				511	header->h_sequence = htonl(transaction->t_tid);
				512
				513	/* Record it so that we can wait for IO completion later */
				514	JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
				515	journal_file_buffer(descriptor, transaction, BJ_LogCtl);
				516
				517	offset = sizeof(journal_revoke_header_t);
				518	*descriptorp = descriptor;
				519	}
				520
				521	* ((unsigned int *)(&jh2bh(descriptor)->b_data[offset])) =
				522	htonl(record->blocknr);
				523	offset += 4;
				524	*offsetp = offset;
				525	}
				526
				527	/*
				528	* Flush a revoke descriptor out to the journal. If we are aborting,
				529	* this is a noop; otherwise we are generating a buffer which needs to
				530	* be waited for during commit, so it has to go onto the appropriate
				531	* journal buffer list.
				532	*/
				533
				534	static void flush_descriptor(journal_t *journal,
				535	struct journal_head *descriptor,
				536	int offset)
				537	{
				538	journal_revoke_header_t *header;
				539
				540	if (is_journal_aborted(journal)) {
				541	JBUFFER_TRACE(descriptor, "brelse");
				542	__brelse(jh2bh(descriptor));
				543	return;
				544	}
				545
				546	header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data;
				547	header->r_count = htonl(offset);
				548	set_bit(BH_JWrite, &jh2bh(descriptor)->b_state);
				549	{
				550	struct buffer_head *bh = jh2bh(descriptor);
				551	BUFFER_TRACE(bh, "write");
				552	ll_rw_block (WRITE, 1, &bh);
				553	}
				554	}
				555
				556	#endif
				557
				558	/*
				559	* Revoke support for recovery.
				560	*
				561	* Recovery needs to be able to:
				562	*
				563	* record all revoke records, including the tid of the latest instance
				564	* of each revoke in the journal
				565	*
				566	* check whether a given block in a given transaction should be replayed
				567	* (ie. has not been revoked by a revoke record in that or a subsequent
				568	* transaction)
				569	*
				570	* empty the revoke table after recovery.
				571	*/
				572
				573	/*
				574	* First, setting revoke records. We create a new revoke record for
				575	* every block ever revoked in the log as we scan it for recovery, and
				576	* we update the existing records if we find multiple revokes for a
				577	* single block.
				578	*/
				579
				580	int journal_set_revoke(journal_t *journal,
				581	unsigned long blocknr,
				582	tid_t sequence)
				583	{
				584	struct jbd_revoke_record_s *record;
				585
				586	record = find_revoke_record(journal, blocknr);
				587	if (record) {
				588	/* If we have multiple occurences, only record the
				589	* latest sequence number in the hashed record */
				590	if (tid_gt(sequence, record->sequence))
				591	record->sequence = sequence;
				592	return 0;
				593	}
				594	return insert_revoke_hash(journal, blocknr, sequence);
				595	}
				596
				597	/*
				598	* Test revoke records. For a given block referenced in the log, has
				599	* that block been revoked? A revoke record with a given transaction
				600	* sequence number revokes all blocks in that transaction and earlier
				601	* ones, but later transactions still need replayed.
				602	*/
				603
				604	int journal_test_revoke(journal_t *journal,
				605	unsigned long blocknr,
				606	tid_t sequence)
				607	{
				608	struct jbd_revoke_record_s *record;
				609
				610	record = find_revoke_record(journal, blocknr);
				611	if (!record)
				612	return 0;
				613	if (tid_gt(sequence, record->sequence))
				614	return 0;
				615	return 1;
				616	}
				617
				618	/*
				619	* Finally, once recovery is over, we need to clear the revoke table so
				620	* that it can be reused by the running filesystem.
				621	*/
				622
				623	void journal_clear_revoke(journal_t *journal)
				624	{
				625	int i;
				626	struct list_head *hash_list;
				627	struct jbd_revoke_record_s *record;
				628	struct jbd_revoke_table_s *revoke;
				629
				630	revoke = journal->j_revoke;
				631
				632	for (i = 0; i < revoke->hash_size; i++) {
				633	hash_list = &revoke->hash_table[i];
				634	while (!list_empty(hash_list)) {
				635	record = (struct jbd_revoke_record_s*) hash_list->next;
				636	list_del(&record->hash);
				637	kmem_cache_free(revoke_record_cache, record);
				638	}
				639	}
				640	}
				641