/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define	DM_MSG_PREFIX	"region hash"

/*-----------------------------------------------------------------
 * Region hash
 *
 * The mirror splits itself up into discrete regions.  Each
 * region can be in one of three states: clean, dirty,
 * nosync.  There is no need to put clean regions in the hash.
 *
 * In addition to being present in the hash table a region _may_
 * be present on one of three lists.
 *
 *   clean_regions: Regions on this list have no io pending to
 *   them, they are in sync, we are no longer interested in them,
 *   they are dull.  dm_rh_update_states() will remove them from the
 *   hash table.
 *
 *   quiesced_regions: These regions have been spun down, ready
 *   for recovery.  rh_recovery_start() will remove regions from
 *   this list and hand them to kmirrord, which will schedule the
 *   recovery io with kcopyd.
 *
 *   recovered_regions: Regions that kcopyd has successfully
 *   recovered.  dm_rh_update_states() will now schedule any delayed
 *   io, up the recovery_count, and remove the region from the
 *   hash.
 *
 * There are 2 locks:
 *   A rw spin lock 'hash_lock' protects just the hash table,
 *   this is never held in write mode from interrupt context,
 *   which I believe means that we only have to disable irqs when
 *   doing a write lock.
 *
 *   An ordinary spin lock 'region_lock' that protects the three
 *   lists in the region_hash, with the 'state', 'list' and
 *   'delayed_bios' fields of the regions.  This is used from irq
 *   context, so all other uses will have to suspend local irqs.
 *---------------------------------------------------------------*/
struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	unsigned max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};

/*
 * Conversion fns
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);

/*
 * FIXME: shall we pass in a structure instead of all these args to
 * dm_region_hash_create()????
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
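/*
 * Create a region hash.  The number of hash buckets is scaled to
 * nr_regions; region_size is expected to be a power of two, since
 * region_shift is derived from it with ffs().
 */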
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = ffs(region_size) - 1;
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	if (rh->region_pool)
		mempool_destroy(rh->region_pool);

	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

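/*
 * Multiplicative hash of a region key: scale the key by RH_HASH_MULT
 * and use bits [RH_HASH_SHIFT, RH_HASH_SHIFT + log2(nr_buckets)) as
 * the bucket index.
 */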
static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}

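/*
 * Look a region up in the hash.  Returns NULL if it is not present.
 * Caller must hold hash_lock.
 */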
static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

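/*
 * Allocate a new region and insert it into the hash.  The initial
 * state is taken from the dirty log.  If the mempool is exhausted we
 * fall back to a __GFP_NOFAIL kmalloc.  If another thread wins the
 * race and inserts the region first, the duplicate is freed and the
 * existing entry is returned.
 */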
static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

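/*
 * Find a region, allocating it if it is not yet in the hash.  Called
 * with hash_lock held for reading; the read lock is dropped and
 * reacquired around the allocation.
 */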
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the bios before we call 'wake_up_all'.
	 * This is important because if we are suspending,
	 * we want to know that recovery is complete and
	 * the work queue is flushed.  If we wake_up_all
	 * before we dispatch_bios (queue bios and call wake()),
	 * then we risk suspending before the work queue
	 * has been properly flushed.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/* dm_rh_mark_nosync
 * @ms
 * @bio
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_rw & REQ_FLUSH) {
		rh->flush_failure = 1;
		return;
	}

	if (bio->bi_rw & REQ_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * Either case, the region should have not been connected to list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

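/*
 * Splice the clean, recovered and failed-recovered lists out from
 * under the locks, then process them without locking: recovered (and
 * failed-recovered) regions complete their resync work, which also
 * dispatches any delayed bios; clean regions are cleared in the dirty
 * log.  The region entries are freed and the log is flushed.
 */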
void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);

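/*
 * Account a pending write to a region: take a reference on its
 * pending count and, if the region was clean, mark it dirty in the
 * log and take it off the clean list.
 */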
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);

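/*
 * Drop the pending reference taken by rh_inc().  When the last
 * pending I/O for a region completes, move the region to the list
 * appropriate for its state and wake the worker.
 */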
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to corresponding list for next action.
		 * At this point, the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region should be kept off
		 * from clean list.
		 * The hash entry for DM_RH_NOSYNC will remain in memory
		 * until the region is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);

/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

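/*
 * Prepare as many regions for recovery as the recovery_count
 * semaphore (max_recovery) allows, stopping early when the dirty log
 * has no more resync work to hand out.
 */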
void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);

/*
 * Returns any quiesced regions.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);  /* remove from the quiesced list */
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

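/*
 * Called when recovery of a region completes: queue the region on the
 * recovered or failed_recovered list so that dm_rh_update_states()
 * can finish it off, and wake the worker.
 */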
void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

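/*
 * Hold a bio on its region's delayed list.  The delayed bios are
 * dispatched when recovery of the region completes (see
 * complete_resync_work()).
 */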
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);

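/*
 * Drain the recovery_count semaphore so that no new recoveries can
 * start and any prepared or in-progress recoveries have completed
 * before this returns.  dm_rh_start_recovery() re-enables recovery.
 */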
void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");