/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-bio-record.h"

#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-dirty-log.h>
#include <linux/dm-kcopyd.h>
#include <linux/dm-region-hash.h>

#define DM_MSG_PREFIX "raid1"

#define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */

#define DM_RAID1_HANDLE_ERRORS 0x01
#define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)

static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);

/*-----------------------------------------------------------------
 * Mirror set structures.
 *---------------------------------------------------------------*/
enum dm_raid1_error {
	DM_RAID1_WRITE_ERROR,
	DM_RAID1_FLUSH_ERROR,
	DM_RAID1_SYNC_ERROR,
	DM_RAID1_READ_ERROR
};

struct mirror {
	struct mirror_set *ms;
	atomic_t error_count;
	unsigned long error_type;
	struct dm_dev *dev;
	sector_t offset;
};

struct mirror_set {
	struct dm_target *ti;
	struct list_head list;

	uint64_t features;

	spinlock_t lock;	/* protects the lists */
	struct bio_list reads;
	struct bio_list writes;
	struct bio_list failures;
	struct bio_list holds;	/* bios are waiting until suspend */

	struct dm_region_hash *rh;
	struct dm_kcopyd_client *kcopyd_client;
	struct dm_io_client *io_client;
	mempool_t *read_record_pool;

	/* recovery */
	region_t nr_regions;
	int in_sync;
	int log_failure;
	int leg_failure;
	atomic_t suspend;

	atomic_t default_mirror;	/* Default mirror */

	struct workqueue_struct *kmirrord_wq;
	struct work_struct kmirrord_work;
	struct timer_list timer;
	unsigned long timer_pending;

	struct work_struct trigger_event;

	unsigned nr_mirrors;
	struct mirror mirror[0];
};

static void wakeup_mirrord(void *context)
{
	struct mirror_set *ms = context;

	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
}

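/*
 * Delayed wakeup of kmirrord: delayed_wake() arms a one-shot timer
 * (roughly a fifth of a second) and delayed_wake_fn() then kicks the
 * daemon.  This is used when bios have to be requeued (e.g. while a
 * region is being recovered remotely) so they are not retried in a
 * tight loop.
 */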
static void delayed_wake_fn(unsigned long data)
{
	struct mirror_set *ms = (struct mirror_set *) data;

	clear_bit(0, &ms->timer_pending);
	wakeup_mirrord(ms);
}

static void delayed_wake(struct mirror_set *ms)
{
	if (test_and_set_bit(0, &ms->timer_pending))
		return;

	ms->timer.expires = jiffies + HZ / 5;
	ms->timer.data = (unsigned long) ms;
	ms->timer.function = delayed_wake_fn;
	add_timer(&ms->timer);
}

static void wakeup_all_recovery_waiters(void *context)
{
	wake_up_all(&_kmirrord_recovery_stopped);
}

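/*
 * Add a bio to the appropriate work list (reads or writes) and wake
 * kmirrord if the list was previously empty.
 */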
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
{
	unsigned long flags;
	int should_wake = 0;
	struct bio_list *bl;

	bl = (rw == WRITE) ? &ms->writes : &ms->reads;
	spin_lock_irqsave(&ms->lock, flags);
	should_wake = !(bl->head);
	bio_list_add(bl, bio);
	spin_unlock_irqrestore(&ms->lock, flags);

	if (should_wake)
		wakeup_mirrord(ms);
}

static void dispatch_bios(void *context, struct bio_list *bio_list)
{
	struct mirror_set *ms = context;
	struct bio *bio;

	while ((bio = bio_list_pop(bio_list)))
		queue_bio(ms, bio, WRITE);
}

#define MIN_READ_RECORDS 20
struct dm_raid1_read_record {
	struct mirror *m;
	struct dm_bio_details details;
};

static struct kmem_cache *_dm_raid1_read_record_cache;

/*
 * Every mirror should look like this one.
 */
#define DEFAULT_MIRROR 0

/*
 * This is yucky. We squirrel the mirror struct away inside
 * bi_next for read/write buffers. This is safe since the bh
 * doesn't get submitted to the lower levels of block layer.
 */
static struct mirror *bio_get_m(struct bio *bio)
{
	return (struct mirror *) bio->bi_next;
}

static void bio_set_m(struct bio *bio, struct mirror *m)
{
	bio->bi_next = (struct bio *) m;
}

static struct mirror *get_default_mirror(struct mirror_set *ms)
{
	return &ms->mirror[atomic_read(&ms->default_mirror)];
}

static void set_default_mirror(struct mirror *m)
{
	struct mirror_set *ms = m->ms;
	struct mirror *m0 = &(ms->mirror[0]);

	atomic_set(&ms->default_mirror, m - m0);
}

static struct mirror *get_valid_mirror(struct mirror_set *ms)
{
	struct mirror *m;

	for (m = ms->mirror; m < ms->mirror + ms->nr_mirrors; m++)
		if (!atomic_read(&m->error_count))
			return m;

	return NULL;
}

/* fail_mirror
 * @m: mirror device to fail
 * @error_type: one of the DM_RAID1_*_ERROR enum values
 *
 * If errors are being handled, record the type of
 * error encountered for this device. If this type
 * of error has already been recorded, we can return;
 * otherwise, we must signal userspace by triggering
 * an event. Additionally, if the device is the
 * primary device, we must choose a new primary, but
 * only if the mirror is in-sync.
 *
 * This function must not block.
 */
static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
{
	struct mirror_set *ms = m->ms;
	struct mirror *new;

	ms->leg_failure = 1;

	/*
	 * error_count is used for nothing more than a
	 * simple way to tell if a device has encountered
	 * errors.
	 */
	atomic_inc(&m->error_count);

	if (test_and_set_bit(error_type, &m->error_type))
		return;

	if (!errors_handled(ms))
		return;

	if (m != get_default_mirror(ms))
		goto out;

	if (!ms->in_sync) {
		/*
		 * Better to issue requests to same failing device
		 * than to risk returning corrupt data.
		 */
		DMERR("Primary mirror (%s) failed while out-of-sync: "
		      "Reads may fail.", m->dev->name);
		goto out;
	}

	new = get_valid_mirror(ms);
	if (new)
		set_default_mirror(new);
	else
		DMWARN("All sides of mirror have failed.");

out:
	schedule_work(&ms->trigger_event);
}

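/*
 * Send an empty flush to every mirror leg.  Legs that fail the flush
 * are marked with DM_RAID1_FLUSH_ERROR; -EIO is returned if any leg
 * failed.
 */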
static int mirror_flush(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	unsigned long error_bits;

	unsigned int i;
	struct dm_io_region io[ms->nr_mirrors];
	struct mirror *m;
	struct dm_io_request io_req = {
		.bi_rw = WRITE_FLUSH,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = ms->io_client,
	};

	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
		io[i].bdev = m->dev->bdev;
		io[i].sector = 0;
		io[i].count = 0;
	}

	error_bits = -1;
	dm_io(&io_req, ms->nr_mirrors, io, &error_bits);
	if (unlikely(error_bits != 0)) {
		for (i = 0; i < ms->nr_mirrors; i++)
			if (test_bit(i, &error_bits))
				fail_mirror(ms->mirror + i,
					    DM_RAID1_FLUSH_ERROR);
		return -EIO;
	}

	return 0;
}

/*-----------------------------------------------------------------
 * Recovery.
 *
 * When a mirror is first activated we may find that some regions
 * are in the no-sync state. We have to recover these by
 * recopying from the default mirror to all the others.
 *---------------------------------------------------------------*/
static void recovery_complete(int read_err, unsigned long write_err,
			      void *context)
{
	struct dm_region *reg = context;
	struct mirror_set *ms = dm_rh_region_context(reg);
	int m, bit = 0;

	if (read_err) {
		/* Read error means the failure of default mirror. */
		DMERR_LIMIT("Unable to read primary mirror during recovery");
		fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR);
	}

	if (write_err) {
		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
			    write_err);
		/*
		 * Bits correspond to devices (excluding default mirror).
		 * The default mirror cannot change during recovery.
		 */
		for (m = 0; m < ms->nr_mirrors; m++) {
			if (&ms->mirror[m] == get_default_mirror(ms))
				continue;
			if (test_bit(bit, &write_err))
				fail_mirror(ms->mirror + m,
					    DM_RAID1_SYNC_ERROR);
			bit++;
		}
	}

	dm_rh_recovery_end(reg, !(read_err || write_err));
}

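/*
 * Recover a single region: ask kcopyd to copy it from the default
 * mirror to every other leg.  recovery_complete() is called when the
 * copy finishes.
 */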
static int recover(struct mirror_set *ms, struct dm_region *reg)
{
	int r;
	unsigned i;
	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
	struct mirror *m;
	unsigned long flags = 0;
	region_t key = dm_rh_get_region_key(reg);
	sector_t region_size = dm_rh_get_region_size(ms->rh);

	/* fill in the source */
	m = get_default_mirror(ms);
	from.bdev = m->dev->bdev;
	from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
	if (key == (ms->nr_regions - 1)) {
		/*
		 * The final region may be smaller than
		 * region_size.
		 */
		from.count = ms->ti->len & (region_size - 1);
		if (!from.count)
			from.count = region_size;
	} else
		from.count = region_size;

	/* fill in the destinations */
	for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
		if (&ms->mirror[i] == get_default_mirror(ms))
			continue;

		m = ms->mirror + i;
		dest->bdev = m->dev->bdev;
		dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
		dest->count = from.count;
		dest++;
	}

	/* hand to kcopyd */
	if (!errors_handled(ms))
		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);

	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
			   flags, recovery_complete, reg);

	return r;
}

static void do_recovery(struct mirror_set *ms)
{
	struct dm_region *reg;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	int r;

	/*
	 * Start quiescing some regions.
	 */
	dm_rh_recovery_prepare(ms->rh);

	/*
	 * Copy any already quiesced regions.
	 */
	while ((reg = dm_rh_recovery_start(ms->rh))) {
		r = recover(ms, reg);
		if (r)
			dm_rh_recovery_end(reg, 0);
	}

	/*
	 * Update the in sync flag.
	 */
	if (!ms->in_sync &&
	    (log->type->get_sync_count(log) == ms->nr_regions)) {
		/* the sync is complete */
		dm_table_event(ms->ti->table);
		ms->in_sync = 1;
	}
}

/*-----------------------------------------------------------------
 * Reads
 *---------------------------------------------------------------*/
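/*
 * Pick a leg to read from: start at the default mirror and walk
 * backwards until a leg with no recorded errors is found.
 */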
static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
{
	struct mirror *m = get_default_mirror(ms);

	do {
		if (likely(!atomic_read(&m->error_count)))
			return m;

		if (m-- == ms->mirror)
			m += ms->nr_mirrors;
	} while (m != get_default_mirror(ms));

	return NULL;
}

static int default_ok(struct mirror *m)
{
	struct mirror *default_mirror = get_default_mirror(m->ms);

	return !atomic_read(&default_mirror->error_count);
}

static int mirror_available(struct mirror_set *ms, struct bio *bio)
{
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	region_t region = dm_rh_bio_to_region(ms->rh, bio);

	if (log->type->in_sync(log, region, 0))
		return choose_mirror(ms, bio->bi_sector) ? 1 : 0;

	return 0;
}

/*
 * remap a buffer to a particular mirror.
 */
static sector_t map_sector(struct mirror *m, struct bio *bio)
{
	if (unlikely(!bio->bi_size))
		return 0;
	return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector);
}

static void map_bio(struct mirror *m, struct bio *bio)
{
	bio->bi_bdev = m->dev->bdev;
	bio->bi_sector = map_sector(m, bio);
}

static void map_region(struct dm_io_region *io, struct mirror *m,
		       struct bio *bio)
{
	io->bdev = m->dev->bdev;
	io->sector = map_sector(m, bio);
	io->count = bio->bi_size >> 9;
}

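/*
 * Park a bio on the holds list until the suspend completes.  If the
 * mirror set is already suspended the bio is completed immediately:
 * requeued for a noflush suspend, failed with -EIO otherwise.
 */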
static void hold_bio(struct mirror_set *ms, struct bio *bio)
{
	/*
	 * Lock is required to avoid race condition during suspend
	 * process.
	 */
	spin_lock_irq(&ms->lock);

	if (atomic_read(&ms->suspend)) {
		spin_unlock_irq(&ms->lock);

		/*
		 * If device is suspended, complete the bio.
		 */
		if (dm_noflush_suspending(ms->ti))
			bio_endio(bio, DM_ENDIO_REQUEUE);
		else
			bio_endio(bio, -EIO);
		return;
	}

	/*
	 * Hold bio until the suspend is complete.
	 */
	bio_list_add(&ms->holds, bio);
	spin_unlock_irq(&ms->lock);
}

/*-----------------------------------------------------------------
 * Reads
 *---------------------------------------------------------------*/
static void read_callback(unsigned long error, void *context)
{
	struct bio *bio = context;
	struct mirror *m;

	m = bio_get_m(bio);
	bio_set_m(bio, NULL);

	if (likely(!error)) {
		bio_endio(bio, 0);
		return;
	}

	fail_mirror(m, DM_RAID1_READ_ERROR);

	if (likely(default_ok(m)) || mirror_available(m->ms, bio)) {
		DMWARN_LIMIT("Read failure on mirror device %s. "
			     "Trying alternative device.",
			     m->dev->name);
		queue_bio(m->ms, bio, bio_rw(bio));
		return;
	}

	DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.",
		    m->dev->name);
	bio_endio(bio, -EIO);
}

/* Asynchronous read. */
static void read_async_bio(struct mirror *m, struct bio *bio)
{
	struct dm_io_region io;
	struct dm_io_request io_req = {
		.bi_rw = READ,
		.mem.type = DM_IO_BVEC,
		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
		.notify.fn = read_callback,
		.notify.context = bio,
		.client = m->ms->io_client,
	};

	map_region(&io, m, bio);
	bio_set_m(bio, m);
	BUG_ON(dm_io(&io_req, 1, &io, NULL));
}

static inline int region_in_sync(struct mirror_set *ms, region_t region,
				 int may_block)
{
	int state = dm_rh_get_state(ms->rh, region, may_block);
	return state == DM_RH_CLEAN || state == DM_RH_DIRTY;
}

static void do_reads(struct mirror_set *ms, struct bio_list *reads)
{
	region_t region;
	struct bio *bio;
	struct mirror *m;

	while ((bio = bio_list_pop(reads))) {
		region = dm_rh_bio_to_region(ms->rh, bio);
		m = get_default_mirror(ms);

		/*
		 * We can only read balance if the region is in sync.
		 */
		if (likely(region_in_sync(ms, region, 1)))
			m = choose_mirror(ms, bio->bi_sector);
		else if (m && atomic_read(&m->error_count))
			m = NULL;

		if (likely(m))
			read_async_bio(m, bio);
		else
			bio_endio(bio, -EIO);
	}
}

/*-----------------------------------------------------------------
 * Writes.
 *
 * We do different things with the write io depending on the
 * state of the region that it's in:
 *
 * SYNC:	increment pending, use kcopyd to write to *all* mirrors
 * RECOVERING:	delay the io until recovery completes
 * NOSYNC:	increment pending, just write to the default mirror
 *---------------------------------------------------------------*/


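/*
 * Completion callback for mirrored writes issued by do_write().  On
 * failure the affected legs are marked and the bio is handed to the
 * failures list for kmirrord to deal with.
 */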
static void write_callback(unsigned long error, void *context)
{
	unsigned i, ret = 0;
	struct bio *bio = (struct bio *) context;
	struct mirror_set *ms;
	int should_wake = 0;
	unsigned long flags;

	ms = bio_get_m(bio)->ms;
	bio_set_m(bio, NULL);

	/*
	 * NOTE: We don't decrement the pending count here,
	 * instead it is done by the target's endio function.
	 * This way we handle both writes to SYNC and NOSYNC
	 * regions with the same code.
	 */
	if (likely(!error)) {
		bio_endio(bio, ret);
		return;
	}

	/*
	 * If the bio is discard, return an error, but do not
	 * degrade the array.
	 */
	if (bio->bi_rw & REQ_DISCARD) {
		bio_endio(bio, -EOPNOTSUPP);
		return;
	}

	for (i = 0; i < ms->nr_mirrors; i++)
		if (test_bit(i, &error))
			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);

	/*
	 * Need to raise event. Since raising
	 * events can block, we need to do it in
	 * the main thread.
	 */
	spin_lock_irqsave(&ms->lock, flags);
	if (!ms->failures.head)
		should_wake = 1;
	bio_list_add(&ms->failures, bio);
	spin_unlock_irqrestore(&ms->lock, flags);
	if (should_wake)
		wakeup_mirrord(ms);
}

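/*
 * Issue one write bio to all mirror legs in a single dm-io request.
 * Completion is reported through write_callback().
 */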
static void do_write(struct mirror_set *ms, struct bio *bio)
{
	unsigned int i;
	struct dm_io_region io[ms->nr_mirrors], *dest = io;
	struct mirror *m;
	struct dm_io_request io_req = {
		.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
		.mem.type = DM_IO_BVEC,
		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
		.notify.fn = write_callback,
		.notify.context = bio,
		.client = ms->io_client,
	};

	if (bio->bi_rw & REQ_DISCARD) {
		io_req.bi_rw |= REQ_DISCARD;
		io_req.mem.type = DM_IO_KMEM;
		io_req.mem.ptr.addr = NULL;
	}

	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
		map_region(dest++, m, bio);

	/*
	 * Use default mirror because we only need it to retrieve the reference
	 * to the mirror set in write_callback().
	 */
	bio_set_m(bio, get_default_mirror(ms));

	BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL));
}

static void do_writes(struct mirror_set *ms, struct bio_list *writes)
{
	int state;
	struct bio *bio;
	struct bio_list sync, nosync, recover, *this_list = NULL;
	struct bio_list requeue;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	region_t region;

	if (!writes->head)
		return;

	/*
	 * Classify each write.
	 */
	bio_list_init(&sync);
	bio_list_init(&nosync);
	bio_list_init(&recover);
	bio_list_init(&requeue);

	while ((bio = bio_list_pop(writes))) {
		if ((bio->bi_rw & REQ_FLUSH) ||
		    (bio->bi_rw & REQ_DISCARD)) {
			bio_list_add(&sync, bio);
			continue;
		}

		region = dm_rh_bio_to_region(ms->rh, bio);

		if (log->type->is_remote_recovering &&
		    log->type->is_remote_recovering(log, region)) {
			bio_list_add(&requeue, bio);
			continue;
		}

		state = dm_rh_get_state(ms->rh, region, 1);
		switch (state) {
		case DM_RH_CLEAN:
		case DM_RH_DIRTY:
			this_list = &sync;
			break;

		case DM_RH_NOSYNC:
			this_list = &nosync;
			break;

		case DM_RH_RECOVERING:
			this_list = &recover;
			break;
		}

		bio_list_add(this_list, bio);
	}

	/*
	 * Add bios that are delayed due to remote recovery
	 * back on to the write queue
	 */
	if (unlikely(requeue.head)) {
		spin_lock_irq(&ms->lock);
		bio_list_merge(&ms->writes, &requeue);
		spin_unlock_irq(&ms->lock);
		delayed_wake(ms);
	}

	/*
	 * Increment the pending counts for any regions that will
	 * be written to (writes to recover regions are going to
	 * be delayed).
	 */
	dm_rh_inc_pending(ms->rh, &sync);
	dm_rh_inc_pending(ms->rh, &nosync);

	/*
	 * If the flush fails on a previous call and succeeds here,
	 * we must not reset the log_failure variable. We need
	 * userspace interaction to do that.
	 */
	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;

	/*
	 * Dispatch io.
	 */
	if (unlikely(ms->log_failure) && errors_handled(ms)) {
		spin_lock_irq(&ms->lock);
		bio_list_merge(&ms->failures, &sync);
		spin_unlock_irq(&ms->lock);
		wakeup_mirrord(ms);
	} else
		while ((bio = bio_list_pop(&sync)))
			do_write(ms, bio);

	while ((bio = bio_list_pop(&recover)))
		dm_rh_delay(ms->rh, bio);

	while ((bio = bio_list_pop(&nosync))) {
		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
			spin_lock_irq(&ms->lock);
			bio_list_add(&ms->failures, bio);
			spin_unlock_irq(&ms->lock);
			wakeup_mirrord(ms);
		} else {
			map_bio(get_default_mirror(ms), bio);
			generic_make_request(bio);
		}
	}
}

static void do_failures(struct mirror_set *ms, struct bio_list *failures)
{
	struct bio *bio;

	if (likely(!failures->head))
		return;

	/*
	 * If the log has failed, unattempted writes are being
	 * put on the holds list. We can't issue those writes
	 * until a log has been marked, so we must store them.
	 *
	 * If a 'noflush' suspend is in progress, we can requeue
	 * the I/O's to the core. This gives userspace a chance
	 * to reconfigure the mirror, at which point the core
	 * will reissue the writes. If the 'noflush' flag is
	 * not set, we have no choice but to return errors.
	 *
	 * Some writes on the failures list may have been
	 * submitted before the log failure and represent a
	 * failure to write to one of the devices. It is ok
	 * for us to treat them the same and requeue them
	 * as well.
	 */
	while ((bio = bio_list_pop(failures))) {
		if (!ms->log_failure) {
			ms->in_sync = 0;
			dm_rh_mark_nosync(ms->rh, bio);
		}

		/*
		 * If all the legs are dead, fail the I/O.
		 * If we have been told to handle errors, hold the bio
		 * and wait for userspace to deal with the problem.
		 * Otherwise pretend that the I/O succeeded. (This would
		 * be wrong if the failed leg returned after reboot and
		 * got replicated back to the good legs.)
		 */
		if (!get_valid_mirror(ms))
			bio_endio(bio, -EIO);
		else if (errors_handled(ms))
			hold_bio(ms, bio);
		else
			bio_endio(bio, 0);
	}
}

static void trigger_event(struct work_struct *work)
{
	struct mirror_set *ms =
		container_of(work, struct mirror_set, trigger_event);

	dm_table_event(ms->ti->table);
}

/*-----------------------------------------------------------------
 * kmirrord
 *---------------------------------------------------------------*/
static void do_mirror(struct work_struct *work)
{
	struct mirror_set *ms = container_of(work, struct mirror_set,
					     kmirrord_work);
	struct bio_list reads, writes, failures;
	unsigned long flags;

	spin_lock_irqsave(&ms->lock, flags);
	reads = ms->reads;
	writes = ms->writes;
	failures = ms->failures;
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	spin_unlock_irqrestore(&ms->lock, flags);

	dm_rh_update_states(ms->rh, errors_handled(ms));
	do_recovery(ms);
	do_reads(ms, &reads);
	do_writes(ms, &writes);
	do_failures(ms, &failures);
}

/*-----------------------------------------------------------------
 * Target functions
 *---------------------------------------------------------------*/
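/*
 * Allocate and initialise a mirror_set, including its read record
 * mempool, dm-io client and region hash.
 */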
static struct mirror_set *alloc_context(unsigned int nr_mirrors,
					uint32_t region_size,
					struct dm_target *ti,
					struct dm_dirty_log *dl)
{
	size_t len;
	struct mirror_set *ms = NULL;

	len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);

	ms = kzalloc(len, GFP_KERNEL);
	if (!ms) {
		ti->error = "Cannot allocate mirror context";
		return NULL;
	}

	spin_lock_init(&ms->lock);
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	bio_list_init(&ms->holds);

	ms->ti = ti;
	ms->nr_mirrors = nr_mirrors;
	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
	ms->in_sync = 0;
	ms->log_failure = 0;
	ms->leg_failure = 0;
	atomic_set(&ms->suspend, 0);
	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);

	ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
						_dm_raid1_read_record_cache);

	if (!ms->read_record_pool) {
		ti->error = "Error creating mirror read_record_pool";
		kfree(ms);
		return NULL;
	}

	ms->io_client = dm_io_client_create();
	if (IS_ERR(ms->io_client)) {
		ti->error = "Error creating dm_io client";
		mempool_destroy(ms->read_record_pool);
		kfree(ms);
		return NULL;
	}

	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
				       wakeup_all_recovery_waiters,
				       ms->ti->begin, MAX_RECOVERY,
				       dl, region_size, ms->nr_regions);
	if (IS_ERR(ms->rh)) {
		ti->error = "Error creating dirty region hash";
		dm_io_client_destroy(ms->io_client);
		mempool_destroy(ms->read_record_pool);
		kfree(ms);
		return NULL;
	}

	return ms;
}

static void free_context(struct mirror_set *ms, struct dm_target *ti,
			 unsigned int m)
{
	while (m--)
		dm_put_device(ti, ms->mirror[m].dev);

	dm_io_client_destroy(ms->io_client);
	dm_region_hash_destroy(ms->rh);
	mempool_destroy(ms->read_record_pool);
	kfree(ms);
}

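/*
 * Parse one "mirror_path offset" argument pair and take a reference
 * on the underlying device.
 */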
static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
		      unsigned int mirror, char **argv)
{
	unsigned long long offset;
	char dummy;

	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) {
		ti->error = "Invalid offset";
		return -EINVAL;
	}

	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			  &ms->mirror[mirror].dev)) {
		ti->error = "Device lookup failure";
		return -ENXIO;
	}

	ms->mirror[mirror].ms = ms;
	atomic_set(&(ms->mirror[mirror].error_count), 0);
	ms->mirror[mirror].error_type = 0;
	ms->mirror[mirror].offset = offset;

	return 0;
}

/*
 * Create dirty log: log_type #log_params <log_params>
 */
static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
					     unsigned argc, char **argv,
					     unsigned *args_used)
{
	unsigned param_count;
	struct dm_dirty_log *dl;
	char dummy;

	if (argc < 2) {
		ti->error = "Insufficient mirror log arguments";
		return NULL;
	}

	if (sscanf(argv[1], "%u%c", &param_count, &dummy) != 1) {
		ti->error = "Invalid mirror log argument count";
		return NULL;
	}

	*args_used = 2 + param_count;

	if (argc < *args_used) {
		ti->error = "Insufficient mirror log arguments";
		return NULL;
	}

	dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count,
				 argv + 2);
	if (!dl) {
		ti->error = "Error creating mirror dirty log";
		return NULL;
	}

	return dl;
}

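/*
 * Parse the optional feature arguments: [#features <features>].
 * "handle_errors" is currently the only recognised feature.
 */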
static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
			  unsigned *args_used)
{
	unsigned num_features;
	struct dm_target *ti = ms->ti;
	char dummy;

	*args_used = 0;

	if (!argc)
		return 0;

	if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) {
		ti->error = "Invalid number of features";
		return -EINVAL;
	}

	argc--;
	argv++;
	(*args_used)++;

	if (num_features > argc) {
		ti->error = "Not enough arguments to support feature count";
		return -EINVAL;
	}

	if (!strcmp("handle_errors", argv[0]))
		ms->features |= DM_RAID1_HANDLE_ERRORS;
	else {
		ti->error = "Unrecognised feature requested";
		return -EINVAL;
	}

	(*args_used)++;

	return 0;
}

/*
 * Construct a mirror mapping:
 *
 * log_type #log_params <log_params>
 * #mirrors [mirror_path offset]{2,}
 * [#features <features>]
 *
 * log_type is "core" or "disk"
 * #log_params is between 1 and 3
 *
 * If present, features must be "handle_errors".
 */
static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned int nr_mirrors, m, args_used;
	struct mirror_set *ms;
	struct dm_dirty_log *dl;
	char dummy;

	dl = create_dirty_log(ti, argc, argv, &args_used);
	if (!dl)
		return -EINVAL;

	argv += args_used;
	argc -= args_used;

	if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 ||
	    nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
		ti->error = "Invalid number of mirrors";
		dm_dirty_log_destroy(dl);
		return -EINVAL;
	}

	argv++, argc--;

	if (argc < nr_mirrors * 2) {
		ti->error = "Too few mirror arguments";
		dm_dirty_log_destroy(dl);
		return -EINVAL;
	}

	ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
	if (!ms) {
		dm_dirty_log_destroy(dl);
		return -ENOMEM;
	}

	/* Get the mirror parameter sets */
	for (m = 0; m < nr_mirrors; m++) {
		r = get_mirror(ms, ti, m, argv);
		if (r) {
			free_context(ms, ti, m);
			return r;
		}
		argv += 2;
		argc -= 2;
	}

	ti->private = ms;
	ti->split_io = dm_rh_get_region_size(ms->rh);
	ti->num_flush_requests = 1;
	ti->num_discard_requests = 1;
	ti->discard_zeroes_data_unsupported = 1;

	ms->kmirrord_wq = alloc_workqueue("kmirrord",
					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
	if (!ms->kmirrord_wq) {
		DMERR("couldn't start kmirrord");
		r = -ENOMEM;
		goto err_free_context;
	}
	INIT_WORK(&ms->kmirrord_work, do_mirror);
	init_timer(&ms->timer);
	ms->timer_pending = 0;
	INIT_WORK(&ms->trigger_event, trigger_event);

	r = parse_features(ms, argc, argv, &args_used);
	if (r)
		goto err_destroy_wq;

	argv += args_used;
	argc -= args_used;

	/*
	 * Any read-balancing addition depends on the
	 * DM_RAID1_HANDLE_ERRORS flag being present.
	 * This is because the decision to balance depends
	 * on the sync state of a region. If the above
	 * flag is not present, we ignore errors; and
	 * the sync state may be inaccurate.
	 */

	if (argc) {
		ti->error = "Too many mirror arguments";
		r = -EINVAL;
		goto err_destroy_wq;
	}

	ms->kcopyd_client = dm_kcopyd_client_create();
	if (IS_ERR(ms->kcopyd_client)) {
		r = PTR_ERR(ms->kcopyd_client);
		goto err_destroy_wq;
	}

	wakeup_mirrord(ms);
	return 0;

err_destroy_wq:
	destroy_workqueue(ms->kmirrord_wq);
err_free_context:
	free_context(ms, ti, ms->nr_mirrors);
	return r;
}

static void mirror_dtr(struct dm_target *ti)
{
	struct mirror_set *ms = (struct mirror_set *) ti->private;

	del_timer_sync(&ms->timer);
	flush_workqueue(ms->kmirrord_wq);
	flush_work_sync(&ms->trigger_event);
	dm_kcopyd_client_destroy(ms->kcopyd_client);
	destroy_workqueue(ms->kmirrord_wq);
	free_context(ms, ti, ms->nr_mirrors);
}

/*
 * Mirror mapping function
 */
static int mirror_map(struct dm_target *ti, struct bio *bio,
		      union map_info *map_context)
{
	int r, rw = bio_rw(bio);
	struct mirror *m;
	struct mirror_set *ms = ti->private;
	struct dm_raid1_read_record *read_record = NULL;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	if (rw == WRITE) {
		/* Save region for mirror_end_io() handler */
		map_context->ll = dm_rh_bio_to_region(ms->rh, bio);
		queue_bio(ms, bio, rw);
		return DM_MAPIO_SUBMITTED;
	}

	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
	if (r < 0 && r != -EWOULDBLOCK)
		return r;

	/*
	 * If region is not in-sync queue the bio.
	 */
	if (!r || (r == -EWOULDBLOCK)) {
		if (rw == READA)
			return -EWOULDBLOCK;

		queue_bio(ms, bio, rw);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * The region is in-sync and we can perform reads directly.
	 * Store enough information so we can retry if it fails.
	 */
	m = choose_mirror(ms, bio->bi_sector);
	if (unlikely(!m))
		return -EIO;

	read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO);
	if (likely(read_record)) {
		dm_bio_record(&read_record->details, bio);
		map_context->ptr = read_record;
		read_record->m = m;
	}

	map_bio(m, bio);

	return DM_MAPIO_REMAPPED;
}

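/*
 * End-io processing.  Writes just drop the region's pending count.
 * Failed reads are recorded against the leg and, if another in-sync
 * leg is available, restored from the saved bio details and requeued.
 */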
static int mirror_end_io(struct dm_target *ti, struct bio *bio,
			 int error, union map_info *map_context)
{
	int rw = bio_rw(bio);
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct mirror *m = NULL;
	struct dm_bio_details *bd = NULL;
	struct dm_raid1_read_record *read_record = map_context->ptr;

	/*
	 * We need to dec pending if this was a write.
	 */
	if (rw == WRITE) {
		if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
			dm_rh_dec(ms->rh, map_context->ll);
		return error;
	}

	if (error == -EOPNOTSUPP)
		goto out;

	if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
		goto out;

	if (unlikely(error)) {
		if (!read_record) {
			/*
			 * There wasn't enough memory to record necessary
			 * information for a retry or there was no other
			 * mirror in-sync.
			 */
			DMERR_LIMIT("Mirror read failed.");
			return -EIO;
		}

		m = read_record->m;

		DMERR("Mirror read failed from %s. Trying alternative device.",
		      m->dev->name);

		fail_mirror(m, DM_RAID1_READ_ERROR);

		/*
		 * A failed read is requeued for another attempt using an intact
		 * mirror.
		 */
		if (default_ok(m) || mirror_available(ms, bio)) {
			bd = &read_record->details;

			dm_bio_restore(bd, bio);
			mempool_free(read_record, ms->read_record_pool);
			map_context->ptr = NULL;
			queue_bio(ms, bio, rw);
			return 1;
		}
		DMERR("All replicated volumes dead, failing I/O");
	}

out:
	if (read_record) {
		mempool_free(read_record, ms->read_record_pool);
		map_context->ptr = NULL;
	}

	return error;
}

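/*
 * Before suspend: complete any held bios (requeueing them for a
 * noflush suspend), quiesce recovery and wait for kmirrord to push
 * out all queued I/O.
 */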
static void mirror_presuspend(struct dm_target *ti)
{
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	struct bio_list holds;
	struct bio *bio;

	atomic_set(&ms->suspend, 1);

	/*
	 * Complete the bios in the hold list: with ms->suspend set,
	 * hold_bio() now ends them instead of re-holding them, so
	 * recovery below is not left waiting on held bios and no new
	 * bio can be added to the hold list.
	 */
	spin_lock_irq(&ms->lock);
	holds = ms->holds;
	bio_list_init(&ms->holds);
	spin_unlock_irq(&ms->lock);

	while ((bio = bio_list_pop(&holds)))
		hold_bio(ms, bio);

	/*
	 * We must finish up all the work that we've
	 * generated (i.e. recovery work).
	 */
	dm_rh_stop_recovery(ms->rh);

	wait_event(_kmirrord_recovery_stopped,
		   !dm_rh_recovery_in_flight(ms->rh));

	if (log->type->presuspend && log->type->presuspend(log))
		/* FIXME: need better error handling */
		DMWARN("log presuspend failed");

	/*
	 * Now that recovery is complete/stopped and the
	 * delayed bios are queued, we need to wait for
	 * the worker thread to complete. This way,
	 * we know that all of our I/O has been pushed.
	 */
	flush_workqueue(ms->kmirrord_wq);
}

static void mirror_postsuspend(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	if (log->type->postsuspend && log->type->postsuspend(log))
		/* FIXME: need better error handling */
		DMWARN("log postsuspend failed");
}

static void mirror_resume(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	atomic_set(&ms->suspend, 0);
	if (log->type->resume && log->type->resume(log))
		/* FIXME: need better error handling */
		DMWARN("log resume failed");
	dm_rh_start_recovery(ms->rh);
}

/*
 * device_status_char
 * @m: mirror device/leg we want the status of
 *
 * We return one character representing the most severe error
 * we have encountered.
 * A => Alive - No failures
 * F => Flush - A flush failure occurred
 * D => Dead - A write failure occurred leaving mirror out-of-sync
 * S => Sync - A synchronization failure occurred, mirror out-of-sync
 * R => Read - A read failure occurred, mirror data unaffected
 *
 * Returns: <char>
 */
static char device_status_char(struct mirror *m)
{
	if (!atomic_read(&(m->error_count)))
		return 'A';

	return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' :
		(test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
		(test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' :
		(test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U';
}


static void mirror_status(struct dm_target *ti, status_type_t type,
			  char *result, unsigned int maxlen)
{
	unsigned int m, sz = 0;
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	char buffer[ms->nr_mirrors + 1];

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%d ", ms->nr_mirrors);
		for (m = 0; m < ms->nr_mirrors; m++) {
			DMEMIT("%s ", ms->mirror[m].dev->name);
			buffer[m] = device_status_char(&(ms->mirror[m]));
		}
		buffer[m] = '\0';

		DMEMIT("%llu/%llu 1 %s ",
		       (unsigned long long)log->type->get_sync_count(log),
		       (unsigned long long)ms->nr_regions, buffer);

		sz += log->type->status(log, type, result+sz, maxlen-sz);

		break;

	case STATUSTYPE_TABLE:
		sz = log->type->status(log, type, result, maxlen);

		DMEMIT("%d", ms->nr_mirrors);
		for (m = 0; m < ms->nr_mirrors; m++)
			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
			       (unsigned long long)ms->mirror[m].offset);

		if (ms->features & DM_RAID1_HANDLE_ERRORS)
			DMEMIT(" 1 handle_errors");
	}
}

static int mirror_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct mirror_set *ms = ti->private;
	int ret = 0;
	unsigned i;

	for (i = 0; !ret && i < ms->nr_mirrors; i++)
		ret = fn(ti, ms->mirror[i].dev,
			 ms->mirror[i].offset, ti->len, data);

	return ret;
}

static struct target_type mirror_target = {
	.name = "mirror",
	.version = {1, 12, 1},
	.module = THIS_MODULE,
	.ctr = mirror_ctr,
	.dtr = mirror_dtr,
	.map = mirror_map,
	.end_io = mirror_end_io,
	.presuspend = mirror_presuspend,
	.postsuspend = mirror_postsuspend,
	.resume = mirror_resume,
	.status = mirror_status,
	.iterate_devices = mirror_iterate_devices,
};

static int __init dm_mirror_init(void)
{
	int r;

	_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
	if (!_dm_raid1_read_record_cache) {
		DMERR("Can't allocate dm_raid1_read_record cache");
		r = -ENOMEM;
		goto bad_cache;
	}

	r = dm_register_target(&mirror_target);
	if (r < 0) {
		DMERR("Failed to register mirror target");
		goto bad_target;
	}

	return 0;

bad_target:
	kmem_cache_destroy(_dm_raid1_read_record_cache);
bad_cache:
	return r;
}

static void __exit dm_mirror_exit(void)
{
	dm_unregister_target(&mirror_target);
	kmem_cache_destroy(_dm_raid1_read_record_cache);
}

/* Module hooks */
module_init(dm_mirror_init);
module_exit(dm_mirror_exit);

MODULE_DESCRIPTION(DM_NAME " mirror target");
MODULE_AUTHOR("Joe Thornber");
MODULE_LICENSE("GPL");