/*
 * Copyright (C) 2011 Red Hat UK.
 *
 * This file is released under the GPL.
 */

#include "dm-thin-metadata.h"

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>

#define DM_MSG_PREFIX "thin"

/*
 * Tunable constants
 */
#define ENDIO_HOOK_POOL_SIZE 1024
#define DEFERRED_SET_SIZE 64
#define MAPPING_POOL_SIZE 1024
#define PRISON_CELLS 1024
#define COMMIT_PERIOD HZ

/*
 * The block size of the device holding pool data must be
 * between 64KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
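/*
 * For example, with the kernel's 512-byte sectors (SECTOR_SHIFT == 9)
 * these limits work out to 128 sectors and 2097152 sectors respectively.
 */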

/*
 * Device id is restricted to 24 bits.
 */
#define MAX_DEV_ID ((1 << 24) - 1)

/*
 * How do we handle breaking sharing of data blocks?
 * =================================================
 *
 * We use a standard copy-on-write btree to store the mappings for the
 * devices (note I'm talking about copy-on-write of the metadata here, not
 * the data).  When you take an internal snapshot you clone the root node
 * of the origin btree.  After this there is no concept of an origin or a
 * snapshot.  They are just two device trees that happen to point to the
 * same data blocks.
 *
 * When we get a write in we decide if it's to a shared data block using
 * some timestamp magic.  If it is, we have to break sharing.
 *
 * Let's say we write to a shared block in what was the origin.  The
 * steps are:
 *
 * i) plug further io to this physical block. (see bio_prison code).
 *
 * ii) quiesce any read io to that shared data block.  Obviously
 * including all devices that share this block.  (see deferred_set code)
 *
 * iii) copy the data block to a newly allocated block.  This step can be
 * skipped if the io covers the whole block.  (schedule_copy).
 *
 * iv) insert the new mapping into the origin's btree
 * (process_prepared_mapping).  This act of inserting breaks some
 * sharing of btree nodes between the two devices.  Breaking sharing only
 * affects the btree of that specific device.  Btrees for the other
 * devices that share the block never change.  The btree for the origin
 * device as it was after the last commit is untouched, i.e. we're using
 * persistent data structures in the functional programming sense.
 *
 * v) unplug io to this physical block, including the io that triggered
 * the breaking of sharing.
 *
 * Steps (ii) and (iii) occur in parallel.
 *
 * The metadata _doesn't_ need to be committed before the io continues.  We
 * get away with this because the io is always written to a _new_ block.
 * If there's a crash, then:
 *
 * - The origin mapping will point to the old origin block (the shared
 * one).  This will contain the data as it was before the io that triggered
 * the breaking of sharing came in.
 *
 * - The snap mapping still points to the old block.  As it would after
 * the commit.
 *
 * The downside of this scheme is that the timestamp magic isn't perfect:
 * it will continue to think the data block in the snapshot device is
 * shared even after the write to the origin has broken sharing.  I
 * suspect data blocks will typically be shared by many different
 * devices, so we're breaking sharing n + 1 times, rather than n, where n
 * is the number of devices that reference this data block.  At the
 * moment I think the benefits far, far outweigh the disadvantages.
 */
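
/*
 * A purely illustrative walk-through of the steps above: suppose thin
 * device T and its snapshot S both map virtual block 7 to data block
 * 100.  A write to block 7 of T allocates a fresh data block, say 200,
 * copies 100 to 200 (unless the write covers the whole block), then
 * inserts the mapping 7 -> 200 into T's btree.  S's btree still maps
 * 7 -> 100, as does the last committed version of T's btree, so a crash
 * before the next commit loses only the re-doable new mapping, never
 * previously committed data.
 */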

/*----------------------------------------------------------------*/

/*
 * Sometimes we can't deal with a bio straight away, so we put it in
 * prison where it can't cause any mischief.  Bios are put in a cell
 * identified by a key; multiple bios can be in the same cell.  When the
 * cell is subsequently unlocked the bios become available.
 */
struct bio_prison;

struct cell_key {
	int virtual;
	dm_thin_id dev;
	dm_block_t block;
};

struct cell {
	struct hlist_node list;
	struct bio_prison *prison;
	struct cell_key key;
	struct bio *holder;
	struct bio_list bios;
};

struct bio_prison {
	spinlock_t lock;
	mempool_t *cell_pool;

	unsigned nr_buckets;
	unsigned hash_mask;
	struct hlist_head *cells;
};

static uint32_t calc_nr_buckets(unsigned nr_cells)
{
	uint32_t n = 128;

	nr_cells /= 4;
	nr_cells = min(nr_cells, 8192u);

	while (n < nr_cells)
		n <<= 1;

	return n;
}
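
/*
 * Worked example: for the default PRISON_CELLS of 1024 this gives
 * 1024 / 4 = 256, already a power of two, so we use 256 buckets,
 * i.e. a target load of roughly four concurrent cells per bucket.
 */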

/*
 * @nr_cells should be the number of cells you want in use _concurrently_.
 * Don't confuse it with the number of distinct keys.
 */
static struct bio_prison *prison_create(unsigned nr_cells)
{
	unsigned i;
	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
	size_t len = sizeof(struct bio_prison) +
		(sizeof(struct hlist_head) * nr_buckets);
	struct bio_prison *prison = kmalloc(len, GFP_KERNEL);

	if (!prison)
		return NULL;

	spin_lock_init(&prison->lock);
	prison->cell_pool = mempool_create_kmalloc_pool(nr_cells,
							sizeof(struct cell));
	if (!prison->cell_pool) {
		kfree(prison);
		return NULL;
	}

	prison->nr_buckets = nr_buckets;
	prison->hash_mask = nr_buckets - 1;
	prison->cells = (struct hlist_head *) (prison + 1);
	for (i = 0; i < nr_buckets; i++)
		INIT_HLIST_HEAD(prison->cells + i);

	return prison;
}

static void prison_destroy(struct bio_prison *prison)
{
	mempool_destroy(prison->cell_pool);
	kfree(prison);
}

static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key)
{
	const unsigned long BIG_PRIME = 4294967291UL;
	uint64_t hash = key->block * BIG_PRIME;

	return (uint32_t) (hash & prison->hash_mask);
}
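
/*
 * Illustrative example: with 256 buckets hash_mask is 0xff, so
 * key->block == 5 gives 5 * 4294967291 == 0x4ffffffe7, which hashes to
 * bucket 0xe7.  Note that only the block number feeds the hash; the dev
 * and virtual fields are distinguished by keys_equal() within a bucket.
 */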

static int keys_equal(struct cell_key *lhs, struct cell_key *rhs)
{
	return (lhs->virtual == rhs->virtual) &&
		(lhs->dev == rhs->dev) &&
		(lhs->block == rhs->block);
}

static struct cell *__search_bucket(struct hlist_head *bucket,
				    struct cell_key *key)
{
	struct cell *cell;
	struct hlist_node *tmp;

	hlist_for_each_entry(cell, tmp, bucket, list)
		if (keys_equal(&cell->key, key))
			return cell;

	return NULL;
}

/*
 * This may block if a new cell needs allocating.  You must ensure that
 * cells will be unlocked even if the calling thread is blocked.
 *
 * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
 */
static int bio_detain(struct bio_prison *prison, struct cell_key *key,
		      struct bio *inmate, struct cell **ref)
{
	int r = 1;
	unsigned long flags;
	uint32_t hash = hash_key(prison, key);
	struct cell *cell, *cell2;

	BUG_ON(hash >= prison->nr_buckets);

	spin_lock_irqsave(&prison->lock, flags);

	cell = __search_bucket(prison->cells + hash, key);
	if (cell) {
		bio_list_add(&cell->bios, inmate);
		goto out;
	}

	/*
	 * Allocate a new cell
	 */
	spin_unlock_irqrestore(&prison->lock, flags);
	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
	spin_lock_irqsave(&prison->lock, flags);

	/*
	 * We dropped the lock to allocate, so we must double check that
	 * nobody else has inserted this cell in the meantime.
	 */
	cell = __search_bucket(prison->cells + hash, key);
	if (cell) {
		mempool_free(cell2, prison->cell_pool);
		bio_list_add(&cell->bios, inmate);
		goto out;
	}

	/*
	 * Use new cell.
	 */
	cell = cell2;

	cell->prison = prison;
	memcpy(&cell->key, key, sizeof(cell->key));
	cell->holder = inmate;
	bio_list_init(&cell->bios);
	hlist_add_head(&cell->list, prison->cells + hash);

	r = 0;

out:
	spin_unlock_irqrestore(&prison->lock, flags);

	*ref = cell;

	return r;
}
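
/*
 * The typical calling pattern (a sketch of what process_bio() et al do
 * further down this file) is:
 *
 *	build_virtual_key(tc->td, block, &key);
 *	if (bio_detain(pool->prison, &key, bio, &cell))
 *		return;	(someone else holds the block; bio is now queued)
 *
 * The holder eventually calls one of the cell_release variants below,
 * which hands the queued bios back for processing.
 */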

/*
 * @inmates must have been initialised prior to this call
 */
static void __cell_release(struct cell *cell, struct bio_list *inmates)
{
	struct bio_prison *prison = cell->prison;

	hlist_del(&cell->list);

	if (inmates) {
		bio_list_add(inmates, cell->holder);
		bio_list_merge(inmates, &cell->bios);
	}

	mempool_free(cell, prison->cell_pool);
}

static void cell_release(struct cell *cell, struct bio_list *bios)
{
	unsigned long flags;
	struct bio_prison *prison = cell->prison;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release(cell, bios);
	spin_unlock_irqrestore(&prison->lock, flags);
}

/*
 * There are a couple of places where we put a bio into a cell briefly
 * before taking it out again.  In these situations we know that no other
 * bio may be in the cell.  This function releases the cell, and also does
 * a sanity check.
 */
static void __cell_release_singleton(struct cell *cell, struct bio *bio)
{
	BUG_ON(cell->holder != bio);
	BUG_ON(!bio_list_empty(&cell->bios));

	__cell_release(cell, NULL);
}

static void cell_release_singleton(struct cell *cell, struct bio *bio)
{
	unsigned long flags;
	struct bio_prison *prison = cell->prison;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release_singleton(cell, bio);
	spin_unlock_irqrestore(&prison->lock, flags);
}

/*
 * Sometimes we don't want the holder, just the additional bios.
 */
static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
{
	struct bio_prison *prison = cell->prison;

	hlist_del(&cell->list);
	bio_list_merge(inmates, &cell->bios);

	mempool_free(cell, prison->cell_pool);
}

static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
{
	unsigned long flags;
	struct bio_prison *prison = cell->prison;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release_no_holder(cell, inmates);
	spin_unlock_irqrestore(&prison->lock, flags);
}

static void cell_error(struct cell *cell)
{
	struct bio_prison *prison = cell->prison;
	struct bio_list bios;
	struct bio *bio;
	unsigned long flags;

	bio_list_init(&bios);

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release(cell, &bios);
	spin_unlock_irqrestore(&prison->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		bio_io_error(bio);
}

/*----------------------------------------------------------------*/

/*
 * We use the deferred set to keep track of pending reads to shared blocks.
 * We do this to ensure the new mapping caused by a write isn't performed
 * until these prior reads have completed.  Otherwise the insertion of the
 * new mapping could free the old block that the read bios are mapped to.
 */

struct deferred_set;
struct deferred_entry {
	struct deferred_set *ds;
	unsigned count;
	struct list_head work_items;
};

struct deferred_set {
	spinlock_t lock;
	unsigned current_entry;
	unsigned sweeper;
	struct deferred_entry entries[DEFERRED_SET_SIZE];
};

static void ds_init(struct deferred_set *ds)
{
	int i;

	spin_lock_init(&ds->lock);
	ds->current_entry = 0;
	ds->sweeper = 0;
	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
		ds->entries[i].ds = ds;
		ds->entries[i].count = 0;
		INIT_LIST_HEAD(&ds->entries[i].work_items);
	}
}

static struct deferred_entry *ds_inc(struct deferred_set *ds)
{
	unsigned long flags;
	struct deferred_entry *entry;

	spin_lock_irqsave(&ds->lock, flags);
	entry = ds->entries + ds->current_entry;
	entry->count++;
	spin_unlock_irqrestore(&ds->lock, flags);

	return entry;
}

static unsigned ds_next(unsigned index)
{
	return (index + 1) % DEFERRED_SET_SIZE;
}

static void __sweep(struct deferred_set *ds, struct list_head *head)
{
	while ((ds->sweeper != ds->current_entry) &&
	       !ds->entries[ds->sweeper].count) {
		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
		ds->sweeper = ds_next(ds->sweeper);
	}

	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
}

static void ds_dec(struct deferred_entry *entry, struct list_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(&entry->ds->lock, flags);
	BUG_ON(!entry->count);
	--entry->count;
	__sweep(entry->ds, head);
	spin_unlock_irqrestore(&entry->ds->lock, flags);
}

/*
 * Returns 1 if the work was deferred, or 0 if there were no pending
 * items to delay the job.
 */
static int ds_add_work(struct deferred_set *ds, struct list_head *work)
{
	int r = 1;
	unsigned long flags;
	unsigned next_entry;

	spin_lock_irqsave(&ds->lock, flags);
	if ((ds->sweeper == ds->current_entry) &&
	    !ds->entries[ds->current_entry].count)
		r = 0;
	else {
		list_add(work, &ds->entries[ds->current_entry].work_items);
		next_entry = ds_next(ds->current_entry);
		if (!ds->entries[next_entry].count)
			ds->current_entry = next_entry;
	}
	spin_unlock_irqrestore(&ds->lock, flags);

	return r;
}
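
/*
 * Illustrative example of the deferred set in action: when every count
 * is zero, sweeper == current_entry and ds_add_work() returns 0, so
 * there is nothing to wait for.  Once readers have called ds_inc() on
 * the current entry, ds_add_work() queues the work there and tries to
 * advance current_entry so that later ds_inc() callers land in a fresh
 * entry.  As those readers call ds_dec(), __sweep() walks the sweeper
 * forward over drained entries, splicing their work_items onto the
 * caller's list for execution.
 */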

/*----------------------------------------------------------------*/

/*
 * Key building.
 */
static void build_data_key(struct dm_thin_device *td,
			   dm_block_t b, struct cell_key *key)
{
	key->virtual = 0;
	key->dev = dm_thin_dev_id(td);
	key->block = b;
}

static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
			      struct cell_key *key)
{
	key->virtual = 1;
	key->dev = dm_thin_dev_id(td);
	key->block = b;
}

/*----------------------------------------------------------------*/

/*
 * A pool device ties together a metadata device and a data device.  It
 * also provides the interface for creating and destroying internal
 * devices.
 */
struct new_mapping;

struct pool_features {
	unsigned zero_new_blocks:1;
	unsigned discard_enabled:1;
	unsigned discard_passdown:1;
};

struct pool {
	struct list_head list;
	struct dm_target *ti;	/* Only set if a pool target is bound */

	struct mapped_device *pool_md;
	struct block_device *md_dev;
	struct dm_pool_metadata *pmd;

	uint32_t sectors_per_block;
	unsigned block_shift;
	dm_block_t offset_mask;
	dm_block_t low_water_blocks;

	struct pool_features pf;
	unsigned low_water_triggered:1;	/* A dm event has been sent */
	unsigned no_free_space:1;	/* A -ENOSPC warning has been issued */

	struct bio_prison *prison;
	struct dm_kcopyd_client *copier;

	struct workqueue_struct *wq;
	struct work_struct worker;
	struct delayed_work waker;

	unsigned ref_count;
	unsigned long last_commit_jiffies;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct list_head prepared_mappings;
	struct list_head prepared_discards;

	struct bio_list retry_on_resume_list;

	struct deferred_set shared_read_ds;
	struct deferred_set all_io_ds;

	struct new_mapping *next_mapping;
	mempool_t *mapping_pool;
	mempool_t *endio_hook_pool;
};

/*
 * Target context for a pool.
 */
struct pool_c {
	struct dm_target *ti;
	struct pool *pool;
	struct dm_dev *data_dev;
	struct dm_dev *metadata_dev;
	struct dm_target_callbacks callbacks;

	dm_block_t low_water_blocks;
	struct pool_features pf;
};

/*
 * Target context for a thin.
 */
struct thin_c {
	struct dm_dev *pool_dev;
	struct dm_dev *origin_dev;
	dm_thin_id dev_id;

	struct pool *pool;
	struct dm_thin_device *td;
};

/*----------------------------------------------------------------*/

/*
 * A global list of pools that uses a struct mapped_device as a key.
 */
static struct dm_thin_pool_table {
	struct mutex mutex;
	struct list_head pools;
} dm_thin_pool_table;

static void pool_table_init(void)
{
	mutex_init(&dm_thin_pool_table.mutex);
	INIT_LIST_HEAD(&dm_thin_pool_table.pools);
}

static void __pool_table_insert(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_add(&pool->list, &dm_thin_pool_table.pools);
}

static void __pool_table_remove(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_del(&pool->list);
}

static struct pool *__pool_table_lookup(struct mapped_device *md)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->pool_md == md) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->md_dev == md_dev) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

/*----------------------------------------------------------------*/

struct endio_hook {
	struct thin_c *tc;
	struct deferred_entry *shared_read_entry;
	struct deferred_entry *all_io_entry;
	struct new_mapping *overwrite_mapping;
};

static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, master);
	bio_list_init(master);

	while ((bio = bio_list_pop(&bios))) {
		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;

		if (h->tc == tc)
			bio_endio(bio, DM_ENDIO_REQUEUE);
		else
			bio_list_add(master, bio);
	}
}

static void requeue_io(struct thin_c *tc)
{
	struct pool *pool = tc->pool;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	__requeue_bio_list(tc, &pool->deferred_bios);
	__requeue_bio_list(tc, &pool->retry_on_resume_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}

/*
 * This section of code contains the logic for processing a thin device's IO.
 * Much of the code depends on pool object resources (lists, workqueues, etc.)
 * but most is exclusively called from the thin target rather than the thin-pool
 * target.
 */

static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
	return bio->bi_sector >> tc->pool->block_shift;
}

static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
	struct pool *pool = tc->pool;

	bio->bi_bdev = tc->pool_dev->bdev;
	bio->bi_sector = (block << pool->block_shift) +
		(bio->bi_sector & pool->offset_mask);
}
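
/*
 * Example of the arithmetic (illustrative): with sectors_per_block ==
 * 128, block_shift is 7 and offset_mask is 127.  A bio at sector 300 is
 * in block 2 (300 >> 7) at offset 44 (300 & 127), so remapping it to
 * data block b sends it to sector (b << 7) + 44 of the pool device.
 */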

static void remap_to_origin(struct thin_c *tc, struct bio *bio)
{
	bio->bi_bdev = tc->origin_dev->bdev;
}

static void issue(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;
	unsigned long flags;

	/*
	 * Batch together any FUA/FLUSH bios we find and then issue
	 * a single commit for them in process_deferred_bios().
	 */
	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
		spin_lock_irqsave(&pool->lock, flags);
		bio_list_add(&pool->deferred_flush_bios, bio);
		spin_unlock_irqrestore(&pool->lock, flags);
	} else
		generic_make_request(bio);
}

static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
{
	remap_to_origin(tc, bio);
	issue(tc, bio);
}

static void remap_and_issue(struct thin_c *tc, struct bio *bio,
			    dm_block_t block)
{
	remap(tc, bio, block);
	issue(tc, bio);
}

/*
 * wake_worker() is used when new work is queued and when pool_resume is
 * ready to continue deferred IO processing.
 */
static void wake_worker(struct pool *pool)
{
	queue_work(pool->wq, &pool->worker);
}

/*----------------------------------------------------------------*/

/*
 * Bio endio functions.
 */
struct new_mapping {
	struct list_head list;

	unsigned quiesced:1;
	unsigned prepared:1;
	unsigned pass_discard:1;

	struct thin_c *tc;
	dm_block_t virt_block;
	dm_block_t data_block;
	struct cell *cell, *cell2;
	int err;

	/*
	 * If the bio covers the whole area of a block then we can avoid
	 * zeroing or copying.  Instead this bio is hooked.  The bio will
	 * still be in the cell, so care has to be taken to avoid issuing
	 * the bio twice.
	 */
	struct bio *bio;
	bio_end_io_t *saved_bi_end_io;
};

static void __maybe_add_mapping(struct new_mapping *m)
{
	struct pool *pool = m->tc->pool;

	if (m->quiesced && m->prepared) {
		list_add(&m->list, &pool->prepared_mappings);
		wake_worker(pool);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct new_mapping *m = context;
	struct pool *pool = m->tc->pool;

	m->err = read_err || write_err ? -EIO : 0;

	spin_lock_irqsave(&pool->lock, flags);
	m->prepared = 1;
	__maybe_add_mapping(m);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static void overwrite_endio(struct bio *bio, int err)
{
	unsigned long flags;
	struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
	struct new_mapping *m = h->overwrite_mapping;
	struct pool *pool = m->tc->pool;

	m->err = err;

	spin_lock_irqsave(&pool->lock, flags);
	m->prepared = 1;
	__maybe_add_mapping(m);
	spin_unlock_irqrestore(&pool->lock, flags);
}

/*----------------------------------------------------------------*/

/*
 * Workqueue.
 */

/*
 * Prepared mapping jobs.
 */

/*
 * This sends the bios in the cell back to the deferred_bios list.
 */
static void cell_defer(struct thin_c *tc, struct cell *cell,
		       dm_block_t data_block)
{
	struct pool *pool = tc->pool;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	cell_release(cell, &pool->deferred_bios);
	spin_unlock_irqrestore(&pool->lock, flags);

	wake_worker(pool);
}

/*
 * Same as cell_defer above, except it omits one particular detainee,
 * a write bio that covers the block and has already been processed.
 */
static void cell_defer_except(struct thin_c *tc, struct cell *cell)
{
	struct pool *pool = tc->pool;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	cell_release_no_holder(cell, &pool->deferred_bios);
	spin_unlock_irqrestore(&pool->lock, flags);

	wake_worker(pool);
}

static void process_prepared_mapping(struct new_mapping *m)
{
	struct thin_c *tc = m->tc;
	struct bio *bio;
	int r;

	bio = m->bio;
	if (bio)
		bio->bi_end_io = m->saved_bi_end_io;

	if (m->err) {
		cell_error(m->cell);
		goto out;
	}

	/*
	 * Commit the prepared block into the mapping btree.
	 * Any I/O for this block arriving after this point will get
	 * remapped to it directly.
	 */
	r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
	if (r) {
		DMERR("dm_thin_insert_block() failed");
		cell_error(m->cell);
		goto out;
	}

	/*
	 * Release any bios held while the block was being provisioned.
	 * If we are processing a write bio that completely covers the block,
	 * we already processed it so can ignore it now when processing
	 * the bios in the cell.
	 */
	if (bio) {
		cell_defer_except(tc, m->cell);
		bio_endio(bio, 0);
	} else
		cell_defer(tc, m->cell, m->data_block);

out:
	list_del(&m->list);
	mempool_free(m, tc->pool->mapping_pool);
}

static void process_prepared_discard(struct new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;

	r = dm_thin_remove_block(tc->td, m->virt_block);
	if (r)
		DMERR("dm_thin_remove_block() failed");

	/*
	 * Pass the discard down to the underlying device?
	 */
	if (m->pass_discard)
		remap_and_issue(tc, m->bio, m->data_block);
	else
		bio_endio(m->bio, 0);

	cell_defer_except(tc, m->cell);
	cell_defer_except(tc, m->cell2);
	mempool_free(m, tc->pool->mapping_pool);
}

static void process_prepared(struct pool *pool, struct list_head *head,
			     void (*fn)(struct new_mapping *))
{
	unsigned long flags;
	struct list_head maps;
	struct new_mapping *m, *tmp;

	INIT_LIST_HEAD(&maps);
	spin_lock_irqsave(&pool->lock, flags);
	list_splice_init(head, &maps);
	spin_unlock_irqrestore(&pool->lock, flags);

	list_for_each_entry_safe(m, tmp, &maps, list)
		fn(m);
}

/*
 * Deferred bio jobs.
 */
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
	return !(bio->bi_sector & pool->offset_mask) &&
		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
}
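
/*
 * E.g. with 128-sector (64KB) blocks, a bio "overlaps" its block only
 * if it starts exactly on a block boundary and bi_size is exactly 64KB.
 * Only such bios can take the overwrite shortcut in schedule_copy() and
 * schedule_zero() below.
 */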

static int io_overwrites_block(struct pool *pool, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		io_overlaps_block(pool, bio);
}

static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
			       bio_end_io_t *fn)
{
	*save = bio->bi_end_io;
	bio->bi_end_io = fn;
}

static int ensure_next_mapping(struct pool *pool)
{
	if (pool->next_mapping)
		return 0;

	pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);

	return pool->next_mapping ? 0 : -ENOMEM;
}

static struct new_mapping *get_next_mapping(struct pool *pool)
{
	struct new_mapping *r = pool->next_mapping;

	BUG_ON(!pool->next_mapping);

	pool->next_mapping = NULL;

	return r;
}

static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
			  struct dm_dev *origin, dm_block_t data_origin,
			  dm_block_t data_dest,
			  struct cell *cell, struct bio *bio)
{
	int r;
	struct pool *pool = tc->pool;
	struct new_mapping *m = get_next_mapping(pool);

	INIT_LIST_HEAD(&m->list);
	m->quiesced = 0;
	m->prepared = 0;
	m->tc = tc;
	m->virt_block = virt_block;
	m->data_block = data_dest;
	m->cell = cell;
	m->err = 0;
	m->bio = NULL;

	if (!ds_add_work(&pool->shared_read_ds, &m->list))
		m->quiesced = 1;

	/*
	 * IO to pool_dev remaps to the pool target's data_dev.
	 *
	 * If the whole block of data is being overwritten, we can issue the
	 * bio immediately.  Otherwise we use kcopyd to clone the data first.
	 */
	if (io_overwrites_block(pool, bio)) {
		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;

		h->overwrite_mapping = m;
		m->bio = bio;
		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
		remap_and_issue(tc, bio, data_dest);
	} else {
		struct dm_io_region from, to;

		from.bdev = origin->bdev;
		from.sector = data_origin * pool->sectors_per_block;
		from.count = pool->sectors_per_block;

		to.bdev = tc->pool_dev->bdev;
		to.sector = data_dest * pool->sectors_per_block;
		to.count = pool->sectors_per_block;

		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
				   0, copy_complete, m);
		if (r < 0) {
			mempool_free(m, pool->mapping_pool);
			DMERR("dm_kcopyd_copy() failed");
			cell_error(cell);
		}
	}
}

static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_origin, dm_block_t data_dest,
				   struct cell *cell, struct bio *bio)
{
	schedule_copy(tc, virt_block, tc->pool_dev,
		      data_origin, data_dest, cell, bio);
}

static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_dest,
				   struct cell *cell, struct bio *bio)
{
	schedule_copy(tc, virt_block, tc->origin_dev,
		      virt_block, data_dest, cell, bio);
}

static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
			  dm_block_t data_block, struct cell *cell,
			  struct bio *bio)
{
	struct pool *pool = tc->pool;
	struct new_mapping *m = get_next_mapping(pool);

	INIT_LIST_HEAD(&m->list);
	m->quiesced = 1;
	m->prepared = 0;
	m->tc = tc;
	m->virt_block = virt_block;
	m->data_block = data_block;
	m->cell = cell;
	m->err = 0;
	m->bio = NULL;

	/*
	 * If the whole block of data is being overwritten or we are not
	 * zeroing pre-existing data, we can issue the bio immediately.
	 * Otherwise we use kcopyd to zero the data first.
	 */
	if (!pool->pf.zero_new_blocks)
		process_prepared_mapping(m);

	else if (io_overwrites_block(pool, bio)) {
		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;

		h->overwrite_mapping = m;
		m->bio = bio;
		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
		remap_and_issue(tc, bio, data_block);

	} else {
		int r;
		struct dm_io_region to;

		to.bdev = tc->pool_dev->bdev;
		to.sector = data_block * pool->sectors_per_block;
		to.count = pool->sectors_per_block;

		r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
		if (r < 0) {
			mempool_free(m, pool->mapping_pool);
			DMERR("dm_kcopyd_zero() failed");
			cell_error(cell);
		}
	}
}

static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
{
	int r;
	dm_block_t free_blocks;
	unsigned long flags;
	struct pool *pool = tc->pool;

	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
	if (r)
		return r;

	if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
		DMWARN("%s: reached low water mark, sending event.",
		       dm_device_name(pool->pool_md));
		spin_lock_irqsave(&pool->lock, flags);
		pool->low_water_triggered = 1;
		spin_unlock_irqrestore(&pool->lock, flags);
		dm_table_event(pool->ti->table);
	}

	if (!free_blocks) {
		if (pool->no_free_space)
			return -ENOSPC;
		else {
			/*
			 * Try to commit to see if that will free up some
			 * more space.
			 */
			r = dm_pool_commit_metadata(pool->pmd);
			if (r) {
				DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
				      __func__, r);
				return r;
			}

			r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
			if (r)
				return r;

			/*
			 * If we still have no space we set a flag to avoid
			 * doing all this checking and return -ENOSPC.
			 */
			if (!free_blocks) {
				DMWARN("%s: no free space available.",
				       dm_device_name(pool->pool_md));
				spin_lock_irqsave(&pool->lock, flags);
				pool->no_free_space = 1;
				spin_unlock_irqrestore(&pool->lock, flags);
				return -ENOSPC;
			}
		}
	}

	r = dm_pool_alloc_data_block(pool->pmd, result);
	if (r)
		return r;

	return 0;
}

/*
 * If we have run out of space, queue bios until the device is
 * resumed, presumably after having been reloaded with more space.
 */
static void retry_on_resume(struct bio *bio)
{
	struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
	struct thin_c *tc = h->tc;
	struct pool *pool = tc->pool;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	bio_list_add(&pool->retry_on_resume_list, bio);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static void no_space(struct cell *cell)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	cell_release(cell, &bios);

	while ((bio = bio_list_pop(&bios)))
		retry_on_resume(bio);
}

static void process_discard(struct thin_c *tc, struct bio *bio)
{
	int r;
	unsigned long flags;
	struct pool *pool = tc->pool;
	struct cell *cell, *cell2;
	struct cell_key key, key2;
	dm_block_t block = get_bio_block(tc, bio);
	struct dm_thin_lookup_result lookup_result;
	struct new_mapping *m;

	build_virtual_key(tc->td, block, &key);
	if (bio_detain(tc->pool->prison, &key, bio, &cell))
		return;

	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
	switch (r) {
	case 0:
		/*
		 * Check nobody is fiddling with this pool block.  This can
		 * happen if someone's in the process of breaking sharing
		 * on this block.
		 */
		build_data_key(tc->td, lookup_result.block, &key2);
		if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
			cell_release_singleton(cell, bio);
			break;
		}

		if (io_overlaps_block(pool, bio)) {
			/*
			 * IO may still be going to the destination block.  We must
			 * quiesce before we can do the removal.
			 */
			m = get_next_mapping(pool);
			m->tc = tc;
			m->pass_discard = !lookup_result.shared && pool->pf.discard_passdown;
			m->virt_block = block;
			m->data_block = lookup_result.block;
			m->cell = cell;
			m->cell2 = cell2;
			m->err = 0;
			m->bio = bio;

			if (!ds_add_work(&pool->all_io_ds, &m->list)) {
				spin_lock_irqsave(&pool->lock, flags);
				list_add(&m->list, &pool->prepared_discards);
				spin_unlock_irqrestore(&pool->lock, flags);
				wake_worker(pool);
			}
		} else {
			/*
			 * This path is hit if people are ignoring
			 * limits->discard_granularity.  It ignores any
			 * part of the discard that is in a subsequent
			 * block.
			 */
			sector_t offset = bio->bi_sector - (block << pool->block_shift);
			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;

			bio->bi_size = min(bio->bi_size, remaining);

			cell_release_singleton(cell, bio);
			cell_release_singleton(cell2, bio);
			if (!lookup_result.shared && pool->pf.discard_passdown)
				remap_and_issue(tc, bio, lookup_result.block);
			else
				bio_endio(bio, 0);
		}
		break;

	case -ENODATA:
		/*
		 * It isn't provisioned, just forget it.
		 */
		cell_release_singleton(cell, bio);
		bio_endio(bio, 0);
		break;

	default:
		DMERR("discard: find block unexpectedly returned %d", r);
		cell_release_singleton(cell, bio);
		bio_io_error(bio);
		break;
	}
}

static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
			  struct cell_key *key,
			  struct dm_thin_lookup_result *lookup_result,
			  struct cell *cell)
{
	int r;
	dm_block_t data_block;

	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
		schedule_internal_copy(tc, block, lookup_result->block,
				       data_block, cell, bio);
		break;

	case -ENOSPC:
		no_space(cell);
		break;

	default:
		DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
		cell_error(cell);
		break;
	}
}

static void process_shared_bio(struct thin_c *tc, struct bio *bio,
			       dm_block_t block,
			       struct dm_thin_lookup_result *lookup_result)
{
	struct cell *cell;
	struct pool *pool = tc->pool;
	struct cell_key key;

	/*
	 * If the cell is already occupied, then sharing is already in the
	 * process of being broken so we have nothing further to do here.
	 */
	build_data_key(tc->td, lookup_result->block, &key);
	if (bio_detain(pool->prison, &key, bio, &cell))
		return;

	if (bio_data_dir(bio) == WRITE)
		break_sharing(tc, bio, block, &key, lookup_result, cell);
	else {
		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;

		h->shared_read_entry = ds_inc(&pool->shared_read_ds);

		cell_release_singleton(cell, bio);
		remap_and_issue(tc, bio, lookup_result->block);
	}
}

static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
			    struct cell *cell)
{
	int r;
	dm_block_t data_block;

	/*
	 * Remap empty bios (flushes) immediately, without provisioning.
	 */
	if (!bio->bi_size) {
		cell_release_singleton(cell, bio);
		remap_and_issue(tc, bio, 0);
		return;
	}

	/*
	 * Fill read bios with zeroes and complete them immediately.
	 */
	if (bio_data_dir(bio) == READ) {
		zero_fill_bio(bio);
		cell_release_singleton(cell, bio);
		bio_endio(bio, 0);
		return;
	}

	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
		if (tc->origin_dev)
			schedule_external_copy(tc, block, data_block, cell, bio);
		else
			schedule_zero(tc, block, data_block, cell, bio);
		break;

	case -ENOSPC:
		no_space(cell);
		break;

	default:
		DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
		cell_error(cell);
		break;
	}
}

static void process_bio(struct thin_c *tc, struct bio *bio)
{
	int r;
	dm_block_t block = get_bio_block(tc, bio);
	struct cell *cell;
	struct cell_key key;
	struct dm_thin_lookup_result lookup_result;

	/*
	 * If the cell is already occupied, then the block is already
	 * being provisioned so we have nothing further to do here.
	 */
	build_virtual_key(tc->td, block, &key);
	if (bio_detain(tc->pool->prison, &key, bio, &cell))
		return;

	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
	switch (r) {
	case 0:
		/*
		 * We can release this cell now.  This thread is the only
		 * one that puts bios into a cell, and we know there were
		 * no preceding bios.
		 *
		 * TODO: this will probably have to change when discard goes
		 * back in.
		 */
		cell_release_singleton(cell, bio);

		if (lookup_result.shared)
			process_shared_bio(tc, bio, block, &lookup_result);
		else
			remap_and_issue(tc, bio, lookup_result.block);
		break;

	case -ENODATA:
		if (bio_data_dir(bio) == READ && tc->origin_dev) {
			cell_release_singleton(cell, bio);
			remap_to_origin_and_issue(tc, bio);
		} else
			provision_block(tc, bio, block, cell);
		break;

	default:
		DMERR("dm_thin_find_block() failed, error = %d", r);
		cell_release_singleton(cell, bio);
		bio_io_error(bio);
		break;
	}
}

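/*
 * The first comparison below catches jiffies wrapping past
 * last_commit_jiffies; the second is the ordinary "more than
 * COMMIT_PERIOD has elapsed" test.
 */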
static int need_commit_due_to_time(struct pool *pool)
{
	return jiffies < pool->last_commit_jiffies ||
	       jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
}
| 1423 | |
| 1424 | static void process_deferred_bios(struct pool *pool) |
| 1425 | { |
| 1426 | unsigned long flags; |
| 1427 | struct bio *bio; |
| 1428 | struct bio_list bios; |
| 1429 | int r; |
| 1430 | |
| 1431 | bio_list_init(&bios); |
| 1432 | |
| 1433 | spin_lock_irqsave(&pool->lock, flags); |
| 1434 | bio_list_merge(&bios, &pool->deferred_bios); |
| 1435 | bio_list_init(&pool->deferred_bios); |
| 1436 | spin_unlock_irqrestore(&pool->lock, flags); |
| 1437 | |
| 1438 | while ((bio = bio_list_pop(&bios))) { |
| 1439 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; |
| 1440 | struct thin_c *tc = h->tc; |
| 1441 | |
| 1442 | /* |
| 1443 | * If we've got no free new_mapping structs, and processing |
| 1444 | * this bio might require one, we pause until there are some |
| 1445 | * prepared mappings to process. |
| 1446 | */ |
| 1447 | if (ensure_next_mapping(pool)) { |
| 1448 | spin_lock_irqsave(&pool->lock, flags); |
| 1449 | bio_list_add(&pool->deferred_bios, bio); |
| 1450 | bio_list_merge(&pool->deferred_bios, &bios); |
| 1451 | spin_unlock_irqrestore(&pool->lock, flags); |
| 1452 | break; |
| 1453 | } |
| 1454 | |
| 1455 | if (bio->bi_rw & REQ_DISCARD) |
| 1456 | process_discard(tc, bio); |
| 1457 | else |
| 1458 | process_bio(tc, bio); |
| 1459 | } |
| 1460 | |
| 1461 | /* |
| 1462 | * If there are any deferred flush bios, we must commit |
| 1463 | * the metadata before issuing them. |
| 1464 | */ |
| 1465 | bio_list_init(&bios); |
| 1466 | spin_lock_irqsave(&pool->lock, flags); |
| 1467 | bio_list_merge(&bios, &pool->deferred_flush_bios); |
| 1468 | bio_list_init(&pool->deferred_flush_bios); |
| 1469 | spin_unlock_irqrestore(&pool->lock, flags); |
| 1470 | |
| 1471 | if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) |
| 1472 | return; |
| 1473 | |
| 1474 | r = dm_pool_commit_metadata(pool->pmd); |
| 1475 | if (r) { |
| 1476 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", |
| 1477 | __func__, r); |
| 1478 | while ((bio = bio_list_pop(&bios))) |
| 1479 | bio_io_error(bio); |
| 1480 | return; |
| 1481 | } |
| 1482 | pool->last_commit_jiffies = jiffies; |
| 1483 | |
| 1484 | while ((bio = bio_list_pop(&bios))) |
| 1485 | generic_make_request(bio); |
| 1486 | } |
| 1487 | |
| 1488 | static void do_worker(struct work_struct *ws) |
| 1489 | { |
| 1490 | struct pool *pool = container_of(ws, struct pool, worker); |
| 1491 | |
| 1492 | process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); |
| 1493 | process_prepared(pool, &pool->prepared_discards, process_prepared_discard); |
| 1494 | process_deferred_bios(pool); |
| 1495 | } |
| 1496 | |
| 1497 | /* |
| 1498 | * We want to commit periodically so that not too much |
| 1499 | * unwritten data builds up. |
| 1500 | */ |
| 1501 | static void do_waker(struct work_struct *ws) |
| 1502 | { |
| 1503 | struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker); |
| 1504 | wake_worker(pool); |
| 1505 | queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); |
| 1506 | } |
| 1507 | |
| 1508 | /*----------------------------------------------------------------*/ |
| 1509 | |
| 1510 | /* |
| 1511 | * Mapping functions. |
| 1512 | */ |
| 1513 | |
| 1514 | /* |
| 1515 | * Called only while mapping a thin bio to hand it over to the workqueue. |
| 1516 | */ |
| 1517 | static void thin_defer_bio(struct thin_c *tc, struct bio *bio) |
| 1518 | { |
| 1519 | unsigned long flags; |
| 1520 | struct pool *pool = tc->pool; |
| 1521 | |
| 1522 | spin_lock_irqsave(&pool->lock, flags); |
| 1523 | bio_list_add(&pool->deferred_bios, bio); |
| 1524 | spin_unlock_irqrestore(&pool->lock, flags); |
| 1525 | |
| 1526 | wake_worker(pool); |
| 1527 | } |
| 1528 | |
| 1529 | static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) |
| 1530 | { |
| 1531 | struct pool *pool = tc->pool; |
| 1532 | struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); |
| 1533 | |
| 1534 | h->tc = tc; |
| 1535 | h->shared_read_entry = NULL; |
| 1536 | h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds); |
| 1537 | h->overwrite_mapping = NULL; |
| 1538 | |
| 1539 | return h; |
| 1540 | } |
| 1541 | |
| 1542 | /* |
| 1543 | * Non-blocking function called from the thin target's map function. |
| 1544 | */ |
| 1545 | static int thin_bio_map(struct dm_target *ti, struct bio *bio, |
| 1546 | union map_info *map_context) |
| 1547 | { |
| 1548 | int r; |
| 1549 | struct thin_c *tc = ti->private; |
| 1550 | dm_block_t block = get_bio_block(tc, bio); |
| 1551 | struct dm_thin_device *td = tc->td; |
| 1552 | struct dm_thin_lookup_result result; |
| 1553 | |
| 1554 | map_context->ptr = thin_hook_bio(tc, bio); |
| 1555 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { |
| 1556 | thin_defer_bio(tc, bio); |
| 1557 | return DM_MAPIO_SUBMITTED; |
| 1558 | } |
| 1559 | |
| 1560 | r = dm_thin_find_block(td, block, 0, &result); |
| 1561 | |
| 1562 | /* |
| 1563 | * Note that we defer readahead too. |
| 1564 | */ |
| 1565 | switch (r) { |
| 1566 | case 0: |
| 1567 | if (unlikely(result.shared)) { |
| 1568 | /* |
| 1569 | * We have a race condition here between the |
| 1570 | * result.shared value returned by the lookup and |
| 1571 | * snapshot creation, which may cause new |
| 1572 | * sharing. |
| 1573 | * |
| 1574 | * To avoid this always quiesce the origin before |
| 1575 | * taking the snap. You want to do this anyway to |
| 1576 | * ensure a consistent application view |
| 1577 | * (i.e. lockfs). |
| 1578 | * |
| 1579 | * More distant ancestors are irrelevant. The |
| 1580 | * shared flag will be set in their case. |
| 1581 | */ |
| 1582 | thin_defer_bio(tc, bio); |
| 1583 | r = DM_MAPIO_SUBMITTED; |
| 1584 | } else { |
| 1585 | remap(tc, bio, result.block); |
| 1586 | r = DM_MAPIO_REMAPPED; |
| 1587 | } |
| 1588 | break; |
| 1589 | |
| 1590 | case -ENODATA: |
| 1591 | /* |
| 1592 | * In future, the failed dm_thin_find_block above could |
| 1593 | * provide the hint to load the metadata into cache. |
| 1594 | */ |
| 1595 | case -EWOULDBLOCK: |
| 1596 | thin_defer_bio(tc, bio); |
| 1597 | r = DM_MAPIO_SUBMITTED; |
| 1598 | break; |
| 1599 | } |
| 1600 | |
| 1601 | return r; |
| 1602 | } |
| 1603 | |
| 1604 | static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) |
| 1605 | { |
| 1606 | int r; |
| 1607 | unsigned long flags; |
| 1608 | struct pool_c *pt = container_of(cb, struct pool_c, callbacks); |
| 1609 | |
| 1610 | spin_lock_irqsave(&pt->pool->lock, flags); |
| 1611 | r = !bio_list_empty(&pt->pool->retry_on_resume_list); |
| 1612 | spin_unlock_irqrestore(&pt->pool->lock, flags); |
| 1613 | |
| 1614 | if (!r) { |
| 1615 | struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); |
| 1616 | r = bdi_congested(&q->backing_dev_info, bdi_bits); |
| 1617 | } |
| 1618 | |
| 1619 | return r; |
| 1620 | } |
| 1621 | |
| 1622 | static void __requeue_bios(struct pool *pool) |
| 1623 | { |
| 1624 | bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list); |
| 1625 | bio_list_init(&pool->retry_on_resume_list); |
| 1626 | } |
| 1627 | |
| 1628 | /*---------------------------------------------------------------- |
| 1629 | * Binding of control targets to a pool object |
| 1630 | *--------------------------------------------------------------*/ |
| 1631 | static int bind_control_target(struct pool *pool, struct dm_target *ti) |
| 1632 | { |
| 1633 | struct pool_c *pt = ti->private; |
| 1634 | |
| 1635 | pool->ti = ti; |
| 1636 | pool->low_water_blocks = pt->low_water_blocks; |
| 1637 | pool->pf = pt->pf; |
| 1638 | |
| 1639 | /* |
| 1640 | * If discard_passdown was enabled verify that the data device |
| 1641 | * supports discards. Disable discard_passdown if not; otherwise |
| 1642 | * -EOPNOTSUPP will be returned. |
| 1643 | */ |
| 1644 | if (pt->pf.discard_passdown) { |
| 1645 | struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); |
| 1646 | if (!q || !blk_queue_discard(q)) { |
| 1647 | char buf[BDEVNAME_SIZE]; |
| 1648 | DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", |
| 1649 | bdevname(pt->data_dev->bdev, buf)); |
| 1650 | pool->pf.discard_passdown = 0; |
| 1651 | } |
| 1652 | } |
| 1653 | |
| 1654 | return 0; |
| 1655 | } |
| 1656 | |
| 1657 | static void unbind_control_target(struct pool *pool, struct dm_target *ti) |
| 1658 | { |
| 1659 | if (pool->ti == ti) |
| 1660 | pool->ti = NULL; |
| 1661 | } |
| 1662 | |
| 1663 | /*---------------------------------------------------------------- |
| 1664 | * Pool creation |
| 1665 | *--------------------------------------------------------------*/ |
| 1666 | /* Initialize pool features. */ |
| 1667 | static void pool_features_init(struct pool_features *pf) |
| 1668 | { |
| 1669 | pf->zero_new_blocks = 1; |
| 1670 | pf->discard_enabled = 1; |
| 1671 | pf->discard_passdown = 1; |
| 1672 | } |
| 1673 | |
| 1674 | static void __pool_destroy(struct pool *pool) |
| 1675 | { |
| 1676 | __pool_table_remove(pool); |
| 1677 | |
| 1678 | if (dm_pool_metadata_close(pool->pmd) < 0) |
| 1679 | DMWARN("%s: dm_pool_metadata_close() failed.", __func__); |
| 1680 | |
| 1681 | prison_destroy(pool->prison); |
| 1682 | dm_kcopyd_client_destroy(pool->copier); |
| 1683 | |
| 1684 | if (pool->wq) |
| 1685 | destroy_workqueue(pool->wq); |
| 1686 | |
| 1687 | if (pool->next_mapping) |
| 1688 | mempool_free(pool->next_mapping, pool->mapping_pool); |
| 1689 | mempool_destroy(pool->mapping_pool); |
| 1690 | mempool_destroy(pool->endio_hook_pool); |
| 1691 | kfree(pool); |
| 1692 | } |
| 1693 | |
| 1694 | static struct pool *pool_create(struct mapped_device *pool_md, |
| 1695 | struct block_device *metadata_dev, |
| 1696 | unsigned long block_size, char **error) |
| 1697 | { |
| 1698 | int r; |
| 1699 | void *err_p; |
| 1700 | struct pool *pool; |
| 1701 | struct dm_pool_metadata *pmd; |
| 1702 | |
| 1703 | pmd = dm_pool_metadata_open(metadata_dev, block_size); |
| 1704 | if (IS_ERR(pmd)) { |
| 1705 | *error = "Error creating metadata object"; |
| 1706 | return (struct pool *)pmd; |
| 1707 | } |
| 1708 | |
| 1709 | pool = kmalloc(sizeof(*pool), GFP_KERNEL); |
| 1710 | if (!pool) { |
| 1711 | *error = "Error allocating memory for pool"; |
| 1712 | err_p = ERR_PTR(-ENOMEM); |
| 1713 | goto bad_pool; |
| 1714 | } |
| 1715 | |
| 1716 | pool->pmd = pmd; |
| 1717 | pool->sectors_per_block = block_size; |
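| | /* |
| | * block_size has already been validated as a power of two, so e.g. |
| | * 128 sectors (64KB blocks) gives block_shift = 7, offset_mask = 0x7f. |
| | */ |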
| 1718 | pool->block_shift = ffs(block_size) - 1; |
| 1719 | pool->offset_mask = block_size - 1; |
| 1720 | pool->low_water_blocks = 0; |
| 1721 | pool_features_init(&pool->pf); |
| 1722 | pool->prison = prison_create(PRISON_CELLS); |
| 1723 | if (!pool->prison) { |
| 1724 | *error = "Error creating pool's bio prison"; |
| 1725 | err_p = ERR_PTR(-ENOMEM); |
| 1726 | goto bad_prison; |
| 1727 | } |
| 1728 | |
| 1729 | pool->copier = dm_kcopyd_client_create(); |
| 1730 | if (IS_ERR(pool->copier)) { |
| 1731 | r = PTR_ERR(pool->copier); |
| 1732 | *error = "Error creating pool's kcopyd client"; |
| 1733 | err_p = ERR_PTR(r); |
| 1734 | goto bad_kcopyd_client; |
| 1735 | } |
| 1736 | |
| 1737 | /* |
| 1738 | * Create a single-threaded workqueue that will service all devices |
| 1739 | * that use this metadata. |
| 1740 | */ |
| 1741 | pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); |
| 1742 | if (!pool->wq) { |
| 1743 | *error = "Error creating pool's workqueue"; |
| 1744 | err_p = ERR_PTR(-ENOMEM); |
| 1745 | goto bad_wq; |
| 1746 | } |
| 1747 | |
| 1748 | INIT_WORK(&pool->worker, do_worker); |
| 1749 | INIT_DELAYED_WORK(&pool->waker, do_waker); |
| 1750 | spin_lock_init(&pool->lock); |
| 1751 | bio_list_init(&pool->deferred_bios); |
| 1752 | bio_list_init(&pool->deferred_flush_bios); |
| 1753 | INIT_LIST_HEAD(&pool->prepared_mappings); |
| 1754 | INIT_LIST_HEAD(&pool->prepared_discards); |
| 1755 | pool->low_water_triggered = 0; |
| 1756 | pool->no_free_space = 0; |
| 1757 | bio_list_init(&pool->retry_on_resume_list); |
| 1758 | ds_init(&pool->shared_read_ds); |
| 1759 | ds_init(&pool->all_io_ds); |
| 1760 | |
| 1761 | pool->next_mapping = NULL; |
| 1762 | pool->mapping_pool = |
| 1763 | mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping)); |
| 1764 | if (!pool->mapping_pool) { |
| 1765 | *error = "Error creating pool's mapping mempool"; |
| 1766 | err_p = ERR_PTR(-ENOMEM); |
| 1767 | goto bad_mapping_pool; |
| 1768 | } |
| 1769 | |
| 1770 | pool->endio_hook_pool = |
| 1771 | mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook)); |
| 1772 | if (!pool->endio_hook_pool) { |
| 1773 | *error = "Error creating pool's endio_hook mempool"; |
| 1774 | err_p = ERR_PTR(-ENOMEM); |
| 1775 | goto bad_endio_hook_pool; |
| 1776 | } |
| 1777 | pool->ref_count = 1; |
| 1778 | pool->last_commit_jiffies = jiffies; |
| 1779 | pool->pool_md = pool_md; |
| 1780 | pool->md_dev = metadata_dev; |
| 1781 | __pool_table_insert(pool); |
| 1782 | |
| 1783 | return pool; |
| 1784 | |
| 1785 | bad_endio_hook_pool: |
| 1786 | mempool_destroy(pool->mapping_pool); |
| 1787 | bad_mapping_pool: |
| 1788 | destroy_workqueue(pool->wq); |
| 1789 | bad_wq: |
| 1790 | dm_kcopyd_client_destroy(pool->copier); |
| 1791 | bad_kcopyd_client: |
| 1792 | prison_destroy(pool->prison); |
| 1793 | bad_prison: |
| 1794 | kfree(pool); |
| 1795 | bad_pool: |
| 1796 | if (dm_pool_metadata_close(pmd) < 0) |
| 1797 | DMWARN("%s: dm_pool_metadata_close() failed.", __func__); |
| 1798 | |
| 1799 | return err_p; |
| 1800 | } |
| 1801 | |
| 1802 | static void __pool_inc(struct pool *pool) |
| 1803 | { |
| 1804 | BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); |
| 1805 | pool->ref_count++; |
| 1806 | } |
| 1807 | |
| 1808 | static void __pool_dec(struct pool *pool) |
| 1809 | { |
| 1810 | BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); |
| 1811 | BUG_ON(!pool->ref_count); |
| 1812 | if (!--pool->ref_count) |
| 1813 | __pool_destroy(pool); |
| 1814 | } |
| 1815 | |
| 1816 | static struct pool *__pool_find(struct mapped_device *pool_md, |
| 1817 | struct block_device *metadata_dev, |
| 1818 | unsigned long block_size, char **error, |
| 1819 | int *created) |
| 1820 | { |
| 1821 | struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); |
| 1822 | |
| 1823 | if (pool) { |
| 1824 | if (pool->pool_md != pool_md) |
| 1825 | return ERR_PTR(-EBUSY); |
| 1826 | __pool_inc(pool); |
| 1827 | |
| 1828 | } else { |
| 1829 | pool = __pool_table_lookup(pool_md); |
| 1830 | if (pool) { |
| 1831 | if (pool->md_dev != metadata_dev) |
| 1832 | return ERR_PTR(-EINVAL); |
| 1833 | __pool_inc(pool); |
| 1834 | |
| 1835 | } else { |
| 1836 | pool = pool_create(pool_md, metadata_dev, block_size, error); |
| 1837 | *created = 1; |
| 1838 | } |
| 1839 | } |
| 1840 | |
| 1841 | return pool; |
| 1842 | } |
| 1843 | |
| 1844 | /*---------------------------------------------------------------- |
| 1845 | * Pool target methods |
| 1846 | *--------------------------------------------------------------*/ |
| 1847 | static void pool_dtr(struct dm_target *ti) |
| 1848 | { |
| 1849 | struct pool_c *pt = ti->private; |
| 1850 | |
| 1851 | mutex_lock(&dm_thin_pool_table.mutex); |
| 1852 | |
| 1853 | unbind_control_target(pt->pool, ti); |
| 1854 | __pool_dec(pt->pool); |
| 1855 | dm_put_device(ti, pt->metadata_dev); |
| 1856 | dm_put_device(ti, pt->data_dev); |
| 1857 | kfree(pt); |
| 1858 | |
| 1859 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 1860 | } |
| 1861 | |
| 1862 | static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, |
| 1863 | struct dm_target *ti) |
| 1864 | { |
| 1865 | int r; |
| 1866 | unsigned argc; |
| 1867 | const char *arg_name; |
| 1868 | |
| 1869 | static struct dm_arg _args[] = { |
| 1870 | {0, 3, "Invalid number of pool feature arguments"}, |
| 1871 | }; |
| 1872 | |
| 1873 | /* |
| 1874 | * No feature arguments supplied. |
| 1875 | */ |
| 1876 | if (!as->argc) |
| 1877 | return 0; |
| 1878 | |
| 1879 | r = dm_read_arg_group(_args, as, &argc, &ti->error); |
| 1880 | if (r) |
| 1881 | return -EINVAL; |
| 1882 | |
| 1883 | while (argc && !r) { |
| 1884 | arg_name = dm_shift_arg(as); |
| 1885 | argc--; |
| 1886 | |
| 1887 | if (!strcasecmp(arg_name, "skip_block_zeroing")) { |
| 1888 | pf->zero_new_blocks = 0; |
| 1889 | continue; |
| 1890 | } else if (!strcasecmp(arg_name, "ignore_discard")) { |
| 1891 | pf->discard_enabled = 0; |
| 1892 | continue; |
| 1893 | } else if (!strcasecmp(arg_name, "no_discard_passdown")) { |
| 1894 | pf->discard_passdown = 0; |
| 1895 | continue; |
| 1896 | } |
| 1897 | |
| 1898 | ti->error = "Unrecognised pool feature requested"; |
| 1899 | r = -EINVAL; |
| 1900 | } |
| 1901 | |
| 1902 | return r; |
| 1903 | } |
| 1904 | |
| 1905 | /* |
| 1906 | * thin-pool <metadata dev> <data dev> |
| 1907 | * <data block size (sectors)> |
| 1908 | * <low water mark (blocks)> |
| 1909 | * [<#feature args> [<arg>]*] |
| 1910 | * |
| 1911 | * Optional feature arguments are: |
| 1912 | * skip_block_zeroing: skips the zeroing of newly-provisioned blocks. |
| 1913 | * ignore_discard: disable discard |
| 1914 | * no_discard_passdown: don't pass discards down to the data device |
| 1915 | */ |
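| | /* |
| | * For example (hypothetical device names and sizes), a 10GB pool |
| | * with 64KB (128-sector) blocks and a low water mark of 100 blocks |
| | * could be loaded with: |
| | * |
| | * dmsetup create pool --table \ |
| | * "0 20971520 thin-pool /dev/sdb1 /dev/sdb2 128 100 1 skip_block_zeroing" |
| | */ |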
| 1916 | static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) |
| 1917 | { |
| 1918 | int r, pool_created = 0; |
| 1919 | struct pool_c *pt; |
| 1920 | struct pool *pool; |
| 1921 | struct pool_features pf; |
| 1922 | struct dm_arg_set as; |
| 1923 | struct dm_dev *data_dev; |
| 1924 | unsigned long block_size; |
| 1925 | dm_block_t low_water_blocks; |
| 1926 | struct dm_dev *metadata_dev; |
| 1927 | sector_t metadata_dev_size; |
| 1928 | char b[BDEVNAME_SIZE]; |
| 1929 | |
| 1930 | /* |
| 1931 | * FIXME Remove validation from scope of lock. |
| 1932 | */ |
| 1933 | mutex_lock(&dm_thin_pool_table.mutex); |
| 1934 | |
| 1935 | if (argc < 4) { |
| 1936 | ti->error = "Invalid argument count"; |
| 1937 | r = -EINVAL; |
| 1938 | goto out_unlock; |
| 1939 | } |
| 1940 | as.argc = argc; |
| 1941 | as.argv = argv; |
| 1942 | |
| 1943 | r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev); |
| 1944 | if (r) { |
| 1945 | ti->error = "Error opening metadata block device"; |
| 1946 | goto out_unlock; |
| 1947 | } |
| 1948 | |
| 1949 | metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; |
| 1950 | if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) |
| 1951 | DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", |
| 1952 | bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); |
| 1953 | |
| 1954 | r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); |
| 1955 | if (r) { |
| 1956 | ti->error = "Error getting data device"; |
| 1957 | goto out_metadata; |
| 1958 | } |
| 1959 | |
| 1960 | if (kstrtoul(argv[2], 10, &block_size) || !block_size || |
| 1961 | block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || |
| 1962 | block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || |
| 1963 | !is_power_of_2(block_size)) { |
| 1964 | ti->error = "Invalid block size"; |
| 1965 | r = -EINVAL; |
| 1966 | goto out; |
| 1967 | } |
| 1968 | |
| 1969 | if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) { |
| 1970 | ti->error = "Invalid low water mark"; |
| 1971 | r = -EINVAL; |
| 1972 | goto out; |
| 1973 | } |
| 1974 | |
| 1975 | /* |
| 1976 | * Set default pool features. |
| 1977 | */ |
| 1978 | pool_features_init(&pf); |
| 1979 | |
| 1980 | dm_consume_args(&as, 4); |
| 1981 | r = parse_pool_features(&as, &pf, ti); |
| 1982 | if (r) |
| 1983 | goto out; |
| 1984 | |
| 1985 | pt = kzalloc(sizeof(*pt), GFP_KERNEL); |
| 1986 | if (!pt) { |
| 1987 | r = -ENOMEM; |
| 1988 | goto out; |
| 1989 | } |
| 1990 | |
| 1991 | pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, |
| 1992 | block_size, &ti->error, &pool_created); |
| 1993 | if (IS_ERR(pool)) { |
| 1994 | r = PTR_ERR(pool); |
| 1995 | goto out_free_pt; |
| 1996 | } |
| 1997 | |
| 1998 | /* |
| 1999 | * 'pool_created' reflects whether this is the first table load. |
| 2000 | * Top level discard support is not allowed to be changed after |
| 2001 | * initial load: changing it would require every thin device to |
| 2002 | * be reloaded, which the pool cannot trigger. |
| 2003 | */ |
| 2004 | if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) { |
| 2005 | ti->error = "Discard support cannot be disabled once enabled"; |
| 2006 | r = -EINVAL; |
| 2007 | goto out_flags_changed; |
| 2008 | } |
| 2009 | |
| 2010 | pt->pool = pool; |
| 2011 | pt->ti = ti; |
| 2012 | pt->metadata_dev = metadata_dev; |
| 2013 | pt->data_dev = data_dev; |
| 2014 | pt->low_water_blocks = low_water_blocks; |
| 2015 | pt->pf = pf; |
| 2016 | ti->num_flush_requests = 1; |
| 2017 | /* |
| 2018 | * Only need to enable discards if the pool should pass |
| 2019 | * them down to the data device. The thin device's discard |
| 2020 | * processing will cause mappings to be removed from the btree. |
| 2021 | */ |
| 2022 | if (pf.discard_enabled && pf.discard_passdown) { |
| 2023 | ti->num_discard_requests = 1; |
| 2024 | /* |
| 2025 | * Setting 'discards_supported' circumvents the normal |
| 2026 | * stacking of discard limits (this keeps the pool and |
| 2027 | * thin devices' discard limits consistent). |
| 2028 | */ |
| 2029 | ti->discards_supported = 1; |
| 2030 | ti->discard_zeroes_data_unsupported = 1; |
| 2031 | } |
| 2032 | ti->private = pt; |
| 2033 | |
| 2034 | pt->callbacks.congested_fn = pool_is_congested; |
| 2035 | dm_table_add_target_callbacks(ti->table, &pt->callbacks); |
| 2036 | |
| 2037 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 2038 | |
| 2039 | return 0; |
| 2040 | |
| 2041 | out_flags_changed: |
| 2042 | __pool_dec(pool); |
| 2043 | out_free_pt: |
| 2044 | kfree(pt); |
| 2045 | out: |
| 2046 | dm_put_device(ti, data_dev); |
| 2047 | out_metadata: |
| 2048 | dm_put_device(ti, metadata_dev); |
| 2049 | out_unlock: |
| 2050 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 2051 | |
| 2052 | return r; |
| 2053 | } |
| 2054 | |
| 2055 | static int pool_map(struct dm_target *ti, struct bio *bio, |
| 2056 | union map_info *map_context) |
| 2057 | { |
| 2058 | int r; |
| 2059 | struct pool_c *pt = ti->private; |
| 2060 | struct pool *pool = pt->pool; |
| 2061 | unsigned long flags; |
| 2062 | |
| 2063 | /* |
| 2064 | * As this is a singleton target, ti->begin is always zero. |
| 2065 | */ |
| 2066 | spin_lock_irqsave(&pool->lock, flags); |
| 2067 | bio->bi_bdev = pt->data_dev->bdev; |
| 2068 | r = DM_MAPIO_REMAPPED; |
| 2069 | spin_unlock_irqrestore(&pool->lock, flags); |
| 2070 | |
| 2071 | return r; |
| 2072 | } |
| 2073 | |
| 2074 | /* |
| 2075 | * Reads the number of data device blocks recorded in the |
| 2076 | * superblock, compares it to the data device's actual size, |
| 2077 | * and extends the pool's data space if the device has grown. |
| 2078 | * |
| 2079 | * This copes both with opening a preallocated data device in the |
| 2080 | * ctr followed by a resume |
| 2081 | * -and- |
| 2082 | * with calling the resume method individually after userspace has |
| 2083 | * grown the data device in reaction to a table event. |
| 2084 | */ |
| 2085 | static int pool_preresume(struct dm_target *ti) |
| 2086 | { |
| 2087 | int r; |
| 2088 | struct pool_c *pt = ti->private; |
| 2089 | struct pool *pool = pt->pool; |
| 2090 | dm_block_t data_size, sb_data_size; |
| 2091 | |
| 2092 | /* |
| 2093 | * Take control of the pool object. |
| 2094 | */ |
| 2095 | r = bind_control_target(pool, ti); |
| 2096 | if (r) |
| 2097 | return r; |
| 2098 | |
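| | /* |
| | * E.g. (illustrative sizes) a 20971520-sector (10GB) target with |
| | * 64KB blocks (block_shift == 7) gives data_size = 163840 blocks. |
| | */ |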
| 2099 | data_size = ti->len >> pool->block_shift; |
| 2100 | r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); |
| 2101 | if (r) { |
| 2102 | DMERR("failed to retrieve data device size"); |
| 2103 | return r; |
| 2104 | } |
| 2105 | |
| 2106 | if (data_size < sb_data_size) { |
| 2107 | DMERR("pool target too small, is %llu blocks (expected %llu)", |
| 2108 | (unsigned long long)data_size, (unsigned long long)sb_data_size); |
| 2109 | return -EINVAL; |
| 2110 | |
| 2111 | } else if (data_size > sb_data_size) { |
| 2112 | r = dm_pool_resize_data_dev(pool->pmd, data_size); |
| 2113 | if (r) { |
| 2114 | DMERR("failed to resize data device"); |
| 2115 | return r; |
| 2116 | } |
| 2117 | |
| 2118 | r = dm_pool_commit_metadata(pool->pmd); |
| 2119 | if (r) { |
| 2120 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", |
| 2121 | __func__, r); |
| 2122 | return r; |
| 2123 | } |
| 2124 | } |
| 2125 | |
| 2126 | return 0; |
| 2127 | } |
| 2128 | |
| 2129 | static void pool_resume(struct dm_target *ti) |
| 2130 | { |
| 2131 | struct pool_c *pt = ti->private; |
| 2132 | struct pool *pool = pt->pool; |
| 2133 | unsigned long flags; |
| 2134 | |
| 2135 | spin_lock_irqsave(&pool->lock, flags); |
| 2136 | pool->low_water_triggered = 0; |
| 2137 | pool->no_free_space = 0; |
| 2138 | __requeue_bios(pool); |
| 2139 | spin_unlock_irqrestore(&pool->lock, flags); |
| 2140 | |
| 2141 | do_waker(&pool->waker.work); |
| 2142 | } |
| 2143 | |
| 2144 | static void pool_postsuspend(struct dm_target *ti) |
| 2145 | { |
| 2146 | int r; |
| 2147 | struct pool_c *pt = ti->private; |
| 2148 | struct pool *pool = pt->pool; |
| 2149 | |
| 2150 | cancel_delayed_work(&pool->waker); |
| 2151 | flush_workqueue(pool->wq); |
| 2152 | |
| 2153 | r = dm_pool_commit_metadata(pool->pmd); |
| 2154 | if (r < 0) { |
| 2155 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", |
| 2156 | __func__, r); |
| 2157 | /* FIXME: invalidate device? error the next FUA or FLUSH bio? */ |
| 2158 | } |
| 2159 | } |
| 2160 | |
| 2161 | static int check_arg_count(unsigned argc, unsigned args_required) |
| 2162 | { |
| 2163 | if (argc != args_required) { |
| 2164 | DMWARN("Message received with %u arguments instead of %u.", |
| 2165 | argc, args_required); |
| 2166 | return -EINVAL; |
| 2167 | } |
| 2168 | |
| 2169 | return 0; |
| 2170 | } |
| 2171 | |
| 2172 | static int read_dev_id(char *arg, dm_thin_id *dev_id, int warning) |
| 2173 | { |
| 2174 | if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) && |
| 2175 | *dev_id <= MAX_DEV_ID) |
| 2176 | return 0; |
| 2177 | |
| 2178 | if (warning) |
| 2179 | DMWARN("Message received with invalid device id: %s", arg); |
| 2180 | |
| 2181 | return -EINVAL; |
| 2182 | } |
| 2183 | |
| 2184 | static int process_create_thin_mesg(unsigned argc, char **argv, struct pool *pool) |
| 2185 | { |
| 2186 | dm_thin_id dev_id; |
| 2187 | int r; |
| 2188 | |
| 2189 | r = check_arg_count(argc, 2); |
| 2190 | if (r) |
| 2191 | return r; |
| 2192 | |
| 2193 | r = read_dev_id(argv[1], &dev_id, 1); |
| 2194 | if (r) |
| 2195 | return r; |
| 2196 | |
| 2197 | r = dm_pool_create_thin(pool->pmd, dev_id); |
| 2198 | if (r) { |
| 2199 | DMWARN("Creation of new thinly-provisioned device with id %s failed.", |
| 2200 | argv[1]); |
| 2201 | return r; |
| 2202 | } |
| 2203 | |
| 2204 | return 0; |
| 2205 | } |
| 2206 | |
| 2207 | static int process_create_snap_mesg(unsigned argc, char **argv, struct pool *pool) |
| 2208 | { |
| 2209 | dm_thin_id dev_id; |
| 2210 | dm_thin_id origin_dev_id; |
| 2211 | int r; |
| 2212 | |
| 2213 | r = check_arg_count(argc, 3); |
| 2214 | if (r) |
| 2215 | return r; |
| 2216 | |
| 2217 | r = read_dev_id(argv[1], &dev_id, 1); |
| 2218 | if (r) |
| 2219 | return r; |
| 2220 | |
| 2221 | r = read_dev_id(argv[2], &origin_dev_id, 1); |
| 2222 | if (r) |
| 2223 | return r; |
| 2224 | |
| 2225 | r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id); |
| 2226 | if (r) { |
| 2227 | DMWARN("Creation of new snapshot %s of device %s failed.", |
| 2228 | argv[1], argv[2]); |
| 2229 | return r; |
| 2230 | } |
| 2231 | |
| 2232 | return 0; |
| 2233 | } |
| 2234 | |
| 2235 | static int process_delete_mesg(unsigned argc, char **argv, struct pool *pool) |
| 2236 | { |
| 2237 | dm_thin_id dev_id; |
| 2238 | int r; |
| 2239 | |
| 2240 | r = check_arg_count(argc, 2); |
| 2241 | if (r) |
| 2242 | return r; |
| 2243 | |
| 2244 | r = read_dev_id(argv[1], &dev_id, 1); |
| 2245 | if (r) |
| 2246 | return r; |
| 2247 | |
| 2248 | r = dm_pool_delete_thin_device(pool->pmd, dev_id); |
| 2249 | if (r) |
| 2250 | DMWARN("Deletion of thin device %s failed.", argv[1]); |
| 2251 | |
| 2252 | return r; |
| 2253 | } |
| 2254 | |
| 2255 | static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct pool *pool) |
| 2256 | { |
| 2257 | dm_thin_id old_id, new_id; |
| 2258 | int r; |
| 2259 | |
| 2260 | r = check_arg_count(argc, 3); |
| 2261 | if (r) |
| 2262 | return r; |
| 2263 | |
| 2264 | if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) { |
| 2265 | DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]); |
| 2266 | return -EINVAL; |
| 2267 | } |
| 2268 | |
| 2269 | if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) { |
| 2270 | DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]); |
| 2271 | return -EINVAL; |
| 2272 | } |
| 2273 | |
| 2274 | r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id); |
| 2275 | if (r) { |
| 2276 | DMWARN("Failed to change transaction id from %s to %s.", |
| 2277 | argv[1], argv[2]); |
| 2278 | return r; |
| 2279 | } |
| 2280 | |
| 2281 | return 0; |
| 2282 | } |
| 2283 | |
| 2284 | /* |
| 2285 | * Messages supported: |
| 2286 | * create_thin <dev_id> |
| 2287 | * create_snap <dev_id> <origin_id> |
| 2288 | * delete <dev_id> |
| 2290 | * set_transaction_id <current_trans_id> <new_trans_id> |
| 2291 | */ |
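| | /* |
| | * For example, against a hypothetical pool device /dev/mapper/pool: |
| | * |
| | * dmsetup message /dev/mapper/pool 0 create_thin 0 |
| | * dmsetup message /dev/mapper/pool 0 create_snap 1 0 |
| | * dmsetup message /dev/mapper/pool 0 delete 1 |
| | */ |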
| 2292 | static int pool_message(struct dm_target *ti, unsigned argc, char **argv) |
| 2293 | { |
| 2294 | int r = -EINVAL; |
| 2295 | struct pool_c *pt = ti->private; |
| 2296 | struct pool *pool = pt->pool; |
| 2297 | |
| 2298 | if (!strcasecmp(argv[0], "create_thin")) |
| 2299 | r = process_create_thin_mesg(argc, argv, pool); |
| 2300 | |
| 2301 | else if (!strcasecmp(argv[0], "create_snap")) |
| 2302 | r = process_create_snap_mesg(argc, argv, pool); |
| 2303 | |
| 2304 | else if (!strcasecmp(argv[0], "delete")) |
| 2305 | r = process_delete_mesg(argc, argv, pool); |
| 2306 | |
| 2307 | else if (!strcasecmp(argv[0], "set_transaction_id")) |
| 2308 | r = process_set_transaction_id_mesg(argc, argv, pool); |
| 2309 | |
| 2310 | else |
| 2311 | DMWARN("Unrecognised thin pool target message received: %s", argv[0]); |
| 2312 | |
| 2313 | if (!r) { |
| 2314 | r = dm_pool_commit_metadata(pool->pmd); |
| 2315 | if (r) |
| 2316 | DMERR("%s message: dm_pool_commit_metadata() failed, error = %d", |
| 2317 | argv[0], r); |
| 2318 | } |
| 2319 | |
| 2320 | return r; |
| 2321 | } |
| 2322 | |
| 2323 | /* |
| 2324 | * Status line is: |
| 2325 | * <transaction id> <used metadata blocks>/<total metadata blocks> |
| 2326 | * <used data blocks>/<total data blocks> <held metadata root> |
| 2327 | */ |
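| | /* |
| | * E.g. a freshly created pool might report (illustrative values only): |
| | * |
| | * 0 288/4096 0/163840 - |
| | */ |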
| 2328 | static void pool_status(struct dm_target *ti, status_type_t type, |
| 2329 | char *result, unsigned maxlen) |
| 2330 | { |
| 2331 | int r, count; |
| 2332 | unsigned sz = 0; |
| 2333 | uint64_t transaction_id; |
| 2334 | dm_block_t nr_free_blocks_data; |
| 2335 | dm_block_t nr_free_blocks_metadata; |
| 2336 | dm_block_t nr_blocks_data; |
| 2337 | dm_block_t nr_blocks_metadata; |
| 2338 | dm_block_t held_root; |
| 2339 | char buf[BDEVNAME_SIZE]; |
| 2340 | char buf2[BDEVNAME_SIZE]; |
| 2341 | struct pool_c *pt = ti->private; |
| 2342 | struct pool *pool = pt->pool; |
| 2343 | |
| 2344 | switch (type) { |
| 2345 | case STATUSTYPE_INFO: |
| 2346 | r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); |
| 2347 | if (r) { |
| 2348 | DMERR("dm_pool_get_metadata_transaction_id returned %d", r); |
| 2349 | goto err; |
| 2350 | } |
| 2351 | |
| 2352 | r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata); |
| 2353 | if (r) { |
| 2354 | DMERR("dm_pool_get_free_metadata_block_count returned %d", r); |
| 2355 | goto err; |
| 2356 | } |
| 2357 | |
| 2358 | r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata); |
| 2359 | if (r) { |
| 2360 | DMERR("dm_pool_get_metadata_dev_size returned %d", r); |
| 2361 | goto err; |
| 2362 | } |
| 2363 | |
| 2364 | r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data); |
| 2365 | if (r) { |
| 2366 | DMERR("dm_pool_get_free_block_count returned %d", r); |
| 2367 | goto err; |
| 2368 | } |
| 2369 | |
| 2370 | r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data); |
| 2371 | if (r) { |
| 2372 | DMERR("dm_pool_get_data_dev_size returned %d", r); |
| 2373 | goto err; |
| 2374 | } |
| 2375 | |
| 2376 | r = dm_pool_get_held_metadata_root(pool->pmd, &held_root); |
| 2377 | if (r) { |
| 2378 | DMERR("dm_pool_get_metadata_snap returned %d", r); |
| 2379 | goto err; |
| 2380 | } |
| 2381 | |
| 2382 | DMEMIT("%llu %llu/%llu %llu/%llu ", |
| 2383 | (unsigned long long)transaction_id, |
| 2384 | (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), |
| 2385 | (unsigned long long)nr_blocks_metadata, |
| 2386 | (unsigned long long)(nr_blocks_data - nr_free_blocks_data), |
| 2387 | (unsigned long long)nr_blocks_data); |
| 2388 | |
| 2389 | if (held_root) |
| 2390 | DMEMIT("%llu", held_root); |
| 2391 | else |
| 2392 | DMEMIT("-"); |
| 2393 | |
| 2394 | break; |
| 2395 | |
| 2396 | case STATUSTYPE_TABLE: |
| 2397 | DMEMIT("%s %s %lu %llu ", |
| 2398 | format_dev_t(buf, pt->metadata_dev->bdev->bd_dev), |
| 2399 | format_dev_t(buf2, pt->data_dev->bdev->bd_dev), |
| 2400 | (unsigned long)pool->sectors_per_block, |
| 2401 | (unsigned long long)pt->low_water_blocks); |
| 2402 | |
| 2403 | count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + |
| 2404 | !pt->pf.discard_passdown; |
| 2405 | DMEMIT("%u ", count); |
| 2406 | |
| 2407 | if (!pool->pf.zero_new_blocks) |
| 2408 | DMEMIT("skip_block_zeroing "); |
| 2409 | |
| 2410 | if (!pool->pf.discard_enabled) |
| 2411 | DMEMIT("ignore_discard "); |
| 2412 | |
| 2413 | if (!pt->pf.discard_passdown) |
| 2414 | DMEMIT("no_discard_passdown "); |
| 2415 | |
| 2416 | break; |
| 2417 | } |
| 2418 | return; |
| 2419 | |
| 2420 | err: |
| 2421 | DMEMIT("Error"); |
| 2422 | } |
| 2423 | |
| 2424 | static int pool_iterate_devices(struct dm_target *ti, |
| 2425 | iterate_devices_callout_fn fn, void *data) |
| 2426 | { |
| 2427 | struct pool_c *pt = ti->private; |
| 2428 | |
| 2429 | return fn(ti, pt->data_dev, 0, ti->len, data); |
| 2430 | } |
| 2431 | |
| 2432 | static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, |
| 2433 | struct bio_vec *biovec, int max_size) |
| 2434 | { |
| 2435 | struct pool_c *pt = ti->private; |
| 2436 | struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); |
| 2437 | |
| 2438 | if (!q->merge_bvec_fn) |
| 2439 | return max_size; |
| 2440 | |
| 2441 | bvm->bi_bdev = pt->data_dev->bdev; |
| 2442 | |
| 2443 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); |
| 2444 | } |
| 2445 | |
| 2446 | static void set_discard_limits(struct pool *pool, struct queue_limits *limits) |
| 2447 | { |
| 2448 | /* |
| 2449 | * FIXME: these limits may be incompatible with the pool's data device |
| 2450 | */ |
| 2451 | limits->max_discard_sectors = pool->sectors_per_block; |
| 2452 | |
| 2453 | /* |
| 2454 | * This is just a hint, and not enforced. We have to cope with |
| 2455 | * bios that overlap 2 blocks. |
| 2456 | */ |
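| | /* E.g. with 64KB blocks: discard_granularity = 128 << SECTOR_SHIFT = 65536 bytes. */ |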
| 2457 | limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; |
| 2458 | } |
| 2459 | |
| 2460 | static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) |
| 2461 | { |
| 2462 | struct pool_c *pt = ti->private; |
| 2463 | struct pool *pool = pt->pool; |
| 2464 | |
| 2465 | blk_limits_io_min(limits, 0); |
| 2466 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
| 2467 | if (pool->pf.discard_enabled) |
| 2468 | set_discard_limits(pool, limits); |
| 2469 | } |
| 2470 | |
| 2471 | static struct target_type pool_target = { |
| 2472 | .name = "thin-pool", |
| 2473 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
| 2474 | DM_TARGET_IMMUTABLE, |
| 2475 | .version = {1, 1, 1}, |
| 2476 | .module = THIS_MODULE, |
| 2477 | .ctr = pool_ctr, |
| 2478 | .dtr = pool_dtr, |
| 2479 | .map = pool_map, |
| 2480 | .postsuspend = pool_postsuspend, |
| 2481 | .preresume = pool_preresume, |
| 2482 | .resume = pool_resume, |
| 2483 | .message = pool_message, |
| 2484 | .status = pool_status, |
| 2485 | .merge = pool_merge, |
| 2486 | .iterate_devices = pool_iterate_devices, |
| 2487 | .io_hints = pool_io_hints, |
| 2488 | }; |
| 2489 | |
| 2490 | /*---------------------------------------------------------------- |
| 2491 | * Thin target methods |
| 2492 | *--------------------------------------------------------------*/ |
| 2493 | static void thin_dtr(struct dm_target *ti) |
| 2494 | { |
| 2495 | struct thin_c *tc = ti->private; |
| 2496 | |
| 2497 | mutex_lock(&dm_thin_pool_table.mutex); |
| 2498 | |
| 2499 | __pool_dec(tc->pool); |
| 2500 | dm_pool_close_thin_device(tc->td); |
| 2501 | dm_put_device(ti, tc->pool_dev); |
| 2502 | if (tc->origin_dev) |
| 2503 | dm_put_device(ti, tc->origin_dev); |
| 2504 | kfree(tc); |
| 2505 | |
| 2506 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 2507 | } |
| 2508 | |
| 2509 | /* |
| 2510 | * Thin target parameters: |
| 2511 | * |
| 2512 | * <pool_dev> <dev_id> [origin_dev] |
| 2513 | * |
| 2514 | * pool_dev: the path to the pool (eg, /dev/mapper/my_pool) |
| 2515 | * dev_id: the internal device identifier |
| 2516 | * origin_dev: a device external to the pool that should act as the origin |
| 2517 | * |
| 2518 | * If the pool device has discards disabled, they get disabled for the thin |
| 2519 | * device as well. |
| 2520 | */ |
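| | /* |
| | * E.g. a 1GB thin volume using device id 0 of a hypothetical pool |
| | * (the id must first have been created via a create_thin message): |
| | * |
| | * dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0" |
| | */ |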
| 2521 | static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) |
| 2522 | { |
| 2523 | int r; |
| 2524 | struct thin_c *tc; |
| 2525 | struct dm_dev *pool_dev, *origin_dev; |
| 2526 | struct mapped_device *pool_md; |
| 2527 | |
| 2528 | mutex_lock(&dm_thin_pool_table.mutex); |
| 2529 | |
| 2530 | if (argc != 2 && argc != 3) { |
| 2531 | ti->error = "Invalid argument count"; |
| 2532 | r = -EINVAL; |
| 2533 | goto out_unlock; |
| 2534 | } |
| 2535 | |
| 2536 | tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL); |
| 2537 | if (!tc) { |
| 2538 | ti->error = "Out of memory"; |
| 2539 | r = -ENOMEM; |
| 2540 | goto out_unlock; |
| 2541 | } |
| 2542 | |
| 2543 | if (argc == 3) { |
| 2544 | r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); |
| 2545 | if (r) { |
| 2546 | ti->error = "Error opening origin device"; |
| 2547 | goto bad_origin_dev; |
| 2548 | } |
| 2549 | tc->origin_dev = origin_dev; |
| 2550 | } |
| 2551 | |
| 2552 | r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev); |
| 2553 | if (r) { |
| 2554 | ti->error = "Error opening pool device"; |
| 2555 | goto bad_pool_dev; |
| 2556 | } |
| 2557 | tc->pool_dev = pool_dev; |
| 2558 | |
| 2559 | if (read_dev_id(argv[1], &tc->dev_id, 0)) { |
| 2560 | ti->error = "Invalid device id"; |
| 2561 | r = -EINVAL; |
| 2562 | goto bad_common; |
| 2563 | } |
| 2564 | |
| 2565 | pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev); |
| 2566 | if (!pool_md) { |
| 2567 | ti->error = "Couldn't get pool mapped device"; |
| 2568 | r = -EINVAL; |
| 2569 | goto bad_common; |
| 2570 | } |
| 2571 | |
| 2572 | tc->pool = __pool_table_lookup(pool_md); |
| 2573 | if (!tc->pool) { |
| 2574 | ti->error = "Couldn't find pool object"; |
| 2575 | r = -EINVAL; |
| 2576 | goto bad_pool_lookup; |
| 2577 | } |
| 2578 | __pool_inc(tc->pool); |
| 2579 | |
| 2580 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); |
| 2581 | if (r) { |
| 2582 | ti->error = "Couldn't open thin internal device"; |
| 2583 | goto bad_thin_open; |
| 2584 | } |
| 2585 | |
| 2586 | ti->split_io = tc->pool->sectors_per_block; |
| 2587 | ti->num_flush_requests = 1; |
| 2588 | |
| 2589 | /* If the pool supports discards, pass them on. */ |
| 2590 | if (tc->pool->pf.discard_enabled) { |
| 2591 | ti->discards_supported = 1; |
| 2592 | ti->num_discard_requests = 1; |
| 2593 | ti->discard_zeroes_data_unsupported = 1; |
| 2594 | } |
| 2595 | |
| 2596 | dm_put(pool_md); |
| 2597 | |
| 2598 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 2599 | |
| 2600 | return 0; |
| 2601 | |
| 2602 | bad_thin_open: |
| 2603 | __pool_dec(tc->pool); |
| 2604 | bad_pool_lookup: |
| 2605 | dm_put(pool_md); |
| 2606 | bad_common: |
| 2607 | dm_put_device(ti, tc->pool_dev); |
| 2608 | bad_pool_dev: |
| 2609 | if (tc->origin_dev) |
| 2610 | dm_put_device(ti, tc->origin_dev); |
| 2611 | bad_origin_dev: |
| 2612 | kfree(tc); |
| 2613 | out_unlock: |
| 2614 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 2615 | |
| 2616 | return r; |
| 2617 | } |
| 2618 | |
| 2619 | static int thin_map(struct dm_target *ti, struct bio *bio, |
| 2620 | union map_info *map_context) |
| 2621 | { |
| 2622 | bio->bi_sector = dm_target_offset(ti, bio->bi_sector); |
| 2623 | |
| 2624 | return thin_bio_map(ti, bio, map_context); |
| 2625 | } |
| 2626 | |
| 2627 | static int thin_endio(struct dm_target *ti, |
| 2628 | struct bio *bio, int err, |
| 2629 | union map_info *map_context) |
| 2630 | { |
| 2631 | unsigned long flags; |
| 2632 | struct endio_hook *h = map_context->ptr; |
| 2633 | struct list_head work; |
| 2634 | struct new_mapping *m, *tmp; |
| 2635 | struct pool *pool = h->tc->pool; |
| 2636 | |
| 2637 | if (h->shared_read_entry) { |
| 2638 | INIT_LIST_HEAD(&work); |
| 2639 | ds_dec(h->shared_read_entry, &work); |
| 2640 | |
| 2641 | spin_lock_irqsave(&pool->lock, flags); |
| 2642 | list_for_each_entry_safe(m, tmp, &work, list) { |
| 2643 | list_del(&m->list); |
| 2644 | m->quiesced = 1; |
| 2645 | __maybe_add_mapping(m); |
| 2646 | } |
| 2647 | spin_unlock_irqrestore(&pool->lock, flags); |
| 2648 | } |
| 2649 | |
| 2650 | if (h->all_io_entry) { |
| 2651 | INIT_LIST_HEAD(&work); |
| 2652 | ds_dec(h->all_io_entry, &work); |
| 2653 | spin_lock_irqsave(&pool->lock, flags); |
| 2654 | list_for_each_entry_safe(m, tmp, &work, list) |
| 2655 | list_add(&m->list, &pool->prepared_discards); |
| 2656 | spin_unlock_irqrestore(&pool->lock, flags); |
| 2657 | } |
| 2658 | |
| 2659 | mempool_free(h, pool->endio_hook_pool); |
| 2660 | |
| 2661 | return 0; |
| 2662 | } |
| 2663 | |
| 2664 | static void thin_postsuspend(struct dm_target *ti) |
| 2665 | { |
| 2666 | if (dm_noflush_suspending(ti)) |
| 2667 | requeue_io((struct thin_c *)ti->private); |
| 2668 | } |
| 2669 | |
| 2670 | /* |
| 2671 | * <nr mapped sectors> <highest mapped sector> |
| 2672 | */ |
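| | /* |
| | * E.g. (illustrative values) with 64KB blocks, a thin device with 16 |
| | * blocks mapped and block 31 as its highest mapped block reports: |
| | * |
| | * 2048 4095 |
| | */ |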
| 2673 | static void thin_status(struct dm_target *ti, status_type_t type, |
| 2674 | char *result, unsigned maxlen) |
| 2675 | { |
| 2676 | int r; |
| 2677 | ssize_t sz = 0; |
| 2678 | dm_block_t mapped, highest; |
| 2679 | char buf[BDEVNAME_SIZE]; |
| 2680 | struct thin_c *tc = ti->private; |
| 2681 | |
| 2682 | if (!tc->td) |
| 2683 | DMEMIT("-"); |
| 2684 | else { |
| 2685 | switch (type) { |
| 2686 | case STATUSTYPE_INFO: |
| 2687 | r = dm_thin_get_mapped_count(tc->td, &mapped); |
| 2688 | if (r) { |
| 2689 | DMERR("dm_thin_get_mapped_count returned %d", r); |
| 2690 | goto err; |
| 2691 | } |
| 2692 | |
| 2693 | r = dm_thin_get_highest_mapped_block(tc->td, &highest); |
| 2694 | if (r < 0) { |
| 2695 | DMERR("dm_thin_get_highest_mapped_block returned %d", r); |
| 2696 | goto err; |
| 2697 | } |
| 2698 | |
| 2699 | DMEMIT("%llu ", mapped * tc->pool->sectors_per_block); |
| 2700 | if (r) |
| 2701 | DMEMIT("%llu", ((highest + 1) * |
| 2702 | tc->pool->sectors_per_block) - 1); |
| 2703 | else |
| 2704 | DMEMIT("-"); |
| 2705 | break; |
| 2706 | |
| 2707 | case STATUSTYPE_TABLE: |
| 2708 | DMEMIT("%s %lu", |
| 2709 | format_dev_t(buf, tc->pool_dev->bdev->bd_dev), |
| 2710 | (unsigned long) tc->dev_id); |
| 2711 | if (tc->origin_dev) |
| 2712 | DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev)); |
| 2713 | break; |
| 2714 | } |
| 2715 | } |
| 2716 | |
| 2717 | return; |
| 2718 | |
| 2719 | err: |
| 2720 | DMEMIT("Error"); |
| 2721 | } |
| 2722 | |
| 2723 | static int thin_iterate_devices(struct dm_target *ti, |
| 2724 | iterate_devices_callout_fn fn, void *data) |
| 2725 | { |
| 2726 | dm_block_t blocks; |
| 2727 | struct thin_c *tc = ti->private; |
| 2728 | |
| 2729 | /* |
| 2730 | * We can't call dm_pool_get_data_dev_size() since that blocks. So |
| 2731 | * we follow a more convoluted path through to the pool's target. |
| 2732 | */ |
| 2733 | if (!tc->pool->ti) |
| 2734 | return 0; /* nothing is bound */ |
| 2735 | |
| 2736 | blocks = tc->pool->ti->len >> tc->pool->block_shift; |
| 2737 | if (blocks) |
| 2738 | return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data); |
| 2739 | |
| 2740 | return 0; |
| 2741 | } |
| 2742 | |
| 2743 | static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) |
| 2744 | { |
| 2745 | struct thin_c *tc = ti->private; |
| 2746 | struct pool *pool = tc->pool; |
| 2747 | |
| 2748 | blk_limits_io_min(limits, 0); |
| 2749 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
| 2750 | if (pool->pf.discard_enabled) |
| | set_discard_limits(pool, limits); |
| 2751 | } |
| 2752 | |
| 2753 | static struct target_type thin_target = { |
| 2754 | .name = "thin", |
| 2755 | .version = {1, 1, 1}, |
| 2756 | .module = THIS_MODULE, |
| 2757 | .ctr = thin_ctr, |
| 2758 | .dtr = thin_dtr, |
| 2759 | .map = thin_map, |
| 2760 | .end_io = thin_endio, |
| 2761 | .postsuspend = thin_postsuspend, |
| 2762 | .status = thin_status, |
| 2763 | .iterate_devices = thin_iterate_devices, |
| 2764 | .io_hints = thin_io_hints, |
| 2765 | }; |
| 2766 | |
| 2767 | /*----------------------------------------------------------------*/ |
| 2768 | |
| 2769 | static int __init dm_thin_init(void) |
| 2770 | { |
| 2771 | int r; |
| 2772 | |
| 2773 | pool_table_init(); |
| 2774 | |
| 2775 | r = dm_register_target(&thin_target); |
| 2776 | if (r) |
| 2777 | return r; |
| 2778 | |
| 2779 | r = dm_register_target(&pool_target); |
| 2780 | if (r) |
| 2781 | dm_unregister_target(&thin_target); |
| 2782 | |
| 2783 | return r; |
| 2784 | } |
| 2785 | |
| 2786 | static void dm_thin_exit(void) |
| 2787 | { |
| 2788 | dm_unregister_target(&thin_target); |
| 2789 | dm_unregister_target(&pool_target); |
| 2790 | } |
| 2791 | |
| 2792 | module_init(dm_thin_init); |
| 2793 | module_exit(dm_thin_exit); |
| 2794 | |
| 2795 | MODULE_DESCRIPTION(DM_NAME " thin provisioning target"); |
| 2796 | MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); |
| 2797 | MODULE_LICENSE("GPL"); |