Blame - src/kernel/linux/v4.19/block/bio.c - T800

blob: ee3bae8b9dcd69fd4757b41d72e73234e71f72f2 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
				22	#include <linux/uio.h>
				23	#include <linux/iocontext.h>
				24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/kernel.h>
				27	#include <linux/export.h>
				28	#include <linux/mempool.h>
				29	#include <linux/workqueue.h>
				30	#include <linux/cgroup.h>
				31	#include <linux/blk-cgroup.h>
				32	#include <linux/blk-crypto.h>
				33
				34	#include <trace/events/block.h>
				35	#include "blk.h"
				36	#include "blk-rq-qos.h"
				37
				38	/*
				39	* Test patch to inline a certain number of bi_io_vec's inside the bio
				40	* itself, to shrink a bio data allocation from two mempool calls to one
				41	*/
				42	#define BIO_INLINE_VECS 4
				43
				44	/*
				45	* if you change this list, also change bvec_alloc or things will
				46	* break badly! cannot be bigger than what you can fit into an
				47	* unsigned short
				48	*/
				49	#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
				50	static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
				51	BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
				52	};
				53	#undef BV
				54
				55	/*
				56	* fs_bio_set is the bio_set containing bio and iovec memory pools used by
				57	* IO code that does not need private memory pools.
				58	*/
				59	struct bio_set fs_bio_set;
				60	EXPORT_SYMBOL(fs_bio_set);
				61
				/*
				 * Slab pool management: one table entry per bio slab cache.
				 */
				struct bio_slab {
					struct kmem_cache *slab;	/* cache, NULL while the entry is unused */
					unsigned int slab_ref;		/* reference count on the cache */
					unsigned int slab_size;		/* object size the cache was created with */
					char name[8];			/* cache name, formatted as "bio-<index>" */
				};

				/* bio_slab_lock is held around every access to the table below */
				static DEFINE_MUTEX(bio_slab_lock);
				static struct bio_slab *bio_slabs;
				static unsigned int bio_slab_nr;
				static unsigned int bio_slab_max;
				74
				75	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				76	{
				77	unsigned int sz = sizeof(struct bio) + extra_size;
				78	struct kmem_cache *slab = NULL;
				79	struct bio_slab bslab, new_bio_slabs;
				80	unsigned int new_bio_slab_max;
				81	unsigned int i, entry = -1;
				82
				83	mutex_lock(&bio_slab_lock);
				84
				85	i = 0;
				86	while (i < bio_slab_nr) {
				87	bslab = &bio_slabs[i];
				88
				89	if (!bslab->slab && entry == -1)
				90	entry = i;
				91	else if (bslab->slab_size == sz) {
				92	slab = bslab->slab;
				93	bslab->slab_ref++;
				94	break;
				95	}
				96	i++;
				97	}
				98
				99	if (slab)
				100	goto out_unlock;
				101
				102	if (bio_slab_nr == bio_slab_max && entry == -1) {
				103	new_bio_slab_max = bio_slab_max << 1;
				104	new_bio_slabs = krealloc(bio_slabs,
				105	new_bio_slab_max * sizeof(struct bio_slab),
				106	GFP_KERNEL);
				107	if (!new_bio_slabs)
				108	goto out_unlock;
				109	bio_slab_max = new_bio_slab_max;
				110	bio_slabs = new_bio_slabs;
				111	}
				112	if (entry == -1)
				113	entry = bio_slab_nr++;
				114
				115	bslab = &bio_slabs[entry];
				116
				117	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
				118	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				119	SLAB_HWCACHE_ALIGN, NULL);
				120	if (!slab)
				121	goto out_unlock;
				122
				123	bslab->slab = slab;
				124	bslab->slab_ref = 1;
				125	bslab->slab_size = sz;
				126	out_unlock:
				127	mutex_unlock(&bio_slab_lock);
				128	return slab;
				129	}
				130
				131	static void bio_put_slab(struct bio_set *bs)
				132	{
				133	struct bio_slab *bslab = NULL;
				134	unsigned int i;
				135
				136	mutex_lock(&bio_slab_lock);
				137
				138	for (i = 0; i < bio_slab_nr; i++) {
				139	if (bs->bio_slab == bio_slabs[i].slab) {
				140	bslab = &bio_slabs[i];
				141	break;
				142	}
				143	}
				144
				145	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				146	goto out;
				147
				148	WARN_ON(!bslab->slab_ref);
				149
				150	if (--bslab->slab_ref)
				151	goto out;
				152
				153	kmem_cache_destroy(bslab->slab);
				154	bslab->slab = NULL;
				155
				156	out:
				157	mutex_unlock(&bio_slab_lock);
				158	}
				159
				160	unsigned int bvec_nr_vecs(unsigned short idx)
				161	{
				162	return bvec_slabs[--idx].nr_vecs;
				163	}
				164
				165	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
				166	{
				167	if (!idx)
				168	return;
				169	idx--;
				170
				171	BIO_BUG_ON(idx >= BVEC_POOL_NR);
				172
				173	if (idx == BVEC_POOL_MAX) {
				174	mempool_free(bv, pool);
				175	} else {
				176	struct biovec_slab *bvs = bvec_slabs + idx;
				177
				178	kmem_cache_free(bvs->slab, bv);
				179	}
				180	}
				181
				182	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				183	mempool_t *pool)
				184	{
				185	struct bio_vec *bvl;
				186
				187	/*
				188	* see comment near bvec_array define!
				189	*/
				190	switch (nr) {
				191	case 1:
				192	*idx = 0;
				193	break;
				194	case 2 ... 4:
				195	*idx = 1;
				196	break;
				197	case 5 ... 16:
				198	*idx = 2;
				199	break;
				200	case 17 ... 64:
				201	*idx = 3;
				202	break;
				203	case 65 ... 128:
				204	*idx = 4;
				205	break;
				206	case 129 ... BIO_MAX_PAGES:
				207	*idx = 5;
				208	break;
				209	default:
				210	return NULL;
				211	}
				212
				213	/*
				214	* idx now points to the pool we want to allocate from. only the
				215	* 1-vec entry pool is mempool backed.
				216	*/
				217	if (*idx == BVEC_POOL_MAX) {
				218	fallback:
				219	bvl = mempool_alloc(pool, gfp_mask);
				220	} else {
				221	struct biovec_slab bvs = bvec_slabs + idx;
				222	gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM \| __GFP_IO);
				223
				224	/*
				225	* Make this allocation restricted and don't dump info on
				226	* allocation failures, since we'll fallback to the mempool
				227	* in case of failure.
				228	*/
				229	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				230
				231	/*
				232	* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
				233	* is set, retry with the 1-entry mempool
				234	*/
				235	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
				236	if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
				237	*idx = BVEC_POOL_MAX;
				238	goto fallback;
				239	}
				240	}
				241
				242	(*idx)++;
				243	return bvl;
				244	}
				245
				/*
				 * Release what is attached to @bio: its task association and its crypt
				 * context.
				 */
				void bio_uninit(struct bio *bio)
				{
					bio_disassociate_task(bio);
					bio_crypt_free_ctx(bio);
				}
				EXPORT_SYMBOL(bio_uninit);
				253
				254	static void bio_free(struct bio *bio)
				255	{
				256	struct bio_set *bs = bio->bi_pool;
				257	void *p;
				258
				259	bio_uninit(bio);
				260
				261	if (bs) {
				262	bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
				263
				264	/*
				265	* If we have front padding, adjust the bio pointer before freeing
				266	*/
				267	p = bio;
				268	p -= bs->front_pad;
				269
				270	mempool_free(p, &bs->bio_pool);
				271	} else {
				272	/* Bio was allocated by bio_kmalloc() */
				273	kfree(bio);
				274	}
				275	}
				276
				277	/*
				278	* Users of this function have their own bio allocation. Subsequently,
				279	* they must remember to pair any call to bio_init() with bio_uninit()
				280	* when IO has completed, or when the bio is released.
				281	*/
				282	void bio_init(struct bio bio, struct bio_vec table,
				283	unsigned short max_vecs)
				284	{
				285	memset(bio, 0, sizeof(*bio));
				286	atomic_set(&bio->__bi_remaining, 1);
				287	atomic_set(&bio->__bi_cnt, 1);
				288
				289	bio->bi_io_vec = table;
				290	bio->bi_max_vecs = max_vecs;
				291	}
				292	EXPORT_SYMBOL(bio_init);
				293
				294	/**
				295	* bio_reset - reinitialize a bio
				296	* @bio: bio to reset
				297	*
				298	* Description:
				299	* After calling bio_reset(), @bio will be in the same state as a freshly
				300	* allocated bio returned bio bio_alloc_bioset() - the only fields that are
				301	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				302	* comment in struct bio.
				303	*/
				304	void bio_reset(struct bio *bio)
				305	{
				306	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				307
				308	bio_uninit(bio);
				309
				310	memset(bio, 0, BIO_RESET_BYTES);
				311	bio->bi_flags = flags;
				312	atomic_set(&bio->__bi_remaining, 1);
				313	}
				314	EXPORT_SYMBOL(bio_reset);
				315
				316	static struct bio __bio_chain_endio(struct bio bio)
				317	{
				318	struct bio *parent = bio->bi_private;
				319
				320	if (!parent->bi_status)
				321	parent->bi_status = bio->bi_status;
				322	bio_put(bio);
				323	return parent;
				324	}
				325
				/*
				 * bi_end_io handler installed by bio_chain(): finish the chained bio,
				 * then end the parent it was chained to.
				 */
				static void bio_chain_endio(struct bio *bio)
				{
					struct bio *parent = __bio_chain_endio(bio);

					bio_endio(parent);
				}
				330
				331	/**
				332	* bio_chain - chain bio completions
				333	* @bio: the target bio
				334	* @parent: the @bio's parent bio
				335	*
				336	* The caller won't have a bi_end_io called when @bio completes - instead,
				337	* @parent's bi_end_io won't be called until both @parent and @bio have
				338	* completed; the chained bio will also be freed when it completes.
				339	*
				340	* The caller must not set bi_private or bi_end_io in @bio.
				341	*/
				342	void bio_chain(struct bio bio, struct bio parent)
				343	{
				344	BUG_ON(bio->bi_private \|\| bio->bi_end_io);
				345
				346	bio->bi_private = parent;
				347	bio->bi_end_io = bio_chain_endio;
				348	bio_inc_remaining(parent);
				349	}
				350	EXPORT_SYMBOL(bio_chain);
				351
				352	static void bio_alloc_rescue(struct work_struct *work)
				353	{
				354	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				355	struct bio *bio;
				356
				357	while (1) {
				358	spin_lock(&bs->rescue_lock);
				359	bio = bio_list_pop(&bs->rescue_list);
				360	spin_unlock(&bs->rescue_lock);
				361
				362	if (!bio)
				363	break;
				364
				365	generic_make_request(bio);
				366	}
				367	}
				368
				369	static void punt_bios_to_rescuer(struct bio_set *bs)
				370	{
				371	struct bio_list punt, nopunt;
				372	struct bio *bio;
				373
				374	if (WARN_ON_ONCE(!bs->rescue_workqueue))
				375	return;
				376	/*
				377	* In order to guarantee forward progress we must punt only bios that
				378	* were allocated from this bio_set; otherwise, if there was a bio on
				379	* there for a stacking driver higher up in the stack, processing it
				380	* could require allocating bios from this bio_set, and doing that from
				381	* our own rescuer would be bad.
				382	*
				383	* Since bio lists are singly linked, pop them all instead of trying to
				384	* remove from the middle of the list:
				385	*/
				386
				387	bio_list_init(&punt);
				388	bio_list_init(&nopunt);
				389
				390	while ((bio = bio_list_pop(&current->bio_list[0])))
				391	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				392	current->bio_list[0] = nopunt;
				393
				394	bio_list_init(&nopunt);
				395	while ((bio = bio_list_pop(&current->bio_list[1])))
				396	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				397	current->bio_list[1] = nopunt;
				398
				399	spin_lock(&bs->rescue_lock);
				400	bio_list_merge(&bs->rescue_list, &punt);
				401	spin_unlock(&bs->rescue_lock);
				402
				403	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				404	}
				405
				406	/**
				407	* bio_alloc_bioset - allocate a bio for I/O
				408	* @gfp_mask: the GFP_* mask given to the slab allocator
				409	* @nr_iovecs: number of iovecs to pre-allocate
				410	* @bs: the bio_set to allocate from.
				411	*
				412	* Description:
				413	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				414	* backed by the @bs's mempool.
				415	*
				416	* When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
				417	* always be able to allocate a bio. This is due to the mempool guarantees.
				418	* To make this work, callers must never allocate more than 1 bio at a time
				419	* from this pool. Callers that need to allocate more than 1 bio must always
				420	* submit the previously allocated bio for IO before attempting to allocate
				421	* a new one. Failure to do so can cause deadlocks under memory pressure.
				422	*
				423	* Note that when running under generic_make_request() (i.e. any block
				424	* driver), bios are not submitted until after you return - see the code in
				425	* generic_make_request() that converts recursion into iteration, to prevent
				426	* stack overflows.
				427	*
				428	* This would normally mean allocating multiple bios under
				429	* generic_make_request() would be susceptible to deadlocks, but we have
				430	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				431	* thread.
				432	*
				433	* However, we do not guarantee forward progress for allocations from other
				434	* mempools. Doing multiple allocations from the same mempool under
				435	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				436	* for per bio allocations.
				437	*
				438	* RETURNS:
				439	* Pointer to new bio on success, NULL on failure.
				440	*/
				441	struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
				442	struct bio_set *bs)
				443	{
				444	gfp_t saved_gfp = gfp_mask;
				445	unsigned front_pad;
				446	unsigned inline_vecs;
				447	struct bio_vec *bvl = NULL;
				448	struct bio *bio;
				449	void *p;
				450
				451	if (!bs) {
				452	if (nr_iovecs > UIO_MAXIOV)
				453	return NULL;
				454
				455	p = kmalloc(sizeof(struct bio) +
				456	nr_iovecs * sizeof(struct bio_vec),
				457	gfp_mask);
				458	front_pad = 0;
				459	inline_vecs = nr_iovecs;
				460	} else {
				461	/* should not use nobvec bioset for nr_iovecs > 0 */
				462	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
				463	nr_iovecs > 0))
				464	return NULL;
				465	/*
				466	* generic_make_request() converts recursion to iteration; this
				467	* means if we're running beneath it, any bios we allocate and
				468	* submit will not be submitted (and thus freed) until after we
				469	* return.
				470	*
				471	* This exposes us to a potential deadlock if we allocate
				472	* multiple bios from the same bio_set() while running
				473	* underneath generic_make_request(). If we were to allocate
				474	* multiple bios (say a stacking block driver that was splitting
				475	* bios), we would deadlock if we exhausted the mempool's
				476	* reserve.
				477	*
				478	* We solve this, and guarantee forward progress, with a rescuer
				479	* workqueue per bio_set. If we go to allocate and there are
				480	* bios on current->bio_list, we first try the allocation
				481	* without __GFP_DIRECT_RECLAIM; if that fails, we punt those
				482	* bios we would be blocking to the rescuer workqueue before
				483	* we retry with the original gfp_flags.
				484	*/
				485
				486	if (current->bio_list &&
				487	(!bio_list_empty(&current->bio_list[0]) \|\|
				488	!bio_list_empty(&current->bio_list[1])) &&
				489	bs->rescue_workqueue)
				490	gfp_mask &= ~__GFP_DIRECT_RECLAIM;
				491
				492	p = mempool_alloc(&bs->bio_pool, gfp_mask);
				493	if (!p && gfp_mask != saved_gfp) {
				494	punt_bios_to_rescuer(bs);
				495	gfp_mask = saved_gfp;
				496	p = mempool_alloc(&bs->bio_pool, gfp_mask);
				497	}
				498
				499	front_pad = bs->front_pad;
				500	inline_vecs = BIO_INLINE_VECS;
				501	}
				502
				503	if (unlikely(!p))
				504	return NULL;
				505
				506	bio = p + front_pad;
				507	bio_init(bio, NULL, 0);
				508
				509	if (nr_iovecs > inline_vecs) {
				510	unsigned long idx = 0;
				511
				512	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
				513	if (!bvl && gfp_mask != saved_gfp) {
				514	punt_bios_to_rescuer(bs);
				515	gfp_mask = saved_gfp;
				516	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
				517	}
				518
				519	if (unlikely(!bvl))
				520	goto err_free;
				521
				522	bio->bi_flags \|= idx << BVEC_POOL_OFFSET;
				523	} else if (nr_iovecs) {
				524	bvl = bio->bi_inline_vecs;
				525	}
				526
				527	bio->bi_pool = bs;
				528	bio->bi_max_vecs = nr_iovecs;
				529	bio->bi_io_vec = bvl;
				530	return bio;
				531
				532	err_free:
				533	mempool_free(p, &bs->bio_pool);
				534	return NULL;
				535	}
				536	EXPORT_SYMBOL(bio_alloc_bioset);
				537
				538	void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
				539	{
				540	unsigned long flags;
				541	struct bio_vec bv;
				542	struct bvec_iter iter;
				543
				544	__bio_for_each_segment(bv, bio, iter, start) {
				545	char *data = bvec_kmap_irq(&bv, &flags);
				546	memset(data, 0, bv.bv_len);
				547	flush_dcache_page(bv.bv_page);
				548	bvec_kunmap_irq(data, &flags);
				549	}
				550	}
				551	EXPORT_SYMBOL(zero_fill_bio_iter);
				552
				553	/**
				554	* bio_put - release a reference to a bio
				555	* @bio: bio to release reference to
				556	*
				557	* Description:
				558	* Put a reference to a &struct bio, either one you have gotten with
				559	* bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
				560	**/
				561	void bio_put(struct bio *bio)
				562	{
				563	if (!bio_flagged(bio, BIO_REFFED))
				564	bio_free(bio);
				565	else {
				566	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
				567
				568	/*
				569	* last put frees it
				570	*/
				571	if (atomic_dec_and_test(&bio->__bi_cnt))
				572	bio_free(bio);
				573	}
				574	}
				575	EXPORT_SYMBOL(bio_put);
				576
				577	inline int bio_phys_segments(struct request_queue q, struct bio bio)
				578	{
				579	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				580	blk_recount_segments(q, bio);
				581
				582	return bio->bi_phys_segments;
				583	}
				584	EXPORT_SYMBOL(bio_phys_segments);
				585
				586	/**
				587	* __bio_clone_fast - clone a bio that shares the original bio's biovec
				588	* @bio: destination bio
				589	* @bio_src: bio to clone
				590	*
				591	* Clone a &bio. Caller will own the returned bio, but not
				592	* the actual data it points to. Reference count of returned
				593	* bio will be one.
				594	*
				595	* Caller must ensure that @bio_src is not freed before @bio.
				596	*/
				597	void __bio_clone_fast(struct bio bio, struct bio bio_src)
				598	{
				599	BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
				600
				601	/*
				602	* most users will be overriding ->bi_disk with a new target,
				603	* so we don't set nor calculate new physical/hw segment counts here
				604	*/
				605	bio->bi_disk = bio_src->bi_disk;
				606	bio->bi_partno = bio_src->bi_partno;
				607	bio_set_flag(bio, BIO_CLONED);
				608	if (bio_flagged(bio_src, BIO_THROTTLED))
				609	bio_set_flag(bio, BIO_THROTTLED);
				610	bio->bi_opf = bio_src->bi_opf;
				611	bio->bi_ioprio = bio_src->bi_ioprio;
				612	bio->bi_write_hint = bio_src->bi_write_hint;
				613	bio->bi_iter = bio_src->bi_iter;
				614	bio->bi_io_vec = bio_src->bi_io_vec;
				615
				616	bio_clone_blkcg_association(bio, bio_src);
				617	}
				618	EXPORT_SYMBOL(__bio_clone_fast);
				619
				620	/**
				621	* bio_clone_fast - clone a bio that shares the original bio's biovec
				622	* @bio: bio to clone
				623	* @gfp_mask: allocation priority
				624	* @bs: bio_set to allocate from
				625	*
				626	* Like __bio_clone_fast, only also allocates the returned bio
				627	*/
				628	struct bio bio_clone_fast(struct bio bio, gfp_t gfp_mask, struct bio_set *bs)
				629	{
				630	struct bio *b;
				631
				632	b = bio_alloc_bioset(gfp_mask, 0, bs);
				633	if (!b)
				634	return NULL;
				635
				636	__bio_clone_fast(b, bio);
				637
				638	bio_crypt_clone(b, bio, gfp_mask);
				639
				640	if (bio_integrity(bio) &&
				641	bio_integrity_clone(b, bio, gfp_mask) < 0) {
				642	bio_put(b);
				643	return NULL;
				644	}
				645
				646	return b;
				647	}
				648	EXPORT_SYMBOL(bio_clone_fast);
				649
				650	/**
				651	* bio_add_pc_page - attempt to add page to bio
				652	* @q: the target queue
				653	* @bio: destination bio
				654	* @page: page to add
				655	* @len: vec entry length
				656	* @offset: vec entry offset
				657	*
				658	* Attempt to add a page to the bio_vec maplist. This can fail for a
				659	* number of reasons, such as the bio being full or target block device
				660	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				661	* so it is always possible to add a single page to an empty bio.
				662	*
				663	* This should only be used by REQ_PC bios.
				664	*/
				665	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page
				666	*page, unsigned int len, unsigned int offset)
				667	{
				668	int retried_segments = 0;
				669	struct bio_vec *bvec;
				670
				671	/*
				672	* cloned bio must not modify vec list
				673	*/
				674	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				675	return 0;
				676
				677	if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
				678	return 0;
				679
				680	/*
				681	* For filesystems with a blocksize smaller than the pagesize
				682	* we will often be called with the same page as last time and
				683	* a consecutive offset. Optimize this special case.
				684	*/
				685	if (bio->bi_vcnt > 0) {
				686	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				687
				688	if (page == prev->bv_page &&
				689	offset == prev->bv_offset + prev->bv_len) {
				690	prev->bv_len += len;
				691	bio->bi_iter.bi_size += len;
				692	goto done;
				693	}
				694
				695	/*
				696	* If the queue doesn't support SG gaps and adding this
				697	* offset would create a gap, disallow it.
				698	*/
				699	if (bvec_gap_to_prev(q, prev, offset))
				700	return 0;
				701	}
				702
				703	if (bio_full(bio))
				704	return 0;
				705
				706	/*
				707	* setup the new entry, we might clear it again later if we
				708	* cannot add the page
				709	*/
				710	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				711	bvec->bv_page = page;
				712	bvec->bv_len = len;
				713	bvec->bv_offset = offset;
				714	bio->bi_vcnt++;
				715	bio->bi_phys_segments++;
				716	bio->bi_iter.bi_size += len;
				717
				718	/*
				719	* Perform a recount if the number of segments is greater
				720	* than queue_max_segments(q).
				721	*/
				722
				723	while (bio->bi_phys_segments > queue_max_segments(q)) {
				724
				725	if (retried_segments)
				726	goto failed;
				727
				728	retried_segments = 1;
				729	blk_recount_segments(q, bio);
				730	}
				731
				732	/* If we may be able to merge these biovecs, force a recount */
				733	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
				734	bio_clear_flag(bio, BIO_SEG_VALID);
				735
				736	done:
				737	return len;
				738
				739	failed:
				740	bvec->bv_page = NULL;
				741	bvec->bv_len = 0;
				742	bvec->bv_offset = 0;
				743	bio->bi_vcnt--;
				744	bio->bi_iter.bi_size -= len;
				745	blk_recount_segments(q, bio);
				746	return 0;
				747	}
				748	EXPORT_SYMBOL(bio_add_pc_page);
				749
				750	/**
				751	* __bio_try_merge_page - try appending data to an existing bvec.
				752	* @bio: destination bio
				753	* @page: page to add
				754	* @len: length of the data to add
				755	* @off: offset of the data in @page
				756	*
				757	* Try to add the data at @page + @off to the last bvec of @bio. This is a
				758	* a useful optimisation for file systems with a block size smaller than the
				759	* page size.
				760	*
				761	* Return %true on success or %false on failure.
				762	*/
				763	bool __bio_try_merge_page(struct bio bio, struct page page,
				764	unsigned int len, unsigned int off)
				765	{
				766	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
				767	return false;
				768
				769	if (bio->bi_vcnt > 0) {
				770	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
				771
				772	if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
				773	bv->bv_len += len;
				774	bio->bi_iter.bi_size += len;
				775	return true;
				776	}
				777	}
				778	return false;
				779	}
				780	EXPORT_SYMBOL_GPL(__bio_try_merge_page);
				781
				782	/**
				783	* __bio_add_page - add page to a bio in a new segment
				784	* @bio: destination bio
				785	* @page: page to add
				786	* @len: length of the data to add
				787	* @off: offset of the data in @page
				788	*
				789	* Add the data at @page + @off to @bio as a new bvec. The caller must ensure
				790	* that @bio has space for another bvec.
				791	*/
				792	void __bio_add_page(struct bio bio, struct page page,
				793	unsigned int len, unsigned int off)
				794	{
				795	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
				796
				797	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
				798	WARN_ON_ONCE(bio_full(bio));
				799
				800	bv->bv_page = page;
				801	bv->bv_offset = off;
				802	bv->bv_len = len;
				803
				804	bio->bi_iter.bi_size += len;
				805	bio->bi_vcnt++;
				806
				807	if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
				808	bio_set_flag(bio, BIO_WORKINGSET);
				809	}
				810	EXPORT_SYMBOL_GPL(__bio_add_page);
				811
				/**
				 *	bio_add_page	-	attempt to add page to bio
				 *	@bio: destination bio
				 *	@page: page to add
				 *	@len: vec entry length
				 *	@offset: vec entry offset
				 *
				 *	Attempt to add a page to the bio_vec maplist. This will only fail
				 *	if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
				 *
				 *	Returns @len on success, 0 when the bio is full.
				 */
				int bio_add_page(struct bio *bio, struct page *page,
						 unsigned int len, unsigned int offset)
				{
					/* first try to extend the last bvec; otherwise start a new one */
					if (!__bio_try_merge_page(bio, page, len, offset)) {
						if (bio_full(bio))
							return 0;
						__bio_add_page(bio, page, len, offset);
					}
					return len;
				}
				EXPORT_SYMBOL(bio_add_page);
				833
				834	/**
				835	* __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
				836	* @bio: bio to add pages to
				837	* @iter: iov iterator describing the region to be mapped
				838	*
				839	* Pins pages from *iter and appends them to @bio's bvec array. The
				840	* pages will have to be released using put_page() when done.
				841	* For multi-segment *iter, this function only adds pages from the
				842	* the next non-empty segment of the iov iterator.
				843	*/
				844	static int __bio_iov_iter_get_pages(struct bio bio, struct iov_iter iter)
				845	{
				846	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
				847	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
				848	struct page pages = (struct page )bv;
				849	size_t offset;
				850	ssize_t size;
				851
				852	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
				853	if (unlikely(size <= 0))
				854	return size ? size : -EFAULT;
				855	idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
				856
				857	/*
				858	* Deep magic below: We need to walk the pinned pages backwards
				859	* because we are abusing the space allocated for the bio_vecs
				860	* for the page array. Because the bio_vecs are larger than the
				861	* page pointers by definition this will always work. But it also
				862	* means we can't use bio_add_page, so any changes to it's semantics
				863	* need to be reflected here as well.
				864	*/
				865	bio->bi_iter.bi_size += size;
				866	bio->bi_vcnt += nr_pages;
				867
				868	while (idx--) {
				869	bv[idx].bv_page = pages[idx];
				870	bv[idx].bv_len = PAGE_SIZE;
				871	bv[idx].bv_offset = 0;
				872	}
				873
				874	bv[0].bv_offset += offset;
				875	bv[0].bv_len -= offset;
				876	bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
				877
				878	iov_iter_advance(iter, size);
				879	return 0;
				880	}
				881
				882	/**
				883	* bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
				884	* @bio: bio to add pages to
				885	* @iter: iov iterator describing the region to be mapped
				886	*
				887	* Pins pages from *iter and appends them to @bio's bvec array. The
				888	* pages will have to be released using put_page() when done.
				889	* The function tries, but does not guarantee, to pin as many pages as
				890	* fit into the bio, or are requested in *iter, whatever is smaller.
				891	* If MM encounters an error pinning the requested pages, it stops.
				892	* Error is returned only if 0 pages could be pinned.
				893	*/
				894	int bio_iov_iter_get_pages(struct bio bio, struct iov_iter iter)
				895	{
				896	unsigned short orig_vcnt = bio->bi_vcnt;
				897
				898	do {
				899	int ret = __bio_iov_iter_get_pages(bio, iter);
				900
				901	if (unlikely(ret))
				902	return bio->bi_vcnt > orig_vcnt ? 0 : ret;
				903
				904	} while (iov_iter_count(iter) && !bio_full(bio));
				905
				906	return 0;
				907	}
				908	EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
				909
				910	static void submit_bio_wait_endio(struct bio *bio)
				911	{
				912	complete(bio->bi_private);
				913	}
				914
				915	/**
				916	* submit_bio_wait - submit a bio, and wait until it completes
				917	* @bio: The &struct bio which describes the I/O
				918	*
				919	* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
				920	* bio_endio() on failure.
				921	*
				922	* WARNING: Unlike to how submit_bio() is usually used, this function does not
				923	* result in bio reference to be consumed. The caller must drop the reference
				924	* on his own.
				925	*/
				926	int submit_bio_wait(struct bio *bio)
				927	{
				928	DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
				929
				930	bio->bi_private = &done;
				931	bio->bi_end_io = submit_bio_wait_endio;
				932	bio->bi_opf \|= REQ_SYNC;
				933	submit_bio(bio);
				934	wait_for_completion_io(&done);
				935
				936	return blk_status_to_errno(bio->bi_status);
				937	}
				938	EXPORT_SYMBOL(submit_bio_wait);
				939
				940	/**
				941	* bio_advance - increment/complete a bio by some number of bytes
				942	* @bio: bio to advance
				943	* @bytes: number of bytes to complete
				944	*
				945	* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
				946	* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
				947	* be updated on the last bvec as well.
				948	*
				949	* @bio will then represent the remaining, uncompleted portion of the io.
				950	*/
				951	void bio_advance(struct bio *bio, unsigned bytes)
				952	{
				953	if (bio_integrity(bio))
				954	bio_integrity_advance(bio, bytes);
				955
				956	bio_crypt_advance(bio, bytes);
				957	bio_advance_iter(bio, &bio->bi_iter, bytes);
				958	}
				959	EXPORT_SYMBOL(bio_advance);
				960
				961	void bio_copy_data_iter(struct bio dst, struct bvec_iter dst_iter,
				962	struct bio src, struct bvec_iter src_iter)
				963	{
				964	struct bio_vec src_bv, dst_bv;
				965	void src_p, dst_p;
				966	unsigned bytes;
				967
				968	while (src_iter->bi_size && dst_iter->bi_size) {
				969	src_bv = bio_iter_iovec(src, *src_iter);
				970	dst_bv = bio_iter_iovec(dst, *dst_iter);
				971
				972	bytes = min(src_bv.bv_len, dst_bv.bv_len);
				973
				974	src_p = kmap_atomic(src_bv.bv_page);
				975	dst_p = kmap_atomic(dst_bv.bv_page);
				976
				977	memcpy(dst_p + dst_bv.bv_offset,
				978	src_p + src_bv.bv_offset,
				979	bytes);
				980
				981	kunmap_atomic(dst_p);
				982	kunmap_atomic(src_p);
				983
				984	flush_dcache_page(dst_bv.bv_page);
				985
				986	bio_advance_iter(src, src_iter, bytes);
				987	bio_advance_iter(dst, dst_iter, bytes);
				988	}
				989	}
				990	EXPORT_SYMBOL(bio_copy_data_iter);
				991
				992	/**
				993	* bio_copy_data - copy contents of data buffers from one bio to another
				994	* @src: source bio
				995	* @dst: destination bio
				996	*
				997	* Stops when it reaches the end of either @src or @dst - that is, copies
				998	* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
				999	*/
				1000	void bio_copy_data(struct bio dst, struct bio src)
				1001	{
				1002	struct bvec_iter src_iter = src->bi_iter;
				1003	struct bvec_iter dst_iter = dst->bi_iter;
				1004
				1005	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
				1006	}
				1007	EXPORT_SYMBOL(bio_copy_data);
				1008
				1009	/**
				1010	* bio_list_copy_data - copy contents of data buffers from one chain of bios to
				1011	* another
				1012	* @src: source bio list
				1013	* @dst: destination bio list
				1014	*
				1015	* Stops when it reaches the end of either the @src list or @dst list - that is,
				1016	* copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
				1017	* bios).
				1018	*/
				1019	void bio_list_copy_data(struct bio dst, struct bio src)
				1020	{
				1021	struct bvec_iter src_iter = src->bi_iter;
				1022	struct bvec_iter dst_iter = dst->bi_iter;
				1023
				1024	while (1) {
				1025	if (!src_iter.bi_size) {
				1026	src = src->bi_next;
				1027	if (!src)
				1028	break;
				1029
				1030	src_iter = src->bi_iter;
				1031	}
				1032
				1033	if (!dst_iter.bi_size) {
				1034	dst = dst->bi_next;
				1035	if (!dst)
				1036	break;
				1037
				1038	dst_iter = dst->bi_iter;
				1039	}
				1040
				1041	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
				1042	}
				1043	}
				1044	EXPORT_SYMBOL(bio_list_copy_data);
				1045
				1046	struct bio_map_data {
				1047	int is_our_pages;
				1048	struct iov_iter iter;
				1049	struct iovec iov[];
				1050	};
				1051
				1052	static struct bio_map_data bio_alloc_map_data(struct iov_iter data,
				1053	gfp_t gfp_mask)
				1054	{
				1055	struct bio_map_data *bmd;
				1056	if (data->nr_segs > UIO_MAXIOV)
				1057	return NULL;
				1058
				1059	bmd = kmalloc(sizeof(struct bio_map_data) +
				1060	sizeof(struct iovec) * data->nr_segs, gfp_mask);
				1061	if (!bmd)
				1062	return NULL;
				1063	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
				1064	bmd->iter = *data;
				1065	bmd->iter.iov = bmd->iov;
				1066	return bmd;
				1067	}
				1068
				1069	/**
				1070	* bio_copy_from_iter - copy all pages from iov_iter to bio
				1071	* @bio: The &struct bio which describes the I/O as destination
				1072	* @iter: iov_iter as source
				1073	*
				1074	* Copy all pages from iov_iter to bio.
				1075	* Returns 0 on success, or error on failure.
				1076	*/
				1077	static int bio_copy_from_iter(struct bio bio, struct iov_iter iter)
				1078	{
				1079	int i;
				1080	struct bio_vec *bvec;
				1081
				1082	bio_for_each_segment_all(bvec, bio, i) {
				1083	ssize_t ret;
				1084
				1085	ret = copy_page_from_iter(bvec->bv_page,
				1086	bvec->bv_offset,
				1087	bvec->bv_len,
				1088	iter);
				1089
				1090	if (!iov_iter_count(iter))
				1091	break;
				1092
				1093	if (ret < bvec->bv_len)
				1094	return -EFAULT;
				1095	}
				1096
				1097	return 0;
				1098	}
				1099
				1100	/**
				1101	* bio_copy_to_iter - copy all pages from bio to iov_iter
				1102	* @bio: The &struct bio which describes the I/O as source
				1103	* @iter: iov_iter as destination
				1104	*
				1105	* Copy all pages from bio to iov_iter.
				1106	* Returns 0 on success, or error on failure.
				1107	*/
				1108	static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
				1109	{
				1110	int i;
				1111	struct bio_vec *bvec;
				1112
				1113	bio_for_each_segment_all(bvec, bio, i) {
				1114	ssize_t ret;
				1115
				1116	ret = copy_page_to_iter(bvec->bv_page,
				1117	bvec->bv_offset,
				1118	bvec->bv_len,
				1119	&iter);
				1120
				1121	if (!iov_iter_count(&iter))
				1122	break;
				1123
				1124	if (ret < bvec->bv_len)
				1125	return -EFAULT;
				1126	}
				1127
				1128	return 0;
				1129	}
				1130
				1131	void bio_free_pages(struct bio *bio)
				1132	{
				1133	struct bio_vec *bvec;
				1134	int i;
				1135
				1136	bio_for_each_segment_all(bvec, bio, i)
				1137	__free_page(bvec->bv_page);
				1138	}
				1139	EXPORT_SYMBOL(bio_free_pages);
				1140
				1141	/**
				1142	* bio_uncopy_user - finish previously mapped bio
				1143	* @bio: bio being terminated
				1144	*
				1145	* Free pages allocated from bio_copy_user_iov() and write back data
				1146	* to user space in case of a read.
				1147	*/
				1148	int bio_uncopy_user(struct bio *bio)
				1149	{
				1150	struct bio_map_data *bmd = bio->bi_private;
				1151	int ret = 0;
				1152
				1153	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
				1154	/*
				1155	* if we're in a workqueue, the request is orphaned, so
				1156	* don't copy into a random user address space, just free
				1157	* and return -EINTR so user space doesn't expect any data.
				1158	*/
				1159	if (!current->mm)
				1160	ret = -EINTR;
				1161	else if (bio_data_dir(bio) == READ)
				1162	ret = bio_copy_to_iter(bio, bmd->iter);
				1163	if (bmd->is_our_pages)
				1164	bio_free_pages(bio);
				1165	}
				1166	kfree(bmd);
				1167	bio_put(bio);
				1168	return ret;
				1169	}
				1170
				1171	/**
				1172	* bio_copy_user_iov - copy user data to bio
				1173	* @q: destination block queue
				1174	* @map_data: pointer to the rq_map_data holding pages (if necessary)
				1175	* @iter: iovec iterator
				1176	* @gfp_mask: memory allocation flags
				1177	*
				1178	* Prepares and returns a bio for indirect user io, bouncing data
				1179	* to/from kernel pages as necessary. Must be paired with
				1180	* call bio_uncopy_user() on io completion.
				1181	*/
				1182	struct bio bio_copy_user_iov(struct request_queue q,
				1183	struct rq_map_data *map_data,
				1184	struct iov_iter *iter,
				1185	gfp_t gfp_mask)
				1186	{
				1187	struct bio_map_data *bmd;
				1188	struct page *page;
				1189	struct bio *bio;
				1190	int i = 0, ret;
				1191	int nr_pages;
				1192	unsigned int len = iter->count;
				1193	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
				1194
				1195	bmd = bio_alloc_map_data(iter, gfp_mask);
				1196	if (!bmd)
				1197	return ERR_PTR(-ENOMEM);
				1198
				1199	/*
				1200	* We need to do a deep copy of the iov_iter including the iovecs.
				1201	* The caller provided iov might point to an on-stack or otherwise
				1202	* shortlived one.
				1203	*/
				1204	bmd->is_our_pages = map_data ? 0 : 1;
				1205
				1206	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
				1207	if (nr_pages > BIO_MAX_PAGES)
				1208	nr_pages = BIO_MAX_PAGES;
				1209
				1210	ret = -ENOMEM;
				1211	bio = bio_kmalloc(gfp_mask, nr_pages);
				1212	if (!bio)
				1213	goto out_bmd;
				1214
				1215	ret = 0;
				1216
				1217	if (map_data) {
				1218	nr_pages = 1 << map_data->page_order;
				1219	i = map_data->offset / PAGE_SIZE;
				1220	}
				1221	while (len) {
				1222	unsigned int bytes = PAGE_SIZE;
				1223
				1224	bytes -= offset;
				1225
				1226	if (bytes > len)
				1227	bytes = len;
				1228
				1229	if (map_data) {
				1230	if (i == map_data->nr_entries * nr_pages) {
				1231	ret = -ENOMEM;
				1232	break;
				1233	}
				1234
				1235	page = map_data->pages[i / nr_pages];
				1236	page += (i % nr_pages);
				1237
				1238	i++;
				1239	} else {
				1240	page = alloc_page(q->bounce_gfp \| gfp_mask);
				1241	if (!page) {
				1242	ret = -ENOMEM;
				1243	break;
				1244	}
				1245	}
				1246
				1247	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
				1248	if (!map_data)
				1249	__free_page(page);
				1250	break;
				1251	}
				1252
				1253	len -= bytes;
				1254	offset = 0;
				1255	}
				1256
				1257	if (ret)
				1258	goto cleanup;
				1259
				1260	if (map_data)
				1261	map_data->offset += bio->bi_iter.bi_size;
				1262
				1263	/*
				1264	* success
				1265	*/
				1266	if (((iter->type & WRITE) && (!map_data \|\| !map_data->null_mapped)) \|\|
				1267	(map_data && map_data->from_user)) {
				1268	ret = bio_copy_from_iter(bio, iter);
				1269	if (ret)
				1270	goto cleanup;
				1271	} else {
				1272	if (bmd->is_our_pages)
				1273	zero_fill_bio(bio);
				1274	iov_iter_advance(iter, bio->bi_iter.bi_size);
				1275	}
				1276
				1277	bio->bi_private = bmd;
				1278	if (map_data && map_data->null_mapped)
				1279	bio_set_flag(bio, BIO_NULL_MAPPED);
				1280	return bio;
				1281	cleanup:
				1282	if (!map_data)
				1283	bio_free_pages(bio);
				1284	bio_put(bio);
				1285	out_bmd:
				1286	kfree(bmd);
				1287	return ERR_PTR(ret);
				1288	}
				1289
				1290	/**
				1291	* bio_map_user_iov - map user iovec into bio
				1292	* @q: the struct request_queue for the bio
				1293	* @iter: iovec iterator
				1294	* @gfp_mask: memory allocation flags
				1295	*
				1296	* Map the user space address into a bio suitable for io to a block
				1297	* device. Returns an error pointer in case of error.
				1298	*/
				1299	struct bio bio_map_user_iov(struct request_queue q,
				1300	struct iov_iter *iter,
				1301	gfp_t gfp_mask)
				1302	{
				1303	int j;
				1304	struct bio *bio;
				1305	int ret;
				1306	struct bio_vec *bvec;
				1307
				1308	if (!iov_iter_count(iter))
				1309	return ERR_PTR(-EINVAL);
				1310
				1311	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
				1312	if (!bio)
				1313	return ERR_PTR(-ENOMEM);
				1314
				1315	while (iov_iter_count(iter)) {
				1316	struct page **pages;
				1317	ssize_t bytes;
				1318	size_t offs, added = 0;
				1319	int npages;
				1320
				1321	bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
				1322	if (unlikely(bytes <= 0)) {
				1323	ret = bytes ? bytes : -EFAULT;
				1324	goto out_unmap;
				1325	}
				1326
				1327	npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
				1328
				1329	if (unlikely(offs & queue_dma_alignment(q))) {
				1330	ret = -EINVAL;
				1331	j = 0;
				1332	} else {
				1333	for (j = 0; j < npages; j++) {
				1334	struct page *page = pages[j];
				1335	unsigned int n = PAGE_SIZE - offs;
				1336	unsigned short prev_bi_vcnt = bio->bi_vcnt;
				1337
				1338	if (n > bytes)
				1339	n = bytes;
				1340
				1341	if (!bio_add_pc_page(q, bio, page, n, offs))
				1342	break;
				1343
				1344	/*
				1345	* check if vector was merged with previous
				1346	* drop page reference if needed
				1347	*/
				1348	if (bio->bi_vcnt == prev_bi_vcnt)
				1349	put_page(page);
				1350
				1351	added += n;
				1352	bytes -= n;
				1353	offs = 0;
				1354	}
				1355	iov_iter_advance(iter, added);
				1356	}
				1357	/*
				1358	* release the pages we didn't map into the bio, if any
				1359	*/
				1360	while (j < npages)
				1361	put_page(pages[j++]);
				1362	kvfree(pages);
				1363	/* couldn't stuff something into bio? */
				1364	if (bytes)
				1365	break;
				1366	}
				1367
				1368	bio_set_flag(bio, BIO_USER_MAPPED);
				1369
				1370	/*
				1371	* subtle -- if bio_map_user_iov() ended up bouncing a bio,
				1372	* it would normally disappear when its bi_end_io is run.
				1373	* however, we need it for the unmap, so grab an extra
				1374	* reference to it
				1375	*/
				1376	bio_get(bio);
				1377	return bio;
				1378
				1379	out_unmap:
				1380	bio_for_each_segment_all(bvec, bio, j) {
				1381	put_page(bvec->bv_page);
				1382	}
				1383	bio_put(bio);
				1384	return ERR_PTR(ret);
				1385	}
				1386
				1387	static void __bio_unmap_user(struct bio *bio)
				1388	{
				1389	struct bio_vec *bvec;
				1390	int i;
				1391
				1392	/*
				1393	* make sure we dirty pages we wrote to
				1394	*/
				1395	bio_for_each_segment_all(bvec, bio, i) {
				1396	if (bio_data_dir(bio) == READ)
				1397	set_page_dirty_lock(bvec->bv_page);
				1398
				1399	put_page(bvec->bv_page);
				1400	}
				1401
				1402	bio_put(bio);
				1403	}
				1404
				/**
				 * bio_unmap_user - unmap a bio
				 * @bio:	the bio being unmapped
				 *
				 * Undo bio_map_user_iov() for @bio. Must be called from process context,
				 * as bio_unmap_user() may sleep.
				 */
				void bio_unmap_user(struct bio *bio)
				{
					/* Releases the pages and drops one reference on the bio ... */
					__bio_unmap_user(bio);
					/* ... then drop the extra reference bio_map_user_iov() took. */
					bio_put(bio);
				}
				1419
				/* Completion handler installed by bio_map_kern(): only drops the bio. */
				static void bio_map_kern_endio(struct bio *bio)
				{
					bio_put(bio);
				}
				1424
				1425	/**
				1426	* bio_map_kern - map kernel address into bio
				1427	* @q: the struct request_queue for the bio
				1428	* @data: pointer to buffer to map
				1429	* @len: length in bytes
				1430	* @gfp_mask: allocation flags for bio allocation
				1431	*
				1432	* Map the kernel address into a bio suitable for io to a block
				1433	* device. Returns an error pointer in case of error.
				1434	*/
				1435	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
				1436	gfp_t gfp_mask)
				1437	{
				1438	unsigned long kaddr = (unsigned long)data;
				1439	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1440	unsigned long start = kaddr >> PAGE_SHIFT;
				1441	const int nr_pages = end - start;
				1442	int offset, i;
				1443	struct bio *bio;
				1444
				1445	bio = bio_kmalloc(gfp_mask, nr_pages);
				1446	if (!bio)
				1447	return ERR_PTR(-ENOMEM);
				1448
				1449	offset = offset_in_page(kaddr);
				1450	for (i = 0; i < nr_pages; i++) {
				1451	unsigned int bytes = PAGE_SIZE - offset;
				1452
				1453	if (len <= 0)
				1454	break;
				1455
				1456	if (bytes > len)
				1457	bytes = len;
				1458
				1459	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				1460	offset) < bytes) {
				1461	/* we don't support partial mappings */
				1462	bio_put(bio);
				1463	return ERR_PTR(-EINVAL);
				1464	}
				1465
				1466	data += bytes;
				1467	len -= bytes;
				1468	offset = 0;
				1469	}
				1470
				1471	bio->bi_end_io = bio_map_kern_endio;
				1472	return bio;
				1473	}
				1474	EXPORT_SYMBOL(bio_map_kern);
				1475
				/*
				 * Completion handler for bios built by bio_copy_kern(): free the bounce
				 * pages, then drop the bio.
				 */
				static void bio_copy_kern_endio(struct bio *bio)
				{
					bio_free_pages(bio);
					bio_put(bio);
				}
				1481
				1482	static void bio_copy_kern_endio_read(struct bio *bio)
				1483	{
				1484	char *p = bio->bi_private;
				1485	struct bio_vec *bvec;
				1486	int i;
				1487
				1488	bio_for_each_segment_all(bvec, bio, i) {
				1489	memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
				1490	p += bvec->bv_len;
				1491	}
				1492
				1493	bio_copy_kern_endio(bio);
				1494	}
				1495
				1496	/**
				1497	* bio_copy_kern - copy kernel address into bio
				1498	* @q: the struct request_queue for the bio
				1499	* @data: pointer to buffer to copy
				1500	* @len: length in bytes
				1501	* @gfp_mask: allocation flags for bio and page allocation
				1502	* @reading: data direction is READ
				1503	*
				1504	* copy the kernel address into a bio suitable for io to a block
				1505	* device. Returns an error pointer in case of error.
				1506	*/
				1507	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1508	gfp_t gfp_mask, int reading)
				1509	{
				1510	unsigned long kaddr = (unsigned long)data;
				1511	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1512	unsigned long start = kaddr >> PAGE_SHIFT;
				1513	struct bio *bio;
				1514	void *p = data;
				1515	int nr_pages = 0;
				1516
				1517	/*
				1518	* Overflow, abort
				1519	*/
				1520	if (end < start)
				1521	return ERR_PTR(-EINVAL);
				1522
				1523	nr_pages = end - start;
				1524	bio = bio_kmalloc(gfp_mask, nr_pages);
				1525	if (!bio)
				1526	return ERR_PTR(-ENOMEM);
				1527
				1528	while (len) {
				1529	struct page *page;
				1530	unsigned int bytes = PAGE_SIZE;
				1531
				1532	if (bytes > len)
				1533	bytes = len;
				1534
				1535	page = alloc_page(q->bounce_gfp \| gfp_mask);
				1536	if (!page)
				1537	goto cleanup;
				1538
				1539	if (!reading)
				1540	memcpy(page_address(page), p, bytes);
				1541
				1542	if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
				1543	break;
				1544
				1545	len -= bytes;
				1546	p += bytes;
				1547	}
				1548
				1549	if (reading) {
				1550	bio->bi_end_io = bio_copy_kern_endio_read;
				1551	bio->bi_private = data;
				1552	} else {
				1553	bio->bi_end_io = bio_copy_kern_endio;
				1554	}
				1555
				1556	return bio;
				1557
				1558	cleanup:
				1559	bio_free_pages(bio);
				1560	bio_put(bio);
				1561	return ERR_PTR(-ENOMEM);
				1562	}
				1563
				1564	/*
				1565	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1566	* for performing direct-IO in BIOs.
				1567	*
				1568	* The problem is that we cannot run set_page_dirty() from interrupt context
				1569	* because the required locks are not interrupt-safe. So what we can do is to
				1570	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1571	* check that the pages are still dirty. If so, fine. If not, redirty them
				1572	* in process context.
				1573	*
				1574	* We special-case compound pages here: normally this means reads into hugetlb
				1575	* pages. The logic in here doesn't really work right for compound pages
				1576	* because the VM does not uniformly chase down the head page in all cases.
				1577	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1578	* handle them at all. So we skip compound pages here at an early stage.
				1579	*
				1580	* Note that this code is very hard to test under normal circumstances because
				1581	* direct-io pins the pages with get_user_pages(). This makes
				1582	* is_page_cache_freeable return false, and the VM will not clean the pages.
				1583	* But other code (eg, flusher threads) could clean the pages if they are mapped
				1584	* pagecache.
				1585	*
				1586	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1587	* deferred bio dirtying paths.
				1588	*/
				1589
				1590	/*
				1591	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1592	*/
				1593	void bio_set_pages_dirty(struct bio *bio)
				1594	{
				1595	struct bio_vec *bvec;
				1596	int i;
				1597
				1598	bio_for_each_segment_all(bvec, bio, i) {
				1599	if (!PageCompound(bvec->bv_page))
				1600	set_page_dirty_lock(bvec->bv_page);
				1601	}
				1602	}
				1603	EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
				1604
				1605	static void bio_release_pages(struct bio *bio)
				1606	{
				1607	struct bio_vec *bvec;
				1608	int i;
				1609
				1610	bio_for_each_segment_all(bvec, bio, i)
				1611	put_page(bvec->bv_page);
				1612	}
				1613
				1614	/*
				1615	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1616	* If they are, then fine. If, however, some pages are clean then they must
				1617	* have been written out during the direct-IO read. So we take another ref on
				1618	* the BIO and re-dirty the pages in process context.
				1619	*
				1620	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
				1621	* here on. It will run one put_page() against each page and will run one
				1622	* bio_put() against the BIO.
				1623	*/
				1624
				1625	static void bio_dirty_fn(struct work_struct *work);
				1626
				1627	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
				1628	static DEFINE_SPINLOCK(bio_dirty_lock);
				1629	static struct bio *bio_dirty_list;
				1630
				1631	/*
				1632	* This runs in process context
				1633	*/
				1634	static void bio_dirty_fn(struct work_struct *work)
				1635	{
				1636	struct bio bio, next;
				1637
				1638	spin_lock_irq(&bio_dirty_lock);
				1639	next = bio_dirty_list;
				1640	bio_dirty_list = NULL;
				1641	spin_unlock_irq(&bio_dirty_lock);
				1642
				1643	while ((bio = next) != NULL) {
				1644	next = bio->bi_private;
				1645
				1646	bio_set_pages_dirty(bio);
				1647	bio_release_pages(bio);
				1648	bio_put(bio);
				1649	}
				1650	}
				1651
				1652	void bio_check_pages_dirty(struct bio *bio)
				1653	{
				1654	struct bio_vec *bvec;
				1655	unsigned long flags;
				1656	int i;
				1657
				1658	bio_for_each_segment_all(bvec, bio, i) {
				1659	if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
				1660	goto defer;
				1661	}
				1662
				1663	bio_release_pages(bio);
				1664	bio_put(bio);
				1665	return;
				1666	defer:
				1667	spin_lock_irqsave(&bio_dirty_lock, flags);
				1668	bio->bi_private = bio_dirty_list;
				1669	bio_dirty_list = bio;
				1670	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1671	schedule_work(&bio_dirty_work);
				1672	}
				1673	EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
				1674
				1675	void generic_start_io_acct(struct request_queue *q, int op,
				1676	unsigned long sectors, struct hd_struct *part)
				1677	{
				1678	const int sgrp = op_stat_group(op);
				1679	int cpu = part_stat_lock();
				1680
				1681	part_round_stats(q, cpu, part);
				1682	part_stat_inc(cpu, part, ios[sgrp]);
				1683	part_stat_add(cpu, part, sectors[sgrp], sectors);
				1684	part_inc_in_flight(q, part, op_is_write(op));
				1685
				1686	part_stat_unlock();
				1687	}
				1688	EXPORT_SYMBOL(generic_start_io_acct);
				1689
				1690	void generic_end_io_acct(struct request_queue *q, int req_op,
				1691	struct hd_struct *part, unsigned long start_time)
				1692	{
				1693	unsigned long duration = jiffies - start_time;
				1694	const int sgrp = op_stat_group(req_op);
				1695	int cpu = part_stat_lock();
				1696
				1697	part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
				1698	part_round_stats(q, cpu, part);
				1699	part_dec_in_flight(q, part, op_is_write(req_op));
				1700
				1701	part_stat_unlock();
				1702	}
				1703	EXPORT_SYMBOL(generic_end_io_acct);
				1704
				#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
				/* Call flush_dcache_page() on the page of every segment of @bi. */
				void bio_flush_dcache_pages(struct bio *bi)
				{
					struct bvec_iter pos;
					struct bio_vec bv;

					bio_for_each_segment(bv, bi, pos) {
						flush_dcache_page(bv.bv_page);
					}
				}
				EXPORT_SYMBOL(bio_flush_dcache_pages);
				#endif
				1716
				1717	static inline bool bio_remaining_done(struct bio *bio)
				1718	{
				1719	/*
				1720	* If we're not chaining, then ->__bi_remaining is always 1 and
				1721	* we always end io on the first invocation.
				1722	*/
				1723	if (!bio_flagged(bio, BIO_CHAIN))
				1724	return true;
				1725
				1726	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
				1727
				1728	if (atomic_dec_and_test(&bio->__bi_remaining)) {
				1729	bio_clear_flag(bio, BIO_CHAIN);
				1730	return true;
				1731	}
				1732
				1733	return false;
				1734	}
				1735
				1736	/**
				1737	* bio_endio - end I/O on a bio
				1738	* @bio: bio
				1739	*
				1740	* Description:
				1741	* bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
				1742	* way to end I/O on a bio. No one should call bi_end_io() directly on a
				1743	* bio unless they own it and thus know that it has an end_io function.
				1744	*
				1745	* bio_endio() can be called several times on a bio that has been chained
				1746	* using bio_chain(). The ->bi_end_io() function will only be called the
				1747	* last time. At this point the BLK_TA_COMPLETE tracing event will be
				1748	* generated if BIO_TRACE_COMPLETION is set.
				1749	**/
				1750	void bio_endio(struct bio *bio)
				1751	{
				1752	again:
				1753	if (!bio_remaining_done(bio))
				1754	return;
				1755
				1756	if (!blk_crypto_endio(bio))
				1757	return;
				1758
				1759	if (!bio_integrity_endio(bio))
				1760	return;
				1761
				1762	if (bio->bi_disk)
				1763	rq_qos_done_bio(bio->bi_disk->queue, bio);
				1764
				1765	/*
				1766	* Need to have a real endio function for chained bios, otherwise
				1767	* various corner cases will break (like stacking block devices that
				1768	* save/restore bi_end_io) - however, we want to avoid unbounded
				1769	* recursion and blowing the stack. Tail call optimization would
				1770	* handle this, but compiling with frame pointers also disables
				1771	* gcc's sibling call optimization.
				1772	*/
				1773	if (bio->bi_end_io == bio_chain_endio) {
				1774	bio = __bio_chain_endio(bio);
				1775	goto again;
				1776	}
				1777
				1778	if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
				1779	trace_block_bio_complete(bio->bi_disk->queue, bio,
				1780	blk_status_to_errno(bio->bi_status));
				1781	bio_clear_flag(bio, BIO_TRACE_COMPLETION);
				1782	}
				1783
				1784	blk_throtl_bio_endio(bio);
				1785	/* release cgroup info */
				1786	bio_uninit(bio);
				1787	if (bio->bi_end_io)
				1788	bio->bi_end_io(bio);
				1789	}
				1790	EXPORT_SYMBOL(bio_endio);
				1791
				1792	/**
				1793	* bio_split - split a bio
				1794	* @bio: bio to split
				1795	* @sectors: number of sectors to split from the front of @bio
				1796	* @gfp: gfp mask
				1797	* @bs: bio set to allocate from
				1798	*
				1799	* Allocates and returns a new bio which represents @sectors from the start of
				1800	* @bio, and updates @bio to represent the remaining sectors.
				1801	*
				1802	* Unless this is a discard request the newly allocated bio will point
				1803	* to @bio's bi_io_vec; it is the caller's responsibility to ensure that
				1804	* @bio is not freed before the split.
				1805	*/
				1806	struct bio bio_split(struct bio bio, int sectors,
				1807	gfp_t gfp, struct bio_set *bs)
				1808	{
				1809	struct bio *split;
				1810
				1811	BUG_ON(sectors <= 0);
				1812	BUG_ON(sectors >= bio_sectors(bio));
				1813
				1814	split = bio_clone_fast(bio, gfp, bs);
				1815	if (!split)
				1816	return NULL;
				1817
				1818	split->bi_iter.bi_size = sectors << 9;
				1819
				1820	if (bio_integrity(split))
				1821	bio_integrity_trim(split);
				1822
				1823	bio_advance(bio, split->bi_iter.bi_size);
				1824	bio->bi_iter.bi_done = 0;
				1825
				1826	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
				1827	bio_set_flag(split, BIO_TRACE_COMPLETION);
				1828
				1829	return split;
				1830	}
				1831	EXPORT_SYMBOL(bio_split);
				1832
				1833	/**
				1834	* bio_trim - trim a bio
				1835	* @bio: bio to trim
				1836	* @offset: number of sectors to trim from the front of @bio
				1837	* @size: size we want to trim @bio to, in sectors
				1838	*/
				1839	void bio_trim(struct bio *bio, int offset, int size)
				1840	{
				1841	/* 'bio' is a cloned bio which we need to trim to match
				1842	* the given offset and size.
				1843	*/
				1844
				1845	size <<= 9;
				1846	if (offset == 0 && size == bio->bi_iter.bi_size)
				1847	return;
				1848
				1849	bio_clear_flag(bio, BIO_SEG_VALID);
				1850
				1851	bio_advance(bio, offset << 9);
				1852
				1853	bio->bi_iter.bi_size = size;
				1854
				1855	if (bio_integrity(bio))
				1856	bio_integrity_trim(bio);
				1857
				1858	}
				1859	EXPORT_SYMBOL_GPL(bio_trim);
				1860
				1861	/*
				1862	* create memory pools for biovec's in a bio_set.
				1863	* use the global biovec slabs created for general use.
				1864	*/
				1865	int biovec_init_pool(mempool_t *pool, int pool_entries)
				1866	{
				1867	struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
				1868
				1869	return mempool_init_slab_pool(pool, pool_entries, bp->slab);
				1870	}
				1871
				1872	/*
				1873	* bioset_exit - exit a bioset initialized with bioset_init()
				1874	*
				1875	* May be called on a zeroed but uninitialized bioset (i.e. allocated with
				1876	* kzalloc()).
				1877	*/
				1878	void bioset_exit(struct bio_set *bs)
				1879	{
				1880	if (bs->rescue_workqueue)
				1881	destroy_workqueue(bs->rescue_workqueue);
				1882	bs->rescue_workqueue = NULL;
				1883
				1884	mempool_exit(&bs->bio_pool);
				1885	mempool_exit(&bs->bvec_pool);
				1886
				1887	bioset_integrity_free(bs);
				1888	if (bs->bio_slab)
				1889	bio_put_slab(bs);
				1890	bs->bio_slab = NULL;
				1891	}
				1892	EXPORT_SYMBOL(bioset_exit);
				1893
				1894	/**
				1895	* bioset_init - Initialize a bio_set
				1896	* @bs: pool to initialize
				1897	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1898	* @front_pad: Number of bytes to allocate in front of the returned bio
				1899	* @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
				1900	* and %BIOSET_NEED_RESCUER
				1901	*
				1902	* Description:
				1903	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1904	* to ask for a number of bytes to be allocated in front of the bio.
				1905	* Front pad allocation is useful for embedding the bio inside
				1906	* another structure, to avoid allocating extra data to go with the bio.
				1907	* Note that the bio must be embedded at the END of that structure always,
				1908	* or things will break badly.
				1909	* If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
				1910	* for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
				1911	* If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
				1912	* dispatch queued requests when the mempool runs out of space.
				1913	*
				1914	*/
				1915	int bioset_init(struct bio_set *bs,
				1916	unsigned int pool_size,
				1917	unsigned int front_pad,
				1918	int flags)
				1919	{
				1920	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
				1921
				1922	bs->front_pad = front_pad;
				1923
				1924	spin_lock_init(&bs->rescue_lock);
				1925	bio_list_init(&bs->rescue_list);
				1926	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1927
				1928	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
				1929	if (!bs->bio_slab)
				1930	return -ENOMEM;
				1931
				1932	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
				1933	goto bad;
				1934
				1935	if ((flags & BIOSET_NEED_BVECS) &&
				1936	biovec_init_pool(&bs->bvec_pool, pool_size))
				1937	goto bad;
				1938
				1939	if (!(flags & BIOSET_NEED_RESCUER))
				1940	return 0;
				1941
				1942	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1943	if (!bs->rescue_workqueue)
				1944	goto bad;
				1945
				1946	return 0;
				1947	bad:
				1948	bioset_exit(bs);
				1949	return -ENOMEM;
				1950	}
				1951	EXPORT_SYMBOL(bioset_init);
				1952
				1953	/*
				1954	* Initialize and setup a new bio_set, based on the settings from
				1955	* another bio_set.
				1956	*/
				1957	int bioset_init_from_src(struct bio_set bs, struct bio_set src)
				1958	{
				1959	int flags;
				1960
				1961	flags = 0;
				1962	if (src->bvec_pool.min_nr)
				1963	flags \|= BIOSET_NEED_BVECS;
				1964	if (src->rescue_workqueue)
				1965	flags \|= BIOSET_NEED_RESCUER;
				1966
				1967	return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
				1968	}
				1969	EXPORT_SYMBOL(bioset_init_from_src);
				1970
				1971	#ifdef CONFIG_BLK_CGROUP
				1972
				1973	#ifdef CONFIG_MEMCG
				1974	/**
				1975	* bio_associate_blkcg_from_page - associate a bio with the page's blkcg
				1976	* @bio: target bio
				1977	* @page: the page to lookup the blkcg from
				1978	*
				1979	* Associate @bio with the blkcg from @page's owning memcg. This works like
				1980	* every other associate function wrt references.
				1981	*/
				1982	int bio_associate_blkcg_from_page(struct bio bio, struct page page)
				1983	{
				1984	struct cgroup_subsys_state *blkcg_css;
				1985
				1986	if (unlikely(bio->bi_css))
				1987	return -EBUSY;
				1988	if (!page->mem_cgroup)
				1989	return 0;
				1990	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
				1991	&io_cgrp_subsys);
				1992	bio->bi_css = blkcg_css;
				1993	return 0;
				1994	}
				1995	#endif /* CONFIG_MEMCG */
				1996
				1997	/**
				1998	* bio_associate_blkcg - associate a bio with the specified blkcg
				1999	* @bio: target bio
				2000	* @blkcg_css: css of the blkcg to associate
				2001	*
				2002	* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
				2003	* treat @bio as if it were issued by a task which belongs to the blkcg.
				2004	*
				2005	* This function takes an extra reference of @blkcg_css which will be put
				2006	* when @bio is released. The caller must own @bio and is responsible for
				2007	* synchronizing calls to this function.
				2008	*/
				2009	int bio_associate_blkcg(struct bio bio, struct cgroup_subsys_state blkcg_css)
				2010	{
				2011	if (unlikely(bio->bi_css))
				2012	return -EBUSY;
				2013	css_get(blkcg_css);
				2014	bio->bi_css = blkcg_css;
				2015	return 0;
				2016	}
				2017	EXPORT_SYMBOL_GPL(bio_associate_blkcg);
				2018
				2019	/**
				2020	* bio_associate_blkg - associate a bio with the specified blkg
				2021	* @bio: target bio
				2022	* @blkg: the blkg to associate
				2023	*
				2024	* Associate @bio with the blkg specified by @blkg. This is the queue specific
				2025	* blkcg information associated with the @bio, a reference will be taken on the
				2026	* @blkg and will be freed when the bio is freed.
				2027	*/
				2028	int bio_associate_blkg(struct bio bio, struct blkcg_gq blkg)
				2029	{
				2030	if (unlikely(bio->bi_blkg))
				2031	return -EBUSY;
				2032	if (!blkg_try_get(blkg))
				2033	return -ENODEV;
				2034	bio->bi_blkg = blkg;
				2035	return 0;
				2036	}
				2037
				2038	/**
				2039	* bio_disassociate_task - undo bio_associate_current()
				2040	* @bio: target bio
				2041	*/
				2042	void bio_disassociate_task(struct bio *bio)
				2043	{
				2044	if (bio->bi_ioc) {
				2045	put_io_context(bio->bi_ioc);
				2046	bio->bi_ioc = NULL;
				2047	}
				2048	if (bio->bi_css) {
				2049	css_put(bio->bi_css);
				2050	bio->bi_css = NULL;
				2051	}
				2052	if (bio->bi_blkg) {
				2053	blkg_put(bio->bi_blkg);
				2054	bio->bi_blkg = NULL;
				2055	}
				2056	}
				2057
				2058	/**
				2059	* bio_clone_blkcg_association - clone blkcg association from src to dst bio
				2060	* @dst: destination bio
				2061	* @src: source bio
				2062	*/
				2063	void bio_clone_blkcg_association(struct bio dst, struct bio src)
				2064	{
				2065	if (src->bi_css)
				2066	WARN_ON(bio_associate_blkcg(dst, src->bi_css));
				2067	}
				2068	EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
				2069	#endif /* CONFIG_BLK_CGROUP */
				2070
				2071	static void __init biovec_init_slabs(void)
				2072	{
				2073	int i;
				2074
				2075	for (i = 0; i < BVEC_POOL_NR; i++) {
				2076	int size;
				2077	struct biovec_slab *bvs = bvec_slabs + i;
				2078
				2079	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				2080	bvs->slab = NULL;
				2081	continue;
				2082	}
				2083
				2084	size = bvs->nr_vecs * sizeof(struct bio_vec);
				2085	bvs->slab = kmem_cache_create(bvs->name, size, 0,
				2086	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
				2087	}
				2088	}
				2089
				2090	static int __init init_bio(void)
				2091	{
				2092	bio_slab_max = 2;
				2093	bio_slab_nr = 0;
				2094	bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
				2095	GFP_KERNEL);
				2096	if (!bio_slabs)
				2097	panic("bio: can't allocate bios\n");
				2098
				2099	bio_integrity_init();
				2100	biovec_init_slabs();
				2101
				2102	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
				2103	panic("bio: can't allocate bios\n");
				2104
				2105	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
				2106	panic("bio: can't create integrity pool\n");
				2107
				2108	return 0;
				2109	}
				2110	subsys_initcall(init_bio);