/*
 | 2 |  * zbud.c | 
 | 3 |  * | 
 | 4 |  * Copyright (C) 2013, Seth Jennings, IBM | 
 | 5 |  * | 
 | 6 |  * Concepts based on zcache internal zbud allocator by Dan Magenheimer. | 
 | 7 |  * | 
 * zbud is a special-purpose allocator for storing compressed pages.  Contrary
 | 9 |  * to what its name may suggest, zbud is not a buddy allocator, but rather an | 
 | 10 |  * allocator that "buddies" two compressed pages together in a single memory | 
 | 11 |  * page. | 
 | 12 |  * | 
 | 13 |  * While this design limits storage density, it has simple and deterministic | 
 | 14 |  * reclaim properties that make it preferable to a higher density approach when | 
 | 15 |  * reclaim will be used. | 
 | 16 |  * | 
 | 17 |  * zbud works by storing compressed pages, or "zpages", together in pairs in a | 
 | 18 |  * single memory page called a "zbud page".  The first buddy is "left | 
 | 19 |  * justified" at the beginning of the zbud page, and the last buddy is "right | 
 | 20 |  * justified" at the end of the zbud page.  The benefit is that if either | 
 | 21 |  * buddy is freed, the freed buddy space, coalesced with whatever slack space | 
 | 22 |  * that existed between the buddies, results in the largest possible free region | 
 | 23 |  * within the zbud page. | 
 | 24 |  * | 
 | 25 |  * zbud also provides an attractive lower bound on density. The ratio of zpages | 
 | 26 |  * to zbud pages can not be less than 1.  This ensures that zbud can never "do | 
 | 27 |  * harm" by using more pages to store zpages than the uncompressed zpages would | 
 | 28 |  * have used on their own. | 
 | 29 |  * | 
 | 30 |  * zbud pages are divided into "chunks".  The size of the chunks is fixed at | 
 | 31 |  * compile time and determined by NCHUNKS_ORDER below.  Dividing zbud pages | 
 | 32 |  * into chunks allows organizing unbuddied zbud pages into a manageable number | 
 | 33 |  * of unbuddied lists according to the number of free chunks available in the | 
 | 34 |  * zbud page. | 
 | 35 |  * | 
 | 36 |  * The zbud API differs from that of conventional allocators in that the | 
 | 37 |  * allocation function, zbud_alloc(), returns an opaque handle to the user, | 
 | 38 |  * not a dereferenceable pointer.  The user must map the handle using | 
 | 39 |  * zbud_map() in order to get a usable pointer by which to access the | 
 | 40 |  * allocation data and unmap the handle with zbud_unmap() when operations | 
 | 41 |  * on the allocation data are complete. | 
 | 42 |  */ | 
 | 43 |  | 
 | 44 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 
 | 45 |  | 
 | 46 | #include <linux/atomic.h> | 
 | 47 | #include <linux/list.h> | 
 | 48 | #include <linux/mm.h> | 
 | 49 | #include <linux/module.h> | 
 | 50 | #include <linux/preempt.h> | 
 | 51 | #include <linux/slab.h> | 
 | 52 | #include <linux/spinlock.h> | 
 | 53 | #include <linux/zbud.h> | 
 | 54 | #include <linux/zpool.h> | 
 | 55 |  | 
 | 56 | /***************** | 
 | 57 |  * Structures | 
 | 58 | *****************/ | 
 | 59 | /* | 
 | 60 |  * NCHUNKS_ORDER determines the internal allocation granularity, effectively | 
 | 61 |  * adjusting internal fragmentation.  It also determines the number of | 
 | 62 |  * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the | 
 | 63 |  * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk | 
 | 64 |  * in allocated page is occupied by zbud header, NCHUNKS will be calculated to | 
 | 65 |  * 63 which shows the max number of free chunks in zbud page, also there will be | 
 | 66 |  * 63 freelists per pool. | 
 | 67 |  */ | 
 | 68 | #define NCHUNKS_ORDER	6 | 
 | 69 |  | 
 | 70 | #define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER) | 
 | 71 | #define CHUNK_SIZE	(1 << CHUNK_SHIFT) | 
 | 72 | #define ZHDR_SIZE_ALIGNED CHUNK_SIZE | 
 | 73 | #define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) | 
 | 74 |  | 
 | 75 | /** | 
 | 76 |  * struct zbud_pool - stores metadata for each zbud pool | 
 | 77 |  * @lock:	protects all pool fields and first|last_chunk fields of any | 
 | 78 |  *		zbud page in the pool | 
 | 79 |  * @unbuddied:	array of lists tracking zbud pages that only contain one buddy; | 
 | 80 |  *		the lists each zbud page is added to depends on the size of | 
 | 81 |  *		its free region. | 
 | 82 |  * @buddied:	list tracking the zbud pages that contain two buddies; | 
 | 83 |  *		these zbud pages are full | 
 | 84 |  * @lru:	list tracking the zbud pages in LRU order by most recently | 
 | 85 |  *		added buddy. | 
 | 86 |  * @pages_nr:	number of zbud pages in the pool. | 
 | 87 |  * @ops:	pointer to a structure of user defined operations specified at | 
 | 88 |  *		pool creation time. | 
 | 89 |  * | 
 | 90 |  * This structure is allocated at pool creation time and maintains metadata | 
 | 91 |  * pertaining to a particular zbud pool. | 
 | 92 |  */ | 
 | 93 | struct zbud_pool { | 
 | 94 | 	spinlock_t lock; | 
 | 95 | 	struct list_head unbuddied[NCHUNKS]; | 
 | 96 | 	struct list_head buddied; | 
 | 97 | 	struct list_head lru; | 
 | 98 | 	u64 pages_nr; | 
 | 99 | 	const struct zbud_ops *ops; | 
 | 100 | #ifdef CONFIG_ZPOOL | 
 | 101 | 	struct zpool *zpool; | 
 | 102 | 	const struct zpool_ops *zpool_ops; | 
 | 103 | #endif | 
 | 104 | }; | 
 | 105 |  | 
 | 106 | /* | 
 | 107 |  * struct zbud_header - zbud page metadata occupying the first chunk of each | 
 | 108 |  *			zbud page. | 
 | 109 |  * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool | 
 | 110 |  * @lru:	links the zbud page into the lru list in the pool | 
 | 111 |  * @first_chunks:	the size of the first buddy in chunks, 0 if free | 
 | 112 |  * @last_chunks:	the size of the last buddy in chunks, 0 if free | 
 | 113 |  */ | 
 | 114 | struct zbud_header { | 
 | 115 | 	struct list_head buddy; | 
 | 116 | 	struct list_head lru; | 
 | 117 | 	unsigned int first_chunks; | 
 | 118 | 	unsigned int last_chunks; | 
 | 119 | 	bool under_reclaim; | 
 | 120 | }; | 
 | 121 |  | 
 | 122 | /***************** | 
 | 123 |  * zpool | 
 | 124 |  ****************/ | 
 | 125 |  | 
 | 126 | #ifdef CONFIG_ZPOOL | 
 | 127 |  | 
 | 128 | static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle) | 
 | 129 | { | 
 | 130 | 	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) | 
 | 131 | 		return pool->zpool_ops->evict(pool->zpool, handle); | 
 | 132 | 	else | 
 | 133 | 		return -ENOENT; | 
 | 134 | } | 
 | 135 |  | 
 | 136 | static const struct zbud_ops zbud_zpool_ops = { | 
 | 137 | 	.evict =	zbud_zpool_evict | 
 | 138 | }; | 
 | 139 |  | 
 | 140 | static void *zbud_zpool_create(const char *name, gfp_t gfp, | 
 | 141 | 			       const struct zpool_ops *zpool_ops, | 
 | 142 | 			       struct zpool *zpool) | 
 | 143 | { | 
 | 144 | 	struct zbud_pool *pool; | 
 | 145 |  | 
 | 146 | 	pool = zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL); | 
 | 147 | 	if (pool) { | 
 | 148 | 		pool->zpool = zpool; | 
 | 149 | 		pool->zpool_ops = zpool_ops; | 
 | 150 | 	} | 
 | 151 | 	return pool; | 
 | 152 | } | 
 | 153 |  | 
 | 154 | static void zbud_zpool_destroy(void *pool) | 
 | 155 | { | 
 | 156 | 	zbud_destroy_pool(pool); | 
 | 157 | } | 
 | 158 |  | 
 | 159 | static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp, | 
 | 160 | 			unsigned long *handle) | 
 | 161 | { | 
 | 162 | 	return zbud_alloc(pool, size, gfp, handle); | 
 | 163 | } | 
 | 164 | static void zbud_zpool_free(void *pool, unsigned long handle) | 
 | 165 | { | 
 | 166 | 	zbud_free(pool, handle); | 
 | 167 | } | 
 | 168 |  | 
 | 169 | static int zbud_zpool_shrink(void *pool, unsigned int pages, | 
 | 170 | 			unsigned int *reclaimed) | 
 | 171 | { | 
 | 172 | 	unsigned int total = 0; | 
 | 173 | 	int ret = -EINVAL; | 
 | 174 |  | 
 | 175 | 	while (total < pages) { | 
 | 176 | 		ret = zbud_reclaim_page(pool, 8); | 
 | 177 | 		if (ret < 0) | 
 | 178 | 			break; | 
 | 179 | 		total++; | 
 | 180 | 	} | 
 | 181 |  | 
 | 182 | 	if (reclaimed) | 
 | 183 | 		*reclaimed = total; | 
 | 184 |  | 
 | 185 | 	return ret; | 
 | 186 | } | 
 | 187 |  | 
 | 188 | static void *zbud_zpool_map(void *pool, unsigned long handle, | 
 | 189 | 			enum zpool_mapmode mm) | 
 | 190 | { | 
 | 191 | 	return zbud_map(pool, handle); | 
 | 192 | } | 
 | 193 | static void zbud_zpool_unmap(void *pool, unsigned long handle) | 
 | 194 | { | 
 | 195 | 	zbud_unmap(pool, handle); | 
 | 196 | } | 
 | 197 |  | 
 | 198 | static u64 zbud_zpool_total_size(void *pool) | 
 | 199 | { | 
 | 200 | 	return zbud_get_pool_size(pool) * PAGE_SIZE; | 
 | 201 | } | 
 | 202 |  | 
 | 203 | static struct zpool_driver zbud_zpool_driver = { | 
 | 204 | 	.type =		"zbud", | 
 | 205 | 	.owner =	THIS_MODULE, | 
 | 206 | 	.create =	zbud_zpool_create, | 
 | 207 | 	.destroy =	zbud_zpool_destroy, | 
 | 208 | 	.malloc =	zbud_zpool_malloc, | 
 | 209 | 	.free =		zbud_zpool_free, | 
 | 210 | 	.shrink =	zbud_zpool_shrink, | 
 | 211 | 	.map =		zbud_zpool_map, | 
 | 212 | 	.unmap =	zbud_zpool_unmap, | 
 | 213 | 	.total_size =	zbud_zpool_total_size, | 
 | 214 | }; | 
 | 215 |  | 
 | 216 | MODULE_ALIAS("zpool-zbud"); | 
 | 217 | #endif /* CONFIG_ZPOOL */ | 
 | 218 |  | 
 | 219 | /***************** | 
 | 220 |  * Helpers | 
 | 221 | *****************/ | 
 | 222 | /* Just to make the code easier to read */ | 
 | 223 | enum buddy { | 
 | 224 | 	FIRST, | 
 | 225 | 	LAST | 
 | 226 | }; | 
 | 227 |  | 
 | 228 | /* Converts an allocation size in bytes to size in zbud chunks */ | 
 | 229 | static int size_to_chunks(size_t size) | 
 | 230 | { | 
 | 231 | 	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; | 
 | 232 | } | 
 | 233 |  | 
 | 234 | #define for_each_unbuddied_list(_iter, _begin) \ | 
 | 235 | 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) | 
 | 236 |  | 
 | 237 | /* Initializes the zbud header of a newly allocated zbud page */ | 
 | 238 | static struct zbud_header *init_zbud_page(struct page *page) | 
 | 239 | { | 
 | 240 | 	struct zbud_header *zhdr = page_address(page); | 
 | 241 | 	zhdr->first_chunks = 0; | 
 | 242 | 	zhdr->last_chunks = 0; | 
 | 243 | 	INIT_LIST_HEAD(&zhdr->buddy); | 
 | 244 | 	INIT_LIST_HEAD(&zhdr->lru); | 
 | 245 | 	zhdr->under_reclaim = 0; | 
 | 246 | 	return zhdr; | 
 | 247 | } | 
 | 248 |  | 
 | 249 | /* Resets the struct page fields and frees the page */ | 
 | 250 | static void free_zbud_page(struct zbud_header *zhdr) | 
 | 251 | { | 
 | 252 | 	__free_page(virt_to_page(zhdr)); | 
 | 253 | } | 
 | 254 |  | 
 | 255 | /* | 
 | 256 |  * Encodes the handle of a particular buddy within a zbud page | 
 | 257 |  * Pool lock should be held as this function accesses first|last_chunks | 
 | 258 |  */ | 
 | 259 | static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud) | 
 | 260 | { | 
 | 261 | 	unsigned long handle; | 
 | 262 |  | 
 | 263 | 	/* | 
 | 264 | 	 * For now, the encoded handle is actually just the pointer to the data | 
 | 265 | 	 * but this might not always be the case.  A little information hiding. | 
 | 266 | 	 * Add CHUNK_SIZE to the handle if it is the first allocation to jump | 
 | 267 | 	 * over the zbud header in the first chunk. | 
 | 268 | 	 */ | 
 | 269 | 	handle = (unsigned long)zhdr; | 
 | 270 | 	if (bud == FIRST) | 
 | 271 | 		/* skip over zbud header */ | 
 | 272 | 		handle += ZHDR_SIZE_ALIGNED; | 
 | 273 | 	else /* bud == LAST */ | 
 | 274 | 		handle += PAGE_SIZE - (zhdr->last_chunks  << CHUNK_SHIFT); | 
 | 275 | 	return handle; | 
 | 276 | } | 
 | 277 |  | 
 | 278 | /* Returns the zbud page where a given handle is stored */ | 
 | 279 | static struct zbud_header *handle_to_zbud_header(unsigned long handle) | 
 | 280 | { | 
 | 281 | 	return (struct zbud_header *)(handle & PAGE_MASK); | 
 | 282 | } | 
 | 283 |  | 
 | 284 | /* Returns the number of free chunks in a zbud page */ | 
 | 285 | static int num_free_chunks(struct zbud_header *zhdr) | 
 | 286 | { | 
 | 287 | 	/* | 
 | 288 | 	 * Rather than branch for different situations, just use the fact that | 
 | 289 | 	 * free buddies have a length of zero to simplify everything. | 
 | 290 | 	 */ | 
 | 291 | 	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; | 
 | 292 | } | 
 | 293 |  | 
 | 294 | /***************** | 
 | 295 |  * API Functions | 
 | 296 | *****************/ | 
 | 297 | /** | 
 | 298 |  * zbud_create_pool() - create a new zbud pool | 
 | 299 |  * @gfp:	gfp flags when allocating the zbud pool structure | 
 | 300 |  * @ops:	user-defined operations for the zbud pool | 
 | 301 |  * | 
 | 302 |  * Return: pointer to the new zbud pool or NULL if the metadata allocation | 
 | 303 |  * failed. | 
 | 304 |  */ | 
 | 305 | struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops) | 
 | 306 | { | 
 | 307 | 	struct zbud_pool *pool; | 
 | 308 | 	int i; | 
 | 309 |  | 
 | 310 | 	pool = kzalloc(sizeof(struct zbud_pool), gfp); | 
 | 311 | 	if (!pool) | 
 | 312 | 		return NULL; | 
 | 313 | 	spin_lock_init(&pool->lock); | 
 | 314 | 	for_each_unbuddied_list(i, 0) | 
 | 315 | 		INIT_LIST_HEAD(&pool->unbuddied[i]); | 
 | 316 | 	INIT_LIST_HEAD(&pool->buddied); | 
 | 317 | 	INIT_LIST_HEAD(&pool->lru); | 
 | 318 | 	pool->pages_nr = 0; | 
 | 319 | 	pool->ops = ops; | 
 | 320 | 	return pool; | 
 | 321 | } | 
 | 322 |  | 
 | 323 | /** | 
 | 324 |  * zbud_destroy_pool() - destroys an existing zbud pool | 
 | 325 |  * @pool:	the zbud pool to be destroyed | 
 | 326 |  * | 
 | 327 |  * The pool should be emptied before this function is called. | 
 | 328 |  */ | 
 | 329 | void zbud_destroy_pool(struct zbud_pool *pool) | 
 | 330 | { | 
 | 331 | 	kfree(pool); | 
 | 332 | } | 
 | 333 |  | 
 | 334 | /** | 
 | 335 |  * zbud_alloc() - allocates a region of a given size | 
 | 336 |  * @pool:	zbud pool from which to allocate | 
 | 337 |  * @size:	size in bytes of the desired allocation | 
 | 338 |  * @gfp:	gfp flags used if the pool needs to grow | 
 | 339 |  * @handle:	handle of the new allocation | 
 | 340 |  * | 
 | 341 |  * This function will attempt to find a free region in the pool large enough to | 
 | 342 |  * satisfy the allocation request.  A search of the unbuddied lists is | 
 | 343 |  * performed first. If no suitable free region is found, then a new page is | 
 | 344 |  * allocated and added to the pool to satisfy the request. | 
 | 345 |  * | 
 | 346 |  * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used | 
 | 347 |  * as zbud pool pages. | 
 | 348 |  * | 
 | 349 |  * Return: 0 if success and handle is set, otherwise -EINVAL if the size or | 
 | 350 |  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate | 
 | 351 |  * a new page. | 
 | 352 |  */ | 
 | 353 | int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, | 
 | 354 | 			unsigned long *handle) | 
 | 355 | { | 
 | 356 | 	int chunks, i, freechunks; | 
 | 357 | 	struct zbud_header *zhdr = NULL; | 
 | 358 | 	enum buddy bud; | 
 | 359 | 	struct page *page; | 
 | 360 |  | 
 | 361 | 	if (!size || (gfp & __GFP_HIGHMEM)) | 
 | 362 | 		return -EINVAL; | 
 | 363 | 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) | 
 | 364 | 		return -ENOSPC; | 
 | 365 | 	chunks = size_to_chunks(size); | 
 | 366 | 	spin_lock(&pool->lock); | 
 | 367 |  | 
 | 368 | 	/* First, try to find an unbuddied zbud page. */ | 
 | 369 | 	zhdr = NULL; | 
 | 370 | 	for_each_unbuddied_list(i, chunks) { | 
 | 371 | 		if (!list_empty(&pool->unbuddied[i])) { | 
 | 372 | 			zhdr = list_first_entry(&pool->unbuddied[i], | 
 | 373 | 					struct zbud_header, buddy); | 
 | 374 | 			list_del(&zhdr->buddy); | 
 | 375 | 			if (zhdr->first_chunks == 0) | 
 | 376 | 				bud = FIRST; | 
 | 377 | 			else | 
 | 378 | 				bud = LAST; | 
 | 379 | 			goto found; | 
 | 380 | 		} | 
 | 381 | 	} | 
 | 382 |  | 
 | 383 | 	/* Couldn't find unbuddied zbud page, create new one */ | 
 | 384 | 	spin_unlock(&pool->lock); | 
 | 385 | 	page = alloc_page(gfp); | 
 | 386 | 	if (!page) | 
 | 387 | 		return -ENOMEM; | 
 | 388 | 	spin_lock(&pool->lock); | 
 | 389 | 	pool->pages_nr++; | 
 | 390 | 	zhdr = init_zbud_page(page); | 
 | 391 | 	bud = FIRST; | 
 | 392 |  | 
 | 393 | found: | 
 | 394 | 	if (bud == FIRST) | 
 | 395 | 		zhdr->first_chunks = chunks; | 
 | 396 | 	else | 
 | 397 | 		zhdr->last_chunks = chunks; | 
 | 398 |  | 
 | 399 | 	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) { | 
 | 400 | 		/* Add to unbuddied list */ | 
 | 401 | 		freechunks = num_free_chunks(zhdr); | 
 | 402 | 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); | 
 | 403 | 	} else { | 
 | 404 | 		/* Add to buddied list */ | 
 | 405 | 		list_add(&zhdr->buddy, &pool->buddied); | 
 | 406 | 	} | 
 | 407 |  | 
 | 408 | 	/* Add/move zbud page to beginning of LRU */ | 
 | 409 | 	if (!list_empty(&zhdr->lru)) | 
 | 410 | 		list_del(&zhdr->lru); | 
 | 411 | 	list_add(&zhdr->lru, &pool->lru); | 
 | 412 |  | 
 | 413 | 	*handle = encode_handle(zhdr, bud); | 
 | 414 | 	spin_unlock(&pool->lock); | 
 | 415 |  | 
 | 416 | 	return 0; | 
 | 417 | } | 
 | 418 |  | 
 | 419 | /** | 
 | 420 |  * zbud_free() - frees the allocation associated with the given handle | 
 | 421 |  * @pool:	pool in which the allocation resided | 
 | 422 |  * @handle:	handle associated with the allocation returned by zbud_alloc() | 
 | 423 |  * | 
 | 424 |  * In the case that the zbud page in which the allocation resides is under | 
 | 425 |  * reclaim, as indicated by the PG_reclaim flag being set, this function | 
 | 426 |  * only sets the first|last_chunks to 0.  The page is actually freed | 
 | 427 |  * once both buddies are evicted (see zbud_reclaim_page() below). | 
 | 428 |  */ | 
 | 429 | void zbud_free(struct zbud_pool *pool, unsigned long handle) | 
 | 430 | { | 
 | 431 | 	struct zbud_header *zhdr; | 
 | 432 | 	int freechunks; | 
 | 433 |  | 
 | 434 | 	spin_lock(&pool->lock); | 
 | 435 | 	zhdr = handle_to_zbud_header(handle); | 
 | 436 |  | 
 | 437 | 	/* If first buddy, handle will be page aligned */ | 
 | 438 | 	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) | 
 | 439 | 		zhdr->last_chunks = 0; | 
 | 440 | 	else | 
 | 441 | 		zhdr->first_chunks = 0; | 
 | 442 |  | 
 | 443 | 	if (zhdr->under_reclaim) { | 
 | 444 | 		/* zbud page is under reclaim, reclaim will free */ | 
 | 445 | 		spin_unlock(&pool->lock); | 
 | 446 | 		return; | 
 | 447 | 	} | 
 | 448 |  | 
 | 449 | 	/* Remove from existing buddy list */ | 
 | 450 | 	list_del(&zhdr->buddy); | 
 | 451 |  | 
 | 452 | 	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { | 
 | 453 | 		/* zbud page is empty, free */ | 
 | 454 | 		list_del(&zhdr->lru); | 
 | 455 | 		free_zbud_page(zhdr); | 
 | 456 | 		pool->pages_nr--; | 
 | 457 | 	} else { | 
 | 458 | 		/* Add to unbuddied list */ | 
 | 459 | 		freechunks = num_free_chunks(zhdr); | 
 | 460 | 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); | 
 | 461 | 	} | 
 | 462 |  | 
 | 463 | 	spin_unlock(&pool->lock); | 
 | 464 | } | 
 | 465 |  | 
 | 466 | /** | 
 | 467 |  * zbud_reclaim_page() - evicts allocations from a pool page and frees it | 
 | 468 |  * @pool:	pool from which a page will attempt to be evicted | 
 | 469 |  * @retires:	number of pages on the LRU list for which eviction will | 
 | 470 |  *		be attempted before failing | 
 | 471 |  * | 
 | 472 |  * zbud reclaim is different from normal system reclaim in that the reclaim is | 
 | 473 |  * done from the bottom, up.  This is because only the bottom layer, zbud, has | 
 | 474 |  * information on how the allocations are organized within each zbud page. This | 
 | 475 |  * has the potential to create interesting locking situations between zbud and | 
 | 476 |  * the user, however. | 
 | 477 |  * | 
 | 478 |  * To avoid these, this is how zbud_reclaim_page() should be called: | 
 | 479 |  | 
 | 480 |  * The user detects a page should be reclaimed and calls zbud_reclaim_page(). | 
 | 481 |  * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call | 
 | 482 |  * the user-defined eviction handler with the pool and handle as arguments. | 
 | 483 |  * | 
 | 484 |  * If the handle can not be evicted, the eviction handler should return | 
 | 485 |  * non-zero. zbud_reclaim_page() will add the zbud page back to the | 
 | 486 |  * appropriate list and try the next zbud page on the LRU up to | 
 | 487 |  * a user defined number of retries. | 
 | 488 |  * | 
 | 489 |  * If the handle is successfully evicted, the eviction handler should | 
 | 490 |  * return 0 _and_ should have called zbud_free() on the handle. zbud_free() | 
 | 491 |  * contains logic to delay freeing the page if the page is under reclaim, | 
 | 492 |  * as indicated by the setting of the PG_reclaim flag on the underlying page. | 
 | 493 |  * | 
 | 494 |  * If all buddies in the zbud page are successfully evicted, then the | 
 | 495 |  * zbud page can be freed. | 
 | 496 |  * | 
 | 497 |  * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are | 
 | 498 |  * no pages to evict or an eviction handler is not registered, -EAGAIN if | 
 | 499 |  * the retry limit was hit. | 
 | 500 |  */ | 
 | 501 | int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) | 
 | 502 | { | 
 | 503 | 	int i, ret, freechunks; | 
 | 504 | 	struct zbud_header *zhdr; | 
 | 505 | 	unsigned long first_handle = 0, last_handle = 0; | 
 | 506 |  | 
 | 507 | 	spin_lock(&pool->lock); | 
 | 508 | 	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) || | 
 | 509 | 			retries == 0) { | 
 | 510 | 		spin_unlock(&pool->lock); | 
 | 511 | 		return -EINVAL; | 
 | 512 | 	} | 
 | 513 | 	for (i = 0; i < retries; i++) { | 
 | 514 | 		zhdr = list_last_entry(&pool->lru, struct zbud_header, lru); | 
 | 515 | 		list_del(&zhdr->lru); | 
 | 516 | 		list_del(&zhdr->buddy); | 
 | 517 | 		/* Protect zbud page against free */ | 
 | 518 | 		zhdr->under_reclaim = true; | 
 | 519 | 		/* | 
 | 520 | 		 * We need encode the handles before unlocking, since we can | 
 | 521 | 		 * race with free that will set (first|last)_chunks to 0 | 
 | 522 | 		 */ | 
 | 523 | 		first_handle = 0; | 
 | 524 | 		last_handle = 0; | 
 | 525 | 		if (zhdr->first_chunks) | 
 | 526 | 			first_handle = encode_handle(zhdr, FIRST); | 
 | 527 | 		if (zhdr->last_chunks) | 
 | 528 | 			last_handle = encode_handle(zhdr, LAST); | 
 | 529 | 		spin_unlock(&pool->lock); | 
 | 530 |  | 
 | 531 | 		/* Issue the eviction callback(s) */ | 
 | 532 | 		if (first_handle) { | 
 | 533 | 			ret = pool->ops->evict(pool, first_handle); | 
 | 534 | 			if (ret) | 
 | 535 | 				goto next; | 
 | 536 | 		} | 
 | 537 | 		if (last_handle) { | 
 | 538 | 			ret = pool->ops->evict(pool, last_handle); | 
 | 539 | 			if (ret) | 
 | 540 | 				goto next; | 
 | 541 | 		} | 
 | 542 | next: | 
 | 543 | 		spin_lock(&pool->lock); | 
 | 544 | 		zhdr->under_reclaim = false; | 
 | 545 | 		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { | 
 | 546 | 			/* | 
 | 547 | 			 * Both buddies are now free, free the zbud page and | 
 | 548 | 			 * return success. | 
 | 549 | 			 */ | 
 | 550 | 			free_zbud_page(zhdr); | 
 | 551 | 			pool->pages_nr--; | 
 | 552 | 			spin_unlock(&pool->lock); | 
 | 553 | 			return 0; | 
 | 554 | 		} else if (zhdr->first_chunks == 0 || | 
 | 555 | 				zhdr->last_chunks == 0) { | 
 | 556 | 			/* add to unbuddied list */ | 
 | 557 | 			freechunks = num_free_chunks(zhdr); | 
 | 558 | 			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); | 
 | 559 | 		} else { | 
 | 560 | 			/* add to buddied list */ | 
 | 561 | 			list_add(&zhdr->buddy, &pool->buddied); | 
 | 562 | 		} | 
 | 563 |  | 
 | 564 | 		/* add to beginning of LRU */ | 
 | 565 | 		list_add(&zhdr->lru, &pool->lru); | 
 | 566 | 	} | 
 | 567 | 	spin_unlock(&pool->lock); | 
 | 568 | 	return -EAGAIN; | 
 | 569 | } | 
 | 570 |  | 
 | 571 | /** | 
 | 572 |  * zbud_map() - maps the allocation associated with the given handle | 
 | 573 |  * @pool:	pool in which the allocation resides | 
 | 574 |  * @handle:	handle associated with the allocation to be mapped | 
 | 575 |  * | 
 | 576 |  * While trivial for zbud, the mapping functions for others allocators | 
 | 577 |  * implementing this allocation API could have more complex information encoded | 
 | 578 |  * in the handle and could create temporary mappings to make the data | 
 | 579 |  * accessible to the user. | 
 | 580 |  * | 
 | 581 |  * Returns: a pointer to the mapped allocation | 
 | 582 |  */ | 
 | 583 | void *zbud_map(struct zbud_pool *pool, unsigned long handle) | 
 | 584 | { | 
 | 585 | 	return (void *)(handle); | 
 | 586 | } | 
 | 587 |  | 
 | 588 | /** | 
 | 589 |  * zbud_unmap() - maps the allocation associated with the given handle | 
 | 590 |  * @pool:	pool in which the allocation resides | 
 | 591 |  * @handle:	handle associated with the allocation to be unmapped | 
 | 592 |  */ | 
 | 593 | void zbud_unmap(struct zbud_pool *pool, unsigned long handle) | 
 | 594 | { | 
 | 595 | } | 
 | 596 |  | 
 | 597 | /** | 
 | 598 |  * zbud_get_pool_size() - gets the zbud pool size in pages | 
 | 599 |  * @pool:	pool whose size is being queried | 
 | 600 |  * | 
 | 601 |  * Returns: size in pages of the given pool.  The pool lock need not be | 
 | 602 |  * taken to access pages_nr. | 
 | 603 |  */ | 
 | 604 | u64 zbud_get_pool_size(struct zbud_pool *pool) | 
 | 605 | { | 
 | 606 | 	return pool->pages_nr; | 
 | 607 | } | 
 | 608 |  | 
 | 609 | static int __init init_zbud(void) | 
 | 610 | { | 
 | 611 | 	/* Make sure the zbud header will fit in one chunk */ | 
 | 612 | 	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED); | 
 | 613 | 	pr_info("loaded\n"); | 
 | 614 |  | 
 | 615 | #ifdef CONFIG_ZPOOL | 
 | 616 | 	zpool_register_driver(&zbud_zpool_driver); | 
 | 617 | #endif | 
 | 618 |  | 
 | 619 | 	return 0; | 
 | 620 | } | 
 | 621 |  | 
 | 622 | static void __exit exit_zbud(void) | 
 | 623 | { | 
 | 624 | #ifdef CONFIG_ZPOOL | 
 | 625 | 	zpool_unregister_driver(&zbud_zpool_driver); | 
 | 626 | #endif | 
 | 627 |  | 
 | 628 | 	pr_info("unloaded\n"); | 
 | 629 | } | 
 | 630 |  | 
 | 631 | module_init(init_zbud); | 
 | 632 | module_exit(exit_zbud); | 
 | 633 |  | 
 | 634 | MODULE_LICENSE("GPL"); | 
 | 635 | MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); | 
 | 636 | MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages"); |