Blame - ap/os/linux/linux-3.4.x/drivers/mtd/ubi/wl.c - R306

blob: 5438aeadad76de8d2e7abc62f8255b5ab2355b88 [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* @ubi: UBI device description object
				3	* Copyright (c) International Business Machines Corp., 2006
				4	*
				5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License as published by
				7	* the Free Software Foundation; either version 2 of the License, or
				8	* (at your option) any later version.
				9	*
				10	* This program is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
				13	* the GNU General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU General Public License
				16	* along with this program; if not, write to the Free Software
				17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				18	*
				19	* Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
				20	*/
				21
				22	/*
				23	* UBI wear-leveling sub-system.
				24	*
				25	* This sub-system is responsible for wear-leveling. It works in terms of
				26	* physical eraseblocks and erase counters and knows nothing about logical
				27	* eraseblocks, volumes, etc. From this sub-system's perspective all physical
				28	* eraseblocks are of two types - used and free. Used physical eraseblocks are
				29	* those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
				30	* eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
				31	*
				32	* Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
				33	* header. The rest of the physical eraseblock contains only %0xFF bytes.
				34	*
				35	* When physical eraseblocks are returned to the WL sub-system by means of the
				36	* 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
				37	* done asynchronously in context of the per-UBI device background thread,
				38	* which is also managed by the WL sub-system.
				39	*
				40	* The wear-leveling is ensured by means of moving the contents of used
				41	* physical eraseblocks with low erase counter to free physical eraseblocks
				42	* with high erase counter.
				43	*
				44	* The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
				45	* an "optimal" physical eraseblock. For example, when it is known that the
				46	* physical eraseblock will be "put" soon because it contains short-term data,
				47	* the WL sub-system may pick a free physical eraseblock with low erase
				48	* counter, and so forth.
				49	*
				50	* If the WL sub-system fails to erase a physical eraseblock, it marks it as
				51	* bad.
				52	*
				53	* This sub-system is also responsible for scrubbing. If a bit-flip is detected
				54	* in a physical eraseblock, it has to be moved. Technically this is the same
				55	* as moving it for wear-leveling reasons.
				56	*
				57	* As it was said, for the UBI sub-system all physical eraseblocks are either
				58	* "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
				59	* used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
				60	* RB-trees, as well as (temporarily) in the @wl->pq queue.
				61	*
				62	* When the WL sub-system returns a physical eraseblock, the physical
				63	* eraseblock is protected from being moved for some "time". For this reason,
				64	* the physical eraseblock is not directly moved from the @wl->free tree to the
				65	* @wl->used tree. There is a protection queue in between where this
				66	* physical eraseblock is temporarily stored (@wl->pq).
				67	*
				68	* All this protection stuff is needed because:
				69	* o we don't want to move physical eraseblocks just after we have given them
				70	* to the user; instead, we first want to let users fill them up with data;
				71	*
				72	* o there is a chance that the user will put the physical eraseblock very
				73	* soon, so it makes sense not to move it for some time, but wait; this is
				74	* especially important in case of "short term" physical eraseblocks.
				75	*
				76	* Physical eraseblocks stay protected only for limited time. But the "time" is
				77	* measured in erase cycles in this case. This is implemented with help of the
				78	* protection queue. Eraseblocks are put to the tail of this queue when they
				79	* are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
				80	* head of the queue on each erase operation (for any eraseblock). So the
				81	* length of the queue defines how many (global) erase cycles PEBs are protected.
				82	*
				83	* To put it differently, each physical eraseblock has 2 main states: free and
				84	* used. The former state corresponds to the @wl->free tree. The latter state
				85	* is split up on several sub-states:
				86	* o the WL movement is allowed (@wl->used tree);
				87	* o the WL movement is disallowed (@wl->erroneous) because the PEB is
				88	* erroneous - e.g., there was a read error;
				89	* o the WL movement is temporarily prohibited (@wl->pq queue);
				90	* o scrubbing is needed (@wl->scrub tree).
				91	*
				92	* Depending on the sub-state, wear-leveling entries of the used physical
				93	* eraseblocks may be kept in one of those structures.
				94	*
				95	* Note, in this implementation, we keep a small in-RAM object for each physical
				96	* eraseblock. This is surely not a scalable solution. But it appears to be good
				97	* enough for moderately large flashes and it is simple. In future, one may
				98	* re-work this sub-system and make it more scalable.
				99	*
				100	* At the moment this sub-system does not utilize the sequence number, which
				101	* was introduced relatively recently. But it would be wise to do this because
				102	* the sequence number of a logical eraseblock characterizes how old is it. For
				103	* example, when we move a PEB with low erase counter, and we need to pick the
				104	* target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
				105	* pick target PEB with an average EC if our PEB is not very "old". This is a
				106	* room for future re-works of the WL sub-system.
				107	*/
				108
				109	#include <linux/slab.h>
				110	#include <linux/crc32.h>
				111	#include <linux/freezer.h>
				112	#include <linux/kthread.h>
				113	#include "ubi.h"
				114
				115	/* Number of physical eraseblocks reserved for wear-leveling purposes */
				116	#define WL_RESERVED_PEBS 1
				117
				118	/*
				119	* Maximum difference between two erase counters. If this threshold is
				120	* exceeded, the WL sub-system starts moving data from used physical
				121	* eraseblocks with low erase counter to free physical eraseblocks with high
				122	* erase counter.
				123	*/
				124	#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
				125
				126	/*
				127	* When a physical eraseblock is moved, the WL sub-system has to pick the target
				128	* physical eraseblock to move to. The simplest way would be just to pick the
				129	* one with the highest erase counter. But in certain workloads this could lead
				130	* to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
				131	* situation when the picked physical eraseblock is constantly erased after the
				132	* data is written to it. So, we have a constant which limits the highest erase
				133	* counter of the free physical eraseblock to pick. Namely, the WL sub-system
				134	* does not pick eraseblocks with erase counter greater than the lowest erase
				135	* counter plus %WL_FREE_MAX_DIFF.
				136	*/
				137	#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
				138
				139	/*
				140	* Maximum number of consecutive background thread failures which is enough to
				141	* switch to read-only mode.
				142	*/
				143	#define WL_MAX_FAILURES 32
				144
				145	/**
				146	* struct ubi_work - UBI work description data structure.
				147	* @list: a link in the list of pending works
				148	* @func: worker function
				149	* @e: physical eraseblock to erase
				150	* @torture: if the physical eraseblock has to be tortured
				151	*
				152	* The @func pointer points to the worker function. If the @cancel argument is
				153	* not zero, the worker has to free the resources and exit immediately. The
				154	* worker has to return zero in case of success and a negative error code in
				155	* case of failure.
				156	*/
				157	struct ubi_work {
				158	struct list_head list;
				159	int (func)(struct ubi_device ubi, struct ubi_work *wrk, int cancel);
				160	/* The below fields are only relevant to erasure works */
				161	struct ubi_wl_entry *e;
				162	int torture;
				163	};
				164
				165	#ifdef CONFIG_MTD_UBI_DEBUG
				166	static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
				167	static int paranoid_check_in_wl_tree(const struct ubi_device *ubi,
				168	struct ubi_wl_entry *e,
				169	struct rb_root *root);
				170	static int paranoid_check_in_pq(const struct ubi_device *ubi,
				171	struct ubi_wl_entry *e);
				172	#else
				173	#define paranoid_check_ec(ubi, pnum, ec) 0
				174	#define paranoid_check_in_wl_tree(ubi, e, root)
				175	#define paranoid_check_in_pq(ubi, e) 0
				176	#endif
				177
				178	/**
				179	* wl_tree_add - add a wear-leveling entry to a WL RB-tree.
				180	* @e: the wear-leveling entry to add
				181	* @root: the root of the tree
				182	*
				183	* Note, we use (erase counter, physical eraseblock number) pairs as keys in
				184	* the @ubi->used and @ubi->free RB-trees.
				185	*/
				186	static void wl_tree_add(struct ubi_wl_entry e, struct rb_root root)
				187	{
				188	struct rb_node *p, parent = NULL;
				189
				190	p = &root->rb_node;
				191	while (*p) {
				192	struct ubi_wl_entry *e1;
				193
				194	parent = *p;
				195	e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
				196
				197	if (e->ec < e1->ec)
				198	p = &(*p)->rb_left;
				199	else if (e->ec > e1->ec)
				200	p = &(*p)->rb_right;
				201	else {
				202	ubi_assert(e->pnum != e1->pnum);
				203	if (e->pnum < e1->pnum)
				204	p = &(*p)->rb_left;
				205	else
				206	p = &(*p)->rb_right;
				207	}
				208	}
				209
				210	rb_link_node(&e->u.rb, parent, p);
				211	rb_insert_color(&e->u.rb, root);
				212	}
				213
				214	/**
				215	* do_work - do one pending work.
				216	* @ubi: UBI device description object
				217	*
				218	* This function returns zero in case of success and a negative error code in
				219	* case of failure.
				220	*/
				221	static int do_work(struct ubi_device *ubi)
				222	{
				223	int err;
				224	struct ubi_work *wrk;
				225
				226	cond_resched();
				227
				228	/*
				229	* @ubi->work_sem is used to synchronize with the workers. Workers take
				230	* it in read mode, so many of them may be doing works at a time. But
				231	* the queue flush code has to be sure the whole queue of works is
				232	* done, and it takes the mutex in write mode.
				233	*/
				234	down_read(&ubi->work_sem);
				235	spin_lock(&ubi->wl_lock);
				236	if (list_empty(&ubi->works)) {
				237	spin_unlock(&ubi->wl_lock);
				238	up_read(&ubi->work_sem);
				239	return 0;
				240	}
				241
				242	wrk = list_entry(ubi->works.next, struct ubi_work, list);
				243	list_del(&wrk->list);
				244	ubi->works_count -= 1;
				245	ubi_assert(ubi->works_count >= 0);
				246	spin_unlock(&ubi->wl_lock);
				247
				248	/*
				249	* Call the worker function. Do not touch the work structure
				250	* after this call as it will have been freed or reused by that
				251	* time by the worker function.
				252	*/
				253	err = wrk->func(ubi, wrk, 0);
				254	if (err)
				255	ubi_err("work failed with error code %d", err);
				256	up_read(&ubi->work_sem);
				257
				258	return err;
				259	}
				260
				261	/**
				262	* produce_free_peb - produce a free physical eraseblock.
				263	* @ubi: UBI device description object
				264	*
				265	* This function tries to make a free PEB by means of synchronous execution of
				266	* pending works. This may be needed if, for example the background thread is
				267	* disabled. Returns zero in case of success and a negative error code in case
				268	* of failure.
				269	*/
				270	static int produce_free_peb(struct ubi_device *ubi)
				271	{
				272	int err;
				273
				274	spin_lock(&ubi->wl_lock);
				275	while (!ubi->free.rb_node) {
				276	spin_unlock(&ubi->wl_lock);
				277
				278	dbg_wl("do one work synchronously");
				279	err = do_work(ubi);
				280	if (err)
				281	return err;
				282
				283	spin_lock(&ubi->wl_lock);
				284	}
				285	spin_unlock(&ubi->wl_lock);
				286
				287	return 0;
				288	}
				289
				290	/**
				291	* in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
				292	* @e: the wear-leveling entry to check
				293	* @root: the root of the tree
				294	*
				295	* This function returns non-zero if @e is in the @root RB-tree and zero if it
				296	* is not.
				297	*/
				298	static int in_wl_tree(struct ubi_wl_entry e, struct rb_root root)
				299	{
				300	struct rb_node *p;
				301
				302	p = root->rb_node;
				303	while (p) {
				304	struct ubi_wl_entry *e1;
				305
				306	e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
				307
				308	if (e->pnum == e1->pnum) {
				309	ubi_assert(e == e1);
				310	return 1;
				311	}
				312
				313	if (e->ec < e1->ec)
				314	p = p->rb_left;
				315	else if (e->ec > e1->ec)
				316	p = p->rb_right;
				317	else {
				318	ubi_assert(e->pnum != e1->pnum);
				319	if (e->pnum < e1->pnum)
				320	p = p->rb_left;
				321	else
				322	p = p->rb_right;
				323	}
				324	}
				325
				326	return 0;
				327	}
				328
				329	/**
				330	* prot_queue_add - add physical eraseblock to the protection queue.
				331	* @ubi: UBI device description object
				332	* @e: the physical eraseblock to add
				333	*
				334	* This function adds @e to the tail of the protection queue @ubi->pq, where
				335	* @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
				336	* temporarily protected from the wear-leveling worker. Note, @wl->lock has to
				337	* be locked.
				338	*/
				339	static void prot_queue_add(struct ubi_device ubi, struct ubi_wl_entry e)
				340	{
				341	int pq_tail = ubi->pq_head - 1;
				342
				343	if (pq_tail < 0)
				344	pq_tail = UBI_PROT_QUEUE_LEN - 1;
				345	ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
				346	list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
				347	dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
				348	}
				349
				350	/**
				351	* find_wl_entry - find wear-leveling entry closest to certain erase counter.
				352	* @root: the RB-tree where to look for
				353	* @diff: maximum possible difference from the smallest erase counter
				354	*
				355	* This function looks for a wear leveling entry with erase counter closest to
				356	* min + @diff, where min is the smallest erase counter.
				357	*/
				358	static struct ubi_wl_entry find_wl_entry(struct rb_root root, int diff)
				359	{
				360	struct rb_node *p;
				361	struct ubi_wl_entry *e;
				362	int max;
				363
				364	e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
				365	if(e == NULL)
				366	{
				367	return NULL;
				368	}
				369	max = e->ec + diff;
				370
				371	p = root->rb_node;
				372	while (p) {
				373	struct ubi_wl_entry *e1;
				374
				375	e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
				376	if (e1->ec >= max)
				377	p = p->rb_left;
				378	else {
				379	p = p->rb_right;
				380	e = e1;
				381	}
				382	}
				383
				384	return e;
				385	}
				386
				387	/**
				388	* ubi_wl_get_peb - get a physical eraseblock.
				389	* @ubi: UBI device description object
				390	* @dtype: type of data which will be stored in this physical eraseblock
				391	*
				392	* This function returns a physical eraseblock in case of success and a
				393	* negative error code in case of failure. Might sleep.
				394	*/
				395	int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
				396	{
				397	int err;
				398	struct ubi_wl_entry e, first, *last;
				399
				400	ubi_assert(dtype == UBI_LONGTERM \|\| dtype == UBI_SHORTTERM \|\|
				401	dtype == UBI_UNKNOWN);
				402
				403	retry:
				404	spin_lock(&ubi->wl_lock);
				405	if (!ubi->free.rb_node) {
				406	if (ubi->works_count == 0) {
				407	ubi_assert(list_empty(&ubi->works));
				408	ubi_err("no free eraseblocks");
				409	spin_unlock(&ubi->wl_lock);
				410	return -ENOSPC;
				411	}
				412	spin_unlock(&ubi->wl_lock);
				413
				414	err = produce_free_peb(ubi);
				415	if (err < 0)
				416	return err;
				417	goto retry;
				418	}
				419
				420	switch (dtype) {
				421	case UBI_LONGTERM:
				422	/*
				423	* For long term data we pick a physical eraseblock with high
				424	* erase counter. But the highest erase counter we can pick is
				425	* bounded by the the lowest erase counter plus
				426	* %WL_FREE_MAX_DIFF.
				427	*/
				428	e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
				429	break;
				430	case UBI_UNKNOWN:
				431	/*
				432	* For unknown data we pick a physical eraseblock with medium
				433	* erase counter. But we by no means can pick a physical
				434	* eraseblock with erase counter greater or equivalent than the
				435	* lowest erase counter plus %WL_FREE_MAX_DIFF/2.
				436	*/
				437	first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry,
				438	u.rb);
				439	last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb);
				440
				441	if (last->ec - first->ec < WL_FREE_MAX_DIFF)
				442	e = rb_entry(ubi->free.rb_node,
				443	struct ubi_wl_entry, u.rb);
				444	else
				445	e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF/2);
				446	break;
				447	case UBI_SHORTTERM:
				448	/*
				449	* For short term data we pick a physical eraseblock with the
				450	* lowest erase counter as we expect it will be erased soon.
				451	*/
				452	e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb);
				453	break;
				454	default:
				455	BUG();
				456	}
				457
				458	paranoid_check_in_wl_tree(ubi, e, &ubi->free);
				459
				460	/*
				461	* Move the physical eraseblock to the protection queue where it will
				462	* be protected from being moved for some time.
				463	*/
				464	rb_erase(&e->u.rb, &ubi->free);
				465	dbg_wl("PEB %d EC %d", e->pnum, e->ec);
				466	prot_queue_add(ubi, e);
				467	spin_unlock(&ubi->wl_lock);
				468
				469	err = ubi_dbg_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
				470	ubi->peb_size - ubi->vid_hdr_aloffset);
				471	if (err) {
				472	ubi_err("new PEB %d does not contain all 0xFF bytes", e->pnum);
				473	return err;
				474	}
				475
				476	return e->pnum;
				477	}
				478
				479	/**
				480	* prot_queue_del - remove a physical eraseblock from the protection queue.
				481	* @ubi: UBI device description object
				482	* @pnum: the physical eraseblock to remove
				483	*
				484	* This function deletes PEB @pnum from the protection queue and returns zero
				485	* in case of success and %-ENODEV if the PEB was not found.
				486	*/
				487	static int prot_queue_del(struct ubi_device *ubi, int pnum)
				488	{
				489	struct ubi_wl_entry *e;
				490
				491	e = ubi->lookuptbl[pnum];
				492	if (!e)
				493	return -ENODEV;
				494
				495	if (paranoid_check_in_pq(ubi, e))
				496	return -ENODEV;
				497
				498	list_del(&e->u.list);
				499	dbg_wl("deleted PEB %d from the protection queue", e->pnum);
				500	return 0;
				501	}
				502
				503	/**
				504	* sync_erase - synchronously erase a physical eraseblock.
				505	* @ubi: UBI device description object
				506	* @e: the the physical eraseblock to erase
				507	* @torture: if the physical eraseblock has to be tortured
				508	*
				509	* This function returns zero in case of success and a negative error code in
				510	* case of failure.
				511	*/
				512	static int sync_erase(struct ubi_device ubi, struct ubi_wl_entry e,
				513	int torture)
				514	{
				515	int err;
				516	struct ubi_ec_hdr *ec_hdr;
				517	unsigned long long ec = e->ec;
				518
				519	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
				520
				521	err = paranoid_check_ec(ubi, e->pnum, e->ec);
				522	if (err)
				523	return -EINVAL;
				524
				525	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
				526	if (!ec_hdr)
				527	return -ENOMEM;
				528
				529	err = ubi_io_sync_erase(ubi, e->pnum, torture);
				530	if (err < 0)
				531	goto out_free;
				532
				533	ec += err;
				534	if (ec > UBI_MAX_ERASECOUNTER) {
				535	/*
				536	* Erase counter overflow. Upgrade UBI and use 64-bit
				537	* erase counters internally.
				538	*/
				539	ubi_err("erase counter overflow at PEB %d, EC %llu",
				540	e->pnum, ec);
				541	err = -EINVAL;
				542	goto out_free;
				543	}
				544
				545	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
				546
				547	ec_hdr->ec = cpu_to_be64(ec);
				548
				549	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
				550	if (err)
				551	goto out_free;
				552
				553	e->ec = ec;
				554	spin_lock(&ubi->wl_lock);
				555	if (e->ec > ubi->max_ec)
				556	ubi->max_ec = e->ec;
				557	spin_unlock(&ubi->wl_lock);
				558
				559	out_free:
				560	kfree(ec_hdr);
				561	return err;
				562	}
				563
				564	/**
				565	* serve_prot_queue - check if it is time to stop protecting PEBs.
				566	* @ubi: UBI device description object
				567	*
				568	* This function is called after each erase operation and removes PEBs from the
				569	* tail of the protection queue. These PEBs have been protected for long enough
				570	* and should be moved to the used tree.
				571	*/
				572	static void serve_prot_queue(struct ubi_device *ubi)
				573	{
				574	struct ubi_wl_entry e, tmp;
				575	int count;
				576
				577	/*
				578	* There may be several protected physical eraseblock to remove,
				579	* process them all.
				580	*/
				581	repeat:
				582	count = 0;
				583	spin_lock(&ubi->wl_lock);
				584	list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
				585	dbg_wl("PEB %d EC %d protection over, move to used tree",
				586	e->pnum, e->ec);
				587
				588	list_del(&e->u.list);
				589	wl_tree_add(e, &ubi->used);
				590	if (count++ > 32) {
				591	/*
				592	* Let's be nice and avoid holding the spinlock for
				593	* too long.
				594	*/
				595	spin_unlock(&ubi->wl_lock);
				596	cond_resched();
				597	goto repeat;
				598	}
				599	}
				600
				601	ubi->pq_head += 1;
				602	if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
				603	ubi->pq_head = 0;
				604	ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
				605	spin_unlock(&ubi->wl_lock);
				606	}
				607
				608	/**
				609	* schedule_ubi_work - schedule a work.
				610	* @ubi: UBI device description object
				611	* @wrk: the work to schedule
				612	*
				613	* This function adds a work defined by @wrk to the tail of the pending works
				614	* list.
				615	*/
				616	static void schedule_ubi_work(struct ubi_device ubi, struct ubi_work wrk)
				617	{
				618	spin_lock(&ubi->wl_lock);
				619	list_add_tail(&wrk->list, &ubi->works);
				620	ubi_assert(ubi->works_count >= 0);
				621	ubi->works_count += 1;
				622	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
				623	wake_up_process(ubi->bgt_thread);
				624	spin_unlock(&ubi->wl_lock);
				625	}
				626
				627	static int erase_worker(struct ubi_device ubi, struct ubi_work wl_wrk,
				628	int cancel);
				629
				630	/**
				631	* schedule_erase - schedule an erase work.
				632	* @ubi: UBI device description object
				633	* @e: the WL entry of the physical eraseblock to erase
				634	* @torture: if the physical eraseblock has to be tortured
				635	*
				636	* This function returns zero in case of success and a %-ENOMEM in case of
				637	* failure.
				638	*/
				639	static int schedule_erase(struct ubi_device ubi, struct ubi_wl_entry e,
				640	int torture)
				641	{
				642	struct ubi_work *wl_wrk;
				643
				644	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
				645	e->pnum, e->ec, torture);
				646
				647	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
				648	if (!wl_wrk)
				649	return -ENOMEM;
				650
				651	wl_wrk->func = &erase_worker;
				652	wl_wrk->e = e;
				653	wl_wrk->torture = torture;
				654
				655	schedule_ubi_work(ubi, wl_wrk);
				656	return 0;
				657	}
				658
				659	/**
				660	* wear_leveling_worker - wear-leveling worker function.
				661	* @ubi: UBI device description object
				662	* @wrk: the work object
				663	* @cancel: non-zero if the worker has to free memory and exit
				664	*
				665	* This function copies a less worn out used physical eraseblock to a more worn
				666	* out free one. Returns zero in case of success and a negative error code in case of
				667	* failure.
				668	*/
				669	static int wear_leveling_worker(struct ubi_device ubi, struct ubi_work wrk,
				670	int cancel)
				671	{
				672	int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
				673	int vol_id = -1, lnum = -1;
				674	struct ubi_wl_entry e1, e2;
				675	struct ubi_vid_hdr *vid_hdr;
				676
				677	kfree(wrk);
				678	if (cancel)
				679	return 0;
				680
				681	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
				682	if (!vid_hdr)
				683	return -ENOMEM;
				684
				685	mutex_lock(&ubi->move_mutex);
				686	spin_lock(&ubi->wl_lock);
				687	ubi_assert(!ubi->move_from && !ubi->move_to);
				688	ubi_assert(!ubi->move_to_put);
				689
				690	if (!ubi->free.rb_node \|\|
				691	(!ubi->used.rb_node && !ubi->scrub.rb_node)) {
				692	/*
				693	* No free physical eraseblocks? Well, they must be waiting in
				694	* the queue to be erased. Cancel movement - it will be
				695	* triggered again when a free physical eraseblock appears.
				696	*
				697	* No used physical eraseblocks? They must be temporarily
				698	* protected from being moved. They will be moved to the
				699	* @ubi->used tree later and the wear-leveling will be
				700	* triggered again.
				701	*/
				702	dbg_wl("cancel WL, a list is empty: free %d, used %d",
				703	!ubi->free.rb_node, !ubi->used.rb_node);
				704	goto out_cancel;
				705	}
				706
				707	if (!ubi->scrub.rb_node) {
				708	/*
				709	* Now pick the least worn-out used physical eraseblock and a
				710	* highly worn-out free physical eraseblock. If the erase
				711	* counters differ much enough, start wear-leveling.
				712	*/
				713	e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
				714	e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
				715
				716	if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
				717	dbg_wl("no WL needed: min used EC %d, max free EC %d",
				718	e1->ec, e2->ec);
				719	goto out_cancel;
				720	}
				721	paranoid_check_in_wl_tree(ubi, e1, &ubi->used);
				722	rb_erase(&e1->u.rb, &ubi->used);
				723	dbg_wl("move PEB %d EC %d to PEB %d EC %d",
				724	e1->pnum, e1->ec, e2->pnum, e2->ec);
				725	} else {
				726	/* Perform scrubbing */
				727	scrubbing = 1;
				728	e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
				729	e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
				730	paranoid_check_in_wl_tree(ubi, e1, &ubi->scrub);
				731	rb_erase(&e1->u.rb, &ubi->scrub);
				732	dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
				733	}
				734
				735	paranoid_check_in_wl_tree(ubi, e2, &ubi->free);
				736	rb_erase(&e2->u.rb, &ubi->free);
				737	ubi->move_from = e1;
				738	ubi->move_to = e2;
				739	spin_unlock(&ubi->wl_lock);
				740
				741	/*
				742	* Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
				743	* We so far do not know which logical eraseblock our physical
				744	* eraseblock (@e1) belongs to. We have to read the volume identifier
				745	* header first.
				746	*
				747	* Note, we are protected from this PEB being unmapped and erased. The
				748	* 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
				749	* which is being moved was unmapped.
				750	*/
				751
				752	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
				753	if (err && err != UBI_IO_BITFLIPS) {
				754	if (err == UBI_IO_FF) {
				755	/*
				756	* We are trying to move PEB without a VID header. UBI
				757	* always write VID headers shortly after the PEB was
				758	* given, so we have a situation when it has not yet
				759	* had a chance to write it, because it was preempted.
				760	* So add this PEB to the protection queue so far,
				761	* because presumably more data will be written there
				762	* (including the missing VID header), and then we'll
				763	* move it.
				764	*/
				765	dbg_wl("PEB %d has no VID header", e1->pnum);
				766	protect = 1;
				767	goto out_not_moved;
				768	} else if (err == UBI_IO_FF_BITFLIPS) {
				769	/*
				770	* The same situation as %UBI_IO_FF, but bit-flips were
				771	* detected. It is better to schedule this PEB for
				772	* scrubbing.
				773	*/
				774	dbg_wl("PEB %d has no VID header but has bit-flips",
				775	e1->pnum);
				776	scrubbing = 1;
				777	goto out_not_moved;
				778	}
				779
				780	ubi_err("error %d while reading VID header from PEB %d",
				781	err, e1->pnum);
				782	goto out_error;
				783	}
				784
				785	vol_id = be32_to_cpu(vid_hdr->vol_id);
				786	lnum = be32_to_cpu(vid_hdr->lnum);
				787
				788	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
				789	if (err) {
				790	if (err == MOVE_CANCEL_RACE) {
				791	/*
				792	* The LEB has not been moved because the volume is
				793	* being deleted or the PEB has been put meanwhile. We
				794	* should prevent this PEB from being selected for
				795	* wear-leveling movement again, so put it to the
				796	* protection queue.
				797	*/
				798	protect = 1;
				799	goto out_not_moved;
				800	}
				801	if (err == MOVE_RETRY) {
				802	scrubbing = 1;
				803	goto out_not_moved;
				804	}
				805	if (err == MOVE_TARGET_BITFLIPS \|\| err == MOVE_TARGET_WR_ERR \|\|
				806	err == MOVE_TARGET_RD_ERR) {
				807	/*
				808	* Target PEB had bit-flips or write error - torture it.
				809	*/
				810	torture = 1;
				811	goto out_not_moved;
				812	}
				813
				814	if (err == MOVE_SOURCE_RD_ERR) {
				815	/*
				816	* An error happened while reading the source PEB. Do
				817	* not switch to R/O mode in this case, and give the
				818	* upper layers a possibility to recover from this,
				819	* e.g. by unmapping corresponding LEB. Instead, just
				820	* put this PEB to the @ubi->erroneous list to prevent
				821	* UBI from trying to move it over and over again.
				822	*/
				823	if (ubi->erroneous_peb_count > ubi->max_erroneous) {
				824	ubi_err("too many erroneous eraseblocks (%d)",
				825	ubi->erroneous_peb_count);
				826	goto out_error;
				827	}
				828	erroneous = 1;
				829	goto out_not_moved;
				830	}
				831
				832	if (err < 0)
				833	goto out_error;
				834
				835	ubi_assert(0);
				836	}
				837
				838	/* The PEB has been successfully moved */
				839	if (scrubbing)
				840	ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
				841	e1->pnum, vol_id, lnum, e2->pnum);
				842	ubi_free_vid_hdr(ubi, vid_hdr);
				843
				844	spin_lock(&ubi->wl_lock);
				845	if (!ubi->move_to_put) {
				846	wl_tree_add(e2, &ubi->used);
				847	e2 = NULL;
				848	}
				849	ubi->move_from = ubi->move_to = NULL;
				850	ubi->move_to_put = ubi->wl_scheduled = 0;
				851	spin_unlock(&ubi->wl_lock);
				852
				853	err = schedule_erase(ubi, e1, 0);
				854	if (err) {
				855	kmem_cache_free(ubi_wl_entry_slab, e1);
				856	if (e2)
				857	kmem_cache_free(ubi_wl_entry_slab, e2);
				858	goto out_ro;
				859	}
				860
				861	if (e2) {
				862	/*
				863	* Well, the target PEB was put meanwhile, schedule it for
				864	* erasure.
				865	*/
				866	dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
				867	e2->pnum, vol_id, lnum);
				868	err = schedule_erase(ubi, e2, 0);
				869	if (err) {
				870	kmem_cache_free(ubi_wl_entry_slab, e2);
				871	goto out_ro;
				872	}
				873	}
				874
				875	dbg_wl("done");
				876	mutex_unlock(&ubi->move_mutex);
				877	return 0;
				878
				879	/*
				880	* For some reasons the LEB was not moved, might be an error, might be
				881	* something else. @e1 was not changed, so return it back. @e2 might
				882	* have been changed, schedule it for erasure.
				883	*/
				884	out_not_moved:
				885	if (vol_id != -1)
				886	dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
				887	e1->pnum, vol_id, lnum, e2->pnum, err);
				888	else
				889	dbg_wl("cancel moving PEB %d to PEB %d (%d)",
				890	e1->pnum, e2->pnum, err);
				891	spin_lock(&ubi->wl_lock);
				892	if (protect)
				893	prot_queue_add(ubi, e1);
				894	else if (erroneous) {
				895	wl_tree_add(e1, &ubi->erroneous);
				896	ubi->erroneous_peb_count += 1;
				897	} else if (scrubbing)
				898	wl_tree_add(e1, &ubi->scrub);
				899	else
				900	wl_tree_add(e1, &ubi->used);
				901	ubi_assert(!ubi->move_to_put);
				902	ubi->move_from = ubi->move_to = NULL;
				903	ubi->wl_scheduled = 0;
				904	spin_unlock(&ubi->wl_lock);
				905
				906	ubi_free_vid_hdr(ubi, vid_hdr);
				907	err = schedule_erase(ubi, e2, torture);
				908	if (err) {
				909	kmem_cache_free(ubi_wl_entry_slab, e2);
				910	goto out_ro;
				911	}
				912	mutex_unlock(&ubi->move_mutex);
				913	return 0;
				914
				915	out_error:
				916	if (vol_id != -1)
				917	ubi_err("error %d while moving PEB %d to PEB %d",
				918	err, e1->pnum, e2->pnum);
				919	else
				920	ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d",
				921	err, e1->pnum, vol_id, lnum, e2->pnum);
				922	spin_lock(&ubi->wl_lock);
				923	ubi->move_from = ubi->move_to = NULL;
				924	ubi->move_to_put = ubi->wl_scheduled = 0;
				925	spin_unlock(&ubi->wl_lock);
				926
				927	ubi_free_vid_hdr(ubi, vid_hdr);
				928	kmem_cache_free(ubi_wl_entry_slab, e1);
				929	kmem_cache_free(ubi_wl_entry_slab, e2);
				930
				931	out_ro:
				932	ubi_ro_mode(ubi);
				933	mutex_unlock(&ubi->move_mutex);
				934	ubi_assert(err != 0);
				935	return err < 0 ? err : -EIO;
				936
				937	out_cancel:
				938	ubi->wl_scheduled = 0;
				939	spin_unlock(&ubi->wl_lock);
				940	mutex_unlock(&ubi->move_mutex);
				941	ubi_free_vid_hdr(ubi, vid_hdr);
				942	return 0;
				943	}
				944
				945	/**
				946	* ensure_wear_leveling - schedule wear-leveling if it is needed.
				947	* @ubi: UBI device description object
				948	*
				949	* This function checks if it is time to start wear-leveling and schedules it
				950	* if yes. This function returns zero in case of success and a negative error
				951	* code in case of failure.
				952	*/
				953	static int ensure_wear_leveling(struct ubi_device *ubi)
				954	{
				955	int err = 0;
				956	struct ubi_wl_entry *e1;
				957	struct ubi_wl_entry *e2;
				958	struct ubi_work *wrk;
				959
				960	spin_lock(&ubi->wl_lock);
				961	if (ubi->wl_scheduled)
				962	/* Wear-leveling is already in the work queue */
				963	goto out_unlock;
				964
				965	/*
				966	* If the ubi->scrub tree is not empty, scrubbing is needed, and the
				967	* the WL worker has to be scheduled anyway.
				968	*/
				969	if (!ubi->scrub.rb_node) {
				970	if (!ubi->used.rb_node \|\| !ubi->free.rb_node)
				971	/* No physical eraseblocks - no deal */
				972	goto out_unlock;
				973
				974	/*
				975	* We schedule wear-leveling only if the difference between the
				976	* lowest erase counter of used physical eraseblocks and a high
				977	* erase counter of free physical eraseblocks is greater than
				978	* %UBI_WL_THRESHOLD.
				979	*/
				980	e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
				981	e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
				982
				983	if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
				984	goto out_unlock;
				985	dbg_wl("schedule wear-leveling");
				986	} else
				987	dbg_wl("schedule scrubbing");
				988
				989	ubi->wl_scheduled = 1;
				990	spin_unlock(&ubi->wl_lock);
				991
				992	wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
				993	if (!wrk) {
				994	err = -ENOMEM;
				995	goto out_cancel;
				996	}
				997
				998	wrk->func = &wear_leveling_worker;
				999	schedule_ubi_work(ubi, wrk);
				1000	return err;
				1001
				1002	out_cancel:
				1003	spin_lock(&ubi->wl_lock);
				1004	ubi->wl_scheduled = 0;
				1005	out_unlock:
				1006	spin_unlock(&ubi->wl_lock);
				1007	return err;
				1008	}
				1009
				1010	/**
				1011	* erase_worker - physical eraseblock erase worker function.
				1012	* @ubi: UBI device description object
				1013	* @wl_wrk: the work object
				1014	* @cancel: non-zero if the worker has to free memory and exit
				1015	*
				1016	* This function erases a physical eraseblock and perform torture testing if
				1017	* needed. It also takes care about marking the physical eraseblock bad if
				1018	* needed. Returns zero in case of success and a negative error code in case of
				1019	* failure.
				1020	*/
				1021	static int erase_worker(struct ubi_device ubi, struct ubi_work wl_wrk,
				1022	int cancel)
				1023	{
				1024	struct ubi_wl_entry *e = wl_wrk->e;
				1025	int pnum = e->pnum, err, need;
				1026
				1027	if (cancel) {
				1028	dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
				1029	kfree(wl_wrk);
				1030	kmem_cache_free(ubi_wl_entry_slab, e);
				1031	return 0;
				1032	}
				1033
				1034	dbg_wl("erase PEB %d EC %d", pnum, e->ec);
				1035
				1036	err = sync_erase(ubi, e, wl_wrk->torture);
				1037	if (!err) {
				1038	/* Fine, we've erased it successfully */
				1039	kfree(wl_wrk);
				1040
				1041	spin_lock(&ubi->wl_lock);
				1042	wl_tree_add(e, &ubi->free);
				1043	spin_unlock(&ubi->wl_lock);
				1044
				1045	/*
				1046	* One more erase operation has happened, take care about
				1047	* protected physical eraseblocks.
				1048	*/
				1049	serve_prot_queue(ubi);
				1050
				1051	/* And take care about wear-leveling */
				1052	err = ensure_wear_leveling(ubi);
				1053	return err;
				1054	}
				1055
				1056	ubi_err("failed to erase PEB %d, error %d", pnum, err);
				1057	kfree(wl_wrk);
				1058
				1059	if (err == -EINTR \|\| err == -ENOMEM \|\| err == -EAGAIN \|\|
				1060	err == -EBUSY) {
				1061	int err1;
				1062
				1063	/* Re-schedule the LEB for erasure */
				1064	err1 = schedule_erase(ubi, e, 0);
				1065	if (err1) {
				1066	err = err1;
				1067	goto out_ro;
				1068	}
				1069	return err;
				1070	}
				1071
				1072	kmem_cache_free(ubi_wl_entry_slab, e);
				1073	if (err != -EIO)
				1074	/*
				1075	* If this is not %-EIO, we have no idea what to do. Scheduling
				1076	* this physical eraseblock for erasure again would cause
				1077	* errors again and again. Well, lets switch to R/O mode.
				1078	*/
				1079	goto out_ro;
				1080
				1081	/* It is %-EIO, the PEB went bad */
				1082
				1083	if (!ubi->bad_allowed) {
				1084	ubi_err("bad physical eraseblock %d detected", pnum);
				1085	goto out_ro;
				1086	}
				1087
				1088	spin_lock(&ubi->volumes_lock);
				1089	need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1;
				1090	if (need > 0) {
				1091	need = ubi->avail_pebs >= need ? need : ubi->avail_pebs;
				1092	ubi->avail_pebs -= need;
				1093	ubi->rsvd_pebs += need;
				1094	ubi->beb_rsvd_pebs += need;
				1095	if (need > 0)
				1096	ubi_msg("reserve more %d PEBs", need);
				1097	}
				1098
				1099	if (ubi->beb_rsvd_pebs == 0) {
				1100	spin_unlock(&ubi->volumes_lock);
				1101	ubi_err("no reserved physical eraseblocks");
				1102	goto out_ro;
				1103	}
				1104	spin_unlock(&ubi->volumes_lock);
				1105
				1106	ubi_msg("mark PEB %d as bad", pnum);
				1107	err = ubi_io_mark_bad(ubi, pnum);
				1108	if (err)
				1109	goto out_ro;
				1110
				1111	spin_lock(&ubi->volumes_lock);
				1112	ubi->beb_rsvd_pebs -= 1;
				1113	ubi->bad_peb_count += 1;
				1114	ubi->good_peb_count -= 1;
				1115	ubi_calculate_reserved(ubi);
				1116	if (ubi->beb_rsvd_pebs)
				1117	ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs);
				1118	else
				1119	ubi_warn("last PEB from the reserved pool was used");
				1120	spin_unlock(&ubi->volumes_lock);
				1121
				1122	return err;
				1123
				1124	out_ro:
				1125	ubi_ro_mode(ubi);
				1126	return err;
				1127	}
				1128
				1129	/**
				1130	* ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
				1131	* @ubi: UBI device description object
				1132	* @pnum: physical eraseblock to return
				1133	* @torture: if this physical eraseblock has to be tortured
				1134	*
				1135	* This function is called to return physical eraseblock @pnum to the pool of
				1136	* free physical eraseblocks. The @torture flag has to be set if an I/O error
				1137	* occurred to this @pnum and it has to be tested. This function returns zero
				1138	* in case of success, and a negative error code in case of failure.
				1139	*/
				1140	int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
				1141	{
				1142	int err;
				1143	struct ubi_wl_entry *e;
				1144
				1145	dbg_wl("PEB %d", pnum);
				1146	ubi_assert(pnum >= 0);
				1147	ubi_assert(pnum < ubi->peb_count);
				1148
				1149	retry:
				1150	spin_lock(&ubi->wl_lock);
				1151	e = ubi->lookuptbl[pnum];
				1152	if (e == ubi->move_from) {
				1153	/*
				1154	* User is putting the physical eraseblock which was selected to
				1155	* be moved. It will be scheduled for erasure in the
				1156	* wear-leveling worker.
				1157	*/
				1158	dbg_wl("PEB %d is being moved, wait", pnum);
				1159	spin_unlock(&ubi->wl_lock);
				1160
				1161	/* Wait for the WL worker by taking the @ubi->move_mutex */
				1162	mutex_lock(&ubi->move_mutex);
				1163	mutex_unlock(&ubi->move_mutex);
				1164	goto retry;
				1165	} else if (e == ubi->move_to) {
				1166	/*
				1167	* User is putting the physical eraseblock which was selected
				1168	* as the target the data is moved to. It may happen if the EBA
				1169	* sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
				1170	* but the WL sub-system has not put the PEB to the "used" tree
				1171	* yet, but it is about to do this. So we just set a flag which
				1172	* will tell the WL worker that the PEB is not needed anymore
				1173	* and should be scheduled for erasure.
				1174	*/
				1175	dbg_wl("PEB %d is the target of data moving", pnum);
				1176	ubi_assert(!ubi->move_to_put);
				1177	ubi->move_to_put = 1;
				1178	spin_unlock(&ubi->wl_lock);
				1179	return 0;
				1180	} else {
				1181	if (in_wl_tree(e, &ubi->used)) {
				1182	paranoid_check_in_wl_tree(ubi, e, &ubi->used);
				1183	rb_erase(&e->u.rb, &ubi->used);
				1184	} else if (in_wl_tree(e, &ubi->scrub)) {
				1185	paranoid_check_in_wl_tree(ubi, e, &ubi->scrub);
				1186	rb_erase(&e->u.rb, &ubi->scrub);
				1187	} else if (in_wl_tree(e, &ubi->erroneous)) {
				1188	paranoid_check_in_wl_tree(ubi, e, &ubi->erroneous);
				1189	rb_erase(&e->u.rb, &ubi->erroneous);
				1190	ubi->erroneous_peb_count -= 1;
				1191	ubi_assert(ubi->erroneous_peb_count >= 0);
				1192	/* Erroneous PEBs should be tortured */
				1193	torture = 1;
				1194	} else {
				1195	err = prot_queue_del(ubi, e->pnum);
				1196	if (err) {
				1197	ubi_err("PEB %d not found", pnum);
				1198	ubi_ro_mode(ubi);
				1199	spin_unlock(&ubi->wl_lock);
				1200	return err;
				1201	}
				1202	}
				1203	}
				1204	spin_unlock(&ubi->wl_lock);
				1205
				1206	err = schedule_erase(ubi, e, torture);
				1207	if (err) {
				1208	spin_lock(&ubi->wl_lock);
				1209	wl_tree_add(e, &ubi->used);
				1210	spin_unlock(&ubi->wl_lock);
				1211	}
				1212
				1213	return err;
				1214	}
				1215
				1216	/**
				1217	* ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
				1218	* @ubi: UBI device description object
				1219	* @pnum: the physical eraseblock to schedule
				1220	*
				1221	* If a bit-flip in a physical eraseblock is detected, this physical eraseblock
				1222	* needs scrubbing. This function schedules a physical eraseblock for
				1223	* scrubbing which is done in background. This function returns zero in case of
				1224	* success and a negative error code in case of failure.
				1225	*/
				1226	int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
				1227	{
				1228	struct ubi_wl_entry *e;
				1229
				1230	dbg_msg("schedule PEB %d for scrubbing", pnum);
				1231
				1232	retry:
				1233	spin_lock(&ubi->wl_lock);
				1234	e = ubi->lookuptbl[pnum];
				1235	if (e == ubi->move_from \|\| in_wl_tree(e, &ubi->scrub) \|\|
				1236	in_wl_tree(e, &ubi->erroneous)) {
				1237	spin_unlock(&ubi->wl_lock);
				1238	return 0;
				1239	}
				1240
				1241	if (e == ubi->move_to) {
				1242	/*
				1243	* This physical eraseblock was used to move data to. The data
				1244	* was moved but the PEB was not yet inserted to the proper
				1245	* tree. We should just wait a little and let the WL worker
				1246	* proceed.
				1247	*/
				1248	spin_unlock(&ubi->wl_lock);
				1249	dbg_wl("the PEB %d is not in proper tree, retry", pnum);
				1250	yield();
				1251	goto retry;
				1252	}
				1253
				1254	if (in_wl_tree(e, &ubi->used)) {
				1255	paranoid_check_in_wl_tree(ubi, e, &ubi->used);
				1256	rb_erase(&e->u.rb, &ubi->used);
				1257	} else {
				1258	int err;
				1259
				1260	err = prot_queue_del(ubi, e->pnum);
				1261	if (err) {
				1262	ubi_err("PEB %d not found", pnum);
				1263	ubi_ro_mode(ubi);
				1264	spin_unlock(&ubi->wl_lock);
				1265	return err;
				1266	}
				1267	}
				1268
				1269	wl_tree_add(e, &ubi->scrub);
				1270	spin_unlock(&ubi->wl_lock);
				1271
				1272	/*
				1273	* Technically scrubbing is the same as wear-leveling, so it is done
				1274	* by the WL worker.
				1275	*/
				1276	return ensure_wear_leveling(ubi);
				1277	}
				1278
				1279	/**
				1280	* ubi_wl_flush - flush all pending works.
				1281	* @ubi: UBI device description object
				1282	*
				1283	* This function returns zero in case of success and a negative error code in
				1284	* case of failure.
				1285	*/
				1286	int ubi_wl_flush(struct ubi_device *ubi)
				1287	{
				1288	int err;
				1289
				1290	/*
				1291	* Erase while the pending works queue is not empty, but not more than
				1292	* the number of currently pending works.
				1293	*/
				1294	dbg_wl("flush (%d pending works)", ubi->works_count);
				1295	while (ubi->works_count) {
				1296	err = do_work(ubi);
				1297	if (err)
				1298	return err;
				1299	}
				1300
				1301	/*
				1302	* Make sure all the works which have been done in parallel are
				1303	* finished.
				1304	*/
				1305	down_write(&ubi->work_sem);
				1306	up_write(&ubi->work_sem);
				1307
				1308	/*
				1309	* And in case last was the WL worker and it canceled the LEB
				1310	* movement, flush again.
				1311	*/
				1312	while (ubi->works_count) {
				1313	dbg_wl("flush more (%d pending works)", ubi->works_count);
				1314	err = do_work(ubi);
				1315	if (err)
				1316	return err;
				1317	}
				1318
				1319	return 0;
				1320	}
				1321
				1322	/**
				1323	* tree_destroy - destroy an RB-tree.
				1324	* @root: the root of the tree to destroy
				1325	*/
				1326	static void tree_destroy(struct rb_root *root)
				1327	{
				1328	struct rb_node *rb;
				1329	struct ubi_wl_entry *e;
				1330
				1331	rb = root->rb_node;
				1332	while (rb) {
				1333	if (rb->rb_left)
				1334	rb = rb->rb_left;
				1335	else if (rb->rb_right)
				1336	rb = rb->rb_right;
				1337	else {
				1338	e = rb_entry(rb, struct ubi_wl_entry, u.rb);
				1339
				1340	rb = rb_parent(rb);
				1341	if (rb) {
				1342	if (rb->rb_left == &e->u.rb)
				1343	rb->rb_left = NULL;
				1344	else
				1345	rb->rb_right = NULL;
				1346	}
				1347
				1348	kmem_cache_free(ubi_wl_entry_slab, e);
				1349	}
				1350	}
				1351	}
				1352
				1353	/**
				1354	* ubi_thread - UBI background thread.
				1355	* @u: the UBI device description object pointer
				1356	*/
				1357	int ubi_thread(void *u)
				1358	{
				1359	int failures = 0;
				1360	struct ubi_device *ubi = u;
				1361
				1362	ubi_msg("background thread \"%s\" started, PID %d",
				1363	ubi->bgt_name, task_pid_nr(current));
				1364
				1365	set_freezable();
				1366	for (;;) {
				1367	int err;
				1368
				1369	if (kthread_should_stop())
				1370	break;
				1371
				1372	if (try_to_freeze())
				1373	continue;
				1374
				1375	spin_lock(&ubi->wl_lock);
				1376	if (list_empty(&ubi->works) \|\| ubi->ro_mode \|\|
				1377	!ubi->thread_enabled \|\| ubi_dbg_is_bgt_disabled(ubi)) {
				1378	set_current_state(TASK_INTERRUPTIBLE);
				1379	spin_unlock(&ubi->wl_lock);
				1380	schedule();
				1381	continue;
				1382	}
				1383	spin_unlock(&ubi->wl_lock);
				1384
				1385	err = do_work(ubi);
				1386	if (err) {
				1387	ubi_err("%s: work failed with error code %d",
				1388	ubi->bgt_name, err);
				1389	if (failures++ > WL_MAX_FAILURES) {
				1390	/*
				1391	* Too many failures, disable the thread and
				1392	* switch to read-only mode.
				1393	*/
				1394	ubi_msg("%s: %d consecutive failures",
				1395	ubi->bgt_name, WL_MAX_FAILURES);
				1396	ubi_ro_mode(ubi);
				1397	ubi->thread_enabled = 0;
				1398	continue;
				1399	}
				1400	} else
				1401	failures = 0;
				1402
				1403	cond_resched();
				1404	}
				1405
				1406	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
				1407	return 0;
				1408	}
				1409
				1410	/**
				1411	* cancel_pending - cancel all pending works.
				1412	* @ubi: UBI device description object
				1413	*/
				1414	static void cancel_pending(struct ubi_device *ubi)
				1415	{
				1416	while (!list_empty(&ubi->works)) {
				1417	struct ubi_work *wrk;
				1418
				1419	wrk = list_entry(ubi->works.next, struct ubi_work, list);
				1420	list_del(&wrk->list);
				1421	wrk->func(ubi, wrk, 1);
				1422	ubi->works_count -= 1;
				1423	ubi_assert(ubi->works_count >= 0);
				1424	}
				1425	}
				1426
				1427	/**
				1428	* ubi_wl_init_scan - initialize the WL sub-system using scanning information.
				1429	* @ubi: UBI device description object
				1430	* @si: scanning information
				1431	*
				1432	* This function returns zero in case of success, and a negative error code in
				1433	* case of failure.
				1434	*/
				1435	int ubi_wl_init_scan(struct ubi_device ubi, struct ubi_scan_info si)
				1436	{
				1437	int err, i;
				1438	struct rb_node rb1, rb2;
				1439	struct ubi_scan_volume *sv;
				1440	struct ubi_scan_leb seb, tmp;
				1441	struct ubi_wl_entry *e;
				1442
				1443	ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT;
				1444	spin_lock_init(&ubi->wl_lock);
				1445	mutex_init(&ubi->move_mutex);
				1446	init_rwsem(&ubi->work_sem);
				1447	ubi->max_ec = si->max_ec;
				1448	INIT_LIST_HEAD(&ubi->works);
				1449
				1450	snprintf(ubi->bgt_name, sizeof(ubi->bgt_name), UBI_BGT_NAME_PATTERN, ubi->ubi_num);
				1451
				1452	err = -ENOMEM;
				1453	ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL);
				1454	if (!ubi->lookuptbl)
				1455	return err;
				1456
				1457	for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
				1458	INIT_LIST_HEAD(&ubi->pq[i]);
				1459	ubi->pq_head = 0;
				1460
				1461	list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
				1462	cond_resched();
				1463
				1464	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
				1465	if (!e)
				1466	goto out_free;
				1467
				1468	e->pnum = seb->pnum;
				1469	e->ec = seb->ec;
				1470	ubi->lookuptbl[e->pnum] = e;
				1471	if (schedule_erase(ubi, e, 0)) {
				1472	kmem_cache_free(ubi_wl_entry_slab, e);
				1473	goto out_free;
				1474	}
				1475	}
				1476
				1477	list_for_each_entry(seb, &si->free, u.list) {
				1478	cond_resched();
				1479
				1480	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
				1481	if (!e)
				1482	goto out_free;
				1483
				1484	e->pnum = seb->pnum;
				1485	e->ec = seb->ec;
				1486	ubi_assert(e->ec >= 0);
				1487	wl_tree_add(e, &ubi->free);
				1488	ubi->lookuptbl[e->pnum] = e;
				1489	}
				1490
				1491	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
				1492	ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
				1493	cond_resched();
				1494
				1495	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
				1496	if (!e)
				1497	goto out_free;
				1498
				1499	e->pnum = seb->pnum;
				1500	e->ec = seb->ec;
				1501	ubi->lookuptbl[e->pnum] = e;
				1502	if (!seb->scrub) {
				1503	dbg_wl("add PEB %d EC %d to the used tree",
				1504	e->pnum, e->ec);
				1505	wl_tree_add(e, &ubi->used);
				1506	} else {
				1507	dbg_wl("add PEB %d EC %d to the scrub tree",
				1508	e->pnum, e->ec);
				1509	wl_tree_add(e, &ubi->scrub);
				1510	}
				1511	}
				1512	}
				1513
				1514	if (ubi->avail_pebs < WL_RESERVED_PEBS) {
				1515	ubi_err("no enough physical eraseblocks (%d, need %d)",
				1516	ubi->avail_pebs, WL_RESERVED_PEBS);
				1517	if (ubi->corr_peb_count)
				1518	ubi_err("%d PEBs are corrupted and not used",
				1519	ubi->corr_peb_count);
				1520	err = -ENOSPC;
				1521	goto out_free;
				1522	}
				1523	ubi->avail_pebs -= WL_RESERVED_PEBS;
				1524	ubi->rsvd_pebs += WL_RESERVED_PEBS;
				1525
				1526	/* Schedule wear-leveling if needed */
				1527	err = ensure_wear_leveling(ubi);
				1528	if (err)
				1529	goto out_free;
				1530
				1531	return 0;
				1532
				1533	out_free:
				1534	cancel_pending(ubi);
				1535	tree_destroy(&ubi->used);
				1536	tree_destroy(&ubi->free);
				1537	tree_destroy(&ubi->scrub);
				1538	kfree(ubi->lookuptbl);
				1539	return err;
				1540	}
				1541
				1542	/**
				1543	* protection_queue_destroy - destroy the protection queue.
				1544	* @ubi: UBI device description object
				1545	*/
				1546	static void protection_queue_destroy(struct ubi_device *ubi)
				1547	{
				1548	int i;
				1549	struct ubi_wl_entry e, tmp;
				1550
				1551	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
				1552	list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
				1553	list_del(&e->u.list);
				1554	kmem_cache_free(ubi_wl_entry_slab, e);
				1555	}
				1556	}
				1557	}
				1558
				1559	/**
				1560	* ubi_wl_close - close the wear-leveling sub-system.
				1561	* @ubi: UBI device description object
				1562	*/
				1563	void ubi_wl_close(struct ubi_device *ubi)
				1564	{
				1565	dbg_wl("close the WL sub-system");
				1566	cancel_pending(ubi);
				1567	protection_queue_destroy(ubi);
				1568	tree_destroy(&ubi->used);
				1569	tree_destroy(&ubi->erroneous);
				1570	tree_destroy(&ubi->free);
				1571	tree_destroy(&ubi->scrub);
				1572	kfree(ubi->lookuptbl);
				1573	}
				1574
				1575	#ifdef CONFIG_MTD_UBI_DEBUG
				1576
				1577	/**
				1578	* paranoid_check_ec - make sure that the erase counter of a PEB is correct.
				1579	* @ubi: UBI device description object
				1580	* @pnum: the physical eraseblock number to check
				1581	* @ec: the erase counter to check
				1582	*
				1583	* This function returns zero if the erase counter of physical eraseblock @pnum
				1584	* is equivalent to @ec, and a negative error code if not or if an error
				1585	* occurred.
				1586	*/
				1587	static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
				1588	{
				1589	int err;
				1590	long long read_ec;
				1591	struct ubi_ec_hdr *ec_hdr;
				1592
				1593	if (!ubi->dbg->chk_gen)
				1594	return 0;
				1595
				1596	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
				1597	if (!ec_hdr)
				1598	return -ENOMEM;
				1599
				1600	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
				1601	if (err && err != UBI_IO_BITFLIPS) {
				1602	/* The header does not have to exist */
				1603	err = 0;
				1604	goto out_free;
				1605	}
				1606
				1607	read_ec = be64_to_cpu(ec_hdr->ec);
				1608	if (ec != read_ec) {
				1609	ubi_err("paranoid check failed for PEB %d", pnum);
				1610	ubi_err("read EC is %lld, should be %d", read_ec, ec);
				1611	ubi_dbg_dump_stack();
				1612	err = 1;
				1613	} else
				1614	err = 0;
				1615
				1616	out_free:
				1617	kfree(ec_hdr);
				1618	return err;
				1619	}
				1620
				1621	/**
				1622	* paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
				1623	* @ubi: UBI device description object
				1624	* @e: the wear-leveling entry to check
				1625	* @root: the root of the tree
				1626	*
				1627	* This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
				1628	* is not.
				1629	*/
				1630	static int paranoid_check_in_wl_tree(const struct ubi_device *ubi,
				1631	struct ubi_wl_entry *e,
				1632	struct rb_root *root)
				1633	{
				1634	if (!ubi->dbg->chk_gen)
				1635	return 0;
				1636
				1637	if (in_wl_tree(e, root))
				1638	return 0;
				1639
				1640	ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
				1641	e->pnum, e->ec, root);
				1642	ubi_dbg_dump_stack();
				1643	return -EINVAL;
				1644	}
				1645
				1646	/**
				1647	* paranoid_check_in_pq - check if wear-leveling entry is in the protection
				1648	* queue.
				1649	* @ubi: UBI device description object
				1650	* @e: the wear-leveling entry to check
				1651	*
				1652	* This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
				1653	*/
				1654	static int paranoid_check_in_pq(const struct ubi_device *ubi,
				1655	struct ubi_wl_entry *e)
				1656	{
				1657	struct ubi_wl_entry *p;
				1658	int i;
				1659
				1660	if (!ubi->dbg->chk_gen)
				1661	return 0;
				1662
				1663	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
				1664	list_for_each_entry(p, &ubi->pq[i], u.list)
				1665	if (p == e)
				1666	return 0;
				1667
				1668	ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue",
				1669	e->pnum, e->ec);
				1670	ubi_dbg_dump_stack();
				1671	return -EINVAL;
				1672	}
				1673
				1674	#endif /* CONFIG_MTD_UBI_DEBUG */