Blame - src/kernel/linux/v4.14/drivers/md/bcache/journal.c - T103

blob: 6aafda26903c86708697ffad12d6689d26589c08 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* bcache journalling code, for btree insertions
				4	*
				5	* Copyright 2012 Google, Inc.
				6	*/
				7
				8	#include "bcache.h"
				9	#include "btree.h"
				10	#include "debug.h"
				11	#include "extents.h"
				12
				13	#include <trace/events/bcache.h>
				14
				15	/*
				16	* Journal replay/recovery:
				17	*
				18	* This code is all driven from run_cache_set(); we first read the journal
				19	* entries, do some other stuff, then we mark all the keys in the journal
				20	* entries (same as garbage collection would), then we replay them - reinserting
				21	* them into the cache in precisely the same order as they appear in the
				22	* journal.
				23	*
				24	* We only journal keys that go in leaf nodes, which simplifies things quite a
				25	* bit.
				26	*/
				27
				28	static void journal_read_endio(struct bio *bio)
				29	{
				30	struct closure *cl = bio->bi_private;
				31	closure_put(cl);
				32	}
				33
				34	static int journal_read_bucket(struct cache ca, struct list_head list,
				35	unsigned bucket_index)
				36	{
				37	struct journal_device *ja = &ca->journal;
				38	struct bio *bio = &ja->bio;
				39
				40	struct journal_replay *i;
				41	struct jset j, data = ca->set->journal.w[0].data;
				42	struct closure cl;
				43	unsigned len, left, offset = 0;
				44	int ret = 0;
				45	sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
				46
				47	closure_init_stack(&cl);
				48
				49	pr_debug("reading %u", bucket_index);
				50
				51	while (offset < ca->sb.bucket_size) {
				52	reread: left = ca->sb.bucket_size - offset;
				53	len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS);
				54
				55	bio_reset(bio);
				56	bio->bi_iter.bi_sector = bucket + offset;
				57	bio_set_dev(bio, ca->bdev);
				58	bio->bi_iter.bi_size = len << 9;
				59
				60	bio->bi_end_io = journal_read_endio;
				61	bio->bi_private = &cl;
				62	bio_set_op_attrs(bio, REQ_OP_READ, 0);
				63	bch_bio_map(bio, data);
				64
				65	closure_bio_submit(bio, &cl);
				66	closure_sync(&cl);
				67
				68	/* This function could be simpler now since we no longer write
				69	* journal entries that overlap bucket boundaries; this means
				70	* the start of a bucket will always have a valid journal entry
				71	* if it has any journal entries at all.
				72	*/
				73
				74	j = data;
				75	while (len) {
				76	struct list_head *where;
				77	size_t blocks, bytes = set_bytes(j);
				78
				79	if (j->magic != jset_magic(&ca->sb)) {
				80	pr_debug("%u: bad magic", bucket_index);
				81	return ret;
				82	}
				83
				84	if (bytes > left << 9 \|\|
				85	bytes > PAGE_SIZE << JSET_BITS) {
				86	pr_info("%u: too big, %zu bytes, offset %u",
				87	bucket_index, bytes, offset);
				88	return ret;
				89	}
				90
				91	if (bytes > len << 9)
				92	goto reread;
				93
				94	if (j->csum != csum_set(j)) {
				95	pr_info("%u: bad csum, %zu bytes, offset %u",
				96	bucket_index, bytes, offset);
				97	return ret;
				98	}
				99
				100	blocks = set_blocks(j, block_bytes(ca->set));
				101
				102	while (!list_empty(list)) {
				103	i = list_first_entry(list,
				104	struct journal_replay, list);
				105	if (i->j.seq >= j->last_seq)
				106	break;
				107	list_del(&i->list);
				108	kfree(i);
				109	}
				110
				111	list_for_each_entry_reverse(i, list, list) {
				112	if (j->seq == i->j.seq)
				113	goto next_set;
				114
				115	if (j->seq < i->j.last_seq)
				116	goto next_set;
				117
				118	if (j->seq > i->j.seq) {
				119	where = &i->list;
				120	goto add;
				121	}
				122	}
				123
				124	where = list;
				125	add:
				126	i = kmalloc(offsetof(struct journal_replay, j) +
				127	bytes, GFP_KERNEL);
				128	if (!i)
				129	return -ENOMEM;
				130	memcpy(&i->j, j, bytes);
				131	list_add(&i->list, where);
				132	ret = 1;
				133
				134	ja->seq[bucket_index] = j->seq;
				135	next_set:
				136	offset += blocks * ca->sb.block_size;
				137	len -= blocks * ca->sb.block_size;
				138	j = ((void ) j) + blocks block_bytes(ca);
				139	}
				140	}
				141
				142	return ret;
				143	}
				144
				145	int bch_journal_read(struct cache_set c, struct list_head list)
				146	{
				147	#define read_bucket(b) \
				148	({ \
				149	int ret = journal_read_bucket(ca, list, b); \
				150	__set_bit(b, bitmap); \
				151	if (ret < 0) \
				152	return ret; \
				153	ret; \
				154	})
				155
				156	struct cache *ca;
				157	unsigned iter;
				158
				159	for_each_cache(ca, c, iter) {
				160	struct journal_device *ja = &ca->journal;
				161	DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
				162	unsigned i, l, r, m;
				163	uint64_t seq;
				164
				165	bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
				166	pr_debug("%u journal buckets", ca->sb.njournal_buckets);
				167
				168	/*
				169	* Read journal buckets ordered by golden ratio hash to quickly
				170	* find a sequence of buckets with valid journal entries
				171	*/
				172	for (i = 0; i < ca->sb.njournal_buckets; i++) {
				173	l = (i * 2654435769U) % ca->sb.njournal_buckets;
				174
				175	if (test_bit(l, bitmap))
				176	break;
				177
				178	if (read_bucket(l))
				179	goto bsearch;
				180	}
				181
				182	/*
				183	* If that fails, check all the buckets we haven't checked
				184	* already
				185	*/
				186	pr_debug("falling back to linear search");
				187
				188	for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
				189	l < ca->sb.njournal_buckets;
				190	l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
				191	if (read_bucket(l))
				192	goto bsearch;
				193
				194	/* no journal entries on this device? */
				195	if (l == ca->sb.njournal_buckets)
				196	continue;
				197	bsearch:
				198	BUG_ON(list_empty(list));
				199
				200	/* Binary search */
				201	m = l;
				202	r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
				203	pr_debug("starting binary search, l %u r %u", l, r);
				204
				205	while (l + 1 < r) {
				206	seq = list_entry(list->prev, struct journal_replay,
				207	list)->j.seq;
				208
				209	m = (l + r) >> 1;
				210	read_bucket(m);
				211
				212	if (seq != list_entry(list->prev, struct journal_replay,
				213	list)->j.seq)
				214	l = m;
				215	else
				216	r = m;
				217	}
				218
				219	/*
				220	* Read buckets in reverse order until we stop finding more
				221	* journal entries
				222	*/
				223	pr_debug("finishing up: m %u njournal_buckets %u",
				224	m, ca->sb.njournal_buckets);
				225	l = m;
				226
				227	while (1) {
				228	if (!l--)
				229	l = ca->sb.njournal_buckets - 1;
				230
				231	if (l == m)
				232	break;
				233
				234	if (test_bit(l, bitmap))
				235	continue;
				236
				237	if (!read_bucket(l))
				238	break;
				239	}
				240
				241	seq = 0;
				242
				243	for (i = 0; i < ca->sb.njournal_buckets; i++)
				244	if (ja->seq[i] > seq) {
				245	seq = ja->seq[i];
				246	/*
				247	* When journal_reclaim() goes to allocate for
				248	* the first time, it'll use the bucket after
				249	* ja->cur_idx
				250	*/
				251	ja->cur_idx = i;
				252	ja->last_idx = ja->discard_idx = (i + 1) %
				253	ca->sb.njournal_buckets;
				254
				255	}
				256	}
				257
				258	if (!list_empty(list))
				259	c->journal.seq = list_entry(list->prev,
				260	struct journal_replay,
				261	list)->j.seq;
				262
				263	return 0;
				264	#undef read_bucket
				265	}
				266
				267	void bch_journal_mark(struct cache_set c, struct list_head list)
				268	{
				269	atomic_t p = { 0 };
				270	struct bkey *k;
				271	struct journal_replay *i;
				272	struct journal *j = &c->journal;
				273	uint64_t last = j->seq;
				274
				275	/*
				276	* journal.pin should never fill up - we never write a journal
				277	* entry when it would fill up. But if for some reason it does, we
				278	* iterate over the list in reverse order so that we can just skip that
				279	* refcount instead of bugging.
				280	*/
				281
				282	list_for_each_entry_reverse(i, list, list) {
				283	BUG_ON(last < i->j.seq);
				284	i->pin = NULL;
				285
				286	while (last-- != i->j.seq)
				287	if (fifo_free(&j->pin) > 1) {
				288	fifo_push_front(&j->pin, p);
				289	atomic_set(&fifo_front(&j->pin), 0);
				290	}
				291
				292	if (fifo_free(&j->pin) > 1) {
				293	fifo_push_front(&j->pin, p);
				294	i->pin = &fifo_front(&j->pin);
				295	atomic_set(i->pin, 1);
				296	}
				297
				298	for (k = i->j.start;
				299	k < bset_bkey_last(&i->j);
				300	k = bkey_next(k))
				301	if (!__bch_extent_invalid(c, k)) {
				302	unsigned j;
				303
				304	for (j = 0; j < KEY_PTRS(k); j++)
				305	if (ptr_available(c, k, j))
				306	atomic_inc(&PTR_BUCKET(c, k, j)->pin);
				307
				308	bch_initial_mark_key(c, 0, k);
				309	}
				310	}
				311	}
				312
				313	bool is_discard_enabled(struct cache_set *s)
				314	{
				315	struct cache *ca;
				316	unsigned int i;
				317
				318	for_each_cache(ca, s, i)
				319	if (ca->discard)
				320	return true;
				321
				322	return false;
				323	}
				324
				325	int bch_journal_replay(struct cache_set s, struct list_head list)
				326	{
				327	int ret = 0, keys = 0, entries = 0;
				328	struct bkey *k;
				329	struct journal_replay *i =
				330	list_entry(list->prev, struct journal_replay, list);
				331
				332	uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
				333	struct keylist keylist;
				334
				335	list_for_each_entry(i, list, list) {
				336	BUG_ON(i->pin && atomic_read(i->pin) != 1);
				337
				338	if (n != i->j.seq) {
				339	if (n == start && is_discard_enabled(s))
				340	pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)",
				341	n, i->j.seq - 1, start, end);
				342	else {
				343	pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
				344	n, i->j.seq - 1, start, end);
				345	ret = -EIO;
				346	goto err;
				347	}
				348	}
				349
				350	for (k = i->j.start;
				351	k < bset_bkey_last(&i->j);
				352	k = bkey_next(k)) {
				353	trace_bcache_journal_replay_key(k);
				354
				355	bch_keylist_init_single(&keylist, k);
				356
				357	ret = bch_btree_insert(s, &keylist, i->pin, NULL);
				358	if (ret)
				359	goto err;
				360
				361	BUG_ON(!bch_keylist_empty(&keylist));
				362	keys++;
				363
				364	cond_resched();
				365	}
				366
				367	if (i->pin)
				368	atomic_dec(i->pin);
				369	n = i->j.seq + 1;
				370	entries++;
				371	}
				372
				373	pr_info("journal replay done, %i keys in %i entries, seq %llu",
				374	keys, entries, end);
				375	err:
				376	while (!list_empty(list)) {
				377	i = list_first_entry(list, struct journal_replay, list);
				378	list_del(&i->list);
				379	kfree(i);
				380	}
				381
				382	return ret;
				383	}
				384
				385	/* Journalling */
				386
				387	static void btree_flush_write(struct cache_set *c)
				388	{
				389	/*
				390	* Try to find the btree node with that references the oldest journal
				391	* entry, best is our current candidate and is locked if non NULL:
				392	*/
				393	struct btree b, best;
				394	unsigned i;
				395	retry:
				396	best = NULL;
				397
				398	for_each_cached_btree(b, c, i)
				399	if (btree_current_write(b)->journal) {
				400	if (!best)
				401	best = b;
				402	else if (journal_pin_cmp(c,
				403	btree_current_write(best)->journal,
				404	btree_current_write(b)->journal)) {
				405	best = b;
				406	}
				407	}
				408
				409	b = best;
				410	if (b) {
				411	mutex_lock(&b->write_lock);
				412	if (!btree_current_write(b)->journal) {
				413	mutex_unlock(&b->write_lock);
				414	/* We raced */
				415	goto retry;
				416	}
				417
				418	__bch_btree_node_write(b, NULL);
				419	mutex_unlock(&b->write_lock);
				420	}
				421	}
				422
				423	#define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1)
				424
				425	static void journal_discard_endio(struct bio *bio)
				426	{
				427	struct journal_device *ja =
				428	container_of(bio, struct journal_device, discard_bio);
				429	struct cache *ca = container_of(ja, struct cache, journal);
				430
				431	atomic_set(&ja->discard_in_flight, DISCARD_DONE);
				432
				433	closure_wake_up(&ca->set->journal.wait);
				434	closure_put(&ca->set->cl);
				435	}
				436
				437	static void journal_discard_work(struct work_struct *work)
				438	{
				439	struct journal_device *ja =
				440	container_of(work, struct journal_device, discard_work);
				441
				442	submit_bio(&ja->discard_bio);
				443	}
				444
				445	static void do_journal_discard(struct cache *ca)
				446	{
				447	struct journal_device *ja = &ca->journal;
				448	struct bio *bio = &ja->discard_bio;
				449
				450	if (!ca->discard) {
				451	ja->discard_idx = ja->last_idx;
				452	return;
				453	}
				454
				455	switch (atomic_read(&ja->discard_in_flight)) {
				456	case DISCARD_IN_FLIGHT:
				457	return;
				458
				459	case DISCARD_DONE:
				460	ja->discard_idx = (ja->discard_idx + 1) %
				461	ca->sb.njournal_buckets;
				462
				463	atomic_set(&ja->discard_in_flight, DISCARD_READY);
				464	/* fallthrough */
				465
				466	case DISCARD_READY:
				467	if (ja->discard_idx == ja->last_idx)
				468	return;
				469
				470	atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
				471
				472	bio_init(bio, bio->bi_inline_vecs, 1);
				473	bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
				474	bio->bi_iter.bi_sector = bucket_to_sector(ca->set,
				475	ca->sb.d[ja->discard_idx]);
				476	bio_set_dev(bio, ca->bdev);
				477	bio->bi_iter.bi_size = bucket_bytes(ca);
				478	bio->bi_end_io = journal_discard_endio;
				479
				480	closure_get(&ca->set->cl);
				481	INIT_WORK(&ja->discard_work, journal_discard_work);
				482	schedule_work(&ja->discard_work);
				483	}
				484	}
				485
				486	static void journal_reclaim(struct cache_set *c)
				487	{
				488	struct bkey *k = &c->journal.key;
				489	struct cache *ca;
				490	uint64_t last_seq;
				491	unsigned iter, n = 0;
				492	atomic_t p;
				493
				494	while (!atomic_read(&fifo_front(&c->journal.pin)))
				495	fifo_pop(&c->journal.pin, p);
				496
				497	last_seq = last_seq(&c->journal);
				498
				499	/* Update last_idx */
				500
				501	for_each_cache(ca, c, iter) {
				502	struct journal_device *ja = &ca->journal;
				503
				504	while (ja->last_idx != ja->cur_idx &&
				505	ja->seq[ja->last_idx] < last_seq)
				506	ja->last_idx = (ja->last_idx + 1) %
				507	ca->sb.njournal_buckets;
				508	}
				509
				510	for_each_cache(ca, c, iter)
				511	do_journal_discard(ca);
				512
				513	if (c->journal.blocks_free)
				514	goto out;
				515
				516	/*
				517	* Allocate:
				518	* XXX: Sort by free journal space
				519	*/
				520
				521	for_each_cache(ca, c, iter) {
				522	struct journal_device *ja = &ca->journal;
				523	unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
				524
				525	/* No space available on this device */
				526	if (next == ja->discard_idx)
				527	continue;
				528
				529	ja->cur_idx = next;
				530	k->ptr[n++] = MAKE_PTR(0,
				531	bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
				532	ca->sb.nr_this_dev);
				533	}
				534
				535	if (n) {
				536	bkey_init(k);
				537	SET_KEY_PTRS(k, n);
				538	c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
				539	}
				540	out:
				541	if (!journal_full(&c->journal))
				542	__closure_wake_up(&c->journal.wait);
				543	}
				544
				545	void bch_journal_next(struct journal *j)
				546	{
				547	atomic_t p = { 1 };
				548
				549	j->cur = (j->cur == j->w)
				550	? &j->w[1]
				551	: &j->w[0];
				552
				553	/*
				554	* The fifo_push() needs to happen at the same time as j->seq is
				555	* incremented for last_seq() to be calculated correctly
				556	*/
				557	BUG_ON(!fifo_push(&j->pin, p));
				558	atomic_set(&fifo_back(&j->pin), 1);
				559
				560	j->cur->data->seq = ++j->seq;
				561	j->cur->dirty = false;
				562	j->cur->need_write = false;
				563	j->cur->data->keys = 0;
				564
				565	if (fifo_full(&j->pin))
				566	pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
				567	}
				568
				569	static void journal_write_endio(struct bio *bio)
				570	{
				571	struct journal_write *w = bio->bi_private;
				572
				573	cache_set_err_on(bio->bi_status, w->c, "journal io error");
				574	closure_put(&w->c->journal.io);
				575	}
				576
				577	static void journal_write(struct closure *);
				578
				579	static void journal_write_done(struct closure *cl)
				580	{
				581	struct journal *j = container_of(cl, struct journal, io);
				582	struct journal_write *w = (j->cur == j->w)
				583	? &j->w[1]
				584	: &j->w[0];
				585
				586	__closure_wake_up(&w->wait);
				587	continue_at_nobarrier(cl, journal_write, system_wq);
				588	}
				589
				590	static void journal_write_unlock(struct closure *cl)
				591	{
				592	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
				593
				594	c->journal.io_in_flight = 0;
				595	spin_unlock(&c->journal.lock);
				596	}
				597
				598	static void journal_write_unlocked(struct closure *cl)
				599	__releases(c->journal.lock)
				600	{
				601	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
				602	struct cache *ca;
				603	struct journal_write *w = c->journal.cur;
				604	struct bkey *k = &c->journal.key;
				605	unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
				606	c->sb.block_size;
				607
				608	struct bio *bio;
				609	struct bio_list list;
				610	bio_list_init(&list);
				611
				612	if (!w->need_write) {
				613	closure_return_with_destructor(cl, journal_write_unlock);
				614	return;
				615	} else if (journal_full(&c->journal)) {
				616	journal_reclaim(c);
				617	spin_unlock(&c->journal.lock);
				618
				619	btree_flush_write(c);
				620	continue_at(cl, journal_write, system_wq);
				621	return;
				622	}
				623
				624	c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
				625
				626	w->data->btree_level = c->root->level;
				627
				628	bkey_copy(&w->data->btree_root, &c->root->key);
				629	bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
				630
				631	for_each_cache(ca, c, i)
				632	w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
				633
				634	w->data->magic = jset_magic(&c->sb);
				635	w->data->version = BCACHE_JSET_VERSION;
				636	w->data->last_seq = last_seq(&c->journal);
				637	w->data->csum = csum_set(w->data);
				638
				639	for (i = 0; i < KEY_PTRS(k); i++) {
				640	ca = PTR_CACHE(c, k, i);
				641	bio = &ca->journal.bio;
				642
				643	atomic_long_add(sectors, &ca->meta_sectors_written);
				644
				645	bio_reset(bio);
				646	bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
				647	bio_set_dev(bio, ca->bdev);
				648	bio->bi_iter.bi_size = sectors << 9;
				649
				650	bio->bi_end_io = journal_write_endio;
				651	bio->bi_private = w;
				652	bio_set_op_attrs(bio, REQ_OP_WRITE,
				653	REQ_SYNC\|REQ_META\|REQ_PREFLUSH\|REQ_FUA);
				654	bch_bio_map(bio, w->data);
				655
				656	trace_bcache_journal_write(bio);
				657	bio_list_add(&list, bio);
				658
				659	SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors);
				660
				661	ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
				662	}
				663
				664	/* If KEY_PTRS(k) == 0, this jset gets lost in air */
				665	BUG_ON(i == 0);
				666
				667	atomic_dec_bug(&fifo_back(&c->journal.pin));
				668	bch_journal_next(&c->journal);
				669	journal_reclaim(c);
				670
				671	spin_unlock(&c->journal.lock);
				672
				673	while ((bio = bio_list_pop(&list)))
				674	closure_bio_submit(bio, cl);
				675
				676	continue_at(cl, journal_write_done, NULL);
				677	}
				678
				679	static void journal_write(struct closure *cl)
				680	{
				681	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
				682
				683	spin_lock(&c->journal.lock);
				684	journal_write_unlocked(cl);
				685	}
				686
				687	static void journal_try_write(struct cache_set *c)
				688	__releases(c->journal.lock)
				689	{
				690	struct closure *cl = &c->journal.io;
				691	struct journal_write *w = c->journal.cur;
				692
				693	w->need_write = true;
				694
				695	if (!c->journal.io_in_flight) {
				696	c->journal.io_in_flight = 1;
				697	closure_call(cl, journal_write_unlocked, NULL, &c->cl);
				698	} else {
				699	spin_unlock(&c->journal.lock);
				700	}
				701	}
				702
				703	static struct journal_write journal_wait_for_write(struct cache_set c,
				704	unsigned nkeys)
				705	{
				706	size_t sectors;
				707	struct closure cl;
				708	bool wait = false;
				709
				710	closure_init_stack(&cl);
				711
				712	spin_lock(&c->journal.lock);
				713
				714	while (1) {
				715	struct journal_write *w = c->journal.cur;
				716
				717	sectors = __set_blocks(w->data, w->data->keys + nkeys,
				718	block_bytes(c)) * c->sb.block_size;
				719
				720	if (sectors <= min_t(size_t,
				721	c->journal.blocks_free * c->sb.block_size,
				722	PAGE_SECTORS << JSET_BITS))
				723	return w;
				724
				725	if (wait)
				726	closure_wait(&c->journal.wait, &cl);
				727
				728	if (!journal_full(&c->journal)) {
				729	if (wait)
				730	trace_bcache_journal_entry_full(c);
				731
				732	/*
				733	* XXX: If we were inserting so many keys that they
				734	* won't fit in an _empty_ journal write, we'll
				735	* deadlock. For now, handle this in
				736	* bch_keylist_realloc() - but something to think about.
				737	*/
				738	BUG_ON(!w->data->keys);
				739
				740	journal_try_write(c); /* unlocks */
				741	} else {
				742	if (wait)
				743	trace_bcache_journal_full(c);
				744
				745	journal_reclaim(c);
				746	spin_unlock(&c->journal.lock);
				747
				748	btree_flush_write(c);
				749	}
				750
				751	closure_sync(&cl);
				752	spin_lock(&c->journal.lock);
				753	wait = true;
				754	}
				755	}
				756
				757	static void journal_write_work(struct work_struct *work)
				758	{
				759	struct cache_set *c = container_of(to_delayed_work(work),
				760	struct cache_set,
				761	journal.work);
				762	spin_lock(&c->journal.lock);
				763	if (c->journal.cur->dirty)
				764	journal_try_write(c);
				765	else
				766	spin_unlock(&c->journal.lock);
				767	}
				768
				769	/*
				770	* Entry point to the journalling code - bio_insert() and btree_invalidate()
				771	* pass bch_journal() a list of keys to be journalled, and then
				772	* bch_journal() hands those same keys off to btree_insert_async()
				773	*/
				774
				775	atomic_t bch_journal(struct cache_set c,
				776	struct keylist *keys,
				777	struct closure *parent)
				778	{
				779	struct journal_write *w;
				780	atomic_t *ret;
				781
				782	if (!CACHE_SYNC(&c->sb))
				783	return NULL;
				784
				785	w = journal_wait_for_write(c, bch_keylist_nkeys(keys));
				786
				787	memcpy(bset_bkey_last(w->data), keys->keys, bch_keylist_bytes(keys));
				788	w->data->keys += bch_keylist_nkeys(keys);
				789
				790	ret = &fifo_back(&c->journal.pin);
				791	atomic_inc(ret);
				792
				793	if (parent) {
				794	closure_wait(&w->wait, parent);
				795	journal_try_write(c);
				796	} else if (!w->dirty) {
				797	w->dirty = true;
				798	schedule_delayed_work(&c->journal.work,
				799	msecs_to_jiffies(c->journal_delay_ms));
				800	spin_unlock(&c->journal.lock);
				801	} else {
				802	spin_unlock(&c->journal.lock);
				803	}
				804
				805
				806	return ret;
				807	}
				808
				809	void bch_journal_meta(struct cache_set c, struct closure cl)
				810	{
				811	struct keylist keys;
				812	atomic_t *ref;
				813
				814	bch_keylist_init(&keys);
				815
				816	ref = bch_journal(c, &keys, cl);
				817	if (ref)
				818	atomic_dec_bug(ref);
				819	}
				820
				821	void bch_journal_free(struct cache_set *c)
				822	{
				823	free_pages((unsigned long) c->journal.w[1].data, JSET_BITS);
				824	free_pages((unsigned long) c->journal.w[0].data, JSET_BITS);
				825	free_fifo(&c->journal.pin);
				826	}
				827
				828	int bch_journal_alloc(struct cache_set *c)
				829	{
				830	struct journal *j = &c->journal;
				831
				832	spin_lock_init(&j->lock);
				833	INIT_DELAYED_WORK(&j->work, journal_write_work);
				834
				835	c->journal_delay_ms = 100;
				836
				837	j->w[0].c = c;
				838	j->w[1].c = c;
				839
				840	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) \|\|
				841	!(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL\|__GFP_COMP, JSET_BITS)) \|\|
				842	!(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL\|__GFP_COMP, JSET_BITS)))
				843	return -ENOMEM;
				844
				845	return 0;
				846	}