Skip to content

Commit e288e93

Browse files
committed
Merge branch 'bcache' (bcache fixes from Kent Overstreet)
Merge bcache fixes from Kent Overstreet: "There's fixes for _three_ different data corruption bugs, all of which were found by users hitting them in the wild. The first one isn't bcache specific - in 3.11 bcache was switched to the bio_copy_data in fs/bio.c, and that's when the bug in that code was discovered, but it's also used by raid1 and pktcdvd. (That was my code too, so the bug's doubly embarrassing given that it was or should've been just a cut and paste from bcache code. Dunno what happened there). Most of these (all the non data corruption bugs, actually) were ready before the merge window and have been sitting in Jens' tree, but I don't know what's been up with him lately..." * emailed patches from Kent Overstreet <[email protected]>: bcache: Fix flushes in writeback mode bcache: Fix for handling overlapping extents when reading in a btree node bcache: Fix a shrinker deadlock bcache: Fix a dumb CPU spinning bug in writeback bcache: Fix a flush/fua performance bug bcache: Fix a writeback performance regression bcache: Correct printf()-style format length modifier bcache: Fix for when no journal entries are found bcache: Strip endline when writing the label through sysfs bcache: Fix a dumb journal discard bug block: Fix bio_copy_data()
2 parents db6aaf4 + c0f04d8 commit e288e93

File tree

10 files changed

+110
-66
lines changed

10 files changed

+110
-66
lines changed

drivers/md/bcache/bcache.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ struct cached_dev {
498498
*/
499499
atomic_t has_dirty;
500500

501-
struct ratelimit writeback_rate;
501+
struct bch_ratelimit writeback_rate;
502502
struct delayed_work writeback_rate_update;
503503

504504
/*
@@ -507,10 +507,9 @@ struct cached_dev {
507507
*/
508508
sector_t last_read;
509509

510-
/* Number of writeback bios in flight */
511-
atomic_t in_flight;
510+
/* Limit number of writeback bios in flight */
511+
struct semaphore in_flight;
512512
struct closure_with_timer writeback;
513-
struct closure_waitlist writeback_wait;
514513

515514
struct keybuf writeback_keys;
516515

drivers/md/bcache/bset.c

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
926926

927927
/* Mergesort */
928928

929+
static void sort_key_next(struct btree_iter *iter,
930+
struct btree_iter_set *i)
931+
{
932+
i->k = bkey_next(i->k);
933+
934+
if (i->k == i->end)
935+
*i = iter->data[--iter->used];
936+
}
937+
929938
static void btree_sort_fixup(struct btree_iter *iter)
930939
{
931940
while (iter->used > 1) {
932941
struct btree_iter_set *top = iter->data, *i = top + 1;
933-
struct bkey *k;
934942

935943
if (iter->used > 2 &&
936944
btree_iter_cmp(i[0], i[1]))
937945
i++;
938946

939-
for (k = i->k;
940-
k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
941-
k = bkey_next(k))
942-
if (top->k > i->k)
943-
__bch_cut_front(top->k, k);
944-
else if (KEY_SIZE(k))
945-
bch_cut_back(&START_KEY(k), top->k);
946-
947-
if (top->k < i->k || k == i->k)
947+
if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
948948
break;
949949

950-
heap_sift(iter, i - top, btree_iter_cmp);
950+
if (!KEY_SIZE(i->k)) {
951+
sort_key_next(iter, i);
952+
heap_sift(iter, i - top, btree_iter_cmp);
953+
continue;
954+
}
955+
956+
if (top->k > i->k) {
957+
if (bkey_cmp(top->k, i->k) >= 0)
958+
sort_key_next(iter, i);
959+
else
960+
bch_cut_front(top->k, i->k);
961+
962+
heap_sift(iter, i - top, btree_iter_cmp);
963+
} else {
964+
/* can't happen because of comparison func */
965+
BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
966+
bch_cut_back(&START_KEY(i->k), top->k);
967+
}
951968
}
952969
}
953970

drivers/md/bcache/btree.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
255255

256256
return;
257257
err:
258-
bch_cache_set_error(b->c, "io error reading bucket %lu",
258+
bch_cache_set_error(b->c, "io error reading bucket %zu",
259259
PTR_BUCKET_NR(b->c, &b->key, 0));
260260
}
261261

@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
612612
return SHRINK_STOP;
613613

614614
/* Return -1 if we can't do anything right now */
615-
if (sc->gfp_mask & __GFP_WAIT)
615+
if (sc->gfp_mask & __GFP_IO)
616616
mutex_lock(&c->bucket_lock);
617617
else if (!mutex_trylock(&c->bucket_lock))
618618
return -1;

drivers/md/bcache/journal.c

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
153153
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
154154
pr_debug("%u journal buckets", ca->sb.njournal_buckets);
155155

156-
/* Read journal buckets ordered by golden ratio hash to quickly
156+
/*
157+
* Read journal buckets ordered by golden ratio hash to quickly
157158
* find a sequence of buckets with valid journal entries
158159
*/
159160
for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
166167
goto bsearch;
167168
}
168169

169-
/* If that fails, check all the buckets we haven't checked
170+
/*
171+
* If that fails, check all the buckets we haven't checked
170172
* already
171173
*/
172174
pr_debug("falling back to linear search");
173175

174-
for (l = 0; l < ca->sb.njournal_buckets; l++) {
175-
if (test_bit(l, bitmap))
176-
continue;
177-
176+
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
177+
l < ca->sb.njournal_buckets;
178+
l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
178179
if (read_bucket(l))
179180
goto bsearch;
180-
}
181+
182+
if (list_empty(list))
183+
continue;
181184
bsearch:
182185
/* Binary search */
183186
m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
197200
r = m;
198201
}
199202

200-
/* Read buckets in reverse order until we stop finding more
203+
/*
204+
* Read buckets in reverse order until we stop finding more
201205
* journal entries
202206
*/
203-
pr_debug("finishing up");
207+
pr_debug("finishing up: m %u njournal_buckets %u",
208+
m, ca->sb.njournal_buckets);
204209
l = m;
205210

206211
while (1) {
@@ -228,9 +233,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
228233
}
229234
}
230235

231-
c->journal.seq = list_entry(list->prev,
232-
struct journal_replay,
233-
list)->j.seq;
236+
if (!list_empty(list))
237+
c->journal.seq = list_entry(list->prev,
238+
struct journal_replay,
239+
list)->j.seq;
234240

235241
return 0;
236242
#undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
428434
return;
429435
}
430436

431-
switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
437+
switch (atomic_read(&ja->discard_in_flight)) {
432438
case DISCARD_IN_FLIGHT:
433439
return;
434440

@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
689695
if (cl)
690696
BUG_ON(!closure_wait(&w->wait, cl));
691697

698+
closure_flush(&c->journal.io);
692699
__journal_try_write(c, true);
693700
}
694701
}

drivers/md/bcache/request.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
997997
} else {
998998
bch_writeback_add(dc);
999999

1000-
if (s->op.flush_journal) {
1000+
if (bio->bi_rw & REQ_FLUSH) {
10011001
/* Also need to send a flush to the backing device */
1002-
s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
1003-
dc->disk.bio_split);
1002+
struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
1003+
dc->disk.bio_split);
10041004

1005-
bio->bi_size = 0;
1006-
bio->bi_vcnt = 0;
1007-
closure_bio_submit(bio, cl, s->d);
1005+
flush->bi_rw = WRITE_FLUSH;
1006+
flush->bi_bdev = bio->bi_bdev;
1007+
flush->bi_end_io = request_endio;
1008+
flush->bi_private = cl;
1009+
1010+
closure_bio_submit(flush, cl, s->d);
10081011
} else {
10091012
s->op.cache_bio = bio;
10101013
}

drivers/md/bcache/sysfs.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,13 @@ STORE(__cached_dev)
223223
}
224224

225225
if (attr == &sysfs_label) {
226-
/* note: endlines are preserved */
227-
memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
226+
if (size > SB_LABEL_SIZE)
227+
return -EINVAL;
228+
memcpy(dc->sb.label, buf, size);
229+
if (size < SB_LABEL_SIZE)
230+
dc->sb.label[size] = '\0';
231+
if (size && dc->sb.label[size - 1] == '\n')
232+
dc->sb.label[size - 1] = '\0';
228233
bch_write_bdev_super(dc, NULL);
229234
if (dc->disk.c) {
230235
memcpy(dc->disk.c->uuids[dc->disk.id].label,

drivers/md/bcache/util.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
190190
stats->last = now ?: 1;
191191
}
192192

193-
unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
193+
/**
194+
* bch_next_delay() - increment @d by the amount of work done, and return how
195+
* long to delay until the next time to do some work.
196+
*
197+
* @d - the struct bch_ratelimit to update
198+
* @done - the amount of work done, in arbitrary units
199+
*
200+
* Returns the amount of time to delay by, in jiffies
201+
*/
202+
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
194203
{
195204
uint64_t now = local_clock();
196205

drivers/md/bcache/util.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
450450
(ewma) >> factor; \
451451
})
452452

453-
struct ratelimit {
453+
struct bch_ratelimit {
454+
/* Next time we want to do some work, in nanoseconds */
454455
uint64_t next;
456+
457+
/*
458+
* Rate at which we want to do work, in units per nanosecond
459+
* The units here correspond to the units passed to bch_next_delay()
460+
*/
455461
unsigned rate;
456462
};
457463

458-
static inline void ratelimit_reset(struct ratelimit *d)
464+
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
459465
{
460466
d->next = local_clock();
461467
}
462468

463-
unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
469+
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
464470

465471
#define __DIV_SAFE(n, d, zero) \
466472
({ \

drivers/md/bcache/writeback.c

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
9494

9595
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
9696
{
97+
uint64_t ret;
98+
9799
if (atomic_read(&dc->disk.detaching) ||
98100
!dc->writeback_percent)
99101
return 0;
100102

101-
return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
103+
ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
104+
105+
return min_t(uint64_t, ret, HZ);
102106
}
103107

104108
/* Background writeback */
@@ -208,7 +212,7 @@ static void refill_dirty(struct closure *cl)
208212

209213
up_write(&dc->writeback_lock);
210214

211-
ratelimit_reset(&dc->writeback_rate);
215+
bch_ratelimit_reset(&dc->writeback_rate);
212216

213217
/* Punt to workqueue only so we don't recurse and blow the stack */
214218
continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
318322
}
319323

320324
bch_keybuf_del(&dc->writeback_keys, w);
321-
atomic_dec_bug(&dc->in_flight);
322-
323-
closure_wake_up(&dc->writeback_wait);
325+
up(&dc->in_flight);
324326

325327
closure_return_with_destructor(cl, dirty_io_destructor);
326328
}
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
349351

350352
closure_bio_submit(&io->bio, cl, &io->dc->disk);
351353

352-
continue_at(cl, write_dirty_finish, dirty_wq);
354+
continue_at(cl, write_dirty_finish, system_wq);
353355
}
354356

355357
static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
369371

370372
closure_bio_submit(&io->bio, cl, &io->dc->disk);
371373

372-
continue_at(cl, write_dirty, dirty_wq);
374+
continue_at(cl, write_dirty, system_wq);
373375
}
374376

375377
static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
394396

395397
if (delay > 0 &&
396398
(KEY_START(&w->key) != dc->last_read ||
397-
jiffies_to_msecs(delay) > 50)) {
398-
w->private = NULL;
399-
400-
closure_delay(&dc->writeback, delay);
401-
continue_at(cl, read_dirty, dirty_wq);
402-
}
399+
jiffies_to_msecs(delay) > 50))
400+
delay = schedule_timeout_uninterruptible(delay);
403401

404402
dc->last_read = KEY_OFFSET(&w->key);
405403

@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
424422

425423
trace_bcache_writeback(&w->key);
426424

427-
closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
425+
down(&dc->in_flight);
426+
closure_call(&io->cl, read_dirty_submit, NULL, cl);
428427

429428
delay = writeback_delay(dc, KEY_SIZE(&w->key));
430-
431-
atomic_inc(&dc->in_flight);
432-
433-
if (!closure_wait_event(&dc->writeback_wait, cl,
434-
atomic_read(&dc->in_flight) < 64))
435-
continue_at(cl, read_dirty, dirty_wq);
436429
}
437430

438431
if (0) {
@@ -442,7 +435,11 @@ static void read_dirty(struct closure *cl)
442435
bch_keybuf_del(&dc->writeback_keys, w);
443436
}
444437

445-
refill_dirty(cl);
438+
/*
439+
* Wait for outstanding writeback IOs to finish (and keybuf slots to be
440+
* freed) before refilling again
441+
*/
442+
continue_at(cl, refill_dirty, dirty_wq);
446443
}
447444

448445
/* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
484481

485482
void bch_cached_dev_writeback_init(struct cached_dev *dc)
486483
{
484+
sema_init(&dc->in_flight, 64);
487485
closure_init_unlocked(&dc->writeback);
488486
init_rwsem(&dc->writeback_lock);
489487

@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
513511

514512
int __init bch_writeback_init(void)
515513
{
516-
dirty_wq = create_singlethread_workqueue("bcache_writeback");
514+
dirty_wq = create_workqueue("bcache_writeback");
517515
if (!dirty_wq)
518516
return -ENOMEM;
519517

fs/bio.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
917917
src_p = kmap_atomic(src_bv->bv_page);
918918
dst_p = kmap_atomic(dst_bv->bv_page);
919919

920-
memcpy(dst_p + dst_bv->bv_offset,
921-
src_p + src_bv->bv_offset,
920+
memcpy(dst_p + dst_offset,
921+
src_p + src_offset,
922922
bytes);
923923

924924
kunmap_atomic(dst_p);

0 commit comments

Comments
 (0)