
Commit a6fc2ba

Ming Lei authored and gregkh committed
block: model freeze & enter queue as lock for supporting lockdep
[ Upstream commit f1be178 ]

Recently we got several deadlock reports [1][2][3] caused by
blk_mq_freeze_queue() and blk_enter_queue(). It turns out the two behave
just like acquiring a read/write lock, so model them as a read/write lock
so that lockdep can check them:

1) model q->q_usage_counter as two locks (io and queue lock)

- queue lock covers sync with blk_enter_queue()
- io lock covers sync with bio_enter_queue()

2) make the lockdep class/key per-queue:

- different subsystems have very different lock use patterns, so a shared
  lock class easily causes false positives
- freeze_queue degrades to no lock once the disk state becomes DEAD,
  because bio_enter_queue() won't be blocked any more
- freeze_queue degrades to no lock once the request queue becomes dying,
  because blk_enter_queue() won't be blocked any more

3) model blk_mq_freeze_queue() as acquire_exclusive & try_lock

- it is an exclusive lock, so the dependency with blk_enter_queue() is
  covered
- it is a trylock, because blk_mq_freeze_queue() is allowed to run
  concurrently

4) model blk_enter_queue() & bio_enter_queue() as acquire_read()

- nested blk_enter_queue() calls are allowed
- the dependency with blk_mq_freeze_queue() is covered
- blk_queue_exit() is often called from other contexts (such as irq) and
  can't be annotated as lock_release(), so simply release the annotation
  in blk_enter_queue() itself; this still covers as many cases as possible

With lockdep support, this kind of problem can be reported right away,
without having to wait until the real deadlock triggers. For example, a
lockdep splat is triggered for report [3] with this patch applied.

[1] occasional block layer hang when setting 'echo noop > /sys/block/sda/queue/scheduler'
    https://bugzilla.kernel.org/show_bug.cgi?id=219166

[2] del_gendisk() vs blk_queue_enter() race condition
    https://lore.kernel.org/linux-block/[email protected]/

[3] queue_freeze & queue_enter deadlock in scsi
    https://lore.kernel.org/linux-block/ZxG38G9BuFdBpBHZ@fedora/T/#u

Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
Stable-dep-of: 3802f73 ("block: fix uaf for flush rq while iterating tags")
Signed-off-by: Sasha Levin <[email protected]>
1 parent 9c3d53f commit a6fc2ba
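
Editor's note: for readers less familiar with lockdep's raw annotation
primitives, the following condenses the four-point model above into one
self-contained pattern. It is a minimal, hedged sketch: struct demo_queue
and the demo_* functions are invented for illustration, freeze nesting is
omitted, and a single lockdep map stands in for the separate io/queue maps
the real patch keeps. Only the lockdep calls mirror the diffs below.

#include <linux/lockdep.h>
#include <linux/percpu-refcount.h>

/* Illustrative only: one usage counter guarded by a single lockdep map. */
struct demo_queue {
        struct percpu_ref       usage;
        struct lock_class_key   key;    /* per-instance class, so unrelated
                                         * users don't share one class and
                                         * trigger false positives */
        struct lockdep_map      map;
};

static void demo_queue_init(struct demo_queue *q)
{
        lockdep_register_key(&q->key);
        lockdep_init_map(&q->map, "demo_q_usage_counter", &q->key, 0);
}

/* Freeze side: exclusive acquire, annotated as a trylock (third argument
 * is 1) because concurrent freezes are legal and must not be reported. */
static void demo_freeze(struct demo_queue *q)
{
        percpu_ref_kill(&q->usage);
        rwsem_acquire(&q->map, 0, 1, _RET_IP_);
}

static void demo_unfreeze(struct demo_queue *q)
{
        rwsem_release(&q->map, _RET_IP_);
        percpu_ref_resurrect(&q->usage);
}

/* Enter side: shared acquire released immediately. The matching exit may
 * run from irq context, where a lock_release() annotation can't live, so
 * the dependency is recorded here and the "lock" is never actually held. */
static int demo_enter(struct demo_queue *q)
{
        if (!percpu_ref_tryget_live(&q->usage))
                return -ENODEV;
        rwsem_acquire_read(&q->map, 0, 0, _RET_IP_);
        rwsem_release(&q->map, _RET_IP_);
        return 0;
}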

5 files changed: +81 -13 lines changed

block/blk-core.c

Lines changed: 16 additions & 2 deletions
@@ -261,6 +261,8 @@ static void blk_free_queue(struct request_queue *q)
         blk_mq_release(q);
 
         ida_free(&blk_queue_ida, q->id);
+        lockdep_unregister_key(&q->io_lock_cls_key);
+        lockdep_unregister_key(&q->q_lock_cls_key);
         call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
@@ -278,18 +280,20 @@ void blk_put_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_put_queue);
 
-void blk_queue_start_drain(struct request_queue *q)
+bool blk_queue_start_drain(struct request_queue *q)
 {
         /*
          * When queue DYING flag is set, we need to block new req
          * entering queue, so we call blk_freeze_queue_start() to
          * prevent I/O from crossing blk_queue_enter().
          */
-        blk_freeze_queue_start(q);
+        bool freeze = __blk_freeze_queue_start(q);
         if (queue_is_mq(q))
                 blk_mq_wake_waiters(q);
         /* Make blk_queue_enter() reexamine the DYING flag. */
         wake_up_all(&q->mq_freeze_wq);
+
+        return freeze;
 }
 
 /**
@@ -321,6 +325,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
                 return -ENODEV;
         }
 
+        rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_);
+        rwsem_release(&q->q_lockdep_map, _RET_IP_);
         return 0;
 }
 
@@ -352,6 +358,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
                         goto dead;
         }
 
+        rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+        rwsem_release(&q->io_lockdep_map, _RET_IP_);
         return 0;
 dead:
         bio_io_error(bio);
@@ -441,6 +449,12 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
                         PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
         if (error)
                 goto fail_stats;
+        lockdep_register_key(&q->io_lock_cls_key);
+        lockdep_register_key(&q->q_lock_cls_key);
+        lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)",
+                         &q->io_lock_cls_key, 0);
+        lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)",
+                         &q->q_lock_cls_key, 0);
 
         q->nr_requests = BLKDEV_DEFAULT_RQ;
 
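Editor's note: one detail worth calling out in the hunks above is the key
lifecycle. Because the lock class keys live inside the dynamically
allocated request_queue, they must be registered before lockdep_init_map()
uses them and unregistered before the containing memory is freed (here,
before call_rcu() releases the queue). A minimal sketch of that lifecycle,
with invented demo_* names:

#include <linux/lockdep.h>
#include <linux/slab.h>

struct demo {
        struct lock_class_key   key;
        struct lockdep_map      map;
};

static struct demo *demo_alloc(void)
{
        struct demo *d = kzalloc(sizeof(*d), GFP_KERNEL);

        if (!d)
                return NULL;
        /* Keys embedded in heap objects must be registered dynamically. */
        lockdep_register_key(&d->key);
        lockdep_init_map(&d->map, "demo_map", &d->key, 0);
        return d;
}

static void demo_free(struct demo *d)
{
        /* Unregister before the memory holding the key is released. */
        lockdep_unregister_key(&d->key);
        kfree(d);
}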
block/blk-mq.c

Lines changed: 22 additions & 4 deletions
@@ -120,17 +120,29 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
         inflight[1] = mi.inflight[1];
 }
 
-void blk_freeze_queue_start(struct request_queue *q)
+bool __blk_freeze_queue_start(struct request_queue *q)
 {
+        int freeze;
+
         mutex_lock(&q->mq_freeze_lock);
         if (++q->mq_freeze_depth == 1) {
                 percpu_ref_kill(&q->q_usage_counter);
                 mutex_unlock(&q->mq_freeze_lock);
                 if (queue_is_mq(q))
                         blk_mq_run_hw_queues(q, false);
+                freeze = true;
         } else {
                 mutex_unlock(&q->mq_freeze_lock);
+                freeze = false;
         }
+
+        return freeze;
+}
+
+void blk_freeze_queue_start(struct request_queue *q)
+{
+        if (__blk_freeze_queue_start(q))
+                blk_freeze_acquire_lock(q, false, false);
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
 
@@ -176,8 +188,10 @@ void blk_mq_freeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
 
-void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
+bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 {
+        int unfreeze = false;
+
         mutex_lock(&q->mq_freeze_lock);
         if (force_atomic)
                 q->q_usage_counter.data->force_atomic = true;
@@ -186,13 +200,17 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
         if (!q->mq_freeze_depth) {
                 percpu_ref_resurrect(&q->q_usage_counter);
                 wake_up_all(&q->mq_freeze_wq);
+                unfreeze = true;
         }
         mutex_unlock(&q->mq_freeze_lock);
+
+        return unfreeze;
 }
 
 void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-        __blk_mq_unfreeze_queue(q, false);
+        if (__blk_mq_unfreeze_queue(q, false))
+                blk_unfreeze_release_lock(q, false, false);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
@@ -205,7 +223,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  */
 void blk_freeze_queue_start_non_owner(struct request_queue *q)
 {
-        blk_freeze_queue_start(q);
+        __blk_freeze_queue_start(q);
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start_non_owner);
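Editor's note: the last hunk above is deliberate, not an oversight.
blk_freeze_queue_start_non_owner() keeps calling the annotation-free
__blk_freeze_queue_start() because lockdep assumes a lock is released by
the task that acquired it, and a non-owner freeze may be undone from a
different context. A hedged sketch of the two call patterns; the demo_*
wrappers are invented, the block-layer functions are from this patch:

/* Owner pattern: freeze and unfreeze in the same task, so the lockdep
 * annotation pairs up and dependencies are checked. */
static void demo_update_queue(struct request_queue *q)
{
        blk_mq_freeze_queue(q);         /* exclusive trylock acquire */
        /* ... safely touch queue state while no I/O can enter ... */
        blk_mq_unfreeze_queue(q);       /* matching release, same task */
}

/* Non-owner pattern: the unfreeze may happen in another task, which
 * lockdep cannot model, so no annotation is taken here. */
static void demo_start_async_freeze(struct request_queue *q)
{
        blk_freeze_queue_start_non_owner(q);
}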
block/blk.h

Lines changed: 26 additions & 3 deletions
@@ -4,6 +4,7 @@
 
 #include <linux/bio-integrity.h>
 #include <linux/blk-crypto.h>
+#include <linux/lockdep.h>
 #include <linux/memblock.h>     /* for max_pfn/max_low_pfn */
 #include <linux/sched/sysctl.h>
 #include <linux/timekeeping.h>
@@ -35,8 +36,9 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 void blk_free_flush_queue(struct blk_flush_queue *q);
 
 void blk_freeze_queue(struct request_queue *q);
-void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
-void blk_queue_start_drain(struct request_queue *q);
+bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
+bool blk_queue_start_drain(struct request_queue *q);
+bool __blk_freeze_queue_start(struct request_queue *q);
 int __bio_queue_enter(struct request_queue *q, struct bio *bio);
 void submit_bio_noacct_nocheck(struct bio *bio);
 void bio_await_chain(struct bio *bio);
@@ -69,8 +71,11 @@ static inline int bio_queue_enter(struct bio *bio)
 {
         struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-        if (blk_try_enter_queue(q, false))
+        if (blk_try_enter_queue(q, false)) {
+                rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+                rwsem_release(&q->io_lockdep_map, _RET_IP_);
                 return 0;
+        }
         return __bio_queue_enter(q, bio);
 }
 
@@ -734,4 +739,22 @@ void blk_integrity_verify(struct bio *bio);
 void blk_integrity_prepare(struct request *rq);
 void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
 
+static inline void blk_freeze_acquire_lock(struct request_queue *q, bool
+                disk_dead, bool queue_dying)
+{
+        if (!disk_dead)
+                rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
+        if (!queue_dying)
+                rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
+}
+
+static inline void blk_unfreeze_release_lock(struct request_queue *q, bool
+                disk_dead, bool queue_dying)
+{
+        if (!queue_dying)
+                rwsem_release(&q->q_lockdep_map, _RET_IP_);
+        if (!disk_dead)
+                rwsem_release(&q->io_lockdep_map, _RET_IP_);
+}
+
 #endif /* BLK_INTERNAL_H */
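Editor's note: the two helpers take the same pair of flags and must see the
same values on both sides; del_gendisk() in the next file samples
blk_queue_dying() once for exactly that reason. A compressed, hedged sketch
of the intended pairing (demo_teardown() is invented; the degradation rules
mirror the commit message: a dead disk no longer blocks bio_enter_queue()
and a dying queue no longer blocks blk_enter_queue(), so those sides of the
annotation are skipped):

static void demo_teardown(struct request_queue *q)
{
        /* Sample once so acquire and release agree on the flags. */
        bool queue_dying = blk_queue_dying(q);

        blk_freeze_acquire_lock(q, true /* disk_dead */, queue_dying);
        /* ... tear-down work while the queue stays frozen ... */
        blk_unfreeze_release_lock(q, true /* disk_dead */, queue_dying);
}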

block/genhd.c

Lines changed: 11 additions & 4 deletions
@@ -581,13 +581,13 @@ static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
         rcu_read_unlock();
 }
 
-static void __blk_mark_disk_dead(struct gendisk *disk)
+static bool __blk_mark_disk_dead(struct gendisk *disk)
 {
         /*
          * Fail any new I/O.
          */
         if (test_and_set_bit(GD_DEAD, &disk->state))
-                return;
+                return false;
 
         if (test_bit(GD_OWNS_QUEUE, &disk->state))
                 blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);
@@ -600,7 +600,7 @@ static void __blk_mark_disk_dead(struct gendisk *disk)
         /*
          * Prevent new I/O from crossing bio_queue_enter().
          */
-        blk_queue_start_drain(disk->queue);
+        return blk_queue_start_drain(disk->queue);
 }
 
 /**
@@ -641,6 +641,7 @@ void del_gendisk(struct gendisk *disk)
         struct request_queue *q = disk->queue;
         struct block_device *part;
         unsigned long idx;
+        bool start_drain, queue_dying;
 
         might_sleep();
 
@@ -668,7 +669,10 @@ void del_gendisk(struct gendisk *disk)
          * Drop all partitions now that the disk is marked dead.
          */
         mutex_lock(&disk->open_mutex);
-        __blk_mark_disk_dead(disk);
+        start_drain = __blk_mark_disk_dead(disk);
+        queue_dying = blk_queue_dying(q);
+        if (start_drain)
+                blk_freeze_acquire_lock(q, true, queue_dying);
         xa_for_each_start(&disk->part_tbl, idx, part, 1)
                 drop_partition(part);
         mutex_unlock(&disk->open_mutex);
@@ -725,6 +729,9 @@ void del_gendisk(struct gendisk *disk)
                 if (queue_is_mq(q))
                         blk_mq_exit_queue(q);
         }
+
+        if (start_drain)
+                blk_unfreeze_release_lock(q, true, queue_dying);
 }
 EXPORT_SYMBOL(del_gendisk);
 
include/linux/blkdev.h

Lines changed: 6 additions & 0 deletions
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/xarray.h>
 #include <linux/file.h>
+#include <linux/lockdep.h>
 
 struct module;
 struct request_queue;
@@ -471,6 +472,11 @@ struct request_queue {
         struct xarray hctx_table;
 
         struct percpu_ref q_usage_counter;
+        struct lock_class_key io_lock_cls_key;
+        struct lockdep_map io_lockdep_map;
+
+        struct lock_class_key q_lock_cls_key;
+        struct lockdep_map q_lockdep_map;
 
         struct request *last_merge;
 