Skip to content

Commit 8727884

Browse files
ignatk and gregkh
authored and committed
dm crypt: do not wait for backlogged crypto request completion in softirq
commit 8abec36 upstream. Commit 39d42fa ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. When Crypto API backlogs a crypto request, dm-crypt uses wait_for_completion to avoid sending further requests to an already overloaded crypto driver. However, if the code is executing in softirq context, we might get the following stacktrace: [ 210.235213][ C0] BUG: scheduling while atomic: fio/2602/0x00000102 [ 210.236701][ C0] Modules linked in: [ 210.237566][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.239292][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.241233][ C0] Call Trace: [ 210.241946][ C0] <IRQ> [ 210.242561][ C0] dump_stack+0x7d/0xa3 [ 210.243466][ C0] __schedule_bug.cold+0xb3/0xc2 [ 210.244539][ C0] __schedule+0x156f/0x20d0 [ 210.245518][ C0] ? io_schedule_timeout+0x140/0x140 [ 210.246660][ C0] schedule+0xd0/0x270 [ 210.247541][ C0] schedule_timeout+0x1fb/0x280 [ 210.248586][ C0] ? usleep_range+0x150/0x150 [ 210.249624][ C0] ? unpoison_range+0x3a/0x60 [ 210.250632][ C0] ? ____kasan_kmalloc.constprop.0+0x82/0xa0 [ 210.251949][ C0] ? unpoison_range+0x3a/0x60 [ 210.252958][ C0] ? __prepare_to_swait+0xa7/0x190 [ 210.254067][ C0] do_wait_for_common+0x2ab/0x370 [ 210.255158][ C0] ? usleep_range+0x150/0x150 [ 210.256192][ C0] ? bit_wait_io_timeout+0x160/0x160 [ 210.257358][ C0] ? blk_update_request+0x757/0x1150 [ 210.258582][ C0] ? _raw_spin_lock_irq+0x82/0xd0 [ 210.259674][ C0] ? _raw_read_unlock_irqrestore+0x30/0x30 [ 210.260917][ C0] wait_for_completion+0x4c/0x90 [ 210.261971][ C0] crypt_convert+0x19a6/0x4c00 [ 210.263033][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.264193][ C0] ? kasan_set_track+0x1c/0x30 [ 210.265191][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.266283][ C0] ? kmem_cache_free+0x104/0x470 [ 210.267363][ C0] ? 
crypt_endio+0x91/0x180 [ 210.268327][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.269565][ C0] blk_update_request+0x757/0x1150 [ 210.270563][ C0] blk_mq_end_request+0x4b/0x480 [ 210.271680][ C0] blk_done_softirq+0x21d/0x340 [ 210.272775][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.273847][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.275031][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.276182][ C0] __do_softirq+0x190/0x611 [ 210.277203][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.278340][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.279514][ C0] </IRQ> [ 210.280164][ C0] do_softirq_own_stack+0x37/0x40 [ 210.281281][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.282286][ C0] common_interrupt+0x74/0x120 [ 210.283376][ C0] asm_common_interrupt+0x1e/0x40 [ 210.284496][ C0] RIP: 0010:_aesni_enc1+0x65/0xb0 Fix this by making crypt_convert function reentrant from the point of a single bio and make dm-crypt defer further bio processing to a workqueue, if Crypto API backlogs a request in interrupt context. Fixes: 39d42fa ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: [email protected] # v5.9+ Signed-off-by: Ignat Korchagin <[email protected]> Acked-by: Mikulas Patocka <[email protected]> Signed-off-by: Mike Snitzer <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 93edb8d commit 8727884

File tree

1 file changed

+98
-5
lines changed

1 file changed

+98
-5
lines changed

drivers/md/dm-crypt.c

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,13 +1539,19 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
15391539
* Encrypt / decrypt data from one bio to another one (can be the same one)
15401540
*/
15411541
static blk_status_t crypt_convert(struct crypt_config *cc,
1542-
struct convert_context *ctx, bool atomic)
1542+
struct convert_context *ctx, bool atomic, bool reset_pending)
15431543
{
15441544
unsigned int tag_offset = 0;
15451545
unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
15461546
int r;
15471547

1548-
atomic_set(&ctx->cc_pending, 1);
1548+
/*
1549+
* if reset_pending is set we are dealing with the bio for the first time,
1550+
* else we're continuing to work on the previous bio, so don't mess with
1551+
* the cc_pending counter
1552+
*/
1553+
if (reset_pending)
1554+
atomic_set(&ctx->cc_pending, 1);
15491555

15501556
while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
15511557

@@ -1568,7 +1574,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
15681574
* but the driver request queue is full, let's wait.
15691575
*/
15701576
case -EBUSY:
1571-
wait_for_completion(&ctx->restart);
1577+
if (in_interrupt()) {
1578+
if (try_wait_for_completion(&ctx->restart)) {
1579+
/*
1580+
* we don't have to block to wait for completion,
1581+
* so proceed
1582+
*/
1583+
} else {
1584+
/*
1585+
* we can't wait for completion without blocking
1586+
* exit and continue processing in a workqueue
1587+
*/
1588+
ctx->r.req = NULL;
1589+
ctx->cc_sector += sector_step;
1590+
tag_offset++;
1591+
return BLK_STS_DEV_RESOURCE;
1592+
}
1593+
} else {
1594+
wait_for_completion(&ctx->restart);
1595+
}
15721596
reinit_completion(&ctx->restart);
15731597
fallthrough;
15741598
/*
@@ -1960,6 +1984,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
19601984
}
19611985
}
19621986

1987+
/*
 * Workqueue continuation for the write path: picks up encryption of a bio
 * whose crypt request was backlogged by the Crypto API while we were in
 * interrupt/softirq context (where we must not sleep in wait_for_completion).
 * Scheduled via INIT_WORK from kcryptd_crypt_write_convert() when
 * crypt_convert() returns BLK_STS_DEV_RESOURCE.
 */
static void kcryptd_crypt_write_continue(struct work_struct *work)
{
	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
	struct crypt_config *cc = io->cc;
	struct convert_context *ctx = &io->ctx;
	int crypt_finished;
	sector_t sector = io->sector;
	blk_status_t r;

	/* Now in process context: block until the backlogged request restarts us */
	wait_for_completion(&ctx->restart);
	reinit_completion(&ctx->restart);

	/*
	 * Resume conversion of the same bio: reset_pending=false so the
	 * cc_pending count carried over from the softirq attempt is kept.
	 */
	r = crypt_convert(cc, &io->ctx, true, false);
	if (r)
		io->error = r;
	crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
	if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
		/* Wait for completion signaled by kcryptd_async_done() */
		wait_for_completion(&ctx->restart);
		crypt_finished = 1;
	}

	/* Encryption was already finished, submit io now */
	if (crypt_finished) {
		kcryptd_crypt_write_io_submit(io, 0);
		io->sector = sector;
	}

	crypt_dec_pending(io);
}
2017+
19632018
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
19642019
{
19652020
struct crypt_config *cc = io->cc;
@@ -1988,7 +2043,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
19882043

19892044
crypt_inc_pending(io);
19902045
r = crypt_convert(cc, ctx,
1991-
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
2046+
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
2047+
/*
2048+
* Crypto API backlogged the request, because its queue was full
2049+
* and we're in softirq context, so continue from a workqueue
2050+
* (TODO: is it actually possible to be in softirq in the write path?)
2051+
*/
2052+
if (r == BLK_STS_DEV_RESOURCE) {
2053+
INIT_WORK(&io->work, kcryptd_crypt_write_continue);
2054+
queue_work(cc->crypt_queue, &io->work);
2055+
return;
2056+
}
19922057
if (r)
19932058
io->error = r;
19942059
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -2013,6 +2078,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
20132078
crypt_dec_pending(io);
20142079
}
20152080

2081+
/*
 * Workqueue continuation for the read path: resumes decryption of a bio
 * whose crypt request was backlogged by the Crypto API while running in
 * interrupt/softirq context. Scheduled via INIT_WORK from
 * kcryptd_crypt_read_convert() when crypt_convert() returns
 * BLK_STS_DEV_RESOURCE.
 */
static void kcryptd_crypt_read_continue(struct work_struct *work)
{
	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
	struct crypt_config *cc = io->cc;
	blk_status_t r;

	/* Now in process context: block until the backlogged request restarts us */
	wait_for_completion(&io->ctx.restart);
	reinit_completion(&io->ctx.restart);

	/* reset_pending=false: keep the cc_pending count from the first attempt */
	r = crypt_convert(cc, &io->ctx, true, false);
	if (r)
		io->error = r;

	if (atomic_dec_and_test(&io->ctx.cc_pending))
		kcryptd_crypt_read_done(io);

	crypt_dec_pending(io);
}
2099+
20162100
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
20172101
{
20182102
struct crypt_config *cc = io->cc;
@@ -2024,7 +2108,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
20242108
io->sector);
20252109

20262110
r = crypt_convert(cc, &io->ctx,
2027-
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
2111+
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
2112+
/*
2113+
* Crypto API backlogged the request, because its queue was full
2114+
* and we're in softirq context, so continue from a workqueue
2115+
*/
2116+
if (r == BLK_STS_DEV_RESOURCE) {
2117+
INIT_WORK(&io->work, kcryptd_crypt_read_continue);
2118+
queue_work(cc->crypt_queue, &io->work);
2119+
return;
2120+
}
20282121
if (r)
20292122
io->error = r;
20302123

0 commit comments

Comments
 (0)