Skip to content

Commit 07d399c

Browse files
josefbacikkdave
authored andcommitted
btrfs: take the dio extent lock during O_DIRECT operations
Currently we hold the extent lock for the entire duration of a read. This isn't really necessary in the buffered case, we're protected by the page lock, however it's necessary for O_DIRECT. For O_DIRECT reads, if we only locked the extent for the part where we get the extent, we could potentially race with an O_DIRECT write in the same region. This isn't really a problem, unless the read is delayed so much that the write does the COW, unpins the old extent, and some other application re-allocates the extent before the read is actually able to be submitted. At that point at best we'd have a checksum mismatch, but at worse we could read data that doesn't belong to us. To address this potential race we need to make sure we don't have overlapping, concurrent direct io reads and writes. To accomplish this use the new EXTENT_DIO_LOCKED bit in the direct IO case in the same spot as the current extent lock. The writes will take this while they're creating the ordered extent, which is also used to make sure concurrent buffered reads or concurrent direct reads are not allowed to occur, and drop it after the ordered extent is taken. For reads it will act as the current read behavior for the EXTENT_LOCKED bit, we set it when we're starting the read, we clear it in the end_io to allow other direct writes to continue. This still has the drawback of disallowing concurrent overlapping direct reads from occurring, but that exists with the current extent locking. Signed-off-by: Josef Bacik <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 7e2a595 commit 07d399c

File tree

1 file changed

+32
-12
lines changed

1 file changed

+32
-12
lines changed

fs/btrfs/direct-io.c

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
4040
struct btrfs_ordered_extent *ordered;
4141
int ret = 0;
4242

43+
/* Direct lock must be taken before the extent lock. */
44+
if (nowait) {
45+
if (!try_lock_dio_extent(io_tree, lockstart, lockend, cached_state))
46+
return -EAGAIN;
47+
} else {
48+
lock_dio_extent(io_tree, lockstart, lockend, cached_state);
49+
}
50+
4351
while (1) {
4452
if (nowait) {
4553
if (!try_lock_extent(io_tree, lockstart, lockend,
46-
cached_state))
47-
return -EAGAIN;
54+
cached_state)) {
55+
ret = -EAGAIN;
56+
break;
57+
}
4858
} else {
4959
lock_extent(io_tree, lockstart, lockend, cached_state);
5060
}
@@ -120,6 +130,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
120130
cond_resched();
121131
}
122132

133+
if (ret)
134+
unlock_dio_extent(io_tree, lockstart, lockend, cached_state);
123135
return ret;
124136
}
125137

@@ -546,8 +558,9 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
546558
}
547559

548560
if (unlock_extents)
549-
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
550-
&cached_state);
561+
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
562+
EXTENT_LOCKED | EXTENT_DIO_LOCKED,
563+
&cached_state);
551564
else
552565
free_extent_state(cached_state);
553566

@@ -572,8 +585,13 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
572585
return 0;
573586

574587
unlock_err:
575-
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
576-
&cached_state);
588+
/*
589+
* Don't use EXTENT_LOCK_BITS here in case we extend it later and forget
590+
* to update this, be explicit that we expect EXTENT_LOCKED and
591+
* EXTENT_DIO_LOCKED to be set here, and so that's what we're clearing.
592+
*/
593+
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
594+
EXTENT_LOCKED | EXTENT_DIO_LOCKED, &cached_state);
577595
err:
578596
if (dio_data->data_space_reserved) {
579597
btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -596,8 +614,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
596614

597615
if (!write && (iomap->type == IOMAP_HOLE)) {
598616
/* If reading from a hole, unlock and return */
599-
unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1,
600-
NULL);
617+
clear_extent_bit(&BTRFS_I(inode)->io_tree, pos, pos + length - 1,
618+
EXTENT_LOCKED | EXTENT_DIO_LOCKED, NULL);
601619
return 0;
602620
}
603621

@@ -608,8 +626,9 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
608626
btrfs_finish_ordered_extent(dio_data->ordered, NULL,
609627
pos, length, false);
610628
else
611-
unlock_extent(&BTRFS_I(inode)->io_tree, pos,
612-
pos + length - 1, NULL);
629+
clear_extent_bit(&BTRFS_I(inode)->io_tree, pos,
630+
pos + length - 1,
631+
EXTENT_LOCKED | EXTENT_DIO_LOCKED, NULL);
613632
ret = -ENOTBLK;
614633
}
615634
if (write) {
@@ -641,8 +660,9 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio)
641660
dip->file_offset, dip->bytes,
642661
!bio->bi_status);
643662
} else {
644-
unlock_extent(&inode->io_tree, dip->file_offset,
645-
dip->file_offset + dip->bytes - 1, NULL);
663+
clear_extent_bit(&inode->io_tree, dip->file_offset,
664+
dip->file_offset + dip->bytes - 1,
665+
EXTENT_LOCKED | EXTENT_DIO_LOCKED, NULL);
646666
}
647667

648668
bbio->bio.bi_private = bbio->private;

0 commit comments

Comments
 (0)