
Commit 09e6cef

adam900710 authored and kdave committed
btrfs: refactor alloc_extent_buffer() to allocate-then-attach method
Currently alloc_extent_buffer() utilizes find_or_create_page() to allocate one page at a time for an extent buffer.

This method has the following disadvantages:

- find_or_create_page() is the legacy way of allocating new pages.
  With the new folio infrastructure, find_or_create_page() is just redirected to filemap_get_folio().

- It lacks a way to support higher order (order >= 1) folios, as we can not yet let filemap give us a higher order folio.

This patch changes the workflow in the following way:

                Old                  |                 New
-------------------------------------+-------------------------------------
                                     | ret = btrfs_alloc_page_array();
 for (i = 0; i < num_pages; i++) {   | for (i = 0; i < num_pages; i++) {
     p = find_or_create_page();      |     ret = filemap_add_folio();
     /* Attach page private */       |     /* Reuse page cache if needed */
     /* Reuse eb if needed */        |
                                     |     /* Attach page private and
                                     |        reuse eb if needed */
 }                                   | }

By this we split the page allocation and private attaching into two parts, allowing future updates to each part more easily, and migrate to folio interfaces (especially for possible higher order folios).

Signed-off-by: Qu Wenruo <[email protected]>
Signed-off-by: David Sterba <[email protected]>
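For readers outside the kernel tree, a minimal userspace C sketch of the same allocate-then-attach pattern may help; everything in it (the cache array, cache_attach(), NUM_PAGES) is a hypothetical stand-in for the page cache and filemap_add_folio(), not a btrfs API:

/*
 * Hypothetical sketch of the allocate-then-attach pattern: phase 1
 * allocates every buffer up front; phase 2 attaches each one to a
 * shared cache, dropping our copy when an existing entry wins.
 */
#include <stdio.h>
#include <stdlib.h>

#define NUM_PAGES 4

/* Toy "page cache": slot i holds the attached buffer, or NULL. */
static void *cache[NUM_PAGES];

/* Return 0 if pages[i] was attached, 1 if an existing entry was reused. */
static int cache_attach(void **pages, int i)
{
        if (!cache[i]) {
                cache[i] = pages[i];    /* insertion succeeded */
                return 0;
        }
        free(pages[i]);                 /* slot taken: reuse existing entry */
        pages[i] = cache[i];
        return 1;
}

int main(void)
{
        void *pages[NUM_PAGES];

        /* Phase 1: allocate everything first (btrfs_alloc_page_array()). */
        for (int i = 0; i < NUM_PAGES; i++)
                pages[i] = malloc(4096);        /* no error handling in this toy */

        /* Phase 2: attach each buffer to the cache (filemap_add_folio()). */
        for (int i = 0; i < NUM_PAGES; i++)
                printf("page %d: %s\n", i,
                       cache_attach(pages, i) ? "reused" : "attached");
        return 0;
}

The point of the split is visible even in this toy: phase 1 can later switch to a bulk or higher-order allocator without touching the attach loop.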
1 parent: 2b0122a

6 files changed, +123 -46 lines


fs/btrfs/compression.c (+1 -1)

@@ -608,7 +608,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
                 goto out_free_bio;
         }
 
-        ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
+        ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages, 0);
         if (ret2) {
                 ret = BLK_STS_RESOURCE;
                 goto out_free_compressed_pages;

fs/btrfs/extent_io.c (+115 -39)

@@ -674,19 +674,22 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
  * @nr_pages:   number of pages to allocate
  * @page_array: the array to fill with pages; any existing non-null entries in
  *              the array will be skipped
+ * @extra_gfp:  the extra GFP flags for the allocation.
  *
  * Return: 0        if all pages were able to be allocated;
  *         -ENOMEM  otherwise, the partially allocated pages would be freed and
  *                  the array slots zeroed
  */
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+                           gfp_t extra_gfp)
 {
         unsigned int allocated;
 
         for (allocated = 0; allocated < nr_pages;) {
                 unsigned int last = allocated;
 
-                allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array);
+                allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
+                                                   nr_pages, page_array);
 
                 if (allocated == nr_pages)
                         return 0;

@@ -3219,7 +3222,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
          */
         set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
 
-        ret = btrfs_alloc_page_array(num_pages, new->pages);
+        ret = btrfs_alloc_page_array(num_pages, new->pages, 0);
         if (ret) {
                 btrfs_release_extent_buffer(new);
                 return NULL;

@@ -3255,7 +3258,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
                 return NULL;
 
         num_pages = num_extent_pages(eb);
-        ret = btrfs_alloc_page_array(num_pages, eb->pages);
+        ret = btrfs_alloc_page_array(num_pages, eb->pages, 0);
         if (ret)
                 goto err;
 

@@ -3475,16 +3478,75 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
         return 0;
 }
 
+
+/*
+ * Return 0 if eb->pages[i] is attached to the btree inode successfully.
+ * Return >0 if there is already another extent buffer for the range,
+ * and @found_eb_ret would be updated.
+ */
+static int attach_eb_page_to_filemap(struct extent_buffer *eb, int i,
+                                     struct extent_buffer **found_eb_ret)
+{
+
+        struct btrfs_fs_info *fs_info = eb->fs_info;
+        struct address_space *mapping = fs_info->btree_inode->i_mapping;
+        const unsigned long index = eb->start >> PAGE_SHIFT;
+        struct folio *existing_folio;
+        int ret;
+
+        ASSERT(found_eb_ret);
+
+        /* Caller should ensure the page exists. */
+        ASSERT(eb->pages[i]);
+
+retry:
+        ret = filemap_add_folio(mapping, page_folio(eb->pages[i]), index + i,
+                                GFP_NOFS | __GFP_NOFAIL);
+        if (!ret)
+                return 0;
+
+        existing_folio = filemap_lock_folio(mapping, index + i);
+        /* The page cache only exists for a very short time, just retry. */
+        if (IS_ERR(existing_folio))
+                goto retry;
+
+        /* For now, we should only have single-page folios for btree inode. */
+        ASSERT(folio_nr_pages(existing_folio) == 1);
+
+        if (fs_info->nodesize < PAGE_SIZE) {
+                /*
+                 * We're going to reuse the existing page, can drop our page
+                 * and subpage structure now.
+                 */
+                __free_page(eb->pages[i]);
+                eb->pages[i] = folio_page(existing_folio, 0);
+        } else {
+                struct extent_buffer *existing_eb;
+
+                existing_eb = grab_extent_buffer(fs_info,
+                                                 folio_page(existing_folio, 0));
+                if (existing_eb) {
+                        /* The extent buffer still exists, we can use it directly. */
+                        *found_eb_ret = existing_eb;
+                        folio_unlock(existing_folio);
+                        folio_put(existing_folio);
+                        return 1;
+                }
+                /* The extent buffer no longer exists, we can reuse the folio. */
+                __free_page(eb->pages[i]);
+                eb->pages[i] = folio_page(existing_folio, 0);
+        }
+        return 0;
+}
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                           u64 start, u64 owner_root, int level)
 {
         unsigned long len = fs_info->nodesize;
         int num_pages;
-        int i;
-        unsigned long index = start >> PAGE_SHIFT;
+        int attached = 0;
         struct extent_buffer *eb;
-        struct extent_buffer *exists = NULL;
-        struct page *p;
+        struct extent_buffer *existing_eb = NULL;
         struct address_space *mapping = fs_info->btree_inode->i_mapping;
         struct btrfs_subpage *prealloc = NULL;
         u64 lockdep_owner = owner_root;

@@ -3535,29 +3597,36 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
         if (fs_info->nodesize < PAGE_SIZE) {
                 prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
                 if (IS_ERR(prealloc)) {
-                        exists = ERR_CAST(prealloc);
-                        goto free_eb;
+                        ret = PTR_ERR(prealloc);
+                        goto out;
                 }
         }
 
-        for (i = 0; i < num_pages; i++, index++) {
-                p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
-                if (!p) {
-                        exists = ERR_PTR(-ENOMEM);
-                        btrfs_free_subpage(prealloc);
-                        goto free_eb;
+        /* Allocate all pages first. */
+        ret = btrfs_alloc_page_array(num_pages, eb->pages, __GFP_NOFAIL);
+        if (ret < 0) {
+                btrfs_free_subpage(prealloc);
+                goto out;
+        }
+
+        /* Attach all pages to the filemap. */
+        for (int i = 0; i < num_pages; i++) {
+                struct page *p;
+
+                ret = attach_eb_page_to_filemap(eb, i, &existing_eb);
+                if (ret > 0) {
+                        ASSERT(existing_eb);
+                        goto out;
                 }
+                attached++;
 
+                /*
+                 * Only after attach_eb_page_to_filemap(), eb->pages[] is
+                 * reliable, as we may choose to reuse the existing page cache
+                 * and free the allocated page.
+                 */
+                p = eb->pages[i];
                 spin_lock(&mapping->private_lock);
-                exists = grab_extent_buffer(fs_info, p);
-                if (exists) {
-                        spin_unlock(&mapping->private_lock);
-                        unlock_page(p);
-                        put_page(p);
-                        mark_extent_buffer_accessed(exists, p);
-                        btrfs_free_subpage(prealloc);
-                        goto free_eb;
-                }
                 /* Should not fail, as we have preallocated the memory */
                 ret = attach_extent_buffer_page(eb, p, prealloc);
                 ASSERT(!ret);

@@ -3574,7 +3643,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 spin_unlock(&mapping->private_lock);
 
                 WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
-                eb->pages[i] = p;
 
                 /*
                  * Check if the current page is physically contiguous with previous eb

@@ -3601,20 +3669,19 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 eb->addr = page_address(eb->pages[0]) + offset_in_page(eb->start);
 again:
         ret = radix_tree_preload(GFP_NOFS);
-        if (ret) {
-                exists = ERR_PTR(ret);
-                goto free_eb;
-        }
+        if (ret)
+                goto out;
 
         spin_lock(&fs_info->buffer_lock);
         ret = radix_tree_insert(&fs_info->buffer_radix,
                                 start >> fs_info->sectorsize_bits, eb);
         spin_unlock(&fs_info->buffer_lock);
         radix_tree_preload_end();
         if (ret == -EEXIST) {
-                exists = find_extent_buffer(fs_info, start);
-                if (exists)
-                        goto free_eb;
+                ret = 0;
+                existing_eb = find_extent_buffer(fs_info, start);
+                if (existing_eb)
+                        goto out;
                 else
                         goto again;
         }

@@ -3627,19 +3694,28 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
          * btree_release_folio will correctly detect that a page belongs to a
          * live buffer and won't free them prematurely.
          */
-        for (i = 0; i < num_pages; i++)
+        for (int i = 0; i < num_pages; i++)
                 unlock_page(eb->pages[i]);
         return eb;
 
-free_eb:
+out:
         WARN_ON(!atomic_dec_and_test(&eb->refs));
-        for (i = 0; i < num_pages; i++) {
-                if (eb->pages[i])
-                        unlock_page(eb->pages[i]);
+        for (int i = 0; i < attached; i++) {
+                ASSERT(eb->pages[i]);
+                detach_extent_buffer_page(eb, eb->pages[i]);
+                unlock_page(eb->pages[i]);
         }
+        /*
+         * Now all pages of that extent buffer are unmapped, set the UNMAPPED
+         * flag, so it can be cleaned up without utilizing page->mapping.
+         */
+        set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
         btrfs_release_extent_buffer(eb);
-        return exists;
+        if (ret < 0)
+                return ERR_PTR(ret);
+        ASSERT(existing_eb);
+        return existing_eb;
 }
 
 static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
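A note on the retry in attach_eb_page_to_filemap() above: filemap_add_folio() fails when another folio already occupies the index, but by the time filemap_lock_folio() runs, that folio may already have been released, so the function loops back and retries the insert. A hedged userspace analogy of this add-or-reuse loop (struct slot, slot_insert() and attach() are made-up names, and the retry branch can only fire under real concurrency):

/*
 * Userspace analogy of attach_eb_page_to_filemap(): try to insert our
 * buffer; on collision, grab the existing entry; if it vanished in
 * between (a race only possible with real concurrency), start over.
 */
#include <stdlib.h>

struct slot { void *entry; };

static int slot_insert(struct slot *s, void *p)
{
        if (s->entry)
                return 0;       /* occupied, like filemap_add_folio() failing */
        s->entry = p;
        return 1;
}

static void *attach(struct slot *s, void *mine)
{
        for (;;) {
                void *existing;

                if (slot_insert(s, mine))
                        return mine;    /* our buffer got attached */
                existing = s->entry;    /* like filemap_lock_folio() */
                if (!existing)
                        continue;       /* entry released meanwhile: retry */
                free(mine);             /* reuse the existing buffer */
                return existing;
        }
}

int main(void)
{
        struct slot s = { NULL };
        void *a = malloc(64);
        void *b = malloc(64);

        attach(&s, a);                          /* a wins the slot */
        return attach(&s, b) == a ? 0 : 1;      /* b collides, is freed, a is reused */
}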

fs/btrfs/extent_io.h (+2 -1)

@@ -302,7 +302,8 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
 void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
                               struct extent_buffer *buf);
 
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+                           gfp_t extra_gfp);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 bool find_lock_delalloc_range(struct inode *inode,

fs/btrfs/inode.c (+1 -1)

@@ -10028,7 +10028,7 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
         pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
         if (!pages)
                 return -ENOMEM;
-        ret = btrfs_alloc_page_array(nr_pages, pages);
+        ret = btrfs_alloc_page_array(nr_pages, pages, 0);
         if (ret) {
                 ret = -ENOMEM;
                 goto out;

fs/btrfs/raid56.c (+3 -3)

@@ -964,7 +964,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
 {
         int ret;
 
-        ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
+        ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, 0);
         if (ret < 0)
                 return ret;
         /* Mapping all sectors */

@@ -979,7 +979,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
         int ret;
 
         ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
-                                     rbio->stripe_pages + data_pages);
+                                     rbio->stripe_pages + data_pages, 0);
         if (ret < 0)
                 return ret;
 

@@ -1530,7 +1530,7 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
         const int data_pages = rbio->nr_data * rbio->stripe_npages;
         int ret;
 
-        ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages);
+        ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, 0);
         if (ret < 0)
                 return ret;
 

fs/btrfs/scrub.c (+1 -1)

@@ -261,7 +261,7 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
         atomic_set(&stripe->pending_io, 0);
         spin_lock_init(&stripe->write_error_lock);
 
-        ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages);
+        ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, 0);
         if (ret < 0)
                 goto error;
 
