@@ -674,19 +674,22 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
  * @nr_pages:   number of pages to allocate
  * @page_array: the array to fill with pages; any existing non-null entries in
  *              the array will be skipped
+ * @extra_gfp:  the extra GFP flags for the allocation.
  *
  * Return: 0        if all pages were able to be allocated;
  *         -ENOMEM  otherwise, the partially allocated pages would be freed and
  *                  the array slots zeroed
  */
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+			   gfp_t extra_gfp)
 {
 	unsigned int allocated;
 
 	for (allocated = 0; allocated < nr_pages;) {
 		unsigned int last = allocated;
 
-		allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array);
+		allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
+						   nr_pages, page_array);
 
 		if (allocated == nr_pages)
 			return 0;
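
For context, a minimal usage sketch of the updated helper (illustrative only, not part of the patch): callers that need no extra flags pass 0 and keep the plain GFP_NOFS behaviour, while alloc_extent_buffer() below passes __GFP_NOFAIL to preserve the semantics of the find_or_create_page(..., GFP_NOFS | __GFP_NOFAIL) call it replaces.

```c
/* Illustrative only: how callers are expected to use @extra_gfp. */
unsigned int nr_pages = 16;
struct page *pages[16] = { 0 };
int ret;

/* Plain GFP_NOFS bulk allocation, may return -ENOMEM. */
ret = btrfs_alloc_page_array(nr_pages, pages, 0);

/* GFP_NOFS | __GFP_NOFAIL, as used by alloc_extent_buffer() below. */
ret = btrfs_alloc_page_array(nr_pages, pages, __GFP_NOFAIL);
```
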
@@ -3219,7 +3222,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
 	 */
 	set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
 
-	ret = btrfs_alloc_page_array(num_pages, new->pages);
+	ret = btrfs_alloc_page_array(num_pages, new->pages, 0);
 	if (ret) {
 		btrfs_release_extent_buffer(new);
 		return NULL;
@@ -3255,7 +3258,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 		return NULL;
 
 	num_pages = num_extent_pages(eb);
-	ret = btrfs_alloc_page_array(num_pages, eb->pages);
+	ret = btrfs_alloc_page_array(num_pages, eb->pages, 0);
 	if (ret)
 		goto err;
 
@@ -3475,16 +3478,75 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
 	return 0;
 }
 
+
+/*
+ * Return 0 if eb->pages[i] is attached to btree inode successfully.
+ * Return >0 if there is already another extent buffer for the range,
+ * and @found_eb_ret would be updated.
+ */
+static int attach_eb_page_to_filemap(struct extent_buffer *eb, int i,
+				     struct extent_buffer **found_eb_ret)
+{
+
+	struct btrfs_fs_info *fs_info = eb->fs_info;
+	struct address_space *mapping = fs_info->btree_inode->i_mapping;
+	const unsigned long index = eb->start >> PAGE_SHIFT;
+	struct folio *existing_folio;
+	int ret;
+
+	ASSERT(found_eb_ret);
+
+	/* Caller should ensure the page exists. */
+	ASSERT(eb->pages[i]);
+
+retry:
+	ret = filemap_add_folio(mapping, page_folio(eb->pages[i]), index + i,
+				GFP_NOFS | __GFP_NOFAIL);
+	if (!ret)
+		return 0;
+
+	existing_folio = filemap_lock_folio(mapping, index + i);
+	/* The page cache only exists for a very short time, just retry. */
+	if (IS_ERR(existing_folio))
+		goto retry;
+
+	/* For now, we should only have single-page folios for btree inode. */
+	ASSERT(folio_nr_pages(existing_folio) == 1);
+
+	if (fs_info->nodesize < PAGE_SIZE) {
+		/*
+		 * We're going to reuse the existing page, can drop our page
+		 * and subpage structure now.
+		 */
+		__free_page(eb->pages[i]);
+		eb->pages[i] = folio_page(existing_folio, 0);
+	} else {
+		struct extent_buffer *existing_eb;
+
+		existing_eb = grab_extent_buffer(fs_info,
+						 folio_page(existing_folio, 0));
+		if (existing_eb) {
+			/* The extent buffer still exists, we can use it directly. */
+			*found_eb_ret = existing_eb;
+			folio_unlock(existing_folio);
+			folio_put(existing_folio);
+			return 1;
+		}
+		/* The extent buffer no longer exists, we can reuse the folio. */
+		__free_page(eb->pages[i]);
+		eb->pages[i] = folio_page(existing_folio, 0);
+	}
+	return 0;
+}
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start, u64 owner_root, int level)
 {
 	unsigned long len = fs_info->nodesize;
 	int num_pages;
-	int i;
-	unsigned long index = start >> PAGE_SHIFT;
+	int attached = 0;
 	struct extent_buffer *eb;
-	struct extent_buffer *exists = NULL;
-	struct page *p;
+	struct extent_buffer *existing_eb = NULL;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
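
The helper above has a simple contract that the rewritten alloc_extent_buffer() relies on; a condensed sketch of how a caller consumes it (this mirrors the loop added in the next hunk and is shown only to make the contract explicit):

```c
/* Illustrative sketch of the attach_eb_page_to_filemap() contract. */
for (int i = 0; i < num_pages; i++) {
	ret = attach_eb_page_to_filemap(eb, i, &existing_eb);
	if (ret > 0) {
		/* Another eb already owns this range, hand it back. */
		ASSERT(existing_eb);
		goto out;
	}
	/*
	 * ret == 0: the page is attached; note that eb->pages[i] may now
	 * point at the page that was already in the page cache, so only
	 * read it after this call.
	 */
	attached++;
}
```
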
@@ -3535,29 +3597,36 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (fs_info->nodesize < PAGE_SIZE) {
 		prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
 		if (IS_ERR(prealloc)) {
-			exists = ERR_CAST(prealloc);
-			goto free_eb;
+			ret = PTR_ERR(prealloc);
+			goto out;
 		}
 	}
 
-	for (i = 0; i < num_pages; i++, index++) {
-		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
-		if (!p) {
-			exists = ERR_PTR(-ENOMEM);
-			btrfs_free_subpage(prealloc);
-			goto free_eb;
+	/* Allocate all pages first. */
+	ret = btrfs_alloc_page_array(num_pages, eb->pages, __GFP_NOFAIL);
+	if (ret < 0) {
+		btrfs_free_subpage(prealloc);
+		goto out;
+	}
+
+	/* Attach all pages to the filemap. */
+	for (int i = 0; i < num_pages; i++) {
+		struct page *p;
+
+		ret = attach_eb_page_to_filemap(eb, i, &existing_eb);
+		if (ret > 0) {
+			ASSERT(existing_eb);
+			goto out;
 		}
+		attached++;
 
+		/*
+		 * Only after attach_eb_page_to_filemap(), eb->pages[] is
+		 * reliable, as we may choose to reuse the existing page cache
+		 * and free the allocated page.
+		 */
+		p = eb->pages[i];
 		spin_lock(&mapping->private_lock);
-		exists = grab_extent_buffer(fs_info, p);
-		if (exists) {
-			spin_unlock(&mapping->private_lock);
-			unlock_page(p);
-			put_page(p);
-			mark_extent_buffer_accessed(exists, p);
-			btrfs_free_subpage(prealloc);
-			goto free_eb;
-		}
 		/* Should not fail, as we have preallocated the memory */
 		ret = attach_extent_buffer_page(eb, p, prealloc);
 		ASSERT(!ret);
@@ -3574,7 +3643,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		spin_unlock(&mapping->private_lock);
 
 		WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
-		eb->pages[i] = p;
 
 		/*
 		 * Check if the current page is physically contiguous with previous eb
@@ -3601,20 +3669,19 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		eb->addr = page_address(eb->pages[0]) + offset_in_page(eb->start);
 again:
 	ret = radix_tree_preload(GFP_NOFS);
-	if (ret) {
-		exists = ERR_PTR(ret);
-		goto free_eb;
-	}
+	if (ret)
+		goto out;
 
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
 				start >> fs_info->sectorsize_bits, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
-		exists = find_extent_buffer(fs_info, start);
-		if (exists)
-			goto free_eb;
+		ret = 0;
+		existing_eb = find_extent_buffer(fs_info, start);
+		if (existing_eb)
+			goto out;
 		else
 			goto again;
 	}
@@ -3627,19 +3694,28 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	 * btree_release_folio will correctly detect that a page belongs to a
 	 * live buffer and won't free them prematurely.
 	 */
-	for (i = 0; i < num_pages; i++)
+	for (int i = 0; i < num_pages; i++)
 		unlock_page(eb->pages[i]);
 	return eb;
 
-free_eb:
+out:
 	WARN_ON(!atomic_dec_and_test(&eb->refs));
-	for (i = 0; i < num_pages; i++) {
-		if (eb->pages[i])
-			unlock_page(eb->pages[i]);
+	for (int i = 0; i < attached; i++) {
+		ASSERT(eb->pages[i]);
+		detach_extent_buffer_page(eb, eb->pages[i]);
+		unlock_page(eb->pages[i]);
 	}
+	/*
+	 * Now all pages of that extent buffer are unmapped, set the UNMAPPED
+	 * flag, so it can be cleaned up without utilizing page->mapping.
+	 */
+	set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
 	btrfs_release_extent_buffer(eb);
-	return exists;
+	if (ret < 0)
+		return ERR_PTR(ret);
+	ASSERT(existing_eb);
+	return existing_eb;
 }
 
 static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
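
Putting the hunks together, a condensed outline of the reworked alloc_extent_buffer() flow (paraphrased from the diff above, not a literal copy of the code):

```c
/*
 * Reworked flow of alloc_extent_buffer(), paraphrased:
 *
 * 1. Allocate every page up front:
 *        btrfs_alloc_page_array(num_pages, eb->pages, __GFP_NOFAIL);
 * 2. Attach each page to the btree inode's mapping with
 *    attach_eb_page_to_filemap(); a >0 return means another extent
 *    buffer already covers the range, and the helper may replace
 *    eb->pages[i] with the page that is already in the page cache.
 * 3. Insert the eb into fs_info->buffer_radix; -EEXIST means another
 *    task won the race, so return that task's eb instead.
 *
 * Error path ("out:"): detach and unlock only the pages counted in
 * @attached, set EXTENT_BUFFER_UNMAPPED so the release path does not
 * rely on page->mapping, then return ERR_PTR(ret) or the existing eb.
 */
```
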