
Commit 2bee7eb

dennisszhou authored and kdave committed
btrfs: discard one region at a time in async discard
The prior two patches added discarding via a background workqueue. This just piggybacked off of the fstrim code to trim the whole block group at once. Inevitably this is worse performance-wise and will aggressively overtrim, but it was nice to plumb the other infrastructure to keep the patches easier to review.

This adds the real goal of this series, which is discarding slowly (ie. a slow, long-running fstrim). The discarding is split into two phases: extents and then bitmaps. The reason for this is twofold. First, the bitmap regions overlap the extent regions. Second, discarding the extents first lets the newly trimmed bitmaps have the highest chance of coalescing when being re-added to the free space cache.

Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Dennis Zhou <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 6e80d4f commit 2bee7eb
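To make the two-phase flow described in the commit message easier to follow, here is a minimal, standalone C sketch of the intended state machine. It is illustrative only: the demo_* names, the fixed region size, and the printf-driven loop are invented for this example and are not part of the btrfs code.

/* Illustrative model only; not btrfs code. Build: cc -o demo demo.c */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum demo_discard_state {               /* mirrors btrfs_discard_state */
        DEMO_DISCARD_EXTENTS,
        DEMO_DISCARD_BITMAPS,
        DEMO_DISCARD_RESET_CURSOR,
};

struct demo_block_group {
        uint64_t start;                 /* first byte of the block group */
        uint64_t end;                   /* one past the last byte */
        uint64_t cursor;                /* next offset to discard */
        enum demo_discard_state state;
};

/* Discard a single bounded region per invocation, as the async worker does. */
static bool demo_discard_step(struct demo_block_group *bg, uint64_t max_len)
{
        if (bg->state == DEMO_DISCARD_RESET_CURSOR) {
                bg->cursor = bg->start;
                bg->state = DEMO_DISCARD_EXTENTS;
        }

        uint64_t len = bg->end - bg->cursor;
        if (len > max_len)
                len = max_len;
        printf("discard %s region [%llu, %llu)\n",
               bg->state == DEMO_DISCARD_BITMAPS ? "bitmap" : "extent",
               (unsigned long long)bg->cursor,
               (unsigned long long)(bg->cursor + len));
        bg->cursor += len;

        if (bg->cursor >= bg->end) {
                if (bg->state == DEMO_DISCARD_BITMAPS)
                        return true;            /* both passes finished */
                bg->cursor = bg->start;         /* restart for the bitmap pass */
                bg->state = DEMO_DISCARD_BITMAPS;
        }
        return false;                           /* more work remains */
}

int main(void)
{
        struct demo_block_group bg = {
                .start = 0, .end = 1024, .state = DEMO_DISCARD_RESET_CURSOR,
        };

        while (!demo_discard_step(&bg, 256))
                ;
        return 0;
}

The extents pass runs first so that, once the bitmap pass trims the overlapping ranges, the re-added free space has the best chance to coalesce, matching the reasoning in the commit message.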

File tree
4 files changed: +191 -46 lines changed


fs/btrfs/block-group.h

Lines changed: 15 additions & 0 deletions
@@ -12,6 +12,19 @@ enum btrfs_disk_cache_state {
 	BTRFS_DC_SETUP,
 };
 
+/*
+ * This describes the state of the block_group for async discard. This is due
+ * to the two pass nature of it where extent discarding is prioritized over
+ * bitmap discarding. BTRFS_DISCARD_RESET_CURSOR is set when we are resetting
+ * between lists to prevent contention for discard state variables
+ * (eg. discard_cursor).
+ */
+enum btrfs_discard_state {
+	BTRFS_DISCARD_EXTENTS,
+	BTRFS_DISCARD_BITMAPS,
+	BTRFS_DISCARD_RESET_CURSOR,
+};
+
 /*
  * Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
  * only allocate a chunk if we really need one.
@@ -121,6 +134,8 @@ struct btrfs_block_group {
 	struct list_head discard_list;
 	int discard_index;
 	u64 discard_eligible_time;
+	u64 discard_cursor;
+	enum btrfs_discard_state discard_state;
 
 	/* For dirty block groups */
 	struct list_head dirty_list;

fs/btrfs/discard.c

Lines changed: 68 additions & 17 deletions
@@ -21,27 +21,30 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
 	return &discard_ctl->discard_list[block_group->discard_index];
 }
 
-static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
-				struct btrfs_block_group *block_group)
+static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+				  struct btrfs_block_group *block_group)
 {
-	spin_lock(&discard_ctl->lock);
-
-	if (!btrfs_run_discard_work(discard_ctl)) {
-		spin_unlock(&discard_ctl->lock);
+	if (!btrfs_run_discard_work(discard_ctl))
 		return;
-	}
 
 	if (list_empty(&block_group->discard_list) ||
 	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
 		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
 			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
 		block_group->discard_eligible_time = (ktime_get_ns() +
 						      BTRFS_DISCARD_DELAY);
+		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 	}
 
 	list_move_tail(&block_group->discard_list,
 		       get_discard_list(discard_ctl, block_group));
+}
 
+static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+				struct btrfs_block_group *block_group)
+{
+	spin_lock(&discard_ctl->lock);
+	__add_to_discard_list(discard_ctl, block_group);
 	spin_unlock(&discard_ctl->lock);
 }
 
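The refactor in the hunk above is the usual locked-wrapper / lock-held-helper split: add_to_discard_list() takes discard_ctl->lock and delegates to __add_to_discard_list(), which expects the lock to already be held, so a caller such as peek_discard_list() (later in this diff) can requeue a block group without dropping the lock it holds. Below is a generic sketch of that pattern, with invented counter_* names and a pthread mutex standing in for the kernel spinlock; it is not btrfs code.

/* Generic illustration of the locked-wrapper/__helper split; not btrfs code.
 * Build: cc -o counter counter.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

struct counter_ctl {
        pthread_mutex_t lock;
        long value;
};

/* Caller must already hold ctl->lock, like __add_to_discard_list(). */
static void __counter_add(struct counter_ctl *ctl, long delta)
{
        ctl->value += delta;
}

/* Public entry point that takes the lock itself, like add_to_discard_list(). */
static void counter_add(struct counter_ctl *ctl, long delta)
{
        pthread_mutex_lock(&ctl->lock);
        __counter_add(ctl, delta);
        pthread_mutex_unlock(&ctl->lock);
}

/* A caller already inside the lock reuses the helper without deadlocking. */
static long counter_add_and_read(struct counter_ctl *ctl, long delta)
{
        long v;

        pthread_mutex_lock(&ctl->lock);
        __counter_add(ctl, delta);      /* same trick peek_discard_list() relies on */
        v = ctl->value;
        pthread_mutex_unlock(&ctl->lock);
        return v;
}

int main(void)
{
        struct counter_ctl ctl = { PTHREAD_MUTEX_INITIALIZER, 0 };

        counter_add(&ctl, 2);
        printf("value = %ld\n", counter_add_and_read(&ctl, 3));
        return 0;
}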
@@ -60,6 +63,7 @@ static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
 	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 	block_group->discard_eligible_time = (ktime_get_ns() +
 					      BTRFS_DISCARD_UNUSED_DELAY);
+	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 	list_add_tail(&block_group->discard_list,
 		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
 
@@ -127,23 +131,40 @@ static struct btrfs_block_group *find_next_block_group(
 /**
  * peek_discard_list - wrap find_next_block_group()
  * @discard_ctl: discard control
+ * @discard_state: the discard_state of the block_group after state management
  *
  * This wraps find_next_block_group() and sets the block_group to be in use.
+ * discard_state's control flow is managed here. Variables related to
+ * discard_state are reset here as needed (eg. discard_cursor). @discard_state
+ * is remembered as it may change while we're discarding, but we want the
+ * discard to execute in the context determined here.
  */
 static struct btrfs_block_group *peek_discard_list(
-					struct btrfs_discard_ctl *discard_ctl)
+					struct btrfs_discard_ctl *discard_ctl,
+					enum btrfs_discard_state *discard_state)
 {
 	struct btrfs_block_group *block_group;
 	const u64 now = ktime_get_ns();
 
 	spin_lock(&discard_ctl->lock);
-
+again:
 	block_group = find_next_block_group(discard_ctl, now);
 
-	if (block_group && now < block_group->discard_eligible_time)
+	if (block_group && now > block_group->discard_eligible_time) {
+		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
+		    block_group->used != 0) {
+			__add_to_discard_list(discard_ctl, block_group);
+			goto again;
+		}
+		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
+			block_group->discard_cursor = block_group->start;
+			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
+		}
+		discard_ctl->block_group = block_group;
+		*discard_state = block_group->discard_state;
+	} else {
 		block_group = NULL;
-
-	discard_ctl->block_group = block_group;
+	}
 
 	spin_unlock(&discard_ctl->lock);
 
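The out-parameter added in the hunk above exists so the worker acts on the state chosen while discard_ctl->lock was held, even if discard_state changes concurrently afterwards, as the updated comment explains. Here is a generic snapshot-under-lock sketch of that idea, with invented job_* names; it is not btrfs code.

/* Illustration of snapshotting shared state under a lock; not btrfs code.
 * Build: cc -o job job.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

enum job_phase { JOB_PHASE_A, JOB_PHASE_B };

struct job_ctl {
        pthread_mutex_t lock;
        enum job_phase phase;           /* may be changed by other threads */
};

/* Decide what to do while holding the lock and hand back a stable copy. */
static void job_peek(struct job_ctl *ctl, enum job_phase *snapshot)
{
        pthread_mutex_lock(&ctl->lock);
        *snapshot = ctl->phase;
        pthread_mutex_unlock(&ctl->lock);
}

int main(void)
{
        struct job_ctl ctl = { PTHREAD_MUTEX_INITIALIZER, JOB_PHASE_A };
        enum job_phase phase;

        job_peek(&ctl, &phase);
        /* Work is driven by the snapshot, not by re-reading ctl->phase. */
        printf("running phase %s\n", phase == JOB_PHASE_A ? "A" : "B");
        return 0;
}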
@@ -254,24 +275,54 @@ static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
  * btrfs_discard_workfn - discard work function
  * @work: work
  *
- * This finds the next block_group to start discarding and then discards it.
+ * This finds the next block_group to start discarding and then discards a
+ * single region. It does this in a two-pass fashion: first extents and second
+ * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
  */
 static void btrfs_discard_workfn(struct work_struct *work)
 {
 	struct btrfs_discard_ctl *discard_ctl;
 	struct btrfs_block_group *block_group;
+	enum btrfs_discard_state discard_state;
 	u64 trimmed = 0;
 
 	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 
-	block_group = peek_discard_list(discard_ctl);
+	block_group = peek_discard_list(discard_ctl, &discard_state);
 	if (!block_group || !btrfs_run_discard_work(discard_ctl))
 		return;
 
-	btrfs_trim_block_group(block_group, &trimmed, block_group->start,
-			       btrfs_block_group_end(block_group), 0);
+	/* Perform discarding */
+	if (discard_state == BTRFS_DISCARD_BITMAPS)
+		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
+					       block_group->discard_cursor,
+					       btrfs_block_group_end(block_group),
+					       0, true);
+	else
+		btrfs_trim_block_group_extents(block_group, &trimmed,
+					       block_group->discard_cursor,
+					       btrfs_block_group_end(block_group),
+					       0, true);
+
+	/* Determine next steps for a block_group */
+	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
+		if (discard_state == BTRFS_DISCARD_BITMAPS) {
+			btrfs_finish_discard_pass(discard_ctl, block_group);
+		} else {
+			block_group->discard_cursor = block_group->start;
+			spin_lock(&discard_ctl->lock);
+			if (block_group->discard_state !=
+			    BTRFS_DISCARD_RESET_CURSOR)
+				block_group->discard_state =
+							BTRFS_DISCARD_BITMAPS;
+			spin_unlock(&discard_ctl->lock);
+		}
+	}
+
+	spin_lock(&discard_ctl->lock);
+	discard_ctl->block_group = NULL;
+	spin_unlock(&discard_ctl->lock);
 
-	btrfs_finish_discard_pass(discard_ctl, block_group);
 	btrfs_discard_schedule_work(discard_ctl, false);
 }
 