Commit 29efc39

md/md0: optimize raid0 discard handling
There are complaints that raid0 discard handling is slow. Currently we divide a discard request into chunks and dispatch them to the underlying disks, and the block layer then merges them back into big requests. This causes a lot of request splitting and merging and uses significant CPU time.

A simpler idea is to calculate, for each RAID disk, the range an IO request covers and send a single discard request to that disk, which avoids the split/merge completely. Coly tried this approach previously, but the implementation became too complex because of raid0 zones. This patch always splits the bio at a zone boundary and handles the bio within one zone, which simplifies the implementation a lot.

Reviewed-by: NeilBrown <[email protected]>
Acked-by: Coly Li <[email protected]>
Signed-off-by: Shaohua Li <[email protected]>
1 parent 2214c26 commit 29efc39
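
As a rough, non-authoritative illustration of the per-disk range arithmetic described above, here is a minimal userspace sketch. It mirrors the variable names of the new raid0_handle_discard(), but replaces sector_div() with plain integer division, assumes the discard has already been clipped to a single zone and expressed as zone-relative offsets, and uses made-up geometry (4 member disks, 128-sector chunks) and a made-up discard range.

/*
 * Hedged sketch (plain userspace C, not kernel code): model of the per-disk
 * discard range calculation done by raid0_handle_discard() once a request
 * has been reduced to zone-relative sector offsets [start, end).
 * All numbers below are example values, not taken from the patch.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t chunk_sectors = 128;     /* example chunk size, in sectors */
        const unsigned int nb_dev = 4;          /* example number of disks in the zone */
        const uint64_t stripe_size = nb_dev * chunk_sectors;

        uint64_t start = 1000, end = 5000;      /* example discard range inside the zone */

        /* Stripe and disk holding the first and last sector of the range. */
        uint64_t first_stripe_index = start / stripe_size;
        uint64_t last_stripe_index = end / stripe_size;

        unsigned int start_disk_index =
                (start - first_stripe_index * stripe_size) / chunk_sectors;
        uint64_t start_disk_offset =
                (start - first_stripe_index * stripe_size) % chunk_sectors +
                first_stripe_index * chunk_sectors;

        unsigned int end_disk_index =
                (end - last_stripe_index * stripe_size) / chunk_sectors;
        uint64_t end_disk_offset =
                (end - last_stripe_index * stripe_size) % chunk_sectors +
                last_stripe_index * chunk_sectors;

        /* Each member gets at most one contiguous range instead of many chunks. */
        for (unsigned int disk = 0; disk < nb_dev; disk++) {
                uint64_t dev_start, dev_end;

                if (disk < start_disk_index)
                        dev_start = (first_stripe_index + 1) * chunk_sectors;
                else if (disk > start_disk_index)
                        dev_start = first_stripe_index * chunk_sectors;
                else
                        dev_start = start_disk_offset;

                if (disk < end_disk_index)
                        dev_end = (last_stripe_index + 1) * chunk_sectors;
                else if (disk > end_disk_index)
                        dev_end = last_stripe_index * chunk_sectors;
                else
                        dev_end = end_disk_offset;

                if (dev_end <= dev_start)
                        printf("disk %u: nothing to discard\n", disk);
                else
                        printf("disk %u: discard sectors [%llu, %llu)\n", disk,
                               (unsigned long long)dev_start,
                               (unsigned long long)dev_end);
        }
        return 0;
}

With these example numbers the sketch prints one contiguous range per member (disk 0 gets sectors [256, 1280) and disk 3 gets [232, 1160)), which is what allows the real code to issue a single __blkdev_issue_discard() per member instead of one request per chunk.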

File tree

1 file changed: +102 -14 lines changed


drivers/md/raid0.c

+102 -14
@@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev)
                 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
                 blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
                 blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
-                blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+                blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
 
                 blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
                 blk_queue_io_opt(mddev->queue,
@@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
         }
 }
 
+static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+{
+        struct r0conf *conf = mddev->private;
+        struct strip_zone *zone;
+        sector_t start = bio->bi_iter.bi_sector;
+        sector_t end;
+        unsigned int stripe_size;
+        sector_t first_stripe_index, last_stripe_index;
+        sector_t start_disk_offset;
+        unsigned int start_disk_index;
+        sector_t end_disk_offset;
+        unsigned int end_disk_index;
+        unsigned int disk;
+
+        zone = find_zone(conf, &start);
+
+        if (bio_end_sector(bio) > zone->zone_end) {
+                struct bio *split = bio_split(bio,
+                        zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+                        mddev->bio_set);
+                bio_chain(split, bio);
+                generic_make_request(bio);
+                bio = split;
+                end = zone->zone_end;
+        } else
+                end = bio_end_sector(bio);
+
+        if (zone != conf->strip_zone)
+                end = end - zone[-1].zone_end;
+
+        /* Now start and end is the offset in zone */
+        stripe_size = zone->nb_dev * mddev->chunk_sectors;
+
+        first_stripe_index = start;
+        sector_div(first_stripe_index, stripe_size);
+        last_stripe_index = end;
+        sector_div(last_stripe_index, stripe_size);
+
+        start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+                mddev->chunk_sectors;
+        start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
+                mddev->chunk_sectors) +
+                first_stripe_index * mddev->chunk_sectors;
+        end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+                mddev->chunk_sectors;
+        end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
+                mddev->chunk_sectors) +
+                last_stripe_index * mddev->chunk_sectors;
+
+        for (disk = 0; disk < zone->nb_dev; disk++) {
+                sector_t dev_start, dev_end;
+                struct bio *discard_bio = NULL;
+                struct md_rdev *rdev;
+
+                if (disk < start_disk_index)
+                        dev_start = (first_stripe_index + 1) *
+                                mddev->chunk_sectors;
+                else if (disk > start_disk_index)
+                        dev_start = first_stripe_index * mddev->chunk_sectors;
+                else
+                        dev_start = start_disk_offset;
+
+                if (disk < end_disk_index)
+                        dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+                else if (disk > end_disk_index)
+                        dev_end = last_stripe_index * mddev->chunk_sectors;
+                else
+                        dev_end = end_disk_offset;
+
+                if (dev_end <= dev_start)
+                        continue;
+
+                rdev = conf->devlist[(zone - conf->strip_zone) *
+                        conf->strip_zone[0].nb_dev + disk];
+                if (__blkdev_issue_discard(rdev->bdev,
+                        dev_start + zone->dev_start + rdev->data_offset,
+                        dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+                        !discard_bio)
+                        continue;
+                bio_chain(discard_bio, bio);
+                if (mddev->gendisk)
+                        trace_block_bio_remap(bdev_get_queue(rdev->bdev),
+                                discard_bio, disk_devt(mddev->gendisk),
+                                bio->bi_iter.bi_sector);
+                generic_make_request(discard_bio);
+        }
+        bio_endio(bio);
+}
+
 static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
         struct strip_zone *zone;
@@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                 return;
         }
 
+        if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
+                raid0_handle_discard(mddev, bio);
+                return;
+        }
+
         bio_sector = bio->bi_iter.bi_sector;
         sector = bio_sector;
         chunk_sects = mddev->chunk_sectors;
@@ -498,19 +592,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
         bio->bi_iter.bi_sector = sector + zone->dev_start +
                 tmp_dev->data_offset;
 
-        if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-                /* Just ignore it */
-                bio_endio(bio);
-        } else {
-                if (mddev->gendisk)
-                        trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-                                bio, disk_devt(mddev->gendisk),
-                                bio_sector);
-                mddev_check_writesame(mddev, bio);
-                mddev_check_write_zeroes(mddev, bio);
-                generic_make_request(bio);
-        }
+        if (mddev->gendisk)
+                trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+                        bio, disk_devt(mddev->gendisk),
+                        bio_sector);
+        mddev_check_writesame(mddev, bio);
+        mddev_check_write_zeroes(mddev, bio);
+        generic_make_request(bio);
 }
 
 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
