Skip to content

Commit 7ceb17e

Browse files
committed
md: Allow devices to be re-added to a read-only array.
When assembling an array incrementally we might want to make it available when "enough" devices are present, but maybe not "all" devices are present. If the remaining devices appear before the array is actually used, they should be added transparently. We do this by using the "read-auto" mode where the array acts like it is read-only until a write request arrives. Currently an add-device request switches a read-auto array to active. This means that only one device can be added after the array is first made read-auto. This isn't a problem for RAID5, but is not ideal for RAID6 or RAID10. Also we don't really want to switch the array out of read-auto at all when re-adding a device as this doesn't really imply any change. So: - remove the "md_update_sb()" call from add_new_disk(). This isn't really needed as just adding a disk doesn't require a metadata update. Instead, just set MD_CHANGE_DEVS. This will effect a metadata update soon enough, once the array is not read-only. - Allow the ADD_NEW_DISK ioctl to succeed without activating a read-auto array, provided the MD_DISK_SYNC flag is set. In this case, the device will be rejected if it cannot be added with the correct device number, or has an incorrect event count. - Teach remove_and_add_spares() to be careful about adding spares when the array is read-only (or read-mostly) - only add devices that are thought to be in-sync, and only do it if the array is in-sync itself. - In md_check_recovery, use remove_and_add_spares in the read-only case, rather than open coding just the 'remove' part of it. Reported-by: Martin Wilck <[email protected]> Signed-off-by: NeilBrown <[email protected]>
1 parent 7e83ccb commit 7ceb17e

File tree

1 file changed

+57
-26
lines changed

1 file changed

+57
-26
lines changed

drivers/md/md.c

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5816,7 +5816,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
58165816
else
58175817
sysfs_notify_dirent_safe(rdev->sysfs_state);
58185818

5819-
md_update_sb(mddev, 1);
5819+
set_bit(MD_CHANGE_DEVS, &mddev->flags);
58205820
if (mddev->degraded)
58215821
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
58225822
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -6503,6 +6503,24 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
65036503
err = hot_remove_disk(mddev, new_decode_dev(arg));
65046504
goto done_unlock;
65056505

6506+
case ADD_NEW_DISK:
6507+
/* We can support ADD_NEW_DISK on read-only arrays
6508+
* only if we are re-adding a preexisting device.
6509+
* So require mddev->pers and MD_DISK_SYNC.
6510+
*/
6511+
if (mddev->pers) {
6512+
mdu_disk_info_t info;
6513+
if (copy_from_user(&info, argp, sizeof(info)))
6514+
err = -EFAULT;
6515+
else if (!(info.state & (1<<MD_DISK_SYNC)))
6516+
/* Need to clear read-only for this */
6517+
break;
6518+
else
6519+
err = add_new_disk(mddev, &info);
6520+
goto done_unlock;
6521+
}
6522+
break;
6523+
65066524
case BLKROSET:
65076525
if (get_user(ro, (int __user *)(arg))) {
65086526
err = -EFAULT;
@@ -7685,17 +7703,36 @@ static int remove_and_add_spares(struct mddev *mddev,
76857703
!test_bit(In_sync, &rdev->flags) &&
76867704
!test_bit(Faulty, &rdev->flags))
76877705
spares++;
7688-
if (rdev->raid_disk < 0
7689-
&& !test_bit(Faulty, &rdev->flags)) {
7690-
rdev->recovery_offset = 0;
7691-
if (mddev->pers->
7692-
hot_add_disk(mddev, rdev) == 0) {
7693-
if (sysfs_link_rdev(mddev, rdev))
7694-
/* failure here is OK */;
7695-
spares++;
7696-
md_new_event(mddev);
7697-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
7698-
}
7706+
if (rdev->raid_disk >= 0)
7707+
continue;
7708+
if (test_bit(Faulty, &rdev->flags))
7709+
continue;
7710+
if (mddev->ro &&
7711+
rdev->saved_raid_disk < 0)
7712+
continue;
7713+
7714+
rdev->recovery_offset = 0;
7715+
if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
7716+
spin_lock_irq(&mddev->write_lock);
7717+
if (mddev->in_sync)
7718+
/* OK, this device, which is in_sync,
7719+
* will definitely be noticed before
7720+
* the next write, so recovery isn't
7721+
* needed.
7722+
*/
7723+
rdev->recovery_offset = mddev->recovery_cp;
7724+
spin_unlock_irq(&mddev->write_lock);
7725+
}
7726+
if (mddev->ro && rdev->recovery_offset != MaxSector)
7727+
/* not safe to add this disk now */
7728+
continue;
7729+
if (mddev->pers->
7730+
hot_add_disk(mddev, rdev) == 0) {
7731+
if (sysfs_link_rdev(mddev, rdev))
7732+
/* failure here is OK */;
7733+
spares++;
7734+
md_new_event(mddev);
7735+
set_bit(MD_CHANGE_DEVS, &mddev->flags);
76997736
}
77007737
}
77017738
no_add:
@@ -7804,22 +7841,16 @@ void md_check_recovery(struct mddev *mddev)
78047841
int spares = 0;
78057842

78067843
if (mddev->ro) {
7807-
/* Only thing we do on a ro array is remove
7808-
* failed devices.
7844+
/* On a read-only array we can:
7845+
* - remove failed devices
7846+
* - add already-in_sync devices if the array itself
7847+
* is in-sync.
7848+
* As we only add devices that are already in-sync,
7849+
* we can activate the spares immediately.
78097850
*/
7810-
struct md_rdev *rdev;
7811-
rdev_for_each(rdev, mddev)
7812-
if (rdev->raid_disk >= 0 &&
7813-
!test_bit(Blocked, &rdev->flags) &&
7814-
test_bit(Faulty, &rdev->flags) &&
7815-
atomic_read(&rdev->nr_pending)==0) {
7816-
if (mddev->pers->hot_remove_disk(
7817-
mddev, rdev) == 0) {
7818-
sysfs_unlink_rdev(mddev, rdev);
7819-
rdev->raid_disk = -1;
7820-
}
7821-
}
78227851
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7852+
remove_and_add_spares(mddev, NULL);
7853+
mddev->pers->spare_active(mddev);
78237854
goto unlock;
78247855
}
78257856

0 commit comments

Comments
 (0)