Skip to content

Commit 3ce94ce

Browse files
YuKuai-huaweiliu-song-6
authored andcommitted
md: fix duplicate filename for rdev
Commit 5792a28 ("[PATCH] md: avoid a deadlock when removing a device from an md array via sysfs") delays the deletion of rdev, however, this introduces a window that rdev can be added again while the deletion is not done yet, and sysfs will complain about duplicate filename. Follow up patches try to fix this problem by flushing workqueue, however, flush_rdev_wq() is just dead code, the progress in md_kick_rdev_from_array(): 1) list_del_rcu(&rdev->same_set); 2) synchronize_rcu(); 3) queue_work(md_rdev_misc_wq, &rdev->del_work); So in flush_rdev_wq(), if rdev is found in the list, work_pending() can never pass, in the meantime, if work is queued, then rdev can never be found in the list. flush_rdev_wq() can be replaced by flush_workqueue() directly, however, this approach is not good: - the workqueue is global, this synchronization for all raid disks is not necessary. - flush_workqueue can't be called under 'reconfig_mutex', there is still a small window between flush_workqueue() and mddev_lock() that other contexts can queue new work, hence the problem is not solved completely. sysfs already has apis to support delete itself through writer, and these apis, specifically sysfs_break/unbreak_active_protection(), is used to support deleting rdev synchronously. Therefore, the above commit can be reverted, and sysfs duplicate filename can be avoided. A new mdadm regression test is proposed as well([1]). [1] https://lore.kernel.org/linux-raid/[email protected]/ Fixes: 5792a28 ("[PATCH] md: avoid a deadlock when removing a device from an md array via sysfs") Signed-off-by: Yu Kuai <[email protected]> Signed-off-by: Song Liu <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent f8b20a4 commit 3ce94ce

File tree

2 files changed

+52
-44
lines changed

2 files changed

+52
-44
lines changed

drivers/md/md.c

Lines changed: 44 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,11 @@ static struct module *md_cluster_mod;
8787
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
8888
static struct workqueue_struct *md_wq;
8989
static struct workqueue_struct *md_misc_wq;
90-
static struct workqueue_struct *md_rdev_misc_wq;
9190

9291
static int remove_and_add_spares(struct mddev *mddev,
9392
struct md_rdev *this);
9493
static void mddev_detach(struct mddev *mddev);
94+
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
9595

9696
/*
9797
* Default number of read corrections we'll attempt on an rdev
@@ -643,9 +643,11 @@ void mddev_init(struct mddev *mddev)
643643
{
644644
mutex_init(&mddev->open_mutex);
645645
mutex_init(&mddev->reconfig_mutex);
646+
mutex_init(&mddev->delete_mutex);
646647
mutex_init(&mddev->bitmap_info.mutex);
647648
INIT_LIST_HEAD(&mddev->disks);
648649
INIT_LIST_HEAD(&mddev->all_mddevs);
650+
INIT_LIST_HEAD(&mddev->deleting);
649651
timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
650652
atomic_set(&mddev->active, 1);
651653
atomic_set(&mddev->openers, 0);
@@ -747,6 +749,24 @@ static void mddev_free(struct mddev *mddev)
747749

748750
static const struct attribute_group md_redundancy_group;
749751

752+
static void md_free_rdev(struct mddev *mddev)
753+
{
754+
struct md_rdev *rdev;
755+
struct md_rdev *tmp;
756+
757+
mutex_lock(&mddev->delete_mutex);
758+
if (list_empty(&mddev->deleting))
759+
goto out;
760+
761+
list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
762+
list_del_init(&rdev->same_set);
763+
kobject_del(&rdev->kobj);
764+
export_rdev(rdev, mddev);
765+
}
766+
out:
767+
mutex_unlock(&mddev->delete_mutex);
768+
}
769+
750770
void mddev_unlock(struct mddev *mddev)
751771
{
752772
if (mddev->to_remove) {
@@ -788,6 +808,8 @@ void mddev_unlock(struct mddev *mddev)
788808
} else
789809
mutex_unlock(&mddev->reconfig_mutex);
790810

811+
md_free_rdev(mddev);
812+
791813
/* As we've dropped the mutex we need a spinlock to
792814
* make sure the thread doesn't disappear
793815
*/
@@ -2428,13 +2450,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
24282450
return err;
24292451
}
24302452

2431-
static void rdev_delayed_delete(struct work_struct *ws)
2432-
{
2433-
struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2434-
kobject_del(&rdev->kobj);
2435-
kobject_put(&rdev->kobj);
2436-
}
2437-
24382453
void md_autodetect_dev(dev_t dev);
24392454

24402455
/* just for claiming the bdev */
@@ -2455,6 +2470,8 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev)
24552470

24562471
static void md_kick_rdev_from_array(struct md_rdev *rdev)
24572472
{
2473+
struct mddev *mddev = rdev->mddev;
2474+
24582475
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
24592476
list_del_rcu(&rdev->same_set);
24602477
pr_debug("md: unbind<%pg>\n", rdev->bdev);
@@ -2468,15 +2485,17 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
24682485
rdev->sysfs_unack_badblocks = NULL;
24692486
rdev->sysfs_badblocks = NULL;
24702487
rdev->badblocks.count = 0;
2471-
/* We need to delay this, otherwise we can deadlock when
2472-
* writing to 'remove' to "dev/state". We also need
2473-
* to delay it due to rcu usage.
2474-
*/
2488+
24752489
synchronize_rcu();
2476-
INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2477-
kobject_get(&rdev->kobj);
2478-
queue_work(md_rdev_misc_wq, &rdev->del_work);
2479-
export_rdev(rdev, rdev->mddev);
2490+
2491+
/*
2492+
* kobject_del() will wait for all in progress writers to be done, where
2493+
* reconfig_mutex is held, hence it can't be called under
2494+
* reconfig_mutex and it's delayed to mddev_unlock().
2495+
*/
2496+
mutex_lock(&mddev->delete_mutex);
2497+
list_add(&rdev->same_set, &mddev->deleting);
2498+
mutex_unlock(&mddev->delete_mutex);
24802499
}
24812500

24822501
static void export_array(struct mddev *mddev)
@@ -3544,13 +3563,18 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
35443563
{
35453564
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
35463565
struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3566+
struct kernfs_node *kn = NULL;
35473567
ssize_t rv;
35483568
struct mddev *mddev = rdev->mddev;
35493569

35503570
if (!entry->store)
35513571
return -EIO;
35523572
if (!capable(CAP_SYS_ADMIN))
35533573
return -EACCES;
3574+
3575+
if (entry->store == state_store && cmd_match(page, "remove"))
3576+
kn = sysfs_break_active_protection(kobj, attr);
3577+
35543578
rv = mddev ? mddev_lock(mddev) : -ENODEV;
35553579
if (!rv) {
35563580
if (rdev->mddev == NULL)
@@ -3559,6 +3583,10 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
35593583
rv = entry->store(rdev, page, length);
35603584
mddev_unlock(mddev);
35613585
}
3586+
3587+
if (kn)
3588+
sysfs_unbreak_active_protection(kn);
3589+
35623590
return rv;
35633591
}
35643592

@@ -4484,20 +4512,6 @@ null_show(struct mddev *mddev, char *page)
44844512
return -EINVAL;
44854513
}
44864514

4487-
/* need to ensure rdev_delayed_delete() has completed */
4488-
static void flush_rdev_wq(struct mddev *mddev)
4489-
{
4490-
struct md_rdev *rdev;
4491-
4492-
rcu_read_lock();
4493-
rdev_for_each_rcu(rdev, mddev)
4494-
if (work_pending(&rdev->del_work)) {
4495-
flush_workqueue(md_rdev_misc_wq);
4496-
break;
4497-
}
4498-
rcu_read_unlock();
4499-
}
4500-
45014515
static ssize_t
45024516
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
45034517
{
@@ -4525,7 +4539,6 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
45254539
minor != MINOR(dev))
45264540
return -EOVERFLOW;
45274541

4528-
flush_rdev_wq(mddev);
45294542
err = mddev_lock(mddev);
45304543
if (err)
45314544
return err;
@@ -5595,7 +5608,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
55955608
* removed (mddev_delayed_delete).
55965609
*/
55975610
flush_workqueue(md_misc_wq);
5598-
flush_workqueue(md_rdev_misc_wq);
55995611

56005612
mutex_lock(&disks_mutex);
56015613
mddev = mddev_alloc(dev);
@@ -7558,9 +7570,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
75587570

75597571
}
75607572

7561-
if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7562-
flush_rdev_wq(mddev);
7563-
75647573
if (cmd == HOT_REMOVE_DISK)
75657574
/* need to ensure recovery thread has run */
75667575
wait_event_interruptible_timeout(mddev->sb_wait,
@@ -9623,10 +9632,6 @@ static int __init md_init(void)
96239632
if (!md_misc_wq)
96249633
goto err_misc_wq;
96259634

9626-
md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
9627-
if (!md_rdev_misc_wq)
9628-
goto err_rdev_misc_wq;
9629-
96309635
ret = __register_blkdev(MD_MAJOR, "md", md_probe);
96319636
if (ret < 0)
96329637
goto err_md;
@@ -9645,8 +9650,6 @@ static int __init md_init(void)
96459650
err_mdp:
96469651
unregister_blkdev(MD_MAJOR, "md");
96479652
err_md:
9648-
destroy_workqueue(md_rdev_misc_wq);
9649-
err_rdev_misc_wq:
96509653
destroy_workqueue(md_misc_wq);
96519654
err_misc_wq:
96529655
destroy_workqueue(md_wq);
@@ -9942,7 +9945,6 @@ static __exit void md_exit(void)
99429945
}
99439946
spin_unlock(&all_mddevs_lock);
99449947

9945-
destroy_workqueue(md_rdev_misc_wq);
99469948
destroy_workqueue(md_misc_wq);
99479949
destroy_workqueue(md_wq);
99489950
}

drivers/md/md.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,6 @@ struct md_rdev {
122122

123123
struct serial_in_rdev *serial; /* used for raid1 io serialization */
124124

125-
struct work_struct del_work; /* used for delayed sysfs removal */
126-
127125
struct kernfs_node *sysfs_state; /* handle for 'state'
128126
* sysfs entry */
129127
/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
@@ -531,6 +529,14 @@ struct mddev {
531529
unsigned int good_device_nr; /* good device num within cluster raid */
532530
unsigned int noio_flag; /* for memalloc scope API */
533531

532+
/*
533+
* Temporarily store rdev that will be finally removed when
534+
* reconfig_mutex is unlocked.
535+
*/
536+
struct list_head deleting;
537+
/* Protect the deleting list */
538+
struct mutex delete_mutex;
539+
534540
bool has_superblocks:1;
535541
bool fail_last_dev:1;
536542
bool serialize_policy:1;

0 commit comments

Comments
 (0)