Skip to content

Commit 912afc3

Browse files
committed
Merge tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull device-mapper updates from Alasdair G Kergon:
"Improve multipath's retrying mechanism in some defined circumstances and provide a simple reserve/release mechanism for userspace tools to access thin provisioning metadata while the pool is in use."

* tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm thin: provide userspace access to pool metadata
  dm thin: use slab mempools
  dm mpath: allow ioctls to trigger pg init
  dm mpath: delay retry of bypassed pg
  dm mpath: reduce size of struct multipath
2 parents 4fc3acf + cc8394d commit 912afc3

File tree

6 files changed

+322
-90
lines changed

6 files changed

+322
-90
lines changed

Documentation/device-mapper/thin-provisioning.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,17 @@ iii) Messages
287287
the current transaction id is when you change it with this
288288
compare-and-swap message.
289289

290+
reserve_metadata_snap
291+
292+
Reserve a copy of the data mapping btree for use by userland.
293+
This allows userland to inspect the mappings as they were when
294+
this message was executed. Use the pool's status command to
295+
get the root block associated with the metadata snapshot.
296+
297+
release_metadata_snap
298+
299+
Release a previously reserved copy of the data mapping btree.
300+
290301
'thin' target
291302
-------------
292303

drivers/md/dm-mpath.c

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/slab.h>
1919
#include <linux/time.h>
2020
#include <linux/workqueue.h>
21+
#include <linux/delay.h>
2122
#include <scsi/scsi_dh.h>
2223
#include <linux/atomic.h>
2324

@@ -61,11 +62,11 @@ struct multipath {
6162
struct list_head list;
6263
struct dm_target *ti;
6364

64-
spinlock_t lock;
65-
6665
const char *hw_handler_name;
6766
char *hw_handler_params;
6867

68+
spinlock_t lock;
69+
6970
unsigned nr_priority_groups;
7071
struct list_head priority_groups;
7172

@@ -81,16 +82,17 @@ struct multipath {
8182
struct priority_group *next_pg; /* Switch to this PG if set */
8283
unsigned repeat_count; /* I/Os left before calling PS again */
8384

84-
unsigned queue_io; /* Must we queue all I/O? */
85-
unsigned queue_if_no_path; /* Queue I/O if last path fails? */
86-
unsigned saved_queue_if_no_path;/* Saved state during suspension */
85+
unsigned queue_io:1; /* Must we queue all I/O? */
86+
unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */
87+
unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
88+
8789
unsigned pg_init_retries; /* Number of times to retry pg_init */
8890
unsigned pg_init_count; /* Number of times pg_init called */
8991
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
9092

93+
unsigned queue_size;
9194
struct work_struct process_queued_ios;
9295
struct list_head queued_ios;
93-
unsigned queue_size;
9496

9597
struct work_struct trigger_event;
9698

@@ -328,14 +330,18 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
328330
/*
329331
* Loop through priority groups until we find a valid path.
330332
* First time we skip PGs marked 'bypassed'.
331-
* Second time we only try the ones we skipped.
333+
* Second time we only try the ones we skipped, but set
334+
* pg_init_delay_retry so we do not hammer controllers.
332335
*/
333336
do {
334337
list_for_each_entry(pg, &m->priority_groups, list) {
335338
if (pg->bypassed == bypassed)
336339
continue;
337-
if (!__choose_path_in_pg(m, pg, nr_bytes))
340+
if (!__choose_path_in_pg(m, pg, nr_bytes)) {
341+
if (!bypassed)
342+
m->pg_init_delay_retry = 1;
338343
return;
344+
}
339345
}
340346
} while (bypassed--);
341347

@@ -481,9 +487,6 @@ static void process_queued_ios(struct work_struct *work)
481487

482488
spin_lock_irqsave(&m->lock, flags);
483489

484-
if (!m->queue_size)
485-
goto out;
486-
487490
if (!m->current_pgpath)
488491
__choose_pgpath(m, 0);
489492

@@ -496,7 +499,6 @@ static void process_queued_ios(struct work_struct *work)
496499
if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
497500
__pg_init_all_paths(m);
498501

499-
out:
500502
spin_unlock_irqrestore(&m->lock, flags);
501503
if (!must_queue)
502504
dispatch_queued_ios(m);
@@ -1517,11 +1519,16 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
15171519
static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
15181520
unsigned long arg)
15191521
{
1520-
struct multipath *m = (struct multipath *) ti->private;
1521-
struct block_device *bdev = NULL;
1522-
fmode_t mode = 0;
1522+
struct multipath *m = ti->private;
1523+
struct block_device *bdev;
1524+
fmode_t mode;
15231525
unsigned long flags;
1524-
int r = 0;
1526+
int r;
1527+
1528+
again:
1529+
bdev = NULL;
1530+
mode = 0;
1531+
r = 0;
15251532

15261533
spin_lock_irqsave(&m->lock, flags);
15271534

@@ -1546,6 +1553,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
15461553
if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
15471554
r = scsi_verify_blk_ioctl(NULL, cmd);
15481555

1556+
if (r == -EAGAIN && !fatal_signal_pending(current)) {
1557+
queue_work(kmultipathd, &m->process_queued_ios);
1558+
msleep(10);
1559+
goto again;
1560+
}
1561+
15491562
return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
15501563
}
15511564

@@ -1643,7 +1656,7 @@ static int multipath_busy(struct dm_target *ti)
16431656
*---------------------------------------------------------------*/
16441657
static struct target_type multipath_target = {
16451658
.name = "multipath",
1646-
.version = {1, 3, 0},
1659+
.version = {1, 4, 0},
16471660
.module = THIS_MODULE,
16481661
.ctr = multipath_ctr,
16491662
.dtr = multipath_dtr,

drivers/md/dm-thin-metadata.c

Lines changed: 130 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,31 +1082,155 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
10821082
return 0;
10831083
}
10841084

1085-
static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
1086-
dm_block_t *result)
1085+
/*
 * Reserve a metadata snapshot: shadow the superblock and record the
 * location of the shadow copy in the live superblock's held_root field.
 *
 * Returns 0 on success, -EBUSY if the shadowed superblock already carries
 * a non-zero held_root, or the error reported by dm_tm_shadow_block() or
 * dm_bm_write_lock().  pmd->need_commit is set on the -EBUSY path, on the
 * write-lock failure path and on success.
 *
 * The caller visible in this file, dm_pool_reserve_metadata_snap(), holds
 * pmd->root_lock for writing around this call.
 *
 * NOTE(review): any status returned by dm_sm_inc_block(), dm_tm_unlock()
 * and dm_bm_unlock() is ignored in this function - confirm that is
 * intended.
 */
static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
1086+
{
1087+
int r, inc;
1088+
struct thin_disk_superblock *disk_super;
1089+
struct dm_block *copy, *sblock;
1090+
dm_block_t held_root;
1091+
1092+
/*
1093+
* Copy the superblock.
1094+
*/
1095+
dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
1096+
r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
1097+
&sb_validator, &copy, &inc);
1098+
if (r)
1099+
return r;
1100+
1101+
/* inc is an output of dm_tm_shadow_block(); this path requires it to be non-zero. */
BUG_ON(!inc);
1102+
1103+
held_root = dm_block_location(copy);
1104+
disk_super = dm_block_data(copy);
1105+
1106+
if (le64_to_cpu(disk_super->held_root)) {
1107+
DMWARN("Pool metadata snapshot already exists: release this before taking another.");
1108+
1109+
/* Back out: presumably drops the copy just made; the existing held_root is left untouched. */
dm_tm_dec(pmd->tm, held_root);
1110+
dm_tm_unlock(pmd->tm, copy);
1111+
pmd->need_commit = 1;
1112+
1113+
return -EBUSY;
1114+
}
1115+
1116+
/*
1117+
* Wipe the spacemap since we're not publishing this.
1118+
*/
1119+
memset(&disk_super->data_space_map_root, 0,
1120+
sizeof(disk_super->data_space_map_root));
1121+
memset(&disk_super->metadata_space_map_root, 0,
1122+
sizeof(disk_super->metadata_space_map_root));
1123+
1124+
/*
1125+
* Increment the data structures that need to be preserved.
1126+
*/
1127+
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
1128+
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
1129+
dm_tm_unlock(pmd->tm, copy);
1130+
1131+
/*
1132+
* Write the held root into the superblock.
1133+
*/
1134+
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1135+
&sb_validator, &sblock);
1136+
if (r) {
1137+
/*
 * NOTE(review): only held_root is decremented here; the two dm_tm_inc()
 * calls above are not undone on this path - confirm that is intended.
 */
dm_tm_dec(pmd->tm, held_root);
1138+
pmd->need_commit = 1;
1139+
return r;
1140+
}
1141+
1142+
disk_super = dm_block_data(sblock);
1143+
disk_super->held_root = cpu_to_le64(held_root);
1144+
dm_bm_unlock(sblock);
1145+
1146+
pmd->need_commit = 1;
1147+
1148+
return 0;
1149+
}
1150+
1151+
/*
 * Locked entry point for reserving a metadata snapshot: takes
 * pmd->root_lock for writing, calls __reserve_metadata_snap() and returns
 * its result unchanged after dropping the lock.
 */
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
1152+
{
1153+
int r;
1154+
1155+
down_write(&pmd->root_lock);
1156+
r = __reserve_metadata_snap(pmd);
1157+
up_write(&pmd->root_lock);
1158+
1159+
return r;
1160+
}
1161+
1162+
/*
 * Release a previously reserved metadata snapshot: clear held_root in the
 * live superblock, then decrement the blocks referenced by the snapshot
 * copy (its data_mapping_root and device_details_root) and the copy
 * itself.
 *
 * Returns -EINVAL if held_root was already zero, the error from
 * dm_bm_write_lock() or dm_tm_read_lock(), or otherwise the result of the
 * final dm_tm_unlock().
 *
 * The caller visible in this file, dm_pool_release_metadata_snap(), holds
 * pmd->root_lock for writing around this call.
 *
 * NOTE(review): held_root is zeroed and need_commit set before the copy
 * is read-locked, so if dm_tm_read_lock() fails the superblock no longer
 * points at the copy and the decrements below are skipped.
 * NOTE(review): the reserve path uses dm_tm_inc() on the two roots while
 * this path uses dm_sm_dec_block() and ignores its return value - confirm
 * that a plain space-map decrement is the intended inverse.
 */
static int __release_metadata_snap(struct dm_pool_metadata *pmd)
10871163
{
10881164
int r;
10891165
struct thin_disk_superblock *disk_super;
1090-
struct dm_block *sblock;
1166+
struct dm_block *sblock, *copy;
1167+
dm_block_t held_root;
10911168

10921169
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
10931170
&sb_validator, &sblock);
10941171
if (r)
10951172
return r;
10961173

1174+
/* Remember the snapshot location, then clear it in the live superblock. */
disk_super = dm_block_data(sblock);
1175+
held_root = le64_to_cpu(disk_super->held_root);
1176+
disk_super->held_root = cpu_to_le64(0);
1177+
pmd->need_commit = 1;
1178+
1179+
dm_bm_unlock(sblock);
1180+
1181+
if (!held_root) {
1182+
DMWARN("No pool metadata snapshot found: nothing to release.");
1183+
return -EINVAL;
1184+
}
1185+
1186+
r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
1187+
if (r)
1188+
return r;
1189+
1190+
/* From here on disk_super refers to the snapshot copy, not the live superblock. */
disk_super = dm_block_data(copy);
1191+
dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
1192+
dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
1193+
dm_sm_dec_block(pmd->metadata_sm, held_root);
1194+
1195+
return dm_tm_unlock(pmd->tm, copy);
1196+
}
1197+
1198+
/*
 * Locked entry point for releasing a metadata snapshot: takes
 * pmd->root_lock for writing, calls __release_metadata_snap() and returns
 * its result unchanged after dropping the lock.
 */
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
1199+
{
1200+
int r;
1201+
1202+
down_write(&pmd->root_lock);
1203+
r = __release_metadata_snap(pmd);
1204+
up_write(&pmd->root_lock);
1205+
1206+
return r;
1207+
}
1208+
1209+
/*
 * Read the held_root field from the superblock into *result.
 *
 * The superblock is taken with a read lock (dm_bm_read_lock) and released
 * before returning.  Returns the error from dm_bm_read_lock() if the lock
 * fails (in which case *result is not written), otherwise the result of
 * dm_bm_unlock().  A value of 0 in *result is what the reserve/release
 * paths in this file treat as "no snapshot held".
 */
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
1210+
dm_block_t *result)
1211+
{
1212+
int r;
1213+
struct thin_disk_superblock *disk_super;
1214+
struct dm_block *sblock;
1215+
1216+
r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1217+
&sb_validator, &sblock);
1218+
if (r)
1219+
return r;
1220+
10971221
disk_super = dm_block_data(sblock);
10981222
*result = le64_to_cpu(disk_super->held_root);
10991223

11001224
return dm_bm_unlock(sblock);
11011225
}
11021226

1103-
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
1104-
dm_block_t *result)
1227+
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
1228+
dm_block_t *result)
11051229
{
11061230
int r;
11071231

11081232
down_read(&pmd->root_lock);
1109-
r = __get_held_metadata_root(pmd, result);
1233+
r = __get_metadata_snap(pmd, result);
11101234
up_read(&pmd->root_lock);
11111235

11121236
return r;

drivers/md/dm-thin-metadata.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,18 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
9090

9191
/*
9292
* Hold/get root for userspace transaction.
93+
*
94+
* The metadata snapshot is a copy of the current superblock (minus the
95+
* space maps). Userland can access the data structures for READ
96+
* operations only. A small performance hit is incurred by providing this
97+
* copy of the metadata to userland due to extra copy-on-write operations
98+
* on the metadata nodes. Release this as soon as you finish with it.
9399
*/
94-
int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
100+
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
101+
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
95102

96-
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
97-
dm_block_t *result);
103+
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
104+
dm_block_t *result);
98105

99106
/*
100107
* Actions on a single virtual device.

0 commit comments

Comments
 (0)