Skip to content

Commit 8575ec2

Browse files
Hugh Dickinstorvalds
Hugh Dickins
authored andcommitted
compact_pgdat: workaround lockdep warning in kswapd
I get this lockdep warning from swapping load on linux-next, due to "vmscan: kswapd carefully call compaction". ================================= [ INFO: inconsistent lock state ] 3.3.0-rc2-next-20120201 #5 Not tainted --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. kswapd0/28 [HC0[0]:SC0[0]:HE1:SE1] takes: (pcpu_alloc_mutex){+.+.?.}, at: [<ffffffff810d6684>] pcpu_alloc+0x67/0x325 {RECLAIM_FS-ON-W} state was registered at: [<ffffffff81099b75>] mark_held_locks+0xd7/0x103 [<ffffffff8109a13c>] lockdep_trace_alloc+0x85/0x9e [<ffffffff810f6bdc>] __kmalloc+0x6c/0x14b [<ffffffff810d57fd>] pcpu_mem_zalloc+0x59/0x62 [<ffffffff810d5d16>] pcpu_extend_area_map+0x26/0xb1 [<ffffffff810d679f>] pcpu_alloc+0x182/0x325 [<ffffffff810d694d>] __alloc_percpu+0xb/0xd [<ffffffff8142ebfd>] snmp_mib_init+0x1e/0x2e [<ffffffff8185cd8d>] ipv4_mib_init_net+0x7a/0x184 [<ffffffff813dc963>] ops_init.clone.0+0x6b/0x73 [<ffffffff813dc9cc>] register_pernet_operations+0x61/0xa0 [<ffffffff813dca8e>] register_pernet_subsys+0x29/0x42 [<ffffffff8185d044>] inet_init+0x1ad/0x252 [<ffffffff810002e3>] do_one_initcall+0x7a/0x12f [<ffffffff81832bc5>] kernel_init+0x9d/0x11e [<ffffffff814e51e4>] kernel_thread_helper+0x4/0x10 irq event stamp: 656613 hardirqs last enabled at (656613): [<ffffffff814e0ddc>] __mutex_unlock_slowpath+0x104/0x128 hardirqs last disabled at (656612): [<ffffffff814e0d34>] __mutex_unlock_slowpath+0x5c/0x128 softirqs last enabled at (655568): [<ffffffff8105b4a5>] __do_softirq+0x120/0x136 softirqs last disabled at (654757): [<ffffffff814e52dc>] call_softirq+0x1c/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pcpu_alloc_mutex); <Interrupt> lock(pcpu_alloc_mutex); *** DEADLOCK *** no locks held by kswapd0/28. stack backtrace: Pid: 28, comm: kswapd0 Not tainted 3.3.0-rc2-next-20120201 #5 Call Trace: [<ffffffff810981f4>] print_usage_bug+0x1bf/0x1d0 [<ffffffff81096c3e>] ? print_irq_inversion_bug+0x1d9/0x1d9 [<ffffffff810982c0>] mark_lock_irq+0xbb/0x22e [<ffffffff810c5399>] ? free_hot_cold_page+0x13d/0x14f [<ffffffff81098684>] mark_lock+0x251/0x331 [<ffffffff81098893>] mark_irqflags+0x12f/0x141 [<ffffffff81098e32>] __lock_acquire+0x58d/0x753 [<ffffffff810d6684>] ? pcpu_alloc+0x67/0x325 [<ffffffff81099433>] lock_acquire+0x54/0x6a [<ffffffff810d6684>] ? pcpu_alloc+0x67/0x325 [<ffffffff8107a5b8>] ? add_preempt_count+0xa9/0xae [<ffffffff814e0a21>] mutex_lock_nested+0x5e/0x315 [<ffffffff810d6684>] ? pcpu_alloc+0x67/0x325 [<ffffffff81098f81>] ? __lock_acquire+0x6dc/0x753 [<ffffffff810c9fb0>] ? __pagevec_release+0x2c/0x2c [<ffffffff810d6684>] pcpu_alloc+0x67/0x325 [<ffffffff810c9fb0>] ? __pagevec_release+0x2c/0x2c [<ffffffff810d694d>] __alloc_percpu+0xb/0xd [<ffffffff8106c35e>] schedule_on_each_cpu+0x23/0x110 [<ffffffff810c9fcb>] lru_add_drain_all+0x10/0x12 [<ffffffff810f126f>] __compact_pgdat+0x20/0x182 [<ffffffff810f15c2>] compact_pgdat+0x27/0x29 [<ffffffff810c306b>] ? zone_watermark_ok+0x1a/0x1c [<ffffffff810cdf6f>] balance_pgdat+0x732/0x751 [<ffffffff810ce0ed>] kswapd+0x15f/0x178 [<ffffffff810cdf8e>] ? balance_pgdat+0x751/0x751 [<ffffffff8106fd11>] kthread+0x84/0x8c [<ffffffff814e51e4>] kernel_thread_helper+0x4/0x10 [<ffffffff810787ed>] ? finish_task_switch+0x85/0xea [<ffffffff814e3861>] ? retint_restore_args+0xe/0xe [<ffffffff8106fc8d>] ? __init_kthread_worker+0x56/0x56 [<ffffffff814e51e0>] ? gs_change+0xb/0xb The RECLAIM_FS notations indicate that it's doing the GFP_FS checking that Nick hacked into lockdep a while back: I think we're intended to read that "<Interrupt>" in the DEADLOCK scenario as "<Direct reclaim>". I'm hazy, I have not reached any conclusion as to whether it's right to complain or not; but I believe it's uneasy about kswapd now doing the mutex_lock(&pcpu_alloc_mutex) which lru_add_drain_all() entails. Nor have I reached any conclusion as to whether it's important for kswapd to do that draining or not. But so as not to get blocked on this, with lockdep disabled from giving further reports, here's a patch which removes the lru_add_drain_all() from kswapd's callpath (and calls it only once from compact_nodes(), instead of once per node). Signed-off-by: Hugh Dickins <[email protected]> Cc: Rik van Riel <[email protected]> Acked-by: Mel Gorman <[email protected]> Cc: Tejun Heo <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent aff6224 commit 8575ec2

File tree

1 file changed

+12
-10
lines changed

1 file changed

+12
-10
lines changed

mm/compaction.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -680,9 +680,6 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
680680
int zoneid;
681681
struct zone *zone;
682682

683-
/* Flush pending updates to the LRU lists */
684-
lru_add_drain_all();
685-
686683
for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
687684

688685
zone = &pgdat->node_zones[zoneid];
@@ -727,24 +724,22 @@ int compact_pgdat(pg_data_t *pgdat, int order)
727724

728725
static int compact_node(int nid)
729726
{
730-
pg_data_t *pgdat;
731727
struct compact_control cc = {
732728
.order = -1,
733729
.sync = true,
734730
};
735731

736-
if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
737-
return -EINVAL;
738-
pgdat = NODE_DATA(nid);
739-
740-
return __compact_pgdat(pgdat, &cc);
732+
return __compact_pgdat(NODE_DATA(nid), &cc);
741733
}
742734

743735
/* Compact all nodes in the system */
744736
static int compact_nodes(void)
745737
{
746738
int nid;
747739

740+
/* Flush pending updates to the LRU lists */
741+
lru_add_drain_all();
742+
748743
for_each_online_node(nid)
749744
compact_node(nid);
750745

@@ -777,7 +772,14 @@ ssize_t sysfs_compact_node(struct device *dev,
777772
struct device_attribute *attr,
778773
const char *buf, size_t count)
779774
{
780-
compact_node(dev->id);
775+
int nid = dev->id;
776+
777+
if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
778+
/* Flush pending updates to the LRU lists */
779+
lru_add_drain_all();
780+
781+
compact_node(nid);
782+
}
781783

782784
return count;
783785
}

0 commit comments

Comments
 (0)