Skip to content

Commit f84311d

Browse files
hnaz authored and gregkh committed
mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page()
commit 22f2ac5 upstream. Antonio reports the following crash when using fuse under memory pressure: kernel BUG at /build/linux-a2WvEb/linux-4.4.0/mm/workingset.c:346! invalid opcode: 0000 [#1] SMP Modules linked in: all of them CPU: 2 PID: 63 Comm: kswapd0 Not tainted 4.4.0-36-generic #55-Ubuntu Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 task: ffff88040cae6040 ti: ffff880407488000 task.ti: ffff880407488000 RIP: shadow_lru_isolate+0x181/0x190 Call Trace: __list_lru_walk_one.isra.3+0x8f/0x130 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x34/0x50 shrink_slab.part.40+0x1ed/0x3d0 shrink_zone+0x2ca/0x2e0 kswapd+0x51e/0x990 kthread+0xd8/0xf0 ret_from_fork+0x3f/0x70 which corresponds to the following sanity check in the shadow node tracking: BUG_ON(node->count & RADIX_TREE_COUNT_MASK); The workingset code tracks radix tree nodes that exclusively contain shadow entries of evicted pages in them, and this (somewhat obscure) line checks whether there are real pages left that would interfere with reclaim of the radix tree node under memory pressure. While discussing ways how fuse might sneak pages into the radix tree past the workingset code, Miklos pointed to replace_page_cache_page(), and indeed there is a problem there: it properly accounts for the old page being removed - __delete_from_page_cache() does that - but then does a raw radix_tree_insert(), not accounting for the replacement page. Eventually the page count bits in node->count underflow while leaving the node incorrectly linked to the shadow node LRU. To address this, make sure replace_page_cache_page() uses the tracked page insertion code, page_cache_tree_insert(). This fixes the page accounting and makes sure page-containing nodes are properly unlinked from the shadow node LRU again. Also, make the sanity checks a bit less obscure by using the helpers for checking the number of pages and shadows in a radix tree node.
[[email protected]: backport for 4.4] Fixes: 449dd69 ("mm: keep page cache radix tree nodes in check") Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Johannes Weiner <[email protected]> Reported-by: Antonio SJ Musumeci <[email protected]> Debugged-by: Miklos Szeredi <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]> Signed-off-by: Michal Hocko <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 74688ce commit f84311d

File tree

3 files changed

+49
-49
lines changed

3 files changed

+49
-49
lines changed

include/linux/swap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node)
266266

267267
static inline void workingset_node_pages_dec(struct radix_tree_node *node)
268268
{
269+
VM_BUG_ON(!workingset_node_pages(node));
269270
node->count--;
270271
}
271272

@@ -281,6 +282,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
281282

282283
static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
283284
{
285+
VM_BUG_ON(!workingset_node_shadows(node));
284286
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
285287
}
286288

mm/filemap.c

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,48 @@
109109
* ->tasklist_lock (memory_failure, collect_procs_ao)
110110
*/
111111

112+
static int page_cache_tree_insert(struct address_space *mapping,
113+
struct page *page, void **shadowp)
114+
{
115+
struct radix_tree_node *node;
116+
void **slot;
117+
int error;
118+
119+
error = __radix_tree_create(&mapping->page_tree, page->index,
120+
&node, &slot);
121+
if (error)
122+
return error;
123+
if (*slot) {
124+
void *p;
125+
126+
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
127+
if (!radix_tree_exceptional_entry(p))
128+
return -EEXIST;
129+
if (shadowp)
130+
*shadowp = p;
131+
mapping->nrshadows--;
132+
if (node)
133+
workingset_node_shadows_dec(node);
134+
}
135+
radix_tree_replace_slot(slot, page);
136+
mapping->nrpages++;
137+
if (node) {
138+
workingset_node_pages_inc(node);
139+
/*
140+
* Don't track node that contains actual pages.
141+
*
142+
* Avoid acquiring the list_lru lock if already
143+
* untracked. The list_empty() test is safe as
144+
* node->private_list is protected by
145+
* mapping->tree_lock.
146+
*/
147+
if (!list_empty(&node->private_list))
148+
list_lru_del(&workingset_shadow_nodes,
149+
&node->private_list);
150+
}
151+
return 0;
152+
}
153+
112154
static void page_cache_tree_delete(struct address_space *mapping,
113155
struct page *page, void *shadow)
114156
{
@@ -546,7 +588,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
546588
memcg = mem_cgroup_begin_page_stat(old);
547589
spin_lock_irqsave(&mapping->tree_lock, flags);
548590
__delete_from_page_cache(old, NULL, memcg);
549-
error = radix_tree_insert(&mapping->page_tree, offset, new);
591+
error = page_cache_tree_insert(mapping, new, NULL);
550592
BUG_ON(error);
551593
mapping->nrpages++;
552594

@@ -570,48 +612,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
570612
}
571613
EXPORT_SYMBOL_GPL(replace_page_cache_page);
572614

573-
static int page_cache_tree_insert(struct address_space *mapping,
574-
struct page *page, void **shadowp)
575-
{
576-
struct radix_tree_node *node;
577-
void **slot;
578-
int error;
579-
580-
error = __radix_tree_create(&mapping->page_tree, page->index,
581-
&node, &slot);
582-
if (error)
583-
return error;
584-
if (*slot) {
585-
void *p;
586-
587-
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
588-
if (!radix_tree_exceptional_entry(p))
589-
return -EEXIST;
590-
if (shadowp)
591-
*shadowp = p;
592-
mapping->nrshadows--;
593-
if (node)
594-
workingset_node_shadows_dec(node);
595-
}
596-
radix_tree_replace_slot(slot, page);
597-
mapping->nrpages++;
598-
if (node) {
599-
workingset_node_pages_inc(node);
600-
/*
601-
* Don't track node that contains actual pages.
602-
*
603-
* Avoid acquiring the list_lru lock if already
604-
* untracked. The list_empty() test is safe as
605-
* node->private_list is protected by
606-
* mapping->tree_lock.
607-
*/
608-
if (!list_empty(&node->private_list))
609-
list_lru_del(&workingset_shadow_nodes,
610-
&node->private_list);
611-
}
612-
return 0;
613-
}
614-
615615
static int __add_to_page_cache_locked(struct page *page,
616616
struct address_space *mapping,
617617
pgoff_t offset, gfp_t gfp_mask,

mm/workingset.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -341,21 +341,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
341341
* no pages, so we expect to be able to remove them all and
342342
* delete and free the empty node afterwards.
343343
*/
344-
345-
BUG_ON(!node->count);
346-
BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
344+
BUG_ON(!workingset_node_shadows(node));
345+
BUG_ON(workingset_node_pages(node));
347346

348347
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
349348
if (node->slots[i]) {
350349
BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
351350
node->slots[i] = NULL;
352-
BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
353-
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
351+
workingset_node_shadows_dec(node);
354352
BUG_ON(!mapping->nrshadows);
355353
mapping->nrshadows--;
356354
}
357355
}
358-
BUG_ON(node->count);
356+
BUG_ON(workingset_node_shadows(node));
359357
inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM);
360358
if (!__radix_tree_delete_node(&mapping->page_tree, node))
361359
BUG();

0 commit comments

Comments
 (0)