Skip to content

Commit 0eb77e9

Browse files
Christoph Lameter, torvalds
Christoph Lameter
authored and committed
vmstat: make vmstat_updater deferrable again and shut down on idle
Currently the vmstat updater is not deferrable as a result of commit ba4877b ("vmstat: do not use deferrable delayed work for vmstat_update"). This in turn can cause multiple interruptions of the applications because the vmstat updater may run at different times than tick processing. No good. Make vmstat_update deferrable again and provide a function that folds the differentials when the processor is going to idle mode, thus addressing the issue of the above commit in a clean way. Note that the shepherd thread will continue scanning the differentials from another processor and will reenable the vmstat workers if it detects any changes. Fixes: ba4877b ("vmstat: do not use deferrable delayed work for vmstat_update"). Signed-off-by: Christoph Lameter <[email protected]>. Cc: Michal Hocko <[email protected]>. Cc: Johannes Weiner <[email protected]>. Cc: Tetsuo Handa <[email protected]>. Signed-off-by: Andrew Morton <[email protected]>. Signed-off-by: Linus Torvalds <[email protected]>.
1 parent 686739f commit 0eb77e9

File tree

3 files changed

+47
-25
lines changed

3 files changed

+47
-25
lines changed

include/linux/vmstat.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
189189
extern void dec_zone_state(struct zone *, enum zone_stat_item);
190190
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
191191

192+
void quiet_vmstat(void);
192193
void cpu_vm_stats_fold(int cpu);
193194
void refresh_zone_stat_thresholds(void);
194195

@@ -249,6 +250,7 @@ static inline void __dec_zone_page_state(struct page *page,
249250

250251
static inline void refresh_zone_stat_thresholds(void) { }
251252
static inline void cpu_vm_stats_fold(int cpu) { }
253+
static inline void quiet_vmstat(void) { }
252254

253255
static inline void drain_zonestat(struct zone *zone,
254256
struct per_cpu_pageset *pset) { }

kernel/sched/idle.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ static void cpu_idle_loop(void)
219219
*/
220220

221221
__current_set_polling();
222+
quiet_vmstat();
222223
tick_nohz_idle_enter();
223224

224225
while (!need_resched()) {

mm/vmstat.c

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ static int fold_diff(int *diff)
460460
*
461461
* The function returns the number of global counters updated.
462462
*/
463-
static int refresh_cpu_vm_stats(void)
463+
static int refresh_cpu_vm_stats(bool do_pagesets)
464464
{
465465
struct zone *zone;
466466
int i;
@@ -484,33 +484,35 @@ static int refresh_cpu_vm_stats(void)
484484
#endif
485485
}
486486
}
487-
cond_resched();
488487
#ifdef CONFIG_NUMA
489-
/*
490-
* Deal with draining the remote pageset of this
491-
* processor
492-
*
493-
* Check if there are pages remaining in this pageset
494-
* if not then there is nothing to expire.
495-
*/
496-
if (!__this_cpu_read(p->expire) ||
488+
if (do_pagesets) {
489+
cond_resched();
490+
/*
491+
* Deal with draining the remote pageset of this
492+
* processor
493+
*
494+
* Check if there are pages remaining in this pageset
495+
* if not then there is nothing to expire.
496+
*/
497+
if (!__this_cpu_read(p->expire) ||
497498
!__this_cpu_read(p->pcp.count))
498-
continue;
499+
continue;
499500

500-
/*
501-
* We never drain zones local to this processor.
502-
*/
503-
if (zone_to_nid(zone) == numa_node_id()) {
504-
__this_cpu_write(p->expire, 0);
505-
continue;
506-
}
501+
/*
502+
* We never drain zones local to this processor.
503+
*/
504+
if (zone_to_nid(zone) == numa_node_id()) {
505+
__this_cpu_write(p->expire, 0);
506+
continue;
507+
}
507508

508-
if (__this_cpu_dec_return(p->expire))
509-
continue;
509+
if (__this_cpu_dec_return(p->expire))
510+
continue;
510511

511-
if (__this_cpu_read(p->pcp.count)) {
512-
drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
513-
changes++;
512+
if (__this_cpu_read(p->pcp.count)) {
513+
drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
514+
changes++;
515+
}
514516
}
515517
#endif
516518
}
@@ -1386,7 +1388,7 @@ static cpumask_var_t cpu_stat_off;
13861388

13871389
static void vmstat_update(struct work_struct *w)
13881390
{
1389-
if (refresh_cpu_vm_stats()) {
1391+
if (refresh_cpu_vm_stats(true)) {
13901392
/*
13911393
* Counters were updated so we expect more updates
13921394
* to occur in the future. Keep on running the
@@ -1417,6 +1419,23 @@ static void vmstat_update(struct work_struct *w)
14171419
}
14181420
}
14191421

1422+
/*
1423+
* Switch off vmstat processing and then fold all the remaining differentials
1424+
* until the diffs stay at zero. The function is used by NOHZ and can only be
1425+
* invoked when tick processing is not active.
1426+
*/
1427+
void quiet_vmstat(void)
1428+
{
1429+
if (system_state != SYSTEM_RUNNING)
1430+
return;
1431+
1432+
do {
1433+
if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
1434+
cancel_delayed_work(this_cpu_ptr(&vmstat_work));
1435+
1436+
} while (refresh_cpu_vm_stats(false));
1437+
}
1438+
14201439
/*
14211440
* Check if the diffs for a certain cpu indicate that
14221441
* an update is needed.
@@ -1449,7 +1468,7 @@ static bool need_update(int cpu)
14491468
*/
14501469
static void vmstat_shepherd(struct work_struct *w);
14511470

1452-
static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
1471+
static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
14531472

14541473
static void vmstat_shepherd(struct work_struct *w)
14551474
{

0 commit comments

Comments
 (0)