Skip to content

Commit 4b56938

Browse files
nhatsmrt authored and akpm00 committed
memcontrol: add helpers for hugetlb memcg accounting
Patch series "hugetlb memcg accounting", v4. Currently, hugetlb memory usage is not accounted for in the memory controller, which could lead to memory overprotection for cgroups with hugetlb-backed memory. This has been observed in our production system. For instance, here is one of our use cases: suppose there are two 32G containers. The machine is booted with hugetlb_cma=6G, and each container may or may not use up to 3 gigantic pages, depending on the workload within it. The rest is anon, cache, slab, etc. We can set the hugetlb cgroup limit of each cgroup to 3G to enforce hugetlb fairness. But it is very difficult to configure memory.max to keep overall consumption, including anon, cache, slab, etc., fair. What we have had to resort to is to constantly poll hugetlb usage and readjust memory.max. A similar procedure is applied to other memory limits (e.g. memory.low). However, this is rather cumbersome and buggy. Furthermore, when there is a delay in memory limit correction (e.g. when hugetlb usage changes between consecutive runs of the userspace agent), the system could be in an over/underprotected state. This patch series rectifies this issue by charging the memcg when the hugetlb folio is allocated, and uncharging when the folio is freed. In addition, a new selftest is added to demonstrate and verify this new behavior. This patch (of 4): This patch exposes charge committing and cancelling as parts of the memory controller interface. These functionalities are useful when the try_charge() and commit_charge() stages have to be separated by other actions in between (which can fail). One such example is the new hugetlb accounting behavior in the following patch. The patch also adds a helper function to obtain a reference to the current task's memcg.
Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Nhat Pham <[email protected]> Acked-by: Michal Hocko <[email protected]> Acked-by: Johannes Weiner <[email protected]> Cc: Frank van der Linden <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Muchun Song <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Roman Gushchin <[email protected]> Cc: Shakeel Butt <[email protected]> Cc: Shuah Khan <[email protected]> Cc: Tejun Heo <[email protected]> Cc: Yosry Ahmed <[email protected]> Cc: Zefan Li <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 59838b2 commit 4b56938

File tree

2 files changed

+68
-12
lines changed

2 files changed

+68
-12
lines changed

include/linux/memcontrol.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
652652
page_counter_read(&memcg->memory);
653653
}
654654

655+
void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
656+
655657
int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
656658

657659
/**
@@ -703,6 +705,8 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
703705
__mem_cgroup_uncharge_list(page_list);
704706
}
705707

708+
void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
709+
706710
void mem_cgroup_migrate(struct folio *old, struct folio *new);
707711

708712
/**
@@ -759,6 +763,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
759763

760764
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
761765

766+
struct mem_cgroup *get_mem_cgroup_from_current(void);
767+
762768
struct lruvec *folio_lruvec_lock(struct folio *folio);
763769
struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
764770
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
@@ -1239,6 +1245,11 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
12391245
return false;
12401246
}
12411247

1248+
static inline void mem_cgroup_commit_charge(struct folio *folio,
1249+
struct mem_cgroup *memcg)
1250+
{
1251+
}
1252+
12421253
static inline int mem_cgroup_charge(struct folio *folio,
12431254
struct mm_struct *mm, gfp_t gfp)
12441255
{
@@ -1263,6 +1274,11 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
12631274
{
12641275
}
12651276

1277+
static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
1278+
unsigned int nr_pages)
1279+
{
1280+
}
1281+
12661282
static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
12671283
{
12681284
}
@@ -1300,6 +1316,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
13001316
return NULL;
13011317
}
13021318

1319+
static inline struct mem_cgroup *get_mem_cgroup_from_current(void)
1320+
{
1321+
return NULL;
1322+
}
1323+
13031324
static inline
13041325
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
13051326
{

mm/memcontrol.c

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,27 @@ static __always_inline bool memcg_kmem_bypass(void)
10991099
return false;
11001100
}
11011101

1102+
/**
1103+
* get_mem_cgroup_from_current - Obtain a reference on current task's memcg.
1104+
*/
1105+
struct mem_cgroup *get_mem_cgroup_from_current(void)
1106+
{
1107+
struct mem_cgroup *memcg;
1108+
1109+
if (mem_cgroup_disabled())
1110+
return NULL;
1111+
1112+
again:
1113+
rcu_read_lock();
1114+
memcg = mem_cgroup_from_task(current);
1115+
if (!css_tryget(&memcg->css)) {
1116+
rcu_read_unlock();
1117+
goto again;
1118+
}
1119+
rcu_read_unlock();
1120+
return memcg;
1121+
}
1122+
11021123
/**
11031124
* mem_cgroup_iter - iterate over memory cgroup hierarchy
11041125
* @root: hierarchy root
@@ -2873,7 +2894,12 @@ static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
28732894
return try_charge_memcg(memcg, gfp_mask, nr_pages);
28742895
}
28752896

2876-
static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
2897+
/**
2898+
* mem_cgroup_cancel_charge() - cancel an uncommitted try_charge() call.
2899+
* @memcg: memcg previously charged.
2900+
* @nr_pages: number of pages previously charged.
2901+
*/
2902+
void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
28772903
{
28782904
if (mem_cgroup_is_root(memcg))
28792905
return;
@@ -2898,6 +2924,22 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
28982924
folio->memcg_data = (unsigned long)memcg;
28992925
}
29002926

2927+
/**
2928+
* mem_cgroup_commit_charge - commit a previously successful try_charge().
2929+
* @folio: folio to commit the charge to.
2930+
* @memcg: memcg previously charged.
2931+
*/
2932+
void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
2933+
{
2934+
css_get(&memcg->css);
2935+
commit_charge(folio, memcg);
2936+
2937+
local_irq_disable();
2938+
mem_cgroup_charge_statistics(memcg, folio_nr_pages(folio));
2939+
memcg_check_events(memcg, folio_nid(folio));
2940+
local_irq_enable();
2941+
}
2942+
29012943
#ifdef CONFIG_MEMCG_KMEM
29022944
/*
29032945
* The allocated objcg pointers array is not accounted directly.
@@ -6116,15 +6158,15 @@ static void __mem_cgroup_clear_mc(void)
61166158

61176159
/* we must uncharge all the leftover precharges from mc.to */
61186160
if (mc.precharge) {
6119-
cancel_charge(mc.to, mc.precharge);
6161+
mem_cgroup_cancel_charge(mc.to, mc.precharge);
61206162
mc.precharge = 0;
61216163
}
61226164
/*
61236165
* we didn't uncharge from mc.from at mem_cgroup_move_account(), so
61246166
* we must uncharge here.
61256167
*/
61266168
if (mc.moved_charge) {
6127-
cancel_charge(mc.from, mc.moved_charge);
6169+
mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
61286170
mc.moved_charge = 0;
61296171
}
61306172
/* we must fixup refcnts and charges */
@@ -7031,20 +7073,13 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
70317073
static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
70327074
gfp_t gfp)
70337075
{
7034-
long nr_pages = folio_nr_pages(folio);
70357076
int ret;
70367077

7037-
ret = try_charge(memcg, gfp, nr_pages);
7078+
ret = try_charge(memcg, gfp, folio_nr_pages(folio));
70387079
if (ret)
70397080
goto out;
70407081

7041-
css_get(&memcg->css);
7042-
commit_charge(folio, memcg);
7043-
7044-
local_irq_disable();
7045-
mem_cgroup_charge_statistics(memcg, nr_pages);
7046-
memcg_check_events(memcg, folio_nid(folio));
7047-
local_irq_enable();
7082+
mem_cgroup_commit_charge(folio, memcg);
70487083
out:
70497084
return ret;
70507085
}

0 commit comments

Comments
 (0)