Commit 2788cf0c authored by Vladimir Davydov, committed by Linus Torvalds

memcg: reparent list_lrus and free kmemcg_id on css offline

Now, the only reason to keep kmemcg_id till css free is list_lru, which
uses it to distribute elements between per-memcg lists.  However, it can
be easily sorted out - we only need to change kmemcg_id of an offline
cgroup to its parent's id, making further list_lru_add()'s add elements to
the parent's list, and then move all elements from the offline cgroup's
list to the one of its parent.  It will work, because a racing
list_lru_del() does not need to know the list it is deleting the element
from.  It may decrement the wrong nr_items counter, but the ongoing
reparenting will fix it.  After list_lru reparenting is done we are free
to release kmemcg_id, saving a valuable slot in a per-memcg array for new
cgroups.
Signed-off-by: Vladimir Davydov <>
Cc: Johannes Weiner <>
Cc: Michal Hocko <>
Cc: Tejun Heo <>
Cc: Christoph Lameter <>
Cc: Pekka Enberg <>
Cc: David Rientjes <>
Cc: Joonsoo Kim <>
Cc: Dave Chinner <>
Signed-off-by: Andrew Morton <>
Signed-off-by: Linus Torvalds <>
parent 3f97b163
......@@ -26,7 +26,7 @@ enum lru_status {
struct list_lru_one {
struct list_head list;
/* kept as signed so we can catch imbalance bugs */
/* may become negative during memcg reparenting */
long nr_items;
......@@ -62,6 +62,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
#define list_lru_init_memcg(lru) __list_lru_init((lru), true, NULL)
int memcg_update_all_list_lrus(int num_memcgs);
void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
* list_lru_add: add an element to the lru list's tail
......@@ -100,7 +100,6 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
l = list_lru_from_kmem(nlru, item);
WARN_ON_ONCE(l->nr_items < 0);
if (list_empty(item)) {
list_add_tail(item, &l->list);
......@@ -123,7 +122,6 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
if (!list_empty(item)) {
WARN_ON_ONCE(l->nr_items < 0);
return true;
......@@ -156,7 +154,6 @@ static unsigned long __list_lru_count_one(struct list_lru *lru,
l = list_lru_from_memcg_idx(nlru, memcg_idx);
WARN_ON_ONCE(l->nr_items < 0);
count = l->nr_items;
......@@ -458,6 +455,49 @@ int memcg_update_all_list_lrus(int new_size)
memcg_cancel_update_list_lru(lru, old_size, new_size);
goto out;
static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
int src_idx, int dst_idx)
struct list_lru_one *src, *dst;
* Since list_lru_{add,del} may be called under an IRQ-safe lock,
* we have to use IRQ-safe primitives here to avoid deadlock.
src = list_lru_from_memcg_idx(nlru, src_idx);
dst = list_lru_from_memcg_idx(nlru, dst_idx);
list_splice_init(&src->list, &dst->list);
dst->nr_items += src->nr_items;
src->nr_items = 0;
static void memcg_drain_list_lru(struct list_lru *lru,
int src_idx, int dst_idx)
int i;
if (!list_lru_memcg_aware(lru))
for (i = 0; i < nr_node_ids; i++)
memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
struct list_lru *lru;
list_for_each_entry(lru, &list_lrus, list)
memcg_drain_list_lru(lru, src_idx, dst_idx);
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
......@@ -334,6 +334,7 @@ struct mem_cgroup {
#if defined(CONFIG_MEMCG_KMEM)
/* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
bool kmem_acct_activated;
bool kmem_acct_active;
......@@ -582,14 +583,10 @@ void memcg_put_cache_ids(void)
struct static_key memcg_kmem_enabled_key;
static void memcg_free_cache_id(int id);
static void disarm_kmem_keys(struct mem_cgroup *memcg)
if (memcg->kmemcg_id >= 0) {
if (memcg->kmem_acct_activated)
* This check can't live in kmem destruction function,
* since the charges will outlive the cgroup
......@@ -3322,6 +3319,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
int memcg_id;
BUG_ON(memcg->kmemcg_id >= 0);
......@@ -3365,6 +3363,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
* patched.
memcg->kmemcg_id = memcg_id;
memcg->kmem_acct_activated = true;
memcg->kmem_acct_active = true;
return err;
......@@ -4047,6 +4046,10 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
struct cgroup_subsys_state *css;
struct mem_cgroup *parent, *child;
int kmemcg_id;
if (!memcg->kmem_acct_active)
......@@ -4059,6 +4062,32 @@ static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
memcg->kmem_acct_active = false;
kmemcg_id = memcg->kmemcg_id;
BUG_ON(kmemcg_id < 0);
parent = parent_mem_cgroup(memcg);
if (!parent)
parent = root_mem_cgroup;
* Change kmemcg_id of this cgroup and all its descendants to the
* parent's id, and then move all entries from this cgroup's list_lrus
* to ones of the parent. After we have finished, all list_lrus
* corresponding to this cgroup are guaranteed to remain empty. The
* ordering is imposed by list_lru_node->lock taken by
* memcg_drain_all_list_lrus().
css_for_each_descendant_pre(css, &memcg->css) {
child = mem_cgroup_from_css(css);
BUG_ON(child->kmemcg_id != kmemcg_id);
child->kmemcg_id = parent->kmemcg_id;
if (!memcg->use_hierarchy)
memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id);
static void memcg_destroy_kmem(struct mem_cgroup *memcg)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment