Commit 60d3fd32, authored by Vladimir Davydov, committed by Linus Torvalds

list_lru: introduce per-memcg lists

There are several FS shrinkers, including super_block::s_shrink, that
keep reclaimable objects in the list_lru structure.  Hence, to turn them
into memcg-aware shrinkers, it is enough to make list_lru per-memcg.

This patch does the trick.  It adds an array of lru lists to the
list_lru_node structure (per-node part of the list_lru), one for each
kmem-active memcg, and dispatches every item addition or removal to the
list corresponding to the memcg which the item is accounted to.  So now
the list_lru structure is not just per node, but per node and per memcg.

Not all list_lrus need this feature, so this patch also adds a new
method, list_lru_init_memcg, which initializes a list_lru as memcg
aware.  Otherwise (i.e.  if initialized with old list_lru_init), the
list_lru won't have per memcg lists.

Just like the per-memcg cache arrays, the arrays of per-memcg lists are
indexed by memcg_cache_id, so we must grow them whenever
memcg_nr_cache_ids is increased.  So we introduce a callback,
memcg_update_all_list_lrus, invoked by memcg_alloc_cache_id if the id
space is full.

The locking is implemented in a manner similar to lruvecs, i.e.  we have
one lock per node that protects all lists (both global and per cgroup) on
the node.

Signed-off-by: Vladimir Davydov <>
Cc: Dave Chinner <>
Cc: Johannes Weiner <>
Cc: Michal Hocko <>
Cc: Greg Thelen <>
Cc: Glauber Costa <>
Cc: Alexander Viro <>
Cc: Christoph Lameter <>
Cc: Pekka Enberg <>
Cc: David Rientjes <>
Cc: Joonsoo Kim <>
Cc: Tejun Heo <>
Signed-off-by: Andrew Morton <>
Signed-off-by: Linus Torvalds <>
parent c0a5b560
......@@ -11,6 +11,8 @@
#include <linux/nodemask.h>
#include <linux/shrinker.h>
struct mem_cgroup;
/* list_lru_walk_cb has to always return one of those */
enum lru_status {
LRU_REMOVED, /* item removed from list */
......@@ -22,11 +24,26 @@ enum lru_status {
internally, but has to return locked. */
struct list_lru_node {
spinlock_t lock;
struct list_lru_one {
struct list_head list;
/* kept as signed so we can catch imbalance bugs */
long nr_items;
struct list_lru_memcg {
/* array of per cgroup lists, indexed by memcg_cache_id */
struct list_lru_one *lru[0];
struct list_lru_node {
/* protects all lists on the node, including per cgroup */
spinlock_t lock;
/* global list, used for the root cgroup in cgroup aware lrus */
struct list_lru_one lru;
/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
struct list_lru_memcg *memcg_lrus;
} ____cacheline_aligned_in_smp;
struct list_lru {
......@@ -37,11 +54,14 @@ struct list_lru {
void list_lru_destroy(struct list_lru *lru);
int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key);
static inline int list_lru_init(struct list_lru *lru)
return list_lru_init_key(lru, NULL);
/*
 * Common initializer behind the list_lru_init*() macros below.
 * @memcg_aware selects whether the lru is set up with per-memcg lists;
 * @key is a lock class key, for which the plain wrappers pass NULL.
 */
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
struct lock_class_key *key);
/* Not memcg aware, no lock class key. */
#define list_lru_init(lru) __list_lru_init((lru), false, NULL)
/* Not memcg aware, caller-supplied lock class key. */
#define list_lru_init_key(lru, key) __list_lru_init((lru), false, (key))
/* Memcg aware, no lock class key. */
#define list_lru_init_memcg(lru) __list_lru_init((lru), true, NULL)
/*
 * Called from memcg_alloc_cache_id() with the size that memcg_nr_cache_ids
 * is about to take; a non-zero return is treated as an error there and
 * memcg_nr_cache_ids is left unchanged.
 */
int memcg_update_all_list_lrus(int num_memcgs);
* list_lru_add: add an element to the lru list's tail
......@@ -75,20 +95,23 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item);
bool list_lru_del(struct list_lru *lru, struct list_head *item);
* list_lru_count_node: return the number of objects currently held by @lru
* list_lru_count_one: return the number of objects currently held by @lru
* @lru: the lru pointer.
* @nid: the node id to count from.
* @memcg: the cgroup to count from.
* Always return a non-negative number, 0 for empty lists. There is no
* guarantee that the list is not updated while the count is being computed.
* Callers that want such a guarantee need to provide an outer lock.
unsigned long list_lru_count_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg);
unsigned long list_lru_count_node(struct list_lru *lru, int nid);
static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
struct shrink_control *sc)
return list_lru_count_node(lru, sc->nid);
return list_lru_count_one(lru, sc->nid, sc->memcg);
static inline unsigned long list_lru_count(struct list_lru *lru)
......@@ -105,9 +128,10 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
typedef enum lru_status
(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
* list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
* list_lru_walk_one: walk a list_lru, isolating and disposing freeable items.
* @lru: the lru pointer.
* @nid: the node id to scan from.
* @memcg: the cgroup to scan from.
* @isolate: callback function that is responsible for deciding what to do with
* the item currently being scanned
* @cb_arg: opaque type that will be passed to @isolate
......@@ -125,6 +149,10 @@ typedef enum lru_status
* Return value: the number of objects effectively removed from the LRU.
unsigned long list_lru_walk_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk);
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk);
......@@ -133,8 +161,8 @@ static inline unsigned long
list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
list_lru_walk_cb isolate, void *cb_arg)
return list_lru_walk_node(lru, sc->nid, isolate, cb_arg,
return list_lru_walk_one(lru, sc->nid, sc->memcg, isolate, cb_arg,
static inline unsigned long
......@@ -439,6 +439,8 @@ int memcg_cache_id(struct mem_cgroup *memcg);
struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
void __memcg_kmem_put_cache(struct kmem_cache *cachep);
struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr);
int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
unsigned long nr_pages);
void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages);
......@@ -535,6 +537,13 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
if (memcg_kmem_enabled())
/*
 * Wrapper around __mem_cgroup_from_kmem(): returns NULL without doing the
 * lookup when memcg_kmem_enabled() is false, otherwise forwards @ptr and
 * returns whatever the lookup returns.
 */
static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
if (!memcg_kmem_enabled())
return NULL;
return __mem_cgroup_from_kmem(ptr);
#define for_each_memcg_cache_index(_idx) \
for (; NULL; )
......@@ -586,6 +595,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
static inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
/*
 * Variant of mem_cgroup_from_kmem() that ignores @ptr and always returns NULL.
 * NOTE(review): presumably the stub used when CONFIG_MEMCG_KMEM is disabled;
 * the enclosing preprocessor branch is not visible here - confirm.
 */
static inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
return NULL;
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
This diff is collapsed.
......@@ -2571,6 +2571,8 @@ static int memcg_alloc_cache_id(void)
err = memcg_update_all_caches(size);
if (!err)
err = memcg_update_all_list_lrus(size);
if (!err)
memcg_nr_cache_ids = size;
......@@ -2765,6 +2767,24 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
memcg_uncharge_kmem(memcg, 1 << order);
page->mem_cgroup = NULL;
/*
 * __mem_cgroup_from_kmem - find the memory cgroup behind a kernel address.
 * @ptr: address handed to virt_to_head_page() to locate the backing page.
 *
 * For a slab page the cgroup is taken from the owning cache's memcg_params,
 * and stays NULL when that cache is a root cache.  For any other page the
 * cgroup is read from page->mem_cgroup.
 *
 * Returns the cgroup found this way, which may be NULL.
 */
struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr)
struct mem_cgroup *memcg = NULL;
struct kmem_cache *cachep;
struct page *page;
page = virt_to_head_page(ptr);
if (PageSlab(page)) {
/* slab object: the cgroup, if any, hangs off the owning cache */
cachep = page->slab_cache;
if (!is_root_cache(cachep))
memcg = cachep->memcg_params->memcg;
} else
/* page allocated by alloc_kmem_pages */
memcg = page->mem_cgroup;
return memcg;
#endif /* CONFIG_MEMCG_KMEM */
