Commit 599d0c95 authored by Mel Gorman, committed by Linus Torvalds

mm, vmscan: move LRU lists to node

This moves the LRU lists from the zone to the node and related data such
as counters, tracing, congestion tracking and writeback tracking.

Unfortunately, due to reclaim and compaction retry logic, it is
necessary to account for the number of LRU pages at both the zone and
node level.  Most reclaim logic is based on the node counters but the retry
logic uses the zone counters which do not distinguish inactive and
active sizes.  It would be possible to leave the LRU counters on a
per-zone basis but it's a heavier calculation across multiple cache
lines that is much more frequent than the retry checks.

Other than the LRU counters, this is mostly a mechanical patch but note
that it introduces a number of anomalies.  For example, the scans are
per-zone but using per-node counters.  We also mark a node as congested
when a zone is congested.  This causes weird problems that are fixed
later but is easier to review.

In the event that there is excessive overhead on 32-bit systems due to
the LRU lists being on the node, there are two potential solutions:

1. Long-term isolation of highmem pages when reclaim is lowmem

   When pages are skipped, they are immediately added back onto the LRU
   list. If lowmem reclaim persisted for long periods of time, the same
   highmem pages get continually scanned. The idea would be that lowmem
   keeps those pages on a separate list until a reclaim for highmem pages
   arrives that splices the highmem pages back onto the LRU. It potentially
   could be implemented similar to the UNEVICTABLE list.

   That would reduce the skip rate, with the potential corner case that
   highmem pages have to be scanned and reclaimed to free lowmem slab pages.

2. Linear scan lowmem pages if the initial LRU shrink fails

   This will break LRU ordering but may be preferable and faster during
   memory pressure than skipping LRU pages.

Link: http://lkml.kernel.org/r/1467970510-21195-4-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a52633d8
......@@ -45,10 +45,10 @@ void show_mem(unsigned int filter)
struct zone *zone;
pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n",
(global_page_state(NR_ACTIVE_ANON) +
global_page_state(NR_ACTIVE_FILE)),
(global_page_state(NR_INACTIVE_ANON) +
global_page_state(NR_INACTIVE_FILE)),
(global_node_page_state(NR_ACTIVE_ANON) +
global_node_page_state(NR_ACTIVE_FILE)),
(global_node_page_state(NR_INACTIVE_ANON) +
global_node_page_state(NR_INACTIVE_FILE)),
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
......
......@@ -56,6 +56,7 @@ static ssize_t node_read_meminfo(struct device *dev,
{
int n;
int nid = dev->id;
struct pglist_data *pgdat = NODE_DATA(nid);
struct sysinfo i;
si_meminfo_node(&i, nid);
......@@ -74,15 +75,15 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
nid, K(sum_zone_node_page_state(nid, NR_ACTIVE_ANON) +
sum_zone_node_page_state(nid, NR_ACTIVE_FILE)),
nid, K(sum_zone_node_page_state(nid, NR_INACTIVE_ANON) +
sum_zone_node_page_state(nid, NR_INACTIVE_FILE)),
nid, K(sum_zone_node_page_state(nid, NR_ACTIVE_ANON)),
nid, K(sum_zone_node_page_state(nid, NR_INACTIVE_ANON)),
nid, K(sum_zone_node_page_state(nid, NR_ACTIVE_FILE)),
nid, K(sum_zone_node_page_state(nid, NR_INACTIVE_FILE)),
nid, K(sum_zone_node_page_state(nid, NR_UNEVICTABLE)),
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
node_page_state(pgdat, NR_ACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
node_page_state(pgdat, NR_INACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
#ifdef CONFIG_HIGHMEM
......
......@@ -72,10 +72,10 @@ static unsigned long lowmem_deathpending_timeout;
static unsigned long lowmem_count(struct shrinker *s,
struct shrink_control *sc)
{
return global_page_state(NR_ACTIVE_ANON) +
global_page_state(NR_ACTIVE_FILE) +
global_page_state(NR_INACTIVE_ANON) +
global_page_state(NR_INACTIVE_FILE);
return global_node_page_state(NR_ACTIVE_ANON) +
global_node_page_state(NR_ACTIVE_FILE) +
global_node_page_state(NR_INACTIVE_ANON) +
global_node_page_state(NR_INACTIVE_FILE);
}
static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
......
......@@ -197,7 +197,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
}
long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct zone *zone, int sync, long timeout);
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
......
......@@ -339,7 +339,7 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
lruvec = &zone->lruvec;
lruvec = zone_lruvec(zone);
goto out;
}
......@@ -348,15 +348,15 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
out:
/*
* Since a node can be onlined after the mem_cgroup was created,
* we have to be prepared to initialize lruvec->zone here;
* we have to be prepared to initialize lruvec->pgdat here;
* and if offlined then reonlined, we need to reinitialize it.
*/
if (unlikely(lruvec->zone != zone))
lruvec->zone = zone;
if (unlikely(lruvec->pgdat != zone->zone_pgdat))
lruvec->pgdat = zone->zone_pgdat;
return lruvec;
}
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
......@@ -437,7 +437,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int nr_pages);
enum zone_type zid, int nr_pages);
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask);
......@@ -612,13 +612,13 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
struct mem_cgroup *memcg)
{
return &zone->lruvec;
return zone_lruvec(zone);
}
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
struct zone *zone)
struct pglist_data *pgdat)
{
return &zone->lruvec;
return &pgdat->lruvec;
}
static inline bool mm_match_cgroup(struct mm_struct *mm,
......
......@@ -23,25 +23,32 @@ static inline int page_is_file_cache(struct page *page)
}
static __always_inline void __update_lru_size(struct lruvec *lruvec,
enum lru_list lru, int nr_pages)
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
__mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + !!is_file_lru(lru),
nr_pages);
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, int nr_pages)
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#else
__update_lru_size(lruvec, lru, nr_pages);
__update_lru_size(lruvec, lru, zid, nr_pages);
#endif
}
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
update_lru_size(lruvec, lru, hpage_nr_pages(page));
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
list_add(&page->lru, &lruvec->lists[lru]);
}
......@@ -49,7 +56,7 @@ static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
list_del(&page->lru);
update_lru_size(lruvec, lru, -hpage_nr_pages(page));
update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
}
/**
......
......@@ -111,12 +111,9 @@ enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
NR_ALLOC_BATCH,
NR_LRU_BASE,
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
NR_ACTIVE_ANON, /* " " " " " */
NR_INACTIVE_FILE, /* " " " " " */
NR_ACTIVE_FILE, /* " " " " " */
NR_UNEVICTABLE, /* " " " " " */
NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
NR_ZONE_LRU_ANON = NR_ZONE_LRU_BASE,
NR_ZONE_LRU_FILE,
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_ANON_PAGES, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
......@@ -134,12 +131,9 @@ enum zone_stat_item {
NR_VMSCAN_WRITE,
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
NR_PAGES_SCANNED, /* pages scanned since last reclaim */
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
......@@ -161,6 +155,15 @@ enum zone_stat_item {
NR_VM_ZONE_STAT_ITEMS };
/*
 * Per-node statistics items. Each enumerator indexes the vm_stat[] array
 * in pglist_data, which is sized by NR_VM_NODE_STAT_ITEMS.
 */
enum node_stat_item {
/* Base of the LRU counters; users add an enum lru_list index to it. */
NR_LRU_BASE,
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
NR_ACTIVE_ANON, /* " " " " " */
NR_INACTIVE_FILE, /* " " " " " */
NR_ACTIVE_FILE, /* " " " " " */
NR_UNEVICTABLE, /* " " " " " */
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
NR_PAGES_SCANNED, /* pages scanned since last reclaim */
/* Number of items above; keep this entry last. */
NR_VM_NODE_STAT_ITEMS
};
......@@ -219,7 +222,7 @@ struct lruvec {
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
#ifdef CONFIG_MEMCG
struct zone *zone;
struct pglist_data *pgdat;
#endif
};
......@@ -357,13 +360,6 @@ struct zone {
#ifdef CONFIG_NUMA
int node;
#endif
/*
* The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
* this zone's LRU. Maintained by the pageout code.
*/
unsigned int inactive_ratio;
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
......@@ -495,9 +491,6 @@ struct zone {
/* Write-intensive fields used by page reclaim */
/* Fields commonly accessed by the page reclaim scanner */
struct lruvec lruvec;
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
......@@ -537,17 +530,20 @@ struct zone {
enum zone_flags {
ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
ZONE_CONGESTED, /* zone has many dirty pages backed by
ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
};
enum pgdat_flags {
PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
* a congested BDI
*/
ZONE_DIRTY, /* reclaim scanning has recently found
PGDAT_DIRTY, /* reclaim scanning has recently found
* many dirty file pages at the tail
* of the LRU.
*/
ZONE_WRITEBACK, /* reclaim scanning has recently found
PGDAT_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
};
static inline unsigned long zone_end_pfn(const struct zone *zone)
......@@ -707,6 +703,19 @@ typedef struct pglist_data {
unsigned long split_queue_len;
#endif
/* Fields commonly accessed by the page reclaim scanner */
struct lruvec lruvec;
/*
* The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
* this node's LRU. Maintained by the pageout code.
*/
unsigned int inactive_ratio;
unsigned long flags;
ZONE_PADDING(_pad2_)
/* Per-node vmstats */
struct per_cpu_nodestat __percpu *per_cpu_nodestats;
atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
......@@ -728,6 +737,11 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone)
return &zone->zone_pgdat->lru_lock;
}
/*
 * Return the LRU vector embedded in the node (pglist_data) that @zone
 * points to through zone->zone_pgdat.
 */
static inline struct lruvec *zone_lruvec(struct zone *zone)
{
return &zone->zone_pgdat->lruvec;
}
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
return pgdat->node_start_pfn + pgdat->node_spanned_pages;
......@@ -779,12 +793,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
extern void lruvec_init(struct lruvec *lruvec);
static inline struct zone *lruvec_zone(struct lruvec *lruvec)
static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
return lruvec->zone;
return lruvec->pgdat;
#else
return container_of(lruvec, struct zone, lruvec);
return container_of(lruvec, struct pglist_data, lruvec);
#endif
}
......
......@@ -317,6 +317,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
......
......@@ -26,11 +26,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGFREE, PGACTIVATE, PGDEACTIVATE,
PGFAULT, PGMAJFAULT,
PGLAZYFREED,
FOR_ALL_ZONES(PGREFILL),
FOR_ALL_ZONES(PGSTEAL_KSWAPD),
FOR_ALL_ZONES(PGSTEAL_DIRECT),
FOR_ALL_ZONES(PGSCAN_KSWAPD),
FOR_ALL_ZONES(PGSCAN_DIRECT),
PGREFILL,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_DIRECT_THROTTLE,
#ifdef CONFIG_NUMA
PGSCAN_ZONE_RECLAIM_FAILED,
......
......@@ -178,6 +178,23 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
return x;
}
/*
 * Read the per-node counter @item of @pgdat.
 *
 * The starting value is the node-wide atomic pgdat->vm_stat[item]. With
 * CONFIG_SMP, the vm_node_stat_diff[item] delta of every online CPU is
 * added on top and a negative total is clamped to zero. Without
 * CONFIG_SMP the atomic value is returned as read.
 */
static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
enum node_stat_item item)
{
long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
int cpu;
/* Add each online CPU's per-cpu delta for this item. */
for_each_online_cpu(cpu)
x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];
/* Clamp a negative sum before it is returned as unsigned long. */
if (x < 0)
x = 0;
#endif
return x;
}
#ifdef CONFIG_NUMA
extern unsigned long sum_zone_node_page_state(int node,
enum zone_stat_item item);
......
......@@ -352,15 +352,14 @@ TRACE_EVENT(mm_vmscan_writepage,
TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
TP_PROTO(struct zone *zone,
TP_PROTO(int nid,
unsigned long nr_scanned, unsigned long nr_reclaimed,
int priority, int file),
TP_ARGS(zone, nr_scanned, nr_reclaimed, priority, file),
TP_ARGS(nid, nr_scanned, nr_reclaimed, priority, file),
TP_STRUCT__entry(
__field(int, nid)
__field(int, zid)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_reclaimed)
__field(int, priority)
......@@ -368,16 +367,15 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
),
TP_fast_assign(
__entry->nid = zone_to_nid(zone);
__entry->zid = zone_idx(zone);
__entry->nid = nid;
__entry->nr_scanned = nr_scanned;
__entry->nr_reclaimed = nr_reclaimed;
__entry->priority = priority;
__entry->reclaim_flags = trace_shrink_flags(file);
),
TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
__entry->nid, __entry->zid,
TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
__entry->nid,
__entry->nr_scanned, __entry->nr_reclaimed,
__entry->priority,
show_reclaim_flags(__entry->reclaim_flags))
......
......@@ -1627,11 +1627,11 @@ static unsigned long minimum_image_size(unsigned long saveable)
unsigned long size;
size = global_page_state(NR_SLAB_RECLAIMABLE)
+ global_page_state(NR_ACTIVE_ANON)
+ global_page_state(NR_INACTIVE_ANON)
+ global_page_state(NR_ACTIVE_FILE)
+ global_page_state(NR_INACTIVE_FILE)
- global_page_state(NR_FILE_MAPPED);
+ global_node_page_state(NR_ACTIVE_ANON)
+ global_node_page_state(NR_INACTIVE_ANON)
+ global_node_page_state(NR_ACTIVE_FILE)
+ global_node_page_state(NR_INACTIVE_FILE)
- global_node_page_state(NR_FILE_MAPPED);
return saveable <= size ? 0 : saveable - size;
}
......
......@@ -947,24 +947,24 @@ long congestion_wait(int sync, long timeout)
EXPORT_SYMBOL(congestion_wait);
/**
* wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
* @zone: A zone to check if it is heavily congested
* wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
* @pgdat: A pgdat to check if it is heavily congested
* @sync: SYNC or ASYNC IO
* @timeout: timeout in jiffies
*
* In the event of a congested backing_dev (any backing_dev) and the given
* @zone has experienced recent congestion, this waits for up to @timeout
* @pgdat has experienced recent congestion, this waits for up to @timeout
* jiffies for either a BDI to exit congestion of the given @sync queue
* or a write to complete.
*
* In the absence of zone congestion, cond_resched() is called to yield
* In the absence of pgdat congestion, cond_resched() is called to yield
* the processor if necessary but otherwise does not sleep.
*
* The return value is 0 if the sleep is for the full timeout. Otherwise,
* it is the number of jiffies that were still remaining when the function
* returned. return_value == timeout implies the function did not sleep.
*/
long wait_iff_congested(struct zone *zone, int sync, long timeout)
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
{
long ret;
unsigned long start = jiffies;
......@@ -973,12 +973,13 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
/*
* If there is no congestion, or heavy congestion is not being
* encountered in the current zone, yield if necessary instead
* encountered in the current pgdat, yield if necessary instead
* of sleeping on the congestion queue
*/
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
!test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
cond_resched();
/* In case we scheduled, work out time remaining */
ret = timeout - (jiffies - start);
if (ret < 0)
......
......@@ -646,8 +646,8 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
list_for_each_entry(page, &cc->migratepages, lru)
count[!!page_is_file_cache(page)]++;
mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
}
/* Similar to reclaim, but different enough that they don't share logic */
......@@ -655,12 +655,12 @@ static bool too_many_isolated(struct zone *zone)
{
unsigned long active, inactive, isolated;
inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
zone_page_state(zone, NR_INACTIVE_ANON);
active = zone_page_state(zone, NR_ACTIVE_FILE) +
zone_page_state(zone, NR_ACTIVE_ANON);
isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
zone_page_state(zone, NR_ISOLATED_ANON);
inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
return isolated > (inactive + active) / 2;
}
......@@ -856,7 +856,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
}
}
lruvec = mem_cgroup_page_lruvec(page, zone);
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
/* Try isolate the page */
if (__isolate_lru_page(page, isolate_mode) != 0)
......
......@@ -1818,7 +1818,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
pgoff_t end = -1;
int i;
lruvec = mem_cgroup_page_lruvec(head, zone);
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);
......
......@@ -78,7 +78,7 @@ extern unsigned long highest_memmap_pfn;
*/
extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page);
extern bool zone_reclaimable(struct zone *zone);
extern bool pgdat_reclaimable(struct pglist_data *pgdat);
/*
* in mm/rmap.c:
......
......@@ -480,7 +480,7 @@ void __khugepaged_exit(struct mm_struct *mm)
static void release_pte_page(struct page *page)
{
/* 0 stands for page_is_file_cache(page) == false */
dec_zone_page_state(page, NR_ISOLATED_ANON + 0);
dec_node_page_state(page, NR_ISOLATED_ANON + 0);
unlock_page(page);
putback_lru_page(page);
}
......@@ -576,7 +576,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out;
}
/* 0 stands for page_is_file_cache(page) == false */
inc_zone_page_state(page, NR_ISOLATED_ANON + 0);
inc_node_page_state(page, NR_ISOLATED_ANON + 0);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageLRU(page), page);
......
......@@ -943,14 +943,14 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
* and putback protocol: the LRU lock must be held, and the page must
* either be PageLRU() or the caller must have isolated/allocated it.
*/
struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
{
struct mem_cgroup_per_zone *mz;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
lruvec = &zone->lruvec;
lruvec = &pgdat->lruvec;
goto out;
}
......@@ -970,8 +970,8 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
* we have to be prepared to initialize lruvec->pgdat here;
* and if offlined then reonlined, we need to reinitialize it.
*/
if (unlikely(lruvec->zone != zone))
lruvec->zone = zone;
if (unlikely(lruvec->pgdat != pgdat))
lruvec->pgdat = pgdat;
return lruvec;
}
......@@ -979,6 +979,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
* mem_cgroup_update_lru_size - account for adding or removing an lru page
* @lruvec: mem_cgroup per zone lru vector
* @lru: index of lru list the page is sitting on
* @zid: Zone ID of the zone pages have been added to
* @nr_pages: positive when adding or negative when removing
*
* This function must be called under lru_lock, just before a page is added
......@@ -986,14 +987,14 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
* so as to allow it to check that lru_size 0 is consistent with list_empty).
*/
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int nr_pages)
enum zone_type zid, int nr_pages)
{
struct mem_cgroup_per_zone *mz;
unsigned long *lru_size;
long size;
bool empty;
__update_lru_size(lruvec, lru, nr_pages);
__update_lru_size(lruvec, lru, zid, nr_pages);
if (mem_cgroup_disabled())
return;
......@@ -2069,7 +2070,7 @@ static void lock_page_lru(struct page *page, int *isolated)
if (PageLRU(page)) {
struct lruvec *lruvec;
lruvec = mem_cgroup_page_lruvec(page, zone);
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, page_lru(page));
*isolated = 1;
......@@ -2084,7 +2085,7 @@ static void unlock_page_lru(struct page *page, int isolated)
if (isolated) {
struct lruvec *lruvec;
lruvec = mem_cgroup_page_lruvec(page, zone);
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
add_page_to_lru_list(page, lruvec, page_lru(page));
......
......@@ -1663,7 +1663,7 @@ static int __soft_offline_page(struct page *page, int flags)
put_hwpoison_page(page);
if (!ret) {
LIST_HEAD(pagelist);
inc_zone_page_state(page, NR_ISOLATED_ANON +
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
list_add(&page->lru, &pagelist);
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
......@@ -1671,7 +1671,7 @@ static int __soft_offline_page(struct page *page, int flags)
if (ret) {
if (!list_empty(&pagelist)) {
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
putback_lru_page(page);
}
......
......@@ -1586,7 +1586,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
put_page(page);
list_add_tail(&page->lru, &source);
move_pages--;
inc_zone_page_state(page, NR_ISOLATED_ANON +
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
} else {
......
......@@ -962,7 +962,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
if (!isolate_lru_page(page)) {
list_add_tail(&page->lru, pagelist);
inc_zone_page_state(page, NR_ISOLATED_ANON +
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
}
}
......
......@@ -168,7 +168,7 @@ void putback_movable_pages(struct list_head *l)
continue;
}
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
/*
* We isolated non-lru movable page so here we can use
......@@ -1119,7 +1119,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
* restored.
*/
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
}
......@@ -1460,7 +1460,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
err = isolate_lru_page(page);
if (!err) {
list_add_tail(&page->lru, &pagelist);
inc_zone_page_state(page, NR_ISOLATED_ANON +
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
}
put_and_set:
......@@ -1726,15 +1726,16 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
unsigned long nr_migrate_pages)
{
int z;
if (!pgdat_reclaimable(pgdat))
return false;
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
struct zone *zone = pgdat->node_zones + z;
if (!populated_zone(zone))
continue;
if (!zone_reclaimable(zone))
continue;
/* Avoid waking kswapd by allocating pages_to_migrate pages. */
if (!zone_watermark_ok(zone, 0,
high_wmark_pages(zone) +
......@@ -1828,7 +1829,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
}
page_lru = page_is_file_cache(page);
mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
hpage_nr_pages(page));
/*
......@@ -1886,7 +1887,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
if (nr_remaining) {
if (!list_empty(&migratepages)) {
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
putback_lru_page(page);
}
......@@ -1979,7 +1980,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
/* Retake the callers reference and putback on LRU */
get_page(page);