Commit 95211279 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'akpm' (Andrew's patch-bomb)

Merge first batch of patches from Andrew Morton:
 "A few misc things and all the MM queue"

* emailed from Andrew Morton <akpm@linux-foundation.org>: (92 commits)
  memcg: avoid THP split in task migration
  thp: add HPAGE_PMD_* definitions for !CONFIG_TRANSPARENT_HUGEPAGE
  memcg: clean up existing move charge code
  mm/memcontrol.c: remove unnecessary 'break' in mem_cgroup_read()
  mm/memcontrol.c: remove redundant BUG_ON() in mem_cgroup_usage_unregister_event()
  mm/memcontrol.c: s/stealed/stolen/
  memcg: fix performance of mem_cgroup_begin_update_page_stat()
  memcg: remove PCG_FILE_MAPPED
  memcg: use new logic for page stat accounting
  memcg: remove PCG_MOVE_LOCK flag from page_cgroup
  memcg: simplify move_account() check
  memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat)
  memcg: kill dead prev_priority stubs
  memcg: remove PCG_CACHE page_cgroup flag
  memcg: let css_get_next() rely upon rcu_read_lock()
  cgroup: revert ss_id_lock to spinlock
  idr: make idr_get_next() good for rcu_read_lock()
  memcg: remove unnecessary thp check in page stat accounting
  memcg: remove redundant returns
  memcg: enum lru_list lru
  ...
parents 5375871d 12724850
......@@ -290,7 +290,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
rsslim current limit in bytes on the rss
start_code address above which program text can run
end_code address below which program text can run
start_stack address of the start of the stack
start_stack address of the start of the main process stack
esp current value of ESP
eip current value of EIP
pending bitmap of pending signals
......@@ -325,7 +325,7 @@ address perms offset dev inode pathname
a7cb1000-a7cb2000 ---p 00000000 00:00 0
a7cb2000-a7eb2000 rw-p 00000000 00:00 0
a7eb2000-a7eb3000 ---p 00000000 00:00 0
a7eb3000-a7ed5000 rw-p 00000000 00:00 0
a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack:1001]
a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
......@@ -357,11 +357,39 @@ is not associated with a file:
[heap] = the heap of the program
[stack] = the stack of the main process
[stack:1001] = the stack of the thread with tid 1001
[vdso] = the "virtual dynamic shared object",
the kernel system call handler
or if empty, the mapping is anonymous.
The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
of the individual tasks of a process. In this file you will see a mapping marked
as [stack] if that task sees it as a stack. This is a key difference from the
content of /proc/PID/maps, where you will see all mappings that are being used
as stack by all of those tasks. Hence, for the example above, the task-level
map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
a7cb1000-a7cb2000 ---p 00000000 00:00 0
a7cb2000-a7eb2000 rw-p 00000000 00:00 0
a7eb2000-a7eb3000 ---p 00000000 00:00 0
a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack]
a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
a800b000-a800e000 rw-p 00000000 00:00 0
a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
a8024000-a8027000 rw-p 00000000 00:00 0
a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
aff35000-aff4a000 rw-p 00000000 00:00 0
ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
The /proc/PID/smaps is an extension based on maps, showing the memory
consumption for each of the process's mappings. For each of mappings there
......
......@@ -2635,6 +2635,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
to facilitate early boot debugging.
See also Documentation/trace/events.txt
transparent_hugepage=
[KNL]
Format: [always|madvise|never]
Can be used to control the default behavior of the system
with respect to transparent hugepages.
See Documentation/vm/transhuge.txt for more details.
tsc= Disable clocksource stability checks for TSC.
Format: <string>
[x86] reliable: mark tsc clocksource as reliable, this
......
......@@ -98,6 +98,7 @@
#define KPF_HWPOISON 19
#define KPF_NOPAGE 20
#define KPF_KSM 21
#define KPF_THP 22
/* [32-] kernel hacking assistances */
#define KPF_RESERVED 32
......@@ -147,6 +148,7 @@ static const char *page_flag_names[] = {
[KPF_HWPOISON] = "X:hwpoison",
[KPF_NOPAGE] = "n:nopage",
[KPF_KSM] = "x:ksm",
[KPF_THP] = "t:thp",
[KPF_RESERVED] = "r:reserved",
[KPF_MLOCKED] = "m:mlocked",
......
......@@ -60,6 +60,7 @@ There are three components to pagemap:
19. HWPOISON
20. NOPAGE
21. KSM
22. THP
Short descriptions to the page flags:
......@@ -97,6 +98,9 @@ Short descriptions to the page flags:
21. KSM
identical memory pages dynamically shared between one or more processes
22. THP
contiguous pages which construct transparent hugepages
[IO related page flags]
1. ERROR IO error occurred
3. UPTODATE page has up-to-date data
......
......@@ -776,7 +776,6 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka,
siginfo_t *info,
sigset_t *oldset, struct pt_regs *regs)
{
sigset_t blocked;
int err;
if (ka->sa.sa_flags & SA_SIGINFO)
......@@ -787,11 +786,7 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka,
if (err)
return err;
sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NOMASK))
sigaddset(&blocked, signr);
set_current_blocked(&blocked);
block_sigmask(ka, signr);
tracehook_signal_handler(signr, info, ka, regs, 0);
return 0;
......
......@@ -465,7 +465,6 @@ static inline int
handle_signal(unsigned long signr, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
{
sigset_t blocked;
int err;
if (ka->sa.sa_flags & SA_SIGINFO)
......@@ -476,11 +475,7 @@ handle_signal(unsigned long signr, struct k_sigaction *ka,
if (err)
return err;
sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NOMASK))
sigaddset(&blocked, signr);
set_current_blocked(&blocked);
block_sigmask(ka, signr);
tracehook_signal_handler(signr, info, ka, regs, 0);
return 0;
......
......@@ -479,18 +479,14 @@ static inline int handle_signal(unsigned long signr, struct k_sigaction *ka,
siginfo_t *info,
sigset_t *oldset, struct pt_regs *regs)
{
sigset_t blocked;
int err;
err = setup_rt_frame(ka, regs, signr, oldset,
(ka->sa.sa_flags & SA_SIGINFO) ? info : NULL);
if (err)
return err;
sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NOMASK))
sigaddset(&blocked, signr);
set_current_blocked(&blocked);
block_sigmask(ka, signr);
tracehook_signal_handler(signr, info, ka, regs, 0);
return 0;
......
......@@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
unsigned long addr = addr0, start_addr;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
......@@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
mm->free_area_cache = mm->mmap_base;
}
try_again:
/* either no address requested or can't fit in requested address hole */
addr = mm->free_area_cache;
/* make sure it can fit in the remaining address space */
if (addr > len) {
unsigned long tmp_addr = align_addr(addr - len, filp,
ALIGN_TOPDOWN);
vma = find_vma(mm, tmp_addr);
if (!vma || tmp_addr + len <= vma->vm_start)
/* remember the address as a hint for next time */
return mm->free_area_cache = tmp_addr;
}
if (mm->mmap_base < len)
goto bottomup;
start_addr = addr = mm->free_area_cache;
addr = mm->mmap_base-len;
if (addr < len)
goto fail;
addr -= len;
do {
addr = align_addr(addr, filp, ALIGN_TOPDOWN);
......@@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
addr = vma->vm_start-len;
} while (len < vma->vm_start);
fail:
/*
* if hint left us with no space for the requested
* mapping then try again:
*/
if (start_addr != mm->mmap_base) {
mm->free_area_cache = mm->mmap_base;
mm->cached_hole_size = 0;
goto try_again;
}
bottomup:
/*
* A failed mmap() very likely causes application failure,
......
......@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
spinlock_t *ptl;
int i;
down_write(&mm->mmap_sem);
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
......@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
}
pte_unmap_unlock(pte, ptl);
out:
up_write(&mm->mmap_sem);
flush_tlb();
}
......
......@@ -308,10 +308,11 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
{
struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev_vma;
unsigned long base = mm->mmap_base, addr = addr0;
struct vm_area_struct *vma;
unsigned long base = mm->mmap_base;
unsigned long addr = addr0;
unsigned long largest_hole = mm->cached_hole_size;
int first_time = 1;
unsigned long start_addr;
/* don't allow allocations above current base */
if (mm->free_area_cache > base)
......@@ -322,6 +323,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
mm->free_area_cache = base;
}
try_again:
start_addr = mm->free_area_cache;
/* make sure it can fit in the remaining address space */
if (mm->free_area_cache < len)
goto fail;
......@@ -337,22 +340,14 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
if (!vma)
return addr;
/*
* new region fits between prev_vma->vm_end and
* vma->vm_start, use it:
*/
prev_vma = vma->vm_prev;
if (addr + len <= vma->vm_start &&
(!prev_vma || (addr >= prev_vma->vm_end))) {
if (addr + len <= vma->vm_start) {
/* remember the address as a hint for next time */
mm->cached_hole_size = largest_hole;
return (mm->free_area_cache = addr);
} else {
} else if (mm->free_area_cache == vma->vm_end) {
/* pull free_area_cache down to the first hole */
if (mm->free_area_cache == vma->vm_end) {
mm->free_area_cache = vma->vm_start;
mm->cached_hole_size = largest_hole;
}
mm->free_area_cache = vma->vm_start;
mm->cached_hole_size = largest_hole;
}
/* remember the largest hole we saw so far */
......@@ -368,10 +363,9 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
* if hint left us with no space for the requested
* mapping then try again:
*/
if (first_time) {
if (start_addr != base) {
mm->free_area_cache = base;
largest_hole = 0;
first_time = 0;
goto try_again;
}
/*
......
......@@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
eb->nid = nid;
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
emu_nid_to_phys[nid] = pb->nid;
emu_nid_to_phys[nid] = nid;
pb->start += size;
if (pb->start >= pb->end) {
......
......@@ -260,10 +260,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3,
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
set_current_blocked(&set);
if (restore_sigcontext(regs, frame))
goto badframe;
......@@ -336,8 +333,8 @@ gen_return_code(unsigned char *codemem)
}
static void setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
static int setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe *frame;
int err = 0;
......@@ -422,12 +419,11 @@ static void setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
current->comm, current->pid, signal, frame, regs->pc);
#endif
return;
return 0;
give_sigsegv:
if (sig == SIGSEGV)
ka->sa.sa_handler = SIG_DFL;
force_sig(SIGSEGV, current);
force_sigsegv(sig, current);
return -EFAULT;
}
/*
......@@ -449,11 +445,8 @@ asmlinkage long xtensa_rt_sigsuspend(sigset_t __user *unewset,
return -EFAULT;
sigdelsetmask(&newset, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
saveset = current->blocked;
current->blocked = newset;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
set_current_blocked(&newset);
regs->areg[2] = -EINTR;
while (1) {
......@@ -536,17 +529,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
/* Whee! Actually deliver the signal. */
/* Set up the stack frame */
setup_frame(signr, &ka, &info, oldset, regs);
if (ka.sa.sa_flags & SA_ONESHOT)
ka.sa.sa_handler = SIG_DFL;
ret = setup_frame(signr, &ka, &info, oldset, regs);
if (ret)
return ret;
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka.sa.sa_mask);
if (!(ka.sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked, signr);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
block_sigmask(&ka, signr);
if (current->ptrace & PT_SINGLESTEP)
task_pt_regs(current)->icountlevel = 1;
......
......@@ -507,8 +507,7 @@ int intel_idle_cpu_init(int cpu)
int num_substates;
if (cstate > max_cstate) {
printk(PREFIX "max_cstate %d reached\n",
max_cstate);
printk(PREFIX "max_cstate %d reached\n", max_cstate);
break;
}
......@@ -524,8 +523,9 @@ int intel_idle_cpu_init(int cpu)
dev->states_usage[dev->state_count].driver_data =
(void *)get_driver_data(cstate);
dev->state_count += 1;
}
dev->state_count += 1;
}
dev->cpu = cpu;
if (cpuidle_register_device(dev)) {
......
......@@ -346,7 +346,7 @@ static struct sysrq_key_op sysrq_term_op = {
static void moom_callback(struct work_struct *ignored)
{
out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL);
out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL, true);
}
static DECLARE_WORK(moom_work, moom_callback);
......
......@@ -822,7 +822,7 @@ static int exec_mmap(struct mm_struct *mm)
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
sync_mm_rss(tsk, old_mm);
sync_mm_rss(old_mm);
mm_release(tsk, old_mm);
if (old_mm) {
......
......@@ -41,6 +41,25 @@ const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
struct hugetlbfs_config {
uid_t uid;
gid_t gid;
umode_t mode;
long nr_blocks;
long nr_inodes;
struct hstate *hstate;
};
struct hugetlbfs_inode_info {
struct shared_policy policy;
struct inode vfs_inode;
};
static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
{
return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}
static struct backing_dev_info hugetlbfs_backing_dev_info = {
.name = "hugetlbfs",
.ra_pages = 0, /* No readahead */
......@@ -154,10 +173,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return addr;
}
start_addr = mm->free_area_cache;
if (len <= mm->cached_hole_size)
if (len > mm->cached_hole_size)
start_addr = mm->free_area_cache;
else {
start_addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
}
full_search:
addr = ALIGN(start_addr, huge_page_size(h));
......@@ -171,13 +192,18 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
*/
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
goto full_search;
}
return -ENOMEM;
}
if (!vma || addr + len <= vma->vm_start)
if (!vma || addr + len <= vma->vm_start) {
mm->free_area_cache = addr + len;
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
addr = ALIGN(vma->vm_end, huge_page_size(h));
}
}
......@@ -238,17 +264,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
loff_t isize;
ssize_t retval = 0;
mutex_lock(&inode->i_mutex);
/* validate length */
if (len == 0)
goto out;
isize = i_size_read(inode);
if (!isize)
goto out;
end_index = (isize - 1) >> huge_page_shift(h);
for (;;) {
struct page *page;
unsigned long nr, ret;
......@@ -256,18 +275,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
/* nr is the maximum number of bytes to copy from this page */
nr = huge_page_size(h);
isize = i_size_read(inode);
if (!isize)
goto out;
end_index = (isize - 1) >> huge_page_shift(h);
if (index >= end_index) {
if (index > end_index)
goto out;
nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
if (nr <= offset) {
if (nr <= offset)
goto out;
}
}
nr = nr - offset;
/* Find the page */
page = find_get_page(mapping, index);
page = find_lock_page(mapping, index);
if (unlikely(page == NULL)) {
/*
* We have a HOLE, zero out the user-buffer for the
......@@ -279,17 +301,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
else
ra = 0;
} else {
unlock_page(page);
/*
* We have the page, copy it to user space buffer.
*/
ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
ret = ra;
page_cache_release(page);
}
if (ra < 0) {
if (retval == 0)
retval = ra;
if (page)
page_cache_release(page);
goto out;
}
......@@ -299,16 +322,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
index += offset >> huge_page_shift(h);
offset &= ~huge_page_mask(h);
if (page)
page_cache_release(page);
/* short read or no more work */
if ((ret != nr) || (len == 0))
break;
}
out:
*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
mutex_unlock(&inode->i_mutex);
return retval;
}
......@@ -607,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
spin_lock(&sbinfo->stat_lock);
/* If no limits set, just report 0 for max/free/used
* blocks, like simple_statfs() */
if (sbinfo->max_blocks >= 0) {
buf->f_blocks = sbinfo->max_blocks;
buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
if (sbinfo->spool) {
long free_pages;
spin_lock(&sbinfo->spool->lock);
buf->f_blocks = sbinfo->spool->max_hpages;
free_pages = sbinfo->spool->max_hpages
- sbinfo->spool->used_hpages;
buf->f_bavail = buf->f_bfree = free_pages;
spin_unlock(&sbinfo->spool->lock);
buf->f_files = sbinfo->max_inodes;
buf->f_ffree = sbinfo->free_inodes;
}
......@@ -625,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
if (sbi) {
sb->s_fs_info = NULL;
if (sbi->spool)
hugepage_put_subpool(sbi->spool);