Commit 5da0d8e4 authored by Philippe Gerum

mm: ipipe: disable ondemand memory

parent 607e7b63
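
Taken together, the hunks below pin an address space against on-demand memory
management: a new per-mm flag (MMF_VM_PINNED) is introduced,
__ipipe_disable_ondemand_mappings() faults in every writable private (COW)
mapping of a task and sets that flag, fork() breaks COW eagerly for pinned,
memory-locked mms, mprotect() re-populates the affected VMAs afterwards, and
kernel mappings set up through ioremap/vmalloc are propagated to all page
tables right away. As a minimal usage sketch only (the calling context below,
a co-kernel pinning a task before it runs in the head domain, is an assumption
and not part of this commit):

#include <linux/sched.h>
#include <linux/printk.h>
#include <linux/ipipe.h>	/* declares __ipipe_disable_ondemand_mappings() in I-pipe kernels */

/* Hypothetical caller: pin the task's memory before it may run in the
 * real-time (head) domain, so no later access can trigger demand paging
 * or copy-on-write from that context. */
static int pin_rt_task_memory(struct task_struct *p)
{
	int ret = __ipipe_disable_ondemand_mappings(p);

	if (ret)
		pr_warn("%s[%d]: could not pin memory (%d)\n",
			p->comm, task_pid_nr(p), ret);
	return ret;
}
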
......@@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
MMF_DISABLE_THP_MASK)
......
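
MMF_VM_PINNED marks a whole mm as pinned: it is set by
__ipipe_disable_ondemand_mappings() further down and tested by the fork and
mprotect paths in this commit. For illustration only, the check as the later
hunks perform it, wrapped in a made-up helper:

#include <linux/bitops.h>
#include <linux/mm_types.h>

/* Hypothetical convenience wrapper, not part of this commit. */
static inline bool mm_is_pinned(struct mm_struct *mm)
{
	return test_bit(MMF_VM_PINNED, &mm->flags);
}
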
......@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/hardirq.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
......@@ -175,7 +176,12 @@ int ioremap_page_range(unsigned long addr,
break;
} while (pgd++, addr = next, addr != end);
flush_cache_vmap(start, end);
/* APEI may invoke this for temporarily remapping pages in interrupt
* context - nothing we can or need to propagate globally. */
if (!in_interrupt()) {
__ipipe_pin_mapping_globally(start, end);
flush_cache_vmap(start, end);
}
return err;
}
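
On some architectures, mappings created in the vmalloc/ioremap range are only
synchronized into a process's page tables lazily, by a minor fault on first
access. A thread running in the head (real-time) domain cannot afford that
fault, so the mapping is propagated to every page table as soon as it is
created; the same __ipipe_pin_mapping_globally() call is added to
vmap_page_range_noflush() in the last hunk, while the in_interrupt() check
above skips the propagation for APEI's transient remappings from interrupt
context, where it is neither possible nor needed. Purely as an illustration of
the kind of access that must not fault (device, offset and setup are
hypothetical):

#include <linux/io.h>
#include <linux/types.h>

#define DEV_STATUS	0x10		/* made-up register offset */

static void __iomem *regs;		/* assume regs = ioremap(phys, len) at probe time */

/* In this illustration the caller runs in head-domain context, where a
 * lazy vmalloc-area fixup fault would be fatal. */
static u32 rt_read_status(void)
{
	return readl(regs + DEV_STATUS);
}
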
......@@ -55,6 +55,7 @@
#include <linux/export.h>
#include <linux/delayacct.h>
#include <linux/init.h>
#include <linux/ipipe.h>
#include <linux/pfn_t.h>
#include <linux/writeback.h>
#include <linux/memcontrol.h>
......@@ -129,6 +130,11 @@ EXPORT_SYMBOL(zero_pfn);
unsigned long highest_memmap_pfn __read_mostly;
static inline void cow_user_page(struct page *dst,
struct page *src,
unsigned long va,
struct vm_area_struct *vma);
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
......@@ -939,8 +945,8 @@ out:
static inline unsigned long
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
unsigned long addr, int *rss)
pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
unsigned long addr, int *rss, struct page *uncow_page)
{
unsigned long vm_flags = vma->vm_flags;
pte_t pte = *src_pte;
......@@ -1018,6 +1024,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* in the parent and the child
*/
if (is_cow_mapping(vm_flags)) {
#ifdef CONFIG_IPIPE
if (uncow_page) {
struct page *old_page = vm_normal_page(vma, addr, pte);
cow_user_page(uncow_page, old_page, addr, vma);
pte = mk_pte(uncow_page, vma->vm_page_prot);
if (vm_flags & VM_SHARED)
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
page_add_new_anon_rmap(uncow_page, vma, addr, false);
rss[!!PageAnon(uncow_page)]++;
goto out_set_pte;
}
#endif /* CONFIG_IPIPE */
ptep_set_wrprotect(src_mm, addr, src_pte);
pte = pte_wrprotect(pte);
}
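
Without CONFIG_IPIPE, copy_one_pte() write-protects a private (COW) page in
both parent and child, and whichever side writes first takes a fault to get
its own copy. For a pinned, memory-locked parent that later fault is exactly
what must be avoided, so when uncow_page has been pre-allocated (see the
copy_pte_range() hunks below) the data is copied into it here, the copy is
mapped into the child, and the parent's own mapping stays writable. Seen from
userspace, the case being handled is roughly this (illustrative only):

/* A memory-locked (e.g. real-time) parent forking a helper process. With
 * the change above, the child receives its own copy of the touched pages
 * at fork() time, so the parent's next store to buf does not take a
 * copy-on-write fault. */
#include <sys/mman.h>
#include <unistd.h>

static char buf[4096];

int main(void)
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE))	/* makes the VMAs VM_LOCKED */
		return 1;

	if (fork() == 0)
		_exit(0);		/* child exits immediately */

	buf[0] = 1;			/* parent: must not fault once pinned */
	return 0;
}
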
......@@ -1065,13 +1086,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
int progress = 0;
int rss[NR_MM_COUNTERS];
swp_entry_t entry = (swp_entry_t){0};
struct page *uncow_page = NULL;
#ifdef CONFIG_IPIPE
int do_cow_break = 0;
again:
if (do_cow_break) {
uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
if (uncow_page == NULL)
return -ENOMEM;
do_cow_break = 0;
}
#else
again:
#endif
init_rss_vec(rss);
dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
if (!dst_pte)
if (!dst_pte) {
if (uncow_page)
put_page(uncow_page);
return -ENOMEM;
}
src_pte = pte_offset_map(src_pmd, addr);
src_ptl = pte_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
......@@ -1094,8 +1129,25 @@ again:
progress++;
continue;
}
#ifdef CONFIG_IPIPE
if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
if (is_cow_mapping(vma->vm_flags) &&
test_bit(MMF_VM_PINNED, &src_mm->flags) &&
((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
arch_leave_lazy_mmu_mode();
spin_unlock(src_ptl);
pte_unmap(src_pte);
add_mm_rss_vec(dst_mm, rss);
pte_unmap_unlock(dst_pte, dst_ptl);
cond_resched();
do_cow_break = 1;
goto again;
}
}
#endif
entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
vma, addr, rss);
vma, addr, rss, uncow_page);
uncow_page = NULL;
if (entry.val)
break;
progress += 8;
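
The page used to break COW cannot be allocated while the PTE spinlocks are
held, because a GFP_HIGHUSER allocation may sleep. When the copy loop finds a
present PTE in a pinned, locked COW mapping, it therefore backs out of the
locked section, sets do_cow_break and restarts at the again: label, where the
previous hunk allocates uncow_page before the locks are retaken. Reduced to
its bare shape (illustrative only, with a hypothetical predicate standing in
for the real test):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/spinlock.h>

/* Hypothetical sketch of the "back out, allocate, retry" pattern. */
static int scan_with_retry(spinlock_t *ptl, bool (*needs_new_page)(void))
{
	struct page *page = NULL;

again:
	spin_lock(ptl);
	if (needs_new_page() && !page) {
		spin_unlock(ptl);			/* leave the atomic section */
		page = alloc_page(GFP_HIGHUSER);	/* may sleep */
		if (!page)
			return -ENOMEM;
		goto again;				/* rescan with the page in hand */
	}
	/* ... consume the page under the lock, as copy_one_pte() does ... */
	spin_unlock(ptl);

	if (page)
		put_page(page);		/* not consumed in this sketch */
	return 0;
}
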
......@@ -4642,6 +4694,41 @@ long copy_huge_page_from_user(struct page *dst_page,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
#ifdef CONFIG_IPIPE
int __ipipe_disable_ondemand_mappings(struct task_struct *tsk)
{
struct vm_area_struct *vma;
struct mm_struct *mm;
int result = 0;
mm = get_task_mm(tsk);
if (!mm)
return -EPERM;
down_write(&mm->mmap_sem);
if (test_bit(MMF_VM_PINNED, &mm->flags))
goto done_mm;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (is_cow_mapping(vma->vm_flags) &&
(vma->vm_flags & VM_WRITE)) {
result = __ipipe_pin_vma(mm, vma);
if (result < 0)
goto done_mm;
}
}
set_bit(MMF_VM_PINNED, &mm->flags);
done_mm:
up_write(&mm->mmap_sem);
mmput(mm);
return result;
}
EXPORT_SYMBOL_GPL(__ipipe_disable_ondemand_mappings);
#endif /* CONFIG_IPIPE */
#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS
static struct kmem_cache *page_ptl_cachep;
......
......@@ -864,3 +864,27 @@ void user_shm_unlock(size_t size, struct user_struct *user)
spin_unlock(&shmlock_user_lock);
free_uid(user);
}
#ifdef CONFIG_IPIPE
int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
int ret, write, len;
if (vma->vm_flags & (VM_IO | VM_PFNMAP))
return 0;
if (!((vma->vm_flags & VM_DONTEXPAND) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(mm))) {
ret = populate_vma_page_range(vma, vma->vm_start, vma->vm_end,
NULL);
return ret < 0 ? ret : 0;
}
write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
len = DIV_ROUND_UP(vma->vm_end, PAGE_SIZE) - vma->vm_start/PAGE_SIZE;
ret = get_user_pages(vma->vm_start, len, write, 0, NULL);
if (ret < 0)
return ret;
return ret == len ? 0 : -EFAULT;
}
#endif
......@@ -22,6 +22,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/ipipe.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/pkeys.h>
......@@ -41,7 +42,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
struct mm_struct *mm = vma->vm_mm;
pte_t *pte, oldpte;
spinlock_t *ptl;
unsigned long pages = 0;
unsigned long pages = 0, flags;
int target_node = NUMA_NO_NODE;
/*
......@@ -96,6 +97,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
continue;
}
flags = hard_local_irq_save();
ptent = ptep_modify_prot_start(mm, addr, pte);
ptent = pte_modify(ptent, newprot);
if (preserve_write)
......@@ -108,6 +110,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = pte_mkwrite(ptent);
}
ptep_modify_prot_commit(mm, addr, pte, ptent);
hard_local_irq_restore(flags);
pages++;
} else if (IS_ENABLED(CONFIG_MIGRATION)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
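
On most architectures ptep_modify_prot_start() transiently clears the PTE and
ptep_modify_prot_commit() installs the new value, so the entry is invalid in
between. Under I-pipe, plain local_irq_save() would not protect that window:
it only virtually disables interrupts for the root (Linux) domain, and the
head domain can still preempt. The added hard_local_irq_save() and
hard_local_irq_restore() calls mask interrupts at CPU level instead, keeping
the real-time domain out of the transient PTE state.
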
......@@ -286,6 +289,12 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
pages = hugetlb_change_protection(vma, start, end, newprot);
else
pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
#ifdef CONFIG_IPIPE
if (test_bit(MMF_VM_PINNED, &vma->vm_mm->flags) &&
((vma->vm_flags | vma->vm_mm->def_flags) & VM_LOCKED) &&
(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
__ipipe_pin_vma(vma->vm_mm, vma);
#endif
return pages;
}
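
A protection change can leave the first access to the range needing another
fault to complete, for instance to make a present page writable again after it
had been write-protected. For a pinned, memory-locked mm that fault must not
hit a real-time thread later on, so the VMA is run through __ipipe_pin_vma()
again as soon as the protection change is done. From userspace the case looks
like this (illustrative only):

/* An mlock'ed real-time process re-enabling write access to a buffer.
 * With the hunk above, the pages are populated again before mprotect()
 * returns, so the memset() does not demand-fault. */
#include <string.h>
#include <sys/mman.h>

static int reopen_buffer(void *buf, size_t len)
{
	if (mprotect(buf, len, PROT_READ | PROT_WRITE))
		return -1;
	memset(buf, 0, len);	/* first write after the protection change */
	return 0;
}
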
......
......@@ -232,6 +232,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
return err;
} while (pgd++, addr = next, addr != end);
__ipipe_pin_mapping_globally(start, end);
return nr;
}
......