mm/khugepaged: add flag to predicate khugepaged-only behavior
Add an .is_khugepaged flag to struct collapse_control so that
khugepaged-specific behavior can be elided in the MADV_COLLAPSE context.

Start by protecting khugepaged-specific heuristics behind this flag.  In
MADV_COLLAPSE, the user presumably has reason to believe the collapse will
be beneficial, and khugepaged's heuristics shouldn't prevent the user from
doing so (see the sketch after the list below):

1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]

2) requirement that some pages in the region being collapsed be young or
   referenced
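
For illustration, a hedged sketch of the two contexts (editor's addition,
not part of the patch: khugepaged_collapse_control and .last_target_node
are from this patch, while madvise_collapse_sketch() and its signature are
hypothetical stand-ins for the MADV_COLLAPSE entry point added later in
this series):

	/* khugepaged context: the heuristics above remain in force */
	struct collapse_control khugepaged_collapse_control = {
		.is_khugepaged = true,
		.last_target_node = NUMA_NO_NODE,
	};

	/* hypothetical MADV_COLLAPSE context: heuristics are bypassed */
	static int madvise_collapse_sketch(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
	{
		struct collapse_control cc = {
			/* skip max_ptes_* limits and young/referenced checks */
			.is_khugepaged = false,
			.last_target_node = NUMA_NO_NODE,
		};

		/* ... scan [start, end) and pass &cc to the collapse helpers ... */
		return 0;
	}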

[[email protected]: consistently order cc->is_khugepaged and pte_* checks]
  Link: https://lkml.kernel.org/r/[email protected]
  Link: https://lore.kernel.org/linux-mm/[email protected]/
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Zach O'Keefe <[email protected]>
Reviewed-by: Yang Shi <[email protected]>
Cc: Alex Shi <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Chris Kennelly <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Helge Deller <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: James Bottomley <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Matt Turner <[email protected]>
Cc: Max Filippov <[email protected]>
Cc: Miaohe Lin <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Pasha Tatashin <[email protected]>
Cc: Pavel Begunkov <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Rongwei Wang <[email protected]>
Cc: SeongJae Park <[email protected]>
Cc: Song Liu <[email protected]>
Cc: Thomas Bogendoerfer <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Dan Carpenter <[email protected]>
Cc: "Souptick Joarder (HPE)" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
commit d8ea7cc (parent: 50ad2f2)
Authored by zokeefe; committed by akpm00 on Sep 12, 2022
 mm/khugepaged.c | 83 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 25 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  * default collapse hugepages if there is at least one pte mapped like
  * it would have happened if the vma was large enough during page
  * fault.
+ *
+ * Note that these are only respected if collapse was initiated by khugepaged.
  */
 static unsigned int khugepaged_max_ptes_none __read_mostly;
 static unsigned int khugepaged_max_ptes_swap __read_mostly;
@@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
 #define MAX_PTE_MAPPED_THP 8
 
 struct collapse_control {
+	bool is_khugepaged;
+
 	/* Num pages scanned per node */
 	u32 node_load[MAX_NUMNODES];
 
@@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page)
 static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 					unsigned long address,
 					pte_t *pte,
+					struct collapse_control *cc,
 					struct list_head *compound_pagelist)
 {
 	struct page *page = NULL;
@@ -566,8 +571,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		pte_t pteval = *_pte;
 		if (pte_none(pteval) || (pte_present(pteval) &&
 				is_zero_pfn(pte_pfn(pteval)))) {
+			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none) {
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -587,11 +594,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
 
-		if (page_mapcount(page) > 1 &&
-				++shared > khugepaged_max_ptes_shared) {
-			result = SCAN_EXCEED_SHARED_PTE;
-			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
-			goto out;
+		if (page_mapcount(page) > 1) {
+			++shared;
+			if (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared) {
+				result = SCAN_EXCEED_SHARED_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+				goto out;
+			}
 		}
 
 		if (PageCompound(page)) {
@@ -654,10 +664,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (PageCompound(page))
 			list_add_tail(&page->lru, compound_pagelist);
 next:
-		/* There should be enough young pte to collapse the page */
-		if (pte_young(pteval) ||
-		    page_is_young(page) || PageReferenced(page) ||
-		    mmu_notifier_test_young(vma->vm_mm, address))
+		/*
+		 * If collapse was initiated by khugepaged, check that there is
+		 * enough young pte to justify collapsing the page
+		 */
+		if (cc->is_khugepaged &&
+		    (pte_young(pteval) || page_is_young(page) ||
+		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
+								     address)))
 			referenced++;
 
 		if (pte_write(pteval))
@@ -666,7 +680,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 	if (unlikely(!writable)) {
 		result = SCAN_PAGE_RO;
-	} else if (unlikely(!referenced)) {
+	} else if (unlikely(cc->is_khugepaged && !referenced)) {
 		result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
 		result = SCAN_SUCCEED;
@@ -745,6 +759,7 @@ static void khugepaged_alloc_sleep(void)
 
 
 struct collapse_control khugepaged_collapse_control = {
+	.is_khugepaged = true,
 	.last_target_node = NUMA_NO_NODE,
 };
 
@@ -1025,7 +1040,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	mmu_notifier_invalidate_range_end(&range);
 
 	spin_lock(pte_ptl);
-	result = __collapse_huge_page_isolate(vma, address, pte,
+	result = __collapse_huge_page_isolate(vma, address, pte, cc,
 					      &compound_pagelist);
 	spin_unlock(pte_ptl);
 
@@ -1116,7 +1131,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 	     _pte++, _address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
-			if (++unmapped <= khugepaged_max_ptes_swap) {
+			++unmapped;
+			if (!cc->is_khugepaged ||
+			    unmapped <= khugepaged_max_ptes_swap) {
 				/*
 				 * Always be strict with uffd-wp
 				 * enabled swap entries. Please see
@@ -1134,8 +1151,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 		}
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none) {
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -1165,11 +1184,14 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto out_unmap;
 		}
 
-		if (page_mapcount(page) > 1 &&
-				++shared > khugepaged_max_ptes_shared) {
-			result = SCAN_EXCEED_SHARED_PTE;
-			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
-			goto out_unmap;
+		if (page_mapcount(page) > 1) {
+			++shared;
+			if (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared) {
+				result = SCAN_EXCEED_SHARED_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+				goto out_unmap;
+			}
 		}
 
 		page = compound_head(page);
@@ -1220,14 +1242,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			result = SCAN_PAGE_COUNT;
 			goto out_unmap;
 		}
-		if (pte_young(pteval) ||
-		    page_is_young(page) || PageReferenced(page) ||
-		    mmu_notifier_test_young(vma->vm_mm, address))
+
+		/*
+		 * If collapse was initiated by khugepaged, check that there is
+		 * enough young pte to justify collapsing the page
+		 */
+		if (cc->is_khugepaged &&
+		    (pte_young(pteval) || page_is_young(page) ||
+		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
+								     address)))
 			referenced++;
 	}
 	if (!writable) {
 		result = SCAN_PAGE_RO;
-	} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
+	} else if (cc->is_khugepaged &&
+		   (!referenced ||
+		    (unmapped && referenced < HPAGE_PMD_NR / 2))) {
 		result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
 		result = SCAN_SUCCEED;
@@ -1896,7 +1926,9 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 			continue;
 
 		if (xa_is_value(page)) {
-			if (++swap > khugepaged_max_ptes_swap) {
+			++swap;
+			if (cc->is_khugepaged &&
+			    swap > khugepaged_max_ptes_swap) {
 				result = SCAN_EXCEED_SWAP_PTE;
 				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				break;
@@ -1947,7 +1979,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 	rcu_read_unlock();
 
 	if (result == SCAN_SUCCEED) {
-		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
+		if (cc->is_khugepaged &&
+		    present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
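
A note on the shape of these changes (editor's addition): each
"++counter <= limit" test is split into an unconditional increment
followed by a predicated limit check.  Had the increment stayed inside
the condition, short-circuit evaluation on !cc->is_khugepaged would skip
it, under-reporting the counters that the scan tracepoints later consume.
A standalone userspace demo of the pitfall (editor's sketch; all names
here are illustrative, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		bool is_khugepaged = false;	/* MADV_COLLAPSE-like context */
		int max_ptes_none = 2;
		int inline_count = 0, hoisted_count = 0;

		for (int pte = 0; pte < 4; pte++) {
			/* inline form: "||" short-circuits, "++" never runs */
			if (!is_khugepaged || ++inline_count <= max_ptes_none) {
				/* would continue scanning this pte */
			}
			/* hoisted form (what the patch does): always count */
			hoisted_count++;
			if (!is_khugepaged || hoisted_count <= max_ptes_none) {
				/* would continue scanning this pte */
			}
		}
		/* prints: inline=0 hoisted=4 */
		printf("inline=%d hoisted=%d\n", inline_count, hoisted_count);
		return 0;
	}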
