Commit 7ce32ac6 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "24 patches, based on 4a09d388.

  Subsystems affected by this patch series: mm (thp, vmalloc, hugetlb,
  memory-failure, and pagealloc), nilfs2, kthread, MAINTAINERS, and
  mailmap"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits)
  mailmap: add Marek's other e-mail address and identity without diacritics
  MAINTAINERS: fix Marek's identity again
  mm/page_alloc: do bulk array bounds check after checking populated elements
  mm/page_alloc: __alloc_pages_bulk(): do bounds check before accessing array
  mm/hwpoison: do not lock page again when me_huge_page() successfully recovers
  mm,hwpoison: return -EHWPOISON to denote that the page has already been poisoned
  mm/memory-failure: use a mutex to avoid memory_failure() races
  mm, futex: fix shared futex pgoff on shmem huge page
  kthread: prevent deadlock when kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()
  kthread_worker: split code for canceling the delayed work timer
  mm/vmalloc: unbreak kasan vmalloc support
  KVM: s390: prepare for hugepage vmalloc
  mm/vmalloc: add vmalloc_no_huge
  nilfs2: fix memory leak in nilfs_sysfs_delete_device_group
  mm/thp: another PVMW_SYNC fix in page_vma_mapped_walk()
  mm/thp: fix page_vma_mapped_walk() if THP mapped by ptes
  mm: page_vma_mapped_walk(): get vma_address_end() earlier
  mm: page_vma_mapped_walk(): use goto instead of while (1)
  mm: page_vma_mapped_walk(): add a level of indentation
  mm: page_vma_mapped_walk(): crossing page table boundary
  ...
parents 808e9df4 72a461ad
--- a/.mailmap
+++ b/.mailmap
@@ -212,6 +212,8 @@ Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
 Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
+Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
+Marek Behún <kabel@kernel.org> Marek Behun <marek.behun@nic.cz>
 Mark Brown <broonie@sirena.org.uk>
 Mark Starovoytov <mstarovo@pm.me> <mstarovoitov@marvell.com>
 Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
......
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1816,7 +1816,7 @@ F: drivers/pinctrl/pinctrl-gemini.c
 F: drivers/rtc/rtc-ftrtc010.c
 
 ARM/CZ.NIC TURRIS SUPPORT
-M: Marek Behun <kabel@kernel.org>
+M: Marek Behún <kabel@kernel.org>
 S: Maintained
 W: https://www.turris.cz/
 F: Documentation/ABI/testing/debugfs-moxtet
@@ -10945,7 +10945,7 @@ F: include/linux/mv643xx.h
 
 MARVELL MV88X3310 PHY DRIVER
 M: Russell King <linux@armlinux.org.uk>
-M: Marek Behun <marek.behun@nic.cz>
+M: Marek Behún <kabel@kernel.org>
 L: netdev@vger.kernel.org
 S: Maintained
 F: drivers/net/phy/marvell10g.c
......
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -140,7 +140,12 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
 	/* Allocate variable storage */
 	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
 	vlen += uv_info.guest_virt_base_stor_len;
-	kvm->arch.pv.stor_var = vzalloc(vlen);
+	/*
+	 * The Create Secure Configuration Ultravisor Call does not support
+	 * using large pages for the virtual memory area.
+	 * This is a hardware limitation.
+	 */
+	kvm->arch.pv.stor_var = vmalloc_no_huge(vlen);
 	if (!kvm->arch.pv.stor_var)
 		goto out_err;
 	return 0;
......
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
 	nilfs_sysfs_delete_superblock_group(nilfs);
 	nilfs_sysfs_delete_segctor_group(nilfs);
 	kobject_del(&nilfs->ns_dev_kobj);
+	kobject_put(&nilfs->ns_dev_kobj);
 	kfree(nilfs->ns_dev_subgroups);
 }
......
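The nilfs2 leak fix above relies on the usual kobject lifetime rule: kobject_del() only removes the object from sysfs, while kobject_put() drops the reference taken by kobject_init_and_add() so that the ktype's release callback can free the memory. A minimal sketch of that teardown pattern follows (hypothetical demo_* names, not nilfs2 code):

#include <linux/kobject.h>
#include <linux/slab.h>

struct demo_dev {
	struct kobject kobj;
};

static void demo_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct demo_dev, kobj));
}

static struct kobj_type demo_ktype = {
	.release = demo_release,
};

static struct demo_dev *demo_create(struct kobject *parent)
{
	struct demo_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;
	/* kobject_init_and_add() takes a reference only kobject_put() drops. */
	if (kobject_init_and_add(&dev->kobj, &demo_ktype, parent, "demo")) {
		kobject_put(&dev->kobj);	/* frees dev via demo_release() */
		return NULL;
	}
	return dev;
}

static void demo_destroy(struct demo_dev *dev)
{
	kobject_del(&dev->kobj);	/* remove from sysfs only */
	kobject_put(&dev->kobj);	/* drop the initial reference */
}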
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -741,17 +741,6 @@ static inline int hstate_index(struct hstate *h)
 	return h - hstates;
 }
 
-pgoff_t __basepage_index(struct page *page);
-
-/* Return page->index in PAGE_SIZE units */
-static inline pgoff_t basepage_index(struct page *page)
-{
-	if (!PageCompound(page))
-		return page->index;
-
-	return __basepage_index(page);
-}
-
 extern int dissolve_free_huge_page(struct page *page);
 extern int dissolve_free_huge_pages(unsigned long start_pfn,
 				    unsigned long end_pfn);
@@ -988,11 +977,6 @@ static inline int hstate_index(struct hstate *h)
 	return 0;
 }
 
-static inline pgoff_t basepage_index(struct page *page)
-{
-	return page->index;
-}
-
 static inline int dissolve_free_huge_page(struct page *page)
 {
 	return 0;
......
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -516,7 +516,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
 }
 
 /*
- * Get index of the page with in radix-tree
+ * Get index of the page within radix-tree (but not for hugetlb pages).
  * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
  */
 static inline pgoff_t page_to_index(struct page *page)
@@ -535,15 +535,16 @@ static inline pgoff_t page_to_index(struct page *page)
 	return pgoff;
 }
 
+extern pgoff_t hugetlb_basepage_index(struct page *page);
 /*
- * Get the offset in PAGE_SIZE.
- * (TODO: hugepage should have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
+ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
  */
 static inline pgoff_t page_to_pgoff(struct page *page)
 {
-	if (unlikely(PageHeadHuge(page)))
-		return page->index << compound_order(page);
+	if (unlikely(PageHuge(page)))
+		return hugetlb_basepage_index(page);
 	return page_to_index(page);
 }
......
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -135,6 +135,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller);
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
 		int node, const void *caller);
+void *vmalloc_no_huge(unsigned long size);
 
 extern void vfree(const void *addr);
 extern void vfree_atomic(const void *addr);
......
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -35,7 +35,6 @@
 #include <linux/jhash.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
-#include <linux/hugetlb.h>
 #include <linux/freezer.h>
 #include <linux/memblock.h>
 #include <linux/fault-inject.h>
@@ -650,7 +649,7 @@ static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.i_seq = get_inode_sequence_number(inode);
-		key->shared.pgoff = basepage_index(tail);
+		key->shared.pgoff = page_to_pgoff(tail);
 		rcu_read_unlock();
 	}
......
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1093,20 +1093,15 @@ void kthread_flush_work(struct kthread_work *work)
 EXPORT_SYMBOL_GPL(kthread_flush_work);
 
 /*
- * This function removes the work from the worker queue. Also it makes sure
- * that it won't get queued later via the delayed work's timer.
+ * Make sure that the timer is neither set nor running and could
+ * not manipulate the work list_head any longer.
  *
- * The work might still be in use when this function finishes. See the
- * current_work proceed by the worker.
- *
- * Return: %true if @work was pending and successfully canceled,
- *	%false if @work was not pending
+ * The function is called under worker->lock. The lock is temporary
+ * released but the timer can't be set again in the meantime.
  */
-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
-				  unsigned long *flags)
+static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
+					      unsigned long *flags)
 {
-	/* Try to cancel the timer if exists. */
-	if (is_dwork) {
 	struct kthread_delayed_work *dwork =
 		container_of(work, struct kthread_delayed_work, work);
 	struct kthread_worker *worker = work->worker;
@@ -1122,8 +1117,23 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
 	del_timer_sync(&dwork->timer);
 	raw_spin_lock_irqsave(&worker->lock, *flags);
 	work->canceling--;
-	}
+}
 
+/*
+ * This function removes the work from the worker queue.
+ *
+ * It is called under worker->lock. The caller must make sure that
+ * the timer used by delayed work is not running, e.g. by calling
+ * kthread_cancel_delayed_work_timer().
+ *
+ * The work might still be in use when this function finishes. See the
+ * current_work proceed by the worker.
+ *
+ * Return: %true if @work was pending and successfully canceled,
+ *	%false if @work was not pending
+ */
+static bool __kthread_cancel_work(struct kthread_work *work)
+{
 	/*
 	 * Try to remove the work from a worker list. It might either
 	 * be from worker->work_list or from worker->delayed_work_list.
@@ -1176,11 +1186,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 	/* Work must not be used with >1 worker, see kthread_queue_work() */
 	WARN_ON_ONCE(work->worker != worker);
 
-	/* Do not fight with another command that is canceling this work. */
+	/*
+	 * Temporary cancel the work but do not fight with another command
+	 * that is canceling the work as well.
+	 *
+	 * It is a bit tricky because of possible races with another
+	 * mod_delayed_work() and cancel_delayed_work() callers.
+	 *
+	 * The timer must be canceled first because worker->lock is released
+	 * when doing so. But the work can be removed from the queue (list)
+	 * only when it can be queued again so that the return value can
+	 * be used for reference counting.
+	 */
+	kthread_cancel_delayed_work_timer(work, &flags);
 	if (work->canceling)
 		goto out;
 
-	ret = __kthread_cancel_work(work, true, &flags);
+	ret = __kthread_cancel_work(work);
+
 fast_queue:
 	__kthread_queue_delayed_work(worker, dwork, delay);
 out:
@@ -1202,7 +1224,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
 
-	ret = __kthread_cancel_work(work, is_dwork, &flags);
+	if (is_dwork)
+		kthread_cancel_delayed_work_timer(work, &flags);
+
+	ret = __kthread_cancel_work(work);
 
 	if (worker->current_work != work)
 		goto out_fast;
......
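The two kthread patches above fix a deadlock between kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync() racing on the same delayed work item. A hedged usage sketch of the API pair involved (hypothetical demo_* driver code; the kthread_worker calls themselves are the real interface):

#include <linux/err.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>

static struct kthread_worker *demo_worker;
static struct kthread_delayed_work demo_dwork;

static void demo_work_fn(struct kthread_work *work)
{
	/* periodic housekeeping runs in the worker thread */
}

static int demo_start(void)
{
	demo_worker = kthread_create_worker(0, "demo_worker");
	if (IS_ERR(demo_worker))
		return PTR_ERR(demo_worker);

	kthread_init_delayed_work(&demo_dwork, demo_work_fn);
	kthread_queue_delayed_work(demo_worker, &demo_dwork, HZ);
	return 0;
}

/* May run concurrently with demo_stop(); this is the racy pair the fix covers. */
static void demo_kick(void)
{
	kthread_mod_delayed_work(demo_worker, &demo_dwork, HZ / 10);
}

static void demo_stop(void)
{
	kthread_cancel_delayed_work_sync(&demo_dwork);
	kthread_destroy_worker(demo_worker);
}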
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1588,15 +1588,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
 	return NULL;
 }
 
-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
 {
 	struct page *page_head = compound_head(page);
 	pgoff_t index = page_index(page_head);
 	unsigned long compound_idx;
 
-	if (!PageHuge(page_head))
-		return page_index(page);
-
 	if (compound_order(page_head) >= MAX_ORDER)
 		compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
 	else
......
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -658,6 +658,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
  */
 static int me_kernel(struct page *p, unsigned long pfn)
 {
+	unlock_page(p);
 	return MF_IGNORED;
 }
 
@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
 static int me_unknown(struct page *p, unsigned long pfn)
 {
 	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+	unlock_page(p);
 	return MF_FAILED;
 }
 
@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
  */
 static int me_pagecache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
 	struct address_space *mapping;
 
 	delete_from_lru_cache(p);
@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 * For anonymous pages we're done the only reference left
 	 * should be the one m_f() holds.
 	 */
-	if (PageAnon(p))
-		return MF_RECOVERED;
+	if (PageAnon(p)) {
+		ret = MF_RECOVERED;
+		goto out;
+	}
 
 	/*
 	 * Now truncate the page in the page cache. This is really
@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 		/*
 		 * Page has been teared down in the meanwhile
 		 */
-		return MF_FAILED;
+		ret = MF_FAILED;
+		goto out;
 	}
 
 	/*
@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_mutex or not for this? Right now we don't.
 	 */
-	return truncate_error_page(p, pfn, mapping);
+	ret = truncate_error_page(p, pfn, mapping);
+out:
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
  */
 static int me_swapcache_dirty(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	ClearPageDirty(p);
 	/* Trigger EIO in shmem: */
 	ClearPageUptodate(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_DELAYED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+	unlock_page(p);
+	return ret;
 }
 
 static int me_swapcache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	delete_from_swap_cache(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_RECOVERED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	mapping = page_mapping(hpage);
 	if (mapping) {
 		res = truncate_error_page(hpage, pfn, mapping);
+		unlock_page(hpage);
 	} else {
 		res = MF_FAILED;
 		unlock_page(hpage);
@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 			page_ref_inc(p);
 			res = MF_RECOVERED;
 		}
-		lock_page(hpage);
 	}
 
 	return res;
@@ -866,6 +877,8 @@ static struct page_state {
 	unsigned long mask;
 	unsigned long res;
 	enum mf_action_page_type type;
+
+	/* Callback ->action() has to unlock the relevant page inside it. */
 	int (*action)(struct page *p, unsigned long pfn);
 } error_states[] = {
 	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
@@ -929,6 +942,7 @@ static int page_action(struct page_state *ps, struct page *p,
 	int result;
 	int count;
 
+	/* page p should be unlocked after returning from ps->action(). */
 	result = ps->action(p, pfn);
 
 	count = page_count(p) - 1;
@@ -1253,7 +1267,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	if (TestSetPageHWPoison(head)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return 0;
+		return -EHWPOISON;
 	}
 
 	num_poisoned_pages_inc();
@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		goto out;
 	}
 
-	res = identify_page_state(pfn, p, page_flags);
+	return identify_page_state(pfn, p, page_flags);
 out:
 	unlock_page(head);
 	return res;
@@ -1429,9 +1443,10 @@ int memory_failure(unsigned long pfn, int flags)
 	struct page *hpage;
 	struct page *orig_head;
 	struct dev_pagemap *pgmap;
-	int res;
+	int res = 0;
 	unsigned long page_flags;
 	bool retry = true;
+	static DEFINE_MUTEX(mf_mutex);
 
 	if (!sysctl_memory_failure_recovery)
 		panic("Memory failure on page %lx", pfn);
@@ -1449,13 +1464,19 @@ int memory_failure(unsigned long pfn, int flags)
 		return -ENXIO;
 	}
 
+	mutex_lock(&mf_mutex);
+
 try_again:
-	if (PageHuge(p))
-		return memory_failure_hugetlb(pfn, flags);
+	if (PageHuge(p)) {
+		res = memory_failure_hugetlb(pfn, flags);
+		goto unlock_mutex;
+	}
 
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return 0;
+		res = -EHWPOISON;
+		goto unlock_mutex;
 	}
 
 	orig_head = hpage = compound_head(p);
@@ -1488,17 +1509,19 @@ int memory_failure(unsigned long pfn, int flags)
 				res = MF_FAILED;
 			}
 			action_result(pfn, MF_MSG_BUDDY, res);
-			return res == MF_RECOVERED ? 0 : -EBUSY;
+			res = res == MF_RECOVERED ? 0 : -EBUSY;
 		} else {
 			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
-			return -EBUSY;
+			res = -EBUSY;
 		}
+		goto unlock_mutex;
 	}
 
 	if (PageTransHuge(hpage)) {
 		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
 			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
-			return -EBUSY;
+			res = -EBUSY;
+			goto unlock_mutex;
 		}
 		VM_BUG_ON_PAGE(!page_count(p), p);
 	}
@@ -1522,7 +1545,7 @@ int memory_failure(unsigned long pfn, int flags)
 	if (PageCompound(p) && compound_head(p) != orig_head) {
 		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 	/*
@@ -1542,14 +1565,14 @@ int memory_failure(unsigned long pfn, int flags)
 		num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
-		return 0;
+		goto unlock_mutex;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	/*
@@ -1573,7 +1596,7 @@ int memory_failure(unsigned long pfn, int flags)
 	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 	/*
@@ -1582,13 +1605,17 @@ int memory_failure(unsigned long pfn, int flags)
 	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
 		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 identify_page_state:
 	res = identify_page_state(pfn, p, page_flags);
-out:
+	mutex_unlock(&mf_mutex);
+	return res;
+unlock_page:
 	unlock_page(p);
+unlock_mutex:
+	mutex_unlock(&mf_mutex);
 	return res;
 }
 EXPORT_SYMBOL_GPL(memory_failure);
......
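The memory_failure() rework above serializes concurrent callers with a function-local static mutex and funnels every exit through goto labels so the mutex (and the page lock) is released on every path. A stripped-down sketch of that shape (hypothetical demo_handle_event(), not mm code):

#include <linux/errno.h>
#include <linux/mutex.h>

static int demo_handle_event(int event)
{
	static DEFINE_MUTEX(demo_mutex);
	int res = 0;

	mutex_lock(&demo_mutex);

	if (event < 0) {		/* already handled elsewhere */
		res = -EHWPOISON;
		goto unlock;
	}

	/* ... the real, now-serialized work would go here ... */

unlock:
	mutex_unlock(&demo_mutex);
	return res;
}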
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5053,9 +5053,13 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	 * Skip populated array elements to determine if any pages need
 	 * to be allocated before disabling IRQs.
 	 */
-	while (page_array && page_array[nr_populated] && nr_populated < nr_pages)
+	while (page_array && nr_populated < nr_pages && page_array[nr_populated])
 		nr_populated++;
 
+	/* Already populated array? */
+	if (unlikely(page_array && nr_pages - nr_populated == 0))
+		return 0;
+
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)
 		goto failed;
......
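The bulk-allocator fix above is about evaluation order: the index has to be validated against nr_pages before page_array[nr_populated] is read, and the && short-circuit guarantees that once the operands are swapped. A tiny illustration of the same guard outside the kernel (hypothetical first_hole() helper):

#include <stdbool.h>
#include <stddef.h>

/* Find the first unpopulated slot without ever indexing past nr_entries. */
static size_t first_hole(void **array, size_t nr_entries)
{
	size_t i = 0;

	/* Bound check first: && short-circuits before array[i] is read. */
	while (array && i < nr_entries && array[i])
		i++;
	return i;	/* equals nr_entries when the array is already full */
}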
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -116,6 +116,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 	return pfn_is_match(pvmw->page, pfn);
 }
 
+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
+{
+	pvmw->address = (pvmw->address + size) & ~(size - 1);
+	if (!pvmw->address)
+		pvmw->address = ULONG_MAX;
+}
+
 /**
  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
  * @pvmw->address
@@ -144,6 +151,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 {
 	struct mm_struct *mm = pvmw->vma->vm_mm;
 	struct page *page = pvmw->page;
+	unsigned long end;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
@@ -153,10 +161,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pmd && !pvmw->pte)
 		return not_found(pvmw);
 
-	if (pvmw->pte)
-		goto next_pte;
+	if (unlikely(PageHuge(page))) {
+		/* The only possible mapping was handled on last iteration */
+		if (pvmw->pte)
+			return not_found(pvmw);
 
-	if (unlikely(PageHuge(pvmw->page))) {
 		/* when pud is not present, pte will be NULL */
 		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
 		if (!pvmw->pte)
@@ -168,16 +177,36 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			return not_found(pvmw);
 		return true;
 	}
+
+	/*
+	 * Seek to next pte only makes sense for THP.
+	 * But more important than that optimization, is to filter out
+	 * any PageKsm page: whose page->index misleads vma_address()
+	 * and vma_address_end() to disaster.
+	 */
+	end = PageTransCompound(page) ?
+		vma_address_end(page, pvmw->vma) :
+		pvmw->address + PAGE_SIZE;
+	if (pvmw->pte)
+		goto next_pte;
 restart:
+	do {
 		pgd = pgd_offset(mm, pvmw->address);
-		if (!pgd_present(*pgd))
-			return false;
+		if (!pgd_present(*pgd)) {
+			step_forward(pvmw, PGDIR_SIZE);
+			continue;
+		}
 		p4d = p4d_offset(pgd, pvmw->address);
-		if (!p4d_present(*p4d))
-			return false;
+		if (!p4d_present(*p4d)) {
+			step_forward(pvmw, P4D_SIZE);
+			continue;
+		}
 		pud = pud_offset(p4d, pvmw->address);
-		if (!pud_present(*pud))
-			return false;
+		if (!pud_present(*pud)) {
+			step_forward(pvmw, PUD_SIZE);
+			continue;
+		}
+
 		pvmw->pmd = pmd_offset(pud, pvmw->address);
 		/*
 		 * Make sure the pmd value isn't cached in a register by the
@@ -185,32 +214,32 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 		 * subsequent update.
 		 */
 		pmde = READ_ONCE(*pvmw->pmd);
+
 		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
 			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
-			if (likely(pmd_trans_huge(*pvmw->pmd))) {
-				if (pvmw->flags & PVMW_MIGRATION)
-					return not_found(pvmw);
-				if (pmd_page(*pvmw->pmd) != page)
-					return not_found(pvmw);
-				return true;
-			} else if (!pmd_present(*pvmw->pmd)) {
-				if (thp_migration_supported()) {
-					if (!(pvmw->flags & PVMW_MIGRATION))
-						return not_found(pvmw);
-					if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
-						swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
-
-						if (migration_entry_to_page(entry) != page)
-							return not_found(pvmw);
-						return true;
-					}
-				}
-				return not_found(pvmw);
-			} else {
-				/* THP pmd was split under us: handle on pte level */
-				spin_unlock(pvmw->ptl);
-				pvmw->ptl = NULL;
+			pmde = *pvmw->pmd;
+			if (likely(pmd_trans_huge(pmde))) {
+				if (pvmw->flags & PVMW_MIGRATION)
+					return not_found(pvmw);
+				if (pmd_page(pmde) != page)
+					return not_found(pvmw);
+				return true;
+			}
+			if (!pmd_present(pmde)) {
+				swp_entry_t entry;
+
+				if (!thp_migration_supported() ||
+				    !(pvmw->flags & PVMW_MIGRATION))
+					return not_found(pvmw);
+				entry = pmd_to_swp_entry(pmde);
+				if (!is_migration_entry(entry) ||
+				    migration_entry_to_page(entry) != page)
+					return not_found(pvmw);
+				return true;
 			}
+			/* THP pmd was split under us: handle on pte level */
+			spin_unlock(pvmw->ptl);
+			pvmw->ptl = NULL;
 		} else if (!pmd_present(pmde)) {
@@ -218,39 +247,38 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			 * cleared *pmd but not decremented compound_mapcount().
 			 */
 			if ((pvmw->flags & PVMW_SYNC) &&
-			    PageTransCompound(pvmw->page)) {
+			    PageTransCompound(page)) {
 				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
 
 				spin_unlock(ptl);
 			}
-			return false;
+			step_forward(pvmw, PMD_SIZE);
+			continue;
 		}
 		if (!map_pte(pvmw))
 			goto next_pte;
-	while (1) {
-		unsigned long end;
-
+this_pte:
 		if (check_pte(pvmw))
 			return true;
 next_pte:
-		/* Seek to next pte only makes sense for THP */
-		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
-			return not_found(pvmw);
-		end = vma_address_end(pvmw->page, pvmw->vma);
 		do {
 			pvmw->address += PAGE_SIZE;
 			if (pvmw->address >= end)
 				return not_found(pvmw);
 			/* Did we cross page table boundary? */
-			if (pvmw->address % PMD_SIZE == 0) {
-				pte_unmap(pvmw->pte);
+			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
 				if (pvmw->ptl) {
 					spin_unlock(pvmw->ptl);
 					pvmw->ptl = NULL;
 				}
+				pte_unmap(pvmw->pte);
+				pvmw->pte = NULL;
 				goto restart;
-			} else {
-				pvmw->pte++;
 			}
+			pvmw->pte++;
+			if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
+				pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+				spin_lock(pvmw->ptl);
+			}
 		} while (pte_none(*pvmw->pte));
@@ -258,7 +286,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
 			spin_lock(pvmw->ptl);
 		}
-	}
+		goto this_pte;
+	} while (pvmw->address < end);
+
+	return false;
 }
 
 /**
......
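step_forward(), added above, rounds the walk address up to the next boundary of the region it just skipped and saturates at ULONG_MAX rather than wrapping to zero. A user-space sketch of the same arithmetic (demo values only, not kernel code):

#include <limits.h>
#include <stdio.h>

static unsigned long step_forward_demo(unsigned long addr, unsigned long size)
{
	addr = (addr + size) & ~(size - 1);	/* round up to the next size-aligned boundary */
	if (!addr)				/* the addition wrapped past ULONG_MAX */
		addr = ULONG_MAX;
	return addr;
}

int main(void)
{
	/* 0x1234 stepped by a 2 MiB (PMD-sized) unit lands on 0x200000. */
	printf("%#lx\n", step_forward_demo(0x1234, 0x200000));
	/* Near the top of the address space the step saturates to ULONG_MAX. */
	printf("%#lx\n", step_forward_demo(ULONG_MAX - 0x1000, 0x200000));
	return 0;
}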
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2344,15 +2344,16 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
 }
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
-		unsigned long align, unsigned long flags, unsigned long start,
-		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
+		unsigned long align, unsigned long shift, unsigned long flags,
+		unsigned long start, unsigned long end, int node,
+		gfp_t gfp_mask, const void *caller)
 {
 	struct vmap_area *va;
 	struct vm_struct *area;
 	unsigned long requested_size = size;
 
 	BUG_ON(in_interrupt());
-	size = PAGE_ALIGN(size);
+	size = ALIGN(size, 1ul << shift);
 	if (unlikely(!size))
 		return NULL;
 
@@ -2384,8 +2385,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
 				       unsigned long start, unsigned long end,
 				       const void *caller)
 {
-	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
-				  GFP_KERNEL, caller);
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
+				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
 
 /**
@@ -2401,7 +2402,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  */
 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 {
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+				  VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL,
 				  __builtin_return_address(0));
 }
@@ -2409,7 +2411,8 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
 				     const void *caller)
 {
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+				  VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
 
@@ -2902,9 +2905,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	}
 
 again:
-	size = PAGE_ALIGN(size);
-	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
-				vm_flags, start, end, node, gfp_mask, caller);
+	area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
+				  VM_UNINITIALIZED | vm_flags, start, end, node,
+				  gfp_mask, caller);
 	if (!area) {
 		warn_alloc(gfp_mask, NULL,
 			   "vmalloc size %lu allocation failure: "
@@ -2923,6 +2926,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	 */
 	clear_vm_uninitialized_flag(area);
 
+	size = PAGE_ALIGN(size);
 	kmemleak_vmalloc(area, size, gfp_mask);
 
 	return addr;
@@ -2998,6 +3002,23 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+/**
+ * vmalloc_no_huge - allocate virtually contiguous memory using small pages
+ * @size:    allocation size
+ *
+ * Allocate enough non-huge pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vmalloc_no_huge(unsigned long size)
+{
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+				    NUMA_NO_NODE, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_no_huge);
+
 /**
  * vzalloc - allocate virtually contiguous memory with zero fill
  * @size: allocation size
......
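vmalloc_no_huge(), added above and used by the s390 KVM change earlier in this series, guarantees a mapping backed by base pages only. A hedged caller sketch (hypothetical demo_* helpers; note that the new helper passes plain GFP_KERNEL in the definition above, so the buffer is not zeroed the way vzalloc() would be):

#include <linux/string.h>
#include <linux/vmalloc.h>

static void *demo_alloc_var_storage(unsigned long len)
{
	void *buf = vmalloc_no_huge(len);	/* mapped with small pages only */

	if (buf)
		memset(buf, 0, len);		/* zero explicitly, unlike vzalloc() */
	return buf;
}

static void demo_free_var_storage(void *buf)
{
	vfree(buf);
}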