Commit fd6b99fa authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "16 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/devm_memremap_pages: fix final page put race
  PCI/P2PDMA: track pgmap references per resource, not globally
  lib/genalloc: introduce chunk owners
  PCI/P2PDMA: fix the gen_pool_add_virt() failure path
  mm/devm_memremap_pages: introduce devm_memunmap_pages
  drivers/base/devres: introduce devm_release_action()
  mm/vmscan.c: fix trying to reclaim unevictable LRU page
  coredump: fix race condition between collapse_huge_page() and core dumping
  mm/mlock.c: change count_mm_mlocked_page_nr return type
  mm: mmu_gather: remove __tlb_reset_range() for force flush
  fs/ocfs2: fix race in ocfs2_dentry_attach_lock()
  mm/vmscan.c: fix recent_rotated history
  mm/mlock.c: mlockall error for flag MCL_ONFAULT
  scripts/decode_stacktrace.sh: prefix addr2line with $CROSS_COMPILE
  mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node
  mm: memcontrol: don't batch updates of local VM stats and events
parents c78ad1be 50f44ee7
......@@ -755,10 +755,32 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match,
&devres));
}
EXPORT_SYMBOL_GPL(devm_remove_action);
/**
* devm_release_action() - release previously added custom action
* @dev: Device that owns the action
* @action: Function implementing the action
* @data: Pointer to data passed to @action implementation
*
* Releases and removes instance of @action previously added by
* devm_add_action(). Both action and data should match one of the
* existing entries.
*/
void devm_release_action(struct device *dev, void (*action)(void *), void *data)
{
struct action_devres devres = {
.data = data,
.action = action,
};
WARN_ON(devres_release(dev, devm_action_release, devm_action_match,
&devres));
}
EXPORT_SYMBOL_GPL(devm_release_action);
/*
* Managed kmalloc/kfree
*/
......
......@@ -27,9 +27,8 @@ static void dev_dax_percpu_release(struct percpu_ref *ref)
complete(&dev_dax->cmp);
}
static void dev_dax_percpu_exit(void *data)
static void dev_dax_percpu_exit(struct percpu_ref *ref)
{
struct percpu_ref *ref = data;
struct dev_dax *dev_dax = ref_to_dev_dax(ref);
dev_dbg(&dev_dax->dev, "%s\n", __func__);
......@@ -466,18 +465,12 @@ int dev_dax_probe(struct device *dev)
if (rc)
return rc;
rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
if (rc)
return rc;
dev_dax->pgmap.ref = &dev_dax->ref;
dev_dax->pgmap.kill = dev_dax_percpu_kill;
dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
addr = devm_memremap_pages(dev, &dev_dax->pgmap);
if (IS_ERR(addr)) {
devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
percpu_ref_exit(&dev_dax->ref);
if (IS_ERR(addr))
return PTR_ERR(addr);
}
inode = dax_inode(dax_dev);
cdev = inode->i_cdev;
......
......@@ -303,11 +303,19 @@ static const struct attribute_group *pmem_attribute_groups[] = {
NULL,
};
static void pmem_release_queue(void *q)
static void __pmem_release_queue(struct percpu_ref *ref)
{
struct request_queue *q;
q = container_of(ref, typeof(*q), q_usage_counter);
blk_cleanup_queue(q);
}
/*
 * void * flavoured wrapper around __pmem_release_queue() so it can be
 * registered with devm_add_action_or_reset(); @ref is the queue's
 * q_usage_counter.
 */
static void pmem_release_queue(void *ref)
{
	struct percpu_ref *usage_counter = ref;

	__pmem_release_queue(usage_counter);
}
static void pmem_freeze_queue(struct percpu_ref *ref)
{
struct request_queue *q;
......@@ -399,12 +407,10 @@ static int pmem_attach_disk(struct device *dev,
if (!q)
return -ENOMEM;
if (devm_add_action_or_reset(dev, pmem_release_queue, q))
return -ENOMEM;
pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
pmem->pgmap.kill = pmem_freeze_queue;
pmem->pgmap.cleanup = __pmem_release_queue;
if (is_nd_pfn(dev)) {
if (setup_pagemap_fsdax(dev, &pmem->pgmap))
return -ENOMEM;
......@@ -425,6 +431,9 @@ static int pmem_attach_disk(struct device *dev,
pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
} else {
if (devm_add_action_or_reset(dev, pmem_release_queue,
&q->q_usage_counter))
return -ENOMEM;
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
memcpy(&bb_res, &nsio->res, sizeof(bb_res));
......
......@@ -20,12 +20,16 @@
#include <linux/seq_buf.h>
struct pci_p2pdma {
struct percpu_ref devmap_ref;
struct completion devmap_ref_done;
struct gen_pool *pool;
bool p2pmem_published;
};
/*
 * Per-resource pagemap state, allocated once for each call to
 * pci_p2pdma_add_resource().
 */
struct p2pdma_pagemap {
/* pagemap handed to devm_memremap_pages() */
struct dev_pagemap pgmap;
/* reference count installed as pgmap.ref and as the gen_pool chunk owner */
struct percpu_ref ref;
/* completed by pci_p2pdma_percpu_release(), awaited by pci_p2pdma_percpu_cleanup() */
struct completion ref_done;
};
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
......@@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = {
.name = "p2pmem",
};
/* Recover the p2pdma_pagemap that embeds @ref as its 'ref' member. */
static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref)
{
	struct p2pdma_pagemap *owner;

	owner = container_of(ref, struct p2pdma_pagemap, ref);

	return owner;
}
static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
{
struct pci_p2pdma *p2p =
container_of(ref, struct pci_p2pdma, devmap_ref);
struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
complete_all(&p2p->devmap_ref_done);
complete(&p2p_pgmap->ref_done);
}
static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
{
/*
* pci_p2pdma_add_resource() may be called multiple times
* by a driver and may register the percpu_kill devm action multiple
* times. We only want the first action to actually kill the
* percpu_ref.
*/
if (percpu_ref_is_dying(ref))
return;
percpu_ref_kill(ref);
}
/*
 * dev_pagemap ->cleanup() callback: wait until ref_done of the pagemap
 * owning @ref has been completed, then tear down the percpu_ref.
 */
static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref)
{
	struct p2pdma_pagemap *owner = to_p2p_pgmap(ref);

	wait_for_completion(&owner->ref_done);
	percpu_ref_exit(&owner->ref);
}
static void pci_p2pdma_release(void *data)
{
struct pci_dev *pdev = data;
struct pci_p2pdma *p2pdma = pdev->p2pdma;
if (!pdev->p2pdma)
if (!p2pdma)
return;
wait_for_completion(&pdev->p2pdma->devmap_ref_done);
percpu_ref_exit(&pdev->p2pdma->devmap_ref);
/* Flush and disable pci_alloc_p2p_mem() */
pdev->p2pdma = NULL;
synchronize_rcu();
gen_pool_destroy(pdev->p2pdma->pool);
gen_pool_destroy(p2pdma->pool);
sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
pdev->p2pdma = NULL;
}
static int pci_p2pdma_setup(struct pci_dev *pdev)
......@@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
if (!p2p->pool)
goto out;
init_completion(&p2p->devmap_ref_done);
error = percpu_ref_init(&p2p->devmap_ref,
pci_p2pdma_percpu_release, 0, GFP_KERNEL);
if (error)
goto out_pool_destroy;
error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
if (error)
goto out_pool_destroy;
......@@ -163,6 +165,7 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset)
{
struct p2pdma_pagemap *p2p_pgmap;
struct dev_pagemap *pgmap;
void *addr;
int error;
......@@ -185,18 +188,27 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
return error;
}
pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
if (!pgmap)
p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
if (!p2p_pgmap)
return -ENOMEM;
init_completion(&p2p_pgmap->ref_done);
error = percpu_ref_init(&p2p_pgmap->ref,
pci_p2pdma_percpu_release, 0, GFP_KERNEL);
if (error)
goto pgmap_free;
pgmap = &p2p_pgmap->pgmap;
pgmap->res.start = pci_resource_start(pdev, bar) + offset;
pgmap->res.end = pgmap->res.start + size - 1;
pgmap->res.flags = pci_resource_flags(pdev, bar);
pgmap->ref = &pdev->p2pdma->devmap_ref;
pgmap->ref = &p2p_pgmap->ref;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
pci_resource_start(pdev, bar);
pgmap->kill = pci_p2pdma_percpu_kill;
pgmap->cleanup = pci_p2pdma_percpu_cleanup;
addr = devm_memremap_pages(&pdev->dev, pgmap);
if (IS_ERR(addr)) {
......@@ -204,19 +216,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
goto pgmap_free;
}
error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,
resource_size(&pgmap->res), dev_to_node(&pdev->dev));
resource_size(&pgmap->res), dev_to_node(&pdev->dev),
&p2p_pgmap->ref);
if (error)
goto pgmap_free;
goto pages_free;
pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
&pgmap->res);
return 0;
pages_free:
devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
devm_kfree(&pdev->dev, pgmap);
devm_kfree(&pdev->dev, p2p_pgmap);
return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
......@@ -585,19 +600,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
*/
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
void *ret;
void *ret = NULL;
struct percpu_ref *ref;
/*
* Pairs with synchronize_rcu() in pci_p2pdma_release() to
* ensure pdev->p2pdma is non-NULL for the duration of the
* read-lock.
*/
rcu_read_lock();
if (unlikely(!pdev->p2pdma))
return NULL;
if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
return NULL;
ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
goto out;
if (unlikely(!ret))
percpu_ref_put(&pdev->p2pdma->devmap_ref);
ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
(void **) &ref);
if (!ret)
goto out;
if (unlikely(!percpu_ref_tryget_live(ref))) {
gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
ret = NULL;
goto out;
}
out:
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
......@@ -610,8 +636,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
*/
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
percpu_ref_put(&pdev->p2pdma->devmap_ref);
struct percpu_ref *ref;
gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
(void **) &ref);
percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);
......
......@@ -296,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
out_attach:
spin_lock(&dentry_attach_lock);
if (unlikely(dentry->d_fsdata && !alias)) {
/* d_fsdata was set by a racing thread that is doing the
* same thing as this thread. Let the racing thread
* proceed and return here.
*/
spin_unlock(&dentry_attach_lock);
iput(dl->dl_inode);
ocfs2_lock_res_free(&dl->dl_lockres);
kfree(dl);
return 0;
}
dentry->d_fsdata = dl;
dl->dl_count++;
spin_unlock(&dentry_attach_lock);
......
......@@ -713,6 +713,7 @@ void __iomem *devm_of_iomap(struct device *dev,
/* allows to add/remove a custom action to devres stack */
int devm_add_action(struct device *dev, void (*action)(void *), void *data);
void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
void devm_release_action(struct device *dev, void (*action)(void *), void *data);
static inline int devm_add_action_or_reset(struct device *dev,
void (*action)(void *), void *data)
......
......@@ -75,6 +75,7 @@ struct gen_pool_chunk {
struct list_head next_chunk; /* next chunk in pool */
atomic_long_t avail;
phys_addr_t phys_addr; /* physical starting address of memory chunk */
void *owner; /* private data to retrieve at alloc time */
unsigned long start_addr; /* start address of memory chunk */
unsigned long end_addr; /* end address of memory chunk (inclusive) */
unsigned long bits[0]; /* bitmap for allocating memory chunk */
......@@ -96,8 +97,15 @@ struct genpool_data_fixed {
extern struct gen_pool *gen_pool_create(int, int);
extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long);
extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t,
size_t, int);
extern int gen_pool_add_owner(struct gen_pool *, unsigned long, phys_addr_t,
size_t, int, void *);
/**
 * gen_pool_add_virt - add a new chunk of special memory to the pool
 * @pool: pool to add new memory chunk to
 * @addr: virtual starting address of memory chunk to add to pool
 * @phys: physical starting address of memory chunk to add to pool
 * @size: size in bytes of the memory chunk to add to pool
 * @nid: node id forwarded unchanged to gen_pool_add_owner()
 *
 * Wrapper around gen_pool_add_owner() that registers the chunk with a
 * NULL owner.
 *
 * Return: whatever gen_pool_add_owner() returns.
 */
static inline int gen_pool_add_virt(struct gen_pool *pool, unsigned long addr,
phys_addr_t phys, size_t size, int nid)
{
return gen_pool_add_owner(pool, addr, phys, size, nid, NULL);
}
/**
* gen_pool_add - add a new chunk of special memory to the pool
* @pool: pool to add new memory chunk to
......@@ -116,12 +124,47 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
return gen_pool_add_virt(pool, addr, -1, size, nid);
}
extern void gen_pool_destroy(struct gen_pool *);
extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t,
genpool_algo_t algo, void *data);
unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
genpool_algo_t algo, void *data, void **owner);
/**
 * gen_pool_alloc_owner - allocate special memory and report the chunk owner
 * @pool: pool to allocate from
 * @size: number of bytes to allocate from the pool
 * @owner: optionally retrieve the chunk owner
 *
 * Calls gen_pool_alloc_algo_owner() with the pool's own allocation
 * algorithm (pool->algo) and algorithm data (pool->data).
 *
 * Return: whatever gen_pool_alloc_algo_owner() returns.
 */
static inline unsigned long gen_pool_alloc_owner(struct gen_pool *pool,
size_t size, void **owner)
{
return gen_pool_alloc_algo_owner(pool, size, pool->algo, pool->data,
owner);
}
/**
 * gen_pool_alloc_algo - allocate special memory from the pool
 * @pool: pool to allocate from
 * @size: number of bytes to allocate from the pool
 * @algo: algorithm passed from caller
 * @data: data passed to algorithm
 *
 * Wrapper around gen_pool_alloc_algo_owner() that passes a NULL @owner,
 * i.e. the chunk owner is not reported back.
 *
 * Return: whatever gen_pool_alloc_algo_owner() returns.
 */
static inline unsigned long gen_pool_alloc_algo(struct gen_pool *pool,
size_t size, genpool_algo_t algo, void *data)
{
return gen_pool_alloc_algo_owner(pool, size, algo, data, NULL);
}
/**
* gen_pool_alloc - allocate special memory from the pool
* @pool: pool to allocate from
* @size: number of bytes to allocate from the pool
*
* Allocate the requested number of bytes from the specified pool.
* Uses the pool allocation function (with first-fit algorithm by default).
* Can not be used in NMI handler on architectures without
* NMI-safe cmpxchg implementation.
*
* Implemented as gen_pool_alloc_algo() called with the pool's own
* pool->algo and pool->data.
*
* Return: whatever gen_pool_alloc_algo() returns.
*/
static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
{
return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
}
extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
dma_addr_t *dma);
extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr,
size_t size, void **owner);
/**
 * gen_pool_free - free allocated special memory back to the pool
 * @pool: pool to free to
 * @addr: starting address of memory to free back to pool
 * @size: size in bytes of memory to free
 *
 * Wrapper around gen_pool_free_owner() that passes a NULL @owner,
 * i.e. the chunk owner is not reported back.
 */
static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr,
size_t size)
{
gen_pool_free_owner(pool, addr, size, NULL);
}
extern void gen_pool_for_each_chunk(struct gen_pool *,
void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
extern size_t gen_pool_avail(struct gen_pool *);
......
......@@ -117,9 +117,12 @@ struct memcg_shrinker_map {
struct mem_cgroup_per_node {
struct lruvec lruvec;
/* Legacy local VM stats */
struct lruvec_stat __percpu *lruvec_stat_local;
/* Subtree VM stats (batched updates) */
struct lruvec_stat __percpu *lruvec_stat_cpu;
atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
atomic_long_t lruvec_stat_local[NR_VM_NODE_STAT_ITEMS];
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
......@@ -265,17 +268,18 @@ struct mem_cgroup {
atomic_t moving_account;
struct task_struct *move_lock_task;
/* memory.stat */
/* Legacy local VM stats and events */
struct memcg_vmstats_percpu __percpu *vmstats_local;
/* Subtree VM stats and events (batched updates) */
struct memcg_vmstats_percpu __percpu *vmstats_percpu;
MEMCG_PADDING(_pad2_);
atomic_long_t vmstats[MEMCG_NR_STAT];
atomic_long_t vmstats_local[MEMCG_NR_STAT];
atomic_long_t vmevents[NR_VM_EVENT_ITEMS];
atomic_long_t vmevents_local[NR_VM_EVENT_ITEMS];
/* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
unsigned long socket_pressure;
......@@ -567,7 +571,11 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
int idx)
{
long x = atomic_long_read(&memcg->vmstats_local[idx]);
long x = 0;
int cpu;
for_each_possible_cpu(cpu)
x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
......@@ -641,13 +649,15 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
enum node_stat_item idx)
{
struct mem_cgroup_per_node *pn;
long x;
long x = 0;
int cpu;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
x = atomic_long_read(&pn->lruvec_stat_local[idx]);
for_each_possible_cpu(cpu)
x += per_cpu(pn->lruvec_stat_local->count[idx], cpu);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
......
......@@ -81,6 +81,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
* @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping
* @kill: callback to transition @ref to the dead state
* @cleanup: callback to wait for @ref to be idle and reap it
* @dev: host device of the mapping for debug
* @data: private data pointer for page_free()
* @type: memory type: see MEMORY_* in memory_hotplug.h
......@@ -92,6 +93,7 @@ struct dev_pagemap {
struct resource res;
struct percpu_ref *ref;
void (*kill)(struct percpu_ref *ref);
void (*cleanup)(struct percpu_ref *ref);
struct device *dev;
void *data;
enum memory_type type;
......@@ -100,6 +102,7 @@ struct dev_pagemap {
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
......@@ -118,6 +121,11 @@ static inline void *devm_memremap_pages(struct device *dev,
return ERR_PTR(-ENXIO);
}
/*
 * Stub with an empty body: does nothing with @dev or @pgmap.
 * NOTE(review): presumably the variant used when CONFIG_ZONE_DEVICE is
 * not set -- the enclosing preprocessor branch is not visible here, confirm.
 */
static inline void devm_memunmap_pages(struct device *dev,
struct dev_pagemap *pgmap)
{
}
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
......
......@@ -54,6 +54,10 @@ static inline void mmdrop(struct mm_struct *mm)
* followed by taking the mmap_sem for writing before modifying the
* vmas or anything the coredump pretends not to change from under it.
*
* It also has to be called when mmgrab() is used in the context of
* the process, but then the mm_count refcount is transferred outside
* the context of the process to run down_write() on that pinned mm.
*
* NOTE: find_extend_vma() called from GUP context is the only place
* that can modify the "mm" (notably the vm_start/end) under mmap_sem
* for reading and outside the context of the process, so it is also
......
......@@ -95,6 +95,7 @@ static void devm_memremap_pages_release(void *data)
pgmap->kill(pgmap->ref);
for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn));
pgmap->cleanup(pgmap->ref);
/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
......@@ -133,8 +134,8 @@ static void devm_memremap_pages_release(void *data)
* 2/ The altmap field may optionally be initialized, in which case altmap_valid
* must be set to true
*
* 3/ pgmap->ref must be 'live' on entry and will be killed at
* devm_memremap_pages_release() time, or if this routine fails.
* 3/ pgmap->ref must be 'live' on entry and will be killed and reaped
* at devm_memremap_pages_release() time, or if this routine fails.
*
* 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
......@@ -156,8 +157,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgprot_t pgprot = PAGE_KERNEL;
int error, nid, is_ram;
if (!pgmap->ref || !pgmap->kill)
if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) {
WARN(1, "Missing reference count teardown definition\n");
return ERR_PTR(-EINVAL);
}
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
......@@ -168,14 +171,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
return ERR_PTR(-ENOMEM);
error = -ENOMEM;
goto err_array;
}
conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL);
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
return ERR_PTR(-ENOMEM);
error = -ENOMEM;
goto err_array;
}
is_ram = region_intersects(align_start, align_size,
......@@ -267,10 +272,18 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgmap_array_delete(res);
err_array:
pgmap->kill(pgmap->ref);
pgmap->cleanup(pgmap->ref);
return ERR_PTR(error);
}
EXPORT_SYMBOL_GPL(devm_memremap_pages);
/**
 * devm_memunmap_pages - release the devm_memremap_pages_release action
 * @dev: device passed on to devm_release_action()
 * @pgmap: pagemap used as the action's data pointer
 *
 * Calls devm_release_action() for the devm_memremap_pages_release
 * callback with @pgmap as its data.
 */
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
devm_release_action(dev, devm_memremap_pages_release, pgmap);
}
EXPORT_SYMBOL_GPL(devm_memunmap_pages);
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
/* number of pfns from base where pfn_to_page() is valid */
......
......@@ -168,20 +168,21 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
EXPORT_SYMBOL(gen_pool_create);
/**
* gen_pool_add_virt - add a new chunk of special memory to the pool
* gen_pool_add_owner - add a new chunk of special memory to the pool
* @pool: pool to add new memory chunk to
* @virt: virtual starting address of memory chunk to add to pool
* @phys: physical starting address of memory chunk to add to pool
* @size: size in bytes of the memory chunk to add to pool
* @nid: node id of the node the chunk structure and bitmap should be
* allocated on, or -1
* @owner: private data the publisher would like to recall at alloc time
*
* Add a new chunk of special memory to the specified pool.
*
* Returns 0 on success or a -ve errno on failure.
*/
int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
size_t size, int nid)
int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
size_t size, int nid, void *owner)
{
struct gen_pool_chunk *chunk;
int nbits = size >> pool->min_alloc_order;
......@@ -195,6 +196,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
chunk->phys_addr = phys;
chunk->start_addr = virt;
chunk->end_addr = virt + size - 1;
chunk->owner = owner;
atomic_long_set(&chunk->avail, size);
spin_lock(&pool->lock);
......@@ -203,7 +205,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
return 0;
}
EXPORT_SYMBOL(gen_pool_add_virt);
EXPORT_SYMBOL(gen_pool_add_owner);
/**
* gen_pool_virt_to_phys - return the physical address of memory
......@@ -260,35 +262,20 @@ void gen_pool_destroy(struct gen_pool *pool)
EXPORT_SYMBOL(gen_pool_destroy);
/**
* gen_pool_alloc - allocate special memory from the pool
* @pool: pool to allocate from
* @size: number of bytes to allocate from the pool
*
* Allocate the requested number of bytes from the specified pool.
* Uses the pool allocation function (with first-fit algorithm by default).
* Can not be used in NMI handler on architectures without
* NMI-safe cmpxchg implementation.
*/
unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
{
return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
}
EXPORT_SYMBOL(gen_pool_alloc);
/**
* gen_pool_alloc_algo - allocate special memory from the pool
* gen_pool_alloc_algo_owner - allocate special memory from the pool
* @pool: pool to allocate from
* @size: number of bytes to allocate from the pool
* @algo: algorithm passed from caller
* @data: data passed to algorithm
* @owner: optionally retrieve the chunk owner
*
* Allocate the requested number of bytes from the specified pool.
* Uses the pool allocation function (with first-fit algorithm by default).
* Can not be used in NMI handler on architectures without
* NMI-safe cmpxchg implementation.
*/
unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
genpool_algo_t algo, void *data)
unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
genpool_algo_t algo, void *data, void **owner)
{
struct gen_pool_chunk *chunk;
unsigned long addr = 0;
......@@ -299,6 +286,9 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
BUG_ON(in_nmi());
#endif
if (owner)
*owner = NULL;
if (size == 0)
return 0;
......@@ -326,12 +316,14 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
addr = chunk->start_addr + ((unsigned long)start_bit << order);
size = nbits << order;
atomic_long_sub(size, &chunk->avail);
if (owner)
*owner = chunk->owner;
break;
}
rcu_read_unlock();
return addr;
}
EXPORT_SYMBOL(gen_pool_alloc_algo);
EXPORT_SYMBOL(gen_pool_alloc_algo_owner);
/**
* gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
......@@ -367,12 +359,14 @@ EXPORT_SYMBOL(gen_pool_dma_alloc);
* @pool: pool to free to
* @addr: starting address of memory to free back to pool
* @size: size in bytes of memory to free
* @owner: optionally retrieve the private data stashed at gen_pool_add_owner() time
*
* Free previously allocated special memory back to the specified
* pool. Can not be used in NMI handler on architectures without
* NMI-safe cmpxchg implementation.
*/
void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size,
void **owner)
{
struct gen_pool_chunk *chunk;
int order = pool->min_alloc_order;
......@@ -382,6 +376,9 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
BUG_ON(in_nmi());
#endif
if (owner)
*owner = NULL;
nbits = (size + (1UL << order) - 1) >> order;
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
......@@ -392,6 +389,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
BUG_ON(remain);
size = nbits << order;
atomic_long_add(size, &chunk->avail);
if (owner)
*owner = chunk->owner;
rcu_read_unlock();
return;
}
......@@ -399,7 +398,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
rcu_read_unlock();
BUG();
}
EXPORT_SYMBOL(gen_pool_free);
EXPORT_SYMBOL(gen_pool_free_owner);
/**
* gen_pool_for_each_chunk - call func for every chunk of generic memory pool
......
......@@ -1354,9 +1354,8 @@ static void hmm_devmem_ref_release(struct percpu_ref *ref)
complete(&devmem->completion);
}
static void hmm_devmem_ref_exit(void *data)
static void hmm_devmem_ref_exit(struct percpu_ref *ref)
{
struct percpu_ref *ref = data;
struct hmm_devmem *devmem;
devmem = container_of(ref, struct hmm_devmem, ref);
......@@ -1433,10 +1432,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
if (ret)
return ERR_PTR(ret);
ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, &devmem->ref);
if (ret)
return ERR_PTR(ret);
size = ALIGN(size, PA_SECTION_SIZE);
addr = min((unsigned long)iomem_resource.end,
(1UL << MAX_PHYSMEM_BITS) - 1);
......@@ -1475,6 +1470,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
devmem->pagemap.ref = &devmem->ref;
devmem->pagemap.data = devmem;
devmem->pagemap.kill = hmm_devmem_ref_kill;
devmem->pagemap.cleanup = hmm_devmem_ref_exit;
result = devm_memremap_pages(devmem->device, &devmem->pagemap);
if (IS_ERR(result))
......@@ -1512,11 +1508,6 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
if (ret)
return ERR_PTR(ret);
ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit,
&devmem->ref);
if (ret)
return ERR_PTR(ret);
devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
devmem->pfn_last = devmem->pfn_first +
(resource_size(devmem->resource) >> PAGE_SHIFT);
......@@ -1529,6 +1520,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
devmem->pagemap.ref = &devmem->ref;
devmem->pagemap.data = devmem;
devmem->pagemap.kill = hmm_devmem_ref_kill;
devmem->pagemap.cleanup = hmm_devmem_ref_exit;
result = devm_memremap_pages(devmem->device, &devmem->pagemap);
if (IS_ERR(result))
......
......@@ -1004,6 +1004,9 @@ static void collapse_huge_page(struct mm_struct *mm,
* handled by the anon_vma lock + PG_lock.
*/
down_write(&mm->mmap_sem);
result = SCAN_ANY_PROCESS;
if (!mmget_still_valid(mm))
goto out;
result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
......
......@@ -354,7 +354,7 @@ static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
}
return 0;
fail:
__memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1);
__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
return -ENOMEM;
}
......
......@@ -691,11 +691,12 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
if (mem_cgroup_disabled())
return;
__this_cpu_add(memcg->vmstats_local->stat[idx], val);
x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
struct mem_cgroup *mi;
atomic_long_add(x, &memcg->vmstats_local[idx]);
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
atomic_long_add(x, &mi->vmstats[idx]);
x = 0;
......@@ -745,11 +746,12 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
__mod_memcg_state(memcg, idx, val);
/* Update lruvec */
__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
struct mem_cgroup_per_node *pi;
atomic_long_add(x, &pn->lruvec_stat_local[idx]);
for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
atomic_long_add(x, &pi->lruvec_stat[idx]);
x = 0;
......@@ -771,11 +773,12 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
if (mem_cgroup_disabled())
return;
__this_cpu_add(memcg->vmstats_local->events[idx], count);
x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
if (unlikely(x > MEMCG_CHARGE_BATCH)) {
struct mem_cgroup *mi;
atomic_long_add(x, &memcg->vmevents_local[idx]);
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
atomic_long_add(x, &mi->vmevents[idx]);
x = 0;
......@@ -790,7 +793,12 @@ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
{
return atomic_long_read(&memcg->vmevents_local[event]);
long x = 0;
int cpu;
for_each_possible_cpu(cpu)
x += per_cpu(memcg->vmstats_local->events[event], cpu);
return x;
}
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
......@@ -2191,11 +2199,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
long x;
x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
if (x) {
atomic_long_add(x, &memcg->vmstats_local[i]);
if (x)
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
atomic_long_add(x, &memcg->vmstats[i]);
}
if (i >= NR_VM_NODE_STAT_ITEMS)
continue;
......@@ -2205,26 +2211,22 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
pn = mem_cgroup_nodeinfo(memcg, nid);
x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
if (x) {
atomic_long_add(x, &pn->lruvec_stat_local[i]);
if (x)
do {
atomic_long_add(x, &pn->lruvec_stat[i]);
} while ((pn = parent_nodeinfo(pn, nid)));
}
}
}
for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
long x;
x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
if (x) {
atomic_long_add(x, &memcg->vmevents_local[i]);
if (x)
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
atomic_long_add(x, &memcg->vmevents[i]);
}
}
}
return 0;
}
......@@ -4483,8 +4485,15 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn)
return 1;
pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat);
if (!pn->lruvec_stat_local) {
kfree(pn);
return 1;
}
pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
if (!pn->lruvec_stat_cpu) {
free_percpu(pn->lruvec_stat_local);
kfree(pn);
return 1;
}
......@@ -4506,6 +4515,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
return;
free_percpu(pn->lruvec_stat_cpu);
free_percpu(pn->lruvec_stat_local);
kfree(pn);
}
......@@ -4516,6 +4526,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
free_percpu(memcg->vmstats_local);
kfree(memcg);
}
......@@ -4544,6 +4555,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg->id.id < 0)
goto fail;
memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
if (!memcg->vmstats_local)
goto fail;
memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
if (!memcg->vmstats_percpu)
goto fail;
......
......@@ -636,11 +636,11 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
* is also counted.
* Return value: previously mlocked page counts
*/
static int count_mm_mlocked_page_nr(struct mm_struct *mm,
static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
unsigned long start, size_t len)
{
struct vm_area_struct *vma;
int count = 0;
unsigned long count = 0;
if (mm == NULL)
mm = current->mm;
......@@ -797,7 +797,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
unsigned long lock_limit;
int ret;
if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
flags == MCL_ONFAULT)
return -EINVAL;
if (!can_do_mlock())
......
......@@ -245,14 +245,28 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
{
/*
* If there are parallel threads are doing PTE changes on same range
* under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
* flush by batching, a thread has stable TLB entry can fail to flush
* the TLB by observing pte_none|!pte_dirty, for example so flush TLB
* forcefully if we detect parallel PTE batching threads.
* under non-exclusive lock (e.g., mmap_sem read-side) but defer TLB
* flush by batching, one thread may end up seeing inconsistent PTEs
* and result in having stale TLB entries. So flush TLB forcefully
* if we detect parallel PTE batching threads.
*
* However, some syscalls, e.g. munmap(), may free page tables, which
* requires a forced flush of everything in the given range. Otherwise
* this may result in stale TLB entries on some architectures,
* e.g. aarch64, that can specify which TLB level to flush.
*/
if (mm_tlb_flush_nested(tlb->mm)) {
/*
* The aarch64 yields better performance with fullmm by
* avoiding multiple CPUs spamming TLBI messages at the
* same time.
*
* On x86 non-fullmm doesn't yield significant difference
* against fullmm.
*/
tlb->fullmm = 1;
__tlb_reset_range(tlb);
__tlb_adjust_range(tlb, start, end - start);
tlb->freed_tables = 1;
}
tlb_flush_mmu(tlb);
......
......@@ -1505,7 +1505,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
list_for_each_entry_safe(page, next, page_list, lru) {
if (page_is_file_cache(page) && !PageDirty(page) &&
!__PageMovable(page)) {
!__PageMovable(page) && !PageUnevictable(page)) {
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
}
......@@ -1953,8 +1953,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
if (global_reclaim(sc))
__count_vm_events(item, nr_reclaimed);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
reclaim_stat->recent_rotated[0] = stat.nr_activate[0];
reclaim_stat->recent_rotated[1] = stat.nr_activate[1];
reclaim_stat->recent_rotated[0] += stat.nr_activate[0];
reclaim_stat->recent_rotated[1] += stat.nr_activate[1];
move_pages_to_lru(lruvec, &page_list);
......
......@@ -73,7 +73,7 @@ parse_symbol() {
if [[ "${cache[$module,$address]+isset}" == "isset" ]]; then
local code=${cache[$module,$address]}
else
local code=$(addr2line -i -e "$objfile" "$address")
local code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address")
cache[$module,$address]=$code
fi
......
......@@ -100,7 +100,9 @@ static void nfit_test_kill(void *_pgmap)
{
struct dev_pagemap *pgmap = _pgmap;
WARN_ON(!pgmap || !pgmap->ref || !pgmap->kill || !pgmap->cleanup);
pgmap->kill(pgmap->ref);
pgmap->cleanup(pgmap->ref);
}
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment