Commit 8ba90f5c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'mm-hotfixes-stable-2023-06-20-12-31' of...

Merge tag 'mm-hotfixes-stable-2023-06-20-12-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull hotfixes from Andrew Morton:
 "19 hotfixes.  8 of these are cc:stable.

  This includes a wholesale reversion of the post-6.4 series 'make slab
  shrink lockless'. After input from Dave Chinner it has been decided
  that we should go a different way [1]"

Link: https://lkml.kernel.org/r/ZH6K0McWBeCjaf16@dread.disaster.area [1]

* tag 'mm-hotfixes-stable-2023-06-20-12-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  selftests/mm: fix cross compilation with LLVM
  mailmap: add entries for Ben Dooks
  nilfs2: prevent general protection fault in nilfs_clear_dirty_page()
  Revert "mm: vmscan: make global slab shrink lockless"
  Revert "mm: vmscan: make memcg slab shrink lockless"
  Revert "mm: vmscan: add shrinker_srcu_generation"
  Revert "mm: shrinkers: make count and scan in shrinker debugfs lockless"
  Revert "mm: vmscan: hold write lock to reparent shrinker nr_deferred"
  Revert "mm: vmscan: remove shrinker_rwsem from synchronize_shrinkers()"
  Revert "mm: shrinkers: convert shrinker_rwsem to mutex"
  nilfs2: fix buffer corruption due to concurrent device reads
  scripts/gdb: fix SB_* constants parsing
  scripts: fix the gfp flags header path in gfp-translate
  udmabuf: revert 'Add support for mapping hugepages (v4)'
  mm/khugepaged: fix iteration in collapse_file
  memfd: check for non-NULL file_seals in memfd_create() syscall
  mm/vmalloc: do not output a spurious warning when huge vmalloc() fails
  mm/mprotect: fix do_mprotect_pkey() limit check
  writeback: fix dereferencing NULL mapping->host on writeback_page_template
parents e660abd5 0518dbe9
......@@ -70,6 +70,8 @@ Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
Ben Dooks <ben-linux@fluff.org> <ben.dooks@simtec.co.uk>
Ben Dooks <ben-linux@fluff.org> <ben.dooks@sifive.com>
Ben Gardner <bgardner@wabtec.com>
Ben M Cahill <ben.m.cahill@intel.com>
Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net>
......
......@@ -12,7 +12,6 @@
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>
......@@ -207,9 +206,7 @@ static long udmabuf_create(struct miscdevice *device,
struct udmabuf *ubuf;
struct dma_buf *buf;
pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
struct page *page, *hpage = NULL;
pgoff_t subpgoff, maxsubpgs;
struct hstate *hpstate;
struct page *page;
int seals, ret = -EINVAL;
u32 i, flags;
......@@ -245,7 +242,7 @@ static long udmabuf_create(struct miscdevice *device,
if (!memfd)
goto err;
mapping = memfd->f_mapping;
if (!shmem_mapping(mapping) && !is_file_hugepages(memfd))
if (!shmem_mapping(mapping))
goto err;
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
if (seals == -EINVAL)
......@@ -256,48 +253,16 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
pgoff = list[i].offset >> PAGE_SHIFT;
pgcnt = list[i].size >> PAGE_SHIFT;
if (is_file_hugepages(memfd)) {
hpstate = hstate_file(memfd);
pgoff = list[i].offset >> huge_page_shift(hpstate);
subpgoff = (list[i].offset &
~huge_page_mask(hpstate)) >> PAGE_SHIFT;
maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT;
}
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
if (is_file_hugepages(memfd)) {
if (!hpage) {
hpage = find_get_page_flags(mapping, pgoff,
FGP_ACCESSED);
if (!hpage) {
ret = -EINVAL;
goto err;
}
}
page = hpage + subpgoff;
get_page(page);
subpgoff++;
if (subpgoff == maxsubpgs) {
put_page(hpage);
hpage = NULL;
subpgoff = 0;
pgoff++;
}
} else {
page = shmem_read_mapping_page(mapping,
pgoff + pgidx);
page = shmem_read_mapping_page(mapping, pgoff + pgidx);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto err;
}
}
ubuf->pages[pgbuf++] = page;
}
fput(memfd);
memfd = NULL;
if (hpage) {
put_page(hpage);
hpage = NULL;
}
}
exp_info.ops = &udmabuf_ops;
......
......@@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs cmd root_lock).
* - must take shrinker_mutex without holding cmd->root_lock
* - must take shrinker_rwsem without holding cmd->root_lock
*/
new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_MAX_CONCURRENT_LOCKS);
......
......@@ -1891,7 +1891,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
* - must take shrinker_mutex without holding pmd->root_lock
* - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
......
......@@ -370,7 +370,15 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
struct folio *folio = fbatch.folios[i];
folio_lock(folio);
/*
* This folio may have been removed from the address
* space by truncation or invalidation when the lock
* was acquired. Skip processing in that case.
*/
if (likely(folio->mapping == mapping))
nilfs_clear_dirty_page(&folio->page, silent);
folio_unlock(folio);
}
folio_batch_release(&fbatch);
......
......@@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
if (unlikely(!bh))
return -ENOMEM;
lock_buffer(bh);
if (!buffer_uptodate(bh)) {
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
......
......@@ -981,10 +981,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
unsigned int isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
lock_buffer(bh_sr);
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
isz = nilfs->ns_inode_size;
srsz = NILFS_SR_BYTES(isz);
raw_sr->sr_sum = 0; /* Ensure initialization within this update */
raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
......@@ -998,6 +1001,8 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
NILFS_SR_SUFILE_OFFSET(isz), 1);
memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
set_buffer_uptodate(bh_sr);
unlock_buffer(bh_sr);
}
static void nilfs_redirty_inodes(struct list_head *head)
......@@ -1780,6 +1785,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
......@@ -1791,6 +1797,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
b_assoc_buffers) {
clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
......
......@@ -372,10 +372,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
goto out;
}
nsbp = (void *)nsbh->b_data + offset;
memset(nsbp, 0, nilfs->ns_blocksize);
lock_buffer(nsbh);
if (sb2i >= 0) {
/*
* The position of the second superblock only changes by 4KiB,
* which is larger than the maximum superblock data size
* (= 1KiB), so there is no need to use memmove() to allow
* overlap between source and destination.
*/
memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
/*
* Zero fill after copy to avoid overwriting in case of move
* within the same block.
*/
memset(nsbh->b_data, 0, offset);
memset((void *)nsbp + nilfs->ns_sbsize, 0,
nsbh->b_size - offset - nilfs->ns_sbsize);
} else {
memset(nsbh->b_data, 0, nsbh->b_size);
}
set_buffer_uptodate(nsbh);
unlock_buffer(nsbh);
if (sb2i >= 0) {
brelse(nilfs->ns_sbh[sb2i]);
nilfs->ns_sbh[sb2i] = nsbh;
nilfs->ns_sbp[sb2i] = nsbp;
......
......@@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
* shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
* shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
* take a passive reference to the superblock to avoid this from occurring.
*/
static unsigned long super_cache_scan(struct shrinker *shrink,
......
......@@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(writeback_folio_template,
strscpy_pad(__entry->name,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0;
__entry->index = folio->index;
),
......
......@@ -2070,7 +2070,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
xas_lock_irq(&xas);
xas_set(&xas, index);
VM_BUG_ON_PAGE(page != xas_load(&xas), page);
......
......@@ -371,11 +371,14 @@ SYSCALL_DEFINE2(memfd_create,
inode->i_mode &= ~0111;
file_seals = memfd_file_seals_ptr(file);
if (file_seals) {
*file_seals &= ~F_SEAL_SEAL;
*file_seals |= F_SEAL_EXEC;
}
} else if (flags & MFD_ALLOW_SEALING) {
/* MFD_EXEC and MFD_ALLOW_SEALING are set */
file_seals = memfd_file_seals_ptr(file);
if (file_seals)
*file_seals &= ~F_SEAL_SEAL;
}
......
......@@ -867,7 +867,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
tlb_finish_mmu(&tlb);
if (!error && vma_iter_end(&vmi) < end)
if (!error && tmp < end)
error = -ENOMEM;
out:
......
......@@ -5,12 +5,10 @@
#include <linux/seq_file.h>
#include <linux/shrinker.h>
#include <linux/memcontrol.h>
#include <linux/srcu.h>
/* defined in vmscan.c */
extern struct mutex shrinker_mutex;
extern struct rw_semaphore shrinker_rwsem;
extern struct list_head shrinker_list;
extern struct srcu_struct shrinker_srcu;
static DEFINE_IDA(shrinker_debugfs_ida);
static struct dentry *shrinker_debugfs_root;
......@@ -51,13 +49,18 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
struct mem_cgroup *memcg;
unsigned long total;
bool memcg_aware;
int ret = 0, nid, srcu_idx;
int ret, nid;
count_per_node = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
if (!count_per_node)
return -ENOMEM;
srcu_idx = srcu_read_lock(&shrinker_srcu);
ret = down_read_killable(&shrinker_rwsem);
if (ret) {
kfree(count_per_node);
return ret;
}
rcu_read_lock();
memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE;
......@@ -88,7 +91,8 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
}
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
srcu_read_unlock(&shrinker_srcu, srcu_idx);
rcu_read_unlock();
up_read(&shrinker_rwsem);
kfree(count_per_node);
return ret;
......@@ -111,8 +115,9 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
.gfp_mask = GFP_KERNEL,
};
struct mem_cgroup *memcg = NULL;
int nid, srcu_idx;
int nid;
char kbuf[72];
ssize_t ret;
read_len = size < (sizeof(kbuf) - 1) ? size : (sizeof(kbuf) - 1);
if (copy_from_user(kbuf, buf, read_len))
......@@ -141,7 +146,11 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
return -EINVAL;
}
srcu_idx = srcu_read_lock(&shrinker_srcu);
ret = down_read_killable(&shrinker_rwsem);
if (ret) {
mem_cgroup_put(memcg);
return ret;
}
sc.nid = nid;
sc.memcg = memcg;
......@@ -150,7 +159,7 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
shrinker->scan_objects(shrinker, &sc);
srcu_read_unlock(&shrinker_srcu, srcu_idx);
up_read(&shrinker_rwsem);
mem_cgroup_put(memcg);
return size;
......@@ -168,7 +177,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker)
char buf[128];
int id;
lockdep_assert_held(&shrinker_mutex);
lockdep_assert_held(&shrinker_rwsem);
/* debugfs isn't initialized yet, add debugfs entries later. */
if (!shrinker_debugfs_root)
......@@ -211,7 +220,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
if (!new)
return -ENOMEM;
mutex_lock(&shrinker_mutex);
down_write(&shrinker_rwsem);
old = shrinker->name;
shrinker->name = new;
......@@ -229,7 +238,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
shrinker->debugfs_entry = entry;
}
mutex_unlock(&shrinker_mutex);
up_write(&shrinker_rwsem);
kfree_const(old);
......@@ -242,7 +251,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
{
struct dentry *entry = shrinker->debugfs_entry;
lockdep_assert_held(&shrinker_mutex);
lockdep_assert_held(&shrinker_rwsem);
kfree_const(shrinker->name);
shrinker->name = NULL;
......@@ -271,14 +280,14 @@ static int __init shrinker_debugfs_init(void)
shrinker_debugfs_root = dentry;
/* Create debugfs entries for shrinkers registered at boot */
mutex_lock(&shrinker_mutex);
down_write(&shrinker_rwsem);
list_for_each_entry(shrinker, &shrinker_list, list)
if (!shrinker->debugfs_entry) {
ret = shrinker_debugfs_add(shrinker);
if (ret)
break;
}
mutex_unlock(&shrinker_mutex);
up_write(&shrinker_rwsem);
return ret;
}
......
......@@ -3098,11 +3098,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
/* vm_area_alloc_pages() can also fail due to a fatal signal */
if (!fatal_signal_pending(current))
/*
* vm_area_alloc_pages() can fail due to insufficient memory but
* also:-
*
* - a pending fatal signal
* - insufficient huge page-order pages
*
* Since we always retry allocations at order-0 in the huge page
* case a warning for either is spurious.
*/
if (!fatal_signal_pending(current) && page_order == 0)
warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, page order %u, failed to allocate pages",
area->nr_pages * PAGE_SIZE, page_order);
"vmalloc error: size %lu, failed to allocate pages",
area->nr_pages * PAGE_SIZE);
goto fail;
}
......
......@@ -35,7 +35,7 @@
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
......@@ -57,7 +57,6 @@
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <linux/srcu.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
......@@ -190,9 +189,7 @@ struct scan_control {
int vm_swappiness = 60;
LIST_HEAD(shrinker_list);
DEFINE_MUTEX(shrinker_mutex);
DEFINE_SRCU(shrinker_srcu);
static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);
DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
static int shrinker_nr_max;
......@@ -211,21 +208,8 @@ static inline int shrinker_defer_size(int nr_items)
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
int nid)
{
return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
&shrinker_srcu,
lockdep_is_held(&shrinker_mutex));
}
static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
int nid)
{
return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
&shrinker_srcu);
}
static void free_shrinker_info_rcu(struct rcu_head *head)
{
kvfree(container_of(head, struct shrinker_info, rcu));
return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
lockdep_is_held(&shrinker_rwsem));
}
static int expand_one_shrinker_info(struct mem_cgroup *memcg,
......@@ -266,7 +250,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
defer_size - old_defer_size);
rcu_assign_pointer(pn->shrinker_info, new);
call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
kvfree_rcu(old, rcu);
}
return 0;
......@@ -292,7 +276,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
int nid, size, ret = 0;
int map_size, defer_size = 0;
mutex_lock(&shrinker_mutex);
down_write(&shrinker_rwsem);
map_size = shrinker_map_size(shrinker_nr_max);
defer_size = shrinker_defer_size(shrinker_nr_max);
size = map_size + defer_size;
......@@ -308,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
info->map_nr_max = shrinker_nr_max;
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
mutex_unlock(&shrinker_mutex);
up_write(&shrinker_rwsem);
return ret;
}
......@@ -324,7 +308,7 @@ static int expand_shrinker_info(int new_id)
if (!root_mem_cgroup)
goto out;
lockdep_assert_held(&shrinker_mutex);
lockdep_assert_held(&shrinker_rwsem);
map_size = shrinker_map_size(new_nr_max);
defer_size = shrinker_defer_size(new_nr_max);
......@@ -352,16 +336,15 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
int srcu_idx;
srcu_idx = srcu_read_lock(&shrinker_srcu);
info = shrinker_info_srcu(memcg, nid);
rcu_read_lock();
info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
set_bit(shrinker_id, info->map);
}
srcu_read_unlock(&shrinker_srcu, srcu_idx);
rcu_read_unlock();
}
}
......@@ -374,7 +357,8 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
if (mem_cgroup_disabled())
return -ENOSYS;
mutex_lock(&shrinker_mutex);
down_write(&shrinker_rwsem);
/* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
......@@ -388,7 +372,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
shrinker->id = id;
ret = 0;
unlock:
mutex_unlock(&shrinker_mutex);
up_write(&shrinker_rwsem);
return ret;
}
......@@ -398,7 +382,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
BUG_ON(id < 0);
lockdep_assert_held(&shrinker_mutex);
lockdep_assert_held(&shrinker_rwsem);
idr_remove(&shrinker_idr, id);
}
......@@ -408,7 +392,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
info = shrinker_info_srcu(memcg, nid);
info = shrinker_info_protected(memcg, nid);
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
}
......@@ -417,7 +401,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
info = shrinker_info_srcu(memcg, nid);
info = shrinker_info_protected(memcg, nid);
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
}
......@@ -433,7 +417,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
parent = root_mem_cgroup;
/* Prevent from concurrent shrinker_info expand */
mutex_lock(&shrinker_mutex);
down_read(&shrinker_rwsem);
for_each_node(nid) {
child_info = shrinker_info_protected(memcg, nid);
parent_info = shrinker_info_protected(parent, nid);
......@@ -442,7 +426,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
atomic_long_add(nr, &parent_info->nr_deferred[i]);
}
}
mutex_unlock(&shrinker_mutex);
up_read(&shrinker_rwsem);
}
static bool cgroup_reclaim(struct scan_control *sc)
......@@ -743,9 +727,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
shrinker->name = NULL;
#endif
if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
mutex_lock(&shrinker_mutex);
down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
mutex_unlock(&shrinker_mutex);
up_write(&shrinker_rwsem);
return;
}
......@@ -755,11 +739,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
void register_shrinker_prepared(struct shrinker *shrinker)
{
mutex_lock(&shrinker_mutex);
list_add_tail_rcu(&shrinker->list, &shrinker_list);
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
shrinker->flags |= SHRINKER_REGISTERED;
shrinker_debugfs_add(shrinker);
mutex_unlock(&shrinker_mutex);
up_write(&shrinker_rwsem);
}
static int __register_shrinker(struct shrinker *shrinker)
......@@ -810,16 +794,13 @@ void unregister_shrinker(struct shrinker *shrinker)
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
mutex_lock(&shrinker_mutex);
list_del_rcu(&shrinker->list);
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
mutex_unlock(&shrinker_mutex);
atomic_inc(&shrinker_srcu_generation);
synchronize_srcu(&shrinker_srcu);
up_write(&shrinker_rwsem);
shrinker_debugfs_remove(debugfs_entry, debugfs_id);
......@@ -831,13 +812,15 @@ EXPORT_SYMBOL(unregister_shrinker);
/**
* synchronize_shrinkers - Wait for all running shrinkers to complete.
*
* This is useful to guarantee that all shrinker invocations have seen an
* update, before freeing memory.
* This is equivalent to calling unregister_shrink() and register_shrinker(),
* but atomically and with less overhead. This is useful to guarantee that all
* shrinker invocations have seen an update, before freeing memory, similar to
* rcu.
*/
void synchronize_shrinkers(void)
{
atomic_inc(&shrinker_srcu_generation);
synchronize_srcu(&shrinker_srcu);
down_write(&shrinker_rwsem);
up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(synchronize_shrinkers);
......@@ -946,20 +929,19 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
int srcu_idx, generation;
int i = 0;
int i;
if (!mem_cgroup_online(memcg))
return 0;
again:
srcu_idx = srcu_read_lock(&shrinker_srcu);
info = shrinker_info_srcu(memcg, nid);
if (!down_read_trylock(&shrinker_rwsem))
return 0;
info = shrinker_info_protected(memcg, nid);
if (unlikely(!info))
goto unlock;
generation = atomic_read(&shrinker_srcu_generation);
for_each_set_bit_from(i, info->map, info->map_nr_max) {
for_each_set_bit(i, info->map, info->map_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
......@@ -1005,14 +987,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
if (atomic_read(&shrinker_srcu_generation) != generation) {
srcu_read_unlock(&shrinker_srcu, srcu_idx);
i++;
goto again;
if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
break;
}
}
unlock:
srcu_read_unlock(&shrinker_srcu, srcu_idx);
up_read(&shrinker_rwsem);
return freed;
}
#else /* CONFIG_MEMCG */
......@@ -1049,7 +1031,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
int srcu_idx, generation;
/*
* The root memcg might be allocated even though memcg is disabled
......@@ -1061,11 +1042,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
srcu_idx = srcu_read_lock(&shrinker_srcu);
if (!down_read_trylock(&shrinker_rwsem))
goto out;
generation = atomic_read(&shrinker_srcu_generation);
list_for_each_entry_srcu(shrinker, &shrinker_list, list,
srcu_read_lock_held(&shrinker_srcu)) {
list_for_each_entry(shrinker, &shrinker_list, list) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
......@@ -1076,14 +1056,19 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (ret == SHRINK_EMPTY)
ret = 0;
freed += ret;
if (atomic_read(&shrinker_srcu_generation) != generation) {
/*
* Bail out if someone want to register a new shrinker to
* prevent the registration from being stalled for long periods
* by parallel ongoing shrinking.
*/
if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
break;
}
}
srcu_read_unlock(&shrinker_srcu, srcu_idx);
up_read(&shrinker_rwsem);
out:
cond_resched();
return freed;
}
......
......@@ -48,12 +48,12 @@ if IS_BUILTIN(CONFIG_COMMON_CLK):
LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
/* linux/fs.h */
LX_VALUE(SB_RDONLY)
LX_VALUE(SB_SYNCHRONOUS)
LX_VALUE(SB_MANDLOCK)
LX_VALUE(SB_DIRSYNC)
LX_VALUE(SB_NOATIME)
LX_VALUE(SB_NODIRATIME)
LX_GDBPARSED(SB_RDONLY)
LX_GDBPARSED(SB_SYNCHRONOUS)
LX_GDBPARSED(SB_MANDLOCK)
LX_GDBPARSED(SB_DIRSYNC)
LX_GDBPARSED(SB_NOATIME)
LX_GDBPARSED(SB_NODIRATIME)
/* linux/htimer.h */
LX_GDBPARSED(hrtimer_resolution)
......
......@@ -63,11 +63,11 @@ fi
# Extract GFP flags from the kernel source
TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1
grep -q ___GFP $SOURCE/include/linux/gfp.h
grep -q ___GFP $SOURCE/include/linux/gfp_types.h
if [ $? -eq 0 ]; then
grep "^#define ___GFP" $SOURCE/include/linux/gfp.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
else
grep "^#define __GFP" $SOURCE/include/linux/gfp.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
fi
# Parse the flags
......
......@@ -5,12 +5,15 @@ LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
include local_config.mk
ifeq ($(ARCH),)
ifeq ($(CROSS_COMPILE),)
uname_M := $(shell uname -m 2>/dev/null || echo not)
else
uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
endif
MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
endif
# Without this, failed build products remain, with up-to-date timestamps,
# thus tricking Make (and you!) into believing that All Is Well, in subsequent
......@@ -65,7 +68,7 @@ TEST_GEN_PROGS += ksm_tests
TEST_GEN_PROGS += ksm_functional_tests
TEST_GEN_PROGS += mdwe_test
ifeq ($(MACHINE),x86_64)
ifeq ($(ARCH),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
......@@ -87,13 +90,13 @@ TEST_GEN_PROGS += $(BINARIES_64)
endif
else
ifneq (,$(findstring $(MACHINE),ppc64))
ifneq (,$(findstring $(ARCH),ppc64))
TEST_GEN_PROGS += protection_keys
endif
endif
ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
TEST_GEN_PROGS += va_high_addr_switch
TEST_GEN_PROGS += virtual_address_range
TEST_GEN_PROGS += write_to_hugetlbfs
......@@ -112,7 +115,7 @@ $(TEST_GEN_PROGS): vm_util.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
ifeq ($(MACHINE),x86_64)
ifeq ($(ARCH),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment