Commit b7cbaf59 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "Subsystems affected by this patch series: mm (memremap, memcg,
  slab-generic, kasan, mempolicy, pagecache, oom-kill, pagemap),
  kthread, signals, lib, epoll, and core-kernel"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  kernel/hung_task.c: make type annotations consistent
  epoll: add a selftest for epoll timeout race
  mm: always have io_remap_pfn_range() set pgprot_decrypted()
  mm, oom: keep oom_adj under or at upper limit when printing
  kthread_worker: prevent queuing delayed work from timer_fn when it is being canceled
  mm/truncate.c: make __invalidate_mapping_pages() static
  lib/crc32test: remove extra local_irq_disable/enable
  ptrace: fix task_join_group_stop() for the case when current is traced
  mm: mempolicy: fix potential pte_unmap_unlock pte error
  kasan: adopt KUNIT tests to SW_TAGS mode
  mm: memcg: link page counters to root if use_hierarchy is false
  mm: memcontrol: correct the NR_ANON_THPS counter of hierarchical memcg
  hugetlb_cgroup: fix reservation accounting
  mm/mremap_pages: fix static key devmap_managed_key updates
parents 495023e4 3b70ae4f
@@ -1049,6 +1049,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
         oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
                   OOM_SCORE_ADJ_MAX;
     put_task_struct(task);
+    if (oom_adj > OOM_ADJUST_MAX)
+        oom_adj = OOM_ADJUST_MAX;
     len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
     return simple_read_from_buffer(buf, count, ppos, buffer, len);
 }
...
@@ -2759,6 +2759,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
     return VM_FAULT_NOPAGE;
 }

+#ifndef io_remap_pfn_range
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+                                     unsigned long addr, unsigned long pfn,
+                                     unsigned long size, pgprot_t prot)
+{
+    return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot));
+}
+#endif
+
 static inline vm_fault_t vmf_error(int err)
 {
     if (err == -ENOMEM)
...
@@ -1427,10 +1427,6 @@ typedef unsigned int pgtbl_mod_mask;

 #endif /* !__ASSEMBLY__ */

-#ifndef io_remap_pfn_range
-#define io_remap_pfn_range remap_pfn_range
-#endif
-
 #ifndef has_transparent_hugepage
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define has_transparent_hugepage() 1
...
@@ -225,8 +225,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
  * Process updating of timeout sysctl
  */
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-                                  void __user *buffer,
-                                  size_t *lenp, loff_t *ppos)
+                                  void *buffer, size_t *lenp, loff_t *ppos)
 {
     int ret;
...
@@ -897,7 +897,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
     /* Move the work from worker->delayed_work_list. */
     WARN_ON_ONCE(list_empty(&work->node));
     list_del_init(&work->node);
-    kthread_insert_work(worker, work, &worker->work_list);
+    if (!work->canceling)
+        kthread_insert_work(worker, work, &worker->work_list);

     raw_spin_unlock_irqrestore(&worker->lock, flags);
 }
...
@@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task)

 void task_join_group_stop(struct task_struct *task)
 {
-    /* Have the new thread join an on-going signal group stop */
-    unsigned long jobctl = current->jobctl;
-
-    if (jobctl & JOBCTL_STOP_PENDING) {
-        struct signal_struct *sig = current->signal;
-        unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
-        unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
-        if (task_set_jobctl_pending(task, signr | gstop)) {
-            sig->group_stop_count++;
-        }
-    }
+    unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK;
+    struct signal_struct *sig = current->signal;
+
+    if (sig->group_stop_count) {
+        sig->group_stop_count++;
+        mask |= JOBCTL_STOP_CONSUME;
+    } else if (!(sig->flags & SIGNAL_STOP_STOPPED))
+        return;
+
+    /* Have the new thread join an on-going signal group stop */
+    task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
 }

 /*
...
@@ -683,7 +683,6 @@ static int __init crc32c_test(void)

     /* reduce OS noise */
     local_irq_save(flags);
-    local_irq_disable();

     nsec = ktime_get_ns();
     for (i = 0; i < 100; i++) {
@@ -694,7 +693,6 @@ static int __init crc32c_test(void)

     nsec = ktime_get_ns() - nsec;
     local_irq_restore(flags);
-    local_irq_enable();

     pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
@@ -768,7 +766,6 @@ static int __init crc32_test(void)

     /* reduce OS noise */
     local_irq_save(flags);
-    local_irq_disable();

     nsec = ktime_get_ns();
     for (i = 0; i < 100; i++) {
@@ -783,7 +780,6 @@ static int __init crc32_test(void)

     nsec = ktime_get_ns() - nsec;
     local_irq_restore(flags);
-    local_irq_enable();

     pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
             CRC_LE_BITS, CRC_BE_BITS);
...
@@ -216,6 +216,12 @@ static void kmalloc_oob_16(struct kunit *test)
         u64 words[2];
     } *ptr1, *ptr2;

+    /* This test is specifically crafted for the generic mode. */
+    if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+        kunit_info(test, "CONFIG_KASAN_GENERIC required\n");
+        return;
+    }
+
     ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
     KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
@@ -227,6 +233,23 @@ static void kmalloc_oob_16(struct kunit *test)
     kfree(ptr2);
 }

+static void kmalloc_uaf_16(struct kunit *test)
+{
+    struct {
+        u64 words[2];
+    } *ptr1, *ptr2;
+
+    ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL);
+    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
+
+    ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
+    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
+    kfree(ptr2);
+
+    KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2);
+    kfree(ptr1);
+}
+
 static void kmalloc_oob_memset_2(struct kunit *test)
 {
     char *ptr;
@@ -429,6 +452,12 @@ static void kasan_global_oob(struct kunit *test)
     volatile int i = 3;
     char *p = &global_array[ARRAY_SIZE(global_array) + i];

+    /* Only generic mode instruments globals. */
+    if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+        kunit_info(test, "CONFIG_KASAN_GENERIC required");
+        return;
+    }
+
     KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
@@ -467,6 +496,12 @@ static void kasan_alloca_oob_left(struct kunit *test)
     char alloca_array[i];
     char *p = alloca_array - 1;

+    /* Only generic mode instruments dynamic allocas. */
+    if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+        kunit_info(test, "CONFIG_KASAN_GENERIC required");
+        return;
+    }
+
     if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
         kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
         return;
@@ -481,6 +516,12 @@ static void kasan_alloca_oob_right(struct kunit *test)
     char alloca_array[i];
     char *p = alloca_array + i;

+    /* Only generic mode instruments dynamic allocas. */
+    if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+        kunit_info(test, "CONFIG_KASAN_GENERIC required");
+        return;
+    }
+
     if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
         kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
         return;
@@ -551,6 +592,9 @@ static void kasan_memchr(struct kunit *test)
         return;
     }

+    if (OOB_TAG_OFF)
+        size = round_up(size, OOB_TAG_OFF);
+
     ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
     KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
@@ -573,6 +617,9 @@ static void kasan_memcmp(struct kunit *test)
         return;
     }

+    if (OOB_TAG_OFF)
+        size = round_up(size, OOB_TAG_OFF);
+
     ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
     KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
     memset(arr, 0, sizeof(arr));
@@ -619,13 +666,50 @@ static void kasan_strings(struct kunit *test)
     KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1));
 }

-static void kasan_bitops(struct kunit *test)
+static void kasan_bitops_modify(struct kunit *test, int nr, void *addr)
+{
+    KUNIT_EXPECT_KASAN_FAIL(test, set_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, change_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(nr, addr));
+}
+
+static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr)
+{
+    KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __test_and_set_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit_lock(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, test_and_clear_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __test_and_clear_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr));
+    KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr));
+
+#if defined(clear_bit_unlock_is_negative_byte)
+    KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
+                clear_bit_unlock_is_negative_byte(nr, addr));
+#endif
+}
+
+static void kasan_bitops_generic(struct kunit *test)
 {
+    long *bits;
+
+    /* This test is specifically crafted for the generic mode. */
+    if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+        kunit_info(test, "CONFIG_KASAN_GENERIC required\n");
+        return;
+    }
+
     /*
      * Allocate 1 more byte, which causes kzalloc to round up to 16-bytes;
      * this way we do not actually corrupt other memory.
      */
-    long *bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL);
+    bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL);
     KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits);

     /*
@@ -633,55 +717,34 @@ static void kasan_bitops(struct kunit *test)
      * below accesses are still out-of-bounds, since bitops are defined to
      * operate on the whole long the bit is in.
      */
-    KUNIT_EXPECT_KASAN_FAIL(test, set_bit(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, change_bit(BITS_PER_LONG, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(BITS_PER_LONG, bits));
+    kasan_bitops_modify(test, BITS_PER_LONG, bits);

     /*
      * Below calls try to access bit beyond allocated memory.
      */
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        __test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        test_and_set_bit_lock(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        __test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        __test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        kasan_int_result =
-            test_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
-
-#if defined(clear_bit_unlock_is_negative_byte)
-    KUNIT_EXPECT_KASAN_FAIL(test,
-        kasan_int_result = clear_bit_unlock_is_negative_byte(
-            BITS_PER_LONG + BITS_PER_BYTE, bits));
-#endif
+    kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, bits);
+
+    kfree(bits);
+}
+
+static void kasan_bitops_tags(struct kunit *test)
+{
+    long *bits;
+
+    /* This test is specifically crafted for the tag-based mode. */
+    if (IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+        kunit_info(test, "CONFIG_KASAN_SW_TAGS required\n");
+        return;
+    }
+
+    /* Allocation size will be rounded to up granule size, which is 16. */
+    bits = kzalloc(sizeof(*bits), GFP_KERNEL);
+    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits);
+
+    /* Do the accesses past the 16 allocated bytes. */
+    kasan_bitops_modify(test, BITS_PER_LONG, &bits[1]);
+    kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, &bits[1]);

     kfree(bits);
 }
@@ -728,6 +791,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
     KUNIT_CASE(kmalloc_oob_krealloc_more),
     KUNIT_CASE(kmalloc_oob_krealloc_less),
     KUNIT_CASE(kmalloc_oob_16),
+    KUNIT_CASE(kmalloc_uaf_16),
     KUNIT_CASE(kmalloc_oob_in_memset),
     KUNIT_CASE(kmalloc_oob_memset_2),
     KUNIT_CASE(kmalloc_oob_memset_4),
@@ -751,7 +815,8 @@ static struct kunit_case kasan_kunit_test_cases[] = {
     KUNIT_CASE(kasan_memchr),
     KUNIT_CASE(kasan_memcmp),
     KUNIT_CASE(kasan_strings),
-    KUNIT_CASE(kasan_bitops),
+    KUNIT_CASE(kasan_bitops_generic),
+    KUNIT_CASE(kasan_bitops_tags),
     KUNIT_CASE(kmalloc_double_kzfree),
     KUNIT_CASE(vmalloc_oob),
     {}
...
@@ -648,6 +648,8 @@ static long region_del(struct resv_map *resv, long f, long t)
             }

             del += t - f;
+            hugetlb_cgroup_uncharge_file_region(
+                resv, rg, t - f);

             /* New entry for end of split region */
             nrg->from = t;
@@ -660,9 +662,6 @@ static long region_del(struct resv_map *resv, long f, long t)
             /* Original entry is trimmed */
             rg->to = f;

-            hugetlb_cgroup_uncharge_file_region(
-                resv, rg, nrg->to - nrg->from);
-
             list_add(&nrg->link, &rg->link);
             nrg = NULL;
             break;
@@ -678,17 +677,17 @@ static long region_del(struct resv_map *resv, long f, long t)
         }

         if (f <= rg->from) {    /* Trim beginning of region */
-            del += t - rg->from;
-            rg->from = t;
-
             hugetlb_cgroup_uncharge_file_region(resv, rg,
                                                 t - rg->from);
-        } else {        /* Trim end of region */
-            del += rg->to - f;
-            rg->to = f;

+            del += t - rg->from;
+            rg->from = t;
+        } else {        /* Trim end of region */
             hugetlb_cgroup_uncharge_file_region(resv, rg,
                                                 rg->to - f);
+
+            del += rg->to - f;
+            rg->to = f;
         }
     }
@@ -2443,6 +2442,9 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,

         rsv_adjust = hugepage_subpool_put_pages(spool, 1);
         hugetlb_acct_memory(h, -rsv_adjust);
+        if (deferred_reserve)
+            hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
+                    pages_per_huge_page(h), page);
     }
     return page;
...
@@ -4110,11 +4110,17 @@ static int memcg_stat_show(struct seq_file *m, void *v)
                (u64)memsw * PAGE_SIZE);

     for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
+        unsigned long nr;
+
         if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
             continue;
+        nr = memcg_page_state(memcg, memcg1_stats[i]);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        if (memcg1_stats[i] == NR_ANON_THPS)
+            nr *= HPAGE_PMD_NR;
+#endif
         seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
-               (u64)memcg_page_state(memcg, memcg1_stats[i]) *
-               PAGE_SIZE);
+               (u64)nr * PAGE_SIZE);
     }

     for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
@@ -5339,17 +5345,22 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
         memcg->swappiness = mem_cgroup_swappiness(parent);
         memcg->oom_kill_disable = parent->oom_kill_disable;
     }
-    if (parent && parent->use_hierarchy) {
+    if (!parent) {
+        page_counter_init(&memcg->memory, NULL);
+        page_counter_init(&memcg->swap, NULL);
+        page_counter_init(&memcg->kmem, NULL);
+        page_counter_init(&memcg->tcpmem, NULL);
+    } else if (parent->use_hierarchy) {
         memcg->use_hierarchy = true;
         page_counter_init(&memcg->memory, &parent->memory);
         page_counter_init(&memcg->swap, &parent->swap);
         page_counter_init(&memcg->kmem, &parent->kmem);
         page_counter_init(&memcg->tcpmem, &parent->tcpmem);
     } else {
-        page_counter_init(&memcg->memory, NULL);
-        page_counter_init(&memcg->swap, NULL);
-        page_counter_init(&memcg->kmem, NULL);
-        page_counter_init(&memcg->tcpmem, NULL);
+        page_counter_init(&memcg->memory, &root_mem_cgroup->memory);
+        page_counter_init(&memcg->swap, &root_mem_cgroup->swap);
+        page_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
+        page_counter_init(&memcg->tcpmem, &root_mem_cgroup->tcpmem);
         /*
          * Deeper hierachy with use_hierarchy == false doesn't make
          * much sense so let cgroup subsystem know about this
...
@@ -525,7 +525,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
     unsigned long flags = qp->flags;
     int ret;
     bool has_unmovable = false;
-    pte_t *pte;
+    pte_t *pte, *mapped_pte;
     spinlock_t *ptl;

     ptl = pmd_trans_huge_lock(pmd, vma);
@@ -539,7 +539,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
     if (pmd_trans_unstable(pmd))
         return 0;

-    pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+    mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
     for (; addr != end; pte++, addr += PAGE_SIZE) {
         if (!pte_present(*pte))
             continue;
@@ -571,7 +571,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
         } else
             break;
     }
-    pte_unmap_unlock(pte - 1, ptl);
+    pte_unmap_unlock(mapped_pte, ptl);
     cond_resched();

     if (has_unmovable)
...
@@ -41,28 +41,24 @@ EXPORT_SYMBOL_GPL(memremap_compat_align);
 DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
 EXPORT_SYMBOL(devmap_managed_key);

-static void devmap_managed_enable_put(void)
+static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
 {
-    static_branch_dec(&devmap_managed_key);
+    if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
+        pgmap->type == MEMORY_DEVICE_FS_DAX)
+        static_branch_dec(&devmap_managed_key);
 }

-static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
+static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
 {
-    if (pgmap->type == MEMORY_DEVICE_PRIVATE &&
-        (!pgmap->ops || !pgmap->ops->page_free)) {
-        WARN(1, "Missing page_free method\n");
-        return -EINVAL;
-    }
-
-    static_branch_inc(&devmap_managed_key);
-    return 0;
+    if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
+        pgmap->type == MEMORY_DEVICE_FS_DAX)
+        static_branch_inc(&devmap_managed_key);
 }
 #else
-static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
+static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
 {
-    return -EINVAL;
 }
-static void devmap_managed_enable_put(void)
+static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
 {
 }
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
@@ -169,7 +165,7 @@ void memunmap_pages(struct dev_pagemap *pgmap)
         pageunmap_range(pgmap, i);

     WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
-    devmap_managed_enable_put();
+    devmap_managed_enable_put(pgmap);
 }
 EXPORT_SYMBOL_GPL(memunmap_pages);
@@ -307,7 +303,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
         .pgprot = PAGE_KERNEL,
     };
     const int nr_range = pgmap->nr_range;
-    bool need_devmap_managed = true;
     int error, i;

     if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
@@ -323,6 +318,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
             WARN(1, "Missing migrate_to_ram method\n");
             return ERR_PTR(-EINVAL);
         }
+        if (!pgmap->ops->page_free) {
+            WARN(1, "Missing page_free method\n");
+            return ERR_PTR(-EINVAL);
+        }
         if (!pgmap->owner) {
             WARN(1, "Missing owner\n");
             return ERR_PTR(-EINVAL);
@@ -336,11 +335,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
         }
         break;
     case MEMORY_DEVICE_GENERIC:
-        need_devmap_managed = false;
         break;
     case MEMORY_DEVICE_PCI_P2PDMA:
         params.pgprot = pgprot_noncached(params.pgprot);
-        need_devmap_managed = false;
         break;
     default:
         WARN(1, "Invalid pgmap type %d\n", pgmap->type);
@@ -364,11 +361,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
         }
     }

-    if (need_devmap_managed) {
-        error = devmap_managed_enable_get(pgmap);
-        if (error)
-            return ERR_PTR(error);
-    }
+    devmap_managed_enable_get(pgmap);

     /*
      * Clear the pgmap nr_range as it will be incremented for each
...
@@ -528,7 +528,7 @@ void truncate_inode_pages_final(struct address_space *mapping)
 }
 EXPORT_SYMBOL(truncate_inode_pages_final);

-unsigned long __invalidate_mapping_pages(struct address_space *mapping,
+static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
         pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
 {
     pgoff_t indices[PAGEVEC_SIZE];
...
@@ -3282,4 +3282,99 @@ TEST(epoll60)
     close(ctx.epfd);
 }

+struct epoll61_ctx {
+    int epfd;
+    int evfd;
+};
+
+static void *epoll61_write_eventfd(void *ctx_)
+{
+    struct epoll61_ctx *ctx = ctx_;
+    int64_t l = 1;
+
+    usleep(10950);
+    write(ctx->evfd, &l, sizeof(l));
+    return NULL;
+}
+
+static void *epoll61_epoll_with_timeout(void *ctx_)
+{
+    struct epoll61_ctx *ctx = ctx_;
+    struct epoll_event events[1];
+    int n;
+
+    n = epoll_wait(ctx->epfd, events, 1, 11);
+    /*
+     * If epoll returned the eventfd, write on the eventfd to wake up the
+     * blocking poller.
+     */
+    if (n == 1) {
+        int64_t l = 1;
+
+        write(ctx->evfd, &l, sizeof(l));
+    }
+    return NULL;
+}
+
+static void *epoll61_blocking_epoll(void *ctx_)
+{
+    struct epoll61_ctx *ctx = ctx_;
+    struct epoll_event events[1];
+
+    epoll_wait(ctx->epfd, events, 1, -1);
+    return NULL;
+}
+
+TEST(epoll61)
+{
+    struct epoll61_ctx ctx;
+    struct epoll_event ev;
+    int i, r;
+
+    ctx.epfd = epoll_create1(0);
+    ASSERT_GE(ctx.epfd, 0);
+
+    ctx.evfd = eventfd(0, EFD_NONBLOCK);
+    ASSERT_GE(ctx.evfd, 0);
+
+    ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP;
+    ev.data.ptr = NULL;
+    r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev);
+    ASSERT_EQ(r, 0);
+
+    /*
+     * We are testing a race. Repeat the test case 1000 times to make it
+     * more likely to fail in case of a bug.
+     */
+    for (i = 0; i < 1000; i++) {
+        pthread_t threads[3];
+        int n;
+
+        /*
+         * Start 3 threads:
+         * Thread 1 sleeps for 10.9ms and writes to the evenfd.
+         * Thread 2 calls epoll with a timeout of 11ms.
+         * Thread 3 calls epoll with a timeout of -1.
+         *
+         * The eventfd write by Thread 1 should either wakeup Thread 2
+         * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the
+         * eventfd to wake up Thread 3.
+         *
+         * If no events are missed, all three threads should eventually
+         * be joinable.
+         */
+        ASSERT_EQ(pthread_create(&threads[0], NULL,
+                     epoll61_write_eventfd, &ctx), 0);
+        ASSERT_EQ(pthread_create(&threads[1], NULL,
+                     epoll61_epoll_with_timeout, &ctx), 0);
+        ASSERT_EQ(pthread_create(&threads[2], NULL,
+                     epoll61_blocking_epoll, &ctx), 0);
+
+        for (n = 0; n < ARRAY_SIZE(threads); ++n)
+            ASSERT_EQ(pthread_join(threads[n], NULL), 0);
+    }
+
+    close(ctx.epfd);
+    close(ctx.evfd);
+}
+
 TEST_HARNESS_MAIN