Commit 522d6d38 authored by Linus Torvalds

Merge branch 'akpm' (fixes from Andrew Morton)

Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (22 commits)
  pidns: fix free_pid() to handle the first fork failure
  ipc,msg: prevent race with rmid in msgsnd,msgrcv
  ipc/sem.c: update sem_otime for all operations
  mm/hwpoison: fix the lack of one reference count against poisoned page
  mm/hwpoison: fix false report on 2nd attempt at page recovery
  mm/hwpoison: fix test for a transparent huge page
  mm/hwpoison: fix traversal of hugetlbfs pages to avoid printk flood
  block: change config option name for cmdline partition parsing
  mm/mlock.c: prevent walking off the end of a pagetable in no-pmd configuration
  mm: avoid reinserting isolated balloon pages into LRU lists
  arch/parisc/mm/fault.c: fix uninitialized variable usage
  include/asm-generic/vtime.h: avoid zero-length file
  nilfs2: fix issue with race condition of competition between segments for dirty blocks
  Documentation/kernel-parameters.txt: replace kernelcore with Movable
  mm/bounce.c: fix a regression where MS_SNAP_STABLE (stable pages snapshotting) was ignored
  kernel/kmod.c: check for NULL in call_usermodehelper_exec()
  ipc/sem.c: synchronize the proc interface
  ipc/sem.c: optimize sem_lock()
  ipc/sem.c: fix race in sem_lock()
  mm/compaction.c: periodically schedule when freeing pages
  ...
parents df532d54 314a8ad0
...@@ -6,6 +6,8 @@ capability.txt ...@@ -6,6 +6,8 @@ capability.txt
- Generic Block Device Capability (/sys/block/<device>/capability) - Generic Block Device Capability (/sys/block/<device>/capability)
cfq-iosched.txt cfq-iosched.txt
- CFQ IO scheduler tunables - CFQ IO scheduler tunables
cmdline-partition.txt
- how to specify block device partitions on kernel command line
data-integrity.txt data-integrity.txt
- Block data integrity - Block data integrity
deadline-iosched.txt deadline-iosched.txt
......
Embedded device command line partition Embedded device command line partition parsing
===================================================================== =====================================================================
Read block device partition table from command line. Support for reading the block device partition table from the command line.
The partition used for fixed block device (eMMC) embedded device. It is typically used for fixed block (eMMC) embedded devices.
It is no MBR, save storage space. Bootloader can be easily accessed It has no MBR, so saves storage space. Bootloader can be easily accessed
by absolute address of data on the block device. by absolute address of data on the block device.
Users can easily change the partition. Users can easily change the partition.
......
...@@ -480,6 +480,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -480,6 +480,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Format: <io>,<irq>,<mode> Format: <io>,<irq>,<mode>
See header of drivers/net/hamradio/baycom_ser_hdx.c. See header of drivers/net/hamradio/baycom_ser_hdx.c.
blkdevparts= Manual partition parsing of block device(s) for
embedded devices based on command line input.
See Documentation/block/cmdline-partition.txt
boot_delay= Milliseconds to delay each printk during boot. boot_delay= Milliseconds to delay each printk during boot.
Values larger than 10 seconds (10000) are changed to Values larger than 10 seconds (10000) are changed to
no delay (0). no delay (0).
...@@ -1357,7 +1361,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -1357,7 +1361,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
pages. In the event, a node is too small to have both pages. In the event, a node is too small to have both
kernelcore and Movable pages, kernelcore pages will kernelcore and Movable pages, kernelcore pages will
take priority and other nodes will have a larger number take priority and other nodes will have a larger number
of kernelcore pages. The Movable zone is used for the of Movable pages. The Movable zone is used for the
allocation of pages that may be reclaimed or moved allocation of pages that may be reclaimed or moved
by the page migration subsystem. This means that by the page migration subsystem. This means that
HugeTLB pages may not be allocated from this zone. HugeTLB pages may not be allocated from this zone.
......
...@@ -182,6 +182,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, ...@@ -182,6 +182,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
if (user_mode(regs)) if (user_mode(regs))
flags |= FAULT_FLAG_USER; flags |= FAULT_FLAG_USER;
acc_type = parisc_acctyp(code, regs->iir);
if (acc_type & VM_WRITE) if (acc_type & VM_WRITE)
flags |= FAULT_FLAG_WRITE; flags |= FAULT_FLAG_WRITE;
retry: retry:
...@@ -196,8 +199,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, ...@@ -196,8 +199,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
good_area: good_area:
acc_type = parisc_acctyp(code,regs->iir);
if ((vma->vm_flags & acc_type) != acc_type) if ((vma->vm_flags & acc_type) != acc_type)
goto bad_area; goto bad_area;
......
...@@ -99,11 +99,16 @@ config BLK_DEV_THROTTLING ...@@ -99,11 +99,16 @@ config BLK_DEV_THROTTLING
See Documentation/cgroups/blkio-controller.txt for more information. See Documentation/cgroups/blkio-controller.txt for more information.
config CMDLINE_PARSER config BLK_CMDLINE_PARSER
bool "Block device command line partition parser" bool "Block device command line partition parser"
default n default n
---help--- ---help---
Parsing command line, get the partitions information. Enabling this option allows you to specify the partition layout from
the kernel boot args. This is typically of use for embedded devices
which don't otherwise have any standardized method for listing the
partitions on a block device.
See Documentation/block/cmdline-partition.txt for more information.
menu "Partition Types" menu "Partition Types"
......
...@@ -18,4 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o ...@@ -18,4 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
obj-$(CONFIG_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
...@@ -263,7 +263,7 @@ config SYSV68_PARTITION ...@@ -263,7 +263,7 @@ config SYSV68_PARTITION
config CMDLINE_PARTITION config CMDLINE_PARTITION
bool "Command line partition support" if PARTITION_ADVANCED bool "Command line partition support" if PARTITION_ADVANCED
select CMDLINE_PARSER select BLK_CMDLINE_PARSER
help help
Say Y here if you would read the partitions table from bootargs. Say Y here if you want to read the partition table from bootargs.
The format for the command line is just like mtdparts. The format for the command line is just like mtdparts.
...@@ -2,15 +2,15 @@ ...@@ -2,15 +2,15 @@
* Copyright (C) 2013 HUAWEI * Copyright (C) 2013 HUAWEI
* Author: Cai Zhiyong <caizhiyong@huawei.com> * Author: Cai Zhiyong <caizhiyong@huawei.com>
* *
* Read block device partition table from command line. * Read block device partition table from the command line.
* The partition used for fixed block device (eMMC) embedded device. * Typically used for fixed block (eMMC) embedded devices.
* It is no MBR, save storage space. Bootloader can be easily accessed * It has no MBR, so saves storage space. Bootloader can be easily accessed
* by absolute address of data on the block device. * by absolute address of data on the block device.
* Users can easily change the partition. * Users can easily change the partition.
* *
* The format for the command line is just like mtdparts. * The format for the command line is just like mtdparts.
* *
* Verbose config please reference "Documentation/block/cmdline-partition.txt" * For further information, see "Documentation/block/cmdline-partition.txt"
* *
*/ */
......
...@@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, ...@@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs * long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/ */
static void fill_files_note(struct memelfnote *note) static int fill_files_note(struct memelfnote *note)
{ {
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n; unsigned count, size, names_ofs, remaining, n;
...@@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note) ...@@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note)
names_ofs = (2 + 3 * count) * sizeof(data[0]); names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc: alloc:
if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
goto err; return -EINVAL;
size = round_up(size, PAGE_SIZE); size = round_up(size, PAGE_SIZE);
data = vmalloc(size); data = vmalloc(size);
if (!data) if (!data)
goto err; return -ENOMEM;
start_end_ofs = data + 2; start_end_ofs = data + 2;
name_base = name_curpos = ((char *)data) + names_ofs; name_base = name_curpos = ((char *)data) + names_ofs;
...@@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note) ...@@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note)
size = name_curpos - (char *)data; size = name_curpos - (char *)data;
fill_note(note, "CORE", NT_FILE, size, data); fill_note(note, "CORE", NT_FILE, size, data);
err: ; return 0;
} }
#ifdef CORE_DUMP_USE_REGSET #ifdef CORE_DUMP_USE_REGSET
...@@ -1686,7 +1686,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, ...@@ -1686,7 +1686,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_auxv_note(&info->auxv, current->mm); fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv); info->size += notesize(&info->auxv);
fill_files_note(&info->files); if (fill_files_note(&info->files) == 0)
info->size += notesize(&info->files); info->size += notesize(&info->files);
return 1; return 1;
...@@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info, ...@@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info,
return 0; return 0;
if (first && !writenote(&info->auxv, file, foffset)) if (first && !writenote(&info->auxv, file, foffset))
return 0; return 0;
if (first && !writenote(&info->files, file, foffset)) if (first && info->files.data &&
!writenote(&info->files, file, foffset))
return 0; return 0;
for (i = 1; i < info->thread_notes; ++i) for (i = 1; i < info->thread_notes; ++i)
...@@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) ...@@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
struct elf_note_info { struct elf_note_info {
struct memelfnote *notes; struct memelfnote *notes;
struct memelfnote *notes_files;
struct elf_prstatus *prstatus; /* NT_PRSTATUS */ struct elf_prstatus *prstatus; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
struct list_head thread_list; struct list_head thread_list;
...@@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, ...@@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
fill_auxv_note(info->notes + 3, current->mm); fill_auxv_note(info->notes + 3, current->mm);
fill_files_note(info->notes + 4); info->numnote = 4;
info->numnote = 5; if (fill_files_note(info->notes + info->numnote) == 0) {
info->notes_files = info->notes + info->numnote;
info->numnote++;
}
/* Try to dump the FPU. */ /* Try to dump the FPU. */
info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
...@@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info) ...@@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info)
kfree(list_entry(tmp, struct elf_thread_status, list)); kfree(list_entry(tmp, struct elf_thread_status, list));
} }
/* Free data allocated by fill_files_note(): */ /* Free data possibly allocated by fill_files_note(): */
vfree(info->notes[4].data); if (info->notes_files)
vfree(info->notes_files->data);
kfree(info->prstatus); kfree(info->prstatus);
kfree(info->psinfo); kfree(info->psinfo);
...@@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm) ...@@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm)
struct vm_area_struct *vma, *gate_vma; struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL; struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset; loff_t offset = 0, dataoff, foffset;
struct elf_note_info info; struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL; struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL; struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum; Elf_Half e_phnum;
......
...@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) ...@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh); clear_buffer_nilfs_checked(bh);
clear_buffer_nilfs_redirected(bh); clear_buffer_nilfs_redirected(bh);
clear_buffer_async_write(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
if (nilfs_page_buffers_clean(page)) if (nilfs_page_buffers_clean(page))
__nilfs_clear_page_dirty(page); __nilfs_clear_page_dirty(page);
...@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) ...@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
"discard block %llu, size %zu", "discard block %llu, size %zu",
(u64)bh->b_blocknr, bh->b_size); (u64)bh->b_blocknr, bh->b_size);
} }
clear_buffer_async_write(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh); clear_buffer_nilfs_checked(bh);
......
...@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, ...@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
bh = head = page_buffers(page); bh = head = page_buffers(page);
do { do {
if (!buffer_dirty(bh)) if (!buffer_dirty(bh) || buffer_async_write(bh))
continue; continue;
get_bh(bh); get_bh(bh);
list_add_tail(&bh->b_assoc_buffers, listp); list_add_tail(&bh->b_assoc_buffers, listp);
...@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, ...@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
for (i = 0; i < pagevec_count(&pvec); i++) { for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]); bh = head = page_buffers(pvec.pages[i]);
do { do {
if (buffer_dirty(bh)) { if (buffer_dirty(bh) &&
!buffer_async_write(bh)) {
get_bh(bh); get_bh(bh);
list_add_tail(&bh->b_assoc_buffers, list_add_tail(&bh->b_assoc_buffers,
listp); listp);
...@@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) ...@@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) { b_assoc_buffers) {
set_buffer_async_write(bh);
if (bh->b_page != bd_page) { if (bh->b_page != bd_page) {
if (bd_page) { if (bd_page) {
lock_page(bd_page); lock_page(bd_page);
...@@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) ...@@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers, list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) { b_assoc_buffers) {
set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) { if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) { if (bh->b_page != bd_page) {
lock_page(bd_page); lock_page(bd_page);
...@@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) ...@@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) { b_assoc_buffers) {
clear_buffer_async_write(bh);
if (bh->b_page != bd_page) { if (bh->b_page != bd_page) {
if (bd_page) if (bd_page)
end_page_writeback(bd_page); end_page_writeback(bd_page);
...@@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) ...@@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers, list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) { b_assoc_buffers) {
clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) { if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) { if (bh->b_page != bd_page) {
end_page_writeback(bd_page); end_page_writeback(bd_page);
...@@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) ...@@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) { b_assoc_buffers) {
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
clear_buffer_async_write(bh);
if (bh->b_page != bd_page) { if (bh->b_page != bd_page) {
if (bd_page) if (bd_page)
end_page_writeback(bd_page); end_page_writeback(bd_page);
...@@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) ...@@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) { b_assoc_buffers) {
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
clear_buffer_async_write(bh);
clear_buffer_delay(bh); clear_buffer_delay(bh);
clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_redirected(bh); clear_buffer_nilfs_redirected(bh);
......
/* no content, but patch(1) dislikes empty files */
...@@ -158,6 +158,26 @@ static inline bool balloon_page_movable(struct page *page) ...@@ -158,6 +158,26 @@ static inline bool balloon_page_movable(struct page *page)
return false; return false;
} }
/*
 * isolated_balloon_page - tell whether @page is a balloon page that has
 * already been isolated onto a private compaction/migration page list.
 *
 * Once a compaction thread isolates a balloon page for migration it raises
 * the page refcount, which keeps concurrent compaction threads from
 * isolating the same page again. Because of that raised refcount,
 * putback_movable_pages() and other routines that must recognise isolated
 * balloon pages on private page lists cannot use balloon_page_movable()
 * for the job and use this helper instead.
 *
 * Returns the result of __is_movable_balloon_page() when the page has its
 * flags cleared, is not mapped and holds at least two references; returns
 * false otherwise.
 */
static inline bool isolated_balloon_page(struct page *page)
{
	/* Checks run in the same order as a short-circuit && chain would. */
	if (!page_flags_cleared(page))
		return false;
	if (page_mapped(page))
		return false;
	/* An already isolated balloon page carries a raised refcount. */
	if (page_count(page) < 2)
		return false;

	return __is_movable_balloon_page(page);
}
/* /*
* balloon_page_insert - insert a page into the balloon's page list and make * balloon_page_insert - insert a page into the balloon's page list and make
* the page->mapping assignment accordingly. * the page->mapping assignment accordingly.
...@@ -243,6 +263,11 @@ static inline bool balloon_page_movable(struct page *page) ...@@ -243,6 +263,11 @@ static inline bool balloon_page_movable(struct page *page)
return false; return false;
} }
/*
 * Fallback definition: unconditionally reports that @page is not an
 * isolated balloon page.
 * NOTE(review): presumably the variant used when balloon compaction is
 * compiled out -- confirm against the enclosing #ifdef, not visible here.
 */
static inline bool isolated_balloon_page(struct page *page)
{
	return false;
}
static inline bool balloon_page_isolate(struct page *page) static inline bool balloon_page_isolate(struct page *page)
{ {
return false; return false;
......
...@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, ...@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
if (ipcperms(ns, &msq->q_perm, S_IWUGO)) if (ipcperms(ns, &msq->q_perm, S_IWUGO))
goto out_unlock0; goto out_unlock0;
/* raced with RMID? */
if (msq->q_perm.deleted) {
err = -EIDRM;
goto out_unlock0;
}
err = security_msg_queue_msgsnd(msq, msg, msgflg); err = security_msg_queue_msgsnd(msq, msg, msgflg);
if (err) if (err)
goto out_unlock0; goto out_unlock0;
...@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl ...@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
goto out_unlock1; goto out_unlock1;
ipc_lock_object(&msq->q_perm); ipc_lock_object(&msq->q_perm);
/* raced with RMID? */
if (msq->q_perm.deleted) {
msg = ERR_PTR(-EIDRM);
goto out_unlock0;
}
msg = find_msg(msq, &msgtyp, mode); msg = find_msg(msq, &msgtyp, mode);
if (!IS_ERR(msg)) { if (!IS_ERR(msg)) {
/* /*
......
...@@ -252,71 +252,113 @@ static void sem_rcu_free(struct rcu_head *head) ...@@ -252,71 +252,113 @@ static void sem_rcu_free(struct rcu_head *head)
ipc_rcu_free(head); ipc_rcu_free(head);
} }
/*
 * Wait until all currently ongoing simple ops have completed.
 * Caller must own sem_perm.lock.
 * New simple ops cannot start, because simple ops first check
 * that a) sem_perm.lock is free and b) complex_count is 0.
 */
static void sem_wait_array(struct sem_array *sma)
{
	int idx;

	/*
	 * A non-zero complex_count means the thread that raised it has
	 * already waited on every sem->lock, so waiting again is pointless.
	 */
	if (sma->complex_count)
		return;

	/* Wait, one semaphore at a time, for each per-semaphore lock to drop. */
	for (idx = 0; idx < sma->sem_nsems; idx++)
		spin_unlock_wait(&sma->sem_base[idx].lock);
}
/* /*
* If the request contains only one semaphore operation, and there are * If the request contains only one semaphore operation, and there are
* no complex transactions pending, lock only the semaphore involved. * no complex transactions pending, lock only the semaphore involved.
* Otherwise, lock the entire semaphore array, since we either have * Otherwise, lock the entire semaphore array, since we either have
* multiple semaphores in our own semops, or we need to look at * multiple semaphores in our own semops, or we need to look at
* semaphores from other pending complex operations. * semaphores from other pending complex operations.
*
* Carefully guard against sma->complex_count changing between zero
* and non-zero while we are spinning for the lock. The value of
* sma->complex_count cannot change while we are holding the lock,
* so sem_unlock should be fine.
*
* The global lock path checks that all the local locks have been released,
* checking each local lock once. This means that the local lock paths
* cannot start their critical sections while the global lock is held.
*/ */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
int nsops) int nsops)
{ {
int locknum; struct sem *sem;
again:
if (nsops == 1 && !sma->complex_count) {
struct sem *sem = sma->sem_base + sops->sem_num;
/* Lock just the semaphore we are interested in. */ if (nsops != 1) {
spin_lock(&sem->lock); /* Complex operation - acquire a full lock */
ipc_lock_object(&sma->sem_perm);
/* /* And wait until all simple ops that are processed
* If sma->complex_count was set while we were spinning, * right now have dropped their locks.
* we may need to look at things we did not lock here.
*/ */
if (unlikely(sma->complex_count)) { sem_wait_array(sma);
spin_unlock(&sem->lock); return -1;
goto lock_array;
} }
/* /*
* Another process is holding the global lock on the * Only one semaphore affected - try to optimize locking.
* sem_array; we cannot enter our critical section, * The rules are:
* but have to wait for the global lock to be released. * - optimized locking is possible if no complex operation
* is either enqueued or processed right now.
* - The test for enqueued complex ops is simple:
* sma->complex_count != 0
* - Testing for complex ops that are processed right now is
* a bit more difficult. Complex ops acquire the full lock
* and first wait that the running simple ops have completed.
* (see above)
* Thus: If we own a simple lock and the global lock is free
* and complex_count is now 0, then it will stay 0 and
* thus just locking sem->lock is sufficient.
*/
sem = sma->sem_base + sops->sem_num;
if (sma->complex_count == 0) {
/*
* It appears that no complex operation is around.
* Acquire the per-semaphore lock.
*/
spin_lock(&sem->lock);
/* Then check that the global lock is free */
if (!spin_is_locked(&sma->sem_perm.lock)) {
/* spin_is_locked() is not a memory barrier */
smp_mb();
/* Now repeat the test of complex_count:
* It can't change anymore until we drop sem->lock.
* Thus: if it is now 0, then it will stay 0.
*/ */
if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { if (sma->complex_count == 0) {
/* fast path successful! */
return sops->sem_num;
}
}
spin_unlock(&sem->lock); spin_unlock(&sem->lock);
spin_unlock_wait(&sma->sem_perm.lock);
goto again;
} }
locknum = sops->sem_num; /* slow path: acquire the full lock */
ipc_lock_object(&sma->sem_perm);
if (sma->complex_count == 0) {
/* False alarm:
* There is no complex operation, thus we can switch
* back to the fast path.
*/
spin_lock(&sem->lock);
ipc_unlock_object(&sma->sem_perm);
return sops->sem_num;
} else { } else {
int i; /* Not a false alarm, thus complete the sequence for a
/* * full lock.
* Lock the semaphore array, and wait for all of the
* individual semaphore locks to go away. The code
* above ensures no new single-lock holders will enter
* their critical section while the array lock is held.
*/ */
lock_array: sem_wait_array(sma);
ipc_lock_object(&sma->sem_perm); return -1;
for (i = 0; i < sma->sem_nsems; i++) {
struct sem *sem = sma->sem_base + i;
spin_unlock_wait(&sem->lock);
} }
locknum = -1;
}
return locknum;
} }
static inline void sem_unlock(struct sem_array *sma, int locknum) static inline void sem_unlock(struct sem_array *sma, int locknum)
...@@ -875,6 +917,24 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) ...@@ -875,6 +917,24 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
return semop_completed; return semop_completed;
} }
/**
 * set_semotime(sma, sops) - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time: the instance of
 * the semaphore named by the first operation, or of semaphore 0 when
 * @sops is NULL.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	int semnum = 0;

	if (sops != NULL)
		semnum = sops[0].sem_num;

	sma->sem_base[semnum].sem_otime = get_seconds();
}
/** /**
* do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
* @sma: semaphore array * @sma: semaphore array
...@@ -925,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop ...@@ -925,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
} }
} }
} }
if (otime) { if (otime)
if (sops == NULL) { set_semotime(sma, sops);
sma->sem_base[0].sem_otime = get_seconds();
} else {
sma->sem_base[sops[0].sem_num].sem_otime =
get_seconds();
}
}
} }
/* The following counts are associated to each semaphore: /* The following counts are associated to each semaphore:
* semncnt number of tasks waiting on semval being nonzero * semncnt number of tasks waiting on semval being nonzero
* semzcnt number of tasks waiting on semval being zero * semzcnt number of tasks waiting on semval being zero
...@@ -1797,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, ...@@ -1797,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
error = perform_atomic_semop(sma, sops, nsops, un, error = perform_atomic_semop(sma, sops, nsops, un,
task_tgid_vnr(current)); task_tgid_vnr(current));
if (error <= 0) { if (error == 0) {
if (alter && error == 0) /* If the operation was successful, then do
* the required updates.
*/
if (alter)
do_smart_update(sma, sops, nsops, 1, &tasks); do_smart_update(sma, sops, nsops, 1, &tasks);
else
goto out_unlock_free; set_semotime(sma, sops);
} }
if (error <= 0)
goto out_unlock_free;
/* We need to sleep on this operation, so we put the current /* We need to sleep on this operation, so we put the current
* task into the pending queue and go to sleep. * task into the pending queue and go to sleep.
...@@ -2061,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) ...@@ -2061,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
struct sem_array *sma = it; struct sem_array *sma = it;
time_t sem_otime; time_t sem_otime;
/*
* The proc interface isn't aware of sem_lock(), it calls
* ipc_lock_object() directly (in sysvipc_find_ipc).
* In order to stay compatible with sem_lock(), we must wait until
* all simple semop() calls have left their critical regions.
*/
sem_wait_array(sma);
sem_otime = get_semotime(sma); sem_otime = get_semotime(sma);
return seq_printf(s, return seq_printf(s,
......
...@@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) ...@@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
DECLARE_COMPLETION_ONSTACK(done); DECLARE_COMPLETION_ONSTACK(done);
int retval = 0; int retval = 0;
if (!sub_info->path) {
call_usermodehelper_freeinfo(sub_info);
return -EINVAL;
}
helper_lock(); helper_lock();
if (!khelper_wq || usermodehelper_disabled) { if (!khelper_wq || usermodehelper_disabled) {
retval = -EBUSY; retval = -EBUSY;
......
...@@ -273,6 +273,11 @@ void free_pid(struct pid *pid) ...@@ -273,6 +273,11 @@ void free_pid(struct pid *pid)
*/ */
wake_up_process(ns->child_reaper); wake_up_process(ns->child_reaper);
break; break;
case PIDNS_HASH_ADDING:
/* Handle a fork failure of the first process */
WARN_ON(ns->child_reaper);
ns->nr_hashed = 0;
/* fall through */
case 0: case 0:
schedule_work(&ns->proc_work); schedule_work(&ns->proc_work);
break; break;
......
...@@ -204,6 +204,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, ...@@ -204,6 +204,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
struct bio_vec *to, *from; struct bio_vec *to, *from;
unsigned i; unsigned i;
if (force)
goto bounce;
bio_for_each_segment(from, *bio_orig, i) bio_for_each_segment(from, *bio_orig, i)
if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
goto bounce; goto bounce;
......
...@@ -677,6 +677,13 @@ static void isolate_freepages(struct zone *zone, ...@@ -677,6 +677,13 @@ static void isolate_freepages(struct zone *zone,
pfn -= pageblock_nr_pages) { pfn -= pageblock_nr_pages) {
unsigned long isolated; unsigned long isolated;
/*
* This can iterate a massively long zone without finding any
* suitable migration targets, so periodically check if we need
* to schedule.
*/
cond_resched();
if (!pfn_valid(pfn)) if (!pfn_valid(pfn))
continue; continue;
......
...@@ -20,8 +20,6 @@ static int hwpoison_inject(void *data, u64 val) ...@@ -20,8 +20,6 @@ static int hwpoison_inject(void *data, u64 val)
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (!hwpoison_filter_enable)
goto inject;
if (!pfn_valid(pfn)) if (!pfn_valid(pfn))
return -ENXIO; return -ENXIO;
...@@ -33,6 +31,9 @@ static int hwpoison_inject(void *data, u64 val) ...@@ -33,6 +31,9 @@ static int hwpoison_inject(void *data, u64 val)
if (!get_page_unless_zero(hpage)) if (!get_page_unless_zero(hpage))
return 0; return 0;
if (!hwpoison_filter_enable)
goto inject;
if (!PageLRU(p) && !PageHuge(p)) if (!PageLRU(p) && !PageHuge(p))
shake_page(p, 0); shake_page(p, 0);
/* /*
......
...@@ -343,10 +343,11 @@ static long madvise_remove(struct vm_area_struct *vma, ...@@ -343,10 +343,11 @@ static long madvise_remove(struct vm_area_struct *vma,
*/ */
static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
{ {
struct page *p;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
for (; start < end; start += PAGE_SIZE) { for (; start < end; start += PAGE_SIZE <<
struct page *p; compound_order(compound_head(p))) {
int ret; int ret;
ret = get_user_pages_fast(start, 1, 0, &p); ret = get_user_pages_fast(start, 1, 0, &p);
......
...@@ -1114,8 +1114,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1114,8 +1114,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* shake_page could have turned it free. * shake_page could have turned it free.
*/ */
if (is_free_buddy_page(p)) { if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy, 2nd try", if (flags & MF_COUNT_INCREASED)
DELAYED); action_result(pfn, "free buddy", DELAYED);
else
action_result(pfn, "free buddy, 2nd try", DELAYED);
return 0; return 0;
} }
action_result(pfn, "non LRU", IGNORED); action_result(pfn, "non LRU", IGNORED);
...@@ -1349,7 +1351,7 @@ int unpoison_memory(unsigned long pfn) ...@@ -1349,7 +1351,7 @@ int unpoison_memory(unsigned long pfn)
* worked by memory_failure() and the page lock is not held yet. * worked by memory_failure() and the page lock is not held yet.
* In such case, we yield to memory_failure() and make unpoison fail. * In such case, we yield to memory_failure() and make unpoison fail.
*/ */
if (PageTransHuge(page)) { if (!PageHuge(page) && PageTransHuge(page)) {
pr_info("MCE: Memory failure is now running on %#lx\n", pfn); pr_info("MCE: Memory failure is now running on %#lx\n", pfn);
return 0; return 0;
} }
......
...@@ -107,7 +107,7 @@ void putback_movable_pages(struct list_head *l) ...@@ -107,7 +107,7 @@ void putback_movable_pages(struct list_head *l)
list_del(&page->lru); list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON + dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page)); page_is_file_cache(page));
if (unlikely(balloon_page_movable(page))) if (unlikely(isolated_balloon_page(page)))
balloon_page_putback(page); balloon_page_putback(page);
else else
putback_lru_page(page); putback_lru_page(page);
......
...@@ -379,10 +379,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, ...@@ -379,10 +379,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
/* /*
* Initialize pte walk starting at the already pinned page where we * Initialize pte walk starting at the already pinned page where we
* are sure that there is a pte. * are sure that there is a pte, as it was pinned under the same
* mmap_sem write op.
*/ */
pte = get_locked_pte(vma->vm_mm, start, &ptl); pte = get_locked_pte(vma->vm_mm, start, &ptl);
end = min(end, pmd_addr_end(start, end)); /* Make sure we do not cross the page table boundary */
end = pgd_addr_end(start, end);
end = pud_addr_end(start, end);
end = pmd_addr_end(start, end);
/* The page next to the pinned page is the first we will try to get */ /* The page next to the pinned page is the first we will try to get */
start += PAGE_SIZE; start += PAGE_SIZE;
......
...@@ -6366,10 +6366,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) ...@@ -6366,10 +6366,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
list_del(&page->lru); list_del(&page->lru);
rmv_page_order(page); rmv_page_order(page);
zone->free_area[order].nr_free--; zone->free_area[order].nr_free--;
#ifdef CONFIG_HIGHMEM
if (PageHighMem(page))
totalhigh_pages -= 1 << order;
#endif
for (i = 0; i < (1 << order); i++) for (i = 0; i < (1 << order); i++)
SetPageReserved((page+i)); SetPageReserved((page+i));
pfn += (1 << order); pfn += (1 << order);
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include <asm/div64.h> #include <asm/div64.h>
#include <linux/swapops.h> #include <linux/swapops.h>
#include <linux/balloon_compaction.h>
#include "internal.h" #include "internal.h"
...@@ -1113,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, ...@@ -1113,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
LIST_HEAD(clean_pages); LIST_HEAD(clean_pages);
list_for_each_entry_safe(page, next, page_list, lru) { list_for_each_entry_safe(page, next, page_list, lru) {
if (page_is_file_cache(page) && !PageDirty(page)) { if (page_is_file_cache(page) && !PageDirty(page) &&
!isolated_balloon_page(page)) {
ClearPageActive(page); ClearPageActive(page);
list_move(&page->lru, &clean_pages); list_move(&page->lru, &clean_pages);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment