Commit 64bf6ae9 authored by Linus Torvalds

Merge tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "Miscellaneous features, cleanups, and fixes for vfs and individual fs

  Features:

   - Use mode 0600 for files created by cachefilesd so it can be run by
     unprivileged users. This aligns them with directories which are
     already created with mode 0700 by cachefilesd

   - Reorder a few members in struct file to prevent some false sharing
     scenarios

   - Indicate that an eventfd is used as a semaphore in the eventfd's
     fdinfo procfs file

   - Add a missing uapi header for eventfd exposing relevant uapi
     defines

   - Let the VFS protect transitions of a superblock from read-only to
     read-write in addition to the protection it already provides for
     transitions from read-write to read-only. Protecting read-only to
     read-write transitions allows filesystems such as ext4 to perform
     internal writes, keeping writers away until the transition is
     completed

  Cleanups:

   - Arnd removed the architecture specific arch_report_meminfo()
     prototypes and added a generic one into procfs.h. Note, we got a
     report about a warning in amdgpu codepaths that suggested this was
     bisectable to this change but we concluded it was a false positive

   - Remove unused parameters from split_fs_names()

   - Rename put_and_unmap_page() to unmap_and_put_page() to let the name
     reflect the order of the cleanup operation that has to unmap before
     the actual put

   - Unexport buffer_check_dirty_writeback() as it is not used outside
     of block device aops

   - Stop allocating aio rings from highmem

   - Protecting read-{only,write} transitions in the VFS used open-coded
     barriers in various places. Replace them with proper little helpers
     and document both the helpers and all barrier interactions involved
     when transitioning between read-{only,write} states

   - Use flexible array members in old readdir codepaths

  Fixes:

   - Use the correct type __poll_t for epoll and eventfd

   - Replace all deprecated strlcpy() invocations whose return value
     isn't checked with equivalent strscpy() calls

   - Fix some kernel-doc warnings in fs/open.c

   - Reduce the stack usage in jffs2's xattr codepaths, finally getting
     rid of this royally annoying compilation warning:
     fs/jffs2/xattr.c:887:1: error: the frame size of 1088 bytes is
     larger than 1024 bytes [-Werror=frame-larger-than=]

   - Use __FMODE_NONOTIFY instead of FMODE_NONOTIFY where an int and not
     fmode_t is required to avoid fmode_t to integer degradation
     warnings

   - Create coredumps with O_WRONLY instead of O_RDWR. There's a long
     explanation in that commit how O_RDWR is actually a bug which we
     found out with the help of Linus and git archeology

   - Fix "no previous prototype" warnings in the pipe codepaths

   - Add overflow calculations for remap_verify_area() as a signed
     addition overflow could be triggered in xfstests

   - Fix a null pointer dereference in sysv

   - Use an unsigned variable for length calculations in jfs avoiding
     compilation warnings with gcc 13

   - Fix a dangling pipe pointer in the watch queue codepath

   - The legacy mount option parser provided as a fallback by the VFS
     for filesystems not yet converted to the new mount api did prefix
     the generated mount option string with a leading ',' causing issues
     for some filesystems

   - Fix a repeated word in a comment in fs.h

   - autofs: Update the ctime when mtime is updated as mandated by
     POSIX"

* tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (27 commits)
  readdir: Replace one-element arrays with flexible-array members
  fs: Provide helpers for manipulating sb->s_readonly_remount
  fs: Protect reconfiguration of sb read-write from racing writes
  eventfd: add a uapi header for eventfd userspace APIs
  autofs: set ctime as well when mtime changes on a dir
  eventfd: show the EFD_SEMAPHORE flag in fdinfo
  fs/aio: Stop allocating aio rings from HIGHMEM
  fs: Fix comment typo
  fs: unexport buffer_check_dirty_writeback
  fs: avoid empty option when generating legacy mount string
  watch_queue: prevent dangling pipe pointer
  fs.h: Optimize file struct to prevent false sharing
  highmem: Rename put_and_unmap_page() to unmap_and_put_page()
  cachefiles: Allow the cache to be non-root
  init: remove unused names parameter in split_fs_names()
  jfs: Use unsigned variable for length calculations
  fs/sysv: Null check to prevent null-ptr-deref bug
  fs: use UB-safe check for signed addition overflow in remap_verify_area
  procfs: consolidate arch_report_meminfo declaration
  fs: pipe: reveal missing function protoypes
  ...
parents 5c1c88cd 2507135e
...@@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, ...@@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define pte_same(A,B) (pte_val(A) == pte_val(B)) #define pte_same(A,B) (pte_val(A) == pte_val(B))
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x) ...@@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x)
return addr >= IOREMAP_BASE && addr < IOREMAP_END; return addr >= IOREMAP_BASE && addr < IOREMAP_END;
} }
struct seq_file;
void arch_report_meminfo(struct seq_file *m);
#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
......
...@@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count) ...@@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count)
atomic_long_add(count, &direct_pages_count[level]); atomic_long_add(count, &direct_pages_count[level]);
} }
struct seq_file;
void arch_report_meminfo(struct seq_file *m);
/* /*
* The S390 doesn't have any external MMU info: the kernel page * The S390 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information. * tables contain all the necessary information.
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com> * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/ */
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD]; extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
struct seq_file;
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
bool user); bool user);
......
...@@ -513,9 +513,6 @@ extern void native_pagetable_init(void); ...@@ -513,9 +513,6 @@ extern void native_pagetable_init(void);
#define native_pagetable_init paging_init #define native_pagetable_init paging_init
#endif #endif
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
enum pg_level { enum pg_level {
PG_LEVEL_NONE, PG_LEVEL_NONE,
PG_LEVEL_4K, PG_LEVEL_4K,
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/pfn.h> #include <linux/pfn.h>
#include <linux/percpu.h> #include <linux/percpu.h>
......
...@@ -530,7 +530,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ...@@ -530,7 +530,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
struct page *page; struct page *page;
page = find_or_create_page(file->f_mapping, page = find_or_create_page(file->f_mapping,
i, GFP_HIGHUSER | __GFP_ZERO); i, GFP_USER | __GFP_ZERO);
if (!page) if (!page)
break; break;
pr_debug("pid(%d) page[%d]->count=%d\n", pr_debug("pid(%d) page[%d]->count=%d\n",
...@@ -571,7 +571,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ...@@ -571,7 +571,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base; ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */ ctx->nr_events = nr_events; /* trusted copy */
ring = kmap_atomic(ctx->ring_pages[0]); ring = page_address(ctx->ring_pages[0]);
ring->nr = nr_events; /* user copy */ ring->nr = nr_events; /* user copy */
ring->id = ~0U; ring->id = ~0U;
ring->head = ring->tail = 0; ring->head = ring->tail = 0;
...@@ -579,7 +579,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ...@@ -579,7 +579,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring); ring->header_length = sizeof(struct aio_ring);
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]); flush_dcache_page(ctx->ring_pages[0]);
return 0; return 0;
...@@ -682,9 +681,8 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) ...@@ -682,9 +681,8 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
* we are protected from page migration * we are protected from page migration
* changes ring_pages by ->ring_lock. * changes ring_pages by ->ring_lock.
*/ */
ring = kmap_atomic(ctx->ring_pages[0]); ring = page_address(ctx->ring_pages[0]);
ring->id = ctx->id; ring->id = ctx->id;
kunmap_atomic(ring);
return 0; return 0;
} }
...@@ -1025,9 +1023,8 @@ static void user_refill_reqs_available(struct kioctx *ctx) ...@@ -1025,9 +1023,8 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the * against ctx->completed_events below will make sure we do the
* safe/right thing. * safe/right thing.
*/ */
ring = kmap_atomic(ctx->ring_pages[0]); ring = page_address(ctx->ring_pages[0]);
head = ring->head; head = ring->head;
kunmap_atomic(ring);
refill_reqs_available(ctx, head, ctx->tail); refill_reqs_available(ctx, head, ctx->tail);
} }
...@@ -1133,12 +1130,11 @@ static void aio_complete(struct aio_kiocb *iocb) ...@@ -1133,12 +1130,11 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events) if (++tail >= ctx->nr_events)
tail = 0; tail = 0;
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE; event = ev_page + pos % AIO_EVENTS_PER_PAGE;
*event = iocb->ki_res; *event = iocb->ki_res;
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
...@@ -1152,10 +1148,9 @@ static void aio_complete(struct aio_kiocb *iocb) ...@@ -1152,10 +1148,9 @@ static void aio_complete(struct aio_kiocb *iocb)
ctx->tail = tail; ctx->tail = tail;
ring = kmap_atomic(ctx->ring_pages[0]); ring = page_address(ctx->ring_pages[0]);
head = ring->head; head = ring->head;
ring->tail = tail; ring->tail = tail;
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]); flush_dcache_page(ctx->ring_pages[0]);
ctx->completed_events++; ctx->completed_events++;
...@@ -1215,10 +1210,9 @@ static long aio_read_events_ring(struct kioctx *ctx, ...@@ -1215,10 +1210,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
mutex_lock(&ctx->ring_lock); mutex_lock(&ctx->ring_lock);
/* Access to ->ring_pages here is protected by ctx->ring_lock. */ /* Access to ->ring_pages here is protected by ctx->ring_lock. */
ring = kmap_atomic(ctx->ring_pages[0]); ring = page_address(ctx->ring_pages[0]);
head = ring->head; head = ring->head;
tail = ring->tail; tail = ring->tail;
kunmap_atomic(ring);
/* /*
* Ensure that once we've read the current tail pointer, that * Ensure that once we've read the current tail pointer, that
...@@ -1250,10 +1244,9 @@ static long aio_read_events_ring(struct kioctx *ctx, ...@@ -1250,10 +1244,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
avail = min(avail, nr - ret); avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
ev = kmap(page); ev = page_address(page);
copy_ret = copy_to_user(event + ret, ev + pos, copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail); sizeof(*ev) * avail);
kunmap(page);
if (unlikely(copy_ret)) { if (unlikely(copy_ret)) {
ret = -EFAULT; ret = -EFAULT;
...@@ -1265,9 +1258,8 @@ static long aio_read_events_ring(struct kioctx *ctx, ...@@ -1265,9 +1258,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events; head %= ctx->nr_events;
} }
ring = kmap_atomic(ctx->ring_pages[0]); ring = page_address(ctx->ring_pages[0]);
ring->head = head; ring->head = head;
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]); flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, tail); pr_debug("%li h%u t%u\n", ret, head, tail);
......
...@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, ...@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent); p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++; p_ino->count++;
dir->i_mtime = current_time(dir); dir->i_mtime = dir->i_ctime = current_time(dir);
return 0; return 0;
} }
...@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) ...@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
d_inode(dentry)->i_size = 0; d_inode(dentry)->i_size = 0;
clear_nlink(d_inode(dentry)); clear_nlink(d_inode(dentry));
dir->i_mtime = current_time(dir); dir->i_mtime = dir->i_ctime = current_time(dir);
spin_lock(&sbi->lookup_lock); spin_lock(&sbi->lookup_lock);
__autofs_add_expiring(dentry); __autofs_add_expiring(dentry);
...@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap, ...@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent); p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++; p_ino->count++;
inc_nlink(dir); inc_nlink(dir);
dir->i_mtime = current_time(dir); dir->i_mtime = dir->i_ctime = current_time(dir);
return 0; return 0;
} }
......
...@@ -111,7 +111,6 @@ void buffer_check_dirty_writeback(struct folio *folio, ...@@ -111,7 +111,6 @@ void buffer_check_dirty_writeback(struct folio *folio,
bh = bh->b_this_page; bh = bh->b_this_page;
} while (bh != head); } while (bh != head);
} }
EXPORT_SYMBOL(buffer_check_dirty_writeback);
/* /*
* Block until a buffer comes unlocked. This doesn't stop it * Block until a buffer comes unlocked. This doesn't stop it
......
...@@ -451,7 +451,8 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) ...@@ -451,7 +451,8 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
ret = cachefiles_inject_write_error(); ret = cachefiles_inject_write_error();
if (ret == 0) { if (ret == 0) {
file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG, file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath,
S_IFREG | 0600,
O_RDWR | O_LARGEFILE | O_DIRECT, O_RDWR | O_LARGEFILE | O_DIRECT,
cache->cache_cred); cache->cache_cred);
ret = PTR_ERR_OR_ZERO(file); ret = PTR_ERR_OR_ZERO(file);
......
...@@ -150,7 +150,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, ...@@ -150,7 +150,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
cd->major = major; cd->major = major;
cd->baseminor = baseminor; cd->baseminor = baseminor;
cd->minorct = minorct; cd->minorct = minorct;
strlcpy(cd->name, name, sizeof(cd->name)); strscpy(cd->name, name, sizeof(cd->name));
if (!prev) { if (!prev) {
cd->next = curr; cd->next = curr;
......
...@@ -648,7 +648,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) ...@@ -648,7 +648,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
} else { } else {
struct mnt_idmap *idmap; struct mnt_idmap *idmap;
struct inode *inode; struct inode *inode;
int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
O_LARGEFILE | O_EXCL; O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump) if (cprm.limit < binfmt->min_coredump)
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include "mount.h" #include "mount.h"
#include "internal.h"
struct prepend_buffer { struct prepend_buffer {
char *buf; char *buf;
......
...@@ -33,17 +33,17 @@ struct eventfd_ctx { ...@@ -33,17 +33,17 @@ struct eventfd_ctx {
/* /*
* Every time that a write(2) is performed on an eventfd, the * Every time that a write(2) is performed on an eventfd, the
* value of the __u64 being written is added to "count" and a * value of the __u64 being written is added to "count" and a
* wakeup is performed on "wqh". A read(2) will return the "count" * wakeup is performed on "wqh". If EFD_SEMAPHORE flag was not
* value to userspace, and will reset "count" to zero. The kernel * specified, a read(2) will return the "count" value to userspace,
* side eventfd_signal() also, adds to the "count" counter and * and will reset "count" to zero. The kernel side eventfd_signal()
* issue a wakeup. * also, adds to the "count" counter and issue a wakeup.
*/ */
__u64 count; __u64 count;
unsigned int flags; unsigned int flags;
int id; int id;
}; };
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask) __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
{ {
unsigned long flags; unsigned long flags;
...@@ -301,6 +301,8 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) ...@@ -301,6 +301,8 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
(unsigned long long)ctx->count); (unsigned long long)ctx->count);
spin_unlock_irq(&ctx->wqh.lock); spin_unlock_irq(&ctx->wqh.lock);
seq_printf(m, "eventfd-id: %d\n", ctx->id); seq_printf(m, "eventfd-id: %d\n", ctx->id);
seq_printf(m, "eventfd-semaphore: %d\n",
!!(ctx->flags & EFD_SEMAPHORE));
} }
#endif #endif
......
...@@ -536,7 +536,7 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, ...@@ -536,7 +536,7 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
#else #else
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
unsigned pollflags) __poll_t pollflags)
{ {
wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
} }
......
...@@ -561,6 +561,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) ...@@ -561,6 +561,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -ENOMEM; return -ENOMEM;
} }
if (size)
ctx->legacy_data[size++] = ','; ctx->legacy_data[size++] = ',';
len = strlen(param->key); len = strlen(param->key);
memcpy(ctx->legacy_data + size, param->key, len); memcpy(ctx->legacy_data + size, param->key, len);
......
...@@ -120,6 +120,47 @@ void put_super(struct super_block *sb); ...@@ -120,6 +120,47 @@ void put_super(struct super_block *sb);
extern bool mount_capable(struct fs_context *); extern bool mount_capable(struct fs_context *);
int sb_init_dio_done_wq(struct super_block *sb); int sb_init_dio_done_wq(struct super_block *sb);
/*
* Prepare superblock for changing its read-only state (i.e., either remount
* read-write superblock read-only or vice versa). After this function returns
* mnt_is_readonly() will return true for any mount of the superblock if its
* caller is able to observe any changes done by the remount. This holds until
* sb_end_ro_state_change() is called.
*
* Concretely, this sets sb->s_readonly_remount to 1 and then issues a write
* barrier; sb_end_ro_state_change() is the counterpart that clears the marker
* again.
*
* @sb: superblock whose read-only state is about to change
*/
static inline void sb_start_ro_state_change(struct super_block *sb)
{
/* Marked store: mnt_is_readonly() reads this field with READ_ONCE(). */
WRITE_ONCE(sb->s_readonly_remount, 1);
/*
* Order the marker store above before any stores that follow it.
*
* For RO->RW transition, the barrier pairs with the barrier in
* mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY
* cleared, it will see s_readonly_remount set.
* For RW->RO transition, the barrier pairs with the barrier in
* __mnt_want_write() before the mnt_is_readonly() check. The barrier
* makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already
* cleared, it will see s_readonly_remount set.
*/
smp_wmb();
}
/*
* Ends the section changing the read-only state of the superblock. After this
* function returns, if mnt_is_readonly() returns false, the caller will be
* able to observe all the changes remount did to the superblock.
*
* Concretely, this issues a write barrier and then resets
* sb->s_readonly_remount to 0, i.e. the reverse order of
* sb_start_ro_state_change().
*
* @sb: superblock whose read-only state change has finished
*/
static inline void sb_end_ro_state_change(struct super_block *sb)
{
/*
* This barrier provides release semantics that pair with
* the smp_rmb() acquire semantics in mnt_is_readonly().
* This barrier pair ensures that when mnt_is_readonly() sees
* 0 for sb->s_readonly_remount, it will also see all the
* preceding flag changes that were made during the RO state
* change.
*/
smp_wmb();
/* Marked store: mnt_is_readonly() reads this field with READ_ONCE(). */
WRITE_ONCE(sb->s_readonly_remount, 0);
}
/* /*
* open.c * open.c
*/ */
......
...@@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) ...@@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
ic->scan_dents = NULL; ic->scan_dents = NULL;
cond_resched(); cond_resched();
} }
jffs2_build_xattr_subsystem(c); ret = jffs2_build_xattr_subsystem(c);
if (ret)
goto exit;
c->flags &= ~JFFS2_SB_FLAG_BUILDING; c->flags &= ~JFFS2_SB_FLAG_BUILDING;
dbg_fsbuild("FS build complete\n"); dbg_fsbuild("FS build complete\n");
......
...@@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) ...@@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
} }
#define XREF_TMPHASH_SIZE (128) #define XREF_TMPHASH_SIZE (128)
void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
{ {
struct jffs2_xattr_ref *ref, *_ref; struct jffs2_xattr_ref *ref, *_ref;
struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE]; struct jffs2_xattr_ref **xref_tmphash;
struct jffs2_xattr_datum *xd, *_xd; struct jffs2_xattr_datum *xd, *_xd;
struct jffs2_inode_cache *ic; struct jffs2_inode_cache *ic;
struct jffs2_raw_node_ref *raw; struct jffs2_raw_node_ref *raw;
...@@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) ...@@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING)); BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
xref_tmphash = kcalloc(XREF_TMPHASH_SIZE,
sizeof(struct jffs2_xattr_ref *), GFP_KERNEL);
if (!xref_tmphash)
return -ENOMEM;
/* Phase.1 : Merge same xref */ /* Phase.1 : Merge same xref */
for (i=0; i < XREF_TMPHASH_SIZE; i++)
xref_tmphash[i] = NULL;
for (ref=c->xref_temp; ref; ref=_ref) { for (ref=c->xref_temp; ref; ref=_ref) {
struct jffs2_xattr_ref *tmp; struct jffs2_xattr_ref *tmp;
...@@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) ...@@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
"%u of xref (%u dead, %u orphan) found.\n", "%u of xref (%u dead, %u orphan) found.\n",
xdatum_count, xdatum_unchecked_count, xdatum_orphan_count, xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
xref_count, xref_dead_count, xref_orphan_count); xref_count, xref_dead_count, xref_orphan_count);
kfree(xref_tmphash);
return 0;
} }
struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
......
...@@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref) ...@@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
#ifdef CONFIG_JFFS2_FS_XATTR #ifdef CONFIG_JFFS2_FS_XATTR
extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c); extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c); extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c); extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
...@@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); ...@@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
#else #else
#define jffs2_init_xattr_subsystem(c) #define jffs2_init_xattr_subsystem(c)
#define jffs2_build_xattr_subsystem(c) #define jffs2_build_xattr_subsystem(c) (0)
#define jffs2_clear_xattr_subsystem(c) #define jffs2_clear_xattr_subsystem(c)
#define jffs2_xattr_do_crccheck_inode(c, ic) #define jffs2_xattr_do_crccheck_inode(c, ic)
......
...@@ -876,7 +876,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, ...@@ -876,7 +876,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
tid_t tid; tid_t tid;
ino_t ino = 0; ino_t ino = 0;
struct component_name dname; struct component_name dname;
int ssize; /* source pathname size */ u32 ssize; /* source pathname size */
struct btstack btstack; struct btstack btstack;
struct inode *ip = d_inode(dentry); struct inode *ip = d_inode(dentry);
s64 xlen = 0; s64 xlen = 0;
...@@ -957,7 +957,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, ...@@ -957,7 +957,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
if (ssize > sizeof (JFS_IP(ip)->i_inline)) if (ssize > sizeof (JFS_IP(ip)->i_inline))
JFS_IP(ip)->mode2 &= ~INLINEEA; JFS_IP(ip)->mode2 &= ~INLINEEA;
jfs_info("jfs_symlink: fast symlink added ssize:%d name:%s ", jfs_info("jfs_symlink: fast symlink added ssize:%u name:%s ",
ssize, name); ssize, name);
} }
/* /*
...@@ -987,7 +987,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, ...@@ -987,7 +987,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
ip->i_size = ssize - 1; ip->i_size = ssize - 1;
while (ssize) { while (ssize) {
/* This is kind of silly since PATH_MAX == 4K */ /* This is kind of silly since PATH_MAX == 4K */
int copy_size = min(ssize, PSIZE); u32 copy_size = min_t(u32, ssize, PSIZE);
mp = get_metapage(ip, xaddr, PSIZE, 1); mp = get_metapage(ip, xaddr, PSIZE, 1);
......
...@@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt) ...@@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt)
static int mnt_is_readonly(struct vfsmount *mnt) static int mnt_is_readonly(struct vfsmount *mnt)
{ {
if (mnt->mnt_sb->s_readonly_remount) if (READ_ONCE(mnt->mnt_sb->s_readonly_remount))
return 1; return 1;
/* Order wrt setting s_flags/s_readonly_remount in do_remount() */ /*
* The barrier pairs with the barrier in sb_start_ro_state_change()
* making sure if we don't see s_readonly_remount set yet, we also will
* not see any superblock / mount flag changes done by remount.
* It also pairs with the barrier in sb_end_ro_state_change()
* assuring that if we see s_readonly_remount already cleared, we will
* see the values of superblock / mount flags updated by remount.
*/
smp_rmb(); smp_rmb();
return __mnt_is_readonly(mnt); return __mnt_is_readonly(mnt);
} }
...@@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m) ...@@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m)
} }
} }
/* /*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will * The barrier pairs with the barrier sb_start_ro_state_change() making
* be set to match its requirements. So we must not load that until * sure that if we see MNT_WRITE_HOLD cleared, we will also see
* MNT_WRITE_HOLD is cleared. * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
* mnt_is_readonly() and bail in case we are racing with remount
* read-only.
*/ */
smp_rmb(); smp_rmb();
if (mnt_is_readonly(m)) { if (mnt_is_readonly(m)) {
...@@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb) ...@@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (!err && atomic_long_read(&sb->s_remove_count)) if (!err && atomic_long_read(&sb->s_remove_count))
err = -EBUSY; err = -EBUSY;
if (!err) { if (!err)
sb->s_readonly_remount = 1; sb_start_ro_state_change(sb);
smp_wmb();
}
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
......
...@@ -700,10 +700,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) ...@@ -700,10 +700,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
return do_fchmodat(AT_FDCWD, filename, mode); return do_fchmodat(AT_FDCWD, filename, mode);
} }
/** /*
* setattr_vfsuid - check and set ia_fsuid attribute
* @kuid: new inode owner
*
* Check whether @kuid is valid and if so generate and set vfsuid_t in * Check whether @kuid is valid and if so generate and set vfsuid_t in
* ia_vfsuid. * ia_vfsuid.
* *
...@@ -718,10 +715,7 @@ static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid) ...@@ -718,10 +715,7 @@ static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
return true; return true;
} }
/** /*
* setattr_vfsgid - check and set ia_fsgid attribute
* @kgid: new inode owner
*
* Check whether @kgid is valid and if so generate and set vfsgid_t in * Check whether @kgid is valid and if so generate and set vfsgid_t in
* ia_vfsgid. * ia_vfsgid.
* *
...@@ -989,7 +983,6 @@ static int do_dentry_open(struct file *f, ...@@ -989,7 +983,6 @@ static int do_dentry_open(struct file *f,
* @file: file pointer * @file: file pointer
* @dentry: pointer to dentry * @dentry: pointer to dentry
* @open: open callback * @open: open callback
* @opened: state of open
* *
* This can be used to finish opening a file passed to i_op->atomic_open(). * This can be used to finish opening a file passed to i_op->atomic_open().
* *
...@@ -1043,7 +1036,6 @@ EXPORT_SYMBOL(file_path); ...@@ -1043,7 +1036,6 @@ EXPORT_SYMBOL(file_path);
* vfs_open - open the file at the given path * vfs_open - open the file at the given path
* @path: path to open * @path: path to open
* @file: newly allocated file with f_flag initialized * @file: newly allocated file with f_flag initialized
* @cred: credentials to use
*/ */
int vfs_open(const struct path *path, struct file *file) int vfs_open(const struct path *path, struct file *file)
{ {
...@@ -1156,7 +1148,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) ...@@ -1156,7 +1148,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
inline int build_open_flags(const struct open_how *how, struct open_flags *op) inline int build_open_flags(const struct open_how *how, struct open_flags *op)
{ {
u64 flags = how->flags; u64 flags = how->flags;
u64 strip = FMODE_NONOTIFY | O_CLOEXEC; u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
int lookup_flags = 0; int lookup_flags = 0;
int acc_mode = ACC_MODE(flags); int acc_mode = ACC_MODE(flags);
......
...@@ -35,7 +35,7 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) ...@@ -35,7 +35,7 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
} }
/* No atime modification nor notify on underlying */ /* No atime modification nor notify on underlying */
#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY) #define OVL_OPEN_FLAGS (O_NOATIME | __FMODE_NONOTIFY)
static struct file *ovl_open_realfile(const struct file *file, static struct file *ovl_open_realfile(const struct file *file,
const struct path *realpath) const struct path *realpath)
......
...@@ -131,7 +131,7 @@ struct old_linux_dirent { ...@@ -131,7 +131,7 @@ struct old_linux_dirent {
unsigned long d_ino; unsigned long d_ino;
unsigned long d_offset; unsigned long d_offset;
unsigned short d_namlen; unsigned short d_namlen;
char d_name[1]; char d_name[];
}; };
struct readdir_callback { struct readdir_callback {
...@@ -208,7 +208,7 @@ struct linux_dirent { ...@@ -208,7 +208,7 @@ struct linux_dirent {
unsigned long d_ino; unsigned long d_ino;
unsigned long d_off; unsigned long d_off;
unsigned short d_reclen; unsigned short d_reclen;
char d_name[1]; char d_name[];
}; };
struct getdents_callback { struct getdents_callback {
...@@ -388,7 +388,7 @@ struct compat_old_linux_dirent { ...@@ -388,7 +388,7 @@ struct compat_old_linux_dirent {
compat_ulong_t d_ino; compat_ulong_t d_ino;
compat_ulong_t d_offset; compat_ulong_t d_offset;
unsigned short d_namlen; unsigned short d_namlen;
char d_name[1]; char d_name[];
}; };
struct compat_readdir_callback { struct compat_readdir_callback {
...@@ -460,7 +460,7 @@ struct compat_linux_dirent { ...@@ -460,7 +460,7 @@ struct compat_linux_dirent {
compat_ulong_t d_ino; compat_ulong_t d_ino;
compat_ulong_t d_off; compat_ulong_t d_off;
unsigned short d_reclen; unsigned short d_reclen;
char d_name[1]; char d_name[];
}; };
struct compat_getdents_callback { struct compat_getdents_callback {
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/overflow.h>
#include "internal.h" #include "internal.h"
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -101,10 +102,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, ...@@ -101,10 +102,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len, static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write) bool write)
{ {
loff_t tmp;
if (unlikely(pos < 0 || len < 0)) if (unlikely(pos < 0 || len < 0))
return -EINVAL; return -EINVAL;
if (unlikely((loff_t) (pos + len) < 0)) if (unlikely(check_add_overflow(pos, len, &tmp)))
return -EINVAL; return -EINVAL;
return security_file_permission(file, write ? MAY_WRITE : MAY_READ); return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
......
...@@ -595,7 +595,7 @@ struct super_block *sget_fc(struct fs_context *fc, ...@@ -595,7 +595,7 @@ struct super_block *sget_fc(struct fs_context *fc,
fc->s_fs_info = NULL; fc->s_fs_info = NULL;
s->s_type = fc->fs_type; s->s_type = fc->fs_type;
s->s_iflags |= fc->s_iflags; s->s_iflags |= fc->s_iflags;
strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks); list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &s->s_type->fs_supers); hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
...@@ -674,7 +674,7 @@ struct super_block *sget(struct file_system_type *type, ...@@ -674,7 +674,7 @@ struct super_block *sget(struct file_system_type *type,
return ERR_PTR(err); return ERR_PTR(err);
} }
s->s_type = type; s->s_type = type;
strlcpy(s->s_id, type->name, sizeof(s->s_id)); strscpy(s->s_id, type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks); list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &type->fs_supers); hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
...@@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc) ...@@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb; struct super_block *sb = fc->root->d_sb;
int retval; int retval;
bool remount_ro = false; bool remount_ro = false;
bool remount_rw = false;
bool force = fc->sb_flags & SB_FORCE; bool force = fc->sb_flags & SB_FORCE;
if (fc->sb_flags_mask & ~MS_RMT_MASK) if (fc->sb_flags_mask & ~MS_RMT_MASK)
...@@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc) ...@@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc)
bdev_read_only(sb->s_bdev)) bdev_read_only(sb->s_bdev))
return -EACCES; return -EACCES;
#endif #endif
remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
} }
...@@ -943,13 +944,18 @@ int reconfigure_super(struct fs_context *fc) ...@@ -943,13 +944,18 @@ int reconfigure_super(struct fs_context *fc)
*/ */
if (remount_ro) { if (remount_ro) {
if (force) { if (force) {
sb->s_readonly_remount = 1; sb_start_ro_state_change(sb);
smp_wmb();
} else { } else {
retval = sb_prepare_remount_readonly(sb); retval = sb_prepare_remount_readonly(sb);
if (retval) if (retval)
return retval; return retval;
} }
} else if (remount_rw) {
/*
* Protect filesystem's reconfigure code from writes from
* userspace until reconfigure finishes.
*/
sb_start_ro_state_change(sb);
} }
if (fc->ops->reconfigure) { if (fc->ops->reconfigure) {
...@@ -965,9 +971,7 @@ int reconfigure_super(struct fs_context *fc) ...@@ -965,9 +971,7 @@ int reconfigure_super(struct fs_context *fc)
WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
(fc->sb_flags & fc->sb_flags_mask))); (fc->sb_flags & fc->sb_flags_mask)));
/* Needs to be ordered wrt mnt_is_readonly() */ sb_end_ro_state_change(sb);
smp_wmb();
sb->s_readonly_remount = 0;
/* /*
* Some filesystems modify their metadata via some other path than the * Some filesystems modify their metadata via some other path than the
...@@ -982,7 +986,7 @@ int reconfigure_super(struct fs_context *fc) ...@@ -982,7 +986,7 @@ int reconfigure_super(struct fs_context *fc)
return 0; return 0;
cancel_readonly: cancel_readonly:
sb->s_readonly_remount = 0; sb_end_ro_state_change(sb);
return retval; return retval;
} }
......
...@@ -52,7 +52,7 @@ static int sysv_handle_dirsync(struct inode *dir) ...@@ -52,7 +52,7 @@ static int sysv_handle_dirsync(struct inode *dir)
} }
/* /*
* Calls to dir_get_page()/put_and_unmap_page() must be nested according to the * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
* rules documented in mm/highmem.rst. * rules documented in mm/highmem.rst.
* *
* NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page() * NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page()
...@@ -103,11 +103,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx) ...@@ -103,11 +103,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN), if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN),
fs16_to_cpu(SYSV_SB(sb), de->inode), fs16_to_cpu(SYSV_SB(sb), de->inode),
DT_UNKNOWN)) { DT_UNKNOWN)) {
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
return 0; return 0;
} }
} }
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
} }
return 0; return 0;
} }
...@@ -131,7 +131,7 @@ static inline int namecompare(int len, int maxlen, ...@@ -131,7 +131,7 @@ static inline int namecompare(int len, int maxlen,
* itself (as a parameter - res_dir). It does NOT read the inode of the * itself (as a parameter - res_dir). It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to. * entry - you'll have to do that yourself if you want to.
* *
* On Success put_and_unmap_page() should be called on *res_page. * On Success unmap_and_put_page() should be called on *res_page.
* *
* sysv_find_entry() acts as a call to dir_get_page() and must be treated * sysv_find_entry() acts as a call to dir_get_page() and must be treated
* accordingly for nesting purposes. * accordingly for nesting purposes.
...@@ -166,7 +166,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_ ...@@ -166,7 +166,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
name, de->name)) name, de->name))
goto found; goto found;
} }
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
} }
if (++n >= npages) if (++n >= npages)
...@@ -209,7 +209,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode) ...@@ -209,7 +209,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
goto out_page; goto out_page;
de++; de++;
} }
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
} }
BUG(); BUG();
return -EINVAL; return -EINVAL;
...@@ -228,7 +228,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode) ...@@ -228,7 +228,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
mark_inode_dirty(dir); mark_inode_dirty(dir);
err = sysv_handle_dirsync(dir); err = sysv_handle_dirsync(dir);
out_page: out_page:
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
return err; return err;
out_unlock: out_unlock:
unlock_page(page); unlock_page(page);
...@@ -321,12 +321,12 @@ int sysv_empty_dir(struct inode * inode) ...@@ -321,12 +321,12 @@ int sysv_empty_dir(struct inode * inode)
if (de->name[1] != '.' || de->name[2]) if (de->name[1] != '.' || de->name[2])
goto not_empty; goto not_empty;
} }
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
} }
return 1; return 1;
not_empty: not_empty:
put_and_unmap_page(page, kaddr); unmap_and_put_page(page, kaddr);
return 0; return 0;
} }
...@@ -352,7 +352,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page, ...@@ -352,7 +352,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
} }
/* /*
* Calls to dir_get_page()/put_and_unmap_page() must be nested according to the * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
* rules documented in mm/highmem.rst. * rules documented in mm/highmem.rst.
* *
* sysv_dotdot() acts as a call to dir_get_page() and must be treated * sysv_dotdot() acts as a call to dir_get_page() and must be treated
...@@ -376,7 +376,7 @@ ino_t sysv_inode_by_name(struct dentry *dentry) ...@@ -376,7 +376,7 @@ ino_t sysv_inode_by_name(struct dentry *dentry)
if (de) { if (de) {
res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode); res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode);
put_and_unmap_page(page, de); unmap_and_put_page(page, de);
} }
return res; return res;
} }
...@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode, ...@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode,
*/ */
parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key); parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
bh = sb_getblk(inode->i_sb, parent); bh = sb_getblk(inode->i_sb, parent);
if (!bh) {
sysv_free_block(inode->i_sb, branch[n].key);
break;
}
lock_buffer(bh); lock_buffer(bh);
memset(bh->b_data, 0, blocksize); memset(bh->b_data, 0, blocksize);
branch[n].bh = bh; branch[n].bh = bh;
......
...@@ -164,7 +164,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry) ...@@ -164,7 +164,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
inode->i_ctime = dir->i_ctime; inode->i_ctime = dir->i_ctime;
inode_dec_link_count(inode); inode_dec_link_count(inode);
} }
put_and_unmap_page(page, de); unmap_and_put_page(page, de);
return err; return err;
} }
...@@ -227,7 +227,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir, ...@@ -227,7 +227,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (!new_de) if (!new_de)
goto out_dir; goto out_dir;
err = sysv_set_link(new_de, new_page, old_inode); err = sysv_set_link(new_de, new_page, old_inode);
put_and_unmap_page(new_page, new_de); unmap_and_put_page(new_page, new_de);
if (err) if (err)
goto out_dir; goto out_dir;
new_inode->i_ctime = current_time(new_inode); new_inode->i_ctime = current_time(new_inode);
...@@ -256,9 +256,9 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir, ...@@ -256,9 +256,9 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
out_dir: out_dir:
if (dir_de) if (dir_de)
put_and_unmap_page(dir_page, dir_de); unmap_and_put_page(dir_page, dir_de);
out_old: out_old:
put_and_unmap_page(old_page, old_de); unmap_and_put_page(old_page, old_de);
out: out:
return err; return err;
} }
......
...@@ -9,12 +9,12 @@ ...@@ -9,12 +9,12 @@
#ifndef _LINUX_EVENTFD_H #ifndef _LINUX_EVENTFD_H
#define _LINUX_EVENTFD_H #define _LINUX_EVENTFD_H
#include <linux/fcntl.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/percpu-defs.h> #include <linux/percpu-defs.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <uapi/linux/eventfd.h>
/* /*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
...@@ -23,10 +23,6 @@ ...@@ -23,10 +23,6 @@
* from eventfd, in order to leave a free define-space for * from eventfd, in order to leave a free define-space for
* shared O_* flags. * shared O_* flags.
*/ */
#define EFD_SEMAPHORE (1 << 0)
#define EFD_CLOEXEC O_CLOEXEC
#define EFD_NONBLOCK O_NONBLOCK
#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
...@@ -40,7 +36,7 @@ struct file *eventfd_fget(int fd); ...@@ -40,7 +36,7 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt); __u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
......
...@@ -956,29 +956,35 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) ...@@ -956,29 +956,35 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
index < ra->start + ra->size); index < ra->start + ra->size);
} }
/*
* f_{lock,count,pos_lock} members can be highly contended and share
* the same cacheline. f_{lock,mode} are very frequently used together
* and so share the same cacheline as well. The read-mostly
* f_{path,inode,op} are kept on a separate cacheline.
*/
struct file { struct file {
union { union {
struct llist_node f_llist; struct llist_node f_llist;
struct rcu_head f_rcuhead; struct rcu_head f_rcuhead;
unsigned int f_iocb_flags; unsigned int f_iocb_flags;
}; };
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
/* /*
* Protects f_ep, f_flags. * Protects f_ep, f_flags.
* Must not be taken from IRQ context. * Must not be taken from IRQ context.
*/ */
spinlock_t f_lock; spinlock_t f_lock;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode; fmode_t f_mode;
atomic_long_t f_count;
struct mutex f_pos_lock; struct mutex f_pos_lock;
loff_t f_pos; loff_t f_pos;
unsigned int f_flags;
struct fown_struct f_owner; struct fown_struct f_owner;
const struct cred *f_cred; const struct cred *f_cred;
struct file_ra_state f_ra; struct file_ra_state f_ra;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
u64 f_version; u64 f_version;
#ifdef CONFIG_SECURITY #ifdef CONFIG_SECURITY
...@@ -1242,7 +1248,7 @@ struct super_block { ...@@ -1242,7 +1248,7 @@ struct super_block {
*/ */
atomic_long_t s_fsnotify_connectors; atomic_long_t s_fsnotify_connectors;
/* Being remounted read-only */ /* Read-only state of the superblock is being changed */
int s_readonly_remount; int s_readonly_remount;
/* per-sb errseq_t for reporting writeback errors via syncfs */ /* per-sb errseq_t for reporting writeback errors via syncfs */
...@@ -2669,7 +2675,7 @@ extern void evict_inodes(struct super_block *sb); ...@@ -2669,7 +2675,7 @@ extern void evict_inodes(struct super_block *sb);
void dump_mapping(const struct address_space *); void dump_mapping(const struct address_space *);
/* /*
* Userspace may rely on the the inode number being non-zero. For example, glibc * Userspace may rely on the inode number being non-zero. For example, glibc
* simply ignores files with zero i_ino in unlink() and other places. * simply ignores files with zero i_ino in unlink() and other places.
* *
* As an additional complication, if userspace was compiled with * As an additional complication, if userspace was compiled with
......
...@@ -507,7 +507,7 @@ static inline void folio_zero_range(struct folio *folio, ...@@ -507,7 +507,7 @@ static inline void folio_zero_range(struct folio *folio,
zero_user_segments(&folio->page, start, start + length, 0, 0); zero_user_segments(&folio->page, start, start + length, 0, 0);
} }
static inline void put_and_unmap_page(struct page *page, void *addr) static inline void unmap_and_put_page(struct page *page, void *addr)
{ {
kunmap_local(addr); kunmap_local(addr);
put_page(page); put_page(page);
......
...@@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); ...@@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
extern const struct pipe_buf_operations nosteal_pipe_buf_ops; extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
#ifdef CONFIG_WATCH_QUEUE
unsigned long account_pipe_buffers(struct user_struct *user, unsigned long account_pipe_buffers(struct user_struct *user,
unsigned long old, unsigned long new); unsigned long old, unsigned long new);
bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_soft(unsigned long user_bufs);
bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs);
bool pipe_is_unprivileged_user(void); bool pipe_is_unprivileged_user(void);
#endif
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */ /* for F_SETPIPE_SZ and F_GETPIPE_SZ */
#ifdef CONFIG_WATCH_QUEUE
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
#endif
long pipe_fcntl(struct file *, unsigned int, unsigned long arg); long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
......
...@@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, ...@@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task); struct pid *pid, struct task_struct *task);
#endif /* CONFIG_PROC_PID_ARCH_STATUS */ #endif /* CONFIG_PROC_PID_ARCH_STATUS */
void arch_report_meminfo(struct seq_file *m);
#else /* CONFIG_PROC_FS */ #else /* CONFIG_PROC_FS */
static inline void proc_root_init(void) static inline void proc_root_init(void)
......
...@@ -38,7 +38,7 @@ struct watch_filter { ...@@ -38,7 +38,7 @@ struct watch_filter {
struct watch_queue { struct watch_queue {
struct rcu_head rcu; struct rcu_head rcu;
struct watch_filter __rcu *filter; struct watch_filter __rcu *filter;
struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */ struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */
struct hlist_head watches; /* Contributory watches */ struct hlist_head watches; /* Contributory watches */
struct page **notes; /* Preallocated notifications */ struct page **notes; /* Preallocated notifications */
unsigned long *notes_bitmap; /* Allocation bitmap for notes */ unsigned long *notes_bitmap; /* Allocation bitmap for notes */
...@@ -46,7 +46,6 @@ struct watch_queue { ...@@ -46,7 +46,6 @@ struct watch_queue {
spinlock_t lock; spinlock_t lock;
unsigned int nr_notes; /* Number of notes */ unsigned int nr_notes; /* Number of notes */
unsigned int nr_pages; /* Number of pages in notes[] */ unsigned int nr_pages; /* Number of pages in notes[] */
bool defunct; /* T when queues closed */
}; };
/* /*
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * Flag definitions for eventfd that are exposed to userspace.
 *
 * Two of the three flags below are defined directly as O_* open flags, so
 * the EFD_* values live in the same numeric space as the O_* flags.
 */
#ifndef _UAPI_LINUX_EVENTFD_H
#define _UAPI_LINUX_EVENTFD_H
/* NOTE(review): presumably pulled in for O_CLOEXEC / O_NONBLOCK used below. */
#include <linux/fcntl.h>
/*
 * eventfd-specific flag occupying bit 0. Going by its name it selects
 * semaphore-style behaviour; see eventfd(2) to confirm the exact semantics.
 */
#define EFD_SEMAPHORE (1 << 0)
/* Alias: expands to O_CLOEXEC. */
#define EFD_CLOEXEC O_CLOEXEC
/* Alias: expands to O_NONBLOCK. */
#define EFD_NONBLOCK O_NONBLOCK
#endif /* _UAPI_LINUX_EVENTFD_H */
...@@ -338,7 +338,7 @@ __setup("rootfstype=", fs_names_setup); ...@@ -338,7 +338,7 @@ __setup("rootfstype=", fs_names_setup);
__setup("rootdelay=", root_delay_setup); __setup("rootdelay=", root_delay_setup);
/* This can return zero length strings. Caller should check */ /* This can return zero length strings. Caller should check */
static int __init split_fs_names(char *page, size_t size, char *names) static int __init split_fs_names(char *page, size_t size)
{ {
int count = 1; int count = 1;
char *p = page; char *p = page;
...@@ -402,7 +402,7 @@ void __init mount_block_root(char *name, int flags) ...@@ -402,7 +402,7 @@ void __init mount_block_root(char *name, int flags)
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)", scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names) if (root_fs_names)
num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names); num_fs = split_fs_names(fs_names, PAGE_SIZE);
else else
num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE); num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry: retry:
...@@ -545,7 +545,7 @@ static int __init mount_nodev_root(void) ...@@ -545,7 +545,7 @@ static int __init mount_nodev_root(void)
fs_names = (void *)__get_free_page(GFP_KERNEL); fs_names = (void *)__get_free_page(GFP_KERNEL);
if (!fs_names) if (!fs_names)
return -EINVAL; return -EINVAL;
num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names); num_fs = split_fs_names(fs_names, PAGE_SIZE);
for (i = 0, fstype = fs_names; i < num_fs; for (i = 0, fstype = fs_names; i < num_fs;
i++, fstype += strlen(fstype) + 1) { i++, fstype += strlen(fstype) + 1) {
......
...@@ -42,7 +42,7 @@ MODULE_AUTHOR("Red Hat, Inc."); ...@@ -42,7 +42,7 @@ MODULE_AUTHOR("Red Hat, Inc.");
static inline bool lock_wqueue(struct watch_queue *wqueue) static inline bool lock_wqueue(struct watch_queue *wqueue)
{ {
spin_lock_bh(&wqueue->lock); spin_lock_bh(&wqueue->lock);
if (unlikely(wqueue->defunct)) { if (unlikely(!wqueue->pipe)) {
spin_unlock_bh(&wqueue->lock); spin_unlock_bh(&wqueue->lock);
return false; return false;
} }
...@@ -104,9 +104,6 @@ static bool post_one_notification(struct watch_queue *wqueue, ...@@ -104,9 +104,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
unsigned int head, tail, mask, note, offset, len; unsigned int head, tail, mask, note, offset, len;
bool done = false; bool done = false;
if (!pipe)
return false;
spin_lock_irq(&pipe->rd_wait.lock); spin_lock_irq(&pipe->rd_wait.lock);
mask = pipe->ring_size - 1; mask = pipe->ring_size - 1;
...@@ -603,8 +600,11 @@ void watch_queue_clear(struct watch_queue *wqueue) ...@@ -603,8 +600,11 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock(); rcu_read_lock();
spin_lock_bh(&wqueue->lock); spin_lock_bh(&wqueue->lock);
/* Prevent new notifications from being stored. */ /*
wqueue->defunct = true; * This pipe can be freed by callers like free_pipe_info().
* Removing this reference also prevents new notifications.
*/
wqueue->pipe = NULL;
while (!hlist_empty(&wqueue->watches)) { while (!hlist_empty(&wqueue->watches)) {
watch = hlist_entry(wqueue->watches.first, struct watch, queue_node); watch = hlist_entry(wqueue->watches.first, struct watch, queue_node);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment