Commit 888d3c9f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux

Pull sysctl updates from Luis Chamberlain:
 "This only does a few sysctl moves from the kernel/sysctl.c file, the
  rest of the work has been put towards deprecating two API calls which
  incur recursion and prevent us from simplifying the registration
  process / saving memory per move. Most of the changes have been
  soaking on linux-next since v6.3-rc3.

  I've slowed down the kernel/sysctl.c moves due to Matthew Wilcox's
  feedback that we should see if we could *save* memory with these moves
  instead of incurring more memory. We currently incur more memory since
  when we move a sysctl from kernel/sysctl.c out to its own file we end
  up having to add a new empty sysctl used to register it. To achieve
  saving memory we want to allow sysctls to be passed without requiring
  the end element being empty, and just have our registration process
  rely on ARRAY_SIZE(). Without this, supporting both styles of sysctls
  would make the sysctl registration pretty brittle, hard to read and
  maintain as can be seen from Meng Tang's efforts to do just this [0].
  Fortunately, in order to use ARRAY_SIZE() for all sysctl registrations
  also implies doing the work to deprecate two API calls which use
  recursion in order to support sysctl declarations with subdirectories.

  And so during this development cycle quite a bit of effort went into
  this deprecation effort. I've annotated the following two APIs as
  deprecated and in a few kernel releases we should be good to remove
  them:

   - register_sysctl_table()
   - register_sysctl_paths()

  During this merge window we should be able to deprecate and unexport
  register_sysctl_paths(), we can probably do that towards the end of
  this merge window.

  Deprecating register_sysctl_table() will take a bit more time but this
  pull request comes with a few examples of how to do this.

  As it turns out each of the conversions to move away from either of
  these two API calls *also* saves memory. And so long term, all these
  changes *will* prove to have saved a bit of memory on boot.

  The way I see it then is if we remove a user of one deprecated call, it
  gives us enough savings to move one kernel/sysctl.c entry out from the
  generic arrays as we end up with about the same number of bytes.

  Since deprecating register_sysctl_table() and register_sysctl_paths()
  does not require maintainer coordination except the final unexport
  you'll see quite a bit of these changes from other pull requests, I've
  just kept the stragglers after rc3"

Link: https://lkml.kernel.org/r/ZAD+cpbrqlc5vmry@bombadil.infradead.org [0]

* tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux: (29 commits)
  fs: fix sysctls.c built
  mm: compaction: remove incorrect #ifdef checks
  mm: compaction: move compaction sysctl to its own file
  mm: memory-failure: Move memory failure sysctls to its own file
  arm: simplify two-level sysctl registration for ctl_isa_vars
  ia64: simplify one-level sysctl registration for kdump_ctl_table
  utsname: simplify one-level sysctl registration for uts_kern_table
  ntfs: simplfy one-level sysctl registration for ntfs_sysctls
  coda: simplify one-level sysctl registration for coda_table
  fs/cachefiles: simplify one-level sysctl registration for cachefiles_sysctls
  xfs: simplify two-level sysctl registration for xfs_table
  nfs: simplify two-level sysctl registration for nfs_cb_sysctls
  nfs: simplify two-level sysctl registration for nfs4_cb_sysctls
  lockd: simplify two-level sysctl registration for nlm_sysctls
  proc_sysctl: enhance documentation
  xen: simplify sysctl registration for balloon
  md: simplify sysctl registration
  hv: simplify sysctl registration
  scsi: simplify sysctl registration with register_sysctl()
  csky: simplify alignment sysctl registration
  ...
parents b6a78285 e3184de9
......@@ -40,27 +40,11 @@ static struct ctl_table ctl_isa_vars[4] = {
static struct ctl_table_header *isa_sysctl_header;
static struct ctl_table ctl_isa[2] = {
{
.procname = "isa",
.mode = 0555,
.child = ctl_isa_vars,
}, {}
};
static struct ctl_table ctl_bus[2] = {
{
.procname = "bus",
.mode = 0555,
.child = ctl_isa,
}, {}
};
void __init
register_isa_ports(unsigned int membase, unsigned int portbase, unsigned int portshift)
{
isa_membase = membase;
isa_portbase = portbase;
isa_portshift = portshift;
isa_sysctl_header = register_sysctl_table(ctl_bus);
isa_sysctl_header = register_sysctl("bus/isa", ctl_isa_vars);
}
......@@ -332,22 +332,9 @@ static struct ctl_table alignment_tbl[5] = {
{}
};
static struct ctl_table sysctl_table[2] = {
{
.procname = "csky_alignment",
.mode = 0555,
.child = alignment_tbl},
{}
};
static struct ctl_path sysctl_path[2] = {
{.procname = "csky"},
{}
};
static int __init csky_alignment_init(void)
{
register_sysctl_paths(sysctl_path, sysctl_table);
register_sysctl_init("csky/csky_alignment", alignment_tbl);
return 0;
}
......
......@@ -234,15 +234,6 @@ static struct ctl_table kdump_ctl_table[] = {
},
{ }
};
static struct ctl_table sys_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = kdump_ctl_table,
},
{ }
};
#endif
static int
......@@ -257,7 +248,7 @@ machine_crash_setup(void)
if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
return ret;
#ifdef CONFIG_SYSCTL
register_sysctl_table(sys_table);
register_sysctl("kernel", kdump_ctl_table);
#endif
return 0;
}
......
......@@ -1460,15 +1460,6 @@ static struct ctl_table hv_ctl_table[] = {
{}
};
static struct ctl_table hv_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = hv_ctl_table
},
{}
};
/*
* vmbus_bus_init -Main vmbus driver initialization routine.
*
......@@ -1547,7 +1538,7 @@ static int vmbus_bus_init(void)
* message recording won't be available in isolated
* guests should the following registration fail.
*/
hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
if (!hv_ctl_table_hdr)
pr_err("Hyper-V: sysctl table register error");
......
......@@ -322,26 +322,6 @@ static struct ctl_table raid_table[] = {
{ }
};
static struct ctl_table raid_dir_table[] = {
{
.procname = "raid",
.maxlen = 0,
.mode = S_IRUGO|S_IXUGO,
.child = raid_table,
},
{ }
};
static struct ctl_table raid_root_table[] = {
{
.procname = "dev",
.maxlen = 0,
.mode = 0555,
.child = raid_dir_table,
},
{ }
};
static int start_readonly;
/*
......@@ -9653,7 +9633,7 @@ static int __init md_init(void)
mdp_major = ret;
register_reboot_notifier(&md_notifier);
raid_table_header = register_sysctl_table(raid_root_table);
raid_table_header = register_sysctl("dev/raid", raid_table);
md_geninit();
return 0;
......
......@@ -21,25 +21,11 @@ static struct ctl_table scsi_table[] = {
{ }
};
static struct ctl_table scsi_dir_table[] = {
{ .procname = "scsi",
.mode = 0555,
.child = scsi_table },
{ }
};
static struct ctl_table scsi_root_table[] = {
{ .procname = "dev",
.mode = 0555,
.child = scsi_dir_table },
{ }
};
static struct ctl_table_header *scsi_table_header;
int __init scsi_init_sysctl(void)
{
scsi_table_header = register_sysctl_table(scsi_root_table);
scsi_table_header = register_sysctl("dev/scsi", scsi_table);
if (!scsi_table_header)
return -ENOMEM;
return 0;
......
......@@ -97,24 +97,6 @@ static struct ctl_table balloon_table[] = {
{ }
};
static struct ctl_table balloon_root[] = {
{
.procname = "balloon",
.mode = 0555,
.child = balloon_table,
},
{ }
};
static struct ctl_table xen_root[] = {
{
.procname = "xen",
.mode = 0555,
.child = balloon_root,
},
{ }
};
#else
#define xen_hotplug_unpopulated 0
#endif
......@@ -747,7 +729,7 @@ static int __init balloon_init(void)
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
set_online_page_callback(&xen_online_page);
register_memory_notifier(&xen_memory_nb);
register_sysctl_table(xen_root);
register_sysctl_init("xen/balloon", balloon_table);
#endif
balloon_add_regions();
......
......@@ -6,7 +6,6 @@
# Rewritten to use lists instead of if-statements.
#
obj-$(CONFIG_SYSCTL) += sysctls.o
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
......@@ -50,7 +49,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_COREDUMP) += coredump.o
obj-$(CONFIG_SYSCTL) += drop_caches.o
obj-$(CONFIG_SYSCTL) += drop_caches.o sysctls.o
obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += iomap/
......
......@@ -22,18 +22,9 @@ static struct ctl_table cachefiles_sysctls[] = {
{}
};
static struct ctl_table cachefiles_sysctls_root[] = {
{
.procname = "cachefiles",
.mode = 0555,
.child = cachefiles_sysctls,
},
{}
};
int __init cachefiles_register_error_injection(void)
{
cachefiles_sysctl = register_sysctl_table(cachefiles_sysctls_root);
cachefiles_sysctl = register_sysctl("cachefiles", cachefiles_sysctls);
if (!cachefiles_sysctl)
return -ENOMEM;
return 0;
......
......@@ -39,19 +39,10 @@ static struct ctl_table coda_table[] = {
{}
};
static struct ctl_table fs_table[] = {
{
.procname = "coda",
.mode = 0555,
.child = coda_table
},
{}
};
void coda_sysctl_init(void)
{
if ( !fs_table_header )
fs_table_header = register_sysctl_table(fs_table);
fs_table_header = register_sysctl("coda", coda_table);
}
void coda_sysctl_clean(void)
......
......@@ -510,24 +510,6 @@ static struct ctl_table nlm_sysctls[] = {
{ }
};
static struct ctl_table nlm_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
.child = nlm_sysctls,
},
{ }
};
static struct ctl_table nlm_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = nlm_sysctl_dir,
},
{ }
};
#endif /* CONFIG_SYSCTL */
/*
......@@ -644,7 +626,7 @@ static int __init init_nlm(void)
#ifdef CONFIG_SYSCTL
err = -ENOMEM;
nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
nlm_sysctl_table = register_sysctl("fs/nfs", nlm_sysctls);
if (nlm_sysctl_table == NULL)
goto err_sysctl;
#endif
......
......@@ -37,27 +37,10 @@ static struct ctl_table nfs4_cb_sysctls[] = {
{ }
};
static struct ctl_table nfs4_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
.child = nfs4_cb_sysctls,
},
{ }
};
static struct ctl_table nfs4_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = nfs4_cb_sysctl_dir,
},
{ }
};
int nfs4_register_sysctl(void)
{
nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root);
nfs4_callback_sysctl_table = register_sysctl("fs/nfs",
nfs4_cb_sysctls);
if (nfs4_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
......
......@@ -32,27 +32,9 @@ static struct ctl_table nfs_cb_sysctls[] = {
{ }
};
static struct ctl_table nfs_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
.child = nfs_cb_sysctls,
},
{ }
};
static struct ctl_table nfs_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = nfs_cb_sysctl_dir,
},
{ }
};
int nfs_register_sysctl(void)
{
nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root);
nfs_callback_sysctl_table = register_sysctl("fs/nfs", nfs_cb_sysctls);
if (nfs_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
......
......@@ -31,16 +31,6 @@ static struct ctl_table ntfs_sysctls[] = {
{}
};
/* Define the parent directory /proc/sys/fs. */
static struct ctl_table sysctls_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = ntfs_sysctls
},
{}
};
/* Storage for the sysctls header. */
static struct ctl_table_header *sysctls_root_table;
......@@ -54,7 +44,7 @@ int ntfs_sysctl(int add)
{
if (add) {
BUG_ON(sysctls_root_table);
sysctls_root_table = register_sysctl_table(sysctls_root);
sysctls_root_table = register_sysctl("fs", ntfs_sysctls);
if (!sysctls_root_table)
return -ENOMEM;
} else {
......
......@@ -1283,11 +1283,43 @@ static int insert_links(struct ctl_table_header *head)
return err;
}
/*
 * Resolve @path relative to @dir one component at a time, in the manner of
 * "mkdir -p": each component is looked up (and created if missing) through
 * get_subdir(), and the ctl_dir of the last component is returned.
 *
 * Empty components (leading, trailing or repeated '/') are skipped. A NULL
 * @path returns @dir unchanged. If get_subdir() fails, its error pointer is
 * returned immediately.
 */
static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path)
{
	const char *component = path;

	while (component) {
		const char *slash = strchr(component, '/');
		const char *following = NULL;
		int len;

		if (!slash) {
			len = strlen(component);
		} else {
			len = slash - component;
			following = slash + 1;
		}

		/*
		 * len bounds the lookup to the current component only, so for
		 * "foo/bar/yay" just "foo" is handled on the first pass.
		 */
		if (len) {
			dir = get_subdir(dir, component, len);
			if (IS_ERR(dir))
				return dir;
		}
		component = following;
	}
	return dir;
}
/**
* __register_sysctl_table - register a leaf sysctl table
* @set: Sysctl tree to register on
* @path: The path to the directory the sysctl table is in.
* @table: the top-level table structure
* @table: the top-level table structure without any child. This table
* should not be free'd after registration. So it should not be
* used on stack. It can either be a global or dynamically allocated
* by the caller and free'd later after sysctl unregistration.
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
......@@ -1308,9 +1340,12 @@ static int insert_links(struct ctl_table_header *head)
* proc_handler - the text handler routine (described below)
*
* extra1, extra2 - extra pointers usable by the proc handler routines
* XXX: we should eventually modify these to use long min / max [0]
* [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
*
* Leaf nodes in the sysctl tree will be represented by a single file
* under /proc; non-leaf nodes will be represented by directories.
* under /proc; non-leaf nodes (where child is not NULL) are not allowed,
* sysctl_check_table() verifies this.
*
* There must be a proc_handler routine for any terminal nodes.
* Several default handlers are available to cover common cases -
......@@ -1331,7 +1366,6 @@ struct ctl_table_header *__register_sysctl_table(
{
struct ctl_table_root *root = set->dir.header.root;
struct ctl_table_header *header;
const char *name, *nextname;
struct ctl_dir *dir;
struct ctl_table *entry;
struct ctl_node *node;
......@@ -1352,28 +1386,13 @@ struct ctl_table_header *__register_sysctl_table(
spin_lock(&sysctl_lock);
dir = &set->dir;
/* Reference moved down the diretory tree get_subdir */
/* Reference moved down the directory tree get_subdir */
dir->header.nreg++;
spin_unlock(&sysctl_lock);
/* Find the directory for the ctl_table */
for (name = path; name; name = nextname) {
int namelen;
nextname = strchr(name, '/');
if (nextname) {
namelen = nextname - name;
nextname++;
} else {
namelen = strlen(name);
}
if (namelen == 0)
continue;
dir = get_subdir(dir, name, namelen);
if (IS_ERR(dir))
goto fail;
}
dir = sysctl_mkdir_p(dir, path);
if (IS_ERR(dir))
goto fail;
spin_lock(&sysctl_lock);
if (insert_header(dir, header))
goto fail_put_dir_locked;
......@@ -1394,8 +1413,15 @@ struct ctl_table_header *__register_sysctl_table(
/**
* register_sysctl - register a sysctl table
* @path: The path to the directory the sysctl table is in.
* @table: the table structure
* @path: The path to the directory the sysctl table is in. If the path
* doesn't exist we will create it for you.
* @table: the table structure. The caller must ensure the life of the @table
* will be kept during the lifetime use of the sysctl. It must not be freed
* until unregister_sysctl_table() is called with the given returned table
* with this registration. If your code is non modular then you don't need
* to call unregister_sysctl_table() and can instead use something like
* register_sysctl_init() which does not care for the result of the sysctl
* registration.
*
* Register a sysctl table. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
......@@ -1411,8 +1437,11 @@ EXPORT_SYMBOL(register_sysctl);
/**
* __register_sysctl_init() - register sysctl table to path
* @path: path name for sysctl base
* @table: This is the sysctl table that needs to be registered to the path
* @path: path name for sysctl base. If that path doesn't exist we will create
* it for you.
* @table: This is the sysctl table that needs to be registered to the path.
* The caller must ensure the life of the @table will be kept during the
* lifetime use of the sysctl.
* @table_name: The name of sysctl table, only used for log printing when
* registration fails
*
......@@ -1424,10 +1453,7 @@ EXPORT_SYMBOL(register_sysctl);
* register_sysctl() failing on init are extremely low, and so for both reasons
* this function does not return any error as it is used by initialization code.
*
* Context: Can only be called after your respective sysctl base path has been
* registered. So for instance, most base directories are registered early on
* init before init levels are processed through proc_sys_init() and
* sysctl_init_bases().
* Context: if your base directory does not exist it will be created for you.
*/
void __init __register_sysctl_init(const char *path, struct ctl_table *table,
const char *table_name)
......@@ -1557,6 +1583,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos,
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
* We are slowly deprecating this call so avoid its use.
*
* See __register_sysctl_table for more details.
*/
......@@ -1628,6 +1655,7 @@ struct ctl_table_header *__register_sysctl_paths(
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
* We are slowly deprecating this caller so avoid future uses of it.
*
* See __register_sysctl_paths for more details.
*/
......
......@@ -32,7 +32,22 @@
#include <linux/swapops.h>
#include <linux/miscdevice.h>
int sysctl_unprivileged_userfaultfd __read_mostly;
static int sysctl_unprivileged_userfaultfd __read_mostly;
#ifdef CONFIG_SYSCTL
/*
 * Sysctl table for the "unprivileged_userfaultfd" knob. The entry is backed
 * by sysctl_unprivileged_userfaultfd, has mode 0644, and is handled by
 * proc_dointvec_minmax with extra1/extra2 set to SYSCTL_ZERO/SYSCTL_ONE.
 * The trailing empty entry terminates the table.
 */
static struct ctl_table vm_userfaultfd_table[] = {
{
.procname = "unprivileged_userfaultfd",
.data = &sysctl_unprivileged_userfaultfd,
.maxlen = sizeof(sysctl_unprivileged_userfaultfd),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};
#endif
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
......@@ -2180,6 +2195,9 @@ static int __init userfaultfd_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
init_once_userfaultfd_ctx);
#ifdef CONFIG_SYSCTL
register_sysctl_init("vm", vm_userfaultfd_table);
#endif
return 0;
}
__initcall(userfaultfd_init);
......@@ -210,28 +210,10 @@ static struct ctl_table xfs_table[] = {
{}
};
static struct ctl_table xfs_dir_table[] = {
{
.procname = "xfs",
.mode = 0555,
.child = xfs_table
},
{}
};
static struct ctl_table xfs_root_table[] = {
{
.procname = "fs",
.mode = 0555,
.child = xfs_dir_table
},
{}
};
int
xfs_sysctl_register(void)
{
xfs_table_header = register_sysctl_table(xfs_root_table);
xfs_table_header = register_sysctl("fs/xfs", xfs_table);
if (!xfs_table_header)
return -ENOMEM;
return 0;
......
......@@ -81,13 +81,6 @@ static inline unsigned long compact_gap(unsigned int order)
}
#ifdef CONFIG_COMPACTION
extern unsigned int sysctl_compaction_proactiveness;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table,
int write, void *buffer, size_t *length, loff_t *ppos);
extern int sysctl_extfrag_threshold;
extern int sysctl_compact_unevictable_allowed;
extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order);
extern int fragmentation_index(struct zone *zone, unsigned int order);
......
......@@ -124,14 +124,6 @@ void hugepage_put_subpool(struct hugepage_subpool *spool);
void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int move_hugetlb_page_tables(struct vm_area_struct *vma,
struct vm_area_struct *new_vma,
unsigned long old_addr, unsigned long new_addr,
......
......@@ -3442,8 +3442,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
......
......@@ -36,8 +36,6 @@
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
extern int sysctl_unprivileged_userfaultfd;
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
/*
......
......@@ -2368,12 +2368,6 @@ static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
return ret;
}
static struct ctl_path seccomp_sysctl_path[] = {
{ .procname = "kernel", },
{ .procname = "seccomp", },
{ }
};
static struct ctl_table seccomp_sysctl_table[] = {
{
.procname = "actions_avail",
......@@ -2392,14 +2386,7 @@ static struct ctl_table seccomp_sysctl_table[] = {
static int __init seccomp_sysctl_init(void)
{
struct ctl_table_header *hdr;
hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
if (!hdr)
pr_warn("sysctl registration failed\n");
else
kmemleak_not_leak(hdr);
register_sysctl_init("kernel/seccomp", seccomp_sysctl_table);
return 0;
}
......
......@@ -42,7 +42,6 @@
#include <linux/highuid.h>
#include <linux/writeback.h>
#include <linux/ratelimit.h>
#include <linux/compaction.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/key.h>
......@@ -746,27 +745,6 @@ int proc_dointvec(struct ctl_table *table, int write, void *buffer,
return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
}
#ifdef CONFIG_COMPACTION
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
int write, void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, old;
if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
old = *(int *)table->data;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret)
return ret;
if (old != *(int *)table->data)
pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
table->procname, current->comm,
task_pid_nr(current));
return ret;
}
#endif
/**
* proc_douintvec - read a vector of unsigned integers
* @table: the sysctl table
......@@ -2140,38 +2118,6 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_HUGETLB_PAGE
{
.procname = "nr_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_sysctl_handler,
},
#ifdef CONFIG_NUMA
{
.procname = "nr_hugepages_mempolicy",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
},
#endif
{
.procname = "hugetlb_shm_group",
.data = &sysctl_hugetlb_shm_group,
.maxlen = sizeof(gid_t),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "nr_overcommit_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_overcommit_handler,
},
#endif
{
.procname = "lowmem_reserve_ratio",
......@@ -2189,43 +2135,6 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
#ifdef CONFIG_COMPACTION
{
.procname = "compact_memory",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = sysctl_compaction_handler,
},
{
.procname = "compaction_proactiveness",
.data = &sysctl_compaction_proactiveness,
.maxlen = sizeof(sysctl_compaction_proactiveness),
.mode = 0644,
.proc_handler = compaction_proactiveness_sysctl_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "extfrag_threshold",
.data = &sysctl_extfrag_threshold,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "compact_unevictable_allowed",
.data = &sysctl_compact_unevictable_allowed,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_warn_RT_change,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_COMPACTION */
{
.procname = "min_free_kbytes",
.data = &min_free_kbytes,
......@@ -2382,26 +2291,6 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec,
.extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_MEMORY_FAILURE
{
.procname = "memory_failure_early_kill",
.data = &sysctl_memory_failure_early_kill,
.maxlen = sizeof(sysctl_memory_failure_early_kill),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "memory_failure_recovery",
.data = &sysctl_memory_failure_recovery,
.maxlen = sizeof(sysctl_memory_failure_recovery),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
{
.procname = "user_reserve_kbytes",
......@@ -2438,17 +2327,6 @@ static struct ctl_table vm_table[] = {
.extra1 = (void *)&mmap_rnd_compat_bits_min,
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
#ifdef CONFIG_USERFAULTFD
{
.procname = "unprivileged_userfaultfd",
.data = &sysctl_unprivileged_userfaultfd,
.maxlen = sizeof(sysctl_unprivileged_userfaultfd),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
{ }
};
......
......@@ -123,15 +123,6 @@ static struct ctl_table uts_kern_table[] = {
{}
};
static struct ctl_table uts_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = uts_kern_table,
},
{}
};
#ifdef CONFIG_PROC_SYSCTL
/*
* Notify userspace about a change in a certain entry of uts_kern_table,
......@@ -147,7 +138,7 @@ void uts_proc_notify(enum uts_proc proc)
static int __init utsname_sysctl_init(void)
{
register_sysctl_table(uts_root_table);
register_sysctl("kernel", uts_kern_table);
return 0;
}
......
......@@ -1716,7 +1716,14 @@ typedef enum {
* Allow userspace to control policy on scanning the unevictable LRU for
* compactable pages.
*/
int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;
static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;
/*
* Tunable for proactive compaction. It determines how
* aggressively the kernel should compact memory in the
* background. It takes values in the range [0, 100].
*/
static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
static int sysctl_extfrag_threshold = 500;
static inline void
update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
......@@ -2572,8 +2579,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
return ret;
}
int sysctl_extfrag_threshold = 500;
/**
* try_to_compact_pages - Direct compact to satisfy a high-order allocation
* @gfp_mask: The GFP mask of the current allocation
......@@ -2730,14 +2735,7 @@ static void compact_nodes(void)
compact_node(nid);
}
/*
* Tunable for proactive compaction. It determines how
* aggressively the kernel should compact memory in the
* background. It takes values in the range [0, 100].
*/
unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
static int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc, nid;
......@@ -2767,7 +2765,7 @@ int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
* This is the entry point for compacting all nodes via
* /proc/sys/vm/compact_memory
*/
int sysctl_compaction_handler(struct ctl_table *table, int write,
static int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
if (write)
......@@ -3063,6 +3061,63 @@ static int kcompactd_cpu_online(unsigned int cpu)
return 0;
}
/*
 * Wrapper around proc_dointvec_minmax(). For writes on CONFIG_PREEMPT_RT
 * kernels it snapshots the int behind table->data, performs the write, and
 * emits a one-time warning naming the current task if the stored value
 * changed. Reads, and every access on non-RT kernels, are passed straight
 * through. Returns whatever proc_dointvec_minmax() returns.
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int prev, err;

	if (write && IS_ENABLED(CONFIG_PREEMPT_RT)) {
		prev = *(int *)table->data;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		/* Warn only after a successful write that altered the value. */
		if (!err && prev != *(int *)table->data)
			pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
				     table->procname, current->comm,
				     task_pid_nr(current));
		return err;
	}

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
/*
 * Compaction sysctl entries, registered under "vm" by kcompactd_init().
 * The trailing empty entry terminates the table.
 */
static struct ctl_table vm_compaction[] = {
{
/* Mode 0200 with no backing data; handled by sysctl_compaction_handler. */
.procname = "compact_memory",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = sysctl_compaction_handler,
},
{
/* Bounds passed to the handler: SYSCTL_ZERO .. SYSCTL_ONE_HUNDRED. */
.procname = "compaction_proactiveness",
.data = &sysctl_compaction_proactiveness,
.maxlen = sizeof(sysctl_compaction_proactiveness),
.mode = 0644,
.proc_handler = compaction_proactiveness_sysctl_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE_HUNDRED,
},
{
/* Bounds passed to the handler: SYSCTL_ZERO .. SYSCTL_ONE_THOUSAND. */
.procname = "extfrag_threshold",
.data = &sysctl_extfrag_threshold,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE_THOUSAND,
},
{
/* 0/1 knob; uses the handler that warns on changes under PREEMPT_RT. */
.procname = "compact_unevictable_allowed",
.data = &sysctl_compact_unevictable_allowed,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_warn_RT_change,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};
static int __init kcompactd_init(void)
{
int nid;
......@@ -3078,6 +3133,7 @@ static int __init kcompactd_init(void)
for_each_node_state(nid, N_MEMORY)
kcompactd_run(nid);
register_sysctl_init("vm", vm_compaction);
return 0;
}
subsys_initcall(kcompactd_init)
......
......@@ -4202,6 +4202,12 @@ static void __init hugetlb_sysfs_init(void)
hugetlb_register_all_nodes();
}
#ifdef CONFIG_SYSCTL
static void hugetlb_sysctl_init(void);
#else
static inline void hugetlb_sysctl_init(void) { }
#endif
static int __init hugetlb_init(void)
{
int i;
......@@ -4257,6 +4263,7 @@ static int __init hugetlb_init(void)
hugetlb_sysfs_init();
hugetlb_cgroup_file_init();
hugetlb_sysctl_init();
#ifdef CONFIG_SMP
num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus());
......@@ -4588,7 +4595,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
return ret;
}
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
static int hugetlb_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
......@@ -4597,7 +4604,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
}
#ifdef CONFIG_NUMA
int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
static int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
return hugetlb_sysctl_handler_common(true, table, write,
......@@ -4605,7 +4612,7 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
}
#endif /* CONFIG_NUMA */
int hugetlb_overcommit_handler(struct ctl_table *table, int write,
static int hugetlb_overcommit_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
......@@ -4634,6 +4641,44 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
return ret;
}
/*
 * hugetlb sysctl entries, registered under "vm" by hugetlb_sysctl_init().
 * Entries with .data = NULL rely entirely on their custom proc_handler.
 * The trailing empty entry terminates the table.
 */
static struct ctl_table hugetlb_table[] = {
{
.procname = "nr_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_sysctl_handler,
},
#ifdef CONFIG_NUMA
{
/* Only present when CONFIG_NUMA is enabled. */
.procname = "nr_hugepages_mempolicy",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
},
#endif
{
/* Backed directly by sysctl_hugetlb_shm_group via proc_dointvec. */
.procname = "hugetlb_shm_group",
.data = &sysctl_hugetlb_shm_group,
.maxlen = sizeof(gid_t),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "nr_overcommit_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_overcommit_handler,
},
{ }
};
static void hugetlb_sysctl_init(void)
{
register_sysctl_init("vm", hugetlb_table);
}
#endif /* CONFIG_SYSCTL */
void hugetlb_report_meminfo(struct seq_file *m)
......
......@@ -62,13 +62,14 @@
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
#include <linux/sysctl.h>
#include "swap.h"
#include "internal.h"
#include "ras/ras_event.h"
int sysctl_memory_failure_early_kill __read_mostly = 0;
static int sysctl_memory_failure_early_kill __read_mostly;
int sysctl_memory_failure_recovery __read_mostly = 1;
static int sysctl_memory_failure_recovery __read_mostly = 1;
atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
......@@ -122,6 +123,37 @@ const struct attribute_group memory_failure_attr_group = {
.attrs = memory_failure_attr,
};
#ifdef CONFIG_SYSCTL
/*
 * Memory-failure sysctl entries, registered under "vm" by
 * memory_failure_sysctl_init(). Both are int knobs handled by
 * proc_dointvec_minmax with extra1/extra2 set to SYSCTL_ZERO/SYSCTL_ONE.
 * The trailing empty entry terminates the table.
 */
static struct ctl_table memory_failure_table[] = {
{
.procname = "memory_failure_early_kill",
.data = &sysctl_memory_failure_early_kill,
.maxlen = sizeof(sysctl_memory_failure_early_kill),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "memory_failure_recovery",
.data = &sysctl_memory_failure_recovery,
.maxlen = sizeof(sysctl_memory_failure_recovery),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};
/*
 * Register memory_failure_table under the "vm" sysctl directory.
 * Run as a late_initcall; always returns 0 because register_sysctl_init()
 * reports no result to the caller.
 */
static int __init memory_failure_sysctl_init(void)
{
register_sysctl_init("vm", memory_failure_table);
return 0;
}
late_initcall(memory_failure_sysctl_init);
#endif /* CONFIG_SYSCTL */
/*
* Return values:
* 1: the page is dissolved (if needed) and taken off from buddy,
......
......@@ -1764,11 +1764,6 @@ static int apparmor_dointvec(struct ctl_table *table, int write,
return proc_dointvec(table, write, buffer, lenp, ppos);
}
static struct ctl_path apparmor_sysctl_path[] = {
{ .procname = "kernel", },
{ }
};
static struct ctl_table apparmor_sysctl_table[] = {
{
.procname = "unprivileged_userns_apparmor_policy",
......@@ -1790,8 +1785,7 @@ static struct ctl_table apparmor_sysctl_table[] = {
static int __init apparmor_init_sysctl(void)
{
return register_sysctl_paths(apparmor_sysctl_path,
apparmor_sysctl_table) ? 0 : -ENOMEM;
return register_sysctl("kernel", apparmor_sysctl_table) ? 0 : -ENOMEM;
}
#else
static inline int apparmor_init_sysctl(void)
......
......@@ -52,12 +52,6 @@ static bool deny_reading_verity_digests;
#endif
#ifdef CONFIG_SYSCTL
static struct ctl_path loadpin_sysctl_path[] = {
{ .procname = "kernel", },
{ .procname = "loadpin", },
{ }
};
static struct ctl_table loadpin_sysctl_table[] = {
{
.procname = "enforce",
......@@ -262,7 +256,7 @@ static int __init loadpin_init(void)
enforce ? "" : "not ");
parse_exclude();
#ifdef CONFIG_SYSCTL
if (!register_sysctl_paths(loadpin_sysctl_path, loadpin_sysctl_table))
if (!register_sysctl("kernel/loadpin", loadpin_sysctl_table))
pr_notice("sysctl registration failed!\n");
#endif
security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin");
......
......@@ -447,12 +447,6 @@ static int yama_dointvec_minmax(struct ctl_table *table, int write,
static int max_scope = YAMA_SCOPE_NO_ATTACH;
static struct ctl_path yama_sysctl_path[] = {
{ .procname = "kernel", },
{ .procname = "yama", },
{ }
};
static struct ctl_table yama_sysctl_table[] = {
{
.procname = "ptrace_scope",
......@@ -467,7 +461,7 @@ static struct ctl_table yama_sysctl_table[] = {
};
static void __init yama_init_sysctl(void)
{
if (!register_sysctl_paths(yama_sysctl_path, yama_sysctl_table))
if (!register_sysctl("kernel/yama", yama_sysctl_table))
panic("Yama: sysctl registration failed.\n");
}
#else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment