Commit 5a6e75f8 authored by Kirill A. Shutemov, committed by Linus Torvalds

shmem: prepare huge= mount option and sysfs knob

This patch adds a new mount option, "huge=".  It can have the following values:

  - "always":
	Attempt to allocate huge pages every time we need a new page;

  - "never":
	Do not allocate huge pages;

  - "within_size":
	Only allocate huge page if it will be fully within i_size.
	Also respect fadvise()/madvise() hints;

  - "advise:
	Only allocate huge pages if requested with fadvise()/madvise();

Default is "never" for now.

"mount -o remount,huge= /mountpoint" works fine after mount: remounting
huge=never will not attempt to break up huge pages at all, just stop
more from being allocated.
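
For example, switching an existing mount off huge pages could look like this
sketch (same hypothetical mount point as above; for a remount the source and
filesystem type arguments should be ignored, only the data string matters):

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* equivalent to: mount -o remount,huge=never /mnt/test */
		if (mount(NULL, "/mnt/test", NULL, MS_REMOUNT, "huge=never") < 0) {
			perror("remount");
			return 1;
		}
		return 0;
	}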

No new config option: put this under CONFIG_TRANSPARENT_HUGEPAGE, which
is the appropriate option to protect those who don't want the new bloat,
and with which we shall share some pmd code.

Prohibit the option when !CONFIG_TRANSPARENT_HUGEPAGE, just as mpol is
invalid without CONFIG_NUMA (was hidden in mpol_parse_str(): make it
explicit).

Allow enabling THP only if the machine has_transparent_hugepage().

But what about Shmem with no user-visible mount? SysV SHM, memfds,
shared anonymous mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM
objects, Ashmem.  Though unlikely to suit all usages, provide sysfs knob
/sys/kernel/mm/transparent_hugepage/shmem_enabled to experiment with
huge on those.

And allow shmem_enabled two further values:

  - "deny":
	For use in emergencies, to force the huge option off from
	all mounts;
  - "force":
	Force the huge option on for all - very useful for testing;
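
For reference, a minimal userspace sketch of reading and setting the knob
(error handling trimmed; not part of this patch, and writing "force" only
makes sense on test machines):

	#include <stdio.h>

	#define SHMEM_ENABLED "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

	int main(void)
	{
		char line[128];
		FILE *f = fopen(SHMEM_ENABLED, "r");

		/* the active value is printed in brackets, e.g. "[never]" */
		if (f && fgets(line, sizeof(line), f))
			printf("current: %s", line);
		if (f)
			fclose(f);

		/* force huge pages on for every shmem/tmpfs user */
		f = fopen(SHMEM_ENABLED, "w");
		if (!f) {
			perror(SHMEM_ENABLED);
			return 1;
		}
		fputs("force\n", f);
		return fclose(f) ? 1 : 0;
	}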

Based on patch by Hugh Dickins.

Link: http://lkml.kernel.org/r/1466021202-61880-28-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 65c45377
@@ -41,6 +41,8 @@ enum transparent_hugepage_flag {
 #endif
 };
 
+extern struct kobj_attribute shmem_enabled_attr;
+
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
...
@@ -28,9 +28,10 @@ struct shmem_sb_info {
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	umode_t mode;		    /* Mount mode for root directory */
+	unsigned char huge;	    /* Whether to try for hugepages */
 	kuid_t uid;		    /* Mount uid for root directory */
 	kgid_t gid;		    /* Mount gid for root directory */
-	umode_t mode;		    /* Mount mode for root directory */
 	struct mempolicy *mpol;     /* default memory policy for mappings */
 };
...
@@ -442,6 +442,9 @@ static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
 	&defrag_attr.attr,
 	&use_zero_page_attr.attr,
+#ifdef CONFIG_SHMEM
+	&shmem_enabled_attr.attr,
+#endif
 #ifdef CONFIG_DEBUG_VM
 	&debug_cow_attr.attr,
 #endif
...
@@ -288,6 +288,87 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 	return item == swp_to_radix_entry(swap);
 }
 
+/*
+ * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
+ *
+ * SHMEM_HUGE_NEVER:
+ *	disables huge pages for the mount;
+ * SHMEM_HUGE_ALWAYS:
+ *	enables huge pages for the mount;
+ * SHMEM_HUGE_WITHIN_SIZE:
+ *	only allocate huge pages if the page will be fully within i_size,
+ *	also respect fadvise()/madvise() hints;
+ * SHMEM_HUGE_ADVISE:
+ *	only allocate huge pages if requested with fadvise()/madvise();
+ */
+#define SHMEM_HUGE_NEVER	0
+#define SHMEM_HUGE_ALWAYS	1
+#define SHMEM_HUGE_WITHIN_SIZE	2
+#define SHMEM_HUGE_ADVISE	3
+
+/*
+ * Special values.
+ * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
+ *
+ * SHMEM_HUGE_DENY:
+ *	disables huge on shm_mnt and all mounts, for emergency use;
+ * SHMEM_HUGE_FORCE:
+ *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
+ *
+ */
+#define SHMEM_HUGE_DENY		(-1)
+#define SHMEM_HUGE_FORCE	(-2)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* ifdef here to avoid bloating shmem.o when not necessary */
+
+int shmem_huge __read_mostly;
+
+static int shmem_parse_huge(const char *str)
+{
+	if (!strcmp(str, "never"))
+		return SHMEM_HUGE_NEVER;
+	if (!strcmp(str, "always"))
+		return SHMEM_HUGE_ALWAYS;
+	if (!strcmp(str, "within_size"))
+		return SHMEM_HUGE_WITHIN_SIZE;
+	if (!strcmp(str, "advise"))
+		return SHMEM_HUGE_ADVISE;
+	if (!strcmp(str, "deny"))
+		return SHMEM_HUGE_DENY;
+	if (!strcmp(str, "force"))
+		return SHMEM_HUGE_FORCE;
+	return -EINVAL;
+}
+
+static const char *shmem_format_huge(int huge)
+{
+	switch (huge) {
+	case SHMEM_HUGE_NEVER:
+		return "never";
+	case SHMEM_HUGE_ALWAYS:
+		return "always";
+	case SHMEM_HUGE_WITHIN_SIZE:
+		return "within_size";
+	case SHMEM_HUGE_ADVISE:
+		return "advise";
+	case SHMEM_HUGE_DENY:
+		return "deny";
+	case SHMEM_HUGE_FORCE:
+		return "force";
+	default:
+		VM_BUG_ON(1);
+		return "bad_val";
+	}
+}
+
+#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
+
+#define shmem_huge SHMEM_HUGE_DENY
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
@@ -2860,11 +2941,24 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 			sbinfo->gid = make_kgid(current_user_ns(), gid);
 			if (!gid_valid(sbinfo->gid))
 				goto bad_val;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		} else if (!strcmp(this_char, "huge")) {
+			int huge;
+			huge = shmem_parse_huge(value);
+			if (huge < 0)
+				goto bad_val;
+			if (!has_transparent_hugepage() &&
+					huge != SHMEM_HUGE_NEVER)
+				goto bad_val;
+			sbinfo->huge = huge;
+#endif
+#ifdef CONFIG_NUMA
 		} else if (!strcmp(this_char,"mpol")) {
 			mpol_put(mpol);
 			mpol = NULL;
 			if (mpol_parse_str(value, &mpol))
 				goto bad_val;
+#endif
 		} else {
 			pr_err("tmpfs: Bad mount option %s\n", this_char);
 			goto error;
@@ -2910,6 +3004,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 		goto out;
 
 	error = 0;
+	sbinfo->huge = config.huge;
 	sbinfo->max_blocks = config.max_blocks;
 	sbinfo->max_inodes = config.max_inodes;
 	sbinfo->free_inodes = config.max_inodes - inodes;
@@ -2943,6 +3038,11 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
 		seq_printf(seq, ",gid=%u",
 				from_kgid_munged(&init_user_ns, sbinfo->gid));
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
+	if (sbinfo->huge)
+		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
+#endif
 	shmem_show_mpol(seq, sbinfo->mpol);
 	return 0;
 }
@@ -3282,6 +3382,13 @@ int __init shmem_init(void)
 		pr_err("Could not kern_mount tmpfs\n");
 		goto out1;
 	}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY)
+		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
+	else
+		shmem_huge = 0; /* just in case it was patched */
+#endif
 	return 0;
 
 out1:
@@ -3293,6 +3400,60 @@ int __init shmem_init(void)
 	return error;
 }
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
+static ssize_t shmem_enabled_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	int values[] = {
+		SHMEM_HUGE_ALWAYS,
+		SHMEM_HUGE_WITHIN_SIZE,
+		SHMEM_HUGE_ADVISE,
+		SHMEM_HUGE_NEVER,
+		SHMEM_HUGE_DENY,
+		SHMEM_HUGE_FORCE,
+	};
+	int i, count;
+
+	for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) {
+		const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s ";
+
+		count += sprintf(buf + count, fmt,
+				shmem_format_huge(values[i]));
+	}
+	buf[count - 1] = '\n';
+	return count;
+}
+
+static ssize_t shmem_enabled_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char tmp[16];
+	int huge;
+
+	if (count + 1 > sizeof(tmp))
+		return -EINVAL;
+	memcpy(tmp, buf, count);
+	tmp[count] = '\0';
+	if (count && tmp[count - 1] == '\n')
+		tmp[count - 1] = '\0';
+
+	huge = shmem_parse_huge(tmp);
+	if (huge == -EINVAL)
+		return -EINVAL;
+	if (!has_transparent_hugepage() &&
+			huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
+		return -EINVAL;
+
+	shmem_huge = huge;
+	if (shmem_huge < SHMEM_HUGE_DENY)
+		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
+	return count;
+}
+
+struct kobj_attribute shmem_enabled_attr =
+	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
+
 #else /* !CONFIG_SHMEM */
 
 /*
...