Commit 5abe3795 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Add as a feature case-insensitive directories (the casefold feature)
  using Unicode 12.1.

  Also, the usual largish number of cleanups and bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  ext4: export /sys/fs/ext4/feature/casefold if Unicode support is present
  ext4: fix ext4_show_options for file systems w/o journal
  unicode: refactor the rule for regenerating utf8data.h
  docs: ext4.rst: document case-insensitive directories
  ext4: Support case-insensitive file name lookups
  ext4: include charset encoding information in the superblock
  MAINTAINERS: add Unicode subsystem entry
  unicode: update unicode database unicode version 12.1.0
  unicode: introduce test module for normalized utf8 implementation
  unicode: implement higher level API for string handling
  unicode: reduce the size of utf8data[]
  unicode: introduce code for UTF-8 normalization
  unicode: introduce UTF-8 character database
  ext4: actually request zeroing of inode table after grow
  ext4: cond_resched in work-heavy group loops
  ext4: fix use-after-free race with debug_want_extra_isize
  ext4: avoid drop reference to iloc.bh twice
  ext4: ignore e_value_offs for xattrs with value-in-ea-inode
  ext4: protect journal inode's blocks using block_validity
  ext4: use BUG() instead of BUG_ON(1)
  ...
parents e5fef2a9 db90f419
......@@ -91,10 +91,48 @@ Currently Available
* large block (up to pagesize) support
* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
the ordering)
* Case-insensitive file name lookups
[1] Filesystems with a block size of 1k may see a limit imposed by the
directory hash tree having a maximum depth of two.
case-insensitive file name lookups
======================================================
The case-insensitive file name lookup feature is supported on a
per-directory basis, allowing the user to mix case-insensitive and
case-sensitive directories in the same filesystem. It is enabled by
flipping the +F inode attribute of an empty directory. The
case-insensitive string match operation is only defined when we know how
text is encoded in a byte sequence. For that reason, in order to enable
case-insensitive directories, the filesystem must have the
casefold feature, which stores the filesystem-wide encoding
model used. By default, the charset adopted is the latest version of
Unicode (12.1.0, at the time of this writing), encoded in the UTF-8
form. The comparison algorithm is implemented by normalizing the
strings to the Canonical decomposition form, as defined by Unicode,
followed by a byte per byte comparison.
The case-awareness is name-preserving on the disk, meaning that the file
name provided by userspace is a byte-per-byte match to what is actually
written on the disk. The Unicode normalization format used by the
kernel is thus an internal representation, and not exposed to the
userspace nor to the disk, with the important exception of disk hashes,
used on large case-insensitive directories with DX feature. On DX
directories, the hash must be calculated using the casefolded version of
the filename, meaning that the normalization format used actually has an
impact on where the directory entry is stored.
When we change from viewing filenames as opaque byte sequences to seeing
them as encoded strings we need to address what happens when a program
tries to create a file with an invalid name. The Unicode subsystem
within the kernel leaves the decision of what to do in this case to the
filesystem, which selects its preferred behavior by enabling/disabling
the strict mode. When Ext4 encounters one of those strings and the
filesystem did not require strict mode, it falls back to considering the
entire string as an opaque byte sequence, which still allows the user to
operate on that file, but the case-insensitive lookups won't work.
Options
=======
......
......@@ -176,6 +176,7 @@ mkprep
mkregtable
mktables
mktree
mkutf8data
modpost
modules.builtin
modules.order
......@@ -254,6 +255,7 @@ vsyscall_32.lds
wanxlfw.inc
uImage
unifdef
utf8data.h
wakeup.bin
wakeup.elf
wakeup.lds
......
......@@ -15984,6 +15984,12 @@ F: drivers/uwb/
F: include/linux/uwb.h
F: include/linux/uwb/
UNICODE SUBSYSTEM:
M: Gabriel Krisman Bertazi <krisman@collabora.com>
L: linux-fsdevel@vger.kernel.org
S: Supported
F: fs/unicode/
UNICORE32 ARCHITECTURE:
M: Guan Xuetao <gxt@pku.edu.cn>
W: http://mprc.pku.edu.cn/~guanxuetao/linux
......
......@@ -317,5 +317,6 @@ endif # NETWORK_FILESYSTEMS
source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"
source "fs/unicode/Kconfig"
endmenu
......@@ -92,6 +92,7 @@ obj-$(CONFIG_EXPORTFS) += exportfs/
obj-$(CONFIG_NFSD) += nfsd/
obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_UNICODE) += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
obj-$(CONFIG_CIFS) += cifs/
obj-$(CONFIG_HPFS_FS) += hpfs/
......
......@@ -137,6 +137,48 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
printk(KERN_CONT "\n");
}
/*
 * Register every block mapped by the reserved inode @ino as a system zone.
 *
 * The inode's logical range (i_size rounded up to whole blocks) is walked
 * with ext4_map_blocks().  Holes are skipped one block at a time; each
 * mapped extent is first checked with ext4_data_block_valid() and then
 * recorded via add_system_zone().
 *
 * Returns 0 on success, -EINVAL if @ino is outside
 * [EXT4_ROOT_INO, s_inodes_count], -EFSCORRUPTED if a mapped extent fails
 * the validity check, or the negative error returned by ext4_iget(),
 * ext4_map_blocks() or add_system_zone().
 */
static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
{
	struct inode *inode;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_map_blocks map;
	u32 i = 0, num;
	/*
	 * These two must be signed: both are compared against zero below to
	 * detect negative error codes, which an unsigned type would hide.
	 */
	int err = 0, n;

	if ((ino < EXT4_ROOT_INO) ||
	    (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
		return -EINVAL;

	inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
	while (i < num) {
		map.m_lblk = i;
		map.m_len = num - i;
		n = ext4_map_blocks(NULL, inode, &map, 0);
		if (n < 0) {
			err = n;
			break;
		}
		if (n == 0) {
			/* Nothing mapped at this logical block; try the next. */
			i++;
		} else {
			if (!ext4_data_block_valid(sbi, map.m_pblk, n)) {
				ext4_error(sb, "blocks %llu-%llu from inode %u "
					   "overlap system zone", map.m_pblk,
					   map.m_pblk + map.m_len - 1, ino);
				err = -EFSCORRUPTED;
				break;
			}
			err = add_system_zone(sbi, map.m_pblk, n);
			if (err < 0)
				break;
			i += n;
		}
	}
	iput(inode);
	return err;
}
int ext4_setup_system_zone(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
......@@ -155,6 +197,7 @@ int ext4_setup_system_zone(struct super_block *sb)
return 0;
for (i=0; i < ngroups; i++) {
cond_resched();
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
......@@ -171,6 +214,12 @@ int ext4_setup_system_zone(struct super_block *sb)
if (ret)
return ret;
}
if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
ret = ext4_protect_reserved_inode(sb,
le32_to_cpu(sbi->s_es->s_journal_inum));
if (ret)
return ret;
}
if (test_opt(sb, DEBUG))
debug_print_tree(sbi);
......
......@@ -26,6 +26,7 @@
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include "ext4.h"
#include "xattr.h"
......@@ -660,3 +661,50 @@ const struct file_operations ext4_dir_operations = {
.open = ext4_dir_open,
.release = ext4_release_dir,
};
#ifdef CONFIG_UNICODE
/*
 * Dentry name comparison for filesystems mounted with an encoding.
 *
 * @dentry is the candidate whose name is @str/@len; @name is the name being
 * looked up.  Returns 0 when the two names match and nonzero otherwise
 * (ext4_ci_compare() may also return a negative errno).
 *
 * When the parent directory is not casefolded the names are compared as
 * plain byte strings: equal length and identical bytes.
 */
static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
			  const char *str, const struct qstr *name)
{
	struct qstr qstr = {.name = str, .len = len };

	if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) {
		if (len != name->len)
			return -1;
		/*
		 * Compare against the name bytes, not the qstr structure, and
		 * return 0 on equality like the other paths of this function.
		 */
		return memcmp(str, name->name, len);
	}

	return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
}
/*
 * Dentry hash for filesystems mounted with an encoding.
 *
 * For a casefolded directory @dentry, replace str->hash with the hash of the
 * casefolded form of @str.  Non-casefolded directories keep the hash already
 * stored in @str.
 *
 * Returns 0 on success, -ENOMEM if the scratch buffer cannot be allocated,
 * or -EINVAL when casefolding fails and the filesystem is in strict mode.
 * When casefolding fails outside strict mode, str->hash is left untouched
 * and 0 is returned.
 */
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
{
	const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
	const struct unicode_map *um = sbi->s_encoding;
	unsigned char *folded;
	int folded_len;
	int err = 0;

	if (!IS_CASEFOLDED(dentry->d_inode))
		return 0;

	folded = kmalloc(PATH_MAX, GFP_ATOMIC);
	if (!folded)
		return -ENOMEM;

	folded_len = utf8_casefold(um, str, folded, PATH_MAX);
	if (folded_len >= 0)
		str->hash = full_name_hash(dentry, folded, folded_len);
	else if (ext4_has_strict_mode(sbi))
		err = -EINVAL;

	kfree(folded);
	return err;
}
/* Dentry callbacks providing encoding-aware name hashing and comparison. */
const struct dentry_operations ext4_dentry_ops = {
	.d_compare	= ext4_d_compare,
	.d_hash		= ext4_d_hash,
};
#endif
......@@ -399,10 +399,11 @@ struct flex_groups {
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
......@@ -417,10 +418,10 @@ struct flex_groups {
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL)
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
......@@ -1313,7 +1314,9 @@ struct ext4_super_block {
__u8 s_first_error_time_hi;
__u8 s_last_error_time_hi;
__u8 s_pad[2];
__le32 s_reserved[96]; /* Padding to the end of the block */
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_reserved[95]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
......@@ -1338,6 +1341,16 @@ struct ext4_super_block {
/* Number of quota types we support */
#define EXT4_MAXQUOTAS 3
#define EXT4_ENC_UTF8_12_1 1
/*
* Flags for ext4_sb_info.s_encoding_flags.
*/
#define EXT4_ENC_STRICT_MODE_FL		(1 << 0)

/*
 * Nonzero when the strict-mode bit is set in sbi->s_encoding_flags.
 * The argument is parenthesized so that any pointer expression can be used.
 */
#define ext4_has_strict_mode(sbi) \
	((sbi)->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
/*
* fourth extended-fs super-block data in memory
*/
......@@ -1387,6 +1400,10 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
#ifdef CONFIG_UNICODE
struct unicode_map *s_encoding;
__u16 s_encoding_flags;
#endif
/* Journaling */
struct journal_s *s_journal;
......@@ -1592,9 +1609,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_SB(sb) (sb)
#endif
/*
* Returns true if the inode is inode is encrypted
*/
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
......@@ -1663,6 +1677,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
#define EXT4_FEATURE_INCOMPAT_CASEFOLD 0x20000
extern void ext4_update_dynamic_rev(struct super_block *sb);
......@@ -1756,6 +1771,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED)
EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR)
EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA)
EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
......@@ -1783,6 +1799,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
EXT4_FEATURE_INCOMPAT_CASEFOLD | \
EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
......@@ -2376,8 +2393,8 @@ extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo);
/* ialloc.c */
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
......@@ -2973,6 +2990,10 @@ static inline void ext4_unlock_group(struct super_block *sb,
/* dir.c */
extern const struct file_operations ext4_dir_operations;
#ifdef CONFIG_UNICODE
extern const struct dentry_operations ext4_dentry_ops;
#endif
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
......@@ -3065,6 +3086,10 @@ extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
extern int ext4_handle_dirty_dirent_node(handle_t *handle,
struct inode *inode,
struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *name,
const struct qstr *entry);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
......
......@@ -711,7 +711,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
* We don't need to check unwritten extent because
* indirect-based file doesn't have it.
*/
BUG_ON(1);
BUG();
}
} else if (retval == 0) {
if (ext4_es_is_written(es)) {
......@@ -780,7 +780,7 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
}
p = &(*p)->rb_right;
} else {
BUG_ON(1);
BUG();
return -EINVAL;
}
}
......
......@@ -6,6 +6,7 @@
*/
#include <linux/fs.h>
#include <linux/unicode.h>
#include <linux/compiler.h>
#include <linux/bitops.h>
#include "ext4.h"
......@@ -196,7 +197,8 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
* represented, and whether or not the returned hash is 32 bits or 64
* bits. 32 bit hashes will return 0 for the minor hash.
*/
int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
static int __ext4fs_dirhash(const char *name, int len,
struct dx_hash_info *hinfo)
{
__u32 hash;
__u32 minor_hash = 0;
......@@ -268,3 +270,33 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
hinfo->minor_hash = minor_hash;
return 0;
}
/*
 * Compute the directory hash of @name (length @len) for directory @dir,
 * storing the result in @hinfo.
 *
 * With CONFIG_UNICODE, a non-empty name in a casefolded directory is hashed
 * in its casefolded form.  If casefolding fails, or no encoding is loaded
 * for the filesystem, the name is hashed as an opaque byte sequence instead.
 *
 * Returns the result of __ext4fs_dirhash(), or -ENOMEM if the temporary
 * casefold buffer cannot be allocated.
 */
int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
		   struct dx_hash_info *hinfo)
{
#ifdef CONFIG_UNICODE
	const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
	int r, dlen;
	unsigned char *buff;
	struct qstr qstr = {.name = name, .len = len };

	/*
	 * The casefold inode flag comes from disk, while s_encoding is only
	 * set up when the superblock advertises the casefold feature.  Do
	 * not hand a NULL map to utf8_casefold(); ext4_match() applies the
	 * same s_encoding guard before comparing names.
	 */
	if (len && IS_CASEFOLDED(dir) && um) {
		buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
		if (!buff)
			return -ENOMEM;

		dlen = utf8_casefold(um, &qstr, buff, PATH_MAX);
		if (dlen < 0) {
			/* Invalid sequence: hash the raw bytes instead. */
			kfree(buff);
			goto opaque_seq;
		}

		r = __ext4fs_dirhash(buff, dlen, hinfo);

		kfree(buff);
		return r;
	}
opaque_seq:
#endif
	return __ext4fs_dirhash(name, len, hinfo);
}
......@@ -455,7 +455,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
if (qstr) {
hinfo.hash_version = DX_HASH_HALF_MD4;
hinfo.seed = sbi->s_hash_seed;
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
grp = prandom_u32();
......
......@@ -1407,7 +1407,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
}
}
ext4fs_dirhash(de->name, de->name_len, hinfo);
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))
......
......@@ -399,6 +399,10 @@ static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map)
{
if (ext4_has_feature_journal(inode->i_sb) &&
(inode->i_ino ==
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
return 0;
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
......@@ -541,7 +545,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = retval;
retval = 0;
} else {
BUG_ON(1);
BUG();
}
#ifdef ES_AGGRESSIVE_TEST
ext4_map_blocks_es_recheck(handle, inode, map,
......@@ -1876,7 +1880,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
else if (ext4_es_is_unwritten(&es))
map->m_flags |= EXT4_MAP_UNWRITTEN;
else
BUG_ON(1);
BUG();
#ifdef ES_AGGRESSIVE_TEST
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
......@@ -4738,9 +4742,11 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
new_fl |= S_CASEFOLD;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
S_ENCRYPTED);
S_ENCRYPTED|S_CASEFOLD);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
......
......@@ -278,6 +278,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
unsigned int jflag;
struct super_block *sb = inode->i_sb;
/* Is it quota file? Do not allow user to mess with it */
if (ext4_is_quota_file(inode))
......@@ -322,6 +323,23 @@ static int ext4_ioctl_setflags(struct inode *inode,
goto flags_out;
}
if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) {
if (!ext4_has_feature_casefold(sb)) {
err = -EOPNOTSUPP;
goto flags_out;
}
if (!S_ISDIR(inode->i_mode)) {
err = -ENOTDIR;
goto flags_out;
}
if (!ext4_empty_dir(inode)) {
err = -ENOTEMPTY;
goto flags_out;
}
}
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
......@@ -978,7 +996,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err == 0)
err = err2;
mnt_drop_write_file(filp);
if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
if (!err && (o_group < EXT4_SB(sb)->s_groups_count) &&
ext4_has_group_desc_csum(sb) &&
test_opt(sb, INIT_INODE_TABLE))
err = ext4_register_li_request(sb, o_group);
......
......@@ -1539,7 +1539,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
ex->fe_len += 1 << order;
}
if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) {
if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
/* Should never happen! (but apparently sometimes does?!?) */
WARN_ON(1);
ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
......@@ -2490,6 +2490,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
for (i = 0; i < ngroups; i++) {
cond_resched();
desc = ext4_get_group_desc(sb, i, NULL);
if (desc == NULL) {
ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
......@@ -2705,6 +2706,7 @@ int ext4_mb_release(struct super_block *sb)
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
cond_resched();
grinfo = ext4_get_group_info(sb, i);
#ifdef DOUBLE_CHECK
kfree(grinfo->bb_bitmap);
......
......@@ -35,6 +35,7 @@
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include "ext4.h"
#include "ext4_jbd2.h"
......@@ -629,7 +630,7 @@ static struct stats dx_show_leaf(struct inode *dir,
}
if (!fscrypt_has_encryption_key(dir)) {
/* Directory is not encrypted */
ext4fs_dirhash(de->name,
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(U)%x.%u ", len,
name, h.hash,
......@@ -662,8 +663,8 @@ static struct stats dx_show_leaf(struct inode *dir,
name = fname_crypto_str.name;
len = fname_crypto_str.len;
}
ext4fs_dirhash(de->name, de->name_len,
&h);
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
......@@ -673,7 +674,7 @@ static struct stats dx_show_leaf(struct inode *dir,
#else
int len = de->name_len;
char *name = de->name;
ext4fs_dirhash(de->name, de->name_len, &h);
ext4fs_dirhash(dir, de->name, de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
#endif
......@@ -762,7 +763,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (fname && fname_name(fname))
ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
......@@ -1008,7 +1009,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* silently ignore the rest of the block */
break;
}
ext4fs_dirhash(de->name, de->name_len, hinfo);
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))
......@@ -1197,7 +1198,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
while ((char *) de < base + blocksize) {
if (de->name_len && de->inode) {
ext4fs_dirhash(de->name, de->name_len, &h);
ext4fs_dirhash(dir, de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
......@@ -1252,15 +1253,52 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
dx_set_count(entries, count + 1);
}
#ifdef CONFIG_UNICODE
/*
 * Case-insensitive comparison of a looked-up name against a directory entry
 * name, using the encoding loaded for @parent's filesystem.
 *
 * Returns: 0 when @name and @entry match, a positive value when they do
 * not, or a negative errno on error.
 *
 * If utf8_strncasecmp() reports a failure, strict-mode filesystems get
 * -EINVAL; otherwise both names are treated as opaque byte sequences and
 * match only when they have the same length and identical bytes.
 */
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
		    const struct qstr *entry)
{
	const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
	int cmp = utf8_strncasecmp(sbi->s_encoding, name, entry);

	if (cmp >= 0)
		return cmp;

	if (ext4_has_strict_mode(sbi))
		return -EINVAL;

	if (name->len != entry->len)
		return 1;

	return memcmp(name->name, entry->name, name->len) ? 1 : 0;
}
#endif
/*
* Test whether a directory entry matches the filename being searched for.
*
* Return: %true if the directory entry matches, otherwise %false.
*/
static inline bool ext4_match(const struct ext4_filename *fname,
static inline bool ext4_match(const struct inode *parent,
const struct ext4_filename *fname,
const struct ext4_dir_entry_2 *de)
{
struct fscrypt_name f;
#ifdef CONFIG_UNICODE
const struct qstr entry = {.name = de->name, .len = de->name_len};
#endif
if (!de->inode)
return false;
......@@ -1270,6 +1308,12 @@ static inline bool ext4_match(const struct ext4_filename *fname,
#ifdef CONFIG_FS_ENCRYPTION
f.crypto_buf = fname->crypto_buf;
#endif
#ifdef CONFIG_UNICODE
if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
#endif
return fscrypt_match_name(&f, de->name, de->name_len);
}
......@@ -1290,7 +1334,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
if ((char *) de + de->name_len <= dlimit &&
ext4_match(fname, de)) {
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
......@@ -1588,6 +1632,17 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-EPERM);
}
}
#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
* from being cached.
*/
return NULL;
}
#endif
return d_splice_alias(inode, dentry);
}
......@@ -1798,7 +1853,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
if (ext4_check_dir_entry(dir, NULL, de, bh,
buf, buf_size, offset))
return -EFSCORRUPTED;
if (ext4_match(fname, de))
if (ext4_match(dir, fname, de))
return -EEXIST;
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
......@@ -1983,7 +2038,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
if (fname->hinfo.hash_version <= DX_HASH_TEA)
fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo);
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), &fname->hinfo);
memset(frames, 0, sizeof(frames));
frame = frames;
......@@ -2036,6 +2091,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct ext4_dir_entry_2 *de;
struct ext4_dir_entry_tail *t;
struct super_block *sb;
struct ext4_sb_info *sbi;
struct ext4_filename fname;
int retval;
int dx_fallback=0;
......@@ -2047,10 +2103,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
csum_size = sizeof(struct ext4_dir_entry_tail);
sb = dir->i_sb;
sbi = EXT4_SB(sb);
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
#ifdef CONFIG_UNICODE
if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
utf8_validate(sbi->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
if (retval)
return retval;
......@@ -2975,6 +3038,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, along with returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
end_rmdir:
brelse(bh);
if (handle)
......@@ -3044,6 +3118,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, along with returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
end_unlink:
brelse(bh);
if (handle)
......
......@@ -126,9 +126,10 @@ int ext4_mpage_readpages(struct address_space *mapping,
int fully_mapped = 1;
unsigned first_hole = blocks_per_page;
prefetchw(&page->flags);
if (pages) {
page = lru_to_page(pages);
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping, page->index,
readahead_gfp_mask(mapping)))
......
......@@ -874,6 +874,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
if (unlikely(err)) {
ext4_std_error(sb, err);
iloc.bh = NULL;
goto errout;
}
brelse(dind);
......
......@@ -42,6 +42,7 @@
#include <linux/cleancache.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
......@@ -1054,6 +1055,9 @@ static void ext4_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
kfree(sbi);
}
......@@ -1749,6 +1753,36 @@ static const struct mount_opts {
{Opt_err, 0, 0}
};
#ifdef CONFIG_UNICODE
/*
 * Table of known superblock encodings: each entry maps the on-disk
 * s_encoding magic to a charset name and version string.
 */
static const struct ext4_sb_encodings {
	__u16 magic;
	char *name;
	char *version;
} ext4_sb_encoding_map[] = {
	{
		.magic   = EXT4_ENC_UTF8_12_1,
		.name    = "utf8",
		.version = "12.1.0",
	},
};
/*
 * Look up the encoding named by the superblock.
 *
 * On a match in ext4_sb_encoding_map, store the table entry in *@encoding
 * and the superblock's encoding flags (CPU byte order) in *@flags, then
 * return 0.  Returns -EINVAL, leaving both outputs untouched, when the
 * superblock's s_encoding value is not in the table.
 */
static int ext4_sb_read_encoding(const struct ext4_super_block *es,
				 const struct ext4_sb_encodings **encoding,
				 __u16 *flags)
{
	const __u16 wanted = le16_to_cpu(es->s_encoding);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(ext4_sb_encoding_map); idx++) {
		if (ext4_sb_encoding_map[idx].magic != wanted)
			continue;

		*encoding = &ext4_sb_encoding_map[idx];
		*flags = le16_to_cpu(es->s_encoding_flags);
		return 0;
	}

	return -EINVAL;
}
#endif
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
substring_t *args, unsigned long *journal_devnum,
unsigned int *journal_ioprio, int is_remount)
......@@ -2875,6 +2909,15 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
#ifndef CONFIG_UNICODE
if (ext4_has_feature_casefold(sb)) {
ext4_msg(sb, KERN_ERR,
"Filesystem with casefold feature cannot be "
"mounted without CONFIG_UNICODE");
return 0;
}
#endif
if (readonly)
return 1;
......@@ -3496,6 +3539,37 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
static void ext4_clamp_want_extra_isize(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
sbi->s_want_extra_isize == 0) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
if (ext4_has_feature_extra_isize(sb)) {
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_want_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_want_extra_isize);
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_min_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_min_extra_isize);
}
}
/* Check if enough inode space is available */
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
sbi->s_inode_size) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
ext4_msg(sb, KERN_INFO,
"required extra inode space not available");
}
}
static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
......@@ -3722,6 +3796,43 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
&journal_ioprio, 0))
goto failed_mount;
#ifdef CONFIG_UNICODE
if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
if (ext4_has_feature_encrypt(sb)) {
ext4_msg(sb, KERN_ERR,
"Can't mount with encoding and encryption");
goto failed_mount;
}
if (ext4_sb_read_encoding(es, &encoding_info,
&encoding_flags)) {
ext4_msg(sb, KERN_ERR,
"Encoding requested by superblock is unknown");
goto failed_mount;
}
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
ext4_msg(sb, KERN_ERR,
"can't mount with superblock charset: %s-%s "
"not supported by the kernel. flags: 0x%x.",
encoding_info->name, encoding_info->version,
encoding_flags);
goto failed_mount;
}
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
sbi->s_encoding = encoding;
sbi->s_encoding_flags = encoding_flags;
}
#endif
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
"with data=journal disables delayed "
......@@ -4219,7 +4330,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"data=, fs mounted w/o journal");
goto failed_mount_wq;
}
sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM;
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
clear_opt(sb, DATA_FLAGS);
sbi->s_journal = NULL;
......@@ -4354,6 +4465,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
iput(root);
goto failed_mount4;
}
#ifdef CONFIG_UNICODE
if (sbi->s_encoding)
sb->s_d_op = &ext4_dentry_ops;
#endif
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
......@@ -4368,30 +4485,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
} else if (ret)
goto failed_mount4a;
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
sbi->s_want_extra_isize == 0) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
if (ext4_has_feature_extra_isize(sb)) {
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_want_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_want_extra_isize);
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_min_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_min_extra_isize);
}
}
/* Check if enough inode space is available */
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
sbi->s_inode_size) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
ext4_msg(sb, KERN_INFO, "required extra inode space not"
"available");
}
ext4_clamp_want_extra_isize(sb);
ext4_set_resv_clusters(sb);
......@@ -4559,6 +4653,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
#ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
......@@ -5175,6 +5274,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
ext4_clamp_want_extra_isize(sb);
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
......
......@@ -238,6 +238,9 @@ EXT4_ATTR_FEATURE(meta_bg_resize);
#ifdef CONFIG_FS_ENCRYPTION
EXT4_ATTR_FEATURE(encryption);
#endif
#ifdef CONFIG_UNICODE
EXT4_ATTR_FEATURE(casefold);
#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
static struct attribute *ext4_feat_attrs[] = {
......@@ -246,6 +249,9 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(meta_bg_resize),
#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
#endif
#ifdef CONFIG_UNICODE
ATTR_LIST(casefold),
#endif
ATTR_LIST(metadata_csum_seed),
NULL,
......
......@@ -1696,7 +1696,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
/* No failures allowed past this point. */
if (!s->not_found && here->e_value_size && here->e_value_offs) {
if (!s->not_found && here->e_value_size && !here->e_value_inum) {
/* Remove the old value. */
void *first_val = s->base + min_offs;
size_t offs = le16_to_cpu(here->e_value_offs);
......
......@@ -132,7 +132,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
return;
}
spin_lock(&journal->j_list_lock);
nblocks = jbd2_space_needed(journal);
space_left = jbd2_log_space_left(journal);
if (space_left < nblocks) {
int chkpt = journal->j_checkpoint_transactions != NULL;
......
......@@ -1350,6 +1350,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
journal_superblock_t *sb = journal->j_superblock;
int ret;
/* Buffer got discarded which means block device got invalidated */
if (!buffer_mapped(bh))
return -EIO;
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
......
mkutf8data
utf8data.h
#
# UTF-8 normalization
#
config UNICODE
bool "UTF-8 normalization and casefolding support"
help
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
support.
config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
depends on UNICODE
default n
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_UNICODE) += unicode.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
unicode-y := utf8-norm.o utf8-core.o
$(obj)/utf8-norm.o: $(obj)/utf8data.h
# In the normal build, the checked-in utf8data.h is just shipped.
#
# To generate utf8data.h from UCD, put *.txt files in this directory
# and pass REGENERATE_UTF8DATA=1 from the command line.
ifdef REGENERATE_UTF8DATA
quiet_cmd_utf8data = GEN $@
cmd_utf8data = $< \
-a $(srctree)/$(src)/DerivedAge.txt \
-c $(srctree)/$(src)/DerivedCombiningClass.txt \
-p $(srctree)/$(src)/DerivedCoreProperties.txt \
-d $(srctree)/$(src)/UnicodeData.txt \
-f $(srctree)/$(src)/CaseFolding.txt \
-n $(srctree)/$(src)/NormalizationCorrections.txt \
-t $(srctree)/$(src)/NormalizationTest.txt \
-o $@
$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
$(call if_changed,utf8data)
else
$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
$(call if_changed,shipped)
endif
targets += utf8data.h
hostprogs-y += mkutf8data
The utf8data.h file in this directory is generated from the Unicode
Character Database for version 12.1.0 of the Unicode standard.
The full set of files can be found here:
http://www.unicode.org/Public/12.1.0/ucd/
Note!
The URLs listed below are not stable. That's because Unicode 12.1.0
has not been officially released yet; it is scheduled to be released
on May 8, 2019. We are taking Unicode 12.1.0 a few weeks early because it
contains a new Japanese character which is required in order to
specify Japanese dates after May 1, 2019, when Crown Prince Naruhito
ascends to the Chrysanthemum Throne. (Isn't internationalization fun?
The abdication of Emperor Akihito of Japan is requiring dozens of
software packages to be updated with only a month's notice. :-)
We will update the URLs (and make any needed changes to the checksums)
after the final Unicode 12.1.0 is released.
Individual source links:
https://www.unicode.org/Public/12.1.0/ucd/CaseFolding-12.1.0d2.txt
https://www.unicode.org/Public/12.1.0/ucd/DerivedAge-12.1.0d3.txt
https://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedCombiningClass-12.1.0d2.txt
https://www.unicode.org/Public/12.1.0/ucd/DerivedCoreProperties-12.1.0d2.txt
https://www.unicode.org/Public/12.1.0/ucd/NormalizationCorrections-12.1.0d1.txt
https://www.unicode.org/Public/12.1.0/ucd/NormalizationTest-12.1.0d3.txt
https://www.unicode.org/Public/12.1.0/ucd/UnicodeData-12.1.0d2.txt
md5sums (verify by running "md5sum -c README.utf8data"):
900e76da1d822a160fd6b8c0b1d70094 CaseFolding.txt
131256380bff4fea8ad4a851616f2f10 DerivedAge.txt
e731a4089b30002144e107e3d6f8d1fa DerivedCombiningClass.txt
a47c9fbd7ff92a9b261ba9831e68778a DerivedCoreProperties.txt
fcab6dad15e440879d92f315978f93d3 NormalizationCorrections.txt
f9ff1c55a60decf436100f791b44aa98 NormalizationTest.txt
755f6af699f8c8d2d958da411f78f6c6 UnicodeData.txt
sha1sums (verify by running "sha1sum -c README.utf8data"):
dc9245f6803c4ac99555c361f5052e0b13eb779b CaseFolding.txt
3281104f237184cdb5d869e86eb8573678ada7da DerivedAge.txt
2f5f995ccb96e0fa84b15151b35d5e2681535175 DerivedCombiningClass.txt
5b8698a3fcd5018e1987f296b02e2c17e696415e DerivedCoreProperties.txt
cd83935fbc012345d8792d2c704f69497e753835 NormalizationCorrections.txt
ea419aae505b337b0d99a83fa83fe58ddff7c19f NormalizationTest.txt
dc973c0fc93d6f09d9ab9f70d1c9f89c447f0526 UnicodeData.txt
To update to the newer version of the Unicode standard, the latest
released version of the UCD can be found here:
http://www.unicode.org/Public/UCD/latest/
Then, build under fs/unicode/ with REGENERATE_UTF8DATA=1:
make REGENERATE_UTF8DATA=1 fs/unicode/
After sanity checking the newly generated utf8data.h file (the
version generated from the 12.1.0 UCD should be 4,109 lines long, and
have a total size of 324k) and/or comparing it with the older version
of utf8data.h_shipped, rename it to utf8data.h_shipped.
If you are a kernel developer updating to a newer version of the
Unicode Character Database, please update this README.utf8data file
with the version of the UCD that was used, the md5sum and sha1sums of
the *.txt files, before checking in the new versions of the utf8data.h
and README.utf8data files.
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/parser.h>
#include <linux/errno.h>
#include <linux/unicode.h>
#include "utf8n.h"
/*
 * utf8_validate - check whether a name can be normalized
 * @um:  unicode map whose version selects the NFD (nfdi) table
 * @str: name to check; str->len bytes starting at str->name are examined
 *
 * Asks utf8nlen() for the normalized length of the name and only looks at
 * whether that call failed.
 *
 * Returns 0 if utf8nlen() accepts the name, -1 if it reports an error.
 */
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
{
	const struct utf8data *nfdi = utf8nfdi(um->version);
	int valid = utf8nlen(nfdi, str->name, str->len) >= 0;

	return valid ? 0 : -1;
}
EXPORT_SYMBOL(utf8_validate);
int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
return -EINVAL;
do {
c1 = utf8byte(&cur1);
c2 = utf8byte(&cur2);
if (c1 < 0 || c2 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);
return 0;
}
EXPORT_SYMBOL(utf8_strncmp);
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
return -EINVAL;
do {
c1 = utf8byte(&cur1);
c2 = utf8byte(&cur2);
if (c1 < 0 || c2 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);
return 0;
}
EXPORT_SYMBOL(utf8_strncasecmp);
/*
 * utf8_casefold - write the casefolded, normalized form of a name
 * @um:   unicode map whose version selects the casefolding (nfdicf) table
 * @str:  name to fold
 * @dest: output buffer
 * @dlen: size of @dest in bytes
 *
 * Bytes produced by utf8byte() are copied into @dest, including the
 * terminating 0 byte.
 *
 * Returns the number of bytes written before the terminator on success.
 * Returns -EINVAL if the cursor cannot be set up, if utf8byte() returns -1,
 * or if @dest fills up before the terminator is reached.
 */
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
		  unsigned char *dest, size_t dlen)
{
	const struct utf8data *nfdicf = utf8nfdicf(um->version);
	struct utf8cursor cur;
	size_t out;

	if (utf8ncursor(&cur, nfdicf, str->name, str->len) < 0)
		return -EINVAL;

	for (out = 0; out < dlen; out++) {
		int byte = utf8byte(&cur);

		dest[out] = byte;
		if (byte == 0)
			return out;
		if (byte == -1)
			return -EINVAL;
	}

	/* Ran out of room before seeing the terminator. */
	return -EINVAL;
}
EXPORT_SYMBOL(utf8_casefold);
/*
 * utf8_normalize - write the NFD (nfdi) normalized form of a name
 * @um:   unicode map whose version selects the normalization table
 * @str:  name to normalize
 * @dest: output buffer
 * @dlen: size of @dest in bytes
 *
 * Bytes produced by utf8byte() are copied into @dest, including the
 * terminating 0 byte.
 *
 * Returns the number of bytes written before the terminator on success.
 * Returns -EINVAL if the cursor cannot be set up, if utf8byte() returns -1,
 * or if @dest fills up before the terminator is reached.
 */
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
		   unsigned char *dest, size_t dlen)
{
	const struct utf8data *data = utf8nfdi(um->version);
	struct utf8cursor cur;
	/* Same unsigned type as dlen, matching utf8_casefold(). */
	size_t nlen;

	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
		return -EINVAL;

	for (nlen = 0; nlen < dlen; nlen++) {
		int c = utf8byte(&cur);

		dest[nlen] = c;
		if (!c)
			return nlen;
		if (c == -1)
			break;
	}
	return -EINVAL;
}
EXPORT_SYMBOL(utf8_normalize);
/*
 * utf8_parse_version - split a "major.minor.revision" string into numbers
 * @version: NUL-terminated version string, e.g. "12.1.0"
 * @maj:     receives the major number
 * @min:     receives the minor number
 * @rev:     receives the revision number
 *
 * Returns 0 on success.  Returns -EINVAL if @version does not fit in the
 * local buffer, does not match the "%d.%d.%d" pattern, or has a component
 * that cannot be converted or does not fit in 8 bits.
 */
static int utf8_parse_version(const char *version, unsigned int *maj,
			      unsigned int *min, unsigned int *rev)
{
	substring_t args[3];
	char version_string[12];
	/* Constant table: no need to rebuild it on the stack for every call. */
	static const struct match_token token[] = {
		{1, "%d.%d.%d"},
		{0, NULL}
	};

	/*
	 * strscpy() always NUL-terminates and reports truncation; an
	 * over-long version string is rejected rather than parsed from an
	 * unterminated buffer.
	 */
	if (strscpy(version_string, version, sizeof(version_string)) < 0)
		return -EINVAL;

	if (match_token(version_string, token, args) != 1)
		return -EINVAL;

	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
	    match_int(&args[2], rev))
		return -EINVAL;

	/*
	 * The caller hands these to utf8version_is_supported(), which takes
	 * u8 arguments; refuse values that would be silently truncated.
	 * Negative inputs show up here as very large unsigned values.
	 */
	if (*maj > 0xff || *min > 0xff || *rev > 0xff)
		return -EINVAL;

	return 0;
}
/*
 * utf8_load - allocate a unicode map for a given Unicode version
 * @version: "major.minor.revision" string, or NULL to pick the latest
 *           version reported by utf8version_latest()
 *
 * Returns a newly allocated map with charset "UTF-8" on success; the
 * caller releases it with utf8_unload().  On failure an ERR_PTR() is
 * returned, never NULL: -EINVAL if @version cannot be parsed or is not
 * supported, -ENOMEM if the allocation fails.
 */
struct unicode_map *utf8_load(const char *version)
{
	struct unicode_map *um;
	int unicode_version;

	if (version) {
		unsigned int maj, min, rev;

		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
			return ERR_PTR(-EINVAL);

		if (!utf8version_is_supported(maj, min, rev))
			return ERR_PTR(-EINVAL);

		unicode_version = UNICODE_AGE(maj, min, rev);
	} else {
		unicode_version = utf8version_latest();
		/* Newline-terminated so the record is emitted on its own line. */
		pr_warn("UTF-8 version not specified. Assuming latest supported version (%d.%d.%d).\n",
			(unicode_version >> 16) & 0xff,
			(unicode_version >> 8) & 0xff,
			(unicode_version & 0xff));
	}

	um = kzalloc(sizeof(*um), GFP_KERNEL);
	if (!um)
		return ERR_PTR(-ENOMEM);

	um->charset = "UTF-8";
	um->version = unicode_version;

	return um;
}
EXPORT_SYMBOL(utf8_load);
/*
 * utf8_unload - release a unicode map
 * @um: map to free; utf8_load() allocates these with kzalloc()
 *
 * The pointer is handed straight to kfree(); nothing else is torn down.
 */
void utf8_unload(struct unicode_map *um)
{
kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
MODULE_LICENSE("GPL v2");
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) 2014 SGI.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef UTF8NORM_H
#define UTF8NORM_H
#include <linux/types.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/module.h>
/*
 * Encoding a unicode version number as a single unsigned int.
 *
 * The major number is shifted left by 16 bits, the minor number by 8 bits,
 * and the revision is OR-ed into the low bits, so minor and revision must
 * each fit in 8 bits for the fields not to overlap.
 */
#define UNICODE_MAJ_SHIFT (16)
#define UNICODE_MIN_SHIFT (8)
#define UNICODE_AGE(MAJ, MIN, REV) \
(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
((unsigned int)(REV)))
/* Highest unicode version supported by the data tables. */
extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
extern int utf8version_latest(void);
/*
* Look for the correct const struct utf8data for a unicode version.
* Returns NULL if the version requested is too new.
*
* Two normalization forms are supported: nfdi and nfdicf.
*
* nfdi:
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
*
* nfdicf:
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
* - Apply a full casefold (C + F).
*/
extern const struct utf8data *utf8nfdi(unsigned int maxage);
extern const struct utf8data *utf8nfdicf(unsigned int maxage);
/*
* Determine the maximum age of any unicode character in the string.
* Returns 0 if only unassigned code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
extern int utf8agemax(const struct utf8data *data, const char *s);
extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
/*
* Determine the minimum age of any unicode character in the string.
* Returns 0 if any unassigned code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
extern int utf8agemin(const struct utf8data *data, const char *s);
extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
/*
* Determine the length of the normalized form of the string,
* excluding any terminating NUL byte.
* Returns 0 if only ignorable code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
extern ssize_t utf8len(const struct utf8data *data, const char *s);
extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
/* Needed in struct utf8cursor below. */
#define UTF8HANGULLEAF (12)
/*
* Cursor structure used by the normalizer.
*/
/*
 * Iteration state for producing a normalized string one byte at a time.
 * Set up by utf8cursor()/utf8ncursor() and advanced by utf8byte().
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and the initializer prototypes only; confirm against the
 * normalizer implementation before relying on them.
 */
struct utf8cursor {
/* presumably the normalization table passed to utf8cursor()/utf8ncursor() */
const struct utf8data *data;
/* presumably the input string and a second position within it -- TODO confirm */
const char *s;
const char *p;
/* presumably saved copies of s and p -- TODO confirm */
const char *ss;
const char *sp;
/* presumably remaining input length and a saved copy of it -- TODO confirm */
unsigned int len;
unsigned int slen;
/* presumably canonical combining class bookkeeping -- TODO confirm */
short int ccc;
short int nccc;
/* scratch buffer of UTF8HANGULLEAF bytes */
unsigned char hangul[UTF8HANGULLEAF];
};
/*
* Initialize a utf8cursor to normalize a string.
* Returns 0 on success.
* Returns -1 on failure.
*/
extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
const char *s);
extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
const char *s, size_t len);
/*
* Get the next byte in the normalization.
* Returns a value > 0 && < 256 on success.
* Returns 0 when the end of the normalization is reached.
* Returns -1 if the string being normalized is not valid UTF-8.
*/
extern int utf8byte(struct utf8cursor *u8c);
#endif /* UTF8NORM_H */
......@@ -1963,6 +1963,7 @@ struct super_operations {
#define S_DAX 0 /* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD 32768 /* Casefolded file */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
......@@ -2003,6 +2004,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
#define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD)
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_UNICODE_H
#define _LINUX_UNICODE_H
#include <linux/init.h>
#include <linux/dcache.h>
struct unicode_map {
const char *charset;
int version;
};
int utf8_validate(const struct unicode_map *um, const struct qstr *str);
int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2);
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
struct unicode_map *utf8_load(const char *version);
void utf8_unload(struct unicode_map *um);
#endif /* _LINUX_UNICODE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment