Commit e50e5129, authored by Andreas Dilger, committed by Theodore Ts'o

ext4: xattr-in-inode support

Large xattr support is implemented for EXT4_FEATURE_INCOMPAT_EA_INODE.

If the size of an xattr value is larger than will fit in a single
external block, then the xattr value will be saved into the body
of an external xattr inode.

This also helps support a larger number of xattrs, since only the headers
will be stored in the in-inode space or the single external block.

The inode is referenced from the xattr header via "e_value_inum",
which was formerly "e_value_block", but that field was never used.
The e_value_size still contains the xattr size so that listing
xattrs does not need to look up the inode if the data is not accessed.

struct ext4_xattr_entry {
        __u8    e_name_len;     /* length of name */
        __u8    e_name_index;   /* attribute name index */
        __le16  e_value_offs;   /* offset in disk block of value */
        __le32  e_value_inum;   /* inode in which value is stored */
        __le32  e_value_size;   /* size of attribute value */
        __le32  e_hash;         /* hash value of name and value */
        char    e_name[0];      /* attribute name */
};

The xattr inode is marked with the EXT4_EA_INODE_FL flag and also
holds a back-reference to the owning inode in its i_mtime field,
allowing ext4 and e2fsck to verify that the correct inode is accessed.

[ Applied fix by Dan Carpenter to avoid freeing an ERR_PTR. ]

Lustre-Jira: https://jira.hpdd.intel.com/browse/LU-80
Lustre-bugzilla: https://bugzilla.lustre.org/show_bug.cgi?id=4424
Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
Signed-off-by: James Simmons <uja.ornl@gmail.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
parent e08ac99f
...@@ -1797,6 +1797,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT) ...@@ -1797,6 +1797,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \ EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \ EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \ EXT4_FEATURE_INCOMPAT_FLEX_BG| \
EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \ EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \ EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \ EXT4_FEATURE_INCOMPAT_ENCRYPT | \
...@@ -2230,6 +2231,12 @@ struct mmpd_data { ...@@ -2230,6 +2231,12 @@ struct mmpd_data {
*/ */
#define EXT4_MMP_MAX_CHECK_INTERVAL 300UL #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL
/*
* Maximum size of an xattr value for EXT4_FEATURE_INCOMPAT_EA_INODE: 1 MiB.
* This limit is arbitrary, but is reasonable for the xattr API.
*/
#define EXT4_XATTR_MAX_LARGE_EA_SIZE (1024 * 1024)
/* /*
* Function prototypes * Function prototypes
*/ */
...@@ -2242,6 +2249,10 @@ struct mmpd_data { ...@@ -2242,6 +2249,10 @@ struct mmpd_data {
# define ATTRIB_NORET __attribute__((noreturn)) # define ATTRIB_NORET __attribute__((noreturn))
# define NORET_AND noreturn, # define NORET_AND noreturn,
struct ext4_xattr_ino_array {
unsigned int xia_count; /* # of used item in the array */
unsigned int xia_inodes[0];
};
/* bitmap.c */ /* bitmap.c */
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
...@@ -2489,6 +2500,7 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); ...@@ -2489,6 +2500,7 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *); extern void ext4_set_inode_flags(struct inode *);
extern int ext4_alloc_da_blocks(struct inode *inode); extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode); extern void ext4_set_aops(struct inode *inode);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
......
...@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ...@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well. * as writing the quota to disk may need the lock as well.
*/ */
dquot_initialize(inode); dquot_initialize(inode);
ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode); dquot_free_inode(inode);
dquot_drop(inode); dquot_drop(inode);
......
...@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, ...@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
/* Compute min_offs. */ /* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_block && entry->e_value_size) { if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs); size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs) if (offs < min_offs)
min_offs = offs; min_offs = offs;
......
...@@ -139,8 +139,6 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset, ...@@ -139,8 +139,6 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
unsigned int length); unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);
/* /*
* Test whether an inode is a fast symlink. * Test whether an inode is a fast symlink.
...@@ -189,6 +187,8 @@ void ext4_evict_inode(struct inode *inode) ...@@ -189,6 +187,8 @@ void ext4_evict_inode(struct inode *inode)
{ {
handle_t *handle; handle_t *handle;
int err; int err;
int extra_credits = 3;
struct ext4_xattr_ino_array *lea_ino_array = NULL;
trace_ext4_evict_inode(inode); trace_ext4_evict_inode(inode);
...@@ -238,8 +238,8 @@ void ext4_evict_inode(struct inode *inode) ...@@ -238,8 +238,8 @@ void ext4_evict_inode(struct inode *inode)
* protection against it * protection against it
*/ */
sb_start_intwrite(inode->i_sb); sb_start_intwrite(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
ext4_blocks_for_truncate(inode)+3); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle)); ext4_std_error(inode->i_sb, PTR_ERR(handle));
/* /*
...@@ -251,9 +251,36 @@ void ext4_evict_inode(struct inode *inode) ...@@ -251,9 +251,36 @@ void ext4_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb); sb_end_intwrite(inode->i_sb);
goto no_delete; goto no_delete;
} }
if (IS_SYNC(inode)) if (IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
/*
* Delete xattr inode before deleting the main inode.
*/
err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
if (err) {
ext4_warning(inode->i_sb,
"couldn't delete inode's xattr (err %d)", err);
goto stop_handle;
}
if (!IS_NOQUOTA(inode))
extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
if (!ext4_handle_has_enough_credits(handle,
ext4_blocks_for_truncate(inode) + extra_credits)) {
err = ext4_journal_extend(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err > 0)
err = ext4_journal_restart(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
goto stop_handle;
}
}
inode->i_size = 0; inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode); err = ext4_mark_inode_dirty(handle, inode);
if (err) { if (err) {
...@@ -277,10 +304,10 @@ void ext4_evict_inode(struct inode *inode) ...@@ -277,10 +304,10 @@ void ext4_evict_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from * enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field. * the orphan list and set the dtime field.
*/ */
if (!ext4_handle_has_enough_credits(handle, 3)) { if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
err = ext4_journal_extend(handle, 3); err = ext4_journal_extend(handle, extra_credits);
if (err > 0) if (err > 0)
err = ext4_journal_restart(handle, 3); err = ext4_journal_restart(handle, extra_credits);
if (err != 0) { if (err != 0) {
ext4_warning(inode->i_sb, ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err); "couldn't extend journal (err %d)", err);
...@@ -315,8 +342,12 @@ void ext4_evict_inode(struct inode *inode) ...@@ -315,8 +342,12 @@ void ext4_evict_inode(struct inode *inode)
ext4_clear_inode(inode); ext4_clear_inode(inode);
else else
ext4_free_inode(handle, inode); ext4_free_inode(handle, inode);
ext4_journal_stop(handle); ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb); sb_end_intwrite(inode->i_sb);
if (lea_ino_array != NULL)
ext4_xattr_inode_array_free(inode, lea_ino_array);
return; return;
no_delete: no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
...@@ -5504,7 +5535,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks, ...@@ -5504,7 +5535,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
* *
* Also account for superblock, inode, quota and xattr blocks * Also account for superblock, inode, quota and xattr blocks
*/ */
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents) int pextents)
{ {
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
......
This diff is collapsed.
...@@ -44,7 +44,7 @@ struct ext4_xattr_entry { ...@@ -44,7 +44,7 @@ struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */ __u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */ __u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */ __le16 e_value_offs; /* offset in disk block of value */
__le32 e_value_block; /* disk block attribute is stored on (n/i) */ __le32 e_value_inum; /* inode in which the value is stored */
__le32 e_value_size; /* size of attribute value */ __le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */ __le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */ char e_name[0]; /* attribute name */
...@@ -69,6 +69,26 @@ struct ext4_xattr_entry { ...@@ -69,6 +69,26 @@ struct ext4_xattr_entry {
EXT4_I(inode)->i_extra_isize)) EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
/*
* Link EA inode back to parent one using i_mtime field.
* Extra integer type conversion added to ignore higher
* bits in i_mtime.tv_sec which might be set by ext4_get()
*/
#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \
do { \
(inode)->i_mtime.tv_sec = inum; \
} while(0)
#define EXT4_XATTR_INODE_GET_PARENT(inode) \
((__u32)(inode)->i_mtime.tv_sec)
/*
* The minimum size of EA value when you start storing it in an external inode
* size of block - size of header - size of 1 entry - 4 null bytes
*/
#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
#define BFIRST(bh) ENTRY(BHDR(bh)+1) #define BFIRST(bh) ENTRY(BHDR(bh)+1)
...@@ -77,10 +97,11 @@ struct ext4_xattr_entry { ...@@ -77,10 +97,11 @@ struct ext4_xattr_entry {
#define EXT4_ZERO_XATTR_VALUE ((void *)-1) #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
struct ext4_xattr_info { struct ext4_xattr_info {
int name_index;
const char *name; const char *name;
const void *value; const void *value;
size_t value_len; size_t value_len;
int name_index;
int in_inode;
}; };
struct ext4_xattr_search { struct ext4_xattr_search {
...@@ -140,7 +161,13 @@ extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); ...@@ -140,7 +161,13 @@ extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *); extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
int *err);
extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_ino_array **array);
extern void ext4_xattr_inode_array_free(struct inode *inode,
struct ext4_xattr_ino_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle); struct ext4_inode *raw_inode, handle_t *handle);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment