Commit 4ed8a6bb authored by Mark Fasheh's avatar Mark Fasheh

ocfs2: Store dir index records inline

Allow us to store a small number of directory index records in the
ocfs2_dx_root_block. This saves us a disk read on small to medium sized
directories (less than about 250 entries). The inline root is automatically
turned into a root block with extents if the directory size increases beyond
it's capacity.
Signed-off-by: default avatarMark Fasheh <mfasheh@suse.com>
Acked-by: default avatarJoel Becker <joel.becker@oracle.com>
parent 9b7895ef
...@@ -151,6 +151,7 @@ static void ocfs2_init_dir_trailer(struct inode *inode, ...@@ -151,6 +151,7 @@ static void ocfs2_init_dir_trailer(struct inode *inode,
void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
{ {
brelse(res->dl_dx_root_bh);
brelse(res->dl_leaf_bh); brelse(res->dl_leaf_bh);
brelse(res->dl_dx_leaf_bh); brelse(res->dl_dx_leaf_bh);
} }
...@@ -162,6 +163,11 @@ static int ocfs2_dir_indexed(struct inode *inode) ...@@ -162,6 +163,11 @@ static int ocfs2_dir_indexed(struct inode *inode)
return 0; return 0;
} }
static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
{
return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
}
/* /*
* Hashing code adapted from ext3 * Hashing code adapted from ext3
*/ */
...@@ -799,13 +805,18 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, ...@@ -799,13 +805,18 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
* Returns the block index, from the start of the cluster which this * Returns the block index, from the start of the cluster which this
* hash belongs too. * hash belongs too.
*/ */
static unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb, static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
struct ocfs2_dx_hinfo *hinfo) u32 minor_hash)
{ {
u32 minor_hash = hinfo->minor_hash;
return minor_hash & osb->osb_dx_mask; return minor_hash & osb->osb_dx_mask;
} }
static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
struct ocfs2_dx_hinfo *hinfo)
{
return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
}
static int ocfs2_dx_dir_lookup(struct inode *inode, static int ocfs2_dx_dir_lookup(struct inode *inode,
struct ocfs2_extent_list *el, struct ocfs2_extent_list *el,
struct ocfs2_dx_hinfo *hinfo, struct ocfs2_dx_hinfo *hinfo,
...@@ -855,7 +866,7 @@ static int ocfs2_dx_dir_lookup(struct inode *inode, ...@@ -855,7 +866,7 @@ static int ocfs2_dx_dir_lookup(struct inode *inode,
static int ocfs2_dx_dir_search(const char *name, int namelen, static int ocfs2_dx_dir_search(const char *name, int namelen,
struct inode *dir, struct inode *dir,
struct ocfs2_extent_list *dr_el, struct ocfs2_dx_root_block *dx_root,
struct ocfs2_dir_lookup_result *res) struct ocfs2_dir_lookup_result *res)
{ {
int ret, i, found; int ret, i, found;
...@@ -866,9 +877,18 @@ static int ocfs2_dx_dir_search(const char *name, int namelen, ...@@ -866,9 +877,18 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
struct buffer_head *dir_ent_bh = NULL; struct buffer_head *dir_ent_bh = NULL;
struct ocfs2_dir_entry *dir_ent = NULL; struct ocfs2_dir_entry *dir_ent = NULL;
struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo; struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
struct ocfs2_extent_list *dr_el;
struct ocfs2_dx_entry_list *entry_list;
ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo); ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
if (ocfs2_dx_root_inline(dx_root)) {
entry_list = &dx_root->dr_entries;
goto search;
}
dr_el = &dx_root->dr_list;
ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys); ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
...@@ -893,12 +913,15 @@ static int ocfs2_dx_dir_search(const char *name, int namelen, ...@@ -893,12 +913,15 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
le16_to_cpu(dx_leaf->dl_list.de_num_used), le16_to_cpu(dx_leaf->dl_list.de_num_used),
le16_to_cpu(dx_leaf->dl_list.de_count)); le16_to_cpu(dx_leaf->dl_list.de_count));
entry_list = &dx_leaf->dl_list;
search:
/* /*
* Empty leaf is legal, so no need to check for that. * Empty leaf is legal, so no need to check for that.
*/ */
found = 0; found = 0;
for (i = 0; i < le16_to_cpu(dx_leaf->dl_list.de_num_used); i++) { for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
dx_entry = &dx_leaf->dl_list.de_entries[i]; dx_entry = &entry_list->de_entries[i];
if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash) if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
|| hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash)) || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
...@@ -982,14 +1005,15 @@ static int ocfs2_find_entry_dx(const char *name, int namelen, ...@@ -982,14 +1005,15 @@ static int ocfs2_find_entry_dx(const char *name, int namelen,
} }
dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
ret = ocfs2_dx_dir_search(name, namelen, dir, &dx_root->dr_list, ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
lookup);
if (ret) { if (ret) {
if (ret != -ENOENT) if (ret != -ENOENT)
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
lookup->dl_dx_root_bh = dx_root_bh;
dx_root_bh = NULL;
out: out:
brelse(di_bh); brelse(di_bh);
brelse(dx_root_bh); brelse(dx_root_bh);
...@@ -1126,64 +1150,88 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, ...@@ -1126,64 +1150,88 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
return status; return status;
} }
static void ocfs2_dx_leaf_remove_entry(struct ocfs2_dx_leaf *dx_leaf, int index) static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
int index)
{ {
struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; int num_used = le16_to_cpu(entry_list->de_num_used);
int num_used = le16_to_cpu(dl_list->de_num_used);
if (num_used == 1 || index == (num_used - 1)) if (num_used == 1 || index == (num_used - 1))
goto clear; goto clear;
memmove(&dl_list->de_entries[index], &dl_list->de_entries[index + 1], memmove(&entry_list->de_entries[index],
&entry_list->de_entries[index + 1],
(num_used - index - 1)*sizeof(struct ocfs2_dx_entry)); (num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
clear: clear:
num_used--; num_used--;
memset(&dl_list->de_entries[num_used], 0, memset(&entry_list->de_entries[num_used], 0,
sizeof(struct ocfs2_dx_entry)); sizeof(struct ocfs2_dx_entry));
dl_list->de_num_used = cpu_to_le16(num_used); entry_list->de_num_used = cpu_to_le16(num_used);
} }
static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
struct ocfs2_dir_lookup_result *lookup) struct ocfs2_dir_lookup_result *lookup)
{ {
int ret, index; int ret, index;
struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
struct buffer_head *leaf_bh = lookup->dl_leaf_bh; struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
struct ocfs2_dx_leaf *dx_leaf; struct ocfs2_dx_leaf *dx_leaf;
struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
struct ocfs2_dx_root_block *dx_root;
struct ocfs2_dx_entry_list *entry_list;
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
if (ocfs2_dx_root_inline(dx_root)) {
entry_list = &dx_root->dr_entries;
} else {
dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
entry_list = &dx_leaf->dl_list;
}
dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
/* Neither of these are a disk corruption - that should have /* Neither of these are a disk corruption - that should have
* been caught by lookup, before we got here. */ * been caught by lookup, before we got here. */
BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_count) <= 0); BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_num_used) <= 0); BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);
index = (char *)dx_entry - (char *)dx_leaf->dl_list.de_entries; index = (char *)dx_entry - (char *)entry_list->de_entries;
index /= sizeof(*dx_entry); index /= sizeof(*dx_entry);
if (index >= le16_to_cpu(dx_leaf->dl_list.de_num_used)) { if (index >= le16_to_cpu(entry_list->de_num_used)) {
mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n", mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno, index, dx_leaf, (unsigned long long)OCFS2_I(dir)->ip_blkno, index,
dx_entry); entry_list, dx_entry);
return -EIO; return -EIO;
} }
mlog(0, "Dir %llu: delete entry at index: %d\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno, index);
/* /*
* Add the index leaf into the journal before removing the * Add the block holding our index into the journal before
* unindexed entry. If we get an error return from * removing the unindexed entry. If we get an error return
* __ocfs2_delete_entry(), then it hasn't removed the entry * from __ocfs2_delete_entry(), then it hasn't removed the
* yet. Likewise, successful return means we *must* remove the * entry yet. Likewise, successful return means we *must*
* indexed entry. * remove the indexed entry.
*
* We're also careful to journal the root tree block here if
* we're going to be adding to the start of the free list.
*/ */
ret = ocfs2_journal_access_dl(handle, dir, lookup->dl_dx_leaf_bh, if (ocfs2_dx_root_inline(dx_root)) {
OCFS2_JOURNAL_ACCESS_WRITE); ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
if (ret) { OCFS2_JOURNAL_ACCESS_WRITE);
mlog_errno(ret); if (ret) {
goto out; mlog_errno(ret);
goto out;
}
} else {
ret = ocfs2_journal_access_dl(handle, dir,
lookup->dl_dx_leaf_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
}
} }
mlog(0, "Dir %llu: delete entry at index: %d\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno, index);
ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry, ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
leaf_bh, leaf_bh->b_data, leaf_bh->b_size); leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
if (ret) { if (ret) {
...@@ -1191,9 +1239,12 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, ...@@ -1191,9 +1239,12 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
goto out; goto out;
} }
ocfs2_dx_leaf_remove_entry(dx_leaf, index); ocfs2_dx_list_remove_entry(entry_list, index);
ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh); if (ocfs2_dx_root_inline(dx_root))
ocfs2_journal_dirty(handle, dx_root_bh);
else
ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
out: out:
return ret; return ret;
...@@ -1290,13 +1341,30 @@ static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf, ...@@ -1290,13 +1341,30 @@ static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1); le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
} }
static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
struct ocfs2_dx_hinfo *hinfo,
u64 dirent_blk)
{
int i;
struct ocfs2_dx_entry *dx_entry;
i = le16_to_cpu(entry_list->de_num_used);
dx_entry = &entry_list->de_entries[i];
memset(dx_entry, 0, sizeof(*dx_entry));
dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
le16_add_cpu(&entry_list->de_num_used, 1);
}
static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
struct ocfs2_dx_hinfo *hinfo, struct ocfs2_dx_hinfo *hinfo,
u64 dirent_blk, u64 dirent_blk,
struct buffer_head *dx_leaf_bh) struct buffer_head *dx_leaf_bh)
{ {
int ret, i; int ret;
struct ocfs2_dx_entry *dx_entry;
struct ocfs2_dx_leaf *dx_leaf; struct ocfs2_dx_leaf *dx_leaf;
ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
...@@ -1307,25 +1375,48 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, ...@@ -1307,25 +1375,48 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
} }
dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
i = le16_to_cpu(dx_leaf->dl_list.de_num_used); ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk);
dx_entry = &dx_leaf->dl_list.de_entries[i]; ocfs2_journal_dirty(handle, dx_leaf_bh);
memset(dx_entry, 0, sizeof(*dx_entry)); out:
dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash); return ret;
dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash); }
dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1); static int ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
struct ocfs2_dx_hinfo *hinfo,
u64 dirent_blk,
struct buffer_head *dx_root_bh)
{
int ret;
struct ocfs2_dx_root_block *dx_root;
ocfs2_journal_dirty(handle, dx_leaf_bh); ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
}
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
ocfs2_journal_dirty(handle, dx_root_bh);
out: out:
return ret; return ret;
} }
static int ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
struct ocfs2_dir_lookup_result *lookup) struct ocfs2_dir_lookup_result *lookup)
{ {
struct ocfs2_dx_root_block *dx_root;
dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
if (ocfs2_dx_root_inline(dx_root))
return ocfs2_dx_inline_root_insert(dir, handle,
&lookup->dl_hinfo,
lookup->dl_leaf_bh->b_blocknr,
lookup->dl_dx_root_bh);
return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo, return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
lookup->dl_leaf_bh->b_blocknr, lookup->dl_leaf_bh->b_blocknr,
lookup->dl_dx_leaf_bh); lookup->dl_dx_leaf_bh);
...@@ -1409,11 +1500,12 @@ int __ocfs2_add_entry(handle_t *handle, ...@@ -1409,11 +1500,12 @@ int __ocfs2_add_entry(handle_t *handle,
else { else {
status = ocfs2_journal_access_db(handle, dir, status = ocfs2_journal_access_db(handle, dir,
insert_bh, insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE); OCFS2_JOURNAL_ACCESS_WRITE);
if (ocfs2_dir_indexed(dir)) { if (ocfs2_dir_indexed(dir)) {
status = ocfs2_dx_dir_leaf_insert(dir, status = ocfs2_dx_dir_insert(dir,
handle, handle,
lookup); lookup);
if (status) { if (status) {
mlog_errno(status); mlog_errno(status);
goto bail; goto bail;
...@@ -2019,6 +2111,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, ...@@ -2019,6 +2111,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
handle_t *handle, struct inode *dir, handle_t *handle, struct inode *dir,
struct buffer_head *di_bh, struct buffer_head *di_bh,
struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *meta_ac,
int dx_inline,
struct buffer_head **ret_dx_root_bh) struct buffer_head **ret_dx_root_bh)
{ {
int ret; int ret;
...@@ -2062,8 +2155,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, ...@@ -2062,8 +2155,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
dx_root->dr_blkno = cpu_to_le64(dr_blkno); dx_root->dr_blkno = cpu_to_le64(dr_blkno);
dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
dx_root->dr_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); if (dx_inline) {
dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
dx_root->dr_entries.de_count =
cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
} else {
dx_root->dr_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
}
ret = ocfs2_journal_dirty(handle, dx_root_bh); ret = ocfs2_journal_dirty(handle, dx_root_bh);
if (ret) if (ret)
...@@ -2236,20 +2336,12 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, ...@@ -2236,20 +2336,12 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac) struct ocfs2_alloc_context *meta_ac)
{ {
int ret, num_dx_leaves, i; int ret;
struct buffer_head *leaf_bh = NULL; struct buffer_head *leaf_bh = NULL;
struct buffer_head *dx_root_bh = NULL; struct buffer_head *dx_root_bh = NULL;
struct buffer_head **dx_leaves = NULL;
struct ocfs2_extent_tree et;
struct ocfs2_dx_hinfo hinfo; struct ocfs2_dx_hinfo hinfo;
u64 insert_blkno; struct ocfs2_dx_root_block *dx_root;
struct ocfs2_dx_entry_list *entry_list;
dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
if (!dx_leaves) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
/* /*
* Our strategy is to create the directory as though it were * Our strategy is to create the directory as though it were
...@@ -2258,7 +2350,8 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, ...@@ -2258,7 +2350,8 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
* very well known quantity. * very well known quantity.
* *
* Essentially, we have two dirents ("." and ".."), in the 1st * Essentially, we have two dirents ("." and ".."), in the 1st
* block which need indexing. * block which need indexing. These are easily inserted into
* the index block.
*/ */
ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh, ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
...@@ -2268,61 +2361,22 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, ...@@ -2268,61 +2361,22 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
goto out; goto out;
} }
/* ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh,
* Allocate and format the index leaf first, before attaching meta_ac, 1, &dx_root_bh);
* the index root. That way we're sure that the main bitmap
* won't -enospc on us with a half-created dir index.
*
* The meta data allocation for our index block will not
* -enospc on us unless there is a disk corruption.
*/
ret = __ocfs2_dx_dir_new_cluster(inode, 0, handle, data_ac, dx_leaves,
num_dx_leaves, &insert_blkno);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
entry_list = &dx_root->dr_entries;
ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo); /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
i = ocfs2_dx_dir_hash_idx(osb, &hinfo); ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo,
leaf_bh->b_blocknr, dx_leaves[i]);
if (ret) {
mlog_errno(ret);
goto out;
}
ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo);
i = ocfs2_dx_dir_hash_idx(osb, &hinfo); ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo,
leaf_bh->b_blocknr, dx_leaves[i]);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, meta_ac,
&dx_root_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
/* This should never fail considering we start with an empty
* dx_root. */
ocfs2_init_dx_root_extent_tree(&et, inode, dx_root_bh);
ret = ocfs2_insert_extent(osb, handle, inode, &et, 0,
insert_blkno, 1, 0, NULL);
if (ret)
mlog_errno(ret);
out: out:
if (dx_leaves) {
for (i = 0; i < num_dx_leaves; i++)
brelse(dx_leaves[i]);
kfree(dx_leaves);
}
brelse(dx_root_bh); brelse(dx_root_bh);
brelse(leaf_bh); brelse(leaf_bh);
return ret; return ret;
...@@ -2392,6 +2446,74 @@ static int ocfs2_dx_dir_index_block(struct inode *dir, ...@@ -2392,6 +2446,74 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
out: out:
return ret; return ret;
} }
/*
* XXX: This expects dx_root_bh to already be part of the transaction.
*/
static void ocfs2_dx_dir_index_root_block(struct inode *dir,
struct buffer_head *dx_root_bh,
struct buffer_head *dirent_bh)
{
char *de_buf, *limit;
struct ocfs2_dx_root_block *dx_root;
struct ocfs2_dir_entry *de;
struct ocfs2_dx_hinfo hinfo;
u64 dirent_blk = dirent_bh->b_blocknr;
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
de_buf = dirent_bh->b_data;
limit = de_buf + dir->i_sb->s_blocksize;
while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
if (!de->name_len || !de->inode)
goto inc;
ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
mlog(0,
"dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n",
(unsigned long long)dir->i_ino, hinfo.major_hash,
hinfo.minor_hash,
le16_to_cpu(dx_root->dr_entries.de_num_used),
de->name_len, de->name);
ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
dirent_blk);
inc:
de_buf += le16_to_cpu(de->rec_len);
}
}
/*
* Count the number of inline directory entries in di_bh and compare
* them against the number of entries we can hold in an inline dx root
* block.
*/
static int ocfs2_new_dx_should_be_inline(struct inode *dir,
struct buffer_head *di_bh)
{
int dirent_count = 0;
char *de_buf, *limit;
struct ocfs2_dir_entry *de;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
de_buf = di->id2.i_data.id_data;
limit = de_buf + i_size_read(dir);
while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
if (de->name_len && de->inode)
dirent_count++;
de_buf += le16_to_cpu(de->rec_len);
}
/* We are careful to leave room for one extra record. */
return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
}
/* /*
* Expand rec_len of the rightmost dirent in a directory block so that it * Expand rec_len of the rightmost dirent in a directory block so that it
...@@ -2442,7 +2564,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ...@@ -2442,7 +2564,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
{ {
u32 alloc, dx_alloc, bit_off, len; u32 alloc, dx_alloc, bit_off, len;
struct super_block *sb = dir->i_sb; struct super_block *sb = dir->i_sb;
int ret, i, num_dx_leaves = 0, int ret, i, num_dx_leaves = 0, dx_inline = 0,
credits = ocfs2_inline_to_extents_credits(sb); credits = ocfs2_inline_to_extents_credits(sb);
u64 dx_insert_blkno, blkno, u64 dx_insert_blkno, blkno,
bytes = blocks_wanted << sb->s_blocksize_bits; bytes = blocks_wanted << sb->s_blocksize_bits;
...@@ -2465,15 +2587,19 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ...@@ -2465,15 +2587,19 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
dx_alloc = 0; dx_alloc = 0;
if (ocfs2_supports_indexed_dirs(osb)) { if (ocfs2_supports_indexed_dirs(osb)) {
/* Add one more cluster for an index leaf */
dx_alloc++;
credits += ocfs2_add_dir_index_credits(sb); credits += ocfs2_add_dir_index_credits(sb);
dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb, &num_dx_leaves); dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
if (!dx_leaves) { if (!dx_inline) {
ret = -ENOMEM; /* Add one more cluster for an index leaf */
mlog_errno(ret); dx_alloc++;
goto out; dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
&num_dx_leaves);
if (!dx_leaves) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
} }
/* This gets us the dx_root */ /* This gets us the dx_root */
...@@ -2524,7 +2650,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ...@@ -2524,7 +2650,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
} }
did_quota = 1; did_quota = 1;
if (ocfs2_supports_indexed_dirs(osb)) { if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
/* /*
* Allocate our index cluster first, to maximize the * Allocate our index cluster first, to maximize the
* possibility that unindexed leaves grow * possibility that unindexed leaves grow
...@@ -2587,7 +2713,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ...@@ -2587,7 +2713,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
goto out_commit; goto out_commit;
} }
if (ocfs2_supports_indexed_dirs(osb)) { if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
/*
* Dx dirs with an external cluster need to do this up
* front. Inline dx root's get handled later, after
* we've allocated our root block.
*/
ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves, ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
num_dx_leaves, dirdata_bh); num_dx_leaves, dirdata_bh);
if (ret) { if (ret) {
...@@ -2650,17 +2781,23 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ...@@ -2650,17 +2781,23 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
if (ocfs2_supports_indexed_dirs(osb)) { if (ocfs2_supports_indexed_dirs(osb)) {
ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
meta_ac, &dx_root_bh); meta_ac, dx_inline,
&dx_root_bh);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out_commit; goto out_commit;
} }
ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); if (dx_inline) {
ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
dx_insert_blkno, 1, 0, NULL); dirdata_bh);
if (ret) } else {
mlog_errno(ret); ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
dx_insert_blkno, 1, 0, NULL);
if (ret)
mlog_errno(ret);
}
} }
/* /*
...@@ -2690,14 +2827,18 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ...@@ -2690,14 +2827,18 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
if (ocfs2_supports_indexed_dirs(osb)) { if (ocfs2_supports_indexed_dirs(osb)) {
unsigned int off; unsigned int off;
/* if (!dx_inline) {
* We need to return the correct block within the /*
* cluster which should hold our entry. * We need to return the correct block within the
*/ * cluster which should hold our entry.
off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), */
&lookup->dl_hinfo); off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb),
get_bh(dx_leaves[off]); &lookup->dl_hinfo);
lookup->dl_dx_leaf_bh = dx_leaves[off]; get_bh(dx_leaves[off]);
lookup->dl_dx_leaf_bh = dx_leaves[off];
}
lookup->dl_dx_root_bh = dx_root_bh;
dx_root_bh = NULL;
} }
out_commit: out_commit:
...@@ -3506,6 +3647,138 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, ...@@ -3506,6 +3647,138 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
return ret; return ret;
} }
static int ocfs2_expand_inline_dx_root(struct inode *dir,
struct buffer_head *dx_root_bh)
{
int ret, num_dx_leaves, i, j, did_quota = 0;
struct buffer_head **dx_leaves = NULL;
struct ocfs2_extent_tree et;
u64 insert_blkno;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
handle_t *handle = NULL;
struct ocfs2_dx_root_block *dx_root;
struct ocfs2_dx_entry_list *entry_list;
struct ocfs2_dx_entry *dx_entry;
struct ocfs2_dx_leaf *target_leaf;
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
if (!dx_leaves) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out;
}
if (vfs_dq_alloc_space_nodirty(dir,
ocfs2_clusters_to_bytes(osb->sb, 1))) {
ret = -EDQUOT;
goto out_commit;
}
did_quota = 1;
/*
* We do this up front, before the allocation, so that a
* failure to add the dx_root_bh to the journal won't result
* us losing clusters.
*/
ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
num_dx_leaves, &insert_blkno);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
/*
* Transfer the entries from our dx_root into the appropriate
* block
*/
dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
entry_list = &dx_root->dr_entries;
for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
dx_entry = &entry_list->de_entries[i];
j = __ocfs2_dx_dir_hash_idx(osb,
le32_to_cpu(dx_entry->dx_minor_hash));
target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;
ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);
/* Each leaf has been passed to the journal already
* via __ocfs2_dx_dir_new_cluster() */
}
dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
offsetof(struct ocfs2_dx_root_block, dr_list));
dx_root->dr_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
/* This should never fail considering we start with an empty
* dx_root. */
ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
insert_blkno, 1, 0, NULL);
if (ret)
mlog_errno(ret);
did_quota = 0;
ocfs2_journal_dirty(handle, dx_root_bh);
out_commit:
if (ret < 0 && did_quota)
vfs_dq_free_space_nodirty(dir,
ocfs2_clusters_to_bytes(dir->i_sb, 1));
ocfs2_commit_trans(osb, handle);
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
if (dx_leaves) {
for (i = 0; i < num_dx_leaves; i++)
brelse(dx_leaves[i]);
kfree(dx_leaves);
}
return ret;
}
static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
{
struct ocfs2_dx_root_block *dx_root;
struct ocfs2_dx_entry_list *entry_list;
dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
entry_list = &dx_root->dr_entries;
if (le16_to_cpu(entry_list->de_num_used) >=
le16_to_cpu(entry_list->de_count))
return -ENOSPC;
return 0;
}
static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
struct buffer_head *di_bh, const char *name, struct buffer_head *di_bh, const char *name,
int namelen, int namelen,
...@@ -3527,6 +3800,23 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, ...@@ -3527,6 +3800,23 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
} }
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
if (ocfs2_dx_root_inline(dx_root)) {
ret = ocfs2_inline_dx_has_space(dx_root_bh);
if (ret == 0)
goto search_el;
/*
* We ran out of room in the root block. Expand it to
* an extent, then allow ocfs2_find_dir_space_dx to do
* the rest.
*/
ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
}
restart_search: restart_search:
ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
...@@ -3578,8 +3868,11 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, ...@@ -3578,8 +3868,11 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
goto restart_search; goto restart_search;
} }
search_el:
lookup->dl_dx_leaf_bh = dx_leaf_bh; lookup->dl_dx_leaf_bh = dx_leaf_bh;
dx_leaf_bh = NULL; dx_leaf_bh = NULL;
lookup->dl_dx_root_bh = dx_root_bh;
dx_root_bh = NULL;
out: out:
brelse(dx_leaf_bh); brelse(dx_leaf_bh);
...@@ -3774,10 +4067,12 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) ...@@ -3774,10 +4067,12 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); if (ocfs2_dx_root_inline(dx_root))
goto remove_index;
dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
/* XXX: What if dr_clusters is too large? */ /* XXX: What if dr_clusters is too large? */
while (le32_to_cpu(dx_root->dr_clusters)) { while (le32_to_cpu(dx_root->dr_clusters)) {
...@@ -3803,6 +4098,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) ...@@ -3803,6 +4098,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
major_hash = cpos - 1; major_hash = cpos - 1;
} }
remove_index:
ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh); ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
......
...@@ -37,6 +37,8 @@ struct ocfs2_dir_lookup_result { ...@@ -37,6 +37,8 @@ struct ocfs2_dir_lookup_result {
struct ocfs2_dir_entry *dl_entry; /* Target dirent in struct ocfs2_dir_entry *dl_entry; /* Target dirent in
* unindexed leaf */ * unindexed leaf */
struct buffer_head *dl_dx_root_bh; /* Root of indexed
* tree */
struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */ struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */
struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in
* indexed leaf */ * indexed leaf */
......
...@@ -458,6 +458,16 @@ static inline int ocfs2_rename_credits(struct super_block *sb) ...@@ -458,6 +458,16 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
#define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \ #define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
OCFS2_SUBALLOC_FREE) OCFS2_SUBALLOC_FREE)
static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
{
int credits = 1 + OCFS2_SUBALLOC_ALLOC;
credits += ocfs2_clusters_to_blocks(sb, 1);
credits += ocfs2_quota_trans_credits(sb);
return credits;
}
/* /*
* Please note that the caller must make sure that root_el is the root * Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
......
...@@ -321,10 +321,8 @@ static int ocfs2_mknod(struct inode *dir, ...@@ -321,10 +321,8 @@ static int ocfs2_mknod(struct inode *dir,
want_clusters += 1; want_clusters += 1;
/* Dir indexing requires extra space as well */ /* Dir indexing requires extra space as well */
if (ocfs2_supports_indexed_dirs(osb)) { if (ocfs2_supports_indexed_dirs(osb))
want_clusters++;
want_meta++; want_meta++;
}
} }
status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac); status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac);
......
...@@ -815,6 +815,8 @@ struct ocfs2_dx_entry_list { ...@@ -815,6 +815,8 @@ struct ocfs2_dx_entry_list {
* length de_num_used */ * length de_num_used */
}; };
#define OCFS2_DX_FLAG_INLINE 0x01
/* /*
* A directory indexing block. Each indexed directory has one of these, * A directory indexing block. Each indexed directory has one of these,
* pointed to by ocfs2_dinode. * pointed to by ocfs2_dinode.
...@@ -835,13 +837,21 @@ struct ocfs2_dx_root_block { ...@@ -835,13 +837,21 @@ struct ocfs2_dx_root_block {
* extent block */ * extent block */
__le32 dr_clusters; /* Clusters allocated __le32 dr_clusters; /* Clusters allocated
* to the indexed tree. */ * to the indexed tree. */
__le32 dr_reserved1; __u8 dr_flags; /* OCFS2_DX_FLAG_* flags */
__u8 dr_reserved0;
__le16 dr_reserved1;
__le64 dr_dir_blkno; /* Pointer to parent inode */ __le64 dr_dir_blkno; /* Pointer to parent inode */
__le64 dr_reserved2; __le64 dr_reserved2;
__le64 dr_reserved3[16]; __le64 dr_reserved3[16];
struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 union {
* bits for maximum space struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
* efficiency. */ * bits for maximum space
* efficiency. */
struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
* entries. We grow out
* to extents if this
* gets too big. */
};
}; };
/* /*
...@@ -1228,6 +1238,16 @@ static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) ...@@ -1228,6 +1238,16 @@ static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
return size / sizeof(struct ocfs2_dx_entry); return size / sizeof(struct ocfs2_dx_entry);
} }
static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
return size / sizeof(struct ocfs2_dx_entry);
}
static inline u16 ocfs2_local_alloc_size(struct super_block *sb) static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
{ {
u16 size; u16 size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment