Commit a6e3d7db authored by Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (23 commits)
  ocfs2: Optionally return filldir errors
  ocfs2: Write support for directories with inline data
  ocfs2: Read support for directories with inline data
  ocfs2: Write support for inline data
  ocfs2: Read support for inline data
  ocfs2: Structure updates for inline data
  ocfs2: Cleanup dirent size check
  ocfs2: Rename cleanups
  ocfs2: Provide convenience function for ino lookup
  ocfs2: Implement ocfs2_empty_dir() as a caller of ocfs2_dir_foreach()
  ocfs2: Remove open coded readdir()
  ocfs2: Pass raw u64 to filldir
  ocfs2: Abstract out core dir listing functionality
  ocfs2: Move directory manipulation code into dir.c
  ocfs2: Small refactor of truncate zeroing code
  ocfs2: move nonsparse hole-filling into ocfs2_write_begin()
  ocfs2: Sync ocfs2_fs.h with ocfs2-tools
  [PATCH] fs/ocfs2/: removed unneeded initial value and function's return value
  ocfs2: Implement show_options()
  ocfs2: Clear slot map when umounting a local volume
  ...
parents 42f04b6d e7b34019
...@@ -354,7 +354,6 @@ struct ocfs2_insert_type { ...@@ -354,7 +354,6 @@ struct ocfs2_insert_type {
enum ocfs2_append_type ins_appending; enum ocfs2_append_type ins_appending;
enum ocfs2_contig_type ins_contig; enum ocfs2_contig_type ins_contig;
int ins_contig_index; int ins_contig_index;
int ins_free_records;
int ins_tree_depth; int ins_tree_depth;
}; };
...@@ -362,7 +361,6 @@ struct ocfs2_merge_ctxt { ...@@ -362,7 +361,6 @@ struct ocfs2_merge_ctxt {
enum ocfs2_contig_type c_contig_type; enum ocfs2_contig_type c_contig_type;
int c_has_empty_extent; int c_has_empty_extent;
int c_split_covers_rec; int c_split_covers_rec;
int c_used_tail_recs;
}; };
/* /*
...@@ -2808,20 +2806,13 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, ...@@ -2808,20 +2806,13 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
struct ocfs2_merge_ctxt *ctxt) struct ocfs2_merge_ctxt *ctxt)
{ {
int ret = 0, delete_tail_recs = 0; int ret = 0;
struct ocfs2_extent_list *el = path_leaf_el(left_path); struct ocfs2_extent_list *el = path_leaf_el(left_path);
struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
BUG_ON(ctxt->c_contig_type == CONTIG_NONE); BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
if (ctxt->c_split_covers_rec) { if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
delete_tail_recs++;
if (ctxt->c_contig_type == CONTIG_LEFTRIGHT ||
ctxt->c_has_empty_extent)
delete_tail_recs++;
if (ctxt->c_has_empty_extent) {
/* /*
* The merge code will need to create an empty * The merge code will need to create an empty
* extent to take the place of the newly * extent to take the place of the newly
...@@ -2838,7 +2829,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, ...@@ -2838,7 +2829,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
split_index--; split_index--;
rec = &el->l_recs[split_index]; rec = &el->l_recs[split_index];
} }
}
if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
/* /*
...@@ -3593,6 +3583,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, ...@@ -3593,6 +3583,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
struct buffer_head *di_bh, struct buffer_head *di_bh,
struct buffer_head **last_eb_bh, struct buffer_head **last_eb_bh,
struct ocfs2_extent_rec *insert_rec, struct ocfs2_extent_rec *insert_rec,
int *free_records,
struct ocfs2_insert_type *insert) struct ocfs2_insert_type *insert)
{ {
int ret; int ret;
...@@ -3633,7 +3624,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, ...@@ -3633,7 +3624,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* XXX: This test is simplistic, we can search for empty * XXX: This test is simplistic, we can search for empty
* extent records too. * extent records too.
*/ */
insert->ins_free_records = le16_to_cpu(el->l_count) - *free_records = le16_to_cpu(el->l_count) -
le16_to_cpu(el->l_next_free_rec); le16_to_cpu(el->l_next_free_rec);
if (!insert->ins_tree_depth) { if (!insert->ins_tree_depth) {
...@@ -3730,10 +3721,13 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, ...@@ -3730,10 +3721,13 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
struct ocfs2_alloc_context *meta_ac) struct ocfs2_alloc_context *meta_ac)
{ {
int status; int status;
int uninitialized_var(free_records);
struct buffer_head *last_eb_bh = NULL; struct buffer_head *last_eb_bh = NULL;
struct ocfs2_insert_type insert = {0, }; struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec; struct ocfs2_extent_rec rec;
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog(0, "add %u clusters at position %u to inode %llu\n", mlog(0, "add %u clusters at position %u to inode %llu\n",
new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
...@@ -3752,7 +3746,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, ...@@ -3752,7 +3746,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
rec.e_flags = flags; rec.e_flags = flags;
status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
&insert); &free_records, &insert);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);
goto bail; goto bail;
...@@ -3762,9 +3756,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, ...@@ -3762,9 +3756,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
"Insert.contig_index: %d, Insert.free_records: %d, " "Insert.contig_index: %d, Insert.free_records: %d, "
"Insert.tree_depth: %d\n", "Insert.tree_depth: %d\n",
insert.ins_appending, insert.ins_contig, insert.ins_contig_index, insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
insert.ins_free_records, insert.ins_tree_depth); free_records, insert.ins_tree_depth);
if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
status = ocfs2_grow_tree(inode, handle, fe_bh, status = ocfs2_grow_tree(inode, handle, fe_bh,
&insert.ins_tree_depth, &last_eb_bh, &insert.ins_tree_depth, &last_eb_bh,
meta_ac); meta_ac);
...@@ -3847,26 +3841,17 @@ static int ocfs2_split_and_insert(struct inode *inode, ...@@ -3847,26 +3841,17 @@ static int ocfs2_split_and_insert(struct inode *inode,
if (le16_to_cpu(rightmost_el->l_next_free_rec) == if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) { le16_to_cpu(rightmost_el->l_count)) {
int old_depth = depth;
ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
meta_ac); meta_ac);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
if (old_depth != depth) {
eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
rightmost_el = &eb->h_list;
}
} }
memset(&insert, 0, sizeof(struct ocfs2_insert_type)); memset(&insert, 0, sizeof(struct ocfs2_insert_type));
insert.ins_appending = APPEND_NONE; insert.ins_appending = APPEND_NONE;
insert.ins_contig = CONTIG_NONE; insert.ins_contig = CONTIG_NONE;
insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
- le16_to_cpu(rightmost_el->l_next_free_rec);
insert.ins_tree_depth = depth; insert.ins_tree_depth = depth;
insert_range = le32_to_cpu(split_rec.e_cpos) + insert_range = le32_to_cpu(split_rec.e_cpos) +
...@@ -4015,11 +4000,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode, ...@@ -4015,11 +4000,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
} else } else
rightmost_el = path_root_el(path); rightmost_el = path_root_el(path);
ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
if (ctxt.c_used_tail_recs > 0 &&
ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
ctxt.c_used_tail_recs--;
if (rec->e_cpos == split_rec->e_cpos && if (rec->e_cpos == split_rec->e_cpos &&
rec->e_leaf_clusters == split_rec->e_leaf_clusters) rec->e_leaf_clusters == split_rec->e_leaf_clusters)
ctxt.c_split_covers_rec = 1; ctxt.c_split_covers_rec = 1;
...@@ -4028,10 +4008,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode, ...@@ -4028,10 +4008,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n",
"has_empty: %u, split_covers: %u\n", split_index, split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent,
ctxt.c_contig_type, ctxt.c_used_tail_recs, ctxt.c_split_covers_rec);
ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
if (ctxt.c_contig_type == CONTIG_NONE) { if (ctxt.c_contig_type == CONTIG_NONE) {
if (ctxt.c_split_covers_rec) if (ctxt.c_split_covers_rec)
...@@ -4180,27 +4159,18 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, ...@@ -4180,27 +4159,18 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
if (le16_to_cpu(rightmost_el->l_next_free_rec) == if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) { le16_to_cpu(rightmost_el->l_count)) {
int old_depth = depth;
ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
meta_ac); meta_ac);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
if (old_depth != depth) {
eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
rightmost_el = &eb->h_list;
}
} }
memset(&insert, 0, sizeof(struct ocfs2_insert_type)); memset(&insert, 0, sizeof(struct ocfs2_insert_type));
insert.ins_appending = APPEND_NONE; insert.ins_appending = APPEND_NONE;
insert.ins_contig = CONTIG_NONE; insert.ins_contig = CONTIG_NONE;
insert.ins_split = SPLIT_RIGHT; insert.ins_split = SPLIT_RIGHT;
insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
- le16_to_cpu(rightmost_el->l_next_free_rec);
insert.ins_tree_depth = depth; insert.ins_tree_depth = depth;
ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
...@@ -5665,39 +5635,18 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) ...@@ -5665,39 +5635,18 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
return ocfs2_journal_dirty_data(handle, bh); return ocfs2_journal_dirty_data(handle, bh);
} }
static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
loff_t end, struct page **pages, unsigned int from, unsigned int to,
int numpages, u64 phys, handle_t *handle) struct page *page, int zero, u64 *phys)
{ {
int i, ret, partial = 0; int ret, partial = 0;
void *kaddr;
struct page *page;
unsigned int from, to = PAGE_CACHE_SIZE;
struct super_block *sb = inode->i_sb;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
if (numpages == 0)
goto out;
to = PAGE_CACHE_SIZE;
for(i = 0; i < numpages; i++) {
page = pages[i];
from = start & (PAGE_CACHE_SIZE - 1);
if ((end >> PAGE_CACHE_SHIFT) == page->index)
to = end & (PAGE_CACHE_SIZE - 1);
BUG_ON(from > PAGE_CACHE_SIZE);
BUG_ON(to > PAGE_CACHE_SIZE);
ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0); ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
if (ret) if (ret)
mlog_errno(ret); mlog_errno(ret);
kaddr = kmap_atomic(page, KM_USER0); if (zero)
memset(kaddr + from, 0, to - from); zero_user_page(page, from, to - from, KM_USER0);
kunmap_atomic(kaddr, KM_USER0);
/* /*
* Need to set the buffers we zero'd into uptodate * Need to set the buffers we zero'd into uptodate
...@@ -5723,55 +5672,58 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, ...@@ -5723,55 +5672,58 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
SetPageUptodate(page); SetPageUptodate(page);
flush_dcache_page(page); flush_dcache_page(page);
}
static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
loff_t end, struct page **pages,
int numpages, u64 phys, handle_t *handle)
{
int i;
struct page *page;
unsigned int from, to = PAGE_CACHE_SIZE;
struct super_block *sb = inode->i_sb;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
if (numpages == 0)
goto out;
to = PAGE_CACHE_SIZE;
for(i = 0; i < numpages; i++) {
page = pages[i];
from = start & (PAGE_CACHE_SIZE - 1);
if ((end >> PAGE_CACHE_SHIFT) == page->index)
to = end & (PAGE_CACHE_SIZE - 1);
BUG_ON(from > PAGE_CACHE_SIZE);
BUG_ON(to > PAGE_CACHE_SIZE);
ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
&phys);
start = (page->index + 1) << PAGE_CACHE_SHIFT; start = (page->index + 1) << PAGE_CACHE_SHIFT;
} }
out: out:
if (pages) { if (pages)
for (i = 0; i < numpages; i++) { ocfs2_unlock_and_free_pages(pages, numpages);
page = pages[i];
unlock_page(page);
mark_page_accessed(page);
page_cache_release(page);
}
}
} }
static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
struct page **pages, int *num, u64 *phys) struct page **pages, int *num)
{ {
int i, numpages = 0, ret = 0; int numpages, ret = 0;
unsigned int ext_flags;
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
unsigned long index; unsigned long index;
loff_t last_page_bytes; loff_t last_page_bytes;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
BUG_ON(start > end); BUG_ON(start > end);
if (start == end)
goto out;
BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
(end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits, numpages = 0;
phys, NULL, &ext_flags);
if (ret) {
mlog_errno(ret);
goto out;
}
/* Tail is a hole. */
if (*phys == 0)
goto out;
/* Tail is marked as unwritten, we can count on write to zero
* in that case. */
if (ext_flags & OCFS2_EXT_UNWRITTEN)
goto out;
last_page_bytes = PAGE_ALIGN(end); last_page_bytes = PAGE_ALIGN(end);
index = start >> PAGE_CACHE_SHIFT; index = start >> PAGE_CACHE_SHIFT;
do { do {
...@@ -5788,14 +5740,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, ...@@ -5788,14 +5740,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
out: out:
if (ret != 0) { if (ret != 0) {
if (pages) { if (pages)
for (i = 0; i < numpages; i++) { ocfs2_unlock_and_free_pages(pages, numpages);
if (pages[i]) {
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
}
}
numpages = 0; numpages = 0;
} }
...@@ -5816,18 +5762,20 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, ...@@ -5816,18 +5762,20 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end) u64 range_start, u64 range_end)
{ {
int ret, numpages; int ret = 0, numpages;
struct page **pages = NULL; struct page **pages = NULL;
u64 phys; u64 phys;
unsigned int ext_flags;
struct super_block *sb = inode->i_sb;
/* /*
* File systems which don't support sparse files zero on every * File systems which don't support sparse files zero on every
* extend. * extend.
*/ */
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
return 0; return 0;
pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb), pages = kcalloc(ocfs2_pages_per_cluster(sb),
sizeof(struct page *), GFP_NOFS); sizeof(struct page *), GFP_NOFS);
if (pages == NULL) { if (pages == NULL) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -5835,16 +5783,31 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, ...@@ -5835,16 +5783,31 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
goto out; goto out;
} }
ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, if (range_start == range_end)
&numpages, &phys); goto out;
ret = ocfs2_extent_map_get_blocks(inode,
range_start >> sb->s_blocksize_bits,
&phys, NULL, &ext_flags);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
if (numpages == 0) /*
* Tail is a hole, or is marked unwritten. In either case, we
* can count on read and write to return/push zero's.
*/
if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
goto out; goto out;
ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
&numpages);
if (ret) {
mlog_errno(ret);
goto out;
}
ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
numpages, phys, handle); numpages, phys, handle);
...@@ -5865,6 +5828,178 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, ...@@ -5865,6 +5828,178 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
return ret; return ret;
} }
/*
 * Zero the id2 area of an on-disk inode: everything from the start of
 * di->id2 up to one file system block past the start of 'di'.
 */
static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
{
	size_t id2_off = offsetof(struct ocfs2_dinode, id2);
	unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;

	memset(&di->id2, 0, blocksize - id2_off);
}
void ocfs2_dinode_new_extent_list(struct inode *inode,
struct ocfs2_dinode *di)
{
ocfs2_zero_dinode_id2(inode, di);
di->id2.i_list.l_tree_depth = 0;
di->id2.i_list.l_next_free_rec = 0;
di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
}
/*
 * Mark an inode as holding inline data.
 *
 * Sets OCFS2_INLINE_DATA_FL in the in-memory feature flags, mirrors the
 * resulting flags into the on-disk inode, then resets the id2 area and
 * records the inline-data capacity in id_count.
 */
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	/* Memory and disk copies of the flags are updated under ip_lock. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	/*
	 * Wipe all of id2 before filling anything in, so that every
	 * field of the inline data structure starts out zeroed.
	 */
	ocfs2_zero_dinode_id2(inode, di);

	di->id2.i_data.id_count =
		cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
}
/*
 * Convert an inode from inline data to an in-inode extent list.
 *
 * If the inode has a non-zero i_size, one cluster is reserved and claimed,
 * the inline bytes are copied into the first page-cache page via
 * ocfs2_read_inline_data(), the grabbed pages are mapped and dirtied, and
 * a single extent covering that cluster is inserted at cpos 0. In all
 * cases OCFS2_INLINE_DATA_FL is cleared (in memory and in 'di') and the
 * id2 area is re-initialized as an empty extent list.
 *
 * @inode: inode being converted
 * @di_bh: buffer head holding the inode's on-disk dinode
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE(review): once ocfs2_claim_clusters() has succeeded, none of the
 * later error paths visible here hand the claimed cluster back - confirm
 * whether that is handled elsewhere.
 */
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
					 struct buffer_head *di_bh)
{
	int ret, i, has_data, num_pages = 0;
	handle_t *handle;
	u64 uninitialized_var(block);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct page **pages = NULL;
	/* By default the range of pages grabbed below spans one cluster. */
	loff_t end = osb->s_clustersize;

	/* Only inodes with a non-zero size need a cluster and page work. */
	has_data = i_size_read(inode) ? 1 : 0;

	if (has_data) {
		pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
				sizeof(struct page *), GFP_NOFS);
		if (pages == NULL) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		/* No transaction to commit; only the reservation to drop. */
		goto out_unlock;
	}

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (has_data) {
		u32 bit_off, num;
		unsigned int page_end;
		u64 phys;

		ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
					   &num);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		/*
		 * Save two copies, one for insert, and one that can
		 * be changed by ocfs2_map_and_dirty_page() below.
		 */
		block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

		/*
		 * Non sparse file systems zero on extend, so no need
		 * to do that now.
		 */
		if (!ocfs2_sparse_alloc(osb) &&
		    PAGE_CACHE_SIZE < osb->s_clustersize)
			end = PAGE_CACHE_SIZE;

		ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		/*
		 * This should populate the 1st page for us and mark
		 * it up to date.
		 */
		ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		/* Per-page range to map: a full page, capped at one cluster. */
		page_end = PAGE_CACHE_SIZE;
		if (PAGE_CACHE_SIZE > osb->s_clustersize)
			page_end = osb->s_clustersize;

		/*
		 * The first page already holds the inline data, so its
		 * 'zero' argument is 0; every later page is passed 1.
		 */
		for (i = 0; i < num_pages; i++)
			ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
						 pages[i], i > 0, &phys);
	}

	/* Drop the inline-data flag in memory and on disk under ip_lock. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	/* This zeroes id2, which held the inline bytes until now. */
	ocfs2_dinode_new_extent_list(inode, di);

	ocfs2_journal_dirty(handle, di_bh);

	if (has_data) {
		/*
		 * An error at this point should be extremely rare. If
		 * this proves to be false, we could always re-build
		 * the in-inode data from our pages.
		 */
		ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
					  0, block, 1, 0, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		inode->i_blocks = ocfs2_inode_sector_count(inode);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

out:
	if (pages) {
		ocfs2_unlock_and_free_pages(pages, num_pages);
		kfree(pages);
	}

	return ret;
}
/* /*
* It is expected, that by the time you call this function, * It is expected, that by the time you call this function,
* inode->i_size and fe->i_size have been adjusted. * inode->i_size and fe->i_size have been adjusted.
...@@ -6090,6 +6225,81 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, ...@@ -6090,6 +6225,81 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
return status; return status;
} }
/*
 * Zero the byte range [start, end) of an inode's inline data; 'start' is
 * inclusive, 'end' is exclusive and is clamped to the current i_size.
 *
 * When 'trunc' is non-zero the inode size (in memory and in the dinode)
 * is also set to 'start'. ctime/mtime are updated and the dinode buffer
 * is journaled. Returns 0 on success, -EROFS if the inline-data flags of
 * memory, disk and super block disagree, or another negative error code.
 */
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
			  unsigned int start, unsigned int end, int trunc)
{
	int ret;
	int flags_agree;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inline_data *idata = &di->id2.i_data;

	if (end > i_size_read(inode))
		end = i_size_read(inode);

	BUG_ON(start >= end);

	/* Memory, disk and super block must all say "inline data". */
	flags_agree =
		(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
		(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) &&
		ocfs2_supports_inline_data(osb);
	if (!flags_agree) {
		ocfs2_error(inode->i_sb,
			    "Inline data flags for inode %llu don't agree! "
			    "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
			    le16_to_cpu(di->i_dyn_features),
			    OCFS2_I(inode)->ip_dyn_features,
			    osb->s_feature_incompat);
		ret = -EROFS;
		goto out;
	}

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	memset(&idata->id_data[start], 0, end - start);

	/*
	 * The data page is left alone here: it has been truncated
	 * already, inline data does not need it to get zeros to disk,
	 * and readpage will repopulate it later.
	 */
	if (trunc) {
		i_size_write(inode, start);
		di->i_size = cpu_to_le64(start);
	}

	inode->i_blocks = ocfs2_inode_sector_count(inode);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	ocfs2_journal_dirty(handle, di_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	return ret;
}
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
{ {
/* /*
......
...@@ -62,6 +62,11 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) ...@@ -62,6 +62,11 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
} }
void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di);
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh);
int ocfs2_truncate_log_init(struct ocfs2_super *osb); int ocfs2_truncate_log_init(struct ocfs2_super *osb);
void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb); void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
...@@ -115,6 +120,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, ...@@ -115,6 +120,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct inode *inode, struct inode *inode,
struct buffer_head *fe_bh, struct buffer_head *fe_bh,
struct ocfs2_truncate_context *tc); struct ocfs2_truncate_context *tc);
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
unsigned int start, unsigned int end, int trunc);
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
u32 cpos, struct buffer_head **leaf_bh); u32 cpos, struct buffer_head **leaf_bh);
......
...@@ -206,9 +206,70 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, ...@@ -206,9 +206,70 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
return err; return err;
} }
int ocfs2_read_inline_data(struct inode *inode, struct page *page,
struct buffer_head *di_bh)
{
void *kaddr;
unsigned int size;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
return -EROFS;
}
size = i_size_read(inode);
if (size > PAGE_CACHE_SIZE ||
size > ocfs2_max_inline_data(inode->i_sb)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %u",
(unsigned long long)OCFS2_I(inode)->ip_blkno, size);
return -EROFS;
}
kaddr = kmap_atomic(page, KM_USER0);
if (size)
memcpy(kaddr, di->id2.i_data.id_data, size);
/* Clear the remaining part of the page */
memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page);
return 0;
}
/*
 * readpage helper for inodes with inline data.
 *
 * Reads the inode's dinode block and copies its inline data into 'page'
 * through ocfs2_read_inline_data(). The caller must pass a locked page;
 * the page is unlocked before returning on both the success and the
 * error path.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
{
	int ret;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	BUG_ON(!PageLocked(page));
	/*
	 * The parentheses matter: '!' binds tighter than '&', so the
	 * unparenthesized form tests (!features) & FLAG rather than
	 * whether the inline-data bit is actually set.
	 */
	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));

	ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
			       OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_inline_data(inode, page, di_bh);
out:
	unlock_page(page);

	brelse(di_bh);
	return ret;
}
static int ocfs2_readpage(struct file *file, struct page *page) static int ocfs2_readpage(struct file *file, struct page *page)
{ {
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
int ret, unlock = 1; int ret, unlock = 1;
...@@ -222,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) ...@@ -222,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
goto out; goto out;
} }
if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) { if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
ret = AOP_TRUNCATED_PAGE; ret = AOP_TRUNCATED_PAGE;
goto out_meta_unlock; goto out_meta_unlock;
} }
...@@ -252,6 +313,9 @@ static int ocfs2_readpage(struct file *file, struct page *page) ...@@ -252,6 +313,9 @@ static int ocfs2_readpage(struct file *file, struct page *page)
goto out_alloc; goto out_alloc;
} }
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
ret = ocfs2_readpage_inline(inode, page);
else
ret = block_read_full_page(page, ocfs2_get_block); ret = block_read_full_page(page, ocfs2_get_block);
unlock = 0; unlock = 0;
...@@ -301,12 +365,8 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, ...@@ -301,12 +365,8 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
{ {
int ret; int ret;
down_read(&OCFS2_I(inode)->ip_alloc_sem);
ret = block_prepare_write(page, from, to, ocfs2_get_block); ret = block_prepare_write(page, from, to, ocfs2_get_block);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
return ret; return ret;
} }
...@@ -401,7 +461,9 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) ...@@ -401,7 +461,9 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
down_read(&OCFS2_I(inode)->ip_alloc_sem); down_read(&OCFS2_I(inode)->ip_alloc_sem);
} }
err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
NULL);
if (!INODE_JOURNAL(inode)) { if (!INODE_JOURNAL(inode)) {
up_read(&OCFS2_I(inode)->ip_alloc_sem); up_read(&OCFS2_I(inode)->ip_alloc_sem);
...@@ -415,7 +477,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) ...@@ -415,7 +477,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
goto bail; goto bail;
} }
bail: bail:
status = err ? 0 : p_blkno; status = err ? 0 : p_blkno;
...@@ -570,6 +631,13 @@ static ssize_t ocfs2_direct_IO(int rw, ...@@ -570,6 +631,13 @@ static ssize_t ocfs2_direct_IO(int rw,
mlog_entry_void(); mlog_entry_void();
/*
* Fallback to buffered I/O if we see an inode without
* extents.
*/
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
/* /*
* We get PR data locks even for O_DIRECT. This * We get PR data locks even for O_DIRECT. This
...@@ -834,18 +902,22 @@ struct ocfs2_write_ctxt { ...@@ -834,18 +902,22 @@ struct ocfs2_write_ctxt {
struct ocfs2_cached_dealloc_ctxt w_dealloc; struct ocfs2_cached_dealloc_ctxt w_dealloc;
}; };
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
{ {
int i; int i;
for(i = 0; i < wc->w_num_pages; i++) { for(i = 0; i < num_pages; i++) {
if (wc->w_pages[i] == NULL) if (pages[i]) {
continue; unlock_page(pages[i]);
mark_page_accessed(pages[i]);
unlock_page(wc->w_pages[i]); page_cache_release(pages[i]);
mark_page_accessed(wc->w_pages[i]); }
page_cache_release(wc->w_pages[i]);
} }
}
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
{
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
brelse(wc->w_di_bh); brelse(wc->w_di_bh);
kfree(wc); kfree(wc);
...@@ -1360,6 +1432,160 @@ static int ocfs2_populate_write_desc(struct inode *inode, ...@@ -1360,6 +1432,160 @@ static int ocfs2_populate_write_desc(struct inode *inode,
return ret; return ret;
} }
static int ocfs2_write_begin_inline(struct address_space *mapping,
struct inode *inode,
struct ocfs2_write_ctxt *wc)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct page *page;
handle_t *handle;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
page = find_or_create_page(mapping, 0, GFP_NOFS);
if (!page) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
/*
* If we don't set w_num_pages then this page won't get unlocked
* and freed on cleanup of the write context.
*/
wc->w_pages[0] = wc->w_target_page = page;
wc->w_num_pages = 1;
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out;
}
ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
ocfs2_commit_trans(osb, handle);
mlog_errno(ret);
goto out;
}
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
ocfs2_set_inode_data_inline(inode, di);
if (!PageUptodate(page)) {
ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
if (ret) {
ocfs2_commit_trans(osb, handle);
goto out;
}
}
wc->w_handle = handle;
out:
return ret;
}
/*
 * Report whether a file of new_size bytes fits in the inline data area
 * of the dinode held in di_bh.
 *
 * id_count is compared as a byte capacity, so a size exactly equal to
 * it still fits. This matches the first-write check in
 * ocfs2_try_to_write_inline_data(), which only rejects sizes strictly
 * greater than the maximum inline size.
 *
 * Returns 1 if new_size fits, 0 otherwise.
 */
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	if (new_size <= le16_to_cpu(di->id2.i_data.id_count))
		return 1;
	return 0;
}
/*
 * Decide whether the write [pos, pos + len) can be stored inline and,
 * if so, prepare it via ocfs2_write_begin_inline().
 *
 * Returns 1 when the write has been set up as an inline write, 0 when
 * the caller must continue with the regular write path (including the
 * case where existing inline data was just converted to extents), or
 * an error code on failure.
 */
static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
					  struct inode *inode, loff_t pos,
					  unsigned len, struct page *mmap_page,
					  struct ocfs2_write_ctxt *wc)
{
	int ret;
	loff_t end = pos + len;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
	     (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
	     oi->ip_dyn_features);

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/*
		 * Handle inodes which already have inline data 1st.
		 *
		 * An mmap write, or one which won't fit, means we have
		 * to give this inode an inline extent list now.
		 */
		if (mmap_page != NULL ||
		    !ocfs2_size_fits_inline_data(wc->w_di_bh, end)) {
			ret = ocfs2_convert_inline_data_to_extents(inode,
								   wc->w_di_bh);
			if (ret)
				mlog_errno(ret);
			return ret;
		}
	} else {
		/* Check whether the inode can accept inline data. */
		if (oi->ip_clusters != 0 || i_size_read(inode) != 0)
			return 0;

		/* Check whether the write can fit. */
		if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
			return 0;
	}

	ret = ocfs2_write_begin_inline(mapping, inode, wc);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* This signals to the caller that the data can be written inline. */
	return 1;
}
/*
 * This function only does anything for file systems which can't
 * handle sparse files.
 *
 * What we want to do here is fill in any hole between the current end
 * of allocation and the end of our write. That way the rest of the
 * write path can treat it as a non-allocating write, which has no
 * special case code for sparse/nonsparse files.
 *
 * Returns 0 when nothing had to be done or the extend succeeded,
 * otherwise the error from ocfs2_extend_no_holes().
 */
static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
					unsigned len,
					struct ocfs2_write_ctxt *wc)
{
	int status;
	loff_t write_end = pos + len;

	/* Sparse-capable volumes and writes inside i_size need no work. */
	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) ||
	    write_end <= i_size_read(inode))
		return 0;

	status = ocfs2_extend_no_holes(inode, write_end, write_end - len);
	if (status)
		mlog_errno(status);

	return status;
}
int ocfs2_write_begin_nolock(struct address_space *mapping, int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata, struct page **pagep, void **fsdata,
...@@ -1381,6 +1607,25 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, ...@@ -1381,6 +1607,25 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
return ret; return ret;
} }
if (ocfs2_supports_inline_data(osb)) {
ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
mmap_page, wc);
if (ret == 1) {
ret = 0;
goto success;
}
if (ret < 0) {
mlog_errno(ret);
goto out;
}
}
ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
&extents_to_split); &extents_to_split);
if (ret) { if (ret) {
...@@ -1462,6 +1707,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, ...@@ -1462,6 +1707,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
if (meta_ac) if (meta_ac)
ocfs2_free_alloc_context(meta_ac); ocfs2_free_alloc_context(meta_ac);
success:
*pagep = wc->w_target_page; *pagep = wc->w_target_page;
*fsdata = wc; *fsdata = wc;
return 0; return 0;
...@@ -1529,6 +1775,31 @@ int ocfs2_write_begin(struct file *file, struct address_space *mapping, ...@@ -1529,6 +1775,31 @@ int ocfs2_write_begin(struct file *file, struct address_space *mapping,
return ret; return ret;
} }
/*
 * Finish an inline-data write: copy the bytes that were written into
 * the target page back into the inline data area of the dinode.
 *
 * @copied is in/out. If the copy from userspace was short and the page
 * is not up to date, nothing is stored and *copied is reset to 0 so
 * the caller sees that no data was written.
 */
static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
				   unsigned len, unsigned *copied,
				   struct ocfs2_dinode *di,
				   struct ocfs2_write_ctxt *wc)
{
	void *kaddr;

	if (unlikely(*copied < len)) {
		if (!PageUptodate(wc->w_target_page)) {
			*copied = 0;
			return;
		}
	}

	kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
	memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
	kunmap_atomic(kaddr, KM_USER0);

	/* Argument order must follow the format: id_count, then copied. */
	mlog(0, "Data written to inode at offset %llu. "
	     "id_count = %u, copied = %u, i_dyn_features = 0x%x\n",
	     (unsigned long long)pos,
	     le16_to_cpu(di->id2.i_data.id_count), *copied,
	     le16_to_cpu(di->i_dyn_features));
}
int ocfs2_write_end_nolock(struct address_space *mapping, int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied, loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata) struct page *page, void *fsdata)
...@@ -1542,6 +1813,11 @@ int ocfs2_write_end_nolock(struct address_space *mapping, ...@@ -1542,6 +1813,11 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
handle_t *handle = wc->w_handle; handle_t *handle = wc->w_handle;
struct page *tmppage; struct page *tmppage;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
goto out_write_size;
}
if (unlikely(copied < len)) { if (unlikely(copied < len)) {
if (!PageUptodate(wc->w_target_page)) if (!PageUptodate(wc->w_target_page))
copied = 0; copied = 0;
...@@ -1579,6 +1855,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, ...@@ -1579,6 +1855,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
block_commit_write(tmppage, from, to); block_commit_write(tmppage, from, to);
} }
out_write_size:
pos += copied; pos += copied;
if (pos > inode->i_size) { if (pos > inode->i_size) {
i_size_write(inode, pos); i_size_write(inode, pos);
......
...@@ -34,6 +34,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, ...@@ -34,6 +34,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from, struct inode *inode, unsigned int from,
unsigned int to, int new); unsigned int to, int new);
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);
int walk_page_buffers( handle_t *handle, int walk_page_buffers( handle_t *handle,
struct buffer_head *head, struct buffer_head *head,
unsigned from, unsigned from,
...@@ -59,6 +61,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, ...@@ -59,6 +61,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
struct page **pagep, void **fsdata, struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page); struct buffer_head *di_bh, struct page *mmap_page);
int ocfs2_read_inline_data(struct inode *inode, struct page *page,
struct buffer_head *di_bh);
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
/* all ocfs2_dio_end_io()'s fault */ /* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \ #define ocfs2_iocb_is_rw_locked(iocb) \
test_bit(0, (unsigned long *)&iocb->private) test_bit(0, (unsigned long *)&iocb->private)
......
...@@ -55,10 +55,16 @@ ...@@ -55,10 +55,16 @@
#include "journal.h" #include "journal.h"
#include "namei.h" #include "namei.h"
#include "suballoc.h" #include "suballoc.h"
#include "super.h"
#include "uptodate.h" #include "uptodate.h"
#include "buffer_head_io.h" #include "buffer_head_io.h"
/*
 * Readahead sizing for directory lookups. NAMEI_RA_SIZE is the number
 * of buffer heads ocfs2_find_entry_el() reads per readahead batch.
 */
#define NAMEI_RA_CHUNKS 2
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
/* Flat index of block b within chunk c. */
#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
static unsigned char ocfs2_filetype_table[] = { static unsigned char ocfs2_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
}; };
...@@ -66,12 +72,614 @@ static unsigned char ocfs2_filetype_table[] = { ...@@ -66,12 +72,614 @@ static unsigned char ocfs2_filetype_table[] = {
static int ocfs2_extend_dir(struct ocfs2_super *osb, static int ocfs2_extend_dir(struct ocfs2_super *osb,
struct inode *dir, struct inode *dir,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
unsigned int blocks_wanted,
struct buffer_head **new_de_bh); struct buffer_head **new_de_bh);
static int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
/*
 * Sanity check a single directory entry.
 *
 * bh passed here can be an inode block or a dir data block, depending
 * on the inode inline data flag.
 *
 * Returns 1 if the entry looks valid. Otherwise the problem is logged
 * and 0 is returned.
 */
static int ocfs2_check_dir_entry(struct inode * dir,
				 struct ocfs2_dir_entry * de,
				 struct buffer_head * bh,
				 unsigned long offset)
{
	const int rlen = le16_to_cpu(de->rec_len);
	const char *reason;

	if (rlen < OCFS2_DIR_REC_LEN(1))
		reason = "rec_len is smaller than minimal";
	else if (rlen % 4 != 0)
		reason = "rec_len % 4 != 0";
	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
		reason = "rec_len is too small for name_len";
	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
		reason = "directory entry across blocks";
	else
		return 1;

	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
	     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno, reason,
	     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
	     de->name_len);

	return 0;
}
/*
 * Returns non-zero only if 'de' is an in-use entry (non-zero inode)
 * whose name has exactly 'len' bytes equal to 'name'.
 */
static inline int ocfs2_match(int len,
			      const char * const name,
			      struct ocfs2_dir_entry *de)
{
	return len == de->name_len &&
	       de->inode &&
	       !memcmp(name, de->name, len);
}
/*
 * Scan 'bytes' bytes of directory entries starting at first_de for an
 * entry named name/namelen. On a hit *res_dir points at the entry.
 *
 * Returns 0 if not found, -1 on failure, and 1 on success
 */
static int inline ocfs2_search_dirblock(struct buffer_head *bh,
					struct inode *dir,
					const char *name, int namelen,
					unsigned long offset,
					char *first_de,
					unsigned int bytes,
					struct ocfs2_dir_entry **res_dir)
{
	struct ocfs2_dir_entry *de;
	char *cur, *limit = first_de + bytes;
	int step = 0;
	int ret = 0;

	mlog_entry_void();

	for (cur = first_de; cur < limit; cur += step, offset += step) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */
		de = (struct ocfs2_dir_entry *) cur;

		if (cur + namelen <= limit &&
		    ocfs2_match(namelen, name, de)) {
			/* found a match - just to be sure, do a full check */
			if (ocfs2_check_dir_entry(dir, de, bh, offset)) {
				*res_dir = de;
				ret = 1;
			} else {
				ret = -1;
			}
			break;
		}

		/* prevent looping on a bad block */
		step = le16_to_cpu(de->rec_len);
		if (step <= 0) {
			ret = -1;
			break;
		}
	}

	mlog_exit(ret);
	return ret;
}
/*
 * Look up name/namelen in a directory whose entries live inline in the
 * inode block.
 *
 * Returns the inode block's buffer_head (with *res_dir pointing into
 * it) when the name is found, or NULL when it is not found or the
 * inode block could not be read.
 */
static struct buffer_head *ocfs2_find_entry_id(const char *name,
					       int namelen,
					       struct inode *dir,
					       struct ocfs2_dir_entry **res_dir)
{
	int status;
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_dinode *fe;

	status = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
				  &inode_bh, OCFS2_BH_CACHED, dir);
	if (status) {
		mlog_errno(status);
		return NULL;
	}

	fe = (struct ocfs2_dinode *)inode_bh->b_data;

	/* The inline area is searched like a block of i_size bytes. */
	status = ocfs2_search_dirblock(inode_bh, dir, name, namelen, 0,
				       fe->id2.i_data.id_data,
				       i_size_read(dir), res_dir);
	if (status != 1) {
		brelse(inode_bh);
		inode_bh = NULL;
	}

	return inode_bh;
}
/*
 * Look up name/namelen in an extent-based directory.
 *
 * Blocks are searched starting at the inode's ip_dir_start_lookup hint,
 * wrapping around to block 0, and are read in batches of up to
 * NAMEI_RA_SIZE buffers. On a hit the hint is updated to the matching
 * block, *res_dir points at the entry and the block's buffer_head is
 * returned. NULL is returned when the name is not found or when
 * ocfs2_search_dirblock() reports a bad block.
 */
struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
{
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
unsigned long start, block, b;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
buffer */
int num = 0;
int nblocks, i, err;
mlog_entry_void();
sb = dir->i_sb;
/* Directory size in whole blocks. */
nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
/* Begin at the cached hint; fall back to block 0 if it is stale. */
start = OCFS2_I(dir)->ip_dir_start_lookup;
if (start >= nblocks)
start = 0;
block = start;
restart:
do {
/*
* We deal with the read-ahead logic here.
*/
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
b = block;
for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
/*
* Terminate if we reach the end of the
* directory and must wrap, or if our
* search has finished at this block.
*/
if (b >= nblocks || (num && block == start)) {
bh_use[ra_max] = NULL;
break;
}
num++;
/* NOTE(review): err is never examined; a failed read shows up as a NULL bh below. */
bh = ocfs2_bread(dir, b++, &err, 1);
bh_use[ra_max] = bh;
}
}
/* A NULL slot means this block is skipped. */
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
block);
brelse(bh);
goto next;
}
i = ocfs2_search_dirblock(bh, dir, name, namelen,
block << sb->s_blocksize_bits,
bh->b_data, sb->s_blocksize,
res_dir);
if (i == 1) {
/* Found: remember this block for the next lookup. */
OCFS2_I(dir)->ip_dir_start_lookup = block;
ret = bh;
goto cleanup_and_exit;
} else {
brelse(bh);
/* A negative result (bad block) ends the search. */
if (i < 0)
goto cleanup_and_exit;
}
next:
/* Advance, wrapping to the first block at the end. */
if (++block >= nblocks)
block = 0;
} while (block != start);
/*
* If the directory has grown while we were searching, then
* search the last part of the directory before giving up.
*/
block = nblocks;
nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
if (block < nblocks) {
start = 0;
goto restart;
}
cleanup_and_exit:
/* Clean up the read-ahead blocks */
for (; ra_ptr < ra_max; ra_ptr++)
brelse(bh_use[ra_ptr]);
mlog_exit_ptr(ret);
return ret;
}
/*
* Try to find an entry of the provided name within 'dir'.
*
* If nothing was found, NULL is returned. Otherwise, a buffer_head
* and pointer to the dir entry are passed back.
* *
* Caller can NOT assume anything about the contents of the
* buffer_head - it is passed back only so that it can be passed into
* any one of the manipulation functions (add entry, delete entry,
* etc). As an example, bh in the extent directory case is a data
* block, in the inline-data case it actually points to an inode.
*/ */
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
{
*res_dir = NULL;
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return ocfs2_find_entry_id(name, namelen, dir, res_dir);
return ocfs2_find_entry_el(name, namelen, dir, res_dir);
}
/*
 * Update inode number and type of a previously found directory entry.
 *
 * The same code works fine for both inline-data and extent based
 * directories, so there is no need to split this up.
 *
 * Returns 0 on success or the error from ocfs2_journal_access().
 */
int ocfs2_update_entry(struct inode *dir, handle_t *handle,
		       struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
		       struct inode *new_entry_inode)
{
	int status = ocfs2_journal_access(handle, dir, de_bh,
					  OCFS2_JOURNAL_ACCESS_WRITE);

	if (status) {
		mlog_errno(status);
		return status;
	}

	de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
	ocfs2_set_de_type(de, new_entry_inode->i_mode);

	ocfs2_journal_dirty(handle, de_bh);

	return status;
}
/*
 * Remove de_del from the run of 'bytes' bytes of directory entries
 * starting at first_de, all of which live in bh.
 *
 * The entry is merged into its predecessor by growing the
 * predecessor's rec_len; the first entry of the run has no predecessor
 * and is instead marked free by zeroing its inode number.
 *
 * Returns the result of ocfs2_journal_dirty() once the entry has been
 * removed, -EIO if a corrupt entry is met or journal access fails, and
 * -ENOENT if de_del is not part of the run.
 */
static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
				struct ocfs2_dir_entry *de_del,
				struct buffer_head *bh, char *first_de,
				unsigned int bytes)
{
	struct ocfs2_dir_entry *cur = (struct ocfs2_dir_entry *) first_de;
	struct ocfs2_dir_entry *prev = NULL;
	int pos = 0, step;
	int status = -ENOENT;

	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);

	while (pos < bytes) {
		if (!ocfs2_check_dir_entry(dir, cur, bh, pos)) {
			status = -EIO;
			mlog_errno(status);
			break;
		}

		if (cur != de_del) {
			/* Not the victim: step to the next record. */
			step = le16_to_cpu(cur->rec_len);
			pos += step;
			prev = cur;
			cur = (struct ocfs2_dir_entry *)((char *)cur + step);
			continue;
		}

		status = ocfs2_journal_access(handle, dir, bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			status = -EIO;
			mlog_errno(status);
			break;
		}

		if (prev)
			prev->rec_len =
				cpu_to_le16(le16_to_cpu(prev->rec_len) +
					    le16_to_cpu(cur->rec_len));
		else
			cur->inode = 0;

		dir->i_version++;
		status = ocfs2_journal_dirty(handle, bh);
		break;
	}

	mlog_exit(status);
	return status;
}
/*
 * Delete de_del from a directory with inline data: the run of entries
 * to scan is the inode's inline data area, i_size bytes long.
 *
 * Returns the result of __ocfs2_delete_entry(), or the read error if
 * the inode block cannot be read.
 */
static inline int ocfs2_delete_entry_id(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	int status;
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_dinode *fe;

	status = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
				  &inode_bh, OCFS2_BH_CACHED, dir);
	if (status) {
		mlog_errno(status);
		return status;
	}

	fe = (struct ocfs2_dinode *)inode_bh->b_data;

	status = __ocfs2_delete_entry(handle, dir, de_del, bh,
				      fe->id2.i_data.id_data,
				      i_size_read(dir));

	brelse(inode_bh);
	return status;
}
/*
 * Delete de_del from an extent-based directory block: the run of
 * entries to scan is the whole data block held in bh.
 */
static inline int ocfs2_delete_entry_el(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	char *block_start = bh->b_data;

	return __ocfs2_delete_entry(handle, dir, de_del, bh, block_start,
				    bh->b_size);
}
/*
 * ocfs2_delete_entry deletes a directory entry by merging it with the
 * previous entry. The inline-data flag on 'dir' selects which helper
 * does the work.
 */
int ocfs2_delete_entry(handle_t *handle,
		       struct inode *dir,
		       struct ocfs2_dir_entry *de_del,
		       struct buffer_head *bh)
{
	if (!(OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
		return ocfs2_delete_entry_el(handle, dir, de_del, bh);

	return ocfs2_delete_entry_id(handle, dir, de_del, bh);
}
/*
 * Check whether 'de' has enough room to hold an entry of
 * 'new_rec_len' bytes.
 *
 * Returns 1 if it does, 0 otherwise.
 */
static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
					 unsigned int new_rec_len)
{
	unsigned int in_use;

	/* An empty record can be taken over whole if it is big enough. */
	if (le64_to_cpu(de->inode) == 0 &&
	    le16_to_cpu(de->rec_len) >= new_rec_len)
		return 1;

	/*
	 * Otherwise the record might have free space at its tail, past
	 * the bytes its own name really needs, which we can use.
	 */
	in_use = OCFS2_DIR_REC_LEN(de->name_len);

	return le16_to_cpu(de->rec_len) >= (in_use + new_rec_len) ? 1 : 0;
}
/* we don't always have a dentry for what we want to add, so people
 * like orphan dir can call this instead.
 *
 * The record is placed in insert_bh, which must not be NULL (it is
 * dereferenced unconditionally). For a directory with inline data,
 * insert_bh must be the inode block, parent_fe_bh.
 *
 * If blkno is zero the new entry is written with a zero inode number.
 *
 * Returns 0 on success, -EINVAL for an empty name, -EEXIST if the name
 * is already present, -ENOENT if a corrupt entry is met, or the error
 * from updating the inode or journaling the buffer.
 */
int __ocfs2_add_entry(handle_t *handle,
		      struct inode *dir,
		      const char *name, int namelen,
		      struct inode *inode, u64 blkno,
		      struct buffer_head *parent_fe_bh,
		      struct buffer_head *insert_bh)
{
	unsigned long offset;
	unsigned short rec_len;
	struct ocfs2_dir_entry *de, *de1;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
	struct super_block *sb = dir->i_sb;
	int retval;
	unsigned int size = sb->s_blocksize;
	char *data_start = insert_bh->b_data;

	mlog_entry_void();

	if (!namelen) {
		/* Leave through bail so entry/exit tracing stays paired. */
		retval = -EINVAL;
		goto bail;
	}

	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		data_start = di->id2.i_data.id_data;
		size = i_size_read(dir);

		BUG_ON(insert_bh != parent_fe_bh);
	}

	rec_len = OCFS2_DIR_REC_LEN(namelen);
	offset = 0;
	de = (struct ocfs2_dir_entry *) data_start;
	while (1) {
		BUG_ON((char *)de >= (size + data_start));

		/* These checks should've already been passed by the
		 * prepare function, but I guess we can leave them
		 * here anyway. */
		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
			retval = -ENOENT;
			goto bail;
		}
		if (ocfs2_match(namelen, name, de)) {
			retval = -EEXIST;
			goto bail;
		}

		if (ocfs2_dirent_would_fit(de, rec_len)) {
			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
			retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/*
			 * The buffer must not be touched unless the
			 * journal has granted write access to it.
			 */
			retval = ocfs2_journal_access(handle, dir, insert_bh,
						      OCFS2_JOURNAL_ACCESS_WRITE);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/* By now the buffer is marked for journaling */
			if (le64_to_cpu(de->inode)) {
				/*
				 * Entry is in use: split it, keeping only
				 * what its own name needs and handing the
				 * tail to the new record.
				 */
				de1 = (struct ocfs2_dir_entry *)((char *) de +
					OCFS2_DIR_REC_LEN(de->name_len));
				de1->rec_len =
					cpu_to_le16(le16_to_cpu(de->rec_len) -
					OCFS2_DIR_REC_LEN(de->name_len));
				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				de = de1;
			}
			de->file_type = OCFS2_FT_UNKNOWN;
			if (blkno) {
				de->inode = cpu_to_le64(blkno);
				ocfs2_set_de_type(de, inode->i_mode);
			} else
				de->inode = 0;
			de->name_len = namelen;
			memcpy(de->name, name, namelen);

			dir->i_version++;
			retval = ocfs2_journal_dirty(handle, insert_bh);
			if (retval)
				mlog_errno(retval);
			goto bail;
		}
		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
	}

	/* when you think about it, the assert above should prevent us
	 * from ever getting here. */
	retval = -ENOSPC;
bail:

	mlog_exit(retval);
	return retval;
}
/*
 * Feed the entries of a directory with inline data to filldir,
 * starting at *f_pos and advancing *f_pos as entries are consumed.
 *
 * *f_version is compared with inode->i_version to notice that the
 * directory changed since the position was recorded; when it did, the
 * position is re-derived by walking the entries from the start.
 *
 * A non-zero return from filldir stops the walk and, if filldir_err is
 * not NULL, is stored there.
 *
 * Returns 0 in every case except one: if the inode block cannot be
 * read, that error is returned. Reporting it matters because a caller
 * which loops until *f_pos reaches i_size would otherwise never see
 * progress or failure. A corrupt entry moves *f_pos to the end of the
 * directory and still returns 0.
 */
static int ocfs2_dir_foreach_blk_id(struct inode *inode,
				    unsigned long *f_version,
				    loff_t *f_pos, void *priv,
				    filldir_t filldir, int *filldir_err)
{
	int ret, i, filldir_ret;
	unsigned long offset = *f_pos;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_inline_data *data;
	struct ocfs2_dir_entry *de;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
			       &di_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return ret;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;
	data = &di->id2.i_data;

	while (*f_pos < i_size_read(inode)) {
revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now. Scan from the start of the block
		 * to make sure. */
		if (*f_version != inode->i_version) {
			for (i = 0; i < i_size_read(inode) && i < offset; ) {
				de = (struct ocfs2_dir_entry *)
					(data->id_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero. A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			*f_pos = offset = i;
			*f_version = inode->i_version;
		}

		de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos);
		if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) {
			/* On error, skip the f_pos to the end. */
			*f_pos = i_size_read(inode);
			goto out;
		}
		offset += le16_to_cpu(de->rec_len);
		if (le64_to_cpu(de->inode)) {
			/* We might block in the next section
			 * if the data destination is
			 * currently swapped out. So, use a
			 * version stamp to detect whether or
			 * not the directory has been modified
			 * during the copy operation.
			 */
			unsigned long version = *f_version;
			unsigned char d_type = DT_UNKNOWN;

			if (de->file_type < OCFS2_FT_MAX)
				d_type = ocfs2_filetype_table[de->file_type];

			filldir_ret = filldir(priv, de->name,
					      de->name_len,
					      *f_pos,
					      le64_to_cpu(de->inode),
					      d_type);
			if (filldir_ret) {
				if (filldir_err)
					*filldir_err = filldir_ret;
				break;
			}
			if (version != *f_version)
				goto revalidate;
		}
		*f_pos += le16_to_cpu(de->rec_len);
	}

out:
	brelse(di_bh);

	return 0;
}
static int ocfs2_dir_foreach_blk_el(struct inode *inode,
unsigned long *f_version,
loff_t *f_pos, void *priv,
filldir_t filldir, int *filldir_err)
{ {
int error = 0; int error = 0;
unsigned long offset, blk, last_ra_blk = 0; unsigned long offset, blk, last_ra_blk = 0;
...@@ -79,45 +687,23 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) ...@@ -79,45 +687,23 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
struct buffer_head * bh, * tmp; struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de; struct ocfs2_dir_entry * de;
int err; int err;
struct inode *inode = filp->f_path.dentry->d_inode;
struct super_block * sb = inode->i_sb; struct super_block * sb = inode->i_sb;
unsigned int ra_sectors = 16; unsigned int ra_sectors = 16;
int lock_level = 0;
mlog_entry("dirino=%llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
stored = 0; stored = 0;
bh = NULL; bh = NULL;
error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); offset = (*f_pos) & (sb->s_blocksize - 1);
if (lock_level && error >= 0) {
/* We release EX lock which used to update atime
* and get PR lock again to reduce contention
* on commonly accessed directories. */
ocfs2_meta_unlock(inode, 1);
lock_level = 0;
error = ocfs2_meta_lock(inode, NULL, 0);
}
if (error < 0) {
if (error != -ENOENT)
mlog_errno(error);
/* we haven't got any yet, so propagate the error. */
stored = error;
goto bail_nolock;
}
offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < i_size_read(inode)) { while (!error && !stored && *f_pos < i_size_read(inode)) {
blk = (filp->f_pos) >> sb->s_blocksize_bits; blk = (*f_pos) >> sb->s_blocksize_bits;
bh = ocfs2_bread(inode, blk, &err, 0); bh = ocfs2_bread(inode, blk, &err, 0);
if (!bh) { if (!bh) {
mlog(ML_ERROR, mlog(ML_ERROR,
"directory #%llu contains a hole at offset %lld\n", "directory #%llu contains a hole at offset %lld\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno,
filp->f_pos); *f_pos);
filp->f_pos += sb->s_blocksize - offset; *f_pos += sb->s_blocksize - offset;
continue; continue;
} }
...@@ -143,7 +729,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) ...@@ -143,7 +729,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
* readdir(2), then we might be pointing to an invalid * readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the block * dirent right now. Scan from the start of the block
* to make sure. */ * to make sure. */
if (filp->f_version != inode->i_version) { if (*f_version != inode->i_version) {
for (i = 0; i < sb->s_blocksize && i < offset; ) { for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ocfs2_dir_entry *) (bh->b_data + i); de = (struct ocfs2_dir_entry *) (bh->b_data + i);
/* It's too expensive to do a full /* It's too expensive to do a full
...@@ -158,21 +744,20 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) ...@@ -158,21 +744,20 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
i += le16_to_cpu(de->rec_len); i += le16_to_cpu(de->rec_len);
} }
offset = i; offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1))
| offset; | offset;
filp->f_version = inode->i_version; *f_version = inode->i_version;
} }
while (!error && filp->f_pos < i_size_read(inode) while (!error && *f_pos < i_size_read(inode)
&& offset < sb->s_blocksize) { && offset < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + offset); de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
/* On error, skip the f_pos to the /* On error, skip the f_pos to the
next block. */ next block. */
filp->f_pos = (filp->f_pos | *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1;
(sb->s_blocksize - 1)) + 1;
brelse(bh); brelse(bh);
goto bail; goto out;
} }
offset += le16_to_cpu(de->rec_len); offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) { if (le64_to_cpu(de->inode)) {
...@@ -183,172 +768,599 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) ...@@ -183,172 +768,599 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
* not the directory has been modified * not the directory has been modified
* during the copy operation. * during the copy operation.
*/ */
unsigned long version = filp->f_version; unsigned long version = *f_version;
unsigned char d_type = DT_UNKNOWN; unsigned char d_type = DT_UNKNOWN;
if (de->file_type < OCFS2_FT_MAX) if (de->file_type < OCFS2_FT_MAX)
d_type = ocfs2_filetype_table[de->file_type]; d_type = ocfs2_filetype_table[de->file_type];
error = filldir(dirent, de->name, error = filldir(priv, de->name,
de->name_len, de->name_len,
filp->f_pos, *f_pos,
ino_from_blkno(sb, le64_to_cpu(de->inode)), le64_to_cpu(de->inode),
d_type); d_type);
if (error) if (error) {
if (filldir_err)
*filldir_err = error;
break; break;
if (version != filp->f_version) }
if (version != *f_version)
goto revalidate; goto revalidate;
stored ++; stored ++;
} }
filp->f_pos += le16_to_cpu(de->rec_len); *f_pos += le16_to_cpu(de->rec_len);
} }
offset = 0; offset = 0;
brelse(bh); brelse(bh);
} }
stored = 0; stored = 0;
bail: out:
return stored;
}
/*
 * Run one directory listing pass, using the inode's inline-data flag
 * to choose between the inline and the extent-based implementation.
 * All arguments are handed through unchanged.
 */
static int ocfs2_dir_foreach_blk(struct inode *inode, unsigned long *f_version,
				 loff_t *f_pos, void *priv, filldir_t filldir,
				 int *filldir_err)
{
	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
		return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv,
						filldir, filldir_err);

	return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv,
					filldir, filldir_err);
}
/*
 * This is intended to be called from inside other kernel functions,
 * so we fake some arguments.
 *
 * Entries are handed to filldir from *f_pos until the end of the
 * directory is reached, a listing pass returns non-zero, or filldir
 * returns non-zero. Always returns 0.
 */
int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
		      filldir_t filldir)
{
	int ret = 0, filldir_err = 0;
	unsigned long version = inode->i_version;

	do {
		if (*f_pos >= i_size_read(inode))
			break;

		ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
					    filldir, &filldir_err);
	} while (!ret && !filldir_err);

	/*
	 * NOTE(review): ret is normalised here but never returned, so
	 * callers cannot observe a failed pass - confirm whether that
	 * is intended before changing the return value.
	 */
	if (ret > 0)
		ret = -EIO;

	return 0;
}
/*
 * ocfs2_readdir()
 *
 * Take the inode's cluster lock, run one listing pass from the file's
 * current position and release the lock again. Returns the locking
 * error if the lock could not be taken, otherwise whatever the listing
 * pass returned.
 */
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	int error = 0;
	int lock_level = 0;
	struct inode *inode = filp->f_path.dentry->d_inode;

	mlog_entry("dirino=%llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);

	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (error >= 0 && lock_level) {
		/* We release EX lock which used to update atime
		 * and get PR lock again to reduce contention
		 * on commonly accessed directories. */
		ocfs2_meta_unlock(inode, 1);
		lock_level = 0;
		error = ocfs2_meta_lock(inode, NULL, 0);
	}

	if (error >= 0) {
		error = ocfs2_dir_foreach_blk(inode, &filp->f_version,
					      &filp->f_pos, dirent, filldir,
					      NULL);

		ocfs2_meta_unlock(inode, lock_level);
	} else if (error != -ENOENT) {
		/* we haven't got any yet, so propagate the error. */
		mlog_errno(error);
	}

	mlog_exit(error);

	return error;
}
/*
 * Look up name/namelen in directory 'inode'.
 *
 * On success 0 is returned, *blkno holds the entry's inode number and
 * *dirent_bh / *dirent reference the entry. When the name is not found
 * -ENOENT is returned with *dirent and *dirent_bh both set to NULL.
 *
 * NOTE: this should always be called with parent dir i_mutex taken.
 */
int ocfs2_find_files_on_disk(const char *name,
			     int namelen,
			     u64 *blkno,
			     struct inode *inode,
			     struct buffer_head **dirent_bh,
			     struct ocfs2_dir_entry **dirent)
{
	int status;

	mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n",
		   namelen, name, blkno, inode, dirent_bh, dirent);

	*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
	if (*dirent_bh && *dirent) {
		*blkno = le64_to_cpu((*dirent)->inode);
		status = 0;
	} else {
		status = -ENOENT;

		/* Hand nothing back on failure. */
		*dirent = NULL;
		if (*dirent_bh) {
			brelse(*dirent_bh);
			*dirent_bh = NULL;
		}
	}

	mlog_exit(status);
	return status;
}
/*
 * Convenience function for callers which just want the block number
 * mapped to a name and don't require the full dirent info, etc.
 *
 * Returns whatever ocfs2_find_files_on_disk() returns; the buffer it
 * hands back is released here.
 */
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
			       int namelen, u64 *blkno)
{
	int status;
	struct buffer_head *entry_bh = NULL;
	struct ocfs2_dir_entry *entry = NULL;

	status = ocfs2_find_files_on_disk(name, namelen, blkno, dir,
					  &entry_bh, &entry);
	brelse(entry_bh);

	return status;
}
/* Check for a name within a directory.
 *
 * Return 0 if the name does not exist
 * Return -EEXIST if the directory contains the name
 *
 * Callers should have i_mutex + a cluster lock on dir
 */
int ocfs2_check_dir_for_entry(struct inode *dir,
			      const char *name,
			      int namelen)
{
	int ret = 0;
	struct buffer_head *found_bh;
	struct ocfs2_dir_entry *found_de = NULL;

	mlog_entry("dir %llu, name '%.*s'\n",
		   (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);

	found_bh = ocfs2_find_entry(name, namelen, dir, &found_de);
	if (found_bh) {
		ret = -EEXIST;
		brelse(found_bh);
	}

	mlog_exit(ret);
	return ret;
}
struct ocfs2_empty_dir_priv {
unsigned seen_dot;
unsigned seen_dot_dot;
unsigned seen_other;
};
/*
 * filldir callback used by ocfs2_empty_dir(). Records in 'priv' which
 * kinds of entries were seen. Returns 0 for "." and ".." found at
 * their expected positions so the walk continues, and 1 for anything
 * else, which stops the walk.
 */
static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
				   loff_t pos, u64 ino, unsigned type)
{
	struct ocfs2_empty_dir_priv *seen = priv;

	/*
	 * Check the positions of "." and ".." records to be sure
	 * they're in the correct place.
	 */
	if (name_len == 1 && name[0] == '.' && pos == 0) {
		seen->seen_dot = 1;
		return 0;
	}

	if (name_len == 2 && name[0] == '.' && name[1] == '.' &&
	    pos == OCFS2_DIR_REC_LEN(1)) {
		seen->seen_dot_dot = 1;
		return 0;
	}

	seen->seen_other = 1;
	return 1;
}
/*
* routine to check that the specified directory is empty (for rmdir)
*
* Returns 1 if dir is empty, zero otherwise.
*/
int ocfs2_empty_dir(struct inode *inode)
{
int ret;
loff_t start = 0;
struct ocfs2_empty_dir_priv priv;
memset(&priv, 0, sizeof(priv));
ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
if (ret)
mlog_errno(ret);
if (!priv.seen_dot || !priv.seen_dot_dot) {
mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
/*
* XXX: Is it really safe to allow an unlink to continue?
*/
return 1;
}
return !priv.seen_other;
}
/*
 * Write the two entries every new directory starts with, "." and "..",
 * into the buffer beginning at 'start'.
 *
 * "." refers to 'inode' itself and gets the record length of a
 * one-character name. ".." refers to 'parent' and its record length is
 * 'size' minus the length of the "." record, i.e. it covers the rest
 * of the 'size' bytes.
 */
static void ocfs2_fill_initial_dirents(struct inode *inode,
				       struct inode *parent,
				       char *start, unsigned int size)
{
	struct ocfs2_dir_entry *dot = (struct ocfs2_dir_entry *)start;
	struct ocfs2_dir_entry *dotdot;

	dot->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
	dot->name_len = 1;
	dot->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(dot->name_len));
	strcpy(dot->name, ".");
	ocfs2_set_de_type(dot, S_IFDIR);

	/* The ".." record begins where the "." record ends. */
	dotdot = (struct ocfs2_dir_entry *)(start + le16_to_cpu(dot->rec_len));

	dotdot->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
	dotdot->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
	dotdot->name_len = 2;
	strcpy(dotdot->name, "..");
	ocfs2_set_de_type(dotdot, S_IFDIR);
}
/*
 * Populate a newly created inline-data directory with its "." and ".."
 * entries.
 *
 * This works together with code in ocfs2_mknod_locked() which sets
 * the inline-data flag and initializes the inline-data section.
 *
 * The entries are written into the inline data area of the dinode held
 * in 'di_bh' (id_count bytes) under journal write access. i_size,
 * i_nlink and i_blocks are then updated and the inode is marked dirty.
 *
 * Returns 0 on success, or the error reported by the failing journal
 * or inode-dirty call.
 */
static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *di_bh)
{
	int ret;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inline_data *data = &di->id2.i_data;
	unsigned int size = le16_to_cpu(data->id_count);

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);

	/*
	 * Capture the result: without the assignment the check below
	 * would only ever see the 0 left by ocfs2_journal_access().
	 */
	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	i_size_write(inode, size);
	inode->i_nlink = 2;
	inode->i_blocks = ocfs2_inode_sector_count(inode);

	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
	if (ret < 0)
		mlog_errno(ret);

out:
	return ret;
}
/* static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
* NOTE: this should always be called with parent dir i_mutex taken. handle_t *handle,
*/ struct inode *parent,
int ocfs2_find_files_on_disk(const char *name,
int namelen,
u64 *blkno,
struct inode *inode, struct inode *inode,
struct buffer_head **dirent_bh, struct buffer_head *fe_bh,
struct ocfs2_dir_entry **dirent) struct ocfs2_alloc_context *data_ac)
{ {
int status = -ENOENT; int status;
struct buffer_head *new_bh = NULL;
mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", mlog_entry_void();
namelen, name, blkno, inode, dirent_bh, dirent);
*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
if (!*dirent_bh || !*dirent) { data_ac, NULL, &new_bh);
status = -ENOENT; if (status < 0) {
goto leave; mlog_errno(status);
goto bail;
} }
*blkno = le64_to_cpu((*dirent)->inode); ocfs2_set_new_buffer_uptodate(inode, new_bh);
status = 0; status = ocfs2_journal_access(handle, inode, new_bh,
leave: OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) { if (status < 0) {
*dirent = NULL; mlog_errno(status);
if (*dirent_bh) { goto bail;
brelse(*dirent_bh); }
*dirent_bh = NULL; memset(new_bh->b_data, 0, osb->sb->s_blocksize);
ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data,
osb->sb->s_blocksize);
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
} }
i_size_write(inode, inode->i_sb->s_blocksize);
inode->i_nlink = 2;
inode->i_blocks = ocfs2_inode_sector_count(inode);
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
} }
status = 0;
bail:
if (new_bh)
brelse(new_bh);
mlog_exit(status); mlog_exit(status);
return status; return status;
} }
/* Check for a name within a directory. int ocfs2_fill_new_dir(struct ocfs2_super *osb,
* handle_t *handle,
* Return 0 if the name does not exist struct inode *parent,
* Return -EEXIST if the directory contains the name struct inode *inode,
* struct buffer_head *fe_bh,
* Callers should have i_mutex + a cluster lock on dir struct ocfs2_alloc_context *data_ac)
*/
int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen)
{ {
int ret; BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *dirent = NULL;
mlog_entry("dir %llu, name '%.*s'\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
ret = -EEXIST; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
if (dirent_bh)
goto bail;
ret = 0; return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
bail: data_ac);
if (dirent_bh) }
brelse(dirent_bh);
/*
 * Grow the final directory entry of a dirent buffer so that it also
 * covers newly added space.
 *
 * The entries starting at 'start' are walked by their rec_len until
 * the 'old_size' limit is reached; the rec_len of the last entry
 * visited is then increased by (new_size - old_size).
 */
static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
				     unsigned int new_size)
{
	struct ocfs2_dir_entry *de;
	struct ocfs2_dir_entry *prev_de;
	char *de_buf, *limit;
	unsigned int bytes = new_size - old_size;

	limit = start + old_size;
	de_buf = start;
	de = (struct ocfs2_dir_entry *)de_buf;
	do {
		prev_de = de;
		de_buf += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *)de_buf;
	} while (de_buf < limit);

	le16_add_cpu(&prev_de->rec_len, bytes);
}
/* /*
* routine to check that the specified directory is empty (for rmdir) * We allocate enough clusters to fulfill "blocks_wanted", but set
* i_size to exactly one block. Ocfs2_extend_dir() will handle the
* rest automatically for us.
*
* *first_block_bh is a pointer to the 1st data block allocated to the
* directory.
*/ */
int ocfs2_empty_dir(struct inode *inode) static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
unsigned int blocks_wanted,
struct buffer_head **first_block_bh)
{ {
unsigned long offset; int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
struct buffer_head * bh; u32 alloc, bit_off, len;
struct ocfs2_dir_entry * de, * de1; struct super_block *sb = dir->i_sb;
struct super_block * sb; u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
int err; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(dir);
struct ocfs2_alloc_context *data_ac;
struct buffer_head *dirdata_bh = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
handle_t *handle;
alloc = ocfs2_clusters_for_bytes(sb, bytes);
/*
* We should never need more than 2 clusters for this -
* maximum dirent size is far less than one block. In fact,
* the only time we'd need more than one cluster is if
* blocksize == clustersize and the dirent won't fit in the
* extra space that the expansion to a single block gives. As
* of today, that only happens on 4k/4k file systems.
*/
BUG_ON(alloc > 2);
sb = inode->i_sb; ret = ocfs2_reserve_clusters(osb, alloc, &data_ac);
if ((i_size_read(inode) < if (ret) {
(OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) || mlog_errno(ret);
!(bh = ocfs2_bread(inode, 0, &err, 0))) { goto out;
mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
return 1;
} }
de = (struct ocfs2_dir_entry *) bh->b_data; down_write(&oi->ip_alloc_sem);
de1 = (struct ocfs2_dir_entry *)
((char *)de + le16_to_cpu(de->rec_len)); /*
if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) || * Prepare for worst case allocation scenario of two seperate
!le64_to_cpu(de1->inode) || * extents.
strcmp(".", de->name) || */
strcmp("..", de1->name)) { if (alloc == 2)
mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n", credits += OCFS2_SUBALLOC_ALLOC;
(unsigned long long)OCFS2_I(inode)->ip_blkno);
brelse(bh); handle = ocfs2_start_trans(osb, credits);
return 1; if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out_sem;
} }
offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len)); /*
while (offset < i_size_read(inode) ) { * Try to claim as many clusters as the bitmap can give though
if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) { * if we only get one now, that's enough to continue. The rest
brelse(bh); * will be claimed after the conversion to extents.
bh = ocfs2_bread(inode, */
offset >> sb->s_blocksize_bits, &err, 0); ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
if (!bh) { if (ret) {
mlog(ML_ERROR, "dir %llu has a hole at %lu\n", mlog_errno(ret);
(unsigned long long)OCFS2_I(inode)->ip_blkno, offset); goto out_commit;
offset += sb->s_blocksize;
continue;
} }
de = (struct ocfs2_dir_entry *) bh->b_data;
/*
* Operations are carefully ordered so that we set up the new
* data block first. The conversion from inline data to
* extents follows.
*/
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
dirdata_bh = sb_getblk(sb, blkno);
if (!dirdata_bh) {
ret = -EIO;
mlog_errno(ret);
goto out_commit;
} }
if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
brelse(bh); ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
return 1;
ret = ocfs2_journal_access(handle, dir, dirdata_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret) {
mlog_errno(ret);
goto out_commit;
} }
if (le64_to_cpu(de->inode)) {
brelse(bh); memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
return 0; memset(dirdata_bh->b_data + i_size_read(dir), 0,
sb->s_blocksize - i_size_read(dir));
ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir),
sb->s_blocksize);
ret = ocfs2_journal_dirty(handle, dirdata_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
} }
offset += le16_to_cpu(de->rec_len);
de = (struct ocfs2_dir_entry *) /*
((char *)de + le16_to_cpu(de->rec_len)); * Set extent, i_size, etc on the directory. After this, the
* inode should contain the same exact dirents as before and
* be fully accessible from system calls.
*
* We let the later dirent insert modify c/mtime - to the user
* the data hasn't changed.
*/
ret = ocfs2_journal_access(handle, dir, di_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret) {
mlog_errno(ret);
goto out_commit;
} }
brelse(bh);
return 1; spin_lock(&oi->ip_lock);
oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
spin_unlock(&oi->ip_lock);
ocfs2_dinode_new_extent_list(dir, di);
i_size_write(dir, sb->s_blocksize);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
di->i_size = cpu_to_le64(sb->s_blocksize);
di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
dir->i_blocks = ocfs2_inode_sector_count(dir);
/*
* This should never fail as our extent list is empty and all
* related blocks have been journaled already.
*/
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
NULL);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_journal_dirty(handle, di_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
/*
* We asked for two clusters, but only got one in the 1st
* pass. Claim the 2nd cluster as a separate extent.
*/
if (alloc > len) {
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
&len);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
len, 0, NULL);
if (ret) {
mlog_errno(ret);
goto out;
}
}
*first_block_bh = dirdata_bh;
dirdata_bh = NULL;
out_commit:
ocfs2_commit_trans(osb, handle);
out_sem:
up_write(&oi->ip_alloc_sem);
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
brelse(dirdata_bh);
return ret;
} }
/* returns a bh of the 1st new block in the allocation. */ /* returns a bh of the 1st new block in the allocation. */
int ocfs2_do_extend_dir(struct super_block *sb, static int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle, handle_t *handle,
struct inode *dir, struct inode *dir,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
...@@ -396,10 +1408,18 @@ int ocfs2_do_extend_dir(struct super_block *sb, ...@@ -396,10 +1408,18 @@ int ocfs2_do_extend_dir(struct super_block *sb,
return status; return status;
} }
/* assumes you already have a cluster lock on the directory. */ /*
* Assumes you already have a cluster lock on the directory.
*
* 'blocks_wanted' is only used if we have an inline directory which
* is to be turned into an extent based one. The size of the dirent to
* insert might be larger than the space gained by growing to just one
* block, so we may have to grow the inode by two blocks in that case.
*/
static int ocfs2_extend_dir(struct ocfs2_super *osb, static int ocfs2_extend_dir(struct ocfs2_super *osb,
struct inode *dir, struct inode *dir,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
unsigned int blocks_wanted,
struct buffer_head **new_de_bh) struct buffer_head **new_de_bh)
{ {
int status = 0; int status = 0;
...@@ -415,6 +1435,38 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, ...@@ -415,6 +1435,38 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
mlog_entry_void(); mlog_entry_void();
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
blocks_wanted, &new_bh);
if (status) {
mlog_errno(status);
goto bail;
}
if (blocks_wanted == 1) {
/*
* If the new dirent will fit inside the space
* created by pushing out to one block, then
* we can complete the operation
* here. Otherwise we have to expand i_size
* and format the 2nd block below.
*/
BUG_ON(new_bh == NULL);
goto bail_bh;
}
/*
* Get rid of 'new_bh' - we want to format the 2nd
* data block and return that instead.
*/
brelse(new_bh);
new_bh = NULL;
dir_i_size = i_size_read(dir);
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
goto do_extend;
}
dir_i_size = i_size_read(dir); dir_i_size = i_size_read(dir);
mlog(0, "extending dir %llu (i_size = %lld)\n", mlog(0, "extending dir %llu (i_size = %lld)\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
...@@ -452,6 +1504,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, ...@@ -452,6 +1504,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
} }
do_extend:
down_write(&OCFS2_I(dir)->ip_alloc_sem); down_write(&OCFS2_I(dir)->ip_alloc_sem);
drop_alloc_sem = 1; drop_alloc_sem = 1;
...@@ -497,6 +1550,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, ...@@ -497,6 +1550,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
goto bail; goto bail;
} }
bail_bh:
*new_de_bh = new_bh; *new_de_bh = new_bh;
get_bh(*new_de_bh); get_bh(*new_de_bh);
bail: bail:
...@@ -517,41 +1571,71 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, ...@@ -517,41 +1571,71 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
return status; return status;
} }
/* static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
* Search the dir for a good spot, extending it if necessary. The const char *name, int namelen,
* block containing an appropriate record is returned in ret_de_bh. struct buffer_head **ret_de_bh,
*/ unsigned int *blocks_wanted)
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
struct inode *dir,
struct buffer_head *parent_fe_bh,
const char *name,
int namelen,
struct buffer_head **ret_de_bh)
{ {
unsigned long offset; int ret;
struct buffer_head * bh = NULL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
unsigned short rec_len; struct ocfs2_dir_entry *de, *last_de = NULL;
struct ocfs2_dinode *fe; char *de_buf, *limit;
struct ocfs2_dir_entry *de; unsigned long offset = 0;
struct super_block *sb; unsigned int rec_len, new_rec_len;
int status;
de_buf = di->id2.i_data.id_data;
limit = de_buf + i_size_read(dir);
rec_len = OCFS2_DIR_REC_LEN(namelen);
mlog_entry_void(); while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
mlog(0, "getting ready to insert namelen %d into dir %llu\n", if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); ret = -ENOENT;
goto out;
}
if (ocfs2_match(namelen, name, de)) {
ret = -EEXIST;
goto out;
}
if (ocfs2_dirent_would_fit(de, rec_len)) {
/* Ok, we found a spot. Return this bh and let
* the caller actually fill it in. */
*ret_de_bh = di_bh;
get_bh(*ret_de_bh);
ret = 0;
goto out;
}
BUG_ON(!S_ISDIR(dir->i_mode)); last_de = de;
fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; de_buf += le16_to_cpu(de->rec_len);
BUG_ON(le64_to_cpu(fe->i_size) != i_size_read(dir)); offset += le16_to_cpu(de->rec_len);
}
sb = dir->i_sb; /*
* We're going to require expansion of the directory - figure
* out how many blocks we'll need so that a place for the
* dirent can be found.
*/
*blocks_wanted = 1;
new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir));
if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
*blocks_wanted = 2;
if (!namelen) { ret = -ENOSPC;
status = -EINVAL; out:
mlog_errno(status); return ret;
goto bail; }
}
static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
int namelen, struct buffer_head **ret_de_bh)
{
unsigned long offset;
struct buffer_head *bh = NULL;
unsigned short rec_len;
struct ocfs2_dir_entry *de;
struct super_block *sb = dir->i_sb;
int status;
bh = ocfs2_bread(dir, 0, &status, 0); bh = ocfs2_bread(dir, 0, &status, 0);
if (!bh) { if (!bh) {
...@@ -568,17 +1652,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, ...@@ -568,17 +1652,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
bh = NULL; bh = NULL;
if (i_size_read(dir) <= offset) { if (i_size_read(dir) <= offset) {
status = ocfs2_extend_dir(osb, /*
dir, * Caller will have to expand this
parent_fe_bh, * directory.
&bh); */
if (status < 0) { status = -ENOSPC;
mlog_errno(status);
goto bail;
}
BUG_ON(!bh);
*ret_de_bh = bh;
get_bh(*ret_de_bh);
goto bail; goto bail;
} }
bh = ocfs2_bread(dir, bh = ocfs2_bread(dir,
...@@ -600,10 +1678,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, ...@@ -600,10 +1678,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
status = -EEXIST; status = -EEXIST;
goto bail; goto bail;
} }
if (((le64_to_cpu(de->inode) == 0) && if (ocfs2_dirent_would_fit(de, rec_len)) {
(le16_to_cpu(de->rec_len) >= rec_len)) ||
(le16_to_cpu(de->rec_len) >=
(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
/* Ok, we found a spot. Return this bh and let /* Ok, we found a spot. Return this bh and let
* the caller actually fill it in. */ * the caller actually fill it in. */
*ret_de_bh = bh; *ret_de_bh = bh;
...@@ -623,3 +1698,61 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, ...@@ -623,3 +1698,61 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
mlog_exit(status); mlog_exit(status);
return status; return status;
} }
/*
 * Find a place in 'dir' for a new entry called 'name', expanding the
 * directory when the existing space cannot hold it.
 *
 * Inline-data directories are searched with ocfs2_find_dir_space_id(),
 * all others with ocfs2_find_dir_space_el(). If the search reports
 * -ENOSPC the directory is grown through ocfs2_extend_dir().
 *
 * On success 0 is returned and *ret_de_bh is the buffer the entry
 * should be placed in. On failure *ret_de_bh is NULL and the error is
 * returned; a zero 'namelen' yields -EINVAL.
 */
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
				 struct inode *dir,
				 struct buffer_head *parent_fe_bh,
				 const char *name,
				 int namelen,
				 struct buffer_head **ret_de_bh)
{
	int status;
	unsigned int nr_blocks = 1;
	struct buffer_head *de_bh = NULL;

	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
	     namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);

	*ret_de_bh = NULL;

	if (!namelen) {
		status = -EINVAL;
		mlog_errno(status);
		goto out;
	}

	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		status = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
						 namelen, &de_bh, &nr_blocks);
	else
		status = ocfs2_find_dir_space_el(dir, name, namelen, &de_bh);

	if (status == -ENOSPC) {
		/*
		 * No room in what is there now - the directory has to
		 * be expanded to add this name.
		 */
		BUG_ON(de_bh);

		status = ocfs2_extend_dir(osb, dir, parent_fe_bh, nr_blocks,
					  &de_bh);
		if (status) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto out;
		}

		BUG_ON(!de_bh);
	} else if (status) {
		mlog_errno(status);
		goto out;
	}

	/* Pass the buffer on to the caller. */
	*ret_de_bh = de_bh;
	de_bh = NULL;
out:
	if (de_bh)
		brelse(de_bh);
	return status;
}
...@@ -26,17 +26,49 @@ ...@@ -26,17 +26,49 @@
#ifndef OCFS2_DIR_H #ifndef OCFS2_DIR_H
#define OCFS2_DIR_H #define OCFS2_DIR_H
struct buffer_head *ocfs2_find_entry(const char *name,
int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir);
int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
int __ocfs2_add_entry(handle_t *handle,
struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh);
/*
 * Dentry-based front end to __ocfs2_add_entry(): the directory is the
 * inode of the dentry's parent and the name comes from d_name.
 */
static inline int ocfs2_add_entry(handle_t *handle,
				  struct dentry *dentry,
				  struct inode *inode, u64 blkno,
				  struct buffer_head *parent_fe_bh,
				  struct buffer_head *insert_bh)
{
	struct inode *dir = dentry->d_parent->d_inode;

	return __ocfs2_add_entry(handle, dir,
				 dentry->d_name.name, dentry->d_name.len,
				 inode, blkno, parent_fe_bh, insert_bh);
}
int ocfs2_update_entry(struct inode *dir, handle_t *handle,
struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
struct inode *new_entry_inode);
int ocfs2_check_dir_for_entry(struct inode *dir, int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name, const char *name,
int namelen); int namelen);
int ocfs2_empty_dir(struct inode *inode); /* FIXME: to namei.c */ int ocfs2_empty_dir(struct inode *inode);
int ocfs2_find_files_on_disk(const char *name, int ocfs2_find_files_on_disk(const char *name,
int namelen, int namelen,
u64 *blkno, u64 *blkno,
struct inode *inode, struct inode *inode,
struct buffer_head **dirent_bh, struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent); struct ocfs2_dir_entry **dirent);
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
int namelen, u64 *blkno);
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
filldir_t filldir);
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
struct inode *dir, struct inode *dir,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
...@@ -44,11 +76,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, ...@@ -44,11 +76,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
int namelen, int namelen,
struct buffer_head **ret_de_bh); struct buffer_head **ret_de_bh);
struct ocfs2_alloc_context; struct ocfs2_alloc_context;
int ocfs2_do_extend_dir(struct super_block *sb, int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle, handle_t *handle,
struct inode *dir, struct inode *parent,
struct buffer_head *parent_fe_bh, struct inode *inode,
struct ocfs2_alloc_context *data_ac, struct buffer_head *fe_bh,
struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac);
struct buffer_head **new_bh);
#endif /* OCFS2_DIR_H */ #endif /* OCFS2_DIR_H */
...@@ -1482,6 +1482,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) ...@@ -1482,6 +1482,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_imtime_packed = lvb->lvb_imtime_packed =
cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
out: out:
...@@ -1515,6 +1516,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) ...@@ -1515,6 +1516,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
ocfs2_set_inode_flags(inode); ocfs2_set_inode_flags(inode);
/* fast-symlinks are a special case */ /* fast-symlinks are a special case */
......
...@@ -29,12 +29,12 @@ ...@@ -29,12 +29,12 @@
#include "dcache.h" #include "dcache.h"
#define OCFS2_LVB_VERSION 4 #define OCFS2_LVB_VERSION 5
struct ocfs2_meta_lvb { struct ocfs2_meta_lvb {
__u8 lvb_version; __u8 lvb_version;
__u8 lvb_reserved0; __u8 lvb_reserved0;
__be16 lvb_reserved1; __be16 lvb_idynfeatures;
__be32 lvb_iclusters; __be32 lvb_iclusters;
__be32 lvb_iuid; __be32 lvb_iuid;
__be32 lvb_igid; __be32 lvb_igid;
......
...@@ -88,8 +88,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) ...@@ -88,8 +88,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
struct dentry *parent; struct dentry *parent;
struct inode *inode; struct inode *inode;
struct inode *dir = child->d_inode; struct inode *dir = child->d_inode;
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *dirent;
mlog_entry("(0x%p, '%.*s')\n", child, mlog_entry("(0x%p, '%.*s')\n", child,
child->d_name.len, child->d_name.name); child->d_name.len, child->d_name.name);
...@@ -105,8 +103,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) ...@@ -105,8 +103,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail; goto bail;
} }
status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh, status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
&dirent);
if (status < 0) { if (status < 0) {
parent = ERR_PTR(-ENOENT); parent = ERR_PTR(-ENOENT);
goto bail_unlock; goto bail_unlock;
...@@ -131,9 +128,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) ...@@ -131,9 +128,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
bail_unlock: bail_unlock:
ocfs2_meta_unlock(dir, 0); ocfs2_meta_unlock(dir, 0);
if (dirent_bh)
brelse(dirent_bh);
bail: bail:
mlog_exit_ptr(parent); mlog_exit_ptr(parent);
......
...@@ -387,6 +387,12 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, ...@@ -387,6 +387,12 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
struct ocfs2_extent_rec *rec; struct ocfs2_extent_rec *rec;
u32 coff; u32 coff;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = -ERANGE;
mlog_errno(ret);
goto out;
}
ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
num_clusters, extent_flags); num_clusters, extent_flags);
if (ret == 0) if (ret == 0)
......
...@@ -397,6 +397,15 @@ static int ocfs2_truncate_file(struct inode *inode, ...@@ -397,6 +397,15 @@ static int ocfs2_truncate_file(struct inode *inode,
unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(inode->i_mapping, new_i_size); truncate_inode_pages(inode->i_mapping, new_i_size);
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
i_size_read(inode), 0);
if (status)
mlog_errno(status);
goto bail_unlock_data;
}
/* alright, we're going to need to do a full blown alloc size /* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the * change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking * truncate if necessary. This does the task of marking
...@@ -779,25 +788,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, ...@@ -779,25 +788,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
return status; return status;
} }
/*
 * Locked wrapper around __ocfs2_extend_allocation(): the call is made
 * with the inode's ip_alloc_sem held for writing and its result is
 * returned unchanged.
 */
static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				   u32 clusters_to_add, int mark_unwritten)
{
	int status;

	/*
	 * The alloc sem blocks people in read/write from reading our
	 * allocation until we're done changing it. We depend on
	 * i_mutex to block other extend/truncate calls while we're
	 * here.
	 */
	down_write(&OCFS2_I(inode)->ip_alloc_sem);

	status = __ocfs2_extend_allocation(inode, logical_start,
					   clusters_to_add, mark_unwritten);

	up_write(&OCFS2_I(inode)->ip_alloc_sem);

	return status;
}
/* Some parts of this taken from generic_cont_expand, which turned out /* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to * to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->prepare_write() and * worry about recursive locking in ->prepare_write() and
...@@ -889,25 +879,48 @@ static int ocfs2_zero_extend(struct inode *inode, ...@@ -889,25 +879,48 @@ static int ocfs2_zero_extend(struct inode *inode,
return ret; return ret;
} }
/* int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
* A tail_to_skip value > 0 indicates that we're being called from {
* ocfs2_file_aio_write(). This has the following implications: int ret;
* u32 clusters_to_add;
* - we don't want to update i_size struct ocfs2_inode_info *oi = OCFS2_I(inode);
* - di_bh will be NULL, which is fine because it's only used in the
* case where we want to update i_size. clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
* - ocfs2_zero_extend() will then only be filling the hole created if (clusters_to_add < oi->ip_clusters)
* between i_size and the start of the write. clusters_to_add = 0;
else
clusters_to_add -= oi->ip_clusters;
if (clusters_to_add) {
ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
clusters_to_add, 0);
if (ret) {
mlog_errno(ret);
goto out;
}
}
/*
* Call this even if we don't add any clusters to the tree. We
* still need to zero the area between the old i_size and the
* new i_size.
*/ */
ret = ocfs2_zero_extend(inode, zero_to);
if (ret < 0)
mlog_errno(ret);
out:
return ret;
}
static int ocfs2_extend_file(struct inode *inode, static int ocfs2_extend_file(struct inode *inode,
struct buffer_head *di_bh, struct buffer_head *di_bh,
u64 new_i_size, u64 new_i_size)
size_t tail_to_skip)
{ {
int ret = 0; int ret = 0, data_locked = 0;
u32 clusters_to_add = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode);
BUG_ON(!tail_to_skip && !di_bh); BUG_ON(!di_bh);
/* setattr sometimes calls us like this. */ /* setattr sometimes calls us like this. */
if (new_i_size == 0) if (new_i_size == 0)
...@@ -917,13 +930,18 @@ static int ocfs2_extend_file(struct inode *inode, ...@@ -917,13 +930,18 @@ static int ocfs2_extend_file(struct inode *inode,
goto out; goto out;
BUG_ON(new_i_size < i_size_read(inode)); BUG_ON(new_i_size < i_size_read(inode));
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { /*
BUG_ON(tail_to_skip != 0); * Fall through for converting inline data, even if the fs
* supports sparse files.
*
* The check for inline data here is legal - nobody can add
* the feature since we have i_mutex. We must check it again
* after acquiring ip_alloc_sem though, as paths like mmap
* might have raced us to converting the inode to extents.
*/
if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
&& ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
goto out_update_size; goto out_update_size;
}
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) -
OCFS2_I(inode)->ip_clusters;
/* /*
* protect the pages that ocfs2_zero_extend is going to be * protect the pages that ocfs2_zero_extend is going to be
...@@ -937,39 +955,52 @@ static int ocfs2_extend_file(struct inode *inode, ...@@ -937,39 +955,52 @@ static int ocfs2_extend_file(struct inode *inode,
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
data_locked = 1;
/*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
* here.
*/
down_write(&oi->ip_alloc_sem);
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
/*
* We can optimize small extends by keeping the inodes
* inline data.
*/
if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
up_write(&oi->ip_alloc_sem);
goto out_update_size;
}
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
up_write(&oi->ip_alloc_sem);
if (clusters_to_add) {
ret = ocfs2_extend_allocation(inode,
OCFS2_I(inode)->ip_clusters,
clusters_to_add, 0);
if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
goto out_unlock; goto out_unlock;
} }
} }
/* if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
* Call this even if we don't add any clusters to the tree. We ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
* still need to zero the area between the old i_size and the
* new i_size. up_write(&oi->ip_alloc_sem);
*/
ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
goto out_unlock; goto out_unlock;
} }
out_update_size: out_update_size:
if (!tail_to_skip) {
/* We're being called from ocfs2_setattr() which wants
* us to update i_size */
ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
if (ret < 0) if (ret < 0)
mlog_errno(ret); mlog_errno(ret);
}
out_unlock: out_unlock:
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) if (data_locked)
ocfs2_data_unlock(inode, 1); ocfs2_data_unlock(inode, 1);
out: out:
...@@ -1035,7 +1066,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -1035,7 +1066,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (i_size_read(inode) > attr->ia_size) if (i_size_read(inode) > attr->ia_size)
status = ocfs2_truncate_file(inode, bh, attr->ia_size); status = ocfs2_truncate_file(inode, bh, attr->ia_size);
else else
status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); status = ocfs2_extend_file(inode, bh, attr->ia_size);
if (status < 0) { if (status < 0) {
if (status != -ENOSPC) if (status != -ENOSPC)
mlog_errno(status); mlog_errno(status);
...@@ -1243,6 +1274,31 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, ...@@ -1243,6 +1274,31 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
{ {
int ret; int ret;
u32 cpos, phys_cpos, clusters, alloc_size; u32 cpos, phys_cpos, clusters, alloc_size;
u64 end = start + len;
struct buffer_head *di_bh = NULL;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno, &di_bh,
OCFS2_BH_CACHED, inode);
if (ret) {
mlog_errno(ret);
goto out;
}
/*
* Nothing to do if the requested reservation range
* fits within the inode.
*/
if (ocfs2_size_fits_inline_data(di_bh, end))
goto out;
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
}
/* /*
* We consider both start and len to be inclusive. * We consider both start and len to be inclusive.
...@@ -1288,6 +1344,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, ...@@ -1288,6 +1344,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
ret = 0; ret = 0;
out: out:
brelse(di_bh);
return ret; return ret;
} }
...@@ -1469,6 +1527,14 @@ static int ocfs2_remove_inode_range(struct inode *inode, ...@@ -1469,6 +1527,14 @@ static int ocfs2_remove_inode_range(struct inode *inode,
if (byte_len == 0) if (byte_len == 0)
return 0; return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
byte_start + byte_len, 1);
if (ret)
mlog_errno(ret);
return ret;
}
trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
if (trunc_len >= trunc_start) if (trunc_len >= trunc_start)
...@@ -1713,15 +1779,13 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -1713,15 +1779,13 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
int appending, int appending,
int *direct_io) int *direct_io)
{ {
int ret = 0, meta_level = appending; int ret = 0, meta_level = 0;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
u32 clusters; loff_t saved_pos, end;
loff_t newsize, saved_pos;
/* /*
* We sample i_size under a read level meta lock to see if our write * We start with a read level meta lock and only jump to an ex
* is extending the file, if it is we back off and get a write level * if we need to make modifications here.
* meta lock.
*/ */
for(;;) { for(;;) {
ret = ocfs2_meta_lock(inode, NULL, meta_level); ret = ocfs2_meta_lock(inode, NULL, meta_level);
...@@ -1763,8 +1827,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -1763,8 +1827,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
saved_pos = *ppos; saved_pos = *ppos;
} }
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { end = saved_pos + count;
loff_t end = saved_pos + count;
/* /*
* Skip the O_DIRECT checks if we don't need * Skip the O_DIRECT checks if we don't need
...@@ -1773,6 +1836,15 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -1773,6 +1836,15 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
if (!direct_io || !(*direct_io)) if (!direct_io || !(*direct_io))
break; break;
/*
* There's no sane way to do direct writes to an inode
* with inline data.
*/
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
*direct_io = 0;
break;
}
/* /*
* Allowing concurrent direct writes means * Allowing concurrent direct writes means
* i_size changes wouldn't be synchronized, so * i_size changes wouldn't be synchronized, so
...@@ -1790,8 +1862,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -1790,8 +1862,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
* caller will have to retake some cluster * caller will have to retake some cluster
* locks and initiate the io as buffered. * locks and initiate the io as buffered.
*/ */
ret = ocfs2_check_range_for_holes(inode, saved_pos, ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
count);
if (ret == 1) { if (ret == 1) {
*direct_io = 0; *direct_io = 0;
ret = 0; ret = 0;
...@@ -1800,53 +1871,6 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -1800,53 +1871,6 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
break; break;
} }
/*
* The rest of this loop is concerned with legacy file
* systems which don't support sparse files.
*/
newsize = count + saved_pos;
mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
(long long) saved_pos, (long long) newsize,
(long long) i_size_read(inode));
/* No need for a higher level metadata lock if we're
* never going past i_size. */
if (newsize <= i_size_read(inode))
break;
if (meta_level == 0) {
ocfs2_meta_unlock(inode, meta_level);
meta_level = 1;
continue;
}
spin_lock(&OCFS2_I(inode)->ip_lock);
clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) -
OCFS2_I(inode)->ip_clusters;
spin_unlock(&OCFS2_I(inode)->ip_lock);
mlog(0, "Writing at EOF, may need more allocation: "
"i_size = %lld, newsize = %lld, need %u clusters\n",
(long long) i_size_read(inode), (long long) newsize,
clusters);
/* We only want to continue the rest of this loop if
* our extend will actually require more
* allocation. */
if (!clusters)
break;
ret = ocfs2_extend_file(inode, NULL, newsize, count);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out_unlock;
}
break;
}
if (appending) if (appending)
*ppos = saved_pos; *ppos = saved_pos;
......
...@@ -47,6 +47,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, ...@@ -47,6 +47,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret); enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split, u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **data_ac,
......
...@@ -241,6 +241,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ...@@ -241,6 +241,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
inode->i_version = 1; inode->i_version = 1;
inode->i_generation = le32_to_cpu(fe->i_generation); inode->i_generation = le32_to_cpu(fe->i_generation);
...@@ -513,6 +514,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, ...@@ -513,6 +514,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
fe = (struct ocfs2_dinode *) fe_bh->b_data; fe = (struct ocfs2_dinode *) fe_bh->b_data;
/*
* This check will also skip truncate of inodes with inline
* data and fast symlinks.
*/
if (fe->i_clusters) { if (fe->i_clusters) {
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
...@@ -1220,6 +1225,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle, ...@@ -1220,6 +1225,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
ocfs2_get_inode_flags(OCFS2_I(inode)); ocfs2_get_inode_flags(OCFS2_I(inode));
fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
fe->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
spin_unlock(&OCFS2_I(inode)->ip_lock); spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_size = cpu_to_le64(i_size_read(inode)); fe->i_size = cpu_to_le64(i_size_read(inode));
...@@ -1257,6 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode, ...@@ -1257,6 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode,
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
ocfs2_set_inode_flags(inode); ocfs2_set_inode_flags(inode);
i_size_write(inode, le64_to_cpu(fe->i_size)); i_size_write(inode, le64_to_cpu(fe->i_size));
inode->i_nlink = le16_to_cpu(fe->i_links_count); inode->i_nlink = le16_to_cpu(fe->i_links_count);
......
...@@ -51,6 +51,7 @@ struct ocfs2_inode_info ...@@ -51,6 +51,7 @@ struct ocfs2_inode_info
u32 ip_flags; /* see below */ u32 ip_flags; /* see below */
u32 ip_attr; /* inode attributes */ u32 ip_attr; /* inode attributes */
u16 ip_dyn_features;
/* protected by recovery_lock. */ /* protected by recovery_lock. */
struct inode *ip_next_orphan; struct inode *ip_next_orphan;
......
...@@ -35,13 +35,13 @@ ...@@ -35,13 +35,13 @@
#include "ocfs2.h" #include "ocfs2.h"
#include "alloc.h" #include "alloc.h"
#include "dir.h"
#include "dlmglue.h" #include "dlmglue.h"
#include "extent_map.h" #include "extent_map.h"
#include "heartbeat.h" #include "heartbeat.h"
#include "inode.h" #include "inode.h"
#include "journal.h" #include "journal.h"
#include "localalloc.h" #include "localalloc.h"
#include "namei.h"
#include "slot_map.h" #include "slot_map.h"
#include "super.h" #include "super.h"
#include "vote.h" #include "vote.h"
...@@ -1213,17 +1213,49 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) ...@@ -1213,17 +1213,49 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
return status; return status;
} }
/*
 * Context handed to ocfs2_orphan_filldir() through its opaque priv
 * argument while the orphan directory is walked.
 */
struct ocfs2_orphan_filldir_priv {
/* Most recently queued orphan; each new inode is linked in front of it. */
struct inode *head;
/* Passed to ocfs2_iget() to look each orphan up by inode number. */
struct ocfs2_super *osb;
};
/*
 * Directory-walk callback that queues one orphan directory entry for
 * recovery.
 *
 * priv is a struct ocfs2_orphan_filldir_priv. The "." and ".." entries
 * are ignored, and so is any entry whose inode cannot be obtained from
 * ocfs2_iget(), so that one bad inode does not stop recovery. Every
 * other inode is pushed onto the front of the priv->head list through
 * ip_next_orphan. pos and type are not used.
 *
 * Always returns 0.
 */
static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
				loff_t pos, u64 ino, unsigned type)
{
	struct ocfs2_orphan_filldir_priv *ctxt = priv;
	struct inode *orphan;

	if ((name_len == 1 && !strncmp(".", name, 1)) ||
	    (name_len == 2 && !strncmp("..", name, 2)))
		return 0;

	orphan = ocfs2_iget(ctxt->osb, ino,
			    OCFS2_FI_FLAG_ORPHAN_RECOVERY);
	if (!IS_ERR(orphan)) {
		mlog(0, "queue orphan %llu\n",
		     (unsigned long long)OCFS2_I(orphan)->ip_blkno);

		/*
		 * The next_orphan queue needs no locking: only a single
		 * process ever performs orphan recovery.
		 */
		OCFS2_I(orphan)->ip_next_orphan = ctxt->head;
		ctxt->head = orphan;
	}

	return 0;
}
static int ocfs2_queue_orphans(struct ocfs2_super *osb, static int ocfs2_queue_orphans(struct ocfs2_super *osb,
int slot, int slot,
struct inode **head) struct inode **head)
{ {
int status; int status;
struct inode *orphan_dir_inode = NULL; struct inode *orphan_dir_inode = NULL;
struct inode *iter; struct ocfs2_orphan_filldir_priv priv;
unsigned long offset, blk, local; loff_t pos = 0;
struct buffer_head *bh = NULL;
struct ocfs2_dir_entry *de; priv.osb = osb;
struct super_block *sb = osb->sb; priv.head = *head;
orphan_dir_inode = ocfs2_get_system_file_inode(osb, orphan_dir_inode = ocfs2_get_system_file_inode(osb,
ORPHAN_DIR_SYSTEM_INODE, ORPHAN_DIR_SYSTEM_INODE,
...@@ -1241,77 +1273,15 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, ...@@ -1241,77 +1273,15 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
goto out; goto out;
} }
offset = 0; status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
iter = NULL; ocfs2_orphan_filldir);
while(offset < i_size_read(orphan_dir_inode)) { if (status) {
blk = offset >> sb->s_blocksize_bits;
bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0);
if (!bh)
status = -EINVAL;
if (status < 0) {
if (bh)
brelse(bh);
mlog_errno(status); mlog_errno(status);
goto out_unlock; goto out;
}
local = 0;
while(offset < i_size_read(orphan_dir_inode)
&& local < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + local);
if (!ocfs2_check_dir_entry(orphan_dir_inode,
de, bh, local)) {
status = -EINVAL;
mlog_errno(status);
brelse(bh);
goto out_unlock;
}
local += le16_to_cpu(de->rec_len);
offset += le16_to_cpu(de->rec_len);
/* I guess we silently fail on no inode? */
if (!le64_to_cpu(de->inode))
continue;
if (de->file_type > OCFS2_FT_MAX) {
mlog(ML_ERROR,
"block %llu contains invalid de: "
"inode = %llu, rec_len = %u, "
"name_len = %u, file_type = %u, "
"name='%.*s'\n",
(unsigned long long)bh->b_blocknr,
(unsigned long long)le64_to_cpu(de->inode),
le16_to_cpu(de->rec_len),
de->name_len,
de->file_type,
de->name_len,
de->name);
continue;
} }
if (de->name_len == 1 && !strncmp(".", de->name, 1))
continue;
if (de->name_len == 2 && !strncmp("..", de->name, 2))
continue;
iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
OCFS2_FI_FLAG_ORPHAN_RECOVERY);
if (IS_ERR(iter))
continue;
mlog(0, "queue orphan %llu\n", *head = priv.head;
(unsigned long long)OCFS2_I(iter)->ip_blkno);
/* No locking is required for the next_orphan
* queue as there is only ever a single
* process doing orphan recovery. */
OCFS2_I(iter)->ip_next_orphan = *head;
*head = iter;
}
brelse(bh);
}
out_unlock:
ocfs2_meta_unlock(orphan_dir_inode, 0); ocfs2_meta_unlock(orphan_dir_inode, 0);
out: out:
mutex_unlock(&orphan_dir_inode->i_mutex); mutex_unlock(&orphan_dir_inode->i_mutex);
......
...@@ -282,6 +282,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, ...@@ -282,6 +282,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* prev. group desc. if we relink. */ * prev. group desc. if we relink. */
#define OCFS2_SUBALLOC_ALLOC (3) #define OCFS2_SUBALLOC_ALLOC (3)
/* Sum of the suballocator-allocation credits and the inode-update
 * credits. NOTE(review): presumably the journal credits reserved when
 * inline data is converted to extents - confirm against the conversion
 * code, which is not visible here. */
#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
+ OCFS2_INODE_UPDATE_CREDITS)
/* dinode + group descriptor update. We don't relink on free yet. */ /* dinode + group descriptor update. We don't relink on free yet. */
#define OCFS2_SUBALLOC_FREE (2) #define OCFS2_SUBALLOC_FREE (2)
......
...@@ -64,29 +64,6 @@ ...@@ -64,29 +64,6 @@
#include "buffer_head_io.h" #include "buffer_head_io.h"
/*
 * Sizing for the buffer-head readahead array used by ocfs2_find_entry():
 * NAMEI_RA_SIZE slots, expressed as NAMEI_RA_CHUNKS chunks of
 * NAMEI_RA_BLOCKS blocks each. NAMEI_RA_INDEX(c,b) maps chunk c,
 * block b to a flat slot number.
 */
#define NAMEI_RA_CHUNKS 2
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
static int inline ocfs2_search_dirblock(struct buffer_head *bh,
struct inode *dir,
const char *name, int namelen,
unsigned long offset,
struct ocfs2_dir_entry **res_dir);
static int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
static int __ocfs2_add_entry(handle_t *handle,
struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh);
static int ocfs2_mknod_locked(struct ocfs2_super *osb, static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir, struct inode *dir,
struct dentry *dentry, int mode, struct dentry *dentry, int mode,
...@@ -97,13 +74,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, ...@@ -97,13 +74,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode **ret_inode, struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac); struct ocfs2_alloc_context *inode_ac);
static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
struct inode **ret_orphan_dir, struct inode **ret_orphan_dir,
struct inode *inode, struct inode *inode,
...@@ -123,17 +93,6 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, ...@@ -123,17 +93,6 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
struct inode *inode, struct inode *inode,
const char *symname); const char *symname);
/*
 * Add a directory entry named after dentry to dentry's parent directory.
 *
 * Convenience wrapper: it unpacks the parent inode and the name from
 * the dentry and forwards everything else unchanged to
 * __ocfs2_add_entry(), whose return value is passed straight back.
 */
static inline int ocfs2_add_entry(handle_t *handle,
				  struct dentry *dentry,
				  struct inode *inode, u64 blkno,
				  struct buffer_head *parent_fe_bh,
				  struct buffer_head *insert_bh)
{
	struct inode *parent_dir = dentry->d_parent->d_inode;

	return __ocfs2_add_entry(handle, parent_dir,
				 dentry->d_name.name, dentry->d_name.len,
				 inode, blkno,
				 parent_fe_bh, insert_bh);
}
/* An orphan dir name is an 8 byte value, printed as a hex string */ /* An orphan dir name is an 8 byte value, printed as a hex string */
#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
...@@ -142,10 +101,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, ...@@ -142,10 +101,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
{ {
int status; int status;
u64 blkno; u64 blkno;
struct buffer_head *dirent_bh = NULL;
struct inode *inode = NULL; struct inode *inode = NULL;
struct dentry *ret; struct dentry *ret;
struct ocfs2_dir_entry *dirent;
struct ocfs2_inode_info *oi; struct ocfs2_inode_info *oi;
mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
...@@ -167,9 +124,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, ...@@ -167,9 +124,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
goto bail; goto bail;
} }
status = ocfs2_find_files_on_disk(dentry->d_name.name, status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
dentry->d_name.len, &blkno, dentry->d_name.len, &blkno);
dir, &dirent_bh, &dirent);
if (status < 0) if (status < 0)
goto bail_add; goto bail_add;
...@@ -224,83 +180,12 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, ...@@ -224,83 +180,12 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
ocfs2_meta_unlock(dir, 0); ocfs2_meta_unlock(dir, 0);
bail: bail:
if (dirent_bh)
brelse(dirent_bh);
mlog_exit_ptr(ret); mlog_exit_ptr(ret);
return ret; return ret;
} }
/*
 * Populate the first block of a newly created directory.
 *
 * Extends the directory through ocfs2_do_extend_dir() to obtain new_bh,
 * writes a "." entry pointing at inode and a ".." entry pointing at
 * parent into that block under journal protection, then sets the
 * inode's size to one block and its link count to 2 and marks the
 * inode dirty against fe_bh.
 *
 * Returns 0 on success, or the negative status of the first call that
 * failed.
 */
static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac)
{
int status;
struct buffer_head *new_bh = NULL;
struct ocfs2_dir_entry *de = NULL;
mlog_entry_void();
status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
data_ac, NULL, &new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_set_new_buffer_uptodate(inode, new_bh);
/* Journal access must be obtained before the block is written to. */
status = ocfs2_journal_access(handle, inode, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
memset(new_bh->b_data, 0, osb->sb->s_blocksize);
/* First record: "." refers to the new directory itself and takes the
 * minimum record length for a one-byte name. */
de = (struct ocfs2_dir_entry *) new_bh->b_data;
de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
de->name_len = 1;
de->rec_len =
cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
strcpy(de->name, ".");
ocfs2_set_de_type(de, S_IFDIR);
/* Second record: ".." refers to the parent and is sized to cover the
 * rest of the block after the "." record. */
de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
OCFS2_DIR_REC_LEN(1));
de->name_len = 2;
strcpy(de->name, "..");
ocfs2_set_de_type(de, S_IFDIR);
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
/* The directory now occupies exactly one block. */
i_size_write(inode, inode->i_sb->s_blocksize);
inode->i_nlink = 2;
inode->i_blocks = ocfs2_inode_sector_count(inode);
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
status = 0;
bail:
/* new_bh stays NULL if the extend failed before handing back a buffer. */
if (new_bh)
brelse(new_bh);
mlog_exit(status);
return status;
}
static int ocfs2_mknod(struct inode *dir, static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry, struct dentry *dentry,
int mode, int mode,
...@@ -365,9 +250,8 @@ static int ocfs2_mknod(struct inode *dir, ...@@ -365,9 +250,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave; goto leave;
} }
/* are we making a directory? If so, reserve a cluster for his /* Reserve a cluster if creating an extent based directory. */
* 1st extent. */ if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
if (S_ISDIR(mode)) {
status = ocfs2_reserve_clusters(osb, 1, &data_ac); status = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (status < 0) { if (status < 0) {
if (status != -ENOSPC) if (status != -ENOSPC)
...@@ -564,10 +448,21 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, ...@@ -564,10 +448,21 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
cpu_to_le32(CURRENT_TIME.tv_nsec); cpu_to_le32(CURRENT_TIME.tv_nsec);
fe->i_dtime = 0; fe->i_dtime = 0;
/*
* If supported, directories start with inline data.
*/
if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
u16 feat = le16_to_cpu(fe->i_dyn_features);
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
} else {
fel = &fe->id2.i_list; fel = &fe->id2.i_list;
fel->l_tree_depth = 0; fel->l_tree_depth = 0;
fel->l_next_free_rec = 0; fel->l_next_free_rec = 0;
fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
}
status = ocfs2_journal_dirty(handle, *new_fe_bh); status = ocfs2_journal_dirty(handle, *new_fe_bh);
if (status < 0) { if (status < 0) {
...@@ -1048,11 +943,6 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) ...@@ -1048,11 +943,6 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
ocfs2_meta_unlock(inode2, 1); ocfs2_meta_unlock(inode2, 1);
} }
/*
 * Given the start of a directory block, yield (as an lvalue) the raw
 * little-endian inode field of the block's second entry, which is
 * located by stepping over the first entry's rec_len. ocfs2_rename()
 * applies this to the first block of a moved directory to compare the
 * stored inode number with the old parent and to rewrite it to the new
 * parent.
 *
 * The argument is parenthesised so that any pointer expression, not
 * just a plain identifier or member access, can be passed safely.
 */
#define PARENT_INO(buffer) \
	((struct ocfs2_dir_entry *) \
	 ((char *)(buffer) + \
	  le16_to_cpu(((struct ocfs2_dir_entry *)(buffer))->rec_len)))->inode
static int ocfs2_rename(struct inode *old_dir, static int ocfs2_rename(struct inode *old_dir,
struct dentry *old_dentry, struct dentry *old_dentry,
struct inode *new_dir, struct inode *new_dir,
...@@ -1070,12 +960,12 @@ static int ocfs2_rename(struct inode *old_dir, ...@@ -1070,12 +960,12 @@ static int ocfs2_rename(struct inode *old_dir,
struct buffer_head *old_inode_bh = NULL; struct buffer_head *old_inode_bh = NULL;
struct buffer_head *insert_entry_bh = NULL; struct buffer_head *insert_entry_bh = NULL;
struct ocfs2_super *osb = NULL; struct ocfs2_super *osb = NULL;
u64 newfe_blkno; u64 newfe_blkno, old_de_ino;
handle_t *handle = NULL; handle_t *handle = NULL;
struct buffer_head *old_dir_bh = NULL; struct buffer_head *old_dir_bh = NULL;
struct buffer_head *new_dir_bh = NULL; struct buffer_head *new_dir_bh = NULL;
struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
// and new_dentry *new_de = NULL;
struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh // this is the 1st dirent bh
...@@ -1159,27 +1049,35 @@ static int ocfs2_rename(struct inode *old_dir, ...@@ -1159,27 +1049,35 @@ static int ocfs2_rename(struct inode *old_dir,
} }
if (S_ISDIR(old_inode->i_mode)) { if (S_ISDIR(old_inode->i_mode)) {
u64 old_inode_parent;
status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
old_inode, &old_inode_de_bh,
&old_inode_dot_dot_de);
if (status) {
status = -EIO; status = -EIO;
old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
if (!old_inode_de_bh)
goto bail; goto bail;
}
if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
status = -EIO; status = -EIO;
if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
OCFS2_I(old_dir)->ip_blkno)
goto bail; goto bail;
}
if (!new_inode && new_dir != old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX) {
status = -EMLINK; status = -EMLINK;
if (!new_inode && new_dir!=old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX)
goto bail; goto bail;
} }
}
status = -ENOENT; status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len, old_dentry->d_name.len,
old_dir, &old_de); &old_de_ino);
if (!old_de_bh) if (status) {
status = -ENOENT;
goto bail; goto bail;
}
/* /*
* Check for inode number is _not_ due to possible IO errors. * Check for inode number is _not_ due to possible IO errors.
...@@ -1187,8 +1085,10 @@ static int ocfs2_rename(struct inode *old_dir, ...@@ -1187,8 +1085,10 @@ static int ocfs2_rename(struct inode *old_dir,
* and merrily kill the link to whatever was created under the * and merrily kill the link to whatever was created under the
* same name. Goodbye sticky bit ;-< * same name. Goodbye sticky bit ;-<
*/ */
if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno) if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
status = -ENOENT;
goto bail; goto bail;
}
/* check if the target already exists (in which case we need /* check if the target already exists (in which case we need
* to delete it */ * to delete it */
...@@ -1321,20 +1221,13 @@ static int ocfs2_rename(struct inode *old_dir, ...@@ -1321,20 +1221,13 @@ static int ocfs2_rename(struct inode *old_dir,
} }
/* change the dirent to point to the correct inode */ /* change the dirent to point to the correct inode */
status = ocfs2_journal_access(handle, new_dir, new_de_bh, status = ocfs2_update_entry(new_dir, handle, new_de_bh,
OCFS2_JOURNAL_ACCESS_WRITE); new_de, old_inode);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);
goto bail; goto bail;
} }
new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
new_de->file_type = old_de->file_type;
new_dir->i_version++; new_dir->i_version++;
status = ocfs2_journal_dirty(handle, new_de_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
if (S_ISDIR(new_inode->i_mode)) if (S_ISDIR(new_inode->i_mode))
newfe->i_links_count = 0; newfe->i_links_count = 0;
...@@ -1370,7 +1263,21 @@ static int ocfs2_rename(struct inode *old_dir, ...@@ -1370,7 +1263,21 @@ static int ocfs2_rename(struct inode *old_dir,
} else } else
mlog_errno(status); mlog_errno(status);
/* now that the name has been added to new_dir, remove the old name */ /*
* Now that the name has been added to new_dir, remove the old name.
*
* We don't keep any directory entry context around until now
* because the insert might have changed the type of directory
* we're dealing with.
*/
old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len,
old_dir, &old_de);
if (!old_de_bh) {
status = -EIO;
goto bail;
}
status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);
...@@ -1383,12 +1290,8 @@ static int ocfs2_rename(struct inode *old_dir, ...@@ -1383,12 +1290,8 @@ static int ocfs2_rename(struct inode *old_dir,
} }
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
if (old_inode_de_bh) { if (old_inode_de_bh) {
status = ocfs2_journal_access(handle, old_inode, status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
old_inode_de_bh, old_inode_dot_dot_de, new_dir);
OCFS2_JOURNAL_ACCESS_WRITE);
PARENT_INO(old_inode_de_bh->b_data) =
cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
status = ocfs2_journal_dirty(handle, old_inode_de_bh);
old_dir->i_nlink--; old_dir->i_nlink--;
if (new_inode) { if (new_inode) {
new_inode->i_nlink--; new_inode->i_nlink--;
...@@ -1767,329 +1670,6 @@ static int ocfs2_symlink(struct inode *dir, ...@@ -1767,329 +1670,6 @@ static int ocfs2_symlink(struct inode *dir,
return status; return status;
} }
/*
 * Sanity-check the record length of one on-disk directory entry.
 *
 * de must point into bh's data; offset is only reported in the error
 * message. An entry is rejected when its rec_len is below the minimum
 * record size, is not a multiple of 4, is too small for the entry's
 * name_len, or would run past the end of the block. Each rejection is
 * logged through mlog(ML_ERROR, ...).
 *
 * Returns 1 if the entry passes every check, 0 otherwise.
 */
int ocfs2_check_dir_entry(struct inode * dir,
			  struct ocfs2_dir_entry * de,
			  struct buffer_head * bh,
			  unsigned long offset)
{
	const int rec_len = le16_to_cpu(de->rec_len);
	const char *reason;

	if (rec_len < OCFS2_DIR_REC_LEN(1))
		reason = "rec_len is smaller than minimal";
	else if (rec_len % 4 != 0)
		reason = "rec_len % 4 != 0";
	else if (rec_len < OCFS2_DIR_REC_LEN(de->name_len))
		reason = "rec_len is too small for name_len";
	else if (((char *) de - bh->b_data) + rec_len > dir->i_sb->s_blocksize)
		reason = "directory entry across blocks";
	else
		return 1;

	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
	     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno, reason,
	     offset, (unsigned long long)le64_to_cpu(de->inode), rec_len,
	     de->name_len);

	return 0;
}
/*
 * Insert a directory entry for name into the directory block insert_bh.
 *
 * We don't always have a dentry for what we want to add, so callers
 * like the orphan dir code can use this directly. Only insert_bh is
 * searched: the block is scanned for an unused entry that is large
 * enough, or for a live entry with enough slack after its own name to
 * be split in two.
 *
 * handle:        running transaction
 * dir:           directory being modified; its mtime/ctime and
 *                i_version are updated
 * name, namelen: name to add; namelen must be non-zero
 * inode, blkno:  when blkno is non-zero the new entry points at blkno
 *                and takes its type from inode->i_mode, otherwise it is
 *                written with inode number 0
 * parent_fe_bh:  handed to ocfs2_mark_inode_dirty() together with dir
 * insert_bh:     directory block that receives the entry
 *
 * Returns 0 on success, -EINVAL for an empty name, -EEXIST if the name
 * is already present in the block, -ENOENT if an entry fails
 * ocfs2_check_dir_entry(), or the negative error of a failing inode or
 * journal call.
 */
static int __ocfs2_add_entry(handle_t *handle,
			     struct inode *dir,
			     const char *name, int namelen,
			     struct inode *inode, u64 blkno,
			     struct buffer_head *parent_fe_bh,
			     struct buffer_head *insert_bh)
{
	unsigned long offset;
	unsigned short rec_len;
	struct ocfs2_dir_entry *de, *de1;
	struct super_block *sb;
	int retval;

	mlog_entry_void();

	sb = dir->i_sb;

	if (!namelen) {
		/* Leave through bail so mlog_entry/mlog_exit stay paired. */
		retval = -EINVAL;
		goto bail;
	}

	rec_len = OCFS2_DIR_REC_LEN(namelen);
	offset = 0;
	de = (struct ocfs2_dir_entry *) insert_bh->b_data;
	while (1) {
		BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
		/* These checks should've already been passed by the
		 * prepare function, but I guess we can leave them
		 * here anyway. */
		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
			retval = -ENOENT;
			goto bail;
		}
		if (ocfs2_match(namelen, name, de)) {
			retval = -EEXIST;
			goto bail;
		}
		/*
		 * Usable slot: either an unused entry that is big enough,
		 * or a live entry whose record has room for our record
		 * behind its own name.
		 */
		if (((le64_to_cpu(de->inode) == 0) &&
		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
		    (le16_to_cpu(de->rec_len) >=
		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
			retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/*
			 * The block may only be modified once the journal
			 * has granted write access, so a failure here must
			 * stop the insert.
			 */
			retval = ocfs2_journal_access(handle, dir, insert_bh,
						      OCFS2_JOURNAL_ACCESS_WRITE);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/* By now the buffer is marked for journaling */
			if (le64_to_cpu(de->inode)) {
				/* Split: the live entry keeps only what its
				 * name needs, the remainder becomes ours. */
				de1 = (struct ocfs2_dir_entry *)((char *) de +
					OCFS2_DIR_REC_LEN(de->name_len));
				de1->rec_len =
					cpu_to_le16(le16_to_cpu(de->rec_len) -
					OCFS2_DIR_REC_LEN(de->name_len));
				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				de = de1;
			}
			de->file_type = OCFS2_FT_UNKNOWN;
			if (blkno) {
				de->inode = cpu_to_le64(blkno);
				ocfs2_set_de_type(de, inode->i_mode);
			} else
				de->inode = 0;
			de->name_len = namelen;
			memcpy(de->name, name, namelen);

			dir->i_version++;

			/* Report a failed dirty instead of claiming success. */
			retval = ocfs2_journal_dirty(handle, insert_bh);
			if (retval < 0)
				mlog_errno(retval);
			else
				retval = 0;
			goto bail;
		}
		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
	}

	/* when you think about it, the assert above should prevent us
	 * from ever getting here. */
	retval = -ENOSPC;
bail:

	mlog_exit(retval);
	return retval;
}
/*
 * Remove the entry de_del from the directory block bh.
 *
 * The block is walked from its first entry. When de_del is reached,
 * write access is taken on bh and the entry is dropped: if it has a
 * predecessor in the block, the predecessor's rec_len grows to swallow
 * it; if it is the first entry, its inode field is cleared instead.
 * dir->i_version is bumped and the block is marked dirty in the journal.
 *
 * Returns the status of ocfs2_journal_dirty() after a deletion, -EIO if
 * an entry fails ocfs2_check_dir_entry() or journal access is refused,
 * and -ENOENT if de_del is not found in the block.
 */
static int ocfs2_delete_entry(handle_t *handle,
			      struct inode *dir,
			      struct ocfs2_dir_entry *de_del,
			      struct buffer_head *bh)
{
	struct ocfs2_dir_entry *cur, *prev = NULL;
	int off = 0, step, status = -ENOENT;

	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);

	cur = (struct ocfs2_dir_entry *) bh->b_data;
	while (off < bh->b_size) {
		if (!ocfs2_check_dir_entry(dir, cur, bh, off)) {
			status = -EIO;
			mlog_errno(status);
			break;
		}

		if (cur != de_del) {
			/* Not the target: remember it as the predecessor
			 * and step to the next record. */
			step = le16_to_cpu(cur->rec_len);
			off += step;
			prev = cur;
			cur = (struct ocfs2_dir_entry *)((char *)cur + step);
			continue;
		}

		status = ocfs2_journal_access(handle, dir, bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			status = -EIO;
			mlog_errno(status);
			break;
		}

		if (prev)
			prev->rec_len =
				cpu_to_le16(le16_to_cpu(prev->rec_len) +
					    le16_to_cpu(cur->rec_len));
		else
			cur->inode = 0;

		dir->i_version++;
		status = ocfs2_journal_dirty(handle, bh);
		break;
	}

	mlog_exit(status);
	return status;
}
/*
 * Look for name among the directory entries stored in bh.
 *
 * offset is advanced alongside the scan and is handed to
 * ocfs2_check_dir_entry() when a matching entry is validated.
 *
 * Returns 1 and stores the entry in *res_dir when the name is found,
 * 0 when the block holds no such name, and -1 when the matching entry
 * fails validation or an entry with a zero rec_len is encountered.
 */
static int inline ocfs2_search_dirblock(struct buffer_head *bh,
					struct inode *dir,
					const char *name, int namelen,
					unsigned long offset,
					struct ocfs2_dir_entry **res_dir)
{
	char *pos = bh->b_data;
	char *end = pos + dir->i_sb->s_blocksize;
	int found = 0;

	mlog_entry_void();

	while (pos < end) {
		struct ocfs2_dir_entry *ent = (struct ocfs2_dir_entry *) pos;
		int ent_len;

		/*
		 * This loop runs very often, so only cheap checks are done
		 * per entry; the full validation is kept for a name match.
		 */
		if (pos + namelen <= end &&
		    ocfs2_match(namelen, name, ent)) {
			if (ocfs2_check_dir_entry(dir, ent, bh, offset)) {
				*res_dir = ent;
				found = 1;
			} else {
				found = -1;
			}
			break;
		}

		/* A zero-length record would make us spin forever. */
		ent_len = le16_to_cpu(ent->rec_len);
		if (ent_len <= 0) {
			found = -1;
			break;
		}

		pos += ent_len;
		offset += ent_len;
	}

	mlog_exit(found);
	return found;
}
/*
 * ocfs2_find_entry - locate the directory entry for a name.
 *
 * Scans the blocks of directory 'dir' for an entry matching
 * 'name'/'namelen', calling ocfs2_search_dirblock() on each block.
 *
 * On a match, *res_dir points at the entry and the buffer_head holding
 * it is returned. That buffer is not released here, so the caller is
 * presumably expected to brelse() it -- confirm against callers.
 * Otherwise *res_dir is left NULL and NULL is returned; this covers both
 * "name not present" and a failure reported by ocfs2_search_dirblock().
 *
 * The scan starts at the block cached in ip_dir_start_lookup and wraps
 * around to block 0. Up to NAMEI_RA_SIZE blocks are requested at a time
 * into bh_use[].
 */
struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
{
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
unsigned long start, block, b;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
buffer */
int num = 0;
int nblocks, i, err;
mlog_entry_void();
*res_dir = NULL;
sb = dir->i_sb;
/* Directory size expressed in whole blocks. */
nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
/* Begin at the cached block, unless it lies beyond the current end. */
start = OCFS2_I(dir)->ip_dir_start_lookup;
if (start >= nblocks)
start = 0;
block = start;
restart:
do {
/*
* We deal with the read-ahead logic here.
*/
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
b = block;
for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
/*
* Terminate if we reach the end of the
* directory and must wrap, or if our
* search has finished at this block.
*/
if (b >= nblocks || (num && block == start)) {
bh_use[ra_max] = NULL;
break;
}
num++;
/*
 * NOTE(review): err is filled in by ocfs2_bread() but never
 * examined in this function; a NULL bh is simply skipped below.
 */
bh = ocfs2_bread(dir, b++, &err, 1);
bh_use[ra_max] = bh;
}
}
/* No buffer in this slot: move on to the next block. */
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
block);
brelse(bh);
goto next;
}
/* 1 = found, 0 = not in this block, negative = bad block. */
i = ocfs2_search_dirblock(bh, dir, name, namelen,
block << sb->s_blocksize_bits,
res_dir);
if (i == 1) {
/* Cache the hit so the next lookup starts at this block. */
OCFS2_I(dir)->ip_dir_start_lookup = block;
ret = bh;
goto cleanup_and_exit;
} else {
brelse(bh);
/* A failure in the block search abandons the whole lookup. */
if (i < 0)
goto cleanup_and_exit;
}
next:
/* Advance, wrapping to block 0 past the last block. */
if (++block >= nblocks)
block = 0;
} while (block != start);
/*
* If the directory has grown while we were searching, then
* search the last part of the directory before giving up.
*/
block = nblocks;
nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
if (block < nblocks) {
start = 0;
goto restart;
}
cleanup_and_exit:
/* Clean up the read-ahead blocks */
for (; ra_ptr < ra_max; ra_ptr++)
brelse(bh_use[ra_ptr]);
mlog_exit_ptr(ret);
return ret;
}
static int ocfs2_blkno_stringify(u64 blkno, char *name) static int ocfs2_blkno_stringify(u64 blkno, char *name)
{ {
int status, namelen; int status, namelen;
......
...@@ -30,29 +30,10 @@ extern const struct inode_operations ocfs2_dir_iops; ...@@ -30,29 +30,10 @@ extern const struct inode_operations ocfs2_dir_iops;
struct dentry *ocfs2_get_parent(struct dentry *child); struct dentry *ocfs2_get_parent(struct dentry *child);
/*
 * Validate directory entry 'de', which sits at byte 'offset' and is backed
 * by 'bh'. Callers treat a zero return as "entry is not valid".
 */
int ocfs2_check_dir_entry (struct inode *dir,
struct ocfs2_dir_entry *de,
struct buffer_head *bh,
unsigned long offset);
/*
 * Look up 'name' (of length 'namelen') in directory 'dir'. On success the
 * buffer_head containing the entry is returned and *res_dir points at the
 * entry; on failure NULL is returned and *res_dir is NULL.
 */
struct buffer_head *ocfs2_find_entry(const char *name,
int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir);
int ocfs2_orphan_del(struct ocfs2_super *osb, int ocfs2_orphan_del(struct ocfs2_super *osb,
handle_t *handle, handle_t *handle,
struct inode *orphan_dir_inode, struct inode *orphan_dir_inode,
struct inode *inode, struct inode *inode,
struct buffer_head *orphan_dir_bh); struct buffer_head *orphan_dir_bh);
static inline int ocfs2_match(int len,
			      const char * const name,
			      struct ocfs2_dir_entry *de)
{
	/*
	 * Only an entry whose name length agrees and whose inode field is
	 * non-zero can match; the byte comparison is done last.
	 */
	if (len == de->name_len && de->inode)
		return memcmp(name, de->name, len) == 0;

	return 0;
}
#endif /* OCFS2_NAMEI_H */ #endif /* OCFS2_NAMEI_H */
...@@ -319,6 +319,13 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) ...@@ -319,6 +319,13 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
return 0; return 0;
} }
static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
{
	/* Non-zero exactly when the INLINE_DATA incompat bit is set. */
	return (osb->s_feature_incompat &
		OCFS2_FEATURE_INCOMPAT_INLINE_DATA) ? 1 : 0;
}
/* set / clear functions because cluster events can make these happen /* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also * in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock * means that any future flags osb_flags must be protected by spinlock
......
...@@ -87,7 +87,8 @@ ...@@ -87,7 +87,8 @@
#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
/* /*
...@@ -110,6 +111,20 @@ ...@@ -110,6 +111,20 @@
/* Support for sparse allocation in b-trees */ /* Support for sparse allocation in b-trees */
#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010 #define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010
/*
* Tunefs sets this incompat flag before starting an operation which
* would require cleanup on abort. This is done to protect users from
* inadvertently mounting the fs after an aborted run without
* fsck-ing.
*
* s_tunefs_flag on the super block describes precisely which
* operations were in progress.
*/
#define OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG 0x0020
/* Support for data packed into inode blocks */
#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
/* /*
* backup superblock flag is used to indicate that this volume * backup superblock flag is used to indicate that this volume
* has backup superblocks. * has backup superblocks.
...@@ -129,6 +144,11 @@ ...@@ -129,6 +144,11 @@
/* the max backup superblock nums */ /* the max backup superblock nums */
#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6 #define OCFS2_MAX_BACKUP_SUPERBLOCKS 6
/*
* Flags on ocfs2_super_block.s_tunefs_flag
*/
#define OCFS2_TUNEFS_INPROG_REMOVE_SLOT 0x0001 /* Removing slots */
/* /*
* Flags on ocfs2_dinode.i_flags * Flags on ocfs2_dinode.i_flags
*/ */
...@@ -146,6 +166,17 @@ ...@@ -146,6 +166,17 @@
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
/*
* Flags on ocfs2_dinode.i_dyn_features
*
* These can change much more often than i_flags. When adding flags,
* keep in mind that i_dyn_features is only 16 bits wide.
*/
#define OCFS2_INLINE_DATA_FL (0x0001) /* Data stored in inode block */
#define OCFS2_HAS_XATTR_FL (0x0002)
#define OCFS2_INLINE_XATTR_FL (0x0004)
#define OCFS2_INDEXED_DIR_FL (0x0008)
/* Inode attributes, keep in sync with EXT2 */ /* Inode attributes, keep in sync with EXT2 */
#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ #define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ #define OCFS2_UNRM_FL (0x00000002) /* Undelete */
...@@ -447,8 +478,8 @@ struct ocfs2_super_block { ...@@ -447,8 +478,8 @@ struct ocfs2_super_block {
__le32 s_clustersize_bits; /* Clustersize for this fs */ __le32 s_clustersize_bits; /* Clustersize for this fs */
/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
before tunefs required */ before tunefs required */
__le16 s_reserved1; __le16 s_tunefs_flag;
__le32 s_reserved2; __le32 s_reserved1;
__le64 s_first_cluster_group; /* Block offset of 1st cluster __le64 s_first_cluster_group; /* Block offset of 1st cluster
* group header */ * group header */
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
...@@ -470,6 +501,19 @@ struct ocfs2_local_alloc ...@@ -470,6 +501,19 @@ struct ocfs2_local_alloc
/*10*/ __u8 la_bitmap[0]; /*10*/ __u8 la_bitmap[0];
}; };
/*
* Data-in-inode header. This is only used if i_dyn_features has
* OCFS2_INLINE_DATA_FL set.
*/
/*
 * Header that precedes data stored directly in the inode block. It is one
 * member of the id2 union of struct ocfs2_dinode (as i_data).
 */
struct ocfs2_inline_data
{
/*00*/ __le16 id_count; /* Number of bytes that can be used
* for data, starting at id_data */
/* Reserved fields; not referenced by any code visible here. */
__le16 id_reserved0;
__le32 id_reserved1;
/*
 * Zero-length trailing array: it only marks where the data begins.
 * ocfs2_max_inline_data() derives the usable space from its offset.
 */
__u8 id_data[0]; /* Start of user data */
};
/* /*
* On disk inode for OCFS2 * On disk inode for OCFS2
*/ */
...@@ -502,7 +546,7 @@ struct ocfs2_dinode { ...@@ -502,7 +546,7 @@ struct ocfs2_dinode {
__le32 i_attr; __le32 i_attr;
__le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
was set in i_flags */ was set in i_flags */
__le16 i_reserved1; __le16 i_dyn_features;
/*70*/ __le64 i_reserved2[8]; /*70*/ __le64 i_reserved2[8];
/*B8*/ union { /*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this __le64 i_pad1; /* Generic way to refer to this
...@@ -528,6 +572,7 @@ struct ocfs2_dinode { ...@@ -528,6 +572,7 @@ struct ocfs2_dinode {
struct ocfs2_chain_list i_chain; struct ocfs2_chain_list i_chain;
struct ocfs2_extent_list i_list; struct ocfs2_extent_list i_list;
struct ocfs2_truncate_log i_dealloc; struct ocfs2_truncate_log i_dealloc;
struct ocfs2_inline_data i_data;
__u8 i_symlink[0]; __u8 i_symlink[0];
} id2; } id2;
/* Actual on-disk size is one block */ /* Actual on-disk size is one block */
...@@ -577,6 +622,12 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb) ...@@ -577,6 +622,12 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_symlink); offsetof(struct ocfs2_dinode, id2.i_symlink);
} }
static inline int ocfs2_max_inline_data(struct super_block *sb)
{
	/* Bytes of the inode block taken up before the inline data area. */
	const size_t data_start = offsetof(struct ocfs2_dinode,
					   id2.i_data.id_data);

	/* Whatever remains of the block after that point can hold data. */
	return sb->s_blocksize - data_start;
}
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
{ {
int size; int size;
...@@ -656,6 +707,11 @@ static inline int ocfs2_fast_symlink_chars(int blocksize) ...@@ -656,6 +707,11 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
} }
static inline int ocfs2_max_inline_data(int blocksize)
{
	/* Bytes of the inode block taken up before the inline data area. */
	const size_t data_start = offsetof(struct ocfs2_dinode,
					   id2.i_data.id_data);

	/* Whatever remains of the block after that point can hold data. */
	return blocksize - data_start;
}
static inline int ocfs2_extent_recs_per_inode(int blocksize) static inline int ocfs2_extent_recs_per_inode(int blocksize)
{ {
int size; int size;
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <linux/parser.h> #include <linux/parser.h>
#include <linux/crc32.h> #include <linux/crc32.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/mount.h>
#include <cluster/nodemanager.h> #include <cluster/nodemanager.h>
...@@ -91,6 +92,7 @@ struct mount_options ...@@ -91,6 +92,7 @@ struct mount_options
static int ocfs2_parse_options(struct super_block *sb, char *options, static int ocfs2_parse_options(struct super_block *sb, char *options,
struct mount_options *mopt, struct mount_options *mopt,
int is_remount); int is_remount);
static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
static void ocfs2_put_super(struct super_block *sb); static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb);
static int ocfs2_remount(struct super_block *sb, int *flags, char *data); static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
...@@ -105,7 +107,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait); ...@@ -105,7 +107,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait);
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
static int ocfs2_release_system_inodes(struct ocfs2_super *osb); static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
static int ocfs2_check_volume(struct ocfs2_super *osb); static int ocfs2_check_volume(struct ocfs2_super *osb);
static int ocfs2_verify_volume(struct ocfs2_dinode *di, static int ocfs2_verify_volume(struct ocfs2_dinode *di,
...@@ -133,6 +135,7 @@ static const struct super_operations ocfs2_sops = { ...@@ -133,6 +135,7 @@ static const struct super_operations ocfs2_sops = {
.write_super = ocfs2_write_super, .write_super = ocfs2_write_super,
.put_super = ocfs2_put_super, .put_super = ocfs2_put_super,
.remount_fs = ocfs2_remount, .remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
}; };
enum { enum {
...@@ -177,7 +180,7 @@ static void ocfs2_write_super(struct super_block *sb) ...@@ -177,7 +180,7 @@ static void ocfs2_write_super(struct super_block *sb)
static int ocfs2_sync_fs(struct super_block *sb, int wait) static int ocfs2_sync_fs(struct super_block *sb, int wait)
{ {
int status = 0; int status;
tid_t target; tid_t target;
struct ocfs2_super *osb = OCFS2_SB(sb); struct ocfs2_super *osb = OCFS2_SB(sb);
...@@ -275,9 +278,9 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) ...@@ -275,9 +278,9 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
return status; return status;
} }
static int ocfs2_release_system_inodes(struct ocfs2_super *osb) static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
{ {
int status = 0, i; int i;
struct inode *inode; struct inode *inode;
mlog_entry_void(); mlog_entry_void();
...@@ -302,8 +305,7 @@ static int ocfs2_release_system_inodes(struct ocfs2_super *osb) ...@@ -302,8 +305,7 @@ static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
osb->root_inode = NULL; osb->root_inode = NULL;
} }
mlog_exit(status); mlog_exit(0);
return status;
} }
/* We're allocating fs objects, use GFP_NOFS */ /* We're allocating fs objects, use GFP_NOFS */
...@@ -453,7 +455,7 @@ static int ocfs2_sb_probe(struct super_block *sb, ...@@ -453,7 +455,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
struct buffer_head **bh, struct buffer_head **bh,
int *sector_size) int *sector_size)
{ {
int status = 0, tmpstat; int status, tmpstat;
struct ocfs1_vol_disk_hdr *hdr; struct ocfs1_vol_disk_hdr *hdr;
struct ocfs2_dinode *di; struct ocfs2_dinode *di;
int blksize; int blksize;
...@@ -830,6 +832,41 @@ static int ocfs2_parse_options(struct super_block *sb, ...@@ -830,6 +832,41 @@ static int ocfs2_parse_options(struct super_block *sb,
return status; return status;
} }
static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
{
	struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
	unsigned long mount_flags = osb->s_mount_opt;

	/* Heartbeat mode is always printed, one way or the other. */
	if (!(mount_flags & OCFS2_MOUNT_HB_LOCAL))
		seq_printf(s, ",heartbeat=none");
	else
		seq_printf(s, ",_netdev,heartbeat=local");

	/* Printed only when the flag is set. */
	if (mount_flags & OCFS2_MOUNT_NOINTR)
		seq_printf(s, ",nointr");

	/* Data mode: writeback when flagged, ordered otherwise. */
	if (!(mount_flags & OCFS2_MOUNT_DATA_WRITEBACK))
		seq_printf(s, ",data=ordered");
	else
		seq_printf(s, ",data=writeback");

	if (mount_flags & OCFS2_MOUNT_BARRIER)
		seq_printf(s, ",barrier=1");

	/* Error policy: panic when flagged, remount-ro otherwise. */
	if (!(mount_flags & OCFS2_MOUNT_ERRORS_PANIC))
		seq_printf(s, ",errors=remount-ro");
	else
		seq_printf(s, ",errors=panic");

	/* The remaining options appear only when they differ from the default. */
	if (osb->preferred_slot != OCFS2_INVALID_SLOT)
		seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);

	if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
		seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);

	return 0;
}
static int __init ocfs2_init(void) static int __init ocfs2_init(void)
{ {
int status; int status;
...@@ -1209,12 +1246,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ...@@ -1209,12 +1246,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
tmp = ocfs2_request_umount_vote(osb); tmp = ocfs2_request_umount_vote(osb);
if (tmp < 0) if (tmp < 0)
mlog_errno(tmp); mlog_errno(tmp);
}
if (osb->slot_num != OCFS2_INVALID_SLOT) if (osb->slot_num != OCFS2_INVALID_SLOT)
ocfs2_put_slot(osb); ocfs2_put_slot(osb);
if (osb->dlm)
ocfs2_super_unlock(osb, 1); ocfs2_super_unlock(osb, 1);
}
ocfs2_release_system_inodes(osb); ocfs2_release_system_inodes(osb);
...@@ -1275,7 +1313,7 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -1275,7 +1313,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
struct buffer_head *bh, struct buffer_head *bh,
int sector_size) int sector_size)
{ {
int status = 0; int status;
int i, cbits, bbits; int i, cbits, bbits;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
struct inode *inode = NULL; struct inode *inode = NULL;
...@@ -1596,7 +1634,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, ...@@ -1596,7 +1634,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
static int ocfs2_check_volume(struct ocfs2_super *osb) static int ocfs2_check_volume(struct ocfs2_super *osb)
{ {
int status = 0; int status;
int dirty; int dirty;
int local; int local;
struct ocfs2_dinode *local_alloc = NULL; /* only used if we struct ocfs2_dinode *local_alloc = NULL; /* only used if we
......
...@@ -100,17 +100,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, ...@@ -100,17 +100,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
char namebuf[40]; char namebuf[40];
struct inode *inode = NULL; struct inode *inode = NULL;
u64 blkno; u64 blkno;
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *de = NULL;
int status = 0; int status = 0;
ocfs2_sprintf_system_inode_name(namebuf, ocfs2_sprintf_system_inode_name(namebuf,
sizeof(namebuf), sizeof(namebuf),
type, slot); type, slot);
status = ocfs2_find_files_on_disk(namebuf, strlen(namebuf), status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
&blkno, osb->sys_root_inode, strlen(namebuf), &blkno);
&dirent_bh, &de);
if (status < 0) { if (status < 0) {
goto bail; goto bail;
} }
...@@ -122,8 +119,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, ...@@ -122,8 +119,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
goto bail; goto bail;
} }
bail: bail:
if (dirent_bh)
brelse(dirent_bh);
return inode; return inode;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment