Commit e90c01e1 authored by Steven Whitehouse

[GFS2] Reverse block order in build_height

The original code ordered the blocks allocated in the build_height
routine backwards, causing excessive disk seeks during a read of the
metadata. This patch reverses the order to try to reduce disk seeks.

Example: A five-level metadata tree, I = Inode, P = Pointers, D = Data

You need to read the blocks in the order:

I P5 P4 P3 P2 P1 D

in order to read a single data block. The new code now orders the blocks
in this way. The old code used to order them as:

I P1 P2 P3 P4 P5 D

requiring two extra seeks on average. Note that for files which are
grown by gradual extension rather than by truncate or by llseek/write
at a large offset, this doesn't apply. In the case of writing to a
file linearly, this routine will only be called upon to extend the
height of the tree by one block at a time, so the ordering is
determined by when it's called rather than by the internals of the
routine itself. Optimising that part of the ordering is a much
harder problem.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 7d63b54a
...@@ -164,72 +164,62 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) ...@@ -164,72 +164,62 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
* @ip: The GFS2 inode * @ip: The GFS2 inode
* @height: The height to build to * @height: The height to build to
* *
* This routine makes sure that the metadata tree is tall enough to hold
* "size" bytes of data.
* *
* Returns: errno * Returns: errno
*/ */
static int build_height(struct gfs2_inode *ip, int height) static int build_height(struct inode *inode, unsigned height)
{ {
struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_inode *ip = inode->u.generic_ip;
struct buffer_head *bh, *dibh; unsigned new_height = height - ip->i_di.di_height;
uint64_t block = 0, *bp; struct buffer_head *dibh;
unsigned int x; struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
int new_block;
int error; int error;
u64 *bp;
u64 bn;
unsigned n;
if (height <= ip->i_di.di_height)
return 0;
while (ip->i_di.di_height < height) {
error = gfs2_meta_inode_buffer(ip, &dibh); error = gfs2_meta_inode_buffer(ip, &dibh);
if (error) if (error)
return error; return error;
new_block = 0; for(n = 0; n < new_height; n++) {
bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); bn = gfs2_alloc_meta(ip);
for (x = 0; x < sdp->sd_diptrs; x++, bp++) blocks[n] = gfs2_meta_new(ip->i_gl, bn);
if (*bp) { gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
new_block = 1;
break;
} }
if (new_block) { n = 0;
/* Get a new block, fill it with the old direct bn = blocks[0]->b_blocknr;
pointers, and write it out */ if (new_height > 1) {
for(; n < new_height-1; n++) {
block = gfs2_alloc_meta(ip); gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
bh = gfs2_meta_new(ip->i_gl, block);
gfs2_trans_add_bh(ip->i_gl, bh, 1);
gfs2_metatype_set(bh,
GFS2_METATYPE_IN,
GFS2_FORMAT_IN); GFS2_FORMAT_IN);
gfs2_buffer_copy_tail(bh, gfs2_buffer_clear_tail(blocks[n],
sizeof(struct gfs2_meta_header), sizeof(struct gfs2_meta_header));
dibh, sizeof(struct gfs2_dinode)); bp = (u64 *)(blocks[n]->b_data +
sizeof(struct gfs2_meta_header));
brelse(bh); *bp = cpu_to_be64(blocks[n+1]->b_blocknr);
brelse(blocks[n]);
blocks[n] = NULL;
} }
}
/* Set up the new direct pointer and write it out to disk */ gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
dibh, sizeof(struct gfs2_dinode));
brelse(blocks[n]);
gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
if (new_block) { *bp = cpu_to_be64(bn);
*(uint64_t *)(dibh->b_data + ip->i_di.di_height += new_height;
sizeof(struct gfs2_dinode)) = ip->i_di.di_blocks += new_height;
cpu_to_be64(block);
ip->i_di.di_blocks++;
}
ip->i_di.di_height++;
gfs2_dinode_out(&ip->i_di, dibh->b_data); gfs2_dinode_out(&ip->i_di, dibh->b_data);
brelse(dibh); brelse(dibh);
} return error;
return 0;
} }
/** /**
...@@ -416,7 +406,7 @@ static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock, ...@@ -416,7 +406,7 @@ static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock,
if (!create) if (!create)
goto out; goto out;
error = build_height(ip, height); error = build_height(inode, height);
if (error) if (error)
goto out; goto out;
} }
...@@ -806,7 +796,7 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) ...@@ -806,7 +796,7 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size)
h = calc_tree_height(ip, size); h = calc_tree_height(ip, size);
if (ip->i_di.di_height < h) { if (ip->i_di.di_height < h) {
down_write(&ip->i_rw_mutex); down_write(&ip->i_rw_mutex);
error = build_height(ip, h); error = build_height(ip->i_vnode, h);
up_write(&ip->i_rw_mutex); up_write(&ip->i_rw_mutex);
if (error) if (error)
goto out_end_trans; goto out_end_trans;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment