Commit 198a49a9 authored by Jinshan Xiong, committed by Greg Kroah-Hartman

staging: lustre: clio: revise readahead to support 16MB IO

Read ahead currently doesn't handle 16MB RPC packets correctly
because it assumes the packets are a default size instead of querying
the size. This work adjusts the read ahead policy to issue
read ahead RPCs sized according to the underlying RPC size.
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Signed-off-by: Gu Zheng <gzheng@ddn.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7990
Reviewed-on: http://review.whamcloud.com/19368
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent ea3f00df
...@@ -1452,8 +1452,10 @@ struct cl_read_ahead { ...@@ -1452,8 +1452,10 @@ struct cl_read_ahead {
* cra_end is included. * cra_end is included.
*/ */
pgoff_t cra_end; pgoff_t cra_end;
/* optimal RPC size for this read, by pages */
unsigned long cra_rpc_size;
/* /*
* Release routine. If readahead holds resources underneath, this * Release callback. If readahead holds resources underneath, this
* function should be called to release it. * function should be called to release it.
*/ */
void (*cra_release)(const struct lu_env *env, void *cbdata); void (*cra_release)(const struct lu_env *env, void *cbdata);
......
...@@ -351,13 +351,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) ...@@ -351,13 +351,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
/* This value may be reduced at connect time in /*
* ptlrpc_connect_interpret() . We initialize it to only * Set it to possible maximum size. It may be reduced by ocd_brw_size
* 1MB until we know what the performance looks like. * from OFD after connecting.
* In the future this should likely be increased. LU-1431
*/ */
cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES, cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
LNET_MTU >> PAGE_SHIFT);
/* /*
* set cl_chunkbits default value to PAGE_CACHE_SHIFT, * set cl_chunkbits default value to PAGE_CACHE_SHIFT,
......
...@@ -281,10 +281,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode) ...@@ -281,10 +281,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode)
return container_of(inode, struct ll_inode_info, lli_vfs_inode); return container_of(inode, struct ll_inode_info, lli_vfs_inode);
} }
/* default to about 40meg of readahead on a given system. That much tied /* default to about 64M of readahead on a given system. */
* up in 512k readahead requests serviced at 40ms each is about 1GB/s. #define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT))
*/
#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_SHIFT))
/* default to read-ahead full files smaller than 2MB on the second read */ /* default to read-ahead full files smaller than 2MB on the second read */
#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT)) #define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
...@@ -321,6 +319,9 @@ struct ll_ra_info { ...@@ -321,6 +319,9 @@ struct ll_ra_info {
struct ra_io_arg { struct ra_io_arg {
unsigned long ria_start; /* start offset of read-ahead*/ unsigned long ria_start; /* start offset of read-ahead*/
unsigned long ria_end; /* end offset of read-ahead*/ unsigned long ria_end; /* end offset of read-ahead*/
unsigned long ria_reserved; /* reserved pages for read-ahead */
unsigned long ria_end_min; /* minimum end to cover current read */
bool ria_eof; /* reach end of file */
/* If stride read pattern is detected, ria_stoff means where /* If stride read pattern is detected, ria_stoff means where
* stride read is started. Note: for normal read-ahead, the * stride read is started. Note: for normal read-ahead, the
* value here is meaningless, and also it will not be accessed * value here is meaningless, and also it will not be accessed
...@@ -550,6 +551,11 @@ struct ll_readahead_state { ...@@ -550,6 +551,11 @@ struct ll_readahead_state {
* PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages. * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
*/ */
unsigned long ras_window_start, ras_window_len; unsigned long ras_window_start, ras_window_len;
/*
* Optimal RPC size. It decides how many pages will be sent
* for each read-ahead.
*/
unsigned long ras_rpc_size;
/* /*
* Where next read-ahead should start at. This lies within read-ahead * Where next read-ahead should start at. This lies within read-ahead
* window. Read-ahead window is read in pieces rather than at once * window. Read-ahead window is read in pieces rather than at once
......
This diff is collapsed.
...@@ -99,6 +99,7 @@ static int osc_io_read_ahead(const struct lu_env *env, ...@@ -99,6 +99,7 @@ static int osc_io_read_ahead(const struct lu_env *env,
ldlm_lock_decref(&lockh, dlmlock->l_req_mode); ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
} }
ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc;
ra->cra_end = cl_index(osc2cl(osc), ra->cra_end = cl_index(osc2cl(osc),
dlmlock->l_policy_data.l_extent.end); dlmlock->l_policy_data.l_extent.end);
ra->cra_release = osc_read_ahead_release; ra->cra_release = osc_read_ahead_release;
...@@ -138,7 +139,7 @@ static int osc_io_submit(const struct lu_env *env, ...@@ -138,7 +139,7 @@ static int osc_io_submit(const struct lu_env *env,
LASSERT(qin->pl_nr > 0); LASSERT(qin->pl_nr > 0);
CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt); CDEBUG(D_CACHE | D_READA, "%d %d\n", qin->pl_nr, crt);
osc = cl2osc(ios->cis_obj); osc = cl2osc(ios->cis_obj);
cli = osc_cli(osc); cli = osc_cli(osc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment