Commit 749ef9f8 authored by Corrado Zoccolo, committed by Jens Axboe

cfq: improve fsync performance for small files

Fsync performance for small files achieved by cfq on high-end disks is
lower than what deadline can achieve, due to idling introduced between
the sync write happening in process context and the journal commit.

Moreover, when competing with a sequential reader, a process writing
small files and fsync-ing them is starved.

This patch fixes the two problems by:
- marking journal commits as WRITE_SYNC, so that they get the REQ_NOIDLE
  flag set,
- forcing all queues that have REQ_NOIDLE requests to be put in the noidle
  tree.

Having the queue associated with the fsync-ing process and the one associated
with journal commits in the noidle tree allows:
- switching between them without idling,
- fairness vs. competing idling queues, since they will be serviced only
  after the noidle tree expires its slice.
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Tested-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
parent 6d0aed7a
...@@ -216,7 +216,6 @@ struct cfq_data { ...@@ -216,7 +216,6 @@ struct cfq_data {
enum wl_type_t serving_type; enum wl_type_t serving_type;
unsigned long workload_expires; unsigned long workload_expires;
struct cfq_group *serving_group; struct cfq_group *serving_group;
bool noidle_tree_requires_idle;
/* /*
* Each priority tree is sorted by next_request position. These * Each priority tree is sorted by next_request position. These
...@@ -2126,7 +2125,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) ...@@ -2126,7 +2125,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
slice = max_t(unsigned, slice, CFQ_MIN_TT); slice = max_t(unsigned, slice, CFQ_MIN_TT);
cfq_log(cfqd, "workload slice:%d", slice); cfq_log(cfqd, "workload slice:%d", slice);
cfqd->workload_expires = jiffies + slice; cfqd->workload_expires = jiffies + slice;
cfqd->noidle_tree_requires_idle = false;
} }
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
...@@ -3108,7 +3106,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, ...@@ -3108,7 +3106,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfqq->queued[0] + cfqq->queued[1] >= 4) if (cfqq->queued[0] + cfqq->queued[1] >= 4)
cfq_mark_cfqq_deep(cfqq); cfq_mark_cfqq_deep(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
enable_idle = 0;
else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0; enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) { else if (sample_valid(cic->ttime_samples)) {
...@@ -3421,17 +3421,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) ...@@ -3421,17 +3421,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_slice_expired(cfqd, 1); cfq_slice_expired(cfqd, 1);
else if (sync && cfqq_empty && else if (sync && cfqq_empty &&
!cfq_close_cooperator(cfqd, cfqq)) { !cfq_close_cooperator(cfqd, cfqq)) {
cfqd->noidle_tree_requires_idle |= cfq_arm_slice_timer(cfqd);
!(rq->cmd_flags & REQ_NOIDLE);
/*
* Idling is enabled for SYNC_WORKLOAD.
* SYNC_NOIDLE_WORKLOAD idles at the end of the tree
* only if we processed at least one !REQ_NOIDLE request
*/
if (cfqd->serving_type == SYNC_WORKLOAD
|| cfqd->noidle_tree_requires_idle
|| cfqq->cfqg->nr_cfqq == 1)
cfq_arm_slice_timer(cfqd);
} }
} }
......
...@@ -318,7 +318,7 @@ void journal_commit_transaction(journal_t *journal) ...@@ -318,7 +318,7 @@ void journal_commit_transaction(journal_t *journal)
int first_tag = 0; int first_tag = 0;
int tag_flag; int tag_flag;
int i; int i;
int write_op = WRITE; int write_op = WRITE_SYNC;
/* /*
* First job: lock down the current transaction and wait for * First job: lock down the current transaction and wait for
......
...@@ -360,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -360,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
int tag_bytes = journal_tag_bytes(journal); int tag_bytes = journal_tag_bytes(journal);
struct buffer_head *cbh = NULL; /* For transactional checksums */ struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0; __u32 crc32_sum = ~0;
int write_op = WRITE; int write_op = WRITE_SYNC;
/* /*
* First job: lock down the current transaction and wait for * First job: lock down the current transaction and wait for
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment