Commit 6b2fb799 authored by Maxim Patlasov, committed by Miklos Szeredi

fuse: optimize writepages search

Re-work fi->writepages, replacing the list with an rb-tree.  This improves
performance because the FUSE kernel code iterates through fi->writepages for
each writeback page, and the typical number of entries is about 800 (for
100 MB of fuse writeback).

Before patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock
Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 5ddd9ced
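
For illustration, before the diff: the patch keeps fi->writepages as an rb-tree of
non-overlapping page ranges, so fuse_find_writeback() can answer "is any page in
[idx_from, idx_to] under writeback?" by following one root-to-leaf path instead of
walking a list. The minimal user-space sketch below shows the same three-way
comparison used for lookup and insertion. It is not kernel code: the patch uses the
kernel rb-tree API (rb_entry, rb_link_node, rb_insert_color) so the tree stays
balanced, while this sketch uses a plain, unbalanced binary search tree, and the
names range_node, range_insert and range_find are made up for illustration.

/*
 * User-space sketch only (hypothetical names, plain unbalanced BST):
 * non-overlapping page ranges keyed by their first page index.  The
 * three-way comparison mirrors fuse_find_writeback()/tree_insert();
 * the kernel patch uses its balanced rb-tree instead of this BST.
 */
#include <stdio.h>
#include <stdlib.h>

struct range_node {                     /* stands in for fuse_writepage_args */
        unsigned long idx_from;         /* first page index of the request */
        unsigned long num_pages;        /* number of pages in the request */
        struct range_node *left, *right;
};

/* Insert a range; ranges in the tree never overlap, as in fi->writepages. */
static void range_insert(struct range_node **p, struct range_node *node)
{
        unsigned long idx_to = node->idx_from + node->num_pages - 1;

        while (*p) {
                struct range_node *curr = *p;

                if (node->idx_from >= curr->idx_from + curr->num_pages)
                        p = &curr->right;       /* entirely to the right */
                else if (idx_to < curr->idx_from)
                        p = &curr->left;        /* entirely to the left */
                else
                        abort();                /* overlap: caller bug */
        }
        *p = node;
}

/* Does [idx_from, idx_to] overlap any stored range?  O(log n) if balanced. */
static struct range_node *range_find(struct range_node *n,
                                     unsigned long idx_from,
                                     unsigned long idx_to)
{
        while (n) {
                if (idx_from >= n->idx_from + n->num_pages)
                        n = n->right;
                else if (idx_to < n->idx_from)
                        n = n->left;
                else
                        return n;               /* ranges intersect */
        }
        return NULL;
}

int main(void)
{
        struct range_node *root = NULL;
        struct range_node a = { .idx_from = 0,   .num_pages = 32 };
        struct range_node b = { .idx_from = 100, .num_pages = 16 };

        range_insert(&root, &a);
        range_insert(&root, &b);

        printf("page 110 under writeback: %s\n",
               range_find(root, 110, 110) ? "yes" : "no");
        printf("page 50 under writeback:  %s\n",
               range_find(root, 50, 50) ? "yes" : "no");
        return 0;
}

With the old list, every call to fuse_find_writeback() scanned all pending writepage
requests; with the tree it follows a single descent, which is what the profile above
reflects in the drop of fuse_page_is_writeback() and _raw_spin_lock time.
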
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 
 struct fuse_writepage_args {
         struct fuse_io_args ia;
-        struct list_head writepages_entry;
+        struct rb_node writepages_entry;
         struct list_head queue_entry;
         struct fuse_writepage_args *next;
         struct inode *inode;
@@ -366,17 +366,23 @@ struct fuse_writepage_args {
 static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
                                             pgoff_t idx_from, pgoff_t idx_to)
 {
-        struct fuse_writepage_args *wpa;
+        struct rb_node *n;
+
+        n = fi->writepages.rb_node;
 
-        list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
+        while (n) {
+                struct fuse_writepage_args *wpa;
                 pgoff_t curr_index;
 
+                wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
                 WARN_ON(get_fuse_inode(wpa->inode) != fi);
                 curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
-                if (idx_from < curr_index + wpa->ia.ap.num_pages &&
-                    curr_index <= idx_to) {
+                if (idx_from >= curr_index + wpa->ia.ap.num_pages)
+                        n = n->rb_right;
+                else if (idx_to < curr_index)
+                        n = n->rb_left;
+                else
                         return wpa;
-                }
         }
 
         return NULL;
 }
@@ -1624,7 +1630,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
         struct backing_dev_info *bdi = inode_to_bdi(inode);
         int i;
 
-        list_del(&wpa->writepages_entry);
+        rb_erase(&wpa->writepages_entry, &fi->writepages);
         for (i = 0; i < ap->num_pages; i++) {
                 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
                 dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1712,6 +1718,36 @@ __acquires(fi->lock)
         }
 }
 
+static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
+{
+        pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
+        pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
+        struct rb_node **p = &root->rb_node;
+        struct rb_node *parent = NULL;
+
+        WARN_ON(!wpa->ia.ap.num_pages);
+        while (*p) {
+                struct fuse_writepage_args *curr;
+                pgoff_t curr_index;
+
+                parent = *p;
+                curr = rb_entry(parent, struct fuse_writepage_args,
+                                writepages_entry);
+                WARN_ON(curr->inode != wpa->inode);
+                curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
+
+                if (idx_from >= curr_index + curr->ia.ap.num_pages)
+                        p = &(*p)->rb_right;
+                else if (idx_to < curr_index)
+                        p = &(*p)->rb_left;
+                else
+                        return (void) WARN_ON(true);
+        }
+
+        rb_link_node(&wpa->writepages_entry, parent, p);
+        rb_insert_color(&wpa->writepages_entry, root);
+}
+
 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
                                int error)
 {
@@ -1730,7 +1766,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
                 wpa->next = next->next;
                 next->next = NULL;
                 next->ia.ff = fuse_file_get(wpa->ia.ff);
-                list_add(&next->writepages_entry, &fi->writepages);
+                tree_insert(&fi->writepages, next);
 
                 /*
                  * Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1865,7 +1901,7 @@ static int fuse_writepage_locked(struct page *page)
         inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
         spin_lock(&fi->lock);
-        list_add(&wpa->writepages_entry, &fi->writepages);
+        tree_insert(&fi->writepages, wpa);
         list_add_tail(&wpa->queue_entry, &fi->queued_writes);
         fuse_flush_writepages(inode);
         spin_unlock(&fi->lock);
@@ -1977,10 +2013,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
         WARN_ON(new_ap->num_pages != 0);
 
         spin_lock(&fi->lock);
-        list_del(&new_wpa->writepages_entry);
+        rb_erase(&new_wpa->writepages_entry, &fi->writepages);
         old_wpa = fuse_find_writeback(fi, page->index, page->index);
         if (!old_wpa) {
-                list_add(&new_wpa->writepages_entry, &fi->writepages);
+                tree_insert(&fi->writepages, new_wpa);
                 spin_unlock(&fi->lock);
                 return false;
         }
@@ -2095,7 +2131,7 @@ static int fuse_writepages_fill(struct page *page,
                 wpa->inode = inode;
 
                 spin_lock(&fi->lock);
-                list_add(&wpa->writepages_entry, &fi->writepages);
+                tree_insert(&fi->writepages, wpa);
                 spin_unlock(&fi->lock);
 
                 data->wpa = wpa;
@@ -3405,5 +3441,5 @@ void fuse_init_file_inode(struct inode *inode)
         INIT_LIST_HEAD(&fi->queued_writes);
         fi->writectr = 0;
         init_waitqueue_head(&fi->page_waitq);
-        INIT_LIST_HEAD(&fi->writepages);
+        fi->writepages = RB_ROOT;
 }
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -111,7 +111,7 @@ struct fuse_inode {
                         wait_queue_head_t page_waitq;
 
                         /* List of writepage requestst (pending or sent) */
-                        struct list_head writepages;
+                        struct rb_root writepages;
                 };
 
                 /* readdir cache (directory only) */