CacheFiles: Handle truncate unlocking the page we're reading

Handle truncate unlocking the page we're attempting to read from the backing device before the read has completed. This was causing reports like the following to occur: Pid: 4765, comm: kslowd Not tainted 2.6.30.1 #1 Call Trace: [<ffffffffa0331d7a>] ? cachefiles_read_waiter+0xd9/0x147 [cachefiles] [<ffffffff804b74bd>] ? __wait_on_bit+0x60/0x6f [<ffffffff8022bbbb>] ? __wake_up_common+0x3f/0x71 [<ffffffff8022cc32>] ? __wake_up+0x30/0x44 [<ffffffff8024a41f>] ? __wake_up_bit+0x28/0x2d [<ffffffffa003a793>] ? ext3_truncate+0x4d7/0x8ed [ext3] [<ffffffff80281f90>] ? pagevec_lookup+0x17/0x1f [<ffffffff8028c2ff>] ? unmap_mapping_range+0x59/0x1ff [<ffffffff8022cc32>] ? __wake_up+0x30/0x44 [<ffffffff8028e286>] ? vmtruncate+0xc2/0xe2 [<ffffffff802b82cf>] ? inode_setattr+0x22/0x10a [<ffffffffa003baa5>] ? ext3_setattr+0x17b/0x1e6 [ext3] [<ffffffff802b853d>] ? notify_change+0x186/0x2c9 [<ffffffffa032d9de>] ? cachefiles_attr_changed+0x133/0x1cd [cachefiles] [<ffffffffa032df7f>] ? cachefiles_lookup_object+0xcf/0x12a [cachefiles] [<ffffffffa0318165>] ? fscache_lookup_object+0x110/0x122 [fscache] [<ffffffffa03188c3>] ? fscache_object_slow_work_execute+0x590/0x6bc [fscache] [<ffffffff80278f82>] ? slow_work_thread+0x285/0x43a [<ffffffff8024a446>] ? autoremove_wake_function+0x0/0x2e [<ffffffff80278cfd>] ? slow_work_thread+0x0/0x43a [<ffffffff8024a317>] ? kthread+0x54/0x81 [<ffffffff8020c93a>] ? child_rip+0xa/0x20 [<ffffffff8024a2c3>] ? kthread+0x0/0x81 [<ffffffff8020c930>] ? child_rip+0x0/0x20 CacheFiles: I/O Error: Readpage failed on backing file 200000000000810 FS-Cache: Cache cachefiles stopped due to I/O error Reported-by: Christian Kujau <lists@nerdbynature.de> Reported-by: Takashi Iwai <tiwai@suse.de> Reported-by: Duc Le Minh <duclm.vn@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com>

CacheFiles: Handle truncate unlocking the page we're reading
Handle truncate unlocking the page we're attempting to read from the backing device before the read has completed. This was causing reports like the following to occur: Pid: 4765, comm: kslowd Not tainted 2.6.30.1 #1 Call Trace: [<ffffffffa0331d7a>] ? cachefiles_read_waiter+0xd9/0x147 [cachefiles] [<ffffffff804b74bd>] ? __wait_on_bit+0x60/0x6f [<ffffffff8022bbbb>] ? __wake_up_common+0x3f/0x71 [<ffffffff8022cc32>] ? __wake_up+0x30/0x44 [<ffffffff8024a41f>] ? __wake_up_bit+0x28/0x2d [<ffffffffa003a793>] ? ext3_truncate+0x4d7/0x8ed [ext3] [<ffffffff80281f90>] ? pagevec_lookup+0x17/0x1f [<ffffffff8028c2ff>] ? unmap_mapping_range+0x59/0x1ff [<ffffffff8022cc32>] ? __wake_up+0x30/0x44 [<ffffffff8028e286>] ? vmtruncate+0xc2/0xe2 [<ffffffff802b82cf>] ? inode_setattr+0x22/0x10a [<ffffffffa003baa5>] ? ext3_setattr+0x17b/0x1e6 [ext3] [<ffffffff802b853d>] ? notify_change+0x186/0x2c9 [<ffffffffa032d9de>] ? cachefiles_attr_changed+0x133/0x1cd [cachefiles] [<ffffffffa032df7f>] ? cachefiles_lookup_object+0xcf/0x12a [cachefiles] [<ffffffffa0318165>] ? fscache_lookup_object+0x110/0x122 [fscache] [<ffffffffa03188c3>] ? fscache_object_slow_work_execute+0x590/0x6bc [fscache] [<ffffffff80278f82>] ? slow_work_thread+0x285/0x43a [<ffffffff8024a446>] ? autoremove_wake_function+0x0/0x2e [<ffffffff80278cfd>] ? slow_work_thread+0x0/0x43a [<ffffffff8024a317>] ? kthread+0x54/0x81 [<ffffffff8020c93a>] ? child_rip+0xa/0x20 [<ffffffff8024a2c3>] ? kthread+0x0/0x81 [<ffffffff8020c930>] ? child_rip+0x0/0x20 CacheFiles: I/O Error: Readpage failed on backing file 200000000000810 FS-Cache: Cache cachefiles stopped due to I/O error Reported-by: Christian Kujau <lists@nerdbynature.de> Reported-by: Takashi Iwai <tiwai@suse.de> Reported-by: Duc Le Minh <duclm.vn@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com>
5e929b33 · David Howells · a17754fb · 5e929b33
Commit 5e929b33 authored Nov 19, 2009 by David Howells
Hide whitespace changes
Inline Side-by-side

Showing with 93 additions and 6 deletions

fs/cachefiles/rdwr.c fs/cachefiles/rdwr.c +93 -6

No files found.
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -40,8 +40,10 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,

 	_debug("--- monitor %p %lx ---", page, page->flags);

-	if (!PageUptodate(page) && !PageError(page))
-		dump_stack();
+	if (!PageUptodate(page) && !PageError(page)) {
+		/* unlocked, not uptodate and not erronous? */
+		_debug("page probably truncated");
+	}

 	/* remove from the waitqueue */
 	list_del(&wait->task_list);
@@ -60,6 +62,84 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
 	return 0;
 }

+/*
+ * handle a probably truncated page
+ * - check to see if the page is still relevant and reissue the read if
+ *   possible
+ * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
+ *   must wait again and 0 if successful
+ */
+static int cachefiles_read_reissue(struct cachefiles_object *object,
+				   struct cachefiles_one_read *monitor)
+{
+	struct address_space *bmapping = object->backer->d_inode->i_mapping;
+	struct page *backpage = monitor->back_page, *backpage2;
+	int ret;
+
+	kenter("{ino=%lx},{%lx,%lx}",
+	       object->backer->d_inode->i_ino,
+	       backpage->index, backpage->flags);
+
+	/* skip if the page was truncated away completely */
+	if (backpage->mapping != bmapping) {
+		kleave(" = -ENODATA [mapping]");
+		return -ENODATA;
+	}
+
+	backpage2 = find_get_page(bmapping, backpage->index);
+	if (!backpage2) {
+		kleave(" = -ENODATA [gone]");
+		return -ENODATA;
+	}
+
+	if (backpage != backpage2) {
+		put_page(backpage2);
+		kleave(" = -ENODATA [different]");
+		return -ENODATA;
+	}
+
+	/* the page is still there and we already have a ref on it, so we don't
+	 * need a second */
+	put_page(backpage2);
+
+	INIT_LIST_HEAD(&monitor->op_link);
+	add_page_wait_queue(backpage, &monitor->monitor);
+
+	if (trylock_page(backpage)) {
+		ret = -EIO;
+		if (PageError(backpage))
+			goto unlock_discard;
+		ret = 0;
+		if (PageUptodate(backpage))
+			goto unlock_discard;
+
+		kdebug("reissue read");
+		ret = bmapping->a_ops->readpage(NULL, backpage);
+		if (ret < 0)
+			goto unlock_discard;
+	}
+
+	/* but the page may have been read before the monitor was installed, so
+	 * the monitor may miss the event - so we have to ensure that we do get
+	 * one in such a case */
+	if (trylock_page(backpage)) {
+		_debug("jumpstart %p {%lx}", backpage, backpage->flags);
+		unlock_page(backpage);
+	}
+
+	/* it'll reappear on the todo list */
+	kleave(" = -EINPROGRESS");
+	return -EINPROGRESS;
+
+unlock_discard:
+	unlock_page(backpage);
+	spin_lock_irq(&object->work_lock);
+	list_del(&monitor->op_link);
+	spin_unlock_irq(&object->work_lock);
+	kleave(" = %d", ret);
+	return ret;
+}
+
 /*
 * copy data from backing pages to netfs pages to complete a read operation
 * - driven by FS-Cache's thread pool
@@ -92,20 +172,26 @@ static void cachefiles_read_copier(struct fscache_operation *_op)

 		_debug("- copy {%lu}", monitor->back_page->index);

-		error = -EIO;
+	recheck:
 		if (PageUptodate(monitor->back_page)) {
 			copy_highpage(monitor->netfs_page, monitor->back_page);

 			pagevec_add(&pagevec, monitor->netfs_page);
 			fscache_mark_pages_cached(monitor->op, &pagevec);
 			error = 0;
-		}
-
-		if (error)
+		} else if (!PageError(monitor->back_page)) {
+			/* the page has probably been truncated */
+			error = cachefiles_read_reissue(object, monitor);
+			if (error == -EINPROGRESS)
+				goto next;
+			goto recheck;
+		} else {
 			cachefiles_io_error_obj(
 				object,
 				"Readpage failed on backing file %lx",
 				(unsigned long) monitor->back_page->flags);
+			error = -EIO;
+		}

 		page_cache_release(monitor->back_page);

@@ -114,6 +200,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
 		fscache_put_retrieval(op);
 		kfree(monitor);

+	next:
 		/* let the thread pool have some air occasionally */
 		max--;
 		if (max < 0 || need_resched()) {