Commit 250c01d0 authored by Ram Pai, committed by Linus Torvalds

[PATCH] Simplified readahead congestion control

Reinstate the feature wherein readahead will be bypassed if the underlying
queue is read-congested.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6f734a1a
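The change, in outline: the non-blocking readahead entry point now bails out as soon as the backing device reports read congestion, instead of piling more I/O onto an already loaded queue. The sketch below is only a summary of the idea and mirrors the do_page_cache_readahead() hunk in the diff:

        /* Non-blocking readahead: skip the whole batch when the device's
         * request queue is already congested with reads.  The caller treats
         * a return of -1 as "congested, readahead skipped". */
        if (bdi_read_congested(mapping->backing_dev_info))
                return -1;
        return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);

Callers that must not skip (force_page_cache_readahead(), and the blocking paths below) keep going through __do_page_cache_readahead(), which ignores congestion.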
@@ -249,6 +249,9 @@ static int read_pages(struct address_space *mapping, struct file *filp,
  * We really don't want to intermingle reads and writes like that.
  *
  * Returns the number of pages requested, or the maximum amount of I/O allowed.
+ *
+ * do_page_cache_readahead() returns -1 if it encountered request queue
+ * congestion.
  */
 static inline int
 __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
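For illustration only, this is what a caller honouring the new -1 convention looks like (it is exactly the non-blocking branch of the blockable_page_cache_readahead() helper added in the next hunk): give up on readahead and let the pages be read on demand later.

        int actual;

        actual = do_page_cache_readahead(mapping, filp, offset, nr_to_read);
        if (actual == -1)       /* request queue was read-congested */
                return 0;       /* readahead skipped; pages will be read on demand */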
@@ -359,13 +362,45 @@ int check_ra_success(struct file_ra_state *ra, unsigned long nr_to_read,
 }
 
 /*
- * Issue the I/O. If pages already in cache, increment the hit count until
- * we exceed max, then turn RA off until we start missing again.
+ * This version skips the IO if the queue is read-congested, and will tell the
+ * block layer to abandon the readahead if request allocation would block.
+ *
+ * force_page_cache_readahead() will ignore queue congestion and will block on
+ * request queues.
  */
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
                         unsigned long offset, unsigned long nr_to_read)
 {
-        return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
+        if (bdi_read_congested(mapping->backing_dev_info))
+                return -1;
+
+        return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
+}
+
+/*
+ * Read 'nr_to_read' pages starting at page 'offset'. If the flag 'block'
+ * is set, wait till the read completes. Otherwise attempt to read without
+ * blocking.
+ * Returns 1 meaning 'success' if the read is successful without switching off
+ * readahead mode. Otherwise returns failure.
+ */
+static int
+blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
+                        unsigned long offset, unsigned long nr_to_read,
+                        struct file_ra_state *ra, int block)
+{
+        int actual;
+
+        if (block) {
+                actual = __do_page_cache_readahead(mapping, filp,
+                                                offset, nr_to_read);
+        } else {
+                actual = do_page_cache_readahead(mapping, filp,
+                                                offset, nr_to_read);
+                if (actual == -1)
+                        return 0;
+        }
+        return check_ra_success(ra, nr_to_read, actual);
 }
 
 /*
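Usage sketch, condensed from the page_cache_readahead() hunks that follow: the current window is always read with block == 1, since the caller needs those pages now, while the ahead window may only block when the current request already reaches into it.

        /* current window: needed immediately, so blocking is allowed */
        blockable_page_cache_readahead(mapping, filp, offset, newsize, ra, 1);

        /* ahead window: block only if this request already overlaps it,
         * otherwise the I/O is opportunistic and abandoned on congestion */
        block = ((offset + newsize - 1) >= ra->ahead_start);
        blockable_page_cache_readahead(mapping, filp, ra->ahead_start,
                                        ra->ahead_size, ra, block);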
@@ -379,7 +414,7 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
 {
         unsigned long max, min;
         unsigned long newsize = req_size;
-        unsigned long actual=0;
+        unsigned long block;
 
         /*
          * Here we detect the case where the application is performing
@@ -392,7 +427,6 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
         max = get_max_readahead(ra);
         min = get_min_readahead(ra);
-//      maxsane = max_sane_readahead(max);
         newsize = min(req_size, max);
 
         if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) {
@@ -407,12 +441,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
         if ((ra->size == 0 && offset == 0)      /* first io and start of file */
             || (ra->size == -1 && ra->prev_page == offset - 1)) {
                 /* First sequential */
-                ra->prev_page = offset + newsize-1;
+                ra->prev_page = offset + newsize - 1;
                 ra->size = get_init_ra_size(newsize, max);
                 ra->start = offset;
-                actual = do_page_cache_readahead(mapping, filp, offset,
-                                                 ra->size);
-                if (!check_ra_success(ra, ra->size, actual))
+                if (!blockable_page_cache_readahead(mapping, filp, offset,
+                                                        ra->size, ra, 1))
                         goto out;
 
                 /*
@@ -427,9 +460,8 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
                         ra->ahead_size = get_next_ra_size(ra->size, max, min,
                                                           &ra->flags);
                         ra->ahead_start = ra->start + ra->size;
-                        actual = do_page_cache_readahead(mapping, filp,
-                                        ra->ahead_start, ra->ahead_size);
-                        check_ra_success(ra, ra->ahead_size, actual);
+                        blockable_page_cache_readahead(mapping, filp,
+                                        ra->ahead_start, ra->ahead_size, ra, 1);
                 }
                 goto out;
         }
@@ -441,10 +473,9 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
          */
         if ((offset != (ra->prev_page+1) || (ra->size == 0))) {
                 ra_off(ra);
-                ra->prev_page = offset + newsize-1;
-                actual = do_page_cache_readahead(mapping, filp, offset,
-                                                 newsize);
-                check_ra_success(ra, newsize, actual);
+                ra->prev_page = offset + newsize - 1;
+                blockable_page_cache_readahead(mapping, filp, offset,
+                                                 newsize, ra, 1);
                 goto out;
         }
@@ -454,33 +485,62 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
          */
         if (ra->ahead_start == 0) {     /* no ahead window yet */
-                ra->ahead_size = get_next_ra_size(max(newsize,ra->size),
-                                                  max, min, &ra->flags);
+                ra->ahead_size = get_next_ra_size(ra->size, max, min,
+                                                  &ra->flags);
                 ra->ahead_start = ra->start + ra->size;
-                newsize = min (newsize, ra->ahead_start - offset);
-                actual = do_page_cache_readahead(mapping, filp,
-                                        ra->ahead_start, ra->ahead_size);
-                if (!check_ra_success(ra, ra->ahead_size, actual))
+                block = ((offset + newsize - 1) >= ra->ahead_start);
+                if (!blockable_page_cache_readahead(mapping, filp,
+                        ra->ahead_start, ra->ahead_size, ra, block)) {
+                        /* A read failure in blocking mode implies the pages
+                         * are all cached, so we can safely assume we have
+                         * taken care of all the pages requested in this call.
+                         * A read failure in non-blocking mode implies we are
+                         * reading more pages than requested in this call, so
+                         * we can safely assume we have taken care of all the
+                         * pages requested in this call.
+                         *
+                         * Just reset the ahead window in case we failed due
+                         * to congestion.  The ahead window will in any case
+                         * be closed if we failed due to excessive page cache
+                         * hits.
+                         */
+                        ra->ahead_start = 0;
+                        ra->ahead_size = 0;
                         goto out;
+                }
         }
         /*
-         * Already have an ahead window, check if we crossed into it if so,
-         * shift windows and issue a new ahead window.
-         * Only return the #pages that are in the current window, so that we
-         * get called back on the first page of the ahead window which will
-         * allow us to submit more IO.
+         * Already have an ahead window, check if we crossed into it.
+         * If so, shift windows and issue a new ahead window.
+         * Only return the #pages that are in the current window, so that
+         * we get called back on the first page of the ahead window which
+         * will allow us to submit more IO.
          */
-        if ((offset + newsize -1) >= ra->ahead_start) {
+        if ((offset + newsize - 1) >= ra->ahead_start) {
                 ra->start = ra->ahead_start;
                 ra->size = ra->ahead_size;
                 ra->ahead_start = ra->ahead_start + ra->ahead_size;
                 ra->ahead_size = get_next_ra_size(ra->ahead_size,
                                                   max, min, &ra->flags);
-                newsize = min (newsize, ra->ahead_start - offset);
-                actual = do_page_cache_readahead(mapping, filp,
-                                        ra->ahead_start, ra->ahead_size);
-                check_ra_success(ra, ra->ahead_size, actual);
+                block = ((offset + newsize - 1) >= ra->ahead_start);
+                if (!blockable_page_cache_readahead(mapping, filp,
+                        ra->ahead_start, ra->ahead_size, ra, block)) {
+                        /* A read failure in blocking mode implies the pages
+                         * are all cached, so we can safely assume we have
+                         * taken care of all the pages requested in this call.
+                         * A read failure in non-blocking mode implies we are
+                         * reading more pages than requested in this call, so
+                         * we can safely assume we have taken care of all the
+                         * pages requested in this call.
+                         *
+                         * Just reset the ahead window in case we failed due
+                         * to congestion.  The ahead window will in any case
+                         * be closed if we failed due to excessive page cache
+                         * hits.
+                         */
+                        ra->ahead_start = 0;
+                        ra->ahead_size = 0;
+                }
         }
 out:
         ra->prev_page = offset + newsize - 1;
         return(newsize);
...
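To make the block decision concrete (the numbers below are invented purely for illustration): with offset = 100, newsize = 32 and ra->ahead_start = 120, the request covers pages 100..131 and crosses into the ahead window, so block = ((100 + 32 - 1) >= 120) = 1 and the ahead readahead is allowed to wait for request slots. With ra->ahead_start = 200 the same request stays inside the current window, block = 0, and the ahead I/O is purely opportunistic: if the queue is congested it is abandoned and the ahead window reset, as in the failure branch above.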