diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 31795e619273e439ad3d869e63c42a6b153c39a1..6a5fb939d3e5fd6cea40532402389d02f0835566 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -25,6 +25,9 @@
 #define PCPU_MIN_ALLOC_SHIFT		2
 #define PCPU_MIN_ALLOC_SIZE		(1 << PCPU_MIN_ALLOC_SHIFT)
 
+/* number of bits per page, used to trigger a scan if blocks are > PAGE_SIZE */
+#define PCPU_BITS_PER_PAGE		(PAGE_SIZE >> PCPU_MIN_ALLOC_SHIFT)
+
 /*
  * This determines the size of each metadata block.  There are several subtle
  * constraints around this constant.  The reserved region must be a multiple of
diff --git a/mm/percpu.c b/mm/percpu.c
index f38f47a656420a3efe7e3cdf695d5dbd0d644f8f..57b3168eae089f150ec0254cc44ebc374ff49fbc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -300,6 +300,11 @@ static unsigned long pcpu_off_to_block_off(int off)
 	return off & (PCPU_BITMAP_BLOCK_BITS - 1);
 }
 
+static unsigned long pcpu_block_off_to_off(int index, int off)
+{
+	return index * PCPU_BITMAP_BLOCK_BITS + off;
+}
+
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
@@ -623,6 +628,17 @@ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
  * @chunk: chunk of interest
  * @bit_off: chunk offset
  * @bits: size of request
+ *
+ * Updates metadata for the allocation path.  This avoids a blind block
+ * refresh by making use of the block contig hints.  If this fails, it scans
+ * forward and backward to determine the extent of the free area.  This is
+ * capped at the boundary of blocks.
+ *
+ * A chunk update is triggered if a page becomes free, a block becomes free,
+ * or the free spans across blocks.  This tradeoff is to minimize iterating
+ * over the block metadata to update chunk->contig_bits.  chunk->contig_bits
+ * may be off by up to a page, but it will never be more than the available
+ * space.  If the contig hint is contained in one block, it will be accurate.
  */
 static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
 					int bits)
@@ -630,6 +646,7 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
 	struct pcpu_block_md *s_block, *e_block, *block;
 	int s_index, e_index;	/* block indexes of the freed allocation */
 	int s_off, e_off;	/* block offsets of the freed allocation */
+	int start, end;		/* start and end of the whole free area */
 
 	/*
 	 * Calculate per block offsets.
@@ -645,13 +662,46 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
 	s_block = chunk->md_blocks + s_index;
 	e_block = chunk->md_blocks + e_index;
 
+	/*
+	 * Check if the freed area aligns with the block->contig_hint.
+	 * If it does, then the scan to find the beginning/end of the
+	 * larger free area can be avoided.
+	 *
+	 * start and end refer to beginning and end of the free area
+	 * within each their respective blocks.  This is not necessarily
+	 * the entire free area as it may span blocks past the beginning
+	 * or end of the block.
+	 */
+	start = s_off;
+	if (s_off == s_block->contig_hint + s_block->contig_hint_start) {
+		start = s_block->contig_hint_start;
+	} else {
+		/*
+		 * Scan backwards to find the extent of the free area.
+		 * find_last_bit returns the starting bit, so if the start bit
+		 * is returned, that means there was no last bit and the
+		 * remainder of the chunk is free.
+		 */
+		int l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index),
+					  start);
+		start = (start == l_bit) ? 0 : l_bit + 1;
+	}
+
+	end = e_off;
+	if (e_off == e_block->contig_hint_start)
+		end = e_block->contig_hint_start + e_block->contig_hint;
+	else
+		end = find_next_bit(pcpu_index_alloc_map(chunk, e_index),
+				    PCPU_BITMAP_BLOCK_BITS, end);
+
 	/* update s_block */
-	pcpu_block_refresh_hint(chunk, s_index);
+	e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
+	pcpu_block_update(s_block, start, e_off);
 
 	/* freeing in the same block */
 	if (s_index != e_index) {
 		/* update e_block */
-		pcpu_block_refresh_hint(chunk, e_index);
+		pcpu_block_update(e_block, 0, end);
 
 		/* reset md_blocks in the middle */
 		for (block = s_block + 1; block < e_block; block++) {
@@ -663,7 +713,19 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
 		}
 	}
 
-	pcpu_chunk_refresh_hint(chunk);
+	/*
+	 * Refresh chunk metadata when the free makes a page free, a block
+	 * free, or spans across blocks.  The contig hint may be off by up to
+	 * a page, but if the hint is contained in a block, it will be accurate
+	 * with the else condition below.
+	 */
+	if ((ALIGN_DOWN(end, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS)) >
+	     ALIGN(start, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS))) ||
+	    s_index != e_index)
+		pcpu_chunk_refresh_hint(chunk);
+	else
+		pcpu_chunk_update(chunk, pcpu_block_off_to_off(s_index, start),
+				  s_block->contig_hint);
 }
 
 /**