Commit d08b03c5, authored by Andrew Morton and committed by Jens Axboe

[PATCH] shrink_slab arith overflow fix

shrink_slab() wants to calculate

	nr_scanned_pages * seeks_per_object * entries_in_slab /
		nr_lru_pages

entries_in_slab and nr_lru_pages can vary a lot, so the intermediate
product (scanned * seeks * entries) can overflow 32 bits before the
division is applied.

I spent ages trying to avoid corner cases which cause a significant
lack of precision while preserving some clarity.  Gave up and used
do_div().  The code is called rarely - at most once per 128 kbytes of
reclaim.

The patch adds a tweak to balance_pgdat() to reduce the call rate to
shrink_slab() in the case where the zone is just a little bit below
pages_high.

Also increase SHRINK_BATCH.  The things we're shrinking are typically a
few hundred bytes, and a batchcount of 128 gives us a minimum of ten
pages or so per shrinking callout.
parent 0a7bf9c8
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/topology.h> #include <asm/topology.h>
#include <asm/div64.h>
#include <linux/swapops.h> #include <linux/swapops.h>
...@@ -85,7 +86,7 @@ struct shrinker { ...@@ -85,7 +86,7 @@ struct shrinker {
shrinker_t shrinker; shrinker_t shrinker;
struct list_head list; struct list_head list;
int seeks; /* seeks to recreate an obj */ int seeks; /* seeks to recreate an obj */
int nr; /* objs pending delete */ long nr; /* objs pending delete */
}; };
static LIST_HEAD(shrinker_list); static LIST_HEAD(shrinker_list);
...@@ -121,7 +122,7 @@ void remove_shrinker(struct shrinker *shrinker) ...@@ -121,7 +122,7 @@ void remove_shrinker(struct shrinker *shrinker)
kfree(shrinker); kfree(shrinker);
} }
#define SHRINK_BATCH 32 #define SHRINK_BATCH 128
/* /*
* Call the shrink functions to age shrinkable caches * Call the shrink functions to age shrinkable caches
* *
...@@ -134,29 +135,27 @@ void remove_shrinker(struct shrinker *shrinker) ...@@ -134,29 +135,27 @@ void remove_shrinker(struct shrinker *shrinker)
* slab to avoid swapping. * slab to avoid swapping.
* *
* FIXME: do not do for zone highmem * FIXME: do not do for zone highmem
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
*/ */
static int shrink_slab(int scanned, unsigned int gfp_mask) static int shrink_slab(long scanned, unsigned int gfp_mask)
{ {
struct list_head *lh; struct shrinker *shrinker;
int pages; long pages;
if (down_trylock(&shrinker_sem)) if (down_trylock(&shrinker_sem))
return 0; return 0;
pages = nr_used_zone_pages(); pages = nr_used_zone_pages();
list_for_each(lh, &shrinker_list) { list_for_each_entry(shrinker, &shrinker_list, list) {
struct shrinker *shrinker; long long delta;
int entries;
unsigned long delta;
shrinker = list_entry(lh, struct shrinker, list); delta = scanned * shrinker->seeks;
entries = (*shrinker->shrinker)(0, gfp_mask); delta *= (*shrinker->shrinker)(0, gfp_mask);
if (!entries) do_div(delta, pages + 1);
continue; shrinker->nr += delta;
delta = scanned * shrinker->seeks * entries;
shrinker->nr += delta / (pages + 1);
if (shrinker->nr > SHRINK_BATCH) { if (shrinker->nr > SHRINK_BATCH) {
int nr = shrinker->nr; long nr = shrinker->nr;
shrinker->nr = 0; shrinker->nr = 0;
(*shrinker->shrinker)(nr, gfp_mask); (*shrinker->shrinker)(nr, gfp_mask);
...@@ -824,7 +823,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps) ...@@ -824,7 +823,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
int i; int i;
for (priority = DEF_PRIORITY; priority; priority--) { for (priority = DEF_PRIORITY; priority; priority--) {
int success = 1; int all_zones_ok = 1;
for (i = 0; i < pgdat->nr_zones; i++) { for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i; struct zone *zone = pgdat->node_zones + i;
...@@ -832,20 +831,24 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps) ...@@ -832,20 +831,24 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
int max_scan; int max_scan;
int to_reclaim; int to_reclaim;
to_reclaim = zone->pages_high - zone->free_pages; if (nr_pages && to_free > 0) { /* Software suspend */
if (nr_pages && to_free > 0)
to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8); to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8);
} else { /* Zone balancing */
to_reclaim = zone->pages_high-zone->free_pages;
if (to_reclaim <= 0) if (to_reclaim <= 0)
continue; continue;
success = 0; }
all_zones_ok = 0;
max_scan = zone->nr_inactive >> priority; max_scan = zone->nr_inactive >> priority;
if (max_scan < to_reclaim * 2) if (max_scan < to_reclaim * 2)
max_scan = to_reclaim * 2; max_scan = to_reclaim * 2;
if (max_scan < SWAP_CLUSTER_MAX)
max_scan = SWAP_CLUSTER_MAX;
to_free -= shrink_zone(zone, max_scan, GFP_KSWAPD, to_free -= shrink_zone(zone, max_scan, GFP_KSWAPD,
to_reclaim, &nr_mapped, ps, priority); to_reclaim, &nr_mapped, ps, priority);
shrink_slab(max_scan + nr_mapped, GFP_KSWAPD); shrink_slab(max_scan + nr_mapped, GFP_KSWAPD);
} }
if (success) if (all_zones_ok)
break; break;
blk_congestion_wait(WRITE, HZ/4); blk_congestion_wait(WRITE, HZ/4);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment