nexedi / linux

Commit a206231b, authored Oct 29, 2002 by Andrew Morton, committed by Linus Torvalds on Oct 29, 2002
[PATCH] hot-n-cold pages: page allocator core
Hot/Cold pages and zone->lock amortisation
Parent: 1d2652dd
Showing 5 changed files with 151 additions and 39 deletions (+151 -39):

  include/linux/gfp.h      +4    -3
  include/linux/mm.h       +0    -1
  include/linux/mmzone.h   +17   -0
  mm/page_alloc.c          +127  -33
  mm/swap.c                +3    -2
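In the caller's terms, the patch adds one gfp flag and two page-freeing entry points: __GFP_COLD asks __alloc_pages() for a page from the cold per-CPU list rather than the hot one, and free_hot_page()/free_cold_page() return a 0-order page to the hot or cold per-CPU list instead of going straight back to the buddy allocator. A minimal, hypothetical usage sketch follows; grab_dma_buffer() and drop_dma_buffer() are invented names, not part of this commit, and only __GFP_COLD, free_hot_page() and free_cold_page() come from the patch itself.

/*
 * Illustrative fragment, not from this commit.  Ask for a cache-cold page
 * when the CPU will not touch the data (say, a buffer that is only ever
 * filled by DMA), and give pages back to the matching per-CPU list when
 * done.  Like __free_pages_ok(), free_hot_page()/free_cold_page() expect
 * the caller to hold the final reference to the page.
 */
static struct page *grab_dma_buffer(void)       /* hypothetical helper */
{
        return alloc_page(GFP_KERNEL | __GFP_COLD);  /* cold per-CPU list */
}

static void drop_dma_buffer(struct page *page)  /* hypothetical helper */
{
        free_cold_page(page);   /* contents never entered this CPU's cache */
}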
include/linux/gfp.h

@@ -17,6 +17,7 @@
 #define __GFP_IO        0x40    /* Can start low memory physical IO? */
 #define __GFP_HIGHIO    0x80    /* Can start high mem physical IO? */
 #define __GFP_FS        0x100   /* Can call down to low-level FS? */
+#define __GFP_COLD      0x200   /* Cache-cold page required */

 #define GFP_NOHIGHIO    (__GFP_WAIT | __GFP_IO)
 #define GFP_NOIO        (__GFP_WAIT)
...

@@ -32,6 +33,7 @@
 #define GFP_DMA         __GFP_DMA

 /*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
...

@@ -77,11 +79,10 @@ extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
 #define __get_dma_pages(gfp_mask, order) \
                __get_free_pages((gfp_mask) | GFP_DMA,(order))

 /*
  * There is only one 'core' page-freeing function.
  */
 extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
 extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+extern void FASTCALL(free_hot_page(struct page *page));
+extern void FASTCALL(free_cold_page(struct page *page));

 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr),0)
...
include/linux/mm.h

@@ -211,7 +211,6 @@ struct page {
 #define set_page_count(p,v)     atomic_set(&(p)->count, v)

 extern void FASTCALL(__page_cache_release(struct page *));
-void FASTCALL(__free_pages_ok(struct page *page, unsigned int order));

 static inline void put_page(struct page *page)
 {
...
include/linux/mmzone.h

@@ -9,6 +9,7 @@
 #include <linux/list.h>
 #include <linux/wait.h>
 #include <linux/cache.h>
+#include <linux/threads.h>
 #include <asm/atomic.h>
 #ifdef CONFIG_DISCONTIGMEM
 #include <asm/numnodes.h>
...

@@ -46,6 +47,18 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif

+struct per_cpu_pages {
+        int count;              /* number of pages in the list */
+        int low;                /* low watermark, refill needed */
+        int high;               /* high watermark, emptying needed */
+        int batch;              /* chunk size for buddy add/remove */
+        struct list_head list;  /* the list of pages */
+};
+
+struct per_cpu_pageset {
+        struct per_cpu_pages pcp[2];    /* 0: hot.  1: cold */
+} ____cacheline_aligned_in_smp;
+
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
  * into multiple physical zones. On a PC we have 3 zones:
...

@@ -107,6 +120,10 @@ struct zone {
        unsigned long           wait_table_size;
        unsigned long           wait_table_bits;

        ZONE_PADDING(_pad3_)

+       struct per_cpu_pageset  pageset[NR_CPUS];

        /*
         * Discontig memory support fields.
         */
...
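The new per-CPU machinery lives entirely inside struct zone: each zone carries NR_CPUS pairs of page lists, indexed first by CPU and then by hot (0) or cold (1). The stand-alone model below is only for eyeballing the per-zone footprint; it is not kernel code, NR_CPUS is assumed to be 32 here, and the real per_cpu_pageset is additionally padded out to a cache line by ____cacheline_aligned_in_smp.

/* Stand-alone model of the structures added above (not kernel code). */
#include <stdio.h>

#define NR_CPUS 32                      /* assumption for this sketch */

struct list_head { struct list_head *next, *prev; };

struct per_cpu_pages {
        int count;                      /* number of pages in the list */
        int low;                        /* low watermark, refill needed */
        int high;                       /* high watermark, emptying needed */
        int batch;                      /* chunk size for buddy add/remove */
        struct list_head list;          /* the list of pages */
};

struct per_cpu_pageset {
        struct per_cpu_pages pcp[2];    /* 0: hot.  1: cold */
};

int main(void)
{
        /* Per-zone cost of the pageset array, ignoring cacheline padding. */
        printf("sizeof(per_cpu_pageset) = %zu bytes\n",
               sizeof(struct per_cpu_pageset));
        printf("pageset[NR_CPUS] adds %zu bytes to each struct zone\n",
               NR_CPUS * sizeof(struct per_cpu_pageset));
        return 0;
}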
mm/page_alloc.c

@@ -10,6 +10,8 @@
 *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
+ *  Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
+ *          (lots of bits borrowed from Ingo Molnar & Andrew Morton)
 */

#include <linux/config.h>
...

@@ -151,13 +153,14 @@ static inline void free_pages_check(const char *function, struct page *page)
 * Assumes all pages on list are in same zone, and of same order.
 * count is the number of pages to free, or 0 for all on the list.
 */
-static void
+static int
free_pages_bulk(struct zone *zone, int count,
                struct list_head *list, unsigned int order)
{
        unsigned long mask, flags;
        struct free_area *area;
        struct page *base, *page = NULL;
+       int ret = 0;

        mask = (~0UL) << order;
        base = zone->zone_mem_map;
...
@@ -169,8 +172,10 @@ free_pages_bulk(struct zone *zone, int count,
                list_del(&page->list);
                __free_pages_bulk(page, base, zone, area, mask, order);
                mod_page_state(pgfree, count<<order);
+               ret++;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
+       return ret;
}

void __free_pages_ok(struct page *page, unsigned int order)
...
@@ -201,14 +206,13 @@ expand(struct zone *zone, struct page *page,
                index += size;
                page += size;
        }
        BUG_ON(bad_range(zone, page));
        return page;
}

/*
 * This page is about to be returned from the page allocator
 */
-static inline void prep_new_page(struct page *page)
+static void prep_new_page(struct page *page)
{
        if (page->mapping || page_mapped(page) ||
...
@@ -248,36 +252,17 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
                        continue;

                page = list_entry(curr, struct page, list);
                BUG_ON(bad_range(zone, page));
                list_del(curr);
                index = page - zone->zone_mem_map;
                if (current_order != MAX_ORDER-1)
                        MARK_USED(index, current_order, area);
                zone->free_pages -= 1UL << order;
-               page = expand(zone, page, index, order, current_order, area);
-               return page;
+               return expand(zone, page, index, order, current_order, area);
        }

        return NULL;
}

-/* Obtain a single element from the buddy allocator */
-static struct page *rmqueue(struct zone *zone, unsigned int order)
-{
-       unsigned long flags;
-       struct page *page;
-
-       spin_lock_irqsave(&zone->lock, flags);
-       page = __rmqueue(zone, order);
-       spin_unlock_irqrestore(&zone->lock, flags);
-       if (page != NULL) {
-               BUG_ON(bad_range(zone, page));
-               prep_new_page(page);
-       }
-       return page;
-}
-
/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency.  Add them to the supplied list.
...
@@ -340,6 +325,72 @@ int is_head_of_free_region(struct page *page)
}
#endif  /* CONFIG_SOFTWARE_SUSPEND */

+/*
+ * Free a 0-order page
+ */
+static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
+static void free_hot_cold_page(struct page *page, int cold)
+{
+       struct zone *zone = page_zone(page);
+       struct per_cpu_pages *pcp;
+       unsigned long flags;
+
+       free_pages_check(__FUNCTION__, page);
+       pcp = &zone->pageset[get_cpu()].pcp[cold];
+       local_irq_save(flags);
+       if (pcp->count >= pcp->high)
+               pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+       list_add(&page->list, &pcp->list);
+       pcp->count++;
+       local_irq_restore(flags);
+       put_cpu();
+}
+
+void free_hot_page(struct page *page)
+{
+       free_hot_cold_page(page, 0);
+}
+
+void free_cold_page(struct page *page)
+{
+       free_hot_cold_page(page, 1);
+}
+
+static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+{
+       unsigned long flags;
+       struct page *page = NULL;
+
+       if (order == 0) {
+               struct per_cpu_pages *pcp;
+
+               pcp = &zone->pageset[get_cpu()].pcp[cold];
+               local_irq_save(flags);
+               if (pcp->count <= pcp->low)
+                       pcp->count += rmqueue_bulk(zone, 0, pcp->batch, &pcp->list);
+               if (pcp->count) {
+                       page = list_entry(pcp->list.next, struct page, list);
+                       list_del(&page->list);
+                       pcp->count--;
+               }
+               local_irq_restore(flags);
+               put_cpu();
+       }
+
+       if (page == NULL) {
+               spin_lock_irqsave(&zone->lock, flags);
+               page = __rmqueue(zone, order);
+               spin_unlock_irqrestore(&zone->lock, flags);
+       }
+
+       if (page != NULL) {
+               BUG_ON(bad_range(zone, page));
+               prep_new_page(page);
+       }
+       return page;
+}
+
/*
 * This is the 'heart' of the zoned buddy allocator:
 */
...
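This is where the zone->lock amortisation named in the commit message happens: the common order-0 path only disables local interrupts and works on this CPU's list, and zone->lock is taken once per pcp->batch pages inside rmqueue_bulk()/free_pages_bulk(), rather than once per page as the removed rmqueue() did. Below is a small user-space model of that policy (hot list only, single CPU; not kernel code); the watermark values are the ones the initialisation added later in this patch would produce for batch = 16 (hot low = 2*batch, high = 6*batch). It counts how often the "zone lock" would be taken while allocating and then freeing N order-0 pages.

/* User-space model of the hot per-CPU list policy above (not kernel code). */
#include <stdio.h>

static int batch = 16, low = 32, high = 96;   /* example watermarks */
static int pcp_count;                         /* pages on the per-CPU list */
static long lock_acquisitions;                /* times zone->lock is taken */

static int rmqueue_bulk_model(int n)  { lock_acquisitions++; return n; }
static int free_bulk_model(int n)     { lock_acquisitions++; return n; }

static void alloc_one(void)           /* models buffered_rmqueue(), order 0 */
{
        if (pcp_count <= low)
                pcp_count += rmqueue_bulk_model(batch);
        pcp_count--;                  /* take one page off the local list */
}

static void free_one(void)            /* models free_hot_cold_page() */
{
        if (pcp_count >= high)
                pcp_count -= free_bulk_model(batch);
        pcp_count++;                  /* put the page on the local list */
}

int main(void)
{
        int i, n = 100000;

        for (i = 0; i < n; i++)
                alloc_one();
        for (i = 0; i < n; i++)
                free_one();
        printf("%d allocs + %d frees -> %ld zone->lock acquisitions\n",
               n, n, lock_acquisitions);
        return 0;
}

In the steady state the lock is taken roughly once per batch of 16 pages in each direction, instead of once per page.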
@@ -349,13 +400,18 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
{
        unsigned long min;
        struct zone **zones, *classzone;
        struct page * page;
        int cflags;
        int i;
+       int cold;

        if (gfp_mask & __GFP_WAIT)
                might_sleep();

+       cold = 0;
+       if (gfp_mask & __GFP_COLD)
+               cold = 1;

        mod_page_state(pgalloc, 1<<order);

        zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
...
@@ -371,7 +427,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
                /* the incremental min is allegedly to discourage fallback */
                min += z->pages_low;
                if (z->free_pages > min || z->free_pages >= z->pages_high) {
-                       page = rmqueue(z, order);
+                       page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
...

@@ -396,7 +452,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
                local_min >>= 2;
                min += local_min;
                if (z->free_pages > min || z->free_pages >= z->pages_high) {
-                       page = rmqueue(z, order);
+                       page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
...

@@ -410,7 +466,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
        for (i = 0; zones[i] != NULL; i++) {
                struct zone *z = zones[i];

-               page = rmqueue(z, order);
+               page = buffered_rmqueue(z, order, cold);
                if (page)
                        return page;
        }
...

@@ -440,7 +496,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
                min += z->pages_min;
                if (z->free_pages > min || z->free_pages >= z->pages_high) {
-                       page = rmqueue(z, order);
+                       page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
...
@@ -492,13 +548,17 @@ void __pagevec_free(struct pagevec *pvec)
        int i = pagevec_count(pvec);

        while (--i >= 0)
-               __free_pages_ok(pvec->pages[i], 0);
+               free_hot_page(pvec->pages[i]);
}

void __free_pages(struct page *page, unsigned int order)
{
-       if (!PageReserved(page) && put_page_testzero(page))
+       if (!PageReserved(page) && put_page_testzero(page)) {
+               if (order == 0)
+                       free_hot_page(page);
+               else
                        __free_pages_ok(page, order);
+       }
}

void free_pages(unsigned long addr, unsigned int order)
...
@@ -899,7 +959,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
        unsigned long i, j;
        unsigned long local_offset;
        const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
-       int nid = pgdat->node_id;
+       int cpu, nid = pgdat->node_id;
        struct page *lmem_map = pgdat->node_mem_map;
        unsigned long zone_start_pfn = pgdat->node_start_pfn;
...

@@ -911,13 +971,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                struct zone *zone = pgdat->node_zones + j;
                unsigned long mask;
                unsigned long size, realsize;
+               unsigned long batch;

                zone_table[nid * MAX_NR_ZONES + j] = zone;
                realsize = size = zones_size[j];
                if (zholes_size)
                        realsize -= zholes_size[j];

-               printk("  %s zone: %lu pages\n", zone_names[j], realsize);
                zone->spanned_pages = size;
                zone->present_pages = realsize;
                zone->name = zone_names[j];
...

@@ -925,6 +985,40 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                spin_lock_init(&zone->lru_lock);
                zone->zone_pgdat = pgdat;
                zone->free_pages = 0;

+               /*
+                * The per-cpu-pages pools are set to around 1000th of the
+                * size of the zone.  But no more than 1/4 of a meg - there's
+                * no point in going beyond the size of L2 cache.
+                *
+                * OK, so we don't know how big the cache is.  So guess.
+                */
+               batch = zone->present_pages / 1024;
+               if (batch * PAGE_SIZE > 256 * 1024)
+                       batch = (256 * 1024) / PAGE_SIZE;
+               batch /= 4;             /* We effectively *= 4 below */
+               if (batch < 1)
+                       batch = 1;
+
+               for (cpu = 0; cpu < NR_CPUS; cpu++) {
+                       struct per_cpu_pages *pcp;
+
+                       pcp = &zone->pageset[cpu].pcp[0];       /* hot */
+                       pcp->count = 0;
+                       pcp->low = 2 * batch;
+                       pcp->high = 6 * batch;
+                       pcp->batch = 1 * batch;
+                       INIT_LIST_HEAD(&pcp->list);
+
+                       pcp = &zone->pageset[cpu].pcp[1];       /* cold */
+                       pcp->count = 0;
+                       pcp->low = 0;
+                       pcp->high = 2 * batch;
+                       pcp->batch = 1 * batch;
+                       INIT_LIST_HEAD(&pcp->list);
+               }
+
+               printk("  %s zone: %lu pages, LIFO batch:%lu\n",
+                               zone_names[j], realsize, batch);
                INIT_LIST_HEAD(&zone->active_list);
                INIT_LIST_HEAD(&zone->inactive_list);
                atomic_set(&zone->refill_counter, 0);
...
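To make the sizing above concrete, here is the same arithmetic worked for a few zone sizes, assuming 4 KB pages (PAGE_SIZE is architecture-dependent, and the zone sizes are arbitrary examples, not values from the commit):

/* Stand-alone rework of the batch/watermark sizing above (not kernel code). */
#include <stdio.h>

#define PAGE_SIZE 4096UL                /* assumption: 4 KB pages */

int main(void)
{
        unsigned long zone_mb[] = { 16, 256, 1024 };   /* example zone sizes */
        int i;

        for (i = 0; i < 3; i++) {
                unsigned long present_pages = zone_mb[i] * 1024 * 1024 / PAGE_SIZE;
                unsigned long batch;

                /* Same formula as the initialisation code in the hunk above. */
                batch = present_pages / 1024;
                if (batch * PAGE_SIZE > 256 * 1024)
                        batch = (256 * 1024) / PAGE_SIZE;
                batch /= 4;             /* effectively *= 4 via the watermarks */
                if (batch < 1)
                        batch = 1;

                printf("%4lu MB zone: batch=%2lu  hot low/high=%lu/%lu  cold low/high=0/%lu\n",
                       zone_mb[i], batch, 2 * batch, 6 * batch, 2 * batch);
        }
        return 0;
}

For a 16 MB zone this gives batch 1; a 256 MB zone gives batch 16 (hot watermarks 32/96); anything large enough to hit the 256 KB cap also settles at batch 16 with 4 KB pages.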
mm/swap.c

@@ -69,7 +69,8 @@ void lru_add_drain(void)
}

/*
- * This path almost never happens - pages are normally freed via pagevecs.
+ * This path almost never happens for VM activity - pages are normally
+ * freed via pagevecs.  But it gets used by networking.
 */
void __page_cache_release(struct page *page)
{
...

@@ -83,7 +84,7 @@ void __page_cache_release(struct page *page)
                page = NULL;
        spin_unlock_irqrestore(&zone->lru_lock, flags);
        if (page)
-               __free_pages_ok(page, 0);
+               free_hot_page(page);
}

/*
...