Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
e7144e64
Commit
e7144e64
authored
Sep 22, 2002
by
Linus Torvalds
Browse files
Options
Browse Files
Download
Plain Diff
Merge master.kernel.org:/home/davem/BK/net-2.5
into home.transmeta.com:/home/torvalds/v2.5/linux
parents
da29f6a8
407ee6c8
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
397 additions
and
126 deletions
+397
-126
drivers/block/ll_rw_blk.c
drivers/block/ll_rw_blk.c
+135
-25
drivers/char/sysrq.c
drivers/char/sysrq.c
+4
-3
fs/buffer.c
fs/buffer.c
+4
-6
fs/ext2/ialloc.c
fs/ext2/ialloc.c
+8
-1
fs/ext3/inode.c
fs/ext3/inode.c
+2
-2
fs/fs-writeback.c
fs/fs-writeback.c
+22
-18
fs/mpage.c
fs/mpage.c
+13
-0
include/linux/backing-dev.h
include/linux/backing-dev.h
+14
-0
include/linux/blkdev.h
include/linux/blkdev.h
+1
-0
include/linux/buffer_head.h
include/linux/buffer_head.h
+0
-1
include/linux/sched.h
include/linux/sched.h
+2
-0
include/linux/writeback.h
include/linux/writeback.h
+4
-0
kernel/suspend.c
kernel/suspend.c
+0
-1
mm/filemap.c
mm/filemap.c
+4
-0
mm/mempool.c
mm/mempool.c
+2
-2
mm/page-writeback.c
mm/page-writeback.c
+40
-15
mm/swapfile.c
mm/swapfile.c
+20
-0
mm/vmscan.c
mm/vmscan.c
+122
-52
No files found.
drivers/block/ll_rw_blk.c
View file @
e7144e64
...
...
@@ -46,13 +46,76 @@ static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
struct
blk_dev_struct
blk_dev
[
MAX_BLKDEV
];
/* initialized by blk_dev_init() */
/*
*
How many requests do we allocate per queue,
*
and how many do we "batch" on freeing them?
*
Number of requests per queue. This many for reads and for writes (twice
*
this number, total).
*/
int
queue_nr_requests
,
batch_requests
;
static
int
queue_nr_requests
;
/*
* How many free requests must be available before we wake a process which
* is waiting for a request?
*/
static
int
batch_requests
;
unsigned
long
blk_max_low_pfn
,
blk_max_pfn
;
int
blk_nohighio
=
0
;
/*
 * Congestion bookkeeping shared by every request queue in this file.
 * The array is indexed by the rw direction that callers pass to
 * set_queue_congested(), clear_queue_congested() and blk_congestion_wait().
 */
static
struct
congestion_state
{
/* Tasks sleeping in blk_congestion_wait(); woken by clear_queue_congested() */
wait_queue_head_t
wqh
;
/* Number of queues whose congested bit is currently set for this direction */
atomic_t
nr_congested_queues
;
}
congestion_states
[
2
];
/*
 * Congestion-on threshold: once a request list has fewer free requests than
 * this value, get_request() marks the queue congested.  The value sits just
 * below a quarter of queue_nr_requests, which includes a little hysteresis
 * relative to queue_congestion_off_threshold() to keep the context switch
 * rate down.
 */
static inline int queue_congestion_on_threshold(void)
{
	const int threshold = queue_nr_requests / 4 - 1;

	/* A negative result is replaced by 1; zero is passed through as-is. */
	return (threshold < 0) ? 1 : threshold;
}
/*
 * Congestion-off threshold: once a request list has at least this many free
 * requests again, the queue is considered uncongested.  The value is a
 * quarter of queue_nr_requests plus one, capped at queue_nr_requests.
 */
static inline int queue_congestion_off_threshold(void)
{
	const int threshold = queue_nr_requests / 4 + 1;

	if (threshold <= queue_nr_requests)
		return threshold;

	/* Never ask for more free requests than a list can hold. */
	return queue_nr_requests;
}
/*
 * Drop the congested flag for direction @rw on queue @q.
 *
 * If the flag was actually set, the global count of congested queues for
 * that direction is decremented.  Regardless of whether the flag changed,
 * any task sleeping on that direction's congestion wait queue is woken.
 */
static void clear_queue_congested(request_queue_t *q, int rw)
{
	struct congestion_state *state = congestion_states + rw;
	enum bdi_state flag;

	if (rw == WRITE)
		flag = BDI_write_congested;
	else
		flag = BDI_read_congested;

	/* Only a real 1 -> 0 transition may touch the counter. */
	if (test_and_clear_bit(flag, &q->backing_dev_info.state))
		atomic_dec(&state->nr_congested_queues);

	if (!waitqueue_active(&state->wqh))
		return;
	wake_up(&state->wqh);
}
/*
 * Raise the congested flag for direction @rw on queue @q.
 *
 * The global count of congested queues for that direction is incremented
 * only when the flag goes from clear to set, so repeated calls on an
 * already-congested queue leave the counter alone.
 */
static void set_queue_congested(request_queue_t *q, int rw)
{
	enum bdi_state flag = BDI_read_congested;

	if (rw == WRITE)
		flag = BDI_write_congested;

	if (test_and_set_bit(flag, &q->backing_dev_info.state))
		return;		/* flag was already set: counter already covers q */

	atomic_inc(&congestion_states[rw].nr_congested_queues);
}
/**
* bdev_get_queue: - return the queue that matches the given device
* @bdev: device
...
...
@@ -360,8 +423,8 @@ int blk_queue_init_tags(request_queue_t *q, int depth)
struct
blk_queue_tag
*
tags
;
int
bits
,
i
;
if
(
depth
>
queue_nr_requests
)
{
depth
=
queue_nr_requests
;
if
(
depth
>
(
queue_nr_requests
*
2
)
)
{
depth
=
(
queue_nr_requests
*
2
)
;
printk
(
"blk_queue_init_tags: adjusted depth to %d
\n
"
,
depth
);
}
...
...
@@ -1019,7 +1082,7 @@ static int __blk_cleanup_queue(struct request_list *list)
**/
void
blk_cleanup_queue
(
request_queue_t
*
q
)
{
int
count
=
queue_nr_requests
;
int
count
=
(
queue_nr_requests
*
2
)
;
count
-=
__blk_cleanup_queue
(
&
q
->
rq
[
READ
]);
count
-=
__blk_cleanup_queue
(
&
q
->
rq
[
WRITE
]);
...
...
@@ -1050,7 +1113,7 @@ static int blk_init_free_list(request_queue_t *q)
* Divide requests in half between read and write
*/
rl
=
&
q
->
rq
[
READ
];
for
(
i
=
0
;
i
<
queue_nr_requests
;
i
++
)
{
for
(
i
=
0
;
i
<
(
queue_nr_requests
*
2
)
;
i
++
)
{
rq
=
kmem_cache_alloc
(
request_cachep
,
SLAB_KERNEL
);
if
(
!
rq
)
goto
nomem
;
...
...
@@ -1058,7 +1121,7 @@ static int blk_init_free_list(request_queue_t *q)
/*
* half way through, switch to WRITE list
*/
if
(
i
==
queue_nr_requests
/
2
)
if
(
i
==
queue_nr_requests
)
rl
=
&
q
->
rq
[
WRITE
];
memset
(
rq
,
0
,
sizeof
(
struct
request
));
...
...
@@ -1144,7 +1207,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
* Get a free request. queue lock must be held and interrupts
* disabled on the way in.
*/
static
inline
struct
request
*
get_request
(
request_queue_t
*
q
,
int
rw
)
static
struct
request
*
get_request
(
request_queue_t
*
q
,
int
rw
)
{
struct
request
*
rq
=
NULL
;
struct
request_list
*
rl
=
q
->
rq
+
rw
;
...
...
@@ -1153,6 +1216,8 @@ static inline struct request *get_request(request_queue_t *q, int rw)
rq
=
blkdev_free_rq
(
&
rl
->
free
);
list_del
(
&
rq
->
queuelist
);
rl
->
count
--
;
if
(
rl
->
count
<
queue_congestion_on_threshold
())
set_queue_congested
(
q
,
rw
);
rq
->
flags
=
0
;
rq
->
rq_status
=
RQ_ACTIVE
;
rq
->
special
=
NULL
;
...
...
@@ -1365,13 +1430,50 @@ void blk_put_request(struct request *req)
* it didn't come out of our reserved rq pools
*/
if
(
rl
)
{
int
rw
=
0
;
list_add
(
&
req
->
queuelist
,
&
rl
->
free
);
if
(
++
rl
->
count
>=
batch_requests
&&
waitqueue_active
(
&
rl
->
wait
))
if
(
rl
==
&
q
->
rq
[
WRITE
])
rw
=
WRITE
;
else
if
(
rl
==
&
q
->
rq
[
READ
])
rw
=
READ
;
else
BUG
();
rl
->
count
++
;
if
(
rl
->
count
>=
queue_congestion_off_threshold
())
clear_queue_congested
(
q
,
rw
);
if
(
rl
->
count
>=
batch_requests
&&
waitqueue_active
(
&
rl
->
wait
))
wake_up
(
&
rl
->
wait
);
}
}
/**
* blk_congestion_wait - wait for a queue to become uncongested
* @rw: READ or WRITE
* @timeout: timeout in jiffies
*
* Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
* If no queues are congested then just return, in the hope that the caller
* will submit some more IO.
*/
void
blk_congestion_wait
(
int
rw
,
long
timeout
)
{
DECLARE_WAITQUEUE
(
wait
,
current
);
struct
congestion_state
*
cs
=
&
congestion_states
[
rw
];
if
(
atomic_read
(
&
cs
->
nr_congested_queues
)
==
0
)
return
;
blk_run_queues
();
set_current_state
(
TASK_UNINTERRUPTIBLE
);
add_wait_queue
(
&
cs
->
wqh
,
&
wait
);
if
(
atomic_read
(
&
cs
->
nr_congested_queues
)
!=
0
)
schedule_timeout
(
timeout
);
set_current_state
(
TASK_RUNNING
);
remove_wait_queue
(
&
cs
->
wqh
,
&
wait
);
}
/*
* Has to be called with the request spinlock acquired
*/
...
...
@@ -1868,6 +1970,7 @@ void end_that_request_last(struct request *req)
int
__init
blk_dev_init
(
void
)
{
int
total_ram
=
nr_free_pages
()
<<
(
PAGE_SHIFT
-
10
);
int
i
;
request_cachep
=
kmem_cache_create
(
"blkdev_requests"
,
sizeof
(
struct
request
),
0
,
...
...
@@ -1876,26 +1979,33 @@ int __init blk_dev_init(void)
panic
(
"Can't create request pool slab cache
\n
"
);
/*
* Free request slots per queue.
* (Half for reads, half for writes)
*/
queue_nr_requests
=
(
total_ram
>>
8
)
&
~
15
;
/* One per quarter-megabyte */
if
(
queue_nr_requests
<
32
)
queue_nr_requests
=
32
;
if
(
queue_nr_requests
>
256
)
queue_nr_requests
=
256
;
/*
* Batch frees according to queue length
* Free request slots per queue. One per quarter-megabyte.
* We use this many requests for reads, and this many for writes.
*/
if
((
batch_requests
=
queue_nr_requests
/
4
)
>
32
)
batch_requests
=
32
;
printk
(
"block: %d slots per queue, batch=%d
\n
"
,
queue_nr_requests
,
batch_requests
);
queue_nr_requests
=
(
total_ram
>>
9
)
&
~
7
;
if
(
queue_nr_requests
<
16
)
queue_nr_requests
=
16
;
if
(
queue_nr_requests
>
128
)
queue_nr_requests
=
128
;
batch_requests
=
queue_nr_requests
/
8
;
if
(
batch_requests
>
8
)
batch_requests
=
8
;
printk
(
"block request queues:
\n
"
);
printk
(
" %d requests per read queue
\n
"
,
queue_nr_requests
);
printk
(
" %d requests per write queue
\n
"
,
queue_nr_requests
);
printk
(
" %d requests per batch
\n
"
,
batch_requests
);
printk
(
" enter congestion at %d
\n
"
,
queue_congestion_on_threshold
());
printk
(
" exit congestion at %d
\n
"
,
queue_congestion_off_threshold
());
blk_max_low_pfn
=
max_low_pfn
;
blk_max_pfn
=
max_pfn
;
for
(
i
=
0
;
i
<
ARRAY_SIZE
(
congestion_states
);
i
++
)
{
init_waitqueue_head
(
&
congestion_states
[
i
].
wqh
);
atomic_set
(
&
congestion_states
[
i
].
nr_congested_queues
,
0
);
}
return
0
;
};
...
...
drivers/char/sysrq.c
View file @
e7144e64
...
...
@@ -28,7 +28,8 @@
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
/* for fsync_bdev()/wakeup_bdflush() */
#include <linux/writeback.h>
#include <linux/buffer_head.h>
/* for fsync_bdev() */
#include <linux/spinlock.h>
...
...
@@ -227,7 +228,7 @@ static void sysrq_handle_sync(int key, struct pt_regs *pt_regs,
struct
tty_struct
*
tty
)
{
emergency_sync_scheduled
=
EMERG_SYNC
;
wakeup_bdflush
();
wakeup_bdflush
(
0
);
}
static
struct
sysrq_key_op
sysrq_sync_op
=
{
handler:
sysrq_handle_sync
,
...
...
@@ -239,7 +240,7 @@ static void sysrq_handle_mountro(int key, struct pt_regs *pt_regs,
struct
tty_struct
*
tty
)
{
emergency_sync_scheduled
=
EMERG_REMOUNT
;
wakeup_bdflush
();
wakeup_bdflush
(
0
);
}
static
struct
sysrq_key_op
sysrq_mountro_op
=
{
handler:
sysrq_handle_mountro
,
...
...
fs/buffer.c
View file @
e7144e64
...
...
@@ -458,19 +458,17 @@ void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
}
/*
* FIXME: What is this function actually trying to do? Why "zones[0]"?
* Is it still correct/needed if/when blockdev mappings use GFP_HIGHUSER?
* Kick pdflush then try to free up some ZONE_NORMAL memory.
*/
static
void
free_more_memory
(
void
)
{
struct
zone
*
zone
;
zone
=
contig_page_data
.
node_zonelists
[
GFP_NOFS
&
GFP_ZONEMASK
].
zones
[
0
];
wakeup_bdflush
();
try_to_free_pages
(
zone
,
GFP_NOFS
,
0
);
zone
=
contig_page_data
.
node_zonelists
[
GFP_NOFS
&
GFP_ZONEMASK
].
zones
[
0
];
wakeup_bdflush
(
1024
);
blk_run_queues
();
yield
();
try_to_free_pages
(
zone
,
GFP_NOFS
,
0
);
}
/*
...
...
fs/ext2/ialloc.c
View file @
e7144e64
...
...
@@ -16,9 +16,9 @@
#include "ext2.h"
#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
/*
* ialloc.c contains the inodes allocation and deallocation routines
*/
...
...
@@ -169,6 +169,13 @@ static void ext2_preread_inode(struct inode *inode)
unsigned
long
block
;
struct
buffer_head
*
bh
;
struct
ext2_group_desc
*
gdp
;
struct
backing_dev_info
*
bdi
;
bdi
=
inode
->
i_mapping
->
backing_dev_info
;
if
(
bdi_read_congested
(
bdi
))
return
;
if
(
bdi_write_congested
(
bdi
))
return
;
block_group
=
(
inode
->
i_ino
-
1
)
/
EXT2_INODES_PER_GROUP
(
inode
->
i_sb
);
gdp
=
ext2_get_group_desc
(
inode
->
i_sb
,
block_group
,
&
bh
);
...
...
fs/ext3/inode.c
View file @
e7144e64
...
...
@@ -1473,7 +1473,7 @@ struct address_space_operations ext3_aops = {
};
/* For writeback mode, we can use mpage_writepages() */
#if 0 /* Doesn't work for shared mappings */
static int
ext3_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
...
...
@@ -1486,12 +1486,12 @@ ext3_writepages(struct address_space *mapping, struct writeback_control *wbc)
ret = err;
return ret;
}
#endif
struct
address_space_operations
ext3_writeback_aops
=
{
.
readpage
=
ext3_readpage
,
/* BKL not held. Don't need */
.
readpages
=
ext3_readpages
,
/* BKL not held. Don't need */
.
writepage
=
ext3_writepage
,
/* BKL not held. We take it */
.
writepages
=
ext3_writepages
,
/* BKL not held. Don't need */
.
sync_page
=
block_sync_page
,
.
prepare_write
=
ext3_prepare_write
,
/* BKL not held. We take it */
.
commit_write
=
ext3_commit_write
,
/* BKL not held. We take it */
...
...
fs/fs-writeback.c
View file @
e7144e64
...
...
@@ -220,44 +220,52 @@ __writeback_single_inode(struct inode *inode, int sync,
*
* FIXME: this linear search could get expensive with many filesystems. But
* how to fix? We need to go from an address_space to all inodes which share
* a queue with that address_space.
* a queue with that address_space. (Easy: have a global "dirty superblocks"
* list).
*
* The inodes to be written are parked on sb->s_io. They are moved back onto
* sb->s_dirty as they are selected for writing. This way, none can be missed
* on the writer throttling path, and we get decent balancing between many
* thrlttled threads: we don't want them all piling up on __wait_on_inode.
* throttled threads: we don't want them all piling up on __wait_on_inode.
*/
static
void
sync_sb_inodes
(
struct
super_block
*
sb
,
struct
writeback_control
*
wbc
)
{
struct
list_head
*
tmp
;
struct
list_head
*
head
;
const
unsigned
long
start
=
jiffies
;
/* livelock avoidance */
list_splice_init
(
&
sb
->
s_dirty
,
&
sb
->
s_io
);
head
=
&
sb
->
s_io
;
while
((
tmp
=
head
->
prev
)
!=
head
)
{
struct
inode
*
inode
=
list_entry
(
tmp
,
struct
inode
,
i_list
);
while
(
!
list_empty
(
&
sb
->
s_io
))
{
struct
inode
*
inode
=
list_entry
(
sb
->
s_io
.
prev
,
struct
inode
,
i_list
);
struct
address_space
*
mapping
=
inode
->
i_mapping
;
struct
backing_dev_info
*
bdi
;
struct
backing_dev_info
*
bdi
=
mapping
->
backing_dev_info
;
int
really_sync
;
if
(
wbc
->
bdi
&&
mapping
->
backing_dev_info
!=
wbc
->
bdi
)
{
if
(
wbc
->
nonblocking
&&
bdi_write_congested
(
bdi
))
{
wbc
->
encountered_congestion
=
1
;
if
(
sb
!=
blockdev_superblock
)
break
;
/*
inappropriate superblock
*/
break
;
/*
Skip a congested fs
*/
list_move
(
&
inode
->
i_list
,
&
sb
->
s_dirty
);
continue
;
/* not this blockdev */
continue
;
/* Skip a congested blockdev */
}
if
(
wbc
->
bdi
&&
bdi
!=
wbc
->
bdi
)
{
if
(
sb
!=
blockdev_superblock
)
break
;
/* fs has the wrong queue */
list_move
(
&
inode
->
i_list
,
&
sb
->
s_dirty
);
continue
;
/* blockdev has wrong queue */
}
/* Was this inode dirtied after sync_sb_inodes was called? */
if
(
time_after
(
mapping
->
dirtied_when
,
start
))
break
;
/* Was this inode dirtied too recently? */
if
(
wbc
->
older_than_this
&&
time_after
(
mapping
->
dirtied_when
,
*
wbc
->
older_than_this
))
goto
out
;
break
;
bdi
=
mapping
->
backing_dev_info
;
/* Is another pdflush already flushing this queue? */
if
(
current_is_pdflush
()
&&
!
writeback_acquire
(
bdi
))
break
;
...
...
@@ -278,11 +286,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
if
(
wbc
->
nr_to_write
<=
0
)
break
;
}
out:
/*
* Leave any unwritten inodes on s_io.
*/
return
;
return
;
/* Leave any unwritten inodes on s_io */
}
/*
...
...
fs/mpage.c
View file @
e7144e64
...
...
@@ -22,6 +22,7 @@
#include <linux/prefetch.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
/*
...
...
@@ -522,6 +523,7 @@ int
mpage_writepages
(
struct
address_space
*
mapping
,
struct
writeback_control
*
wbc
,
get_block_t
get_block
)
{
struct
backing_dev_info
*
bdi
=
mapping
->
backing_dev_info
;
struct
bio
*
bio
=
NULL
;
sector_t
last_block_in_bio
=
0
;
int
ret
=
0
;
...
...
@@ -530,6 +532,12 @@ mpage_writepages(struct address_space *mapping,
struct
pagevec
pvec
;
int
(
*
writepage
)(
struct
page
*
);
if
(
wbc
->
nonblocking
&&
bdi_write_congested
(
bdi
))
{
blk_run_queues
();
wbc
->
encountered_congestion
=
1
;
return
0
;
}
writepage
=
NULL
;
if
(
get_block
==
NULL
)
writepage
=
mapping
->
a_ops
->
writepage
;
...
...
@@ -585,6 +593,11 @@ mpage_writepages(struct address_space *mapping,
}
if
(
ret
||
(
--
(
wbc
->
nr_to_write
)
<=
0
))
done
=
1
;
if
(
wbc
->
nonblocking
&&
bdi_write_congested
(
bdi
))
{
blk_run_queues
();
wbc
->
encountered_congestion
=
1
;
done
=
1
;
}
}
else
{
unlock_page
(
page
);
}
...
...
include/linux/backing-dev.h
View file @
e7144e64
...
...
@@ -8,11 +8,15 @@
#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H
#include <asm/atomic.h>
/*
* Bits in backing_dev_info.state
*/
/*
 * Each enumerator is a bit index (not a mask) into backing_dev_info.state:
 * bdi_read_congested() and bdi_write_congested() pass these values straight
 * to test_bit() on &bdi->state.
 */
enum
bdi_state
{
BDI_pdflush
,
/* A pdflush thread is working this device */
BDI_write_congested
,
/* The write queue is getting full */
BDI_read_congested
,
/* The read queue is getting full */
BDI_unused
,
/* Available bits start here */
};
...
...
@@ -28,4 +32,14 @@ int writeback_acquire(struct backing_dev_info *bdi);
int
writeback_in_progress
(
struct
backing_dev_info
*
bdi
);
void
writeback_release
(
struct
backing_dev_info
*
bdi
);
/*
 * Report whether the BDI_read_congested bit is set in @bdi->state.
 * Returns the result of test_bit(): non-zero when set, zero otherwise.
 */
static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
	const int congested = test_bit(BDI_read_congested, &bdi->state);

	return congested;
}
/*
 * Report whether the BDI_write_congested bit is set in @bdi->state.
 * Returns the result of test_bit(): non-zero when set, zero otherwise.
 */
static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
	const int congested = test_bit(BDI_write_congested, &bdi->state);

	return congested;
}
#endif
/* _LINUX_BACKING_DEV_H */
include/linux/blkdev.h
View file @
e7144e64
...
...
@@ -345,6 +345,7 @@ extern void blk_queue_end_tag(request_queue_t *, struct request *);
extern
int
blk_queue_init_tags
(
request_queue_t
*
,
int
);
extern
void
blk_queue_free_tags
(
request_queue_t
*
);
extern
void
blk_queue_invalidate_tags
(
request_queue_t
*
);
extern
void
blk_congestion_wait
(
int
rw
,
long
timeout
);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
...
...
include/linux/buffer_head.h
View file @
e7144e64
...
...
@@ -163,7 +163,6 @@ struct buffer_head * __getblk(struct block_device *, sector_t, int);
void
__brelse
(
struct
buffer_head
*
);
void
__bforget
(
struct
buffer_head
*
);
struct
buffer_head
*
__bread
(
struct
block_device
*
,
sector_t
block
,
int
size
);
void
wakeup_bdflush
(
void
);
struct
buffer_head
*
alloc_buffer_head
(
void
);
void
free_buffer_head
(
struct
buffer_head
*
bh
);
void
FASTCALL
(
unlock_buffer
(
struct
buffer_head
*
bh
));
...
...
include/linux/sched.h
View file @
e7144e64
...
...
@@ -273,6 +273,7 @@ extern struct user_struct root_user;
#define INIT_USER (&root_user)
typedef
struct
prio_array
prio_array_t
;
struct
backing_dev_info
;
struct
task_struct
{
volatile
long
state
;
/* -1 unrunnable, 0 runnable, >0 stopped */
...
...
@@ -398,6 +399,7 @@ struct task_struct {
/* journalling filesystem info */
void
*
journal_info
;
struct
dentry
*
proc_dentry
;
struct
backing_dev_info
*
backing_dev_info
;
};
extern
void
__put_task_struct
(
struct
task_struct
*
tsk
);
...
...
include/linux/writeback.h
View file @
e7144e64
...
...
@@ -43,6 +43,8 @@ struct writeback_control {
older than this */
long
nr_to_write
;
/* Write this many pages, and decrement
this for each page written */
int
nonblocking
;
/* Don't get stuck on request queues */
int
encountered_congestion
;
/* An output: a queue is full */
};
void
writeback_inodes
(
struct
writeback_control
*
wbc
);
...
...
@@ -61,6 +63,8 @@ static inline void wait_on_inode(struct inode *inode)
/*
* mm/page-writeback.c
*/
int
wakeup_bdflush
(
long
nr_pages
);
/* These 5 are exported to sysctl. */
extern
int
dirty_background_ratio
;
extern
int
dirty_async_ratio
;
...
...
kernel/suspend.c
View file @
e7144e64
...
...
@@ -81,7 +81,6 @@ unsigned char software_suspend_enabled = 0;
#define TIMEOUT (6 * HZ)
/* Timeout for stopping processes */
#define ADDRESS(x) ((unsigned long) phys_to_virt(((x) << PAGE_SHIFT)))
extern
void
wakeup_bdflush
(
void
);
extern
int
C_A_D
;
/* References to section boundaries */
...
...
mm/filemap.c
View file @
e7144e64
...
...
@@ -1755,6 +1755,9 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov,
if
(
unlikely
(
pos
<
0
))
return
-
EINVAL
;
/* We can write back this queue in page reclaim */
current
->
backing_dev_info
=
mapping
->
backing_dev_info
;
pagevec_init
(
&
lru_pvec
);
if
(
unlikely
(
file
->
f_error
))
{
...
...
@@ -1959,6 +1962,7 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov,
err
=
written
?
written
:
status
;
out:
pagevec_lru_add
(
&
lru_pvec
);
current
->
backing_dev_info
=
0
;
return
err
;
}
...
...
mm/mempool.c
View file @
e7144e64
...
...
@@ -12,7 +12,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/
buffer_head.h>
/* for wakeup_bdflush() */
#include <linux/
writeback.h>
static
void
add_element
(
mempool_t
*
pool
,
void
*
element
)
{
...
...
@@ -210,7 +210,7 @@ void * mempool_alloc(mempool_t *pool, int gfp_mask)
/*
* Kick the VM at this point.
*/
wakeup_bdflush
();
wakeup_bdflush
(
0
);
spin_lock_irqsave
(
&
pool
->
lock
,
flags
);
if
(
likely
(
pool
->
curr_nr
))
{
...
...
mm/page-writeback.c
View file @
e7144e64
...
...
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/sysrq.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/notifier.h>
#include <linux/smp.h>
...
...
@@ -172,33 +173,47 @@ static void background_writeout(unsigned long _min_pages)
.
sync_mode
=
WB_SYNC_NONE
,
.
older_than_this
=
NULL
,
.
nr_to_write
=
0
,
.
nonblocking
=
1
,
};
CHECK_EMERGENCY_SYNC
background_thresh
=
(
dirty_background_ratio
*
total_pages
)
/
100
;
do
{
for
(
;
;
)
{
struct
page_state
ps
;
get_page_state
(
&
ps
);
if
(
ps
.
nr_dirty
<
background_thresh
&&
min_pages
<=
0
)
break
;
wbc
.
encountered_congestion
=
0
;
wbc
.
nr_to_write
=
MAX_WRITEBACK_PAGES
;
writeback_inodes
(
&
wbc
);
min_pages
-=
MAX_WRITEBACK_PAGES
-
wbc
.
nr_to_write
;
}
while
(
wbc
.
nr_to_write
<=
0
);
if
(
wbc
.
nr_to_write
==
MAX_WRITEBACK_PAGES
)
{
/* Wrote nothing */
if
(
wbc
.
encountered_congestion
)
blk_congestion_wait
(
WRITE
,
HZ
/
10
);
else
break
;
}
}
blk_run_queues
();
}
/*
* Start heavy writeback of everything.
* Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
* the whole world. Returns 0 if a pdflush thread was dispatched. Returns
* -1 if all pdflush threads were busy.
*/
void
wakeup_bdflush
(
void
)
int
wakeup_bdflush
(
long
nr_pages
)
{
struct
page_state
ps
;
if
(
nr_pages
==
0
)
{
struct
page_state
ps
;
get_page_state
(
&
ps
);
pdflush_operation
(
background_writeout
,
ps
.
nr_dirty
);
get_page_state
(
&
ps
);
nr_pages
=
ps
.
nr_dirty
;
}
return
pdflush_operation
(
background_writeout
,
nr_pages
);
}
static
struct
timer_list
wb_timer
;
...
...
@@ -223,25 +238,36 @@ static void wb_kupdate(unsigned long arg)
unsigned
long
oldest_jif
;
unsigned
long
start_jif
;
unsigned
long
next_jif
;
long
nr_to_write
;
struct
page_state
ps
;
struct
writeback_control
wbc
=
{
.
bdi
=
NULL
,
.
sync_mode
=
WB_SYNC_NONE
,
.
older_than_this
=
&
oldest_jif
,
.
nr_to_write
=
0
,
.
nonblocking
=
1
,
};
sync_supers
();
get_page_state
(
&
ps
);
get_page_state
(
&
ps
);
oldest_jif
=
jiffies
-
(
dirty_expire_centisecs
*
HZ
)
/
100
;
start_jif
=
jiffies
;
next_jif
=
start_jif
+
(
dirty_writeback_centisecs
*
HZ
)
/
100
;
wbc
.
nr_to_write
=
ps
.
nr_dirty
;
writeback_inodes
(
&
wbc
);
nr_to_write
=
ps
.
nr_dirty
;
while
(
nr_to_write
>
0
)
{
wbc
.
encountered_congestion
=
0
;
wbc
.
nr_to_write
=
MAX_WRITEBACK_PAGES
;
writeback_inodes
(
&
wbc
);
if
(
wbc
.
nr_to_write
==
MAX_WRITEBACK_PAGES
)
{
if
(
wbc
.
encountered_congestion
)
blk_congestion_wait
(
WRITE
,
HZ
);
else
break
;
/* All the old data is written */
}
nr_to_write
-=
MAX_WRITEBACK_PAGES
-
wbc
.
nr_to_write
;
}
blk_run_queues
();
yield
();
if
(
time_before
(
next_jif
,
jiffies
+
HZ
))
next_jif
=
jiffies
+
HZ
;
mod_timer
(
&
wb_timer
,
next_jif
);
...
...
@@ -493,7 +519,6 @@ int __set_page_dirty_buffers(struct page *page)
buffer_error
();
spin_lock
(
&
mapping
->
private_lock
);
if
(
page_has_buffers
(
page
))
{
struct
buffer_head
*
head
=
page_buffers
(
page
);
struct
buffer_head
*
bh
=
head
;
...
...
@@ -506,6 +531,7 @@ int __set_page_dirty_buffers(struct page *page)
bh
=
bh
->
b_this_page
;
}
while
(
bh
!=
head
);
}
spin_unlock
(
&
mapping
->
private_lock
);
if
(
!
TestSetPageDirty
(
page
))
{
write_lock
(
&
mapping
->
page_lock
);
...
...
@@ -519,7 +545,6 @@ int __set_page_dirty_buffers(struct page *page)
__mark_inode_dirty
(
mapping
->
host
,
I_DIRTY_PAGES
);
}
spin_unlock
(
&
mapping
->
private_lock
);
out:
return
ret
;
}
...
...
mm/swapfile.c
View file @
e7144e64
...
...
@@ -918,6 +918,26 @@ static int setup_swap_extents(struct swap_info_struct *sis)
return
ret
;
}
#if 0 /* We don't need this yet */
#include <linux/backing-dev.h>
/*
 * Report whether the queue backing @page is write-congested.
 *
 * For an ordinary page the backing_dev_info comes from page->mapping.  For a
 * swapcache page it is instead looked up through the swap device's
 * block-device inode mapping, using the swap entry stored in page->index.
 * The page must be locked by the caller.
 */
int page_queue_congested(struct page *page)
{
	struct backing_dev_info *target;

	/* It pins the swap_info_struct */
	BUG_ON(!PageLocked(page));

	target = page->mapping->backing_dev_info;
	if (PageSwapCache(page)) {
		struct swap_info_struct *swap_info;
		swp_entry_t swap_entry = { .val = page->index };

		swap_info = get_swap_info_struct(swp_type(swap_entry));
		target = swap_info->bdev->bd_inode->i_mapping->backing_dev_info;
	}

	return bdi_write_congested(target);
}
#endif
asmlinkage
long
sys_swapoff
(
const
char
*
specialfile
)
{
struct
swap_info_struct
*
p
=
NULL
;
...
...
mm/vmscan.c
View file @
e7144e64
...
...
@@ -21,9 +21,11 @@
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/suspend.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
/* for try_to_release_page() */
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap-locking.h>
#include <asm/pgalloc.h>
...
...
@@ -32,11 +34,11 @@
/*
* The "priority" of VM scanning is how much of the queues we
* will scan in one go. A value of
6
for DEF_PRIORITY implies
* that we'll scan 1/
64th of the queues ("queue_length >> 6
")
* will scan in one go. A value of
12
for DEF_PRIORITY implies
* that we'll scan 1/
4096th of the queues ("queue_length >> 12
")
* during a normal aging round.
*/
#define DEF_PRIORITY
(6)
#define DEF_PRIORITY
12
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field) \
...
...
@@ -95,7 +97,7 @@ static inline int is_page_cache_freeable(struct page *page)
static
/* inline */
int
shrink_list
(
struct
list_head
*
page_list
,
int
nr_pages
,
unsigned
int
gfp_mask
,
int
priority
,
int
*
max_scan
)
unsigned
int
gfp_mask
,
int
*
max_scan
)
{
struct
address_space
*
mapping
;
LIST_HEAD
(
ret_pages
);
...
...
@@ -117,11 +119,22 @@ shrink_list(struct list_head *page_list, int nr_pages,
BUG_ON
(
PageActive
(
page
));
may_enter_fs
=
(
gfp_mask
&
__GFP_FS
)
||
(
PageSwapCache
(
page
)
&&
(
gfp_mask
&
__GFP_IO
));
/*
* If the page is mapped into pagetables then wait on it, to
* throttle this allocator to the rate at which we can clear
* MAP_SHARED data. This will also throttle against swapcache
* writes.
*/
if
(
PageWriteback
(
page
))
{
if
(
may_enter_fs
)
wait_on_page_writeback
(
page
);
/* throttling */
else
goto
keep_locked
;
if
(
may_enter_fs
)
{
if
(
page
->
pte
.
direct
||
page
->
mapping
->
backing_dev_info
==
current
->
backing_dev_info
)
{
wait_on_page_writeback
(
page
);
}
}
goto
keep_locked
;
}
pte_chain_lock
(
page
);
...
...
@@ -172,15 +185,43 @@ shrink_list(struct list_head *page_list, int nr_pages,
* will write it. So we're back to page-at-a-time writepage
* in LRU order.
*/
if
(
PageDirty
(
page
)
&&
is_page_cache_freeable
(
page
)
&&
mapping
&&
may_enter_fs
)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in generic_file_write() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
* See swapfile.c:page_queue_congested().
*/
if
(
PageDirty
(
page
))
{
int
(
*
writeback
)(
struct
page
*
,
struct
writeback_control
*
);
struct
backing_dev_info
*
bdi
;
const
int
cluster_size
=
SWAP_CLUSTER_MAX
;
struct
writeback_control
wbc
=
{
.
nr_to_write
=
cluster_size
,
};
if
(
!
is_page_cache_freeable
(
page
))
goto
keep_locked
;
if
(
!
mapping
)
goto
keep_locked
;
if
(
!
may_enter_fs
)
goto
keep_locked
;
bdi
=
mapping
->
backing_dev_info
;
if
(
bdi
!=
current
->
backing_dev_info
&&
bdi_write_congested
(
bdi
))
goto
keep_locked
;
writeback
=
mapping
->
a_ops
->
vm_writeback
;
if
(
writeback
==
NULL
)
writeback
=
generic_vm_writeback
;
...
...
@@ -279,7 +320,7 @@ shrink_list(struct list_head *page_list, int nr_pages,
*/
static
/* inline */
int
shrink_cache
(
int
nr_pages
,
struct
zone
*
zone
,
unsigned
int
gfp_mask
,
int
priority
,
int
max_scan
)
unsigned
int
gfp_mask
,
int
max_scan
)
{
LIST_HEAD
(
page_list
);
struct
pagevec
pvec
;
...
...
@@ -298,11 +339,13 @@ shrink_cache(int nr_pages, struct zone *zone,
spin_lock_irq
(
&
zone
->
lru_lock
);
while
(
max_scan
>
0
&&
nr_pages
>
0
)
{
struct
page
*
page
;
int
n
=
0
;
int
nr_taken
=
0
;
int
nr_scan
=
0
;
while
(
n
<
nr_to_process
&&
!
list_empty
(
&
zone
->
inactive_list
))
{
while
(
nr_scan
++
<
nr_to_process
&&
!
list_empty
(
&
zone
->
inactive_list
))
{
page
=
list_entry
(
zone
->
inactive_list
.
prev
,
struct
page
,
lru
);
struct
page
,
lru
);
prefetchw_prev_lru_page
(
page
,
&
zone
->
inactive_list
,
flags
);
...
...
@@ -318,18 +361,17 @@ shrink_cache(int nr_pages, struct zone *zone,
}
list_add
(
&
page
->
lru
,
&
page_list
);
page_cache_get
(
page
);
n
++
;
n
r_taken
++
;
}
zone
->
nr_inactive
-=
n
;
zone
->
nr_inactive
-=
n
r_taken
;
spin_unlock_irq
(
&
zone
->
lru_lock
);
if
(
list_empty
(
&
page_list
)
)
if
(
nr_taken
==
0
)
goto
done
;
max_scan
-=
n
;
KERNEL_STAT_ADD
(
pgscan
,
n
);
nr_pages
=
shrink_list
(
&
page_list
,
nr_pages
,
gfp_mask
,
priority
,
&
max_scan
);
max_scan
-=
nr_scan
;
KERNEL_STAT_ADD
(
pgscan
,
nr_scan
);
nr_pages
=
shrink_list
(
&
page_list
,
nr_pages
,
gfp_mask
,
&
max_scan
);
if
(
nr_pages
<=
0
&&
list_empty
(
&
page_list
))
goto
done
;
...
...
@@ -420,6 +462,15 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
}
pte_chain_unlock
(
page
);
}
/*
* FIXME: need to consider page_count(page) here if/when we
* reap orphaned pages via the LRU (Daniel's locking stuff)
*/
if
(
total_swap_pages
==
0
&&
!
page
->
mapping
&&
!
PagePrivate
(
page
))
{
list_add
(
&
page
->
lru
,
&
l_active
);
continue
;
}
list_add
(
&
page
->
lru
,
&
l_inactive
);
pgdeactivate
++
;
}
...
...
@@ -470,11 +521,10 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
}
static
/* inline */
int
shrink_zone
(
struct
zone
*
zone
,
int
priority
,
unsigned
int
gfp_mask
,
int
nr_pages
)
shrink_zone
(
struct
zone
*
zone
,
int
max_scan
,
unsigned
int
gfp_mask
,
int
nr_pages
)
{
unsigned
long
ratio
;
int
max_scan
;
/* This is bogus for ZONE_HIGHMEM? */
if
(
kmem_cache_reap
(
gfp_mask
)
>=
nr_pages
)
...
...
@@ -497,43 +547,50 @@ shrink_zone(struct zone *zone, int priority,
atomic_sub
(
SWAP_CLUSTER_MAX
,
&
zone
->
refill_counter
);
refill_inactive_zone
(
zone
,
SWAP_CLUSTER_MAX
);
}
max_scan
=
zone
->
nr_inactive
/
priority
;
nr_pages
=
shrink_cache
(
nr_pages
,
zone
,
gfp_mask
,
priority
,
max_scan
);
if
(
nr_pages
<=
0
)
return
0
;
wakeup_bdflush
();
shrink_dcache_memory
(
priority
,
gfp_mask
);
/* After shrinking the dcache, get rid of unused inodes too .. */
shrink_icache_memory
(
1
,
gfp_mask
);
#ifdef CONFIG_QUOTA
shrink_dqcache_memory
(
DEF_PRIORITY
,
gfp_mask
);
#endif
nr_pages
=
shrink_cache
(
nr_pages
,
zone
,
gfp_mask
,
max_scan
);
return
nr_pages
;
}
static
int
shrink_caches
(
struct
zone
*
classzone
,
int
priority
,
int
gfp_mask
,
int
nr_pages
)
int
*
total_scanned
,
int
gfp_mask
,
int
nr_pages
)
{
struct
zone
*
first_classzone
;
struct
zone
*
zone
;
first_classzone
=
classzone
->
zone_pgdat
->
node_zones
;
zone
=
classzone
;
while
(
zone
>=
first_classzone
&&
nr_pages
>
0
)
{
if
(
zone
->
free_pages
<=
zone
->
pages_high
)
{
nr_pages
=
shrink_zone
(
zone
,
priority
,
gfp_mask
,
nr_pages
);
}
zone
--
;
for
(
zone
=
classzone
;
zone
>=
first_classzone
;
zone
--
)
{
int
max_scan
;
int
to_reclaim
;
int
unreclaimed
;
to_reclaim
=
zone
->
pages_high
-
zone
->
free_pages
;
if
(
to_reclaim
<
0
)
continue
;
/* zone has enough memory */
if
(
to_reclaim
>
SWAP_CLUSTER_MAX
)
to_reclaim
=
SWAP_CLUSTER_MAX
;
if
(
to_reclaim
<
nr_pages
)
to_reclaim
=
nr_pages
;
/*
* If we cannot reclaim `nr_pages' pages by scanning twice
* that many pages then fall back to the next zone.
*/
max_scan
=
zone
->
nr_inactive
>>
priority
;
if
(
max_scan
<
to_reclaim
*
2
)
max_scan
=
to_reclaim
*
2
;
unreclaimed
=
shrink_zone
(
zone
,
max_scan
,
gfp_mask
,
to_reclaim
);
nr_pages
-=
to_reclaim
-
unreclaimed
;
*
total_scanned
+=
max_scan
;
}
shrink_dcache_memory
(
priority
,
gfp_mask
);
shrink_icache_memory
(
1
,
gfp_mask
);
#ifdef CONFIG_QUOTA
shrink_dqcache_memory
(
DEF_PRIORITY
,
gfp_mask
);
#endif
return
nr_pages
;
}
...
...
@@ -564,12 +621,25 @@ try_to_free_pages(struct zone *classzone,
KERNEL_STAT_INC
(
pageoutrun
);
for
(
priority
=
DEF_PRIORITY
;
priority
;
priority
--
)
{
nr_pages
=
shrink_caches
(
classzone
,
priority
,
int
total_scanned
=
0
;
nr_pages
=
shrink_caches
(
classzone
,
priority
,
&
total_scanned
,
gfp_mask
,
nr_pages
);
if
(
nr_pages
<=
0
)
return
1
;
if
(
total_scanned
==
0
)
return
1
;
/* All zones had enough free memory */
if
(
!
(
gfp_mask
&
__GFP_FS
))
break
;
break
;
/* Let the caller handle it */
/*
* Try to write back as many pages as we just scanned. Not
* sure if that makes sense, but it's an attempt to avoid
* creating IO storms unnecessarily
*/
wakeup_bdflush
(
total_scanned
);
/* Take a nap, wait for some writeback to complete */
blk_congestion_wait
(
WRITE
,
HZ
/
4
);
}
if
(
gfp_mask
&
__GFP_FS
)
out_of_memory
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment