Kirill Smelkov / linux / Commits / e5a66496

Commit e5a66496, authored Feb 21, 2019 by Kent Overstreet; committed by Kent Overstreet on Oct 22, 2023.
bcachefs: Journal reclaim refactoring
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
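In outline, as the diff below shows: the journal free-space accounting moves from journal_io.c into journal_reclaim.c, where journal_dev_buckets_available() becomes the exported bch2_journal_dev_buckets_available() and bch2_journal_space_available() becomes a void function that caches its result in two new struct journal fields, cur_entry_sectors and cur_entry_error, so journal_entry_open() consumes the cached values instead of recomputing them. bch2_journal_reclaim_fast() becomes static to journal_reclaim.c, the pin-flush loops in bch2_journal_reclaim_work() and journal_flush_done() are consolidated into a new journal_flush_pins() helper, and the bucket-discard loop moves into a new journal_do_discards().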
parent 2d3b5810

Showing 6 changed files with 290 additions and 278 deletions (+290, -278):
fs/bcachefs/journal.c           +11   -15
fs/bcachefs/journal_io.c        +22  -110
fs/bcachefs/journal_io.h         +0    -1
fs/bcachefs/journal_reclaim.c  +245  -151
fs/bcachefs/journal_reclaim.h    +6    -1
fs/bcachefs/journal_types.h      +6    -0
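The core of the change is a compute-once/consume-many pattern: bch2_journal_space_available() recomputes how much space the next journal entry can use and caches both the sector count and the error state on struct journal, and the entry-open fast path only reads the cache. A minimal userspace sketch of that pattern follows (stand-in types and simplified replica accounting, not the kernel code):

#include <errno.h>
#include <stdio.h>

struct journal {
	unsigned cur_entry_sectors;	/* cached: sectors usable by the next entry */
	int	 cur_entry_error;	/* cached: 0, -ENOSPC or -EROFS */
	unsigned free_sectors;		/* stand-in for the per-device calculation */
	unsigned nr_online_devs;	/* stand-in for replica accounting */
};

/* Slow path: recompute and cache (the bch2_journal_space_available() role). */
static void space_available(struct journal *j)
{
	int ret = 0;
	unsigned sectors = j->free_sectors;

	if (j->nr_online_devs < 1) {	/* metadata_replicas_required stand-in */
		ret = -EROFS;
		sectors = 0;
	} else if (!sectors) {
		ret = -ENOSPC;
	}

	j->cur_entry_sectors	= sectors;
	j->cur_entry_error	= ret;
}

/* Fast path: only consume the cache (the journal_entry_open() role). */
static int entry_open(struct journal *j)
{
	if (j->cur_entry_error)
		return j->cur_entry_error;

	printf("opened entry with %u sectors\n", j->cur_entry_sectors);
	return 0;
}

int main(void)
{
	struct journal j = { .free_sectors = 512, .nr_online_devs = 1 };

	space_available(&j);	/* rerun whenever the space picture changes */
	return entry_open(&j) ? 1 : 0;
}

In the real diff, every path that changes the space picture reruns the recomputation under j->lock: closing an entry, advancing ja->last_idx in journal_do_discards(), popping pins in bch2_journal_reclaim_fast(), and finishing a journal write.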
fs/bcachefs/journal.c
@@ -84,17 +84,12 @@ void bch2_journal_halt(struct journal *j)
 	journal_wake(j);
 	closure_wake_up(&journal_cur_buf(j)->wait);
 	closure_wake_up(&journal_prev_buf(j)->wait);
 }

 /* journal entry close/open: */

 void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
 {
 	struct journal_buf *w = journal_prev_buf(j);

-	atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
 	if (!need_write_just_set &&
 	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		bch2_time_stats_update(j->delay_time,
@@ -175,7 +170,6 @@ static bool __journal_entry_close(struct journal *j)
 	 * Hence, we want update/set last_seq on the current journal entry right
 	 * before we open a new one:
 	 */
-	bch2_journal_reclaim_fast(j);
 	buf->data->last_seq	= cpu_to_le64(journal_last_seq(j));

 	if (journal_entry_empty(buf->data))
@@ -189,8 +183,8 @@ static bool __journal_entry_close(struct journal *j)
 	cancel_delayed_work(&j->write_work);

 	/* ugh - might be called from __journal_res_get() under wait_event() */
 	__set_current_state(TASK_RUNNING);
+	bch2_journal_space_available(j);

 	bch2_journal_buf_put(j, old.idx, set_need_write);

 	return true;
 }
@@ -220,7 +214,7 @@ static int journal_entry_open(struct journal *j)
 {
 	struct journal_buf *buf = journal_cur_buf(j);
 	union journal_res_state old, new;
-	int u64s, ret;
+	int u64s;
 	u64 v;

 	lockdep_assert_held(&j->lock);
@@ -229,12 +223,10 @@ static int journal_entry_open(struct journal *j)
 	if (j->blocked)
 		return -EAGAIN;

-	if (!fifo_free(&j->pin))
-		return -ENOSPC;
+	if (j->cur_entry_error)
+		return j->cur_entry_error;

-	ret = bch2_journal_space_available(j);
-	if (ret)
-		return ret;
+	BUG_ON(!j->cur_entry_sectors);

 	buf->u64s_reserved	= j->entry_u64s_reserved;
+	buf->disk_sectors	= j->cur_entry_sectors;
@@ -411,7 +403,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 {
 	int ret;

-	wait_event(j->wait,
+	closure_wait_event(&j->async_wait,
 		   (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
 		   (flags & JOURNAL_RES_GET_NONBLOCK));
 	return ret;
@@ -969,6 +961,7 @@ void bch2_fs_journal_start(struct journal *j)
 	c->last_bucket_seq_cleanup = journal_cur_seq(j);

+	bch2_journal_space_available(j);

 	spin_unlock(&j->lock);

 	/*
@@ -1144,9 +1137,12 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
 		pr_buf(&out,
 		       "dev %u:\n"
 		       "\tnr\t\t%u\n"
+		       "\tavailable\t%u:%u\n"
 		       "\tcur_idx\t\t%u (seq %llu)\n"
 		       "\tlast_idx\t%u (seq %llu)\n",
 		       iter, ja->nr,
+		       bch2_journal_dev_buckets_available(j, ja),
+		       ja->sectors_free,
 		       ja->cur_idx,	ja->bucket_seq[ja->cur_idx],
 		       ja->last_idx,	ja->bucket_seq[ja->last_idx]);
 	}
fs/bcachefs/journal_io.c
@@ -825,7 +825,6 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 {
 	struct journal *j = &c->journal;
-	struct journal_entry_pin_list *pin_list;
 	struct bkey_i *k, *_n;
 	struct jset_entry *entry;
 	struct journal_replay *i, *n;
@@ -867,10 +866,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 			cond_resched();
 		}

-		pin_list = journal_seq_pin(j, j->replay_journal_seq);
-
-		if (atomic_dec_and_test(&pin_list->count))
-			journal_wake(j);
+		bch2_journal_pin_put(j, j->replay_journal_seq);
 	}

 	j->replay_journal_seq = 0;
@@ -885,101 +881,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)

 /* journal write: */

-static unsigned journal_dev_buckets_available(struct journal *j,
-					      struct journal_device *ja)
-{
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	unsigned next = (ja->cur_idx + 1) % ja->nr;
-	unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
-
-	/*
-	 * Allocator startup needs some journal space before we can do journal
-	 * replay:
-	 */
-	if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
-		available--;
-
-	/*
-	 * Don't use the last bucket unless writing the new last_seq
-	 * will make another bucket available:
-	 */
-	if (available && journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
-		--available;
-
-	return available;
-}
-
-int bch2_journal_space_available(struct journal *j)
-{
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bch_dev *ca;
-	unsigned sectors_next_entry = UINT_MAX;
-	unsigned i, nr_online = 0, nr_devs = 0;
-	unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
-		? journal_prev_buf(j)->sectors
-		: 0;
-	int ret = 0;
-
-	lockdep_assert_held(&j->lock);
-
-	rcu_read_lock();
-	for_each_member_device_rcu(ca, c, i,
-				   &c->rw_devs[BCH_DATA_JOURNAL]) {
-		struct journal_device *ja = &ca->journal;
-		unsigned buckets_this_device, sectors_this_device;
-
-		if (!ja->nr)
-			continue;
-
-		nr_online++;
-
-		buckets_this_device = journal_dev_buckets_available(j, ja);
-		sectors_this_device = ja->sectors_free;
-
-		/*
-		 * We that we don't allocate the space for a journal entry
-		 * until we write it out - thus, account for it here:
-		 */
-		if (unwritten_sectors >= sectors_this_device) {
-			if (!buckets_this_device)
-				continue;
-
-			buckets_this_device--;
-			sectors_this_device = ca->mi.bucket_size;
-		}
-
-		sectors_this_device -= unwritten_sectors;
-
-		if (buckets_this_device)
-			sectors_this_device = ca->mi.bucket_size;
-
-		if (!sectors_this_device)
-			continue;
-
-		sectors_next_entry = min(sectors_next_entry,
-					 sectors_this_device);
-
-		nr_devs++;
-	}
-	rcu_read_unlock();
-
-	if (nr_online < c->opts.metadata_replicas_required) {
-		ret = -EROFS;
-		sectors_next_entry = 0;
-	} else if (!sectors_next_entry ||
-		   nr_devs < min_t(unsigned, nr_online,
-				   c->opts.metadata_replicas)) {
-		ret = -ENOSPC;
-		sectors_next_entry = 0;
-	}
-
-	WRITE_ONCE(j->cur_entry_sectors, sectors_next_entry);
-
-	return ret;
-}
-
 static void __journal_write_alloc(struct journal *j,
 				  struct journal_buf *w,
 				  struct dev_alloc_list *devs_sorted,
@@ -1053,7 +954,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
 	devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe,
 					  &c->rw_devs[BCH_DATA_JOURNAL]);

-	spin_lock(&j->lock);
 	__journal_write_alloc(j, w, &devs_sorted,
 			      sectors, &replicas, replicas_want);
@@ -1069,7 +969,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
 		if (sectors > ja->sectors_free &&
 		    sectors <= ca->mi.bucket_size &&
-		    journal_dev_buckets_available(j, ja)) {
+		    bch2_journal_dev_buckets_available(j, ja)) {
 			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
 			ja->sectors_free = ca->mi.bucket_size;
 		}
@@ -1078,7 +978,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
 	__journal_write_alloc(j, w, &devs_sorted,
 			      sectors, &replicas, replicas_want);
 done:
-	spin_unlock(&j->lock);
 	rcu_read_unlock();

 	return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
@@ -1237,6 +1136,9 @@ void bch2_journal_write(struct closure *cl)
 	struct bch_extent_ptr *ptr;
 	bool validate_before_checksum = false;
 	unsigned i, sectors, bytes, u64s;
+	int ret;
+
+	bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));

 	journal_buf_realloc(j, w);
 	jset = w->data;
@@ -1293,7 +1195,23 @@ void bch2_journal_write(struct closure *cl)
 	bytes = vstruct_bytes(jset);
 	memset((void *) jset + bytes, 0, (sectors << 9) - bytes);

-	if (journal_write_alloc(j, w, sectors)) {
+	spin_lock(&j->lock);
+	ret = journal_write_alloc(j, w, sectors);
+
+	/*
+	 * write is allocated, no longer need to account for it in
+	 * bch2_journal_space_available():
+	 */
+	w->sectors = 0;
+
+	/*
+	 * journal entry has been compacted and allocated, recalculate space
+	 * available:
+	 */
+	bch2_journal_space_available(j);
+	spin_unlock(&j->lock);
+
+	if (ret) {
 		bch2_journal_halt(j);
 		bch_err(c, "Unable to allocate journal write");
 		bch2_fatal_error(c);
@@ -1301,12 +1219,6 @@ void bch2_journal_write(struct closure *cl)
 		return;
 	}

-	/*
-	 * write is allocated, no longer need to account for it in
-	 * bch2_journal_entry_sectors:
-	 */
-	w->sectors = 0;
-
 	/*
 	 * XXX: we really should just disable the entire journal in nochanges
 	 * mode
fs/bcachefs/journal_io.h
@@ -40,7 +40,6 @@ int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);

-int bch2_journal_space_available(struct journal *);
 void bch2_journal_write(struct closure *);

 #endif /* _BCACHEFS_JOURNAL_IO_H */
fs/bcachefs/journal_reclaim.c
@@ -2,15 +2,213 @@
 #include "bcachefs.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
 #include "replicas.h"
 #include "super.h"

+/* Free space calculations: */
+
+unsigned bch2_journal_dev_buckets_available(struct journal *j,
+					    struct journal_device *ja)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	unsigned next = (ja->cur_idx + 1) % ja->nr;
+	unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
+
+	/*
+	 * Allocator startup needs some journal space before we can do journal
+	 * replay:
+	 */
+	if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
+		available--;
+
+	/*
+	 * Don't use the last bucket unless writing the new last_seq
+	 * will make another bucket available:
+	 */
+	if (available && journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
+		--available;
+
+	return available;
+}
+
+void bch2_journal_space_available(struct journal *j)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	struct bch_dev *ca;
+	unsigned sectors_next_entry	= UINT_MAX;
+	unsigned sectors_total		= UINT_MAX;
+	unsigned max_entry_size		= min(j->buf[0].buf_size >> 9,
+					      j->buf[1].buf_size >> 9);
+	unsigned i, nr_online = 0, nr_devs = 0;
+	unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
+		? journal_prev_buf(j)->sectors
+		: 0;
+	int ret = 0;
+
+	lockdep_assert_held(&j->lock);
+
+	rcu_read_lock();
+	for_each_member_device_rcu(ca, c, i,
+				   &c->rw_devs[BCH_DATA_JOURNAL]) {
+		struct journal_device *ja = &ca->journal;
+		unsigned buckets_this_device, sectors_this_device;
+
+		if (!ja->nr)
+			continue;
+
+		nr_online++;
+
+		buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
+		sectors_this_device = ja->sectors_free;
+
+		/*
+		 * We that we don't allocate the space for a journal entry
+		 * until we write it out - thus, account for it here:
+		 */
+		if (unwritten_sectors >= sectors_this_device) {
+			if (!buckets_this_device)
+				continue;
+
+			buckets_this_device--;
+			sectors_this_device = ca->mi.bucket_size;
+		}
+
+		sectors_this_device -= unwritten_sectors;
+
+		if (sectors_this_device < ca->mi.bucket_size &&
+		    buckets_this_device) {
+			buckets_this_device--;
+			sectors_this_device = ca->mi.bucket_size;
+		}
+
+		if (!sectors_this_device)
+			continue;
+
+		sectors_next_entry = min(sectors_next_entry,
+					 sectors_this_device);
+		sectors_total = min(sectors_total,
+			buckets_this_device * ca->mi.bucket_size +
+			sectors_this_device);
+		max_entry_size = min_t(unsigned, max_entry_size,
+				       ca->mi.bucket_size);
+
+		nr_devs++;
+	}
+	rcu_read_unlock();
+
+	if (nr_online < c->opts.metadata_replicas_required) {
+		ret = -EROFS;
+		sectors_next_entry = 0;
+	} else if (!sectors_next_entry ||
+		   nr_devs < min_t(unsigned, nr_online,
+				   c->opts.metadata_replicas)) {
+		ret = -ENOSPC;
+		sectors_next_entry = 0;
+	} else if (!fifo_free(&j->pin)) {
+		ret = -ENOSPC;
+		sectors_next_entry = 0;
+	}
+
+	j->cur_entry_sectors	= sectors_next_entry;
+	j->cur_entry_error	= ret;
+
+	if (!ret)
+		journal_wake(j);
+}
+
+/* Discards - last part of journal reclaim: */
+
+static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
+{
+	bool ret;
+
+	spin_lock(&j->lock);
+	ret = ja->nr &&
+		ja->last_idx != ja->cur_idx &&
+		ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
+	spin_unlock(&j->lock);
+
+	return ret;
+}
+
+/*
+ * Advance ja->last_idx as long as it points to buckets that are no longer
+ * dirty, issuing discards if necessary:
+ */
+static void journal_do_discards(struct journal *j)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	struct bch_dev *ca;
+	unsigned iter;
+
+	mutex_lock(&j->reclaim_lock);
+
+	for_each_rw_member(ca, c, iter) {
+		struct journal_device *ja = &ca->journal;
+
+		while (should_discard_bucket(j, ja)) {
+			if (ca->mi.discard &&
+			    bdev_max_discard_sectors(ca->disk_sb.bdev))
+				blkdev_issue_discard(ca->disk_sb.bdev,
+					bucket_to_sector(ca,
+						ja->buckets[ja->last_idx]),
+					ca->mi.bucket_size, GFP_NOIO);
+
+			spin_lock(&j->lock);
+			ja->last_idx = (ja->last_idx + 1) % ja->nr;
+
+			bch2_journal_space_available(j);
+			spin_unlock(&j->lock);
+		}
+	}
+
+	mutex_unlock(&j->reclaim_lock);
+}
+
 /*
  * Journal entry pinning - machinery for holding a reference on a given journal
  * entry, holding it open to ensure it gets replayed during recovery:
  */

+static void bch2_journal_reclaim_fast(struct journal *j)
+{
+	struct journal_entry_pin_list temp;
+	bool popped = false;
+
+	lockdep_assert_held(&j->lock);
+
+	/*
+	 * Unpin journal entries whose reference counts reached zero, meaning
+	 * all btree nodes got written out
+	 */
+	while (!fifo_empty(&j->pin) &&
+	       !atomic_read(&fifo_peek_front(&j->pin).count)) {
+		BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
+		BUG_ON(!fifo_pop(&j->pin, temp));
+		popped = true;
+	}
+
+	if (popped)
+		bch2_journal_space_available(j);
+}
+
+void bch2_journal_pin_put(struct journal *j, u64 seq)
+{
+	struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+
+	if (atomic_dec_and_test(&pin_list->count)) {
+		spin_lock(&j->lock);
+		bch2_journal_reclaim_fast(j);
+		spin_unlock(&j->lock);
+	}
+}
+
 static inline void __journal_pin_add(struct journal *j, u64 seq,
 				     struct journal_entry_pin *pin,
@@ -25,10 +223,7 @@ static inline void __journal_pin_add(struct journal *j,
 	pin->seq	= seq;
 	pin->flush	= flush_fn;

-	if (flush_fn)
-		list_add(&pin->list, &pin_list->list);
-	else
-		INIT_LIST_HEAD(&pin->list);
+	list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);

 	/*
 	 * If the journal is currently full, we might want to call flush_fn
@@ -130,86 +325,53 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
  * data off of a specific device:
  */

-/**
- * bch2_journal_reclaim_fast - do the fast part of journal reclaim
- *
- * Called from IO submission context, does not block. Cleans up after btree
- * write completions by advancing the journal pin and each cache's last_idx,
- * kicking off discards and background reclaim as necessary.
- */
-void bch2_journal_reclaim_fast(struct journal *j)
-{
-	struct journal_entry_pin_list temp;
-	bool popped = false;
-
-	lockdep_assert_held(&j->lock);
-
-	/*
-	 * Unpin journal entries whose reference counts reached zero, meaning
-	 * all btree nodes got written out
-	 */
-	while (!fifo_empty(&j->pin) &&
-	       !atomic_read(&fifo_peek_front(&j->pin).count)) {
-		BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
-		BUG_ON(!fifo_pop(&j->pin, temp));
-		popped = true;
-	}
-
-	if (popped)
-		journal_wake(j);
-}
-
-static void journal_pin_mark_flushing(struct journal *j,
-				      struct journal_entry_pin *pin,
-				      u64 seq)
-{
-	lockdep_assert_held(&j->reclaim_lock);
-
-	list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
-	BUG_ON(j->flush_in_progress);
-	j->flush_in_progress = pin;
-}
-
-static void journal_pin_flush(struct journal *j,
-			      struct journal_entry_pin *pin,
-			      u64 seq)
-{
-	pin->flush(j, pin, seq);
-
-	BUG_ON(j->flush_in_progress != pin);
-	j->flush_in_progress = NULL;
-	wake_up(&j->pin_flush_wait);
-}
-
 static struct journal_entry_pin *
-journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
+journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
 {
 	struct journal_entry_pin_list *pin_list;
 	struct journal_entry_pin *ret = NULL;

-	/* no need to iterate over empty fifo entries: */
-	bch2_journal_reclaim_fast(j);
+	spin_lock(&j->lock);

 	BUG_ON(!atomic_read(&fifo_peek_front(&j->pin).count));

 	fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
-		if (*seq > seq_to_flush ||
+		if (*seq > max_seq ||
 		    (ret = list_first_entry_or_null(&pin_list->list,
 				struct journal_entry_pin, list)))
 			break;

+	if (ret) {
+		list_move(&ret->list, &pin_list->flushed);
+		BUG_ON(j->flush_in_progress);
+		j->flush_in_progress = ret;
+		j->last_flushed = jiffies;
+	}
+
+	spin_unlock(&j->lock);
+
 	return ret;
 }

-static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
+static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
+			       unsigned min_nr)
 {
-	bool ret;
+	struct journal_entry_pin *pin;
+	u64 seq;

-	spin_lock(&j->lock);
-	ret = ja->nr &&
-		(ja->last_idx != ja->cur_idx &&
-		 ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk);
-	spin_unlock(&j->lock);
+	lockdep_assert_held(&j->reclaim_lock);

-	return ret;
+	while ((pin = journal_get_next_pin(j, min_nr
+				? U64_MAX : seq_to_flush, &seq))) {
+		if (min_nr)
+			min_nr--;
+
+		pin->flush(j, pin, seq);
+
+		BUG_ON(j->flush_in_progress != pin);
+		j->flush_in_progress = NULL;
+		wake_up(&j->pin_flush_wait);
+	}
 }

 /**
@@ -236,103 +398,43 @@ void bch2_journal_reclaim_work(struct work_struct *work)
 					struct bch_fs, journal.reclaim_work);
 	struct journal *j = &c->journal;
 	struct bch_dev *ca;
-	struct journal_entry_pin *pin;
-	u64 seq, seq_to_flush = 0;
-	unsigned iter, bucket_to_flush;
-	unsigned long next_flush;
-	bool reclaim_lock_held = false, need_flush;
+	unsigned iter, bucket_to_flush, min_nr = 0;
+	u64 seq_to_flush = 0;
+
+	journal_do_discards(j);
+
+	mutex_lock(&j->reclaim_lock);
+	spin_lock(&j->lock);

-	/*
-	 * Advance last_idx to point to the oldest journal entry containing
-	 * btree node updates that have not yet been written out
-	 */
 	for_each_rw_member(ca, c, iter) {
 		struct journal_device *ja = &ca->journal;

 		if (!ja->nr)
 			continue;

-		while (should_discard_bucket(j, ja)) {
-			if (!reclaim_lock_held) {
-				/*
-				 * ugh:
-				 * might be called from __journal_res_get()
-				 * under wait_event() - have to go back to
-				 * TASK_RUNNING before doing something that
-				 * would block, but only if we're doing work:
-				 */
-				__set_current_state(TASK_RUNNING);
-
-				mutex_lock(&j->reclaim_lock);
-				reclaim_lock_held = true;
-				/* recheck under reclaim_lock: */
-				continue;
-			}
-
-			if (ca->mi.discard &&
-			    bdev_max_discard_sectors(ca->disk_sb.bdev))
-				blkdev_issue_discard(ca->disk_sb.bdev,
-					bucket_to_sector(ca,
-						ja->buckets[ja->last_idx]),
-					ca->mi.bucket_size, GFP_NOIO);
-
-			spin_lock(&j->lock);
-			ja->last_idx = (ja->last_idx + 1) % ja->nr;
-			spin_unlock(&j->lock);
-
-			journal_wake(j);
-		}
-
-		/*
-		 * Write out enough btree nodes to free up 50% journal
-		 * buckets
-		 */
-		spin_lock(&j->lock);
+		/* Try to keep the journal at most half full: */
 		bucket_to_flush = (ja->cur_idx + (ja->nr >> 1)) % ja->nr;
 		seq_to_flush = max_t(u64, seq_to_flush,
 				     ja->bucket_seq[bucket_to_flush]);
-		spin_unlock(&j->lock);
 	}

 	/* Also flush if the pin fifo is more than half full */
-	spin_lock(&j->lock);
 	seq_to_flush = max_t(s64, seq_to_flush,
 			     (s64) journal_cur_seq(j) -
 			     (j->pin.size >> 1));
 	spin_unlock(&j->lock);

 	/*
 	 * If it's been longer than j->reclaim_delay_ms since we last flushed,
 	 * make sure to flush at least one journal pin:
 	 */
-	next_flush = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
-	need_flush = time_after(jiffies, next_flush);
-
-	while ((pin = journal_get_next_pin(j, need_flush
-					   ? U64_MAX : seq_to_flush, &seq))) {
-		if (!reclaim_lock_held) {
-			spin_unlock(&j->lock);
-			__set_current_state(TASK_RUNNING);
-			mutex_lock(&j->reclaim_lock);
-			reclaim_lock_held = true;
-			spin_lock(&j->lock);
-			continue;
-		}
-
-		journal_pin_mark_flushing(j, pin, seq);
-		spin_unlock(&j->lock);
-		journal_pin_flush(j, pin, seq);
-		need_flush = false;
-		j->last_flushed = jiffies;
-		spin_lock(&j->lock);
-	}
-
-	spin_unlock(&j->lock);
-
-	if (reclaim_lock_held)
-		mutex_unlock(&j->reclaim_lock);
+	if (time_after(jiffies, j->last_flushed +
+		       msecs_to_jiffies(j->reclaim_delay_ms)))
+		min_nr = 1;
+
+	journal_flush_pins(j, seq_to_flush, min_nr);
+
+	mutex_unlock(&j->reclaim_lock);

 	if (!test_bit(BCH_FS_RO, &c->flags))
@@ -342,8 +444,6 @@ void bch2_journal_reclaim_work(struct work_struct *work)
 static int journal_flush_done(struct journal *j, u64 seq_to_flush)
 {
-	struct journal_entry_pin *pin;
-	u64 pin_seq;
 	int ret;

 	ret = bch2_journal_error(j);
@@ -351,16 +451,10 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
 		return ret;

 	mutex_lock(&j->reclaim_lock);
-	spin_lock(&j->lock);

-	while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq))) {
-		journal_pin_mark_flushing(j, pin, pin_seq);
-		spin_unlock(&j->lock);
-		journal_pin_flush(j, pin, pin_seq);
-		spin_lock(&j->lock);
-	}
+	journal_flush_pins(j, seq_to_flush, 0);
+
+	spin_lock(&j->lock);

 	/*
 	 * If journal replay hasn't completed, the unreplayed journal entries
 	 * hold refs on their corresponding sequence numbers
fs/bcachefs/journal_reclaim.h
@@ -4,6 +4,10 @@
 #define JOURNAL_PIN	(32 * 1024)

+unsigned bch2_journal_dev_buckets_available(struct journal *,
+					    struct journal_device *);
+void bch2_journal_space_available(struct journal *);
+
 static inline bool journal_pin_active(struct journal_entry_pin *pin)
 {
 	return pin->seq != 0;
@@ -17,6 +21,8 @@ journal_seq_pin(struct journal *j, u64 seq)
 	return &j->pin.data[seq & j->pin.mask];
 }

+void bch2_journal_pin_put(struct journal *, u64);
+
 void bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *,
 			  journal_pin_flush_fn);
 void bch2_journal_pin_update(struct journal *, u64, struct journal_entry_pin *,
@@ -28,7 +34,6 @@ void bch2_journal_pin_add_if_older(struct journal *,
 				   journal_pin_flush_fn);
 void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);

-void bch2_journal_reclaim_fast(struct journal *);
 void bch2_journal_reclaim_work(struct work_struct *);

 void bch2_journal_flush_pins(struct journal *, u64);
fs/bcachefs/journal_types.h
@@ -136,6 +136,12 @@ struct journal {
 	unsigned		cur_entry_u64s;
+	unsigned		cur_entry_sectors;
+	/*
+	 * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
+	 * insufficient devices:
+	 */
+	int			cur_entry_error;

 	/* Reserved space in journal entry to be used just prior to write */
 	unsigned		entry_u64s_reserved;