Commit a9ec3454
Authored Nov 18, 2018 by Kent Overstreet
Committed by Kent Overstreet on Oct 22, 2023

bcachefs: Journal refactoring

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Parent: f1a79365

Showing 4 changed files with 124 additions and 230 deletions:

    fs/bcachefs/journal.c            +2 -6
    fs/bcachefs/journal_io.c       +120 -222
    fs/bcachefs/journal_reclaim.c    +2 -1
    fs/bcachefs/journal_types.h      +0 -1
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
@@ -134,6 +134,8 @@ static enum {
 		c->opts.block_size;
 	BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
 
+	bkey_extent_init(&buf->key);
+
 	/*
 	 * We have to set last_seq here, _before_ opening a new journal entry:
 	 *
@@ -890,10 +892,6 @@ static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
 
 void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
 {
-	spin_lock(&j->lock);
-	bch2_extent_drop_device(bkey_i_to_s_extent(&j->key), ca->dev_idx);
-	spin_unlock(&j->lock);
-
 	wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
 }
@@ -1032,8 +1030,6 @@ int bch2_fs_journal_init(struct journal *j)
 	j->write_delay_ms	= 1000;
 	j->reclaim_delay_ms	= 100;
 
-	bkey_extent_init(&j->key);
-
 	atomic64_set(&j->reservations.counter,
 		     ((union journal_res_state)
 		      { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
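
Taken together, the journal.c hunks move ownership of the journal's replica key out of the long-lived struct journal (j->key is no longer initialized in bch2_fs_journal_init() or torn down in bch2_dev_journal_stop()) and into each write buffer, where it is reinitialized as the entry is opened. A minimal userspace sketch of that ownership change, using invented stand-in types (not bcachefs code):

#include <stdio.h>

struct key { unsigned nr_ptrs; };	/* stand-in for a bkey */

struct journal_buf {
	struct key key;			/* per-entry key now lives here */
};

struct journal {
	struct journal_buf buf[2];	/* double-buffered entries */
	/* before this commit: one shared key, mutated under j->lock */
};

static void key_init(struct key *k) { k->nr_ptrs = 0; }

int main(void)
{
	struct journal j;

	/* opening an entry resets only that buffer's key */
	key_init(&j.buf[0].key);
	key_init(&j.buf[1].key);
	printf("%u %u\n", j.buf[0].key.nr_ptrs, j.buf[1].key.nr_ptrs);
	return 0;
}
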
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
@@ -426,7 +426,7 @@ static int journal_read_buf_realloc(struct journal_read_buf *b,
 static int journal_read_bucket(struct bch_dev *ca,
 			       struct journal_read_buf *buf,
 			       struct journal_list *jlist,
-			       unsigned bucket, u64 *seq, bool *entries_found)
+			       unsigned bucket)
 {
 	struct bch_fs *c = ca->fs;
 	struct journal_device *ja = &ca->journal;
@@ -511,7 +511,6 @@ static int journal_read_bucket(struct bch_dev *ca,
 		switch (ret) {
 		case JOURNAL_ENTRY_ADD_OK:
-			*entries_found = true;
 			break;
 		case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
 			break;
@@ -519,9 +518,6 @@ static int journal_read_bucket(struct bch_dev *ca,
 			return ret;
 		}
 
-		if (le64_to_cpu(j->seq) > *seq)
-			*seq = le64_to_cpu(j->seq);
-
 		sectors = vstruct_sectors(j, c->block_bits);
 
 next_block:
 		pr_debug("next");
@@ -535,124 +531,51 @@ static int journal_read_bucket(struct bch_dev *ca,
 
 static void bch2_journal_read_device(struct closure *cl)
 {
-#define read_bucket(b)							\
-	({								\
-		bool entries_found = false;				\
-		ret = journal_read_bucket(ca, &buf, jlist, b, &seq,	\
-					  &entries_found);		\
-		if (ret)						\
-			goto err;					\
-		__set_bit(b, bitmap);					\
-		entries_found;						\
-	 })
-
 	struct journal_device *ja =
 		container_of(cl, struct journal_device, read);
 	struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
 	struct journal_list *jlist =
 		container_of(cl->parent, struct journal_list, cl);
-	struct request_queue *q = bdev_get_queue(ca->disk_sb.bdev);
 	struct journal_read_buf buf = { NULL, 0 };
-	unsigned long *bitmap;
-	unsigned i, l, r;
-	u64 seq = 0, min_seq = U64_MAX;
+	u64 min_seq = U64_MAX;
+	unsigned i;
 	int ret;
 
 	if (!ja->nr)
 		goto out;
 
-	bitmap = kcalloc(BITS_TO_LONGS(ja->nr), ja->nr, GFP_KERNEL);
-	if (!bitmap) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
 	ret = journal_read_buf_realloc(&buf, PAGE_SIZE);
 	if (ret)
 		goto err;
 
 	pr_debug("%u journal buckets", ja->nr);
 
-	/*
-	 * If the device supports discard but not secure discard, we can't do
-	 * the fancy fibonacci hash/binary search because the live journal
-	 * entries might not form a contiguous range:
-	 */
-	for (i = 0; i < ja->nr; i++)
-		read_bucket(i);
-	goto search_done;
-
-	if (!blk_queue_nonrot(q))
-		goto linear_scan;
-
-	/*
-	 * Read journal buckets ordered by golden ratio hash to quickly
-	 * find a sequence of buckets with valid journal entries
-	 */
 	for (i = 0; i < ja->nr; i++) {
-		l = (i * 2654435769U) % ja->nr;
-
-		if (test_bit(l, bitmap))
-			break;
-
-		if (read_bucket(l))
-			goto bsearch;
+		ret = journal_read_bucket(ca, &buf, jlist, i);
+		if (ret)
+			goto err;
 	}
 
-	/*
-	 * If that fails, check all the buckets we haven't checked
-	 * already
-	 */
-	pr_debug("falling back to linear search");
-linear_scan:
-	for (l = find_first_zero_bit(bitmap, ja->nr);
-	     l < ja->nr;
-	     l = find_next_zero_bit(bitmap, ja->nr, l + 1))
-		if (read_bucket(l))
-			goto bsearch;
-
-	/* no journal entries on this device? */
-	if (l == ja->nr)
-		goto out;
-bsearch:
-	/* Binary search */
-	r = find_next_bit(bitmap, ja->nr, l + 1);
-	pr_debug("starting binary search, l %u r %u", l, r);
-
-	while (l + 1 < r) {
-		unsigned m = (l + r) >> 1;
-		u64 cur_seq = seq;
-
-		read_bucket(m);
+	/* Find the journal bucket with the highest sequence number: */
+	for (i = 0; i < ja->nr; i++) {
+		if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx])
+			ja->cur_idx = i;
 
-		if (cur_seq != seq)
-			l = m;
-		else
-			r = m;
+		min_seq = min(ja->bucket_seq[i], min_seq);
 	}
 
-search_done:
 	/*
-	 * Find the journal bucket with the highest sequence number:
-	 *
 	 * If there's duplicate journal entries in multiple buckets (which
 	 * definitely isn't supposed to happen, but...) - make sure to start
 	 * cur_idx at the last of those buckets, so we don't deadlock trying to
 	 * allocate
 	 */
-	seq = 0;
-
-	for (i = 0; i < ja->nr; i++)
-		if (ja->bucket_seq[i] >= seq &&
-		    ja->bucket_seq[i] != ja->bucket_seq[(i + 1) % ja->nr]) {
-			/*
-			 * When journal_next_bucket() goes to allocate for
-			 * the first time, it'll use the bucket after
-			 * ja->cur_idx
-			 */
-			ja->cur_idx = i;
-			seq = ja->bucket_seq[i];
-		}
+	while (ja->bucket_seq[ja->cur_idx] > min_seq &&
+	       ja->bucket_seq[ja->cur_idx] >
+	       ja->bucket_seq[(ja->cur_idx + 1) % ja->nr])
+		ja->cur_idx++;
+
+	ja->sectors_free = 0;
 
 	/*
 	 * Set last_idx to indicate the entire journal is full and needs to be
@@ -660,20 +583,8 @@ static void bch2_journal_read_device(struct closure *cl)
 	 * pinned when it first runs:
 	 */
 	ja->last_idx = (ja->cur_idx + 1) % ja->nr;
-
-	/*
-	 * Read buckets in reverse order until we stop finding more journal
-	 * entries:
-	 */
-	for (i = (ja->cur_idx + ja->nr - 1) % ja->nr;
-	     i != ja->cur_idx;
-	     i = (i + ja->nr - 1) % ja->nr)
-		if (!test_bit(i, bitmap) &&
-		    !read_bucket(i))
-			break;
 out:
 	kvpfree(buf.data, buf.size);
-	kfree(bitmap);
 	percpu_ref_put(&ca->io_ref);
 	closure_return(cl);
 	return;
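
The heart of these hunks: the Fibonacci-hash probe order ((i * 2654435769U) % ja->nr), the linear fallback, and the bitmap-driven binary search are all replaced by a single pass that reads every bucket and then selects where writing should resume. A minimal userspace sketch of the new selection scan, with made-up sequence numbers (not bcachefs code):

#include <stdio.h>

int main(void)
{
	unsigned long long bucket_seq[] = { 7, 9, 8, 3, 5 };
	unsigned nr = sizeof(bucket_seq) / sizeof(bucket_seq[0]);
	unsigned cur_idx = 0, i;
	unsigned long long min_seq = ~0ULL;

	/* Find the journal bucket with the highest sequence number,
	 * tracking the lowest one seen along the way: */
	for (i = 0; i < nr; i++) {
		if (bucket_seq[i] > bucket_seq[cur_idx])
			cur_idx = i;
		if (bucket_seq[i] < min_seq)
			min_seq = bucket_seq[i];
	}

	/* Writes will resume in the bucket after cur_idx. */
	printf("cur_idx %u (seq %llu), min_seq %llu\n",
	       cur_idx, bucket_seq[cur_idx], min_seq);
	return 0;
}
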
@@ -682,7 +593,6 @@ static void bch2_journal_read_device(struct closure *cl)
 	jlist->ret = ret;
 	mutex_unlock(&jlist->lock);
 	goto out;
-#undef read_bucket
 }
 
 void bch2_journal_entries_free(struct list_head *list)
@@ -937,32 +847,18 @@ static void bch2_journal_add_btree_root(struct journal_buf *buf,
 }
 
 static unsigned journal_dev_buckets_available(struct journal *j,
-					      struct bch_dev *ca)
+					      struct journal_device *ja)
 {
-	struct journal_device *ja = &ca->journal;
 	unsigned next = (ja->cur_idx + 1) % ja->nr;
 	unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
 
 	/*
-	 * Hack to avoid a deadlock during journal replay:
-	 * journal replay might require setting a new btree
-	 * root, which requires writing another journal entry -
-	 * thus, if the journal is full (and this happens when
-	 * replaying the first journal bucket's entries) we're
-	 * screwed.
-	 *
-	 * So don't let the journal fill up unless we're in
-	 * replay:
-	 */
-	if (test_bit(JOURNAL_REPLAY_DONE, &j->flags))
-		available = max((int) available - 2, 0);
-
-	/*
 	 * Don't use the last bucket unless writing the new last_seq
 	 * will make another bucket available:
 	 */
-	if (ja->bucket_seq[ja->last_idx] >= journal_last_seq(j))
-		available = max((int) available - 1, 0);
+	if (available &&
+	    journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
+		--available;
 
 	return available;
 }
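
journal_dev_buckets_available() now takes the journal_device directly; its result is ring arithmetic over bucket indices. A standalone sketch of that arithmetic, with invented values (not bcachefs code):

#include <stdio.h>

static unsigned buckets_available(unsigned nr, unsigned cur_idx,
				  unsigned last_idx)
{
	unsigned next = (cur_idx + 1) % nr;

	/* buckets from the one after cur_idx up to (not including) the
	 * oldest still-pinned bucket are reusable */
	return (last_idx + nr - next) % nr;
}

int main(void)
{
	/* 8-bucket ring, currently writing bucket 5, bucket 2 still
	 * pinned: buckets 6, 7, 0, 1 are reusable -> prints 4 */
	printf("%u\n", buckets_available(8, 5, 2));
	return 0;
}
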
@@ -972,7 +868,6 @@ int bch2_journal_entry_sectors(struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
-	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
 	unsigned sectors_available = UINT_MAX;
 	unsigned i, nr_online = 0, nr_devs = 0;
@@ -982,38 +877,39 @@ int bch2_journal_entry_sectors(struct journal *j)
 	for_each_member_device_rcu(ca, c, i, &c->rw_devs[BCH_DATA_JOURNAL]) {
 		struct journal_device *ja = &ca->journal;
-		unsigned buckets_required = 0;
+		unsigned buckets_this_device, sectors_this_device;
 
 		if (!ja->nr)
 			continue;
 
-		sectors_available = min_t(unsigned, sectors_available,
-					  ca->mi.bucket_size);
+		buckets_this_device = journal_dev_buckets_available(j, ja);
+		sectors_this_device = ja->sectors_free;
+
+		nr_online++;
 
 		/*
-		 * Note that we don't allocate the space for a journal entry
-		 * until we write it out - thus, if we haven't started the write
-		 * for the previous entry we have to make sure we have space for
-		 * it too:
+		 * We that we don't allocate the space for a journal entry
+		 * until we write it out - thus, account for it here:
 		 */
-		if (bch2_extent_has_device(e.c, ca->dev_idx)) {
-			if (j->prev_buf_sectors > ja->sectors_free)
-				buckets_required++;
-
-			if (j->prev_buf_sectors + sectors_available >
-			    ja->sectors_free)
-				buckets_required++;
-		} else {
-			if (j->prev_buf_sectors + sectors_available >
-			    ca->mi.bucket_size)
-				buckets_required++;
-		}
+		if (j->prev_buf_sectors >= sectors_this_device) {
+			if (!buckets_this_device)
+				continue;
+
+			buckets_this_device--;
+			sectors_this_device = ca->mi.bucket_size;
+		}
+
+		sectors_this_device -= j->prev_buf_sectors;
 
-		if (journal_dev_buckets_available(j, ca) >= buckets_required)
-			nr_devs++;
-		nr_online++;
+		if (buckets_this_device)
+			sectors_this_device = ca->mi.bucket_size;
+
+		if (!sectors_this_device)
+			continue;
+
+		sectors_available = min(sectors_available,
+					sectors_this_device);
+		nr_devs++;
 	}
 	rcu_read_unlock();
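
The rewritten per-device loop sizes the next entry from buckets_this_device and sectors_this_device rather than the old buckets_required counting. A userspace sketch of the new accounting, with made-up numbers (not bcachefs code):

#include <stdio.h>

int main(void)
{
	unsigned bucket_size = 256;		/* sectors per bucket */
	unsigned buckets_this_device = 3;	/* free buckets in the ring */
	unsigned sectors_this_device = 100;	/* free in current bucket */
	unsigned prev_buf_sectors = 120;	/* previous unwritten entry */

	/* the previous, still-unwritten entry must fit first; if it no
	 * longer fits the current bucket, it consumes a fresh one */
	if (prev_buf_sectors >= sectors_this_device) {
		buckets_this_device--;
		sectors_this_device = bucket_size;
	}

	sectors_this_device -= prev_buf_sectors;

	/* if whole buckets remain, a full bucket bounds the next entry */
	if (buckets_this_device)
		sectors_this_device = bucket_size;

	printf("this device can take %u sectors\n", sectors_this_device);
	return 0;
}
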
@@ -1026,106 +922,111 @@ int bch2_journal_entry_sectors(struct journal *j)
 	return sectors_available;
 }
 
-/**
- * journal_next_bucket - move on to the next journal bucket if possible
- */
-static int journal_write_alloc(struct journal *j, struct journal_buf *w,
-			       unsigned sectors)
+static void __journal_write_alloc(struct journal *j,
+				  struct journal_buf *w,
+				  struct dev_alloc_list *devs_sorted,
+				  unsigned sectors,
+				  unsigned *replicas,
+				  unsigned replicas_want)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bkey_s_extent e;
-	struct bch_extent_ptr *ptr;
+	struct bkey_i_extent *e = bkey_i_to_extent(&w->key);
 	struct journal_device *ja;
 	struct bch_dev *ca;
-	struct dev_alloc_list devs_sorted;
-	unsigned i, replicas, replicas_want =
-		READ_ONCE(c->opts.metadata_replicas);
+	unsigned i;
 
-	spin_lock(&j->lock);
-	e = bkey_i_to_s_extent(&j->key);
+	if (*replicas >= replicas_want)
+		return;
 
-	/*
-	 * Drop any pointers to devices that have been removed, are no longer
-	 * empty, or filled up their current journal bucket:
-	 *
-	 * Note that a device may have had a small amount of free space (perhaps
-	 * one sector) that wasn't enough for the smallest possible journal
-	 * entry - that's why we drop pointers to devices <= current free space,
-	 * i.e. whichever device was limiting the current journal entry size.
-	 */
-	bch2_extent_drop_ptrs(e, ptr, ({
-		ca = bch_dev_bkey_exists(c, ptr->dev);
-
-		!ca->mi.durability ||
-		ca->mi.state != BCH_MEMBER_STATE_RW ||
-		ca->journal.sectors_free <= sectors;
-	}));
+	for (i = 0; i < devs_sorted->nr; i++) {
+		ca = rcu_dereference(c->devs[devs_sorted->devs[i]]);
+		if (!ca)
+			continue;
+
+		ja = &ca->journal;
+
+		/*
+		 * Check that we can use this device, and aren't already using
+		 * it:
+		 */
+		if (!ca->mi.durability ||
+		    ca->mi.state != BCH_MEMBER_STATE_RW ||
+		    !ja->nr ||
+		    bch2_extent_has_device(extent_i_to_s_c(e), ca->dev_idx) ||
+		    sectors > ja->sectors_free)
+			continue;
+
+		bch2_dev_stripe_increment(c, ca, &j->wp.stripe);
+
+		extent_ptr_append(e,
+			(struct bch_extent_ptr) {
+				  .offset = bucket_to_sector(ca,
+					ja->buckets[ja->cur_idx]) +
+					ca->mi.bucket_size -
+					ja->sectors_free,
+				  .dev = ca->dev_idx,
+		});
 
-	extent_for_each_ptr(e, ptr) {
-		ca = bch_dev_bkey_exists(c, ptr->dev);
+		ja->sectors_free -= sectors;
+		ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
 
-		BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW ||
-		       ca->journal.sectors_free <= sectors);
-		ca->journal.sectors_free -= sectors;
+		*replicas += ca->mi.durability;
+
+		if (*replicas >= replicas_want)
+			break;
 	}
+}
 
-	replicas = bch2_extent_nr_ptrs(e.c);
+/**
+ * journal_next_bucket - move on to the next journal bucket if possible
+ */
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+			       unsigned sectors)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	struct journal_device *ja;
+	struct bch_dev *ca;
+	struct dev_alloc_list devs_sorted;
+	unsigned i, replicas = 0, replicas_want =
+		READ_ONCE(c->opts.metadata_replicas);
 
 	rcu_read_lock();
+
 	devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe,
 					  &c->rw_devs[BCH_DATA_JOURNAL]);
 
+	spin_lock(&j->lock);
+
+	__journal_write_alloc(j, w, &devs_sorted,
+			      sectors, &replicas, replicas_want);
+
+	if (replicas >= replicas_want)
+		goto done;
+
 	for (i = 0; i < devs_sorted.nr; i++) {
 		ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
 		if (!ca)
 			continue;
 
-		if (!ca->mi.durability)
-			continue;
-
 		ja = &ca->journal;
-		if (!ja->nr)
-			continue;
-
-		if (replicas >= replicas_want)
-			break;
-
-		/*
-		 * Check that we can use this device, and aren't already using
-		 * it:
-		 */
-		if (bch2_extent_has_device(e.c, ca->dev_idx) ||
-		    !journal_dev_buckets_available(j, ca) ||
-		    sectors > ca->mi.bucket_size)
-			continue;
-
-		bch2_dev_stripe_increment(c, ca, &j->wp.stripe);
 
-		ja->sectors_free = ca->mi.bucket_size - sectors;
-		ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
-		ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
-
-		extent_ptr_append(bkey_i_to_extent(&j->key),
-			(struct bch_extent_ptr) {
-				  .offset = bucket_to_sector(ca,
-					ja->buckets[ja->cur_idx]),
-				  .dev = ca->dev_idx,
-		});
-
-		replicas += ca->mi.durability;
+		if (sectors > ja->sectors_free &&
+		    sectors <= ca->mi.bucket_size &&
+		    journal_dev_buckets_available(j, ja)) {
+			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+			ja->sectors_free = ca->mi.bucket_size;
+		}
 	}
-	rcu_read_unlock();
 
-	j->prev_buf_sectors = 0;
+	__journal_write_alloc(j, w, &devs_sorted,
+			      sectors, &replicas, replicas_want);
+done:
+	if (replicas >= replicas_want)
+		j->prev_buf_sectors = 0;
 
-	bkey_copy(&w->key, &j->key);
 	spin_unlock(&j->lock);
+	rcu_read_unlock();
 
-	if (replicas < c->opts.metadata_replicas_required)
-		return -EROFS;
-
-	BUG_ON(!replicas);
-
-	return 0;
+	return replicas >= replicas_want ? 0 : -EROFS;
 }
 
 static void journal_write_compact(struct jset *jset)
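
journal_write_alloc() now works in two passes over the sorted device list: __journal_write_alloc() first tries each device's current bucket, and only if the entry is still short of replicas_want does the caller advance devices to a fresh bucket and call it again. A simplified userspace sketch of that two-pass pattern, with invented device state (not bcachefs code):

#include <stdio.h>

struct dev {
	unsigned sectors_free;	/* left in current bucket */
	unsigned bucket_size;
	unsigned free_buckets;
	int used;		/* already holds a pointer for this entry */
};

static void write_alloc(struct dev *devs, unsigned nr, unsigned sectors,
			unsigned *replicas, unsigned want)
{
	for (unsigned i = 0; i < nr && *replicas < want; i++) {
		if (devs[i].used || sectors > devs[i].sectors_free)
			continue;
		devs[i].sectors_free -= sectors;
		devs[i].used = 1;
		(*replicas)++;
	}
}

int main(void)
{
	struct dev devs[] = {
		{ .sectors_free = 16,  .bucket_size = 256, .free_buckets = 2 },
		{ .sectors_free = 200, .bucket_size = 256, .free_buckets = 1 },
	};
	unsigned replicas = 0, want = 2, sectors = 64;

	write_alloc(devs, 2, sectors, &replicas, want);	/* first pass */

	if (replicas < want)	/* advance short devices to a fresh bucket */
		for (unsigned i = 0; i < 2; i++)
			if (!devs[i].used &&
			    sectors > devs[i].sectors_free &&
			    devs[i].free_buckets) {
				devs[i].free_buckets--;
				devs[i].sectors_free = devs[i].bucket_size;
			}

	write_alloc(devs, 2, sectors, &replicas, want);	/* second pass */

	printf("%u of %u replicas allocated\n", replicas, want);
	return 0;
}
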
@@ -1376,9 +1277,6 @@ void bch2_journal_write(struct closure *cl)
 	}
 no_io:
-	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
-		ptr->offset += sectors;
-
 	bch2_bucket_seq_cleanup(c);
 
 	continue_at(cl, journal_write_done, system_highpri_wq);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
@@ -126,7 +126,8 @@ void bch2_journal_reclaim_fast(struct journal *j)
 	 * Unpin journal entries whose reference counts reached zero, meaning
 	 * all btree nodes got written out
 	 */
-	while (!atomic_read(&fifo_peek_front(&j->pin).count)) {
+	while (!fifo_empty(&j->pin) &&
+	       !atomic_read(&fifo_peek_front(&j->pin).count)) {
 		BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
 		BUG_ON(!fifo_pop(&j->pin, temp));
 		popped = true;
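
The journal_reclaim.c change is a guard: the unpin loop previously peeked at the front of j->pin without first checking that the FIFO still had elements, so fully draining it read a stale slot. A toy sketch of the before/after loop condition, with a simplified FIFO (not bcachefs code):

#include <stdio.h>

struct fifo { int data[8]; unsigned front, back; };

static int fifo_empty(struct fifo *f)       { return f->front == f->back; }
static int *fifo_peek_front(struct fifo *f) { return &f->data[f->front % 8]; }
static void fifo_pop(struct fifo *f)        { f->front++; }

int main(void)
{
	/* three entries, all with refcount zero */
	struct fifo pin = { .data = { 0, 0, 0 }, .front = 0, .back = 3 };

	/* before: while (!*fifo_peek_front(&pin)) — keeps peeking after
	 * the FIFO drains. After: check emptiness first. */
	while (!fifo_empty(&pin) && !*fifo_peek_front(&pin))
		fifo_pop(&pin);

	printf("entries left: %u\n", pin.back - pin.front);
	return 0;
}
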
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
@@ -185,7 +185,6 @@ struct journal {
 	struct list_head	seq_blacklist;
 	struct journal_seq_blacklist *new_blacklist;
 
-	BKEY_PADDED(key);
 	struct write_point	wp;
 	spinlock_t		err_lock;