nexedi / linux · Commits · f2bdfda9

Commit f2bdfda9, authored Jul 22, 2016 by Dave Chinner

    Merge branch 'xfs-4.8-misc-fixes-4' into for-next

Parents: dc4113d2, 72ccbbe1

Showing 11 changed files with 259 additions and 103 deletions (+259 -103)
fs/xfs/libxfs/xfs_da_btree.c    +29  -30
fs/xfs/xfs_aops.c               +26   -3
fs/xfs/xfs_buf_item.c            +1   -0
fs/xfs/xfs_dquot.c               +1   -0
fs/xfs/xfs_dquot_item.c          +2   -0
fs/xfs/xfs_extfree_item.c        +2   -0
fs/xfs/xfs_file.c                +2   -2
fs/xfs/xfs_inode_item.c          +1   -0
fs/xfs/xfs_log_cil.c           +194  -64
fs/xfs/xfs_super.c               +0   -4
fs/xfs/xfs_trans.h               +1   -0
fs/xfs/libxfs/xfs_da_btree.c

@@ -356,7 +356,6 @@ xfs_da3_split(
 	struct xfs_da_state_blk	*newblk;
 	struct xfs_da_state_blk	*addblk;
 	struct xfs_da_intnode	*node;
-	struct xfs_buf		*bp;
 	int			max;
 	int			action = 0;
 	int			error;
@@ -397,7 +396,9 @@ xfs_da3_split(
 			break;
 		}
 		/*
-		 * Entry wouldn't fit, split the leaf again.
+		 * Entry wouldn't fit, split the leaf again. The new
+		 * extrablk will be consumed by xfs_da3_node_split if
+		 * the node is split.
 		 */
 		state->extravalid = 1;
 		if (state->inleaf) {
@@ -445,6 +446,14 @@ xfs_da3_split(
 	if (!addblk)
 		return 0;

+	/*
+	 * xfs_da3_node_split() should have consumed any extra blocks we added
+	 * during a double leaf split in the attr fork. This is guaranteed as
+	 * we can't be here if the attr fork only has a single leaf block.
+	 */
+	ASSERT(state->extravalid == 0 ||
+	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
+
 	/*
 	 * Split the root node.
 	 */
@@ -457,43 +466,33 @@ xfs_da3_split(
 	}

 	/*
-	 * Update pointers to the node which used to be block 0 and
-	 * just got bumped because of the addition of a new root node.
-	 * There might be three blocks involved if a double split occurred,
-	 * and the original block 0 could be at any position in the list.
+	 * Update pointers to the node which used to be block 0 and just got
+	 * bumped because of the addition of a new root node. Note that the
+	 * original block 0 could be at any position in the list of blocks in
+	 * the tree.
 	 *
-	 * Note: the magic numbers and sibling pointers are in the same
-	 * physical place for both v2 and v3 headers (by design). Hence it
-	 * doesn't matter which version of the xfs_da_intnode structure we use
-	 * here as the result will be the same using either structure.
+	 * Note: the magic numbers and sibling pointers are in the same physical
+	 * place for both v2 and v3 headers (by design). Hence it doesn't matter
+	 * which version of the xfs_da_intnode structure we use here as the
+	 * result will be the same using either structure.
 	 */
 	node = oldblk->bp->b_addr;
 	if (node->hdr.info.forw) {
-		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
-			bp = addblk->bp;
-		} else {
-			ASSERT(state->extravalid);
-			bp = state->extrablk.bp;
-		}
-		node = bp->b_addr;
+		ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
+		node = addblk->bp->b_addr;
 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
-		xfs_trans_log_buf(state->args->trans, bp,
-		    XFS_DA_LOGRANGE(node, &node->hdr.info,
-		    sizeof(node->hdr.info)));
+		xfs_trans_log_buf(state->args->trans, addblk->bp,
+				  XFS_DA_LOGRANGE(node, &node->hdr.info,
+				  sizeof(node->hdr.info)));
 	}
 	node = oldblk->bp->b_addr;
 	if (node->hdr.info.back) {
-		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
-			bp = addblk->bp;
-		} else {
-			ASSERT(state->extravalid);
-			bp = state->extrablk.bp;
-		}
-		node = bp->b_addr;
+		ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
+		node = addblk->bp->b_addr;
 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
-		xfs_trans_log_buf(state->args->trans, bp,
-		    XFS_DA_LOGRANGE(node, &node->hdr.info,
-		    sizeof(node->hdr.info)));
+		xfs_trans_log_buf(state->args->trans, addblk->bp,
+				  XFS_DA_LOGRANGE(node, &node->hdr.info,
+				  sizeof(node->hdr.info)));
 	}
 	addblk->bp = NULL;
 	return 0;
fs/xfs/xfs_aops.c

@@ -87,6 +87,12 @@ xfs_find_bdev_for_inode(
  * We're now finished for good with this page. Update the page state via the
  * associated buffer_heads, paying attention to the start and end offsets that
  * we need to process on the page.
+ *
+ * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
+ * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
+ * the page at all, as we may be racing with memory reclaim and it can free both
+ * the bufferhead chain and the page as it will see the page as clean and
+ * unused.
  */
 static void
 xfs_finish_page_writeback(
@@ -95,8 +101,9 @@ xfs_finish_page_writeback(
 	int			error)
 {
 	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
-	struct buffer_head	*head, *bh;
+	struct buffer_head	*head, *bh, *next;
 	unsigned int		off = 0;
+	unsigned int		bsize;

 	ASSERT(bvec->bv_offset < PAGE_SIZE);
 	ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
@@ -105,15 +112,17 @@ xfs_finish_page_writeback(
 	bh = head = page_buffers(bvec->bv_page);

+	bsize = bh->b_size;
 	do {
+		next = bh->b_this_page;
 		if (off < bvec->bv_offset)
 			goto next_bh;
 		if (off > end)
 			break;
 		bh->b_end_io(bh, !error);
 next_bh:
-		off += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
+		off += bsize;
+	} while ((bh = next) != head);
 }

 /*
@@ -1040,6 +1049,20 @@ xfs_vm_releasepage(
 	trace_xfs_releasepage(page->mapping->host, page, 0, 0);

+	/*
+	 * mm accommodates an old ext3 case where clean pages might not have had
+	 * the dirty bit cleared. Thus, it can send actual dirty pages to
+	 * ->releasepage() via shrink_active_list(). Conversely,
+	 * block_invalidatepage() can send pages that are still marked dirty
+	 * but otherwise have invalidated buffers.
+	 *
+	 * We've historically freed buffers on the latter. Instead, quietly
+	 * filter out all dirty pages to avoid spurious buffer state warnings.
+	 * This can likely be removed once shrink_active_list() is fixed.
+	 */
+	if (PageDirty(page))
+		return 0;
+
 	xfs_count_page_state(page, &delalloc, &unwritten);

 	if (WARN_ON_ONCE(delalloc))
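The xfs_finish_page_writeback() change above follows a general rule: once a completion callback that may free an object has run, nothing belonging to that object may be touched, so anything still needed (here the next buffer and the block size) must be read beforehand. A minimal userspace sketch of the same pattern, with hypothetical node/callback names standing in for buffer_heads (not kernel code), might look like this:

#include <stdlib.h>

/* Hypothetical stand-in for a buffer_head on a circular per-page list. */
struct node {
	struct node	*next;			/* like bh->b_this_page */
	size_t		 size;			/* like bh->b_size */
	void		(*end_io)(struct node *); /* may free the node */
};

static void free_node(struct node *n)
{
	free(n);
}

/*
 * Run every completion callback on the circular list. The next pointer and
 * the element size are cached *before* the callback runs, because once the
 * callback has fired the node (and its peers) may already be gone - the same
 * reason xfs_finish_page_writeback() now caches 'next' and 'bsize' before
 * calling bh->b_end_io().
 */
static void finish_all(struct node *head)
{
	struct node	*bh = head;
	struct node	*next;
	size_t		 bsize = head->size;	/* all elements share one size */
	size_t		 off = 0;

	do {
		next = bh->next;		/* cache before bh may be freed */
		bh->end_io(bh);			/* do not touch bh after this */
		off += bsize;			/* use the cached size */
	} while ((bh = next) != head);		/* pointer-value comparison only */
	(void)off;
}

int main(void)
{
	struct node *a = malloc(sizeof(*a));
	struct node *b = malloc(sizeof(*b));

	a->next = b;
	b->next = a;
	a->size = b->size = 4096;
	a->end_io = b->end_io = free_node;
	finish_all(a);
	return 0;
}

The cached bsize also relies on every buffer on the page having the same size, which is the same assumption the kernel change makes.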
fs/xfs/xfs_buf_item.c

@@ -957,6 +957,7 @@ xfs_buf_item_free(
 	xfs_buf_log_item_t	*bip)
 {
 	xfs_buf_item_free_format(bip);
+	kmem_free(bip->bli_item.li_lv_shadow);
 	kmem_zone_free(xfs_buf_item_zone, bip);
 }
fs/xfs/xfs_dquot.c

@@ -74,6 +74,7 @@ xfs_qm_dqdestroy(
 {
 	ASSERT(list_empty(&dqp->q_lru));

+	kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
 	mutex_destroy(&dqp->q_qlock);

 	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
fs/xfs/xfs_dquot_item.c

@@ -370,6 +370,8 @@ xfs_qm_qoffend_logitem_committed(
 	spin_lock(&ailp->xa_lock);
 	xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);

+	kmem_free(qfs->qql_item.li_lv_shadow);
+	kmem_free(lip->li_lv_shadow);
 	kmem_free(qfs);
 	kmem_free(qfe);
 	return (xfs_lsn_t)-1;
fs/xfs/xfs_extfree_item.c

@@ -40,6 +40,7 @@ void
 xfs_efi_item_free(
 	struct xfs_efi_log_item	*efip)
 {
+	kmem_free(efip->efi_item.li_lv_shadow);
 	if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
 		kmem_free(efip);
 	else
@@ -300,6 +301,7 @@ static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
 STATIC void
 xfs_efd_item_free(struct xfs_efd_log_item *efdp)
 {
+	kmem_free(efdp->efd_item.li_lv_shadow);
 	if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
 		kmem_free(efdp);
 	else
fs/xfs/xfs_file.c

@@ -327,7 +327,7 @@ xfs_file_dio_aio_read(
 	return ret;
 }

-STATIC ssize_t
+static noinline ssize_t
 xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
@@ -706,7 +706,7 @@ xfs_file_dio_aio_write(
 	return ret;
 }

-STATIC ssize_t
+static noinline ssize_t
 xfs_file_dax_write(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
fs/xfs/xfs_inode_item.c

@@ -651,6 +651,7 @@ void
 xfs_inode_item_destroy(
 	xfs_inode_t	*ip)
 {
+	kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
 	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
 }
fs/xfs/xfs_log_cil.c

@@ -78,6 +78,157 @@ xlog_cil_init_post_recovery(
 	log->l_cilp->xc_ctx->sequence = 1;
 }

+static inline int
+xlog_cil_iovec_space(
+	uint	niovecs)
+{
+	return round_up((sizeof(struct xfs_log_vec) +
+					niovecs * sizeof(struct xfs_log_iovec)),
+			sizeof(uint64_t));
+}
+
+/*
+ * Allocate or pin log vector buffers for CIL insertion.
+ *
+ * The CIL currently uses disposable buffers for copying a snapshot of the
+ * modified items into the log during a push. The biggest problem with this is
+ * the requirement to allocate the disposable buffer during the commit if:
+ *	a) does not exist; or
+ *	b) it is too small
+ *
+ * If we do this allocation within xlog_cil_insert_format_items(), it is done
+ * under the xc_ctx_lock, which means that a CIL push cannot occur during
+ * the memory allocation. This means that we have a potential deadlock situation
+ * under low memory conditions when we have lots of dirty metadata pinned in
+ * the CIL and we need a CIL commit to occur to free memory.
+ *
+ * To avoid this, we need to move the memory allocation outside the
+ * xc_ctx_lock, but because the log vector buffers are disposable, that opens
+ * up a TOCTOU race condition w.r.t. the CIL committing and removing the log
+ * vector buffers between the check and the formatting of the item into the
+ * log vector buffer within the xc_ctx_lock.
+ *
+ * Because the log vector buffer needs to be unchanged during the CIL push
+ * process, we cannot share the buffer between the transaction commit (which
+ * modifies the buffer) and the CIL push context that is writing the changes
+ * into the log. This means skipping preallocation of buffer space is
+ * unreliable, but we most definitely do not want to be allocating and freeing
+ * buffers unnecessarily during commits when overwrites can be done safely.
+ *
+ * The simplest solution to this problem is to allocate a shadow buffer when a
+ * log item is committed for the second time, and then to only use this buffer
+ * if necessary. The buffer can remain attached to the log item until such time
+ * it is needed, and this is the buffer that is reallocated to match the size of
+ * the incoming modification. Then during the formatting of the item we can swap
+ * the active buffer with the new one if we can't reuse the existing buffer. We
+ * don't free the old buffer as it may be reused on the next modification if
+ * it's size is right, otherwise we'll free and reallocate it at that point.
+ *
+ * This function builds a vector for the changes in each log item in the
+ * transaction. It then works out the length of the buffer needed for each log
+ * item, allocates them and attaches the vector to the log item in preparation
+ * for the formatting step which occurs under the xc_ctx_lock.
+ *
+ * While this means the memory footprint goes up, it avoids the repeated
+ * alloc/free pattern that repeated modifications of an item would otherwise
+ * cause, and hence minimises the CPU overhead of such behaviour.
+ */
+static void
+xlog_cil_alloc_shadow_bufs(
+	struct xlog		*log,
+	struct xfs_trans	*tp)
+{
+	struct xfs_log_item_desc *lidp;
+
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_item *lip = lidp->lid_item;
+		struct xfs_log_vec *lv;
+		int	niovecs = 0;
+		int	nbytes = 0;
+		int	buf_size;
+		bool	ordered = false;
+
+		/* Skip items which aren't dirty in this transaction. */
+		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+			continue;
+
+		/* get number of vecs and size of data to be stored */
+		lip->li_ops->iop_size(lip, &niovecs, &nbytes);
+
+		/*
+		 * Ordered items need to be tracked but we do not wish to write
+		 * them. We need a logvec to track the object, but we do not
+		 * need an iovec or buffer to be allocated for copying data.
+		 */
+		if (niovecs == XFS_LOG_VEC_ORDERED) {
+			ordered = true;
+			niovecs = 0;
+			nbytes = 0;
+		}
+
+		/*
+		 * We 64-bit align the length of each iovec so that the start
+		 * of the next one is naturally aligned. We'll need to
+		 * account for that slack space here. Then round nbytes up
+		 * to 64-bit alignment so that the initial buffer alignment is
+		 * easy to calculate and verify.
+		 */
+		nbytes += niovecs * sizeof(uint64_t);
+		nbytes = round_up(nbytes, sizeof(uint64_t));
+
+		/*
+		 * The data buffer needs to start 64-bit aligned, so round up
+		 * that space to ensure we can align it appropriately and not
+		 * overrun the buffer.
+		 */
+		buf_size = nbytes + xlog_cil_iovec_space(niovecs);
+
+		/*
+		 * if we have no shadow buffer, or it is too small, we need to
+		 * reallocate it.
+		 */
+		if (!lip->li_lv_shadow ||
+		    buf_size > lip->li_lv_shadow->lv_size) {
+
+			/*
+			 * We free and allocate here as a realloc would copy
+			 * unecessary data. We don't use kmem_zalloc() for the
+			 * same reason - we don't need to zero the data area in
+			 * the buffer, only the log vector header and the iovec
+			 * storage.
+			 */
+			kmem_free(lip->li_lv_shadow);
+
+			lv = kmem_alloc(buf_size, KM_SLEEP|KM_NOFS);
+			memset(lv, 0, xlog_cil_iovec_space(niovecs));
+
+			lv->lv_item = lip;
+			lv->lv_size = buf_size;
+			if (ordered)
+				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+			else
+				lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
+			lip->li_lv_shadow = lv;
+		} else {
+			/* same or smaller, optimise common overwrite case */
+			lv = lip->li_lv_shadow;
+			if (ordered)
+				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+			else
+				lv->lv_buf_len = 0;
+			lv->lv_bytes = 0;
+			lv->lv_next = NULL;
+		}
+
+		/* Ensure the lv is set up according to ->iop_size */
+		lv->lv_niovecs = niovecs;
+
+		/* The allocated data region lies beyond the iovec region */
+		lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs);
+	}
+}
+
 /*
  * Prepare the log item for insertion into the CIL. Calculate the difference in
  * log space and vectors it will consume, and if it is a new item pin it as
@@ -100,16 +251,19 @@ xfs_cil_prepare_item(
 	/*
 	 * If there is no old LV, this is the first time we've seen the item in
 	 * this CIL context and so we need to pin it. If we are replacing the
-	 * old_lv, then remove the space it accounts for and free it.
+	 * old_lv, then remove the space it accounts for and make it the shadow
+	 * buffer for later freeing. In both cases we are now switching to the
+	 * shadow buffer, so update the the pointer to it appropriately.
 	 */
-	if (!old_lv)
+	if (!old_lv) {
 		lv->lv_item->li_ops->iop_pin(lv->lv_item);
-	else if (old_lv != lv) {
+		lv->lv_item->li_lv_shadow = NULL;
+	} else if (old_lv != lv) {
 		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);

 		*diff_len -= old_lv->lv_bytes;
 		*diff_iovecs -= old_lv->lv_niovecs;
-		kmem_free(old_lv);
+		lv->lv_item->li_lv_shadow = old_lv;
 	}

 	/* attach new log vector to log item */
@@ -133,11 +287,13 @@ xfs_cil_prepare_item(
  * write it out asynchronously without needing to relock the object that was
  * modified at the time it gets written into the iclog.
  *
- * This function builds a vector for the changes in each log item in the
- * transaction. It then works out the length of the buffer needed for each log
- * item, allocates them and formats the vector for the item into the buffer.
- * The buffer is then attached to the log item are then inserted into the
- * Committed Item List for tracking until the next checkpoint is written out.
+ * This function takes the prepared log vectors attached to each log item, and
+ * formats the changes into the log vector buffer. The buffer it uses is
+ * dependent on the current state of the vector in the CIL - the shadow lv is
+ * guaranteed to be large enough for the current modification, but we will only
+ * use that if we can't reuse the existing lv. If we can't reuse the existing
+ * lv, then simple swap it out for the shadow lv. We don't free it - that is
+ * done lazily either by th enext modification or the freeing of the log item.
  *
  * We don't set up region headers during this process; we simply copy the
  * regions into the flat buffer. We can do this because we still have to do a
@@ -170,59 +326,29 @@ xlog_cil_insert_format_items(
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
 		struct xfs_log_item *lip = lidp->lid_item;
 		struct xfs_log_vec *lv;
-		struct xfs_log_vec *old_lv;
-		int	niovecs = 0;
-		int	nbytes = 0;
-		int	buf_size;
+		struct xfs_log_vec *old_lv = NULL;
+		struct xfs_log_vec *shadow;
 		bool	ordered = false;

 		/* Skip items which aren't dirty in this transaction. */
 		if (!(lidp->lid_flags & XFS_LID_DIRTY))
 			continue;

-		/* get number of vecs and size of data to be stored */
-		lip->li_ops->iop_size(lip, &niovecs, &nbytes);
-
-		/* Skip items that do not have any vectors for writing */
-		if (!niovecs)
-			continue;
-
 		/*
-		 * Ordered items need to be tracked but we do not wish to write
-		 * them. We need a logvec to track the object, but we do not
-		 * need an iovec or buffer to be allocated for copying data.
+		 * The formatting size information is already attached to
+		 * the shadow lv on the log item.
 		 */
-		if (niovecs == XFS_LOG_VEC_ORDERED) {
+		shadow = lip->li_lv_shadow;
+		if (shadow->lv_buf_len == XFS_LOG_VEC_ORDERED)
 			ordered = true;
-			niovecs = 0;
-			nbytes = 0;
-		}

-		/*
-		 * We 64-bit align the length of each iovec so that the start
-		 * of the next one is naturally aligned. We'll need to
-		 * account for that slack space here. Then round nbytes up
-		 * to 64-bit alignment so that the initial buffer alignment is
-		 * easy to calculate and verify.
-		 */
-		nbytes += niovecs * sizeof(uint64_t);
-		nbytes = round_up(nbytes, sizeof(uint64_t));
-
-		/* grab the old item if it exists for reservation accounting */
-		old_lv = lip->li_lv;
-
-		/*
-		 * The data buffer needs to start 64-bit aligned, so round up
-		 * that space to ensure we can align it appropriately and not
-		 * overrun the buffer.
-		 */
-		buf_size = nbytes +
-			   round_up((sizeof(struct xfs_log_vec) +
-				     niovecs * sizeof(struct xfs_log_iovec)),
-				    sizeof(uint64_t));
+		/* Skip items that do not have any vectors for writing */
+		if (!shadow->lv_niovecs && !ordered)
+			continue;

 		/* compare to existing item size */
-		if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
+		old_lv = lip->li_lv;
+		if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
 			/* same or smaller, optimise common overwrite case */
 			lv = lip->li_lv;
 			lv->lv_next = NULL;
@@ -236,32 +362,29 @@ xlog_cil_insert_format_items(
 			 */
 			*diff_iovecs -= lv->lv_niovecs;
 			*diff_len -= lv->lv_bytes;
+
+			/* Ensure the lv is set up according to ->iop_size */
+			lv->lv_niovecs = shadow->lv_niovecs;
+
+			/* reset the lv buffer information for new formatting */
+			lv->lv_buf_len = 0;
+			lv->lv_bytes = 0;
+			lv->lv_buf = (char *)lv +
+					xlog_cil_iovec_space(lv->lv_niovecs);
 		} else {
-			/* allocate new data chunk */
-			lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+			/* switch to shadow buffer! */
+			lv = shadow;
 			lv->lv_item = lip;
-			lv->lv_size = buf_size;
 			if (ordered) {
 				/* track as an ordered logvec */
 				ASSERT(lip->li_lv == NULL);
-				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
 				goto insert;
 			}
-			lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
 		}

-		/* Ensure the lv is set up according to ->iop_size */
-		lv->lv_niovecs = niovecs;
-
-		/* The allocated data region lies beyond the iovec region */
-		lv->lv_buf_len = 0;
-		lv->lv_bytes = 0;
-		lv->lv_buf = (char *)lv + buf_size - nbytes;
 		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
-
 		lip->li_ops->iop_format(lip, lv);
 insert:
-		ASSERT(lv->lv_buf_len <= nbytes);
 		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
 	}
 }
@@ -783,6 +906,13 @@ xfs_log_commit_cil(
 	struct xlog		*log = mp->m_log;
 	struct xfs_cil		*cil = log->l_cilp;

+	/*
+	 * Do all necessary memory allocation before we lock the CIL.
+	 * This ensures the allocation does not deadlock with a CIL
+	 * push in memory reclaim (e.g. from kswapd).
+	 */
+	xlog_cil_alloc_shadow_bufs(log, tp);
+
 	/* lock out background commit */
 	down_read(&cil->xc_ctx_lock);
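The comment block added above xlog_cil_alloc_shadow_bufs() carries the whole design: size and allocate a standby (shadow) buffer per dirty item before taking xc_ctx_lock, then, under the lock, either reuse the currently attached buffer or swap the shadow in, never allocating while the lock is held. A stripped-down sketch of that two-phase shape, using hypothetical names and a plain pthread mutex in place of the CIL context lock (not the kernel implementation), might look like this:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical item carrying an active buffer and a standby (shadow) one. */
struct item {
	char		*buf;		/* like li_lv: in use by the push side */
	size_t		 buf_size;
	char		*shadow;	/* like li_lv_shadow: owned by the committer */
	size_t		 shadow_size;
};

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ xc_ctx_lock */

/*
 * Phase 1 - run with no locks held: make sure a standby buffer big enough
 * for the coming modification exists. Allocation happens here so it can
 * never deadlock against a flush that needs ctx_lock.
 */
static void alloc_shadow(struct item *ip, size_t need)
{
	if (ip->shadow && ip->shadow_size >= need)
		return;			/* existing standby is big enough */
	free(ip->shadow);
	ip->shadow = malloc(need);
	ip->shadow_size = need;
}

/*
 * Phase 2 - run under the lock: reuse the active buffer if it still fits,
 * otherwise swap the preallocated standby in. Nothing is allocated or freed
 * here; the displaced buffer simply becomes the new standby.
 */
static void format_item(struct item *ip, const void *data, size_t len)
{
	pthread_mutex_lock(&ctx_lock);
	if (!ip->buf || ip->buf_size < len) {
		char	*old = ip->buf;
		size_t	 old_size = ip->buf_size;

		ip->buf = ip->shadow;
		ip->buf_size = ip->shadow_size;
		ip->shadow = old;
		ip->shadow_size = old_size;
	}
	memcpy(ip->buf, data, len);
	pthread_mutex_unlock(&ctx_lock);
}

Usage mirrors the commit path in xfs_log_commit_cil(): call alloc_shadow() first with no locks held, then format_item() under the lock, so that memory reclaim triggered by the allocation can still push the CIL without self-deadlocking.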
fs/xfs/xfs_super.c

@@ -1573,10 +1573,6 @@ xfs_fs_fill_super(
 		}
 	}

-	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
-		xfs_alert(mp,
-	"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
-
 	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;
fs/xfs/xfs_trans.h

@@ -52,6 +52,7 @@ typedef struct xfs_log_item {
 	/* delayed logging */
 	struct list_head		li_cil;		/* CIL pointers */
 	struct xfs_log_vec		*li_lv;		/* active log vector */
+	struct xfs_log_vec		*li_lv_shadow;	/* standby vector */
 	xfs_lsn_t			li_seq;		/* CIL commit seq */
 } xfs_log_item_t;