Commit 9b7fad20 authored Jun 21, 2016 by Dave Chinner

Merge branch 'xfs-4.8-iomap-write' into for-next

Parents: 07931b7b 3c2bdc91

Showing 10 changed files with 367 additions and 776 deletions (+367 −776)
fs/xfs/Kconfig          +1    -0
fs/xfs/xfs_aops.c       +19   -264
fs/xfs/xfs_bmap_util.c  +130  -213
fs/xfs/xfs_file.c       +12   -181
fs/xfs/xfs_inode.h      +2    -1
fs/xfs/xfs_iomap.c      +171  -0
fs/xfs/xfs_iomap.h      +7    -0
fs/xfs/xfs_iops.c       +22   -91
fs/xfs/xfs_pnfs.c       +0    -26
fs/xfs/xfs_trace.h      +3    -0
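The hunks below move several XFS I/O paths off buffer_head based helpers (xfs_get_blocks, block_write_begin/generic_write_end, xfs_iozero) and onto the generic iomap infrastructure, with XFS supplying an ops table (xfs_iomap_ops, added in fs/xfs/xfs_iomap.c). As an orientation aid, here is a condensed sketch of the <linux/iomap.h> interface as it can be inferred from these hunks alone; the field types and any members not exercised here are approximations, not a verbatim copy of the 4.8 header:

/* Sketch only: shape of the iomap interface used by the hunks below.
 * Field types are approximated from how the XFS code assigns them. */
struct iomap {
        sector_t                blkno;  /* disk block, or IOMAP_NULL_BLOCK for holes/delalloc */
        u16                     type;   /* IOMAP_HOLE, IOMAP_DELALLOC, IOMAP_MAPPED, IOMAP_UNWRITTEN */
        loff_t                  offset; /* file offset of the mapping, in bytes */
        u64                     length; /* length of the mapping, in bytes */
        struct block_device     *bdev;
};

struct iomap_ops {
        /* map (and, for writes, reserve or allocate) the requested range */
        int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
                        unsigned flags, struct iomap *iomap);
        /* release whatever the operation did not consume */
        int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length,
                        ssize_t written, unsigned flags, struct iomap *iomap);
};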
fs/xfs/Kconfig

@@ -4,6 +4,7 @@ config XFS_FS
 	depends on (64BIT || LBDAF)
 	select EXPORTFS
 	select LIBCRC32C
+	select FS_IOMAP
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform. It is completely multi-threaded, can
fs/xfs/xfs_aops.c

@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
 	ssize_t			size;
 	int			new = 0;
 
+	BUG_ON(create && !direct);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && direct && offset >= i_size_read(inode))
+	if (!create && offset >= i_size_read(inode))
 		return 0;
 
 	/*
 	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first. For buffered
-	 * writes we already have the exclusive iolock anyway, so avoiding
-	 * a lock roundtrip here by taking the ilock exclusive from the
-	 * beginning is a useful micro optimization.
+	 * a block mapping without an exclusive lock first.
 	 */
-	if (create && !direct) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,7 +1178,6 @@ __xfs_get_blocks(
 	    (imap.br_startblock == HOLESTARTBLOCK ||
 	     imap.br_startblock == DELAYSTARTBLOCK) ||
 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		if (direct || xfs_get_extsz_hint(ip)) {
 		/*
 		 * xfs_iomap_write_direct() expects the shared lock. It
 		 * is unlocked on return.
@@ -1198,23 +1191,6 @@ __xfs_get_blocks(
 			return error;
 		new = 1;
 
-	} else {
-		/*
-		 * Delalloc reservations do not require a transaction,
-		 * we can go on without dropping the lock here. If we
-		 * are allocating a new delalloc block, make sure that
-		 * we set the new flag so that we mark the buffer new so
-		 * that we know that it is newly allocated if the write
-		 * fails.
-		 */
-		if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-			new = 1;
-		error = xfs_iomap_write_delay(ip, offset, size, &imap);
-		if (error)
-			goto out_unlock;
-
-		xfs_iunlock(ip, lockmode);
-	}
 	trace_xfs_get_blocks_alloc(ip, offset, size,
 			ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN : XFS_IO_DELALLOC,
 			&imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
 	}
 
 	/* trim mapping down to size requested */
-	if (direct || size > (1 << inode->i_blkbits))
-		xfs_map_trim_size(inode, iblock, bh_result,
-				  &imap, offset, size);
+	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
 	/*
 	 * For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
 		if (ISUNWRITTEN(&imap))
 			set_buffer_unwritten(bh_result);
 		/* direct IO needs special help */
-		if (create && direct) {
+		if (create) {
 			if (dax_fault)
 				ASSERT(!ISUNWRITTEN(&imap));
 			else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
 	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (imap.br_startblock == DELAYSTARTBLOCK) {
-		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
-	}
+	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
 
 	return 0;
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform. However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
fs/xfs/xfs_bmap_util.c

(diff collapsed in this view: +130 -213)
fs/xfs/xfs_file.c

@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
 }
 
 /*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
 int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+xfs_zero_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		pos,
+	xfs_off_t		count,
+	bool			*did_zero)
 {
-	struct page		*page;
-	struct address_space	*mapping;
-	int			status = 0;
-
-	mapping = VFS_I(ip)->i_mapping;
-	do {
-		unsigned offset, bytes;
-		void *fsdata;
-
-		offset = (pos & (PAGE_SIZE - 1));	/* Within page */
-		bytes = PAGE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-						     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
-
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
-		pos += bytes;
-		count -= bytes;
-	} while (count);
-
-	return status;
+	return iomap_zero_range(VFS_I(ip), pos, count,
+			NULL, &xfs_iomap_ops);
 }
 
 int
@@ -423,49 +380,6 @@ xfs_file_splice_read(
 	return ret;
 }
 
-/*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF.  We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int				/* error (positive) */
-xfs_zero_last_block(
-	struct xfs_inode	*ip,
-	xfs_fsize_t		offset,
-	xfs_fsize_t		isize,
-	bool			*did_zeroing)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
-	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	int			zero_len;
-	int			nimaps = 1;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
-	ASSERT(nimaps > 0);
-
-	/*
-	 * If the block underlying isize is just a hole, then there
-	 * is nothing to zero.
-	 */
-	if (imap.br_startblock == HOLESTARTBLOCK)
-		return 0;
-
-	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-	if (isize + zero_len > offset)
-		zero_len = offset - isize;
-	*did_zeroing = true;
-	return xfs_iozero(ip, isize, zero_len);
-}
-
 /*
  * Zero any on disk space between the current EOF and the new, larger EOF.
  *
@@ -484,94 +398,11 @@ xfs_zero_eof(
 	xfs_fsize_t		isize,		/* current inode size */
 	bool			*did_zeroing)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		start_zero_fsb;
-	xfs_fileoff_t		end_zero_fsb;
-	xfs_fileoff_t		zero_count_fsb;
-	xfs_fileoff_t		last_fsb;
-	xfs_fileoff_t		zero_off;
-	xfs_fsize_t		zero_len;
-	int			nimaps;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 
 	trace_xfs_zero_eof(ip, isize, offset - isize);
-
-	/*
-	 * First handle zeroing the block on which isize resides.
-	 *
-	 * We only zero a part of that block so it is handled specially.
-	 */
-	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-		error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
-		if (error)
-			return error;
-	}
-
-	/*
-	 * Calculate the range between the new size and the old where blocks
-	 * needing to be zeroed may exist.
-	 *
-	 * To get the block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back to a block
-	 * boundary.  We subtract 1 in case the size is exactly on a block
-	 * boundary.
-	 */
-	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-	if (last_fsb == end_zero_fsb) {
-		/*
-		 * The size was only incremented on its last block.
-		 * We took care of that above, so just return.
-		 */
-		return 0;
-	}
-
-	ASSERT(start_zero_fsb <= end_zero_fsb);
-	while (start_zero_fsb <= end_zero_fsb) {
-		nimaps = 1;
-		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
-					  &imap, &nimaps, 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			return error;
-
-		ASSERT(nimaps > 0);
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN ||
-		    imap.br_startblock == HOLESTARTBLOCK) {
-			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-			continue;
-		}
-
-		/*
-		 * There are blocks we need to zero.
-		 */
-		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		if ((zero_off + zero_len) > offset)
-			zero_len = offset - zero_off;
-
-		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error)
-			return error;
-
-		*did_zeroing = true;
-
-		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-	}
-
-	return 0;
+	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
 }
 
 /*
@@ -841,7 +672,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
@@ -1553,7 +1384,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
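Taken together, the xfs_file.c hunks leave the remaining buffered-I/O paths driven by three generic iomap helpers, all parameterised with &xfs_iomap_ops. A schematic recap of the call sites after this merge, condensed from the hunks above (not a verbatim copy of the file):

/* range zeroing: xfs_zero_range()/xfs_zero_eof() now delegate here */
error = iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);

/* buffered writes: replaces generic_perform_write() */
ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);

/* write faults on shared mappings: replaces block_page_mkwrite() */
ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);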
fs/xfs/xfs_inode.h

@@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
 			enum xfs_prealloc_flags flags);
 int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_fsize_t isize, bool *did_zeroing);
-int	xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
+		bool *did_zero);
 loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,
 		loff_t eof, int whence);
fs/xfs/xfs_iomap.c

@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -940,3 +941,173 @@ xfs_iomap_write_unwritten(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
+
+void
+xfs_bmbt_to_iomap(
+	struct xfs_inode	*ip,
+	struct iomap		*iomap,
+	struct xfs_bmbt_irec	*imap)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (imap->br_startblock == HOLESTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_DELALLOC;
+	} else {
+		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+		if (imap->br_state == XFS_EXT_UNWRITTEN)
+			iomap->type = IOMAP_UNWRITTEN;
+		else
+			iomap->type = IOMAP_MAPPED;
+	}
+	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+}
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return !nimaps || imap->br_startblock == HOLESTARTBLOCK ||
+		imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			nimaps = 1, error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+		length = mp->m_super->s_maxbytes - offset;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return error;
+	}
+
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+		/*
+		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+		 * pages to keep the chunks of work done where somewhat symmetric
+		 * with the work writeback does. This is a completely arbitrary
+		 * number pulled out of thin air as a best guess for initial
+		 * testing.
+		 *
+		 * Note that the values needs to be less than 32-bits wide until
+		 * the lower level functions are updated.
+		 */
+		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+		if (xfs_get_extsz_hint(ip)) {
+			/*
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
+			 */
+			xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+			error = xfs_iomap_write_direct(ip, offset, length, &imap,
+						       nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, length, &imap);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+
+		if (error)
+			return error;
+
+		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else if (nimaps) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+		iomap->offset = offset;
+		iomap->length = length;
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+
+	start_fsb = XFS_B_TO_FSB(mp, offset + written);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	/*
+	 * Trim back delalloc blocks if we didn't manage to write the whole
+	 * range reserved.
+	 *
+	 * We don't need to care about racing delalloc as we hold i_mutex
+	 * across the reserve/allocate/unreserve calls. If there are delalloc
+	 * blocks in the range, they are ours.
+	 */
+	if (start_fsb < end_fsb) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+					       end_fsb - start_fsb);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert(mp, "%s: unable to clean up ino %lld",
+				__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+				length, written);
+	return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+	.iomap_begin		= xfs_file_iomap_begin,
+	.iomap_end		= xfs_file_iomap_end,
+};
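For context, the two callbacks registered in xfs_iomap_ops are not called directly by xfs_file.c; they are driven by the generic iomap code. Below is a rough sketch of that driver loop, modeled on what fs/iomap.c's iomap_apply() does in this kernel generation. The function name, the iomap_actor_t callback type and the error-handling details here are simplified assumptions for illustration, not a verbatim copy of the real helper:

/* Sketch of the generic begin -> actor -> end sequence that consumes
 * xfs_iomap_ops (simplified; see fs/iomap.c for the real implementation). */
static loff_t
iomap_apply_sketch(struct inode *inode, loff_t pos, loff_t length,
		unsigned flags, struct iomap_ops *ops, void *data,
		iomap_actor_t actor)
{
	struct iomap iomap = { 0 };
	loff_t written = 0, ret;

	/* ask the filesystem to map (and, for writes, reserve) the range */
	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
	if (ret)
		return ret;

	/* trim the request to the extent the filesystem actually returned */
	if (iomap.offset + iomap.length < pos + length)
		length = iomap.offset + iomap.length - pos;

	/* copy data, zero pages or emit fiemap records within that extent */
	written = actor(inode, pos, length, data, &iomap);

	/* let the filesystem release whatever was reserved but not used,
	 * e.g. xfs_file_iomap_end_delalloc() punching unused delalloc blocks */
	if (ops->iomap_end)
		ret = ops->iomap_end(inode, pos, length,
				written > 0 ? written : 0, flags, &iomap);

	return written ? written : ret;
}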
fs/xfs/xfs_iomap.h

@@ -18,6 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;

@@ -29,4 +31,9 @@ int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
 			struct xfs_bmbt_irec *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 
+void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+		struct xfs_bmbt_irec *);
+
+extern struct iomap_ops xfs_iomap_ops;
+
 #endif /* __XFS_IOMAP_H__*/
fs/xfs/xfs_iops.c

@@ -38,12 +38,13 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/slab.h>
 
 /*
@@ -800,20 +801,30 @@ xfs_setattr_size(
 	if (error)
 		return error;
 
+	/*
+	 * Wait for all direct I/O to complete.
+	 */
+	inode_dio_wait(inode);
+
 	/*
 	 * File data changes must be complete before we start the transaction to
 	 * modify the inode. This needs to be done before joining the inode to
 	 * the transaction because the inode cannot be unlocked once it is a
 	 * part of the transaction.
 	 *
-	 * Start with zeroing any data block beyond EOF that we may expose on
-	 * file extension.
+	 * Start with zeroing any data beyond EOF that we may expose on file
+	 * extension, or zeroing out the rest of the block on a downward
+	 * truncate.
 	 */
 	if (newsize > oldsize) {
 		error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
-		if (error)
-			return error;
+	} else {
+		error = iomap_truncate_page(inode, newsize, &did_zeroing,
+				&xfs_iomap_ops);
 	}
+
+	if (error)
+		return error;
 
 	/*
 	 * We are going to log the inode size change in this transaction so
@@ -823,17 +834,14 @@ xfs_setattr_size(
 	 * problem. Note that this includes any block zeroing we did above;
 	 * otherwise those blocks may not be zeroed after a crash.
 	 */
-	if (newsize > ip->i_d.di_size &&
-	    (oldsize != ip->i_d.di_size || did_zeroing)) {
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						ip->i_d.di_size, newsize);
 		if (error)
 			return error;
 	}
 
-	/* Now wait for all direct I/O to complete. */
-	inode_dio_wait(inode);
-
 	/*
 	 * We've already locked out new page faults, so now we can safely remove
 	 * pages from the page cache knowing they won't get refaulted until we
@@ -851,13 +859,6 @@ xfs_setattr_size(
 	 * to hope that the caller sees ENOMEM and retries the truncate
 	 * operation.
 	 */
-	if (IS_DAX(inode))
-		error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
-	else
-		error = block_truncate_page(inode->i_mapping, newsize,
-					    xfs_get_blocks);
-	if (error)
-		return error;
 	truncate_setsize(inode, newsize);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -998,51 +999,6 @@ xfs_vn_update_time(
 	return xfs_trans_commit(tp);
 }
 
-#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-	void			**arg,
-	struct getbmapx		*bmv,
-	int			*full)
-{
-	int			error;
-	struct fiemap_extent_info *fieinfo = *arg;
-	u32			fiemap_flags = 0;
-	u64			logical, physical, length;
-
-	/* Do nothing for a hole */
-	if (bmv->bmv_block == -1LL)
-		return 0;
-
-	logical = BBTOB(bmv->bmv_offset);
-	physical = BBTOB(bmv->bmv_block);
-	length = BBTOB(bmv->bmv_length);
-
-	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-		fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
-				 FIEMAP_EXTENT_UNKNOWN);
-		physical = 0;	/* no block yet */
-	}
-	if (bmv->bmv_oflags & BMV_OF_LAST)
-		fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, fiemap_flags);
-	if (error > 0) {
-		error = 0;
-		*full = 1;	/* user array now full */
-	}
-
-	return error;
-}
-
 STATIC int
 xfs_vn_fiemap(
 	struct inode		*inode,
@@ -1050,38 +1006,13 @@ xfs_vn_fiemap(
 	u64			start,
 	u64			length)
 {
-	xfs_inode_t		*ip = XFS_I(inode);
-	struct getbmapx		bm;
 	int			error;
 
-	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-	if (error)
-		return error;
+	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
+	error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+	xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
-	/* Set up bmap header for xfs internal routine */
-	bm.bmv_offset = BTOBBT(start);
-	/* Special case for whole file */
-	if (length == FIEMAP_MAX_OFFSET)
-		bm.bmv_length = -1LL;
-	else
-		bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
-
-	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-					fieinfo->fi_extents_max + 1;
-	bm.bmv_count = min_t(__s32, bm.bmv_count,
-			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-		bm.bmv_iflags |= BMV_IF_ATTRFORK;
-	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-		bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-	if (error)
-		return error;
-
-	return 0;
+	return error;
 }
 
 STATIC int
fs/xfs/xfs_pnfs.c

@@ -80,32 +80,6 @@ xfs_fs_get_uuid(
 	return 0;
 }
 
-static void
-xfs_bmbt_to_iomap(
-	struct xfs_inode	*ip,
-	struct iomap		*iomap,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (imap->br_startblock == HOLESTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_HOLE;
-	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_DELALLOC;
-	} else {
-		iomap->blkno = XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
-		if (imap->br_state == XFS_EXT_UNWRITTEN)
-			iomap->type = IOMAP_UNWRITTEN;
-		else
-			iomap->type = IOMAP_MAPPED;
-	}
-	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-}
-
 /*
  * Get a layout for the pNFS client.
  */
fs/xfs/xfs_trace.h

@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment