Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
a1f45e66
Commit
a1f45e66
authored
Oct 03, 2016
by
Dave Chinner
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'iomap-4.9-dax' into for-next
parents
a89b3f97
d5bfccdf
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
464 additions
and
122 deletions
+464
-122
fs/dax.c
fs/dax.c
+240
-12
fs/ext2/Kconfig
fs/ext2/Kconfig
+1
-0
fs/ext2/ext2.h
fs/ext2/ext2.h
+1
-0
fs/ext2/file.c
fs/ext2/file.c
+69
-7
fs/ext2/inode.c
fs/ext2/inode.c
+81
-19
fs/internal.h
fs/internal.h
+11
-0
fs/iomap.c
fs/iomap.c
+1
-4
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.c
+21
-10
fs/xfs/xfs_aops.h
fs/xfs/xfs_aops.h
+1
-0
fs/xfs/xfs_file.c
fs/xfs/xfs_file.c
+17
-62
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.c
+14
-8
include/linux/dax.h
include/linux/dax.h
+6
-0
include/linux/iomap.h
include/linux/iomap.h
+1
-0
No files found.
fs/dax.c
View file @
a1f45e66
...
...
@@ -31,6 +31,8 @@
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/iomap.h>
#include "internal.h"
/*
* We use lowest available bit in exceptional entry for locking, other two
...
...
@@ -580,14 +582,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
return
VM_FAULT_LOCKED
;
}
static
int
copy_user_
bh
(
struct
page
*
to
,
struct
inode
*
inod
e
,
struct
buffer_head
*
bh
,
unsigned
long
vaddr
)
static
int
copy_user_
dax
(
struct
block_device
*
bdev
,
sector_t
sector
,
size_t
siz
e
,
struct
page
*
to
,
unsigned
long
vaddr
)
{
struct
blk_dax_ctl
dax
=
{
.
sector
=
to_sector
(
bh
,
inode
)
,
.
size
=
bh
->
b_
size
,
.
sector
=
sector
,
.
size
=
size
,
};
struct
block_device
*
bdev
=
bh
->
b_bdev
;
void
*
vto
;
if
(
dax_map_atomic
(
bdev
,
&
dax
)
<
0
)
...
...
@@ -790,14 +791,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL_GPL
(
dax_writeback_mapping_range
);
static
int
dax_insert_mapping
(
struct
address_space
*
mapping
,
struct
buffer_head
*
bh
,
void
**
entryp
,
struct
vm_area_struct
*
vma
,
struct
vm_fault
*
vmf
)
struct
block_device
*
bdev
,
sector_t
sector
,
size_t
size
,
void
**
entryp
,
struct
vm_area_struct
*
vma
,
struct
vm_fault
*
vmf
)
{
unsigned
long
vaddr
=
(
unsigned
long
)
vmf
->
virtual_address
;
struct
block_device
*
bdev
=
bh
->
b_bdev
;
struct
blk_dax_ctl
dax
=
{
.
sector
=
to_sector
(
bh
,
mapping
->
host
)
,
.
size
=
bh
->
b_
size
,
.
sector
=
sector
,
.
size
=
size
,
};
void
*
ret
;
void
*
entry
=
*
entryp
;
...
...
@@ -868,7 +868,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if
(
vmf
->
cow_page
)
{
struct
page
*
new_page
=
vmf
->
cow_page
;
if
(
buffer_written
(
&
bh
))
error
=
copy_user_bh
(
new_page
,
inode
,
&
bh
,
vaddr
);
error
=
copy_user_dax
(
bh
.
b_bdev
,
to_sector
(
&
bh
,
inode
),
bh
.
b_size
,
new_page
,
vaddr
);
else
clear_user_highpage
(
new_page
,
vaddr
);
if
(
error
)
...
...
@@ -898,7 +899,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
/* Filesystem should not return unwritten buffers to us! */
WARN_ON_ONCE
(
buffer_unwritten
(
&
bh
)
||
buffer_new
(
&
bh
));
error
=
dax_insert_mapping
(
mapping
,
&
bh
,
&
entry
,
vma
,
vmf
);
error
=
dax_insert_mapping
(
mapping
,
bh
.
b_bdev
,
to_sector
(
&
bh
,
inode
),
bh
.
b_size
,
&
entry
,
vma
,
vmf
);
unlock_entry:
put_locked_mapping_entry
(
mapping
,
vmf
->
pgoff
,
entry
);
out:
...
...
@@ -1241,3 +1243,229 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
return
dax_zero_page_range
(
inode
,
from
,
length
,
get_block
);
}
EXPORT_SYMBOL_GPL
(
dax_truncate_page
);
#ifdef CONFIG_FS_IOMAP
/*
 * iomap actor used by iomap_dax_rw(): move data between the iov_iter passed
 * in @data and the blocks described by @iomap, for the byte range
 * [@pos, @pos + @length).
 *
 * Reads are clamped to i_size, and a read from a hole or unwritten extent
 * just zero-fills the iterator.  Anything else must be IOMAP_MAPPED.
 *
 * Returns the number of bytes transferred if any were, otherwise 0 or a
 * negative errno.
 */
static loff_t
iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct iov_iter *iter = data;
	const bool is_write = iov_iter_rw(iter) == WRITE;
	loff_t end = pos + length;
	loff_t copied = 0;
	ssize_t err = 0;

	if (iov_iter_rw(iter) == READ) {
		/* Never read past EOF. */
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		switch (iomap->type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			return iov_iter_zero(min(length, end - pos), iter);
		default:
			break;
		}
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	while (pos < end) {
		/* Byte offset of @pos inside its page. */
		unsigned page_off = pos & (PAGE_SIZE - 1);
		struct blk_dax_ctl dax = { 0 };
		ssize_t chunk;

		/* Sector of the page containing @pos (512-byte sectors). */
		dax.sector = iomap->blkno +
			(((pos & PAGE_MASK) - iomap->offset) >> 9);
		/* Request whole pages covering the remaining range. */
		dax.size = (length + page_off + PAGE_SIZE - 1) & PAGE_MASK;

		chunk = dax_map_atomic(iomap->bdev, &dax);
		if (chunk < 0) {
			err = chunk;
			break;
		}

		/* Skip to @pos within the mapping and clamp to the range. */
		dax.addr += page_off;
		chunk -= page_off;
		if (chunk > end - pos)
			chunk = end - pos;

		if (is_write)
			chunk = copy_from_iter_pmem(dax.addr, chunk, iter);
		else
			chunk = copy_to_iter(dax.addr, chunk, iter);
		dax_unmap_atomic(iomap->bdev, &dax);

		if (chunk <= 0) {
			/* A zero-byte copy is reported as -EFAULT. */
			if (chunk)
				err = chunk;
			else
				err = -EFAULT;
			break;
		}

		pos += chunk;
		length -= chunk;
		copied += chunk;
	}

	/* Partial progress wins over a late error. */
	if (copied)
		return copied;
	return err;
}
/**
 * iomap_dax_rw - read from or write to a DAX file
 * @iocb:	I/O control block; supplies the file and the starting position
 * @iter:	source (write) or destination (read) of the data
 * @ops:	the filesystem's iomap operations
 *
 * Transfers data directly to or from persistent memory, one iomap extent at
 * a time, and advances @iocb->ki_pos by the amount transferred.  The caller
 * is responsible for read/write exclusion and for evicting any page cache
 * pages in the region under I/O.
 *
 * Returns the number of bytes transferred if any were; otherwise the last
 * status seen (0 or a negative errno).
 */
ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
		struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	unsigned flags = (iov_iter_rw(iter) == WRITE) ? IOMAP_WRITE : 0;
	loff_t pos = iocb->ki_pos;
	loff_t total = 0;
	loff_t rc = 0;

	/*
	 * Yes, even DAX files can have page cache attached to them: a zeroed
	 * page is inserted into the pagecache when we have to serve a write
	 * fault on a hole.  It should never be dirtied and can simply be
	 * dropped from the pagecache once we get real data for the page.
	 *
	 * XXX: This is racy against mmap, and there's nothing we can do about
	 * it.  We'll eventually need to shift this down even further so that
	 * we can check if we allocated blocks over a hole first.
	 */
	if (mapping->nrpages) {
		rc = invalidate_inode_pages2_range(mapping,
				pos >> PAGE_SHIFT,
				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
		WARN_ON_ONCE(rc);
	}

	/* Each iomap_apply() call handles at most one extent. */
	while (iov_iter_count(iter)) {
		rc = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
				iter, iomap_dax_actor);
		if (rc > 0) {
			pos += rc;
			total += rc;
			continue;
		}
		break;
	}

	iocb->ki_pos += total;
	if (total)
		return total;
	return rc;
}
EXPORT_SYMBOL_GPL(iomap_dax_rw);
/**
 * iomap_dax_fault - service a page fault on a DAX file
 * @vma:	the virtual memory area in which the fault occurred
 * @vmf:	description of the fault
 * @ops:	the filesystem's iomap operations
 *
 * Filesystems may call this from their fault or mkwrite handler for DAX
 * files.  The caller is assumed to have taken whatever locks are needed for
 * the fault to proceed, in particular those serializing against truncate and
 * hole punching.
 *
 * Returns a VM_FAULT_* code.
 */
int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			struct iomap_ops *ops)
{
	struct address_space *mapping = vma->vm_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
	struct iomap iomap = { 0 };
	unsigned iomap_flags = 0;
	int vm_major = 0;
	sector_t sector;
	void *entry;
	int err;

	/*
	 * Faults beyond EOF get SIGBUS.  The caller holds the locks that
	 * serialize us with truncate / punch hole, so i_size is stable here.
	 */
	if (pos >= i_size_read(inode))
		return VM_FAULT_SIGBUS;

	entry = grab_mapping_entry(mapping, vmf->pgoff);
	if (IS_ERR(entry)) {
		err = PTR_ERR(entry);
		goto out;
	}

	/* A COW fault only reads the file, so it is not an iomap write. */
	if (!vmf->cow_page && (vmf->flags & FAULT_FLAG_WRITE))
		iomap_flags |= IOMAP_WRITE;

	/*
	 * iomap_apply() is not used here: DAX requires the filesystem block
	 * size to equal the page size, so a single page never spans more
	 * than one extent.
	 */
	err = ops->iomap_begin(inode, pos, PAGE_SIZE, iomap_flags, &iomap);
	if (err)
		goto unlock_entry;
	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
		/* Extent does not cover the page: fs corruption? */
		err = -EIO;
		goto unlock_entry;
	}

	/* Sector backing this page (512-byte sectors). */
	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);

	if (vmf->cow_page) {
		/* Fill the private COW page from the file contents. */
		if (iomap.type == IOMAP_HOLE ||
		    iomap.type == IOMAP_UNWRITTEN) {
			clear_user_highpage(vmf->cow_page, vaddr);
		} else if (iomap.type == IOMAP_MAPPED) {
			err = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
					vmf->cow_page, vaddr);
		} else {
			WARN_ON_ONCE(1);
			err = -EIO;
		}

		if (err)
			goto unlock_entry;

		/* Return with the entry (or page) still locked. */
		if (radix_tree_exceptional_entry(entry)) {
			vmf->entry = entry;
			return VM_FAULT_DAX_LOCKED;
		}
		vmf->page = entry;
		return VM_FAULT_LOCKED;
	}

	if (iomap.type == IOMAP_MAPPED) {
		/* Freshly allocated blocks are accounted as a major fault. */
		if (iomap.flags & IOMAP_F_NEW) {
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			vm_major = VM_FAULT_MAJOR;
		}
		err = dax_insert_mapping(mapping, iomap.bdev, sector,
				PAGE_SIZE, &entry, vma, vmf);
	} else {
		bool is_hole = iomap.type == IOMAP_UNWRITTEN ||
			       iomap.type == IOMAP_HOLE;

		/* Read faults on holes are served by dax_load_hole(). */
		if (is_hole && !(vmf->flags & FAULT_FLAG_WRITE))
			return dax_load_hole(mapping, entry, vmf);

		/* A write fault must have been given mapped blocks. */
		WARN_ON_ONCE(1);
		err = -EIO;
	}

 unlock_entry:
	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
 out:
	if (err == -ENOMEM)
		return VM_FAULT_OOM | vm_major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if (err < 0 && err != -EBUSY)
		return VM_FAULT_SIGBUS | vm_major;
	return VM_FAULT_NOPAGE | vm_major;
}
EXPORT_SYMBOL_GPL(iomap_dax_fault);
#endif
/* CONFIG_FS_IOMAP */
fs/ext2/Kconfig
View file @
a1f45e66
config EXT2_FS
tristate "Second extended fs support"
select FS_IOMAP if FS_DAX
help
Ext2 is a standard Linux file system for hard disks.
...
...
fs/ext2/ext2.h
View file @
a1f45e66
...
...
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
/* inode.c */
extern
const
struct
address_space_operations
ext2_aops
;
extern
const
struct
address_space_operations
ext2_nobh_aops
;
extern
struct
iomap_ops
ext2_iomap_ops
;
/* namei.c */
extern
const
struct
inode_operations
ext2_dir_inode_operations
;
...
...
fs/ext2/file.c
View file @
a1f45e66
...
...
@@ -22,11 +22,59 @@
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/iomap.h>
#include <linux/uio.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#ifdef CONFIG_FS_DAX
static
ssize_t
ext2_dax_read_iter
(
struct
kiocb
*
iocb
,
struct
iov_iter
*
to
)
{
struct
inode
*
inode
=
iocb
->
ki_filp
->
f_mapping
->
host
;
ssize_t
ret
;
if
(
!
iov_iter_count
(
to
))
return
0
;
/* skip atime */
inode_lock_shared
(
inode
);
ret
=
iomap_dax_rw
(
iocb
,
to
,
&
ext2_iomap_ops
);
inode_unlock_shared
(
inode
);
file_accessed
(
iocb
->
ki_filp
);
return
ret
;
}
/*
 * Body of a DAX write, run with the inode lock held exclusively by the
 * caller.  Performs the generic write checks, drops privileges, updates the
 * timestamps and then writes through iomap_dax_rw(), extending i_size if
 * the write went past it.
 *
 * Returns the number of bytes written, 0, or a negative errno.
 */
static ssize_t ext2_dax_write_locked(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t rc;

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	rc = file_remove_privs(file);
	if (rc)
		return rc;

	rc = file_update_time(file);
	if (rc)
		return rc;

	rc = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
	if (rc > 0 && iocb->ki_pos > i_size_read(inode)) {
		/* The write extended the file. */
		i_size_write(inode, iocb->ki_pos);
		mark_inode_dirty(inode);
	}
	return rc;
}

/*
 * write_iter implementation for DAX inodes.  Takes the inode lock around the
 * write itself and, when data was written, lets generic_write_sync() decide
 * the final return value after the lock has been dropped.
 */
static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	ssize_t written;

	inode_lock(inode);
	written = ext2_dax_write_locked(iocb, from);
	inode_unlock(inode);

	if (written <= 0)
		return written;
	return generic_write_sync(iocb, written);
}
/*
* The lock ordering for ext2 DAX fault paths is:
*
...
...
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
down_read
(
&
ei
->
dax_sem
);
ret
=
dax_fault
(
vma
,
vmf
,
ext2_get_block
);
ret
=
iomap_dax_fault
(
vma
,
vmf
,
&
ext2_iomap_ops
);
up_read
(
&
ei
->
dax_sem
);
if
(
vmf
->
flags
&
FAULT_FLAG_WRITE
)
...
...
@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
return
ret
;
}
/*
* We have mostly NULL's here: the current defaults are ok for
* the ext2 filesystem.
*/
/*
 * read_iter entry point: DAX inodes go through ext2_dax_read_iter() when
 * CONFIG_FS_DAX is enabled, everything else through the generic page cache
 * read path.
 */
static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	if (IS_DAX(inode))
		return ext2_dax_read_iter(iocb, to);
#endif
	return generic_file_read_iter(iocb, to);
}
/*
 * write_iter entry point: DAX inodes go through ext2_dax_write_iter() when
 * CONFIG_FS_DAX is enabled, everything else through the generic write path.
 */
static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
#ifdef CONFIG_FS_DAX
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	if (IS_DAX(inode))
		return ext2_dax_write_iter(iocb, from);
#endif
	return generic_file_write_iter(iocb, from);
}
const
struct
file_operations
ext2_file_operations
=
{
.
llseek
=
generic_file_llseek
,
.
read_iter
=
generic
_file_read_iter
,
.
write_iter
=
generic
_file_write_iter
,
.
read_iter
=
ext2
_file_read_iter
,
.
write_iter
=
ext2
_file_write_iter
,
.
unlocked_ioctl
=
ext2_ioctl
,
#ifdef CONFIG_COMPAT
.
compat_ioctl
=
ext2_compat_ioctl
,
...
...
fs/ext2/inode.c
View file @
a1f45e66
...
...
@@ -32,6 +32,7 @@
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include "ext2.h"
...
...
@@ -618,7 +619,7 @@ static void ext2_splice_branch(struct inode *inode,
*/
static
int
ext2_get_blocks
(
struct
inode
*
inode
,
sector_t
iblock
,
unsigned
long
maxblocks
,
struct
buffer_head
*
bh_result
,
u32
*
bno
,
bool
*
new
,
bool
*
boundary
,
int
create
)
{
int
err
=
-
EIO
;
...
...
@@ -644,7 +645,6 @@ static int ext2_get_blocks(struct inode *inode,
/* Simplest case - block found, no allocation needed */
if
(
!
partial
)
{
first_block
=
le32_to_cpu
(
chain
[
depth
-
1
].
key
);
clear_buffer_new
(
bh_result
);
/* What's this do? */
count
++
;
/*map more blocks*/
while
(
count
<
maxblocks
&&
count
<=
blocks_to_boundary
)
{
...
...
@@ -699,7 +699,6 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock
(
&
ei
->
truncate_mutex
);
if
(
err
)
goto
cleanup
;
clear_buffer_new
(
bh_result
);
goto
got_it
;
}
}
...
...
@@ -745,15 +744,16 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock
(
&
ei
->
truncate_mutex
);
goto
cleanup
;
}
}
else
set_buffer_new
(
bh_result
);
}
else
{
*
new
=
true
;
}
ext2_splice_branch
(
inode
,
iblock
,
partial
,
indirect_blks
,
count
);
mutex_unlock
(
&
ei
->
truncate_mutex
);
got_it:
map_bh
(
bh_result
,
inode
->
i_sb
,
le32_to_cpu
(
chain
[
depth
-
1
].
key
)
);
*
bno
=
le32_to_cpu
(
chain
[
depth
-
1
].
key
);
if
(
count
>
blocks_to_boundary
)
set_buffer_boundary
(
bh_result
)
;
*
boundary
=
true
;
err
=
count
;
/* Clean up and exit */
partial
=
chain
+
depth
-
1
;
/* the whole chain */
...
...
@@ -765,19 +765,82 @@ static int ext2_get_blocks(struct inode *inode,
return
err
;
}
int
ext2_get_block
(
struct
inode
*
inode
,
sector_t
iblock
,
struct
buffer_head
*
bh_result
,
int
create
)
int
ext2_get_block
(
struct
inode
*
inode
,
sector_t
iblock
,
struct
buffer_head
*
bh_result
,
int
create
)
{
unsigned
max_blocks
=
bh_result
->
b_size
>>
inode
->
i_blkbits
;
int
ret
=
ext2_get_blocks
(
inode
,
iblock
,
max_blocks
,
bh_result
,
create
);
if
(
ret
>
0
)
{
bool
new
=
false
,
boundary
=
false
;
u32
bno
;
int
ret
;
ret
=
ext2_get_blocks
(
inode
,
iblock
,
max_blocks
,
&
bno
,
&
new
,
&
boundary
,
create
);
if
(
ret
<=
0
)
return
ret
;
map_bh
(
bh_result
,
inode
->
i_sb
,
bno
);
bh_result
->
b_size
=
(
ret
<<
inode
->
i_blkbits
);
ret
=
0
;
}
if
(
new
)
set_buffer_new
(
bh_result
);
if
(
boundary
)
set_buffer_boundary
(
bh_result
);
return
0
;
}
#ifdef CONFIG_FS_DAX
/*
 * ->iomap_begin for ext2: look up (or, for IOMAP_WRITE, allocate) the blocks
 * backing [@offset, @offset + @length) via ext2_get_blocks() and describe
 * the result in @iomap.
 *
 * A lookup that finds nothing is reported as a one-block IOMAP_HOLE;
 * otherwise the extent is IOMAP_MAPPED and spans as many blocks as
 * ext2_get_blocks() returned.  IOMAP_F_NEW is set when ext2_get_blocks()
 * reports a new allocation.
 *
 * Returns 0 on success or the negative errno from ext2_get_blocks().
 */
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned flags, struct iomap *iomap)
{
	unsigned int blkbits = inode->i_blkbits;
	/* First file block of the range, and the block count rounded up. */
	unsigned long start_blk = offset >> blkbits;
	unsigned long nr_blks = (length + (1 << blkbits) - 1) >> blkbits;
	bool allocated = false;
	bool at_boundary = false;
	u32 bno;
	int mapped;

	mapped = ext2_get_blocks(inode, start_blk, nr_blks,
			&bno, &allocated, &at_boundary, flags & IOMAP_WRITE);
	if (mapped < 0)
		return mapped;

	iomap->bdev = inode->i_sb->s_bdev;
	iomap->offset = (u64)start_blk << blkbits;
	iomap->flags = 0;

	if (mapped != 0) {
		iomap->type = IOMAP_MAPPED;
		/* Convert the fs block number to a 512-byte sector. */
		iomap->blkno = (sector_t)bno << (blkbits - 9);
		iomap->length = (u64)mapped << blkbits;
		iomap->flags |= IOMAP_F_MERGED;
	} else {
		iomap->type = IOMAP_HOLE;
		iomap->blkno = IOMAP_NULL_BLOCK;
		iomap->length = 1 << blkbits;
	}

	if (allocated)
		iomap->flags |= IOMAP_F_NEW;
	return 0;
}
/*
 * ->iomap_end for ext2: after a short write into a mapped extent, call
 * ext2_write_failed() for the end of the originally requested range.
 * Always returns 0.
 */
static int
ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
		ssize_t written, unsigned flags, struct iomap *iomap)
{
	/* Only writes into mapped extents need any cleanup. */
	if (!(flags & IOMAP_WRITE))
		return 0;
	if (iomap->type != IOMAP_MAPPED)
		return 0;

	/* The whole range was written: nothing to undo. */
	if (!(written < length))
		return 0;

	ext2_write_failed(inode->i_mapping, offset + length);
	return 0;
}
/*
 * iomap operations table for ext2: wires ext2_iomap_begin() and
 * ext2_iomap_end() into the iomap_begin / iomap_end callbacks.
 */
struct
iomap_ops
ext2_iomap_ops
=
{
.
iomap_begin
=
ext2_iomap_begin
,
.
iomap_end
=
ext2_iomap_end
,
};
#endif
/* CONFIG_FS_DAX */
int
ext2_fiemap
(
struct
inode
*
inode
,
struct
fiemap_extent_info
*
fieinfo
,
u64
start
,
u64
len
)
{
...
...
@@ -863,10 +926,9 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
loff_t
offset
=
iocb
->
ki_pos
;
ssize_t
ret
;
if
(
IS_DAX
(
inode
))
ret
=
dax_do_io
(
iocb
,
inode
,
iter
,
ext2_get_block
,
NULL
,
DIO_LOCKING
);
else
if
(
WARN_ON_ONCE
(
IS_DAX
(
inode
)))
return
-
EIO
;
ret
=
blockdev_direct_IO
(
iocb
,
inode
,
iter
,
ext2_get_block
);
if
(
ret
<
0
&&
iov_iter_rw
(
iter
)
==
WRITE
)
ext2_write_failed
(
mapping
,
offset
+
count
);
...
...
fs/internal.h
View file @
a1f45e66
...
...
@@ -12,6 +12,7 @@
struct
super_block
;
struct
file_system_type
;
struct
iomap
;
struct
iomap_ops
;
struct
linux_binprm
;
struct
path
;
struct
mount
;
...
...
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
extern
int
do_vfs_ioctl
(
struct
file
*
file
,
unsigned
int
fd
,
unsigned
int
cmd
,
unsigned
long
arg
);
extern
long
vfs_ioctl
(
struct
file
*
file
,
unsigned
int
cmd
,
unsigned
long
arg
);
/*
* iomap support:
*/
typedef
loff_t
(
*
iomap_actor_t
)(
struct
inode
*
inode
,
loff_t
pos
,
loff_t
len
,
void
*
data
,
struct
iomap
*
iomap
);
loff_t
iomap_apply
(
struct
inode
*
inode
,
loff_t
pos
,
loff_t
length
,
unsigned
flags
,
struct
iomap_ops
*
ops
,
void
*
data
,
iomap_actor_t
actor
);
fs/iomap.c
View file @
a1f45e66
...
...
@@ -27,9 +27,6 @@
#include <linux/dax.h>
#include "internal.h"
typedef
loff_t
(
*
iomap_actor_t
)(
struct
inode
*
inode
,
loff_t
pos
,
loff_t
len
,
void
*
data
,
struct
iomap
*
iomap
);
/*
* Execute a iomap write on a segment of the mapping that spans a
* contiguous range of pages that have identical block mapping state.
...
...
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
* resources they require in the iomap_begin call, and release them in the
* iomap_end call.
*/
static
loff_t
loff_t
iomap_apply
(
struct
inode
*
inode
,
loff_t
pos
,
loff_t
length
,
unsigned
flags
,
struct
iomap_ops
*
ops
,
void
*
data
,
iomap_actor_t
actor
)
{
...
...
fs/xfs/xfs_aops.c
View file @
a1f45e66
...
...
@@ -200,7 +200,7 @@ xfs_setfilesize_trans_alloc(
* Update on-disk file size now that data has been written to disk.
*/
STATIC
int
xfs_setfilesize
(
__
xfs_setfilesize
(
struct
xfs_inode
*
ip
,
struct
xfs_trans
*
tp
,
xfs_off_t
offset
,
...
...
@@ -225,6 +225,23 @@ xfs_setfilesize(
return
xfs_trans_commit
(
tp
);
}
/*
 * Allocate a transaction using the tr_fsyncts reservation and hand it to
 * __xfs_setfilesize() together with @ip, @offset and @size.
 *
 * Returns the error from xfs_trans_alloc() if the allocation fails,
 * otherwise whatever __xfs_setfilesize() returns.
 */
int
xfs_setfilesize(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (!error)
		error = __xfs_setfilesize(ip, tp, offset, size);

	return error;
}
STATIC
int
xfs_setfilesize_ioend
(
struct
xfs_ioend
*
ioend
,
...
...
@@ -247,7 +264,7 @@ xfs_setfilesize_ioend(
return
error
;
}
return
xfs_setfilesize
(
ip
,
tp
,
ioend
->
io_offset
,
ioend
->
io_size
);
return
__
xfs_setfilesize
(
ip
,
tp
,
ioend
->
io_offset
,
ioend
->
io_size
);
}
/*
...
...
@@ -1336,13 +1353,12 @@ xfs_end_io_direct_write(
{
struct
inode
*
inode
=
file_inode
(
iocb
->
ki_filp
);
struct
xfs_inode
*
ip
=
XFS_I
(
inode
);
struct
xfs_mount
*
mp
=
ip
->
i_mount
;
uintptr_t
flags
=
(
uintptr_t
)
private
;
int
error
=
0
;
trace_xfs_end_io_direct_write
(
ip
,
offset
,
size
);
if
(
XFS_FORCED_SHUTDOWN
(
mp
))
if
(
XFS_FORCED_SHUTDOWN
(
ip
->
i_mount
))
return
-
EIO
;
if
(
size
<=
0
)
...
...
@@ -1380,14 +1396,9 @@ xfs_end_io_direct_write(
error
=
xfs_iomap_write_unwritten
(
ip
,
offset
,
size
);
}
else
if
(
flags
&
XFS_DIO_FLAG_APPEND
)
{
struct
xfs_trans
*
tp
;
trace_xfs_end_io_direct_write_append
(
ip
,
offset
,
size
);
error
=
xfs_trans_alloc
(
mp
,
&
M_RES
(
mp
)
->
tr_fsyncts
,
0
,
0
,
0
,
&
tp
);
if
(
!
error
)
error
=
xfs_setfilesize
(
ip
,
tp
,
offset
,
size
);
error
=
xfs_setfilesize
(
ip
,
offset
,
size
);
}
return
error
;
...
...
fs/xfs/xfs_aops.h
View file @
a1f45e66
...
...
@@ -62,6 +62,7 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
int
xfs_end_io_direct_write
(
struct
kiocb
*
iocb
,
loff_t
offset
,
ssize_t
size
,
void
*
private
);
int
xfs_setfilesize
(
struct
xfs_inode
*
ip
,
xfs_off_t
offset
,
size_t
size
);
extern
void
xfs_count_page_state
(
struct
page
*
,
int
*
,
int
*
);
extern
struct
block_device
*
xfs_find_bdev_for_inode
(
struct
inode
*
);
...
...
fs/xfs/xfs_file.c
View file @
a1f45e66
...
...
@@ -333,10 +333,7 @@ xfs_file_dax_read(
struct
kiocb
*
iocb
,
struct
iov_iter
*
to
)
{
struct
address_space
*
mapping
=
iocb
->
ki_filp
->
f_mapping
;
struct
inode
*
inode
=
mapping
->
host
;
struct
xfs_inode
*
ip
=
XFS_I
(
inode
);
struct
iov_iter
data
=
*
to
;
struct
xfs_inode
*
ip
=
XFS_I
(
iocb
->
ki_filp
->
f_mapping
->
host
);
size_t
count
=
iov_iter_count
(
to
);
ssize_t
ret
=
0
;
...
...
@@ -346,11 +343,7 @@ xfs_file_dax_read(
return
0
;
/* skip atime */
xfs_rw_ilock
(
ip
,
XFS_IOLOCK_SHARED
);
ret
=
dax_do_io
(
iocb
,
inode
,
&
data
,
xfs_get_blocks_direct
,
NULL
,
0
);
if
(
ret
>
0
)
{
iocb
->
ki_pos
+=
ret
;
iov_iter_advance
(
to
,
ret
);
}
ret
=
iomap_dax_rw
(
iocb
,
to
,
&
xfs_iomap_ops
);
xfs_rw_iunlock
(
ip
,
XFS_IOLOCK_SHARED
);
file_accessed
(
iocb
->
ki_filp
);
...
...
@@ -712,70 +705,32 @@ xfs_file_dax_write(
struct
kiocb
*
iocb
,
struct
iov_iter
*
from
)
{
struct
address_space
*
mapping
=
iocb
->
ki_filp
->
f_mapping
;
struct
inode
*
inode
=
mapping
->
host
;
struct
inode
*
inode
=
iocb
->
ki_filp
->
f_mapping
->
host
;
struct
xfs_inode
*
ip
=
XFS_I
(
inode
);
struct
xfs_mount
*
mp
=
ip
->
i_mount
;
ssize_t
ret
=
0
;
int
unaligned_io
=
0
;
int
iolock
;
struct
iov_iter
data
;
int
iolock
=
XFS_IOLOCK_EXCL
;
ssize_t
ret
,
error
=
0
;
size_t
count
;
loff_t
pos
;
/* "unaligned" here means not aligned to a filesystem block */
if
((
iocb
->
ki_pos
&
mp
->
m_blockmask
)
||
((
iocb
->
ki_pos
+
iov_iter_count
(
from
))
&
mp
->
m_blockmask
))
{
unaligned_io
=
1
;
iolock
=
XFS_IOLOCK_EXCL
;
}
else
if
(
mapping
->
nrpages
)
{
iolock
=
XFS_IOLOCK_EXCL
;
}
else
{
iolock
=
XFS_IOLOCK_SHARED
;
}
xfs_rw_ilock
(
ip
,
iolock
);
ret
=
xfs_file_aio_write_checks
(
iocb
,
from
,
&
iolock
);
if
(
ret
)
goto
out
;
/*
* Yes, even DAX files can have page cache attached to them: A zeroed
* page is inserted into the pagecache when we have to serve a write
* fault on a hole. It should never be dirtied and can simply be
* dropped from the pagecache once we get real data for the page.
*
* XXX: This is racy against mmap, and there's nothing we can do about
* it. dax_do_io() should really do this invalidation internally as
* it will know if we've allocated over a hole for this specific IO and
* if so it needs to update the mapping tree and invalidate existing
* PTEs over the newly allocated range. Remove this invalidation when
* dax_do_io() is fixed up.
*/
if
(
mapping
->
nrpages
)
{
loff_t
end
=
iocb
->
ki_pos
+
iov_iter_count
(
from
)
-
1
;
pos
=
iocb
->
ki_pos
;
count
=
iov_iter_count
(
from
);
ret
=
invalidate_inode_pages2_range
(
mapping
,
iocb
->
ki_pos
>>
PAGE_SHIFT
,
end
>>
PAGE_SHIFT
);
WARN_ON_ONCE
(
ret
);
}
trace_xfs_file_dax_write
(
ip
,
count
,
pos
);
if
(
iolock
==
XFS_IOLOCK_EXCL
&&
!
unaligned_io
)
{
xfs_rw_ilock_demote
(
ip
,
XFS_IOLOCK_EXCL
);
iolock
=
XFS_IOLOCK_SHARED
;
ret
=
iomap_dax_rw
(
iocb
,
from
,
&
xfs_iomap_ops
);
if
(
ret
>
0
&&
iocb
->
ki_pos
>
i_size_read
(
inode
))
{
i_size_write
(
inode
,
iocb
->
ki_pos
);
error
=
xfs_setfilesize
(
ip
,
pos
,
ret
);
}
trace_xfs_file_dax_write
(
ip
,
iov_iter_count
(
from
),
iocb
->
ki_pos
);
data
=
*
from
;
ret
=
dax_do_io
(
iocb
,
inode
,
&
data
,
xfs_get_blocks_direct
,
xfs_end_io_direct_write
,
0
);
if
(
ret
>
0
)
{
iocb
->
ki_pos
+=
ret
;
iov_iter_advance
(
from
,
ret
);
}
out:
xfs_rw_iunlock
(
ip
,
iolock
);
return
ret
;
return
error
?
error
:
ret
;
}
STATIC
ssize_t
...
...
@@ -1514,7 +1469,7 @@ xfs_filemap_page_mkwrite(
xfs_ilock
(
XFS_I
(
inode
),
XFS_MMAPLOCK_SHARED
);
if
(
IS_DAX
(
inode
))
{
ret
=
dax_mkwrite
(
vma
,
vmf
,
xfs_get_blocks_dax_fault
);
ret
=
iomap_dax_fault
(
vma
,
vmf
,
&
xfs_iomap_ops
);
}
else
{
ret
=
iomap_page_mkwrite
(
vma
,
vmf
,
&
xfs_iomap_ops
);
ret
=
block_page_mkwrite_return
(
ret
);
...
...
@@ -1548,7 +1503,7 @@ xfs_filemap_fault(
* changes to xfs_get_blocks_direct() to map unwritten extent
* ioend for conversion on read-only mappings.
*/
ret
=
dax_fault
(
vma
,
vmf
,
xfs_get_blocks_dax_fault
);
ret
=
iomap_dax_fault
(
vma
,
vmf
,
&
xfs_iomap_ops
);
}
else
ret
=
filemap_fault
(
vma
,
vmf
);
xfs_iunlock
(
XFS_I
(
inode
),
XFS_MMAPLOCK_SHARED
);
...
...
fs/xfs/xfs_iomap.c
View file @
a1f45e66
...
...
@@ -934,11 +934,13 @@ xfs_iomap_write_unwritten(
return
error
;
}
static
inline
bool
imap_needs_alloc
(
struct
xfs_bmbt_irec
*
imap
,
int
nimaps
)
static
inline
bool
imap_needs_alloc
(
struct
inode
*
inode
,
struct
xfs_bmbt_irec
*
imap
,
int
nimaps
)
{
return
!
nimaps
||
imap
->
br_startblock
==
HOLESTARTBLOCK
||
imap
->
br_startblock
==
DELAYSTARTBLOCK
;
imap
->
br_startblock
==
DELAYSTARTBLOCK
||
(
IS_DAX
(
inode
)
&&
ISUNWRITTEN
(
imap
));
}
static
int
...
...
@@ -954,16 +956,18 @@ xfs_file_iomap_begin(
struct
xfs_bmbt_irec
imap
;
xfs_fileoff_t
offset_fsb
,
end_fsb
;
int
nimaps
=
1
,
error
=
0
;
unsigned
lockmode
;
if
(
XFS_FORCED_SHUTDOWN
(
mp
))
return
-
EIO
;
if
((
flags
&
IOMAP_WRITE
)
&&
!
xfs_get_extsz_hint
(
ip
))
{
if
((
flags
&
IOMAP_WRITE
)
&&
!
IS_DAX
(
inode
)
&&
!
xfs_get_extsz_hint
(
ip
))
{
return
xfs_file_iomap_begin_delay
(
inode
,
offset
,
length
,
flags
,
iomap
);
}
xfs_ilock
(
ip
,
XFS_ILOCK_EXCL
);
lockmode
=
xfs_ilock_data_map_shared
(
ip
);
ASSERT
(
offset
<=
mp
->
m_super
->
s_maxbytes
);
if
((
xfs_fsize_t
)
offset
+
length
>
mp
->
m_super
->
s_maxbytes
)
...
...
@@ -974,11 +978,11 @@ xfs_file_iomap_begin(
error
=
xfs_bmapi_read
(
ip
,
offset_fsb
,
end_fsb
-
offset_fsb
,
&
imap
,
&
nimaps
,
XFS_BMAPI_ENTIRE
);
if
(
error
)
{
xfs_iunlock
(
ip
,
XFS_ILOCK_EXCL
);
xfs_iunlock
(
ip
,
lockmode
);
return
error
;
}
if
((
flags
&
IOMAP_WRITE
)
&&
imap_needs_alloc
(
&
imap
,
nimaps
))
{
if
((
flags
&
IOMAP_WRITE
)
&&
imap_needs_alloc
(
inode
,
&
imap
,
nimaps
))
{
/*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
...
...
@@ -994,17 +998,19 @@ xfs_file_iomap_begin(
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
*/
xfs_ilock_demote
(
ip
,
XFS_ILOCK_EXCL
);
if
(
lockmode
==
XFS_ILOCK_EXCL
)
xfs_ilock_demote
(
ip
,
lockmode
);
error
=
xfs_iomap_write_direct
(
ip
,
offset
,
length
,
&
imap
,
nimaps
);
if
(
error
)
return
error
;
iomap
->
flags
=
IOMAP_F_NEW
;
trace_xfs_iomap_alloc
(
ip
,
offset
,
length
,
0
,
&
imap
);
}
else
{
ASSERT
(
nimaps
);
xfs_iunlock
(
ip
,
XFS_ILOCK_EXCL
);
xfs_iunlock
(
ip
,
lockmode
);
trace_xfs_iomap_found
(
ip
,
offset
,
length
,
0
,
&
imap
);
}
...
...
include/linux/dax.h
View file @
a1f45e66
...
...
@@ -6,13 +6,19 @@
#include <linux/radix-tree.h>
#include <asm/pgtable.h>
struct
iomap_ops
;
/* We use lowest available exceptional entry bit for locking */
#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
ssize_t
iomap_dax_rw
(
struct
kiocb
*
iocb
,
struct
iov_iter
*
iter
,
struct
iomap_ops
*
ops
);
ssize_t
dax_do_io
(
struct
kiocb
*
,
struct
inode
*
,
struct
iov_iter
*
,
get_block_t
,
dio_iodone_t
,
int
flags
);
int
dax_zero_page_range
(
struct
inode
*
,
loff_t
from
,
unsigned
len
,
get_block_t
);
int
dax_truncate_page
(
struct
inode
*
,
loff_t
from
,
get_block_t
);
int
iomap_dax_fault
(
struct
vm_area_struct
*
vma
,
struct
vm_fault
*
vmf
,
struct
iomap_ops
*
ops
);
int
dax_fault
(
struct
vm_area_struct
*
,
struct
vm_fault
*
,
get_block_t
);
int
dax_delete_mapping_entry
(
struct
address_space
*
mapping
,
pgoff_t
index
);
void
dax_wake_mapping_entry_waiter
(
struct
address_space
*
mapping
,
...
...
include/linux/iomap.h
View file @
a1f45e66
...
...
@@ -23,6 +23,7 @@ struct vm_fault;
*/
#define IOMAP_F_MERGED 0x01
/* contains multiple blocks/extents */
#define IOMAP_F_SHARED 0x02
/* block shared with another file */
#define IOMAP_F_NEW 0x04
/* blocks have been newly allocated */
/*
* Magic value for blkno:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment