Commit c2efdfc1 authored Jul 11, 2018 by Darrick J. Wong

    Merge branch 'iomap-4.19-merge' into xfs-4.19-merge

Parents: 1e4b044d, 806a1477

Showing 5 changed files with 536 additions and 96 deletions
fs/buffer.c             +40  -36
fs/internal.h            +2   -0
fs/iomap.c             +474  -58
fs/xfs/xfs_iomap.c       +4   -2
include/linux/iomap.h   +16   -0
fs/buffer.c

@@ -1900,15 +1900,16 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
 		break;
 	case IOMAP_UNWRITTEN:
 		/*
-		 * For unwritten regions, we always need to ensure that
-		 * sub-block writes cause the regions in the block we are not
-		 * writing to are zeroed. Set the buffer as new to ensure this.
+		 * For unwritten regions, we always need to ensure that regions
+		 * in the block we are not writing to are zeroed. Mark the
+		 * buffer as new to ensure this.
 		 */
 		set_buffer_new(bh);
 		set_buffer_unwritten(bh);
 		/* FALLTHRU */
 	case IOMAP_MAPPED:
-		if (offset >= i_size_read(inode))
+		if ((iomap->flags & IOMAP_F_NEW) ||
+		    offset >= i_size_read(inode))
 			set_buffer_new(bh);
 		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
 				inode->i_blkbits;

@@ -2076,6 +2077,40 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
 }
 EXPORT_SYMBOL(block_write_begin);
 
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+		struct page *page)
+{
+	loff_t old_size = inode->i_size;
+	bool i_size_changed = false;
+
+	/*
+	 * No need to use i_size_read() here, the i_size cannot change under us
+	 * because we hold i_rwsem.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = true;
+	}
+
+	unlock_page(page);
+	put_page(page);
+
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		mark_inode_dirty(inode);
+	return copied;
+}
+
 int block_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)

@@ -2116,39 +2151,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
-	struct inode *inode = mapping->host;
-	loff_t old_size = inode->i_size;
-	int i_size_changed = 0;
-
 	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-	/*
-	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold i_mutex.
-	 *
-	 * But it's important to update i_size while still holding page lock:
-	 * page writeout could otherwise come in and zero beyond i_size.
-	 */
-	if (pos + copied > inode->i_size) {
-		i_size_write(inode, pos + copied);
-		i_size_changed = 1;
-	}
-
-	unlock_page(page);
-	put_page(page);
-
-	if (old_size < pos)
-		pagecache_isize_extended(inode, old_size, pos);
-	/*
-	 * Don't mark the inode dirty under page lock. First, it unnecessarily
-	 * makes the holding time of page lock longer. Second, it forces lock
-	 * ordering of page lock and transaction start for journaling
-	 * filesystems.
-	 */
-	if (i_size_changed)
-		mark_inode_dirty(inode);
-
-	return copied;
+	return __generic_write_end(mapping->host, pos, copied, page);
 }
 EXPORT_SYMBOL(generic_write_end);
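The helper factored out above keeps only the tail of generic_write_end(): the i_size update made while the page is still locked, the unlock/put of the page, and the deferred mark_inode_dirty(). A write path that does its own copy and uptodate tracking, such as the buffer_head-free buffered-write code added to fs/iomap.c below, can reuse just that tail. The sketch that follows is illustrative only: example_write_end is an invented name, and the declaration lives in fs/internal.h, so only code built under fs/ can call it.

/*
 * Illustrative sketch (not part of this commit): a write_end step that
 * handles uptodate state itself and then reuses the factored-out tail.
 */
#include <linux/fs.h>
#include <linux/pagemap.h>
#include "internal.h"		/* __generic_write_end(), fs/ internal only */

static int example_write_end(struct inode *inode, loff_t pos, unsigned len,
		unsigned copied, struct page *page)
{
	/* A short copy into a page that is not uptodate cannot be kept. */
	if (copied < len && !PageUptodate(page))
		copied = 0;
	else
		SetPageUptodate(page);

	/* i_size update under page lock, then unlock_page() and put_page(). */
	return __generic_write_end(inode, pos, copied, page);
}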
fs/internal.h

@@ -43,6 +43,8 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
 extern void guard_bio_eod(int rw, struct bio *bio);
 extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block, struct iomap *iomap);
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+		struct page *page);
 
 /*
  * char_dev.c
fs/iomap.c

 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,

@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -103,6 +104,243 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static void
+iomap_read_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap)
+{
+	size_t size = i_size_read(inode);
+	void *addr;
+
+	if (PageUptodate(page))
+		return;
+
+	BUG_ON(page->index);
+	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(addr, iomap->inline_data, size);
+	memset(addr + size, 0, PAGE_SIZE - size);
+	kunmap_atomic(addr);
+	SetPageUptodate(page);
+}
+
+static void
+iomap_read_end_io(struct bio *bio)
+{
+	int error = blk_status_to_errno(bio->bi_status);
+	struct bio_vec *bvec;
+	int i;
+
+	bio_for_each_segment_all(bvec, bio, i)
+		page_endio(bvec->bv_page, false, error);
+	bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	bool			is_readahead;
+	struct bio		*bio;
+	struct list_head	*pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	struct page *page = ctx->cur_page;
+	unsigned poff = pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	bool is_contig = false;
+	sector_t sector;
+
+	if (iomap->type == IOMAP_INLINE) {
+		WARN_ON_ONCE(poff);
+		iomap_read_inline_data(inode, page, iomap);
+		return PAGE_SIZE;
+	}
+
+	/* we don't support blocksize < PAGE_SIZE quite yet. */
+	WARN_ON_ONCE(pos != page_offset(page));
+	WARN_ON_ONCE(plen != PAGE_SIZE);
+
+	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+		zero_user(page, poff, plen);
+		SetPageUptodate(page);
+		goto done;
+	}
+
+	ctx->cur_page_in_bio = true;
+
+	/*
+	 * Try to merge into a previous segment if we can.
+	 */
+	sector = iomap_sector(iomap, pos);
+	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+			goto done;
+		is_contig = true;
+	}
+
+	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+
+		if (ctx->is_readahead) /* same as readahead_gfp_mask */
+			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+		ctx->bio->bi_opf = REQ_OP_READ;
+		if (ctx->is_readahead)
+			ctx->bio->bi_opf |= REQ_RAHEAD;
+		ctx->bio->bi_iter.bi_sector = sector;
+		bio_set_dev(ctx->bio, iomap->bdev);
+		ctx->bio->bi_end_io = iomap_read_end_io;
+	}
+
+	__bio_add_page(ctx->bio, page, plen, poff);
+done:
+	return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = { .cur_page = page };
+	struct inode *inode = page->mapping->host;
+	unsigned poff;
+	loff_t ret;
+
+	WARN_ON_ONCE(page_has_buffers(page));
+
+	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+		ret = iomap_apply(inode, page_offset(page) + poff,
+				PAGE_SIZE - poff, 0, ops, &ctx,
+				iomap_readpage_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			SetPageError(page);
+			break;
+		}
+	}
+
+	if (ctx.bio) {
+		submit_bio(ctx.bio);
+		WARN_ON_ONCE(!ctx.cur_page_in_bio);
+	} else {
+		WARN_ON_ONCE(ctx.cur_page_in_bio);
+		unlock_page(page);
+	}
+
+	/*
+	 * Just like mpage_readpages and block_read_full_page we always
+	 * return 0 and just mark the page as PageError on errors.  This
+	 * should be cleaned up all through the stack eventually.
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+		loff_t length, loff_t *done)
+{
+	while (!list_empty(pages)) {
+		struct page *page = lru_to_page(pages);
+
+		if (page_offset(page) >= (u64)pos + length)
+			break;
+
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+				GFP_NOFS))
+			return page;
+
+		/*
+		 * If we already have a page in the page cache at index we are
+		 * done.  Upper layers don't care if it is uptodate after the
+		 * readpages call itself as every page gets checked again once
+		 * actually needed.
+		 */
+		*done += PAGE_SIZE;
+		put_page(page);
+	}
+
+	return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	loff_t done, ret;
+
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
+		}
+		if (!ctx->cur_page) {
+			ctx->cur_page = iomap_next_page(inode, ctx->pages,
+					pos, length, &done);
+			if (!ctx->cur_page)
+				break;
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(inode, pos + done, length - done,
+				ctx, iomap);
+	}
+
+	return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = {
+		.pages		= pages,
+		.is_readahead	= true,
+	};
+	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+	while (length > 0) {
+		ret = iomap_apply(mapping->host, pos, length, 0, ops,
+				&ctx, iomap_readpages_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			goto done;
+		}
+		pos += ret;
+		length -= ret;
+	}
+	ret = 0;
+done:
+	if (ctx.bio)
+		submit_bio(ctx.bio);
+	if (ctx.cur_page) {
+		if (!ctx.cur_page_in_bio)
+			unlock_page(ctx.cur_page);
+		put_page(ctx.cur_page);
+	}
+
+	/*
+	 * Check that we didn't lose a page due to the arcance calling
+	 * conventions..
+	 */
+	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
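iomap_readpage() and iomap_readpages() added above are drop-in backends for the ->readpage and ->readpages address_space operations: the filesystem only supplies its iomap_ops to map file offsets to extents, and the generic code builds and submits the read bios. A minimal wiring sketch, assuming a filesystem "foo" that already has a foo_iomap_ops (the foo_* names are placeholders, not from this commit):

#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>

extern const struct iomap_ops foo_iomap_ops;	/* provided by the filesystem */

static int foo_readpage(struct file *unused, struct page *page)
{
	return iomap_readpage(page, &foo_iomap_ops);
}

static int foo_readpages(struct file *unused, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return iomap_readpages(mapping, pages, nr_pages, &foo_iomap_ops);
}

static const struct address_space_operations foo_aops = {
	.readpage	= foo_readpage,
	.readpages	= foo_readpages,
	/* .writepage, .write_begin/.write_end etc. as before */
};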
@@ -116,6 +354,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 	truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+		unsigned poff, unsigned plen, unsigned from, unsigned to,
+		struct iomap *iomap)
+{
+	struct bio_vec bvec;
+	struct bio bio;
+
+	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+		zero_user_segments(page, poff, from, to, poff + plen);
+		return 0;
+	}
+
+	bio_init(&bio, &bvec, 1);
+	bio.bi_opf = REQ_OP_READ;
+	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+	bio_set_dev(&bio, iomap->bdev);
+	__bio_add_page(&bio, page, plen, poff);
+	return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+		struct page *page, struct iomap *iomap)
+{
+	loff_t block_size = i_blocksize(inode);
+	loff_t block_start = pos & ~(block_size - 1);
+	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	unsigned poff = block_start & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+	if (PageUptodate(page))
+		return 0;
+	if (from <= poff && to >= poff + plen)
+		return 0;
+	return iomap_read_page_sync(inode, block_start, page,
+			poff, plen, from, to, iomap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap)
@@ -133,7 +413,12 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	if (!page)
 		return -ENOMEM;
 
-	status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	if (iomap->type == IOMAP_INLINE)
+		iomap_read_inline_data(inode, page, iomap);
+	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
+		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	else
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
+
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -146,14 +431,93 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+int
+iomap_set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	/*
+	 * Lock out page->mem_cgroup migration to keep PageDirty
+	 * synchronized with per-memcg dirty page counters.
+	 */
+	lock_page_memcg(page);
+	newly_dirty = !TestSetPageDirty(page);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 0);
+	unlock_page_memcg(page);
+
+	if (newly_dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+		unsigned copied, struct page *page, struct iomap *iomap)
+{
+	flush_dcache_page(page);
+
+	/*
+	 * The blocks that were entirely written will now be uptodate, so we
+	 * don't have to worry about a readpage reading them and overwriting a
+	 * partial write.  However if we have encountered a short write and only
+	 * partially written into a block, it will not be marked uptodate, so a
+	 * readpage might come in and destroy our partial write.
+	 *
+	 * Do the simplest thing, and just treat any short write to a non
+	 * uptodate page as a zero-length write, and force the caller to redo
+	 * the whole thing.
+	 */
+	if (unlikely(copied < len && !PageUptodate(page))) {
+		copied = 0;
+	} else {
+		SetPageUptodate(page);
+		iomap_set_page_dirty(page);
+	}
+	return __generic_write_end(inode, pos, copied, page);
+}
+
+static int
+iomap_write_end_inline(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos, unsigned copied)
+{
+	void *addr;
+
+	WARN_ON_ONCE(!PageUptodate(page));
+	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	kunmap_atomic(addr);
+
+	mark_inode_dirty(inode);
+	__generic_write_end(inode, pos, copied, page);
+	return copied;
+}
+
 static int
 iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+		unsigned copied, struct page *page, struct iomap *iomap)
 {
 	int ret;
 
-	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
-			copied, page, NULL);
+	if (iomap->type == IOMAP_INLINE) {
+		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+				copied, page, NULL);
+	} else {
+		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
+	}
+
+	if (iomap->page_done)
+		iomap->page_done(inode, pos, copied, page, iomap);
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
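iomap_write_end() now also invokes an optional page_done callback carried in struct iomap (added in the include/linux/iomap.h hunk further down), passing the page and the number of bytes actually copied. A filesystem that needs per-page work at that point can install the hook from its ->iomap_begin. A hedged sketch with invented foo_* names:

#include <linux/fs.h>
#include <linux/iomap.h>

/*
 * Invented example: per-page bookkeeping once a buffered write has copied
 * 'copied' bytes into 'page' at file offset 'pos'.
 */
static void foo_iomap_page_done(struct inode *inode, loff_t pos,
		unsigned copied, struct page *page, struct iomap *iomap)
{
	if (copied)
		pr_debug("foo: wrote %u bytes at %lld on inode %lu\n",
			 copied, pos, inode->i_ino);
}

static int foo_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		unsigned flags, struct iomap *iomap)
{
	/* ...fill in iomap->type/addr/offset/length/bdev as usual... */
	iomap->page_done = foo_iomap_page_done;
	return 0;
}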
@@ -208,7 +572,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		flush_dcache_page(page);
 
-		status = iomap_write_end(inode, pos, bytes, copied, page);
+		status = iomap_write_end(inode, pos, bytes, copied, page,
+				iomap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@@ -302,7 +667,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		WARN_ON_ONCE(!PageUptodate(page));
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@@ -354,7 +719,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
 
-	return iomap_write_end(inode, pos, bytes, bytes, page);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 }
 
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@@ -440,11 +805,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-	if (ret)
-		return ret;
+	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		if (ret)
+			return ret;
+		block_commit_write(page, 0, length);
+	} else {
+		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	}
 
-	block_commit_write(page, 0, length);
 	return length;
 }
@@ -811,6 +1181,7 @@ struct iomap_dio {
 	atomic_t		ref;
 	unsigned		flags;
 	int			error;
+	bool			wait_for_completion;
 
 	union {
 		/* used during submission and for synchronous completion: */
@@ -914,9 +1285,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
 	if (atomic_dec_and_test(&dio->ref)) {
-		if (is_sync_kiocb(dio->iocb)) {
+		if (dio->wait_for_completion) {
 			struct task_struct *waiter = dio->submit.waiter;
-
 			WRITE_ONCE(dio->submit.waiter, NULL);
 			wake_up_process(waiter);
 		} else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -963,10 +1333,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 }
 
 static loff_t
-iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
+		struct iomap_dio *dio, struct iomap *iomap)
 {
-	struct iomap_dio *dio = data;
 	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
 	unsigned int fs_block_size = i_blocksize(inode), pad;
 	unsigned int align = iov_iter_alignment(dio->submit.iter);
@@ -980,41 +1349,27 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 	if ((pos | length | align) & ((1 << blkbits) - 1))
 		return -EINVAL;
 
-	switch (iomap->type) {
-	case IOMAP_HOLE:
-		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
-			return -EIO;
-		/*FALLTHRU*/
-	case IOMAP_UNWRITTEN:
-		if (!(dio->flags & IOMAP_DIO_WRITE)) {
-			length = iov_iter_zero(length, dio->submit.iter);
-			dio->size += length;
-			return length;
-		}
-		dio->flags |= IOMAP_DIO_UNWRITTEN;
+	if (iomap->type == IOMAP_UNWRITTEN) {
+		dio->flags |= IOMAP_DIO_UNWRITTEN;
 		need_zeroout = true;
-		break;
-	case IOMAP_MAPPED:
-		if (iomap->flags & IOMAP_F_SHARED)
-			dio->flags |= IOMAP_DIO_COW;
-		if (iomap->flags & IOMAP_F_NEW) {
-			need_zeroout = true;
-		} else {
-			/*
-			 * Use a FUA write if we need datasync semantics, this
-			 * is a pure data IO that doesn't require any metadata
-			 * updates and the underlying device supports FUA. This
-			 * allows us to avoid cache flushes on IO completion.
-			 */
-			if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-			    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
-			    blk_queue_fua(bdev_get_queue(iomap->bdev)))
-				use_fua = true;
-		}
-		break;
-	default:
-		WARN_ON_ONCE(1);
-		return -EIO;
 	}
+
+	if (iomap->flags & IOMAP_F_SHARED)
+		dio->flags |= IOMAP_DIO_COW;
+
+	if (iomap->flags & IOMAP_F_NEW) {
+		need_zeroout = true;
+	} else {
+		/*
+		 * Use a FUA write if we need datasync semantics, this
+		 * is a pure data IO that doesn't require any metadata
+		 * updates and the underlying device supports FUA. This
+		 * allows us to avoid cache flushes on IO completion.
+		 */
+		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
+		    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
+		    blk_queue_fua(bdev_get_queue(iomap->bdev)))
+			use_fua = true;
+	}
 
 	/*
@@ -1093,6 +1448,66 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 	return copied;
 }
 
+static loff_t
+iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
+{
+	length = iov_iter_zero(length, dio->submit.iter);
+	dio->size += length;
+	return length;
+}
+
+static loff_t
+iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
+		struct iomap_dio *dio, struct iomap *iomap)
+{
+	struct iov_iter *iter = dio->submit.iter;
+	size_t copied;
+
+	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	if (dio->flags & IOMAP_DIO_WRITE) {
+		loff_t size = inode->i_size;
+
+		if (pos > size)
+			memset(iomap->inline_data + size, 0, pos - size);
+		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+		if (copied) {
+			if (pos + copied > size)
+				i_size_write(inode, pos + copied);
+			mark_inode_dirty(inode);
+		}
+	} else {
+		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+	}
+	dio->size += copied;
+	return copied;
+}
+
+static loff_t
+iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_dio *dio = data;
+
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+			return -EIO;
+		return iomap_dio_hole_actor(length, dio);
+	case IOMAP_UNWRITTEN:
+		if (!(dio->flags & IOMAP_DIO_WRITE))
+			return iomap_dio_hole_actor(length, dio);
+		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+	case IOMAP_MAPPED:
+		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+	case IOMAP_INLINE:
+		return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+}
+
 /*
  * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
  * is being issued as AIO or not.  This allows us to optimise pure data writes
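With the split above, iomap_dio_actor() is reduced to a dispatcher: holes and reads of unwritten extents are zero-filled via iomap_dio_hole_actor(), inline extents are copied via iomap_dio_inline_actor(), and mapped (or written-to unwritten) extents go through the bio path. The entry point for filesystems is still iomap_dio_rw(); roughly how a direct-I/O read path drives it (a sketch with a placeholder foo_iomap_ops, inode locking omitted):

#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/uio.h>

extern const struct iomap_ops foo_iomap_ops;

static ssize_t foo_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	/* The end_io callback is optional; pass NULL if nothing to do. */
	return iomap_dio_rw(iocb, to, &foo_iomap_ops, NULL);
}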
@@ -1131,13 +1546,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->end_io = end_io;
 	dio->error = 0;
 	dio->flags = 0;
+	dio->wait_for_completion = is_sync_kiocb(iocb);
 
 	dio->submit.iter = iter;
-	if (is_sync_kiocb(iocb)) {
-		dio->submit.waiter = current;
-		dio->submit.cookie = BLK_QC_T_NONE;
-		dio->submit.last_queue = NULL;
-	}
+	dio->submit.waiter = current;
+	dio->submit.cookie = BLK_QC_T_NONE;
+	dio->submit.last_queue = NULL;
 
 	if (iov_iter_rw(iter) == READ) {
 		if (pos >= dio->i_size)
@@ -1187,7 +1601,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			dio_warn_stale_pagecache(iocb->ki_filp);
 	ret = 0;
 
-	if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+	if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
 	    !inode->i_sb->s_dio_done_wq) {
 		ret = sb_init_dio_done_wq(inode->i_sb);
 		if (ret < 0)
@@ -1202,8 +1616,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				iomap_dio_actor);
 		if (ret <= 0) {
 			/* magic error code to fall back to buffered I/O */
-			if (ret == -ENOTBLK)
+			if (ret == -ENOTBLK) {
+				dio->wait_for_completion = true;
 				ret = 0;
+			}
 			break;
 		}
 		pos += ret;
@@ -1224,7 +1640,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
 	if (!atomic_dec_and_test(&dio->ref)) {
-		if (!is_sync_kiocb(iocb))
+		if (!dio->wait_for_completion)
 			return -EIOCBQUEUED;
 
 		for (;;) {
fs/xfs/xfs_iomap.c

@@ -626,7 +626,7 @@ xfs_file_iomap_begin_delay(
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))

@@ -1032,6 +1032,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */

@@ -1132,7 +1134,7 @@ xfs_file_iomap_begin(
 	if (error)
 		return error;
 
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 
 out_finish:
include/linux/iomap.h

@@ -9,6 +9,7 @@ struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
 struct kiocb;
+struct page;
 struct vm_area_struct;
 struct vm_fault;

@@ -29,6 +30,7 @@ struct vm_fault;
  */
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
 #define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:

@@ -55,6 +57,16 @@ struct iomap {
 	u16			flags;	/* flags for mapping */
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
+	void			*inline_data;
+	void			*private; /* filesystem private */
+
+	/*
+	 * Called when finished processing a page in the mapping returned in
+	 * this iomap.  At least for now this is only supported in the buffered
+	 * write path.
+	 */
+	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+			struct page *page, struct iomap *iomap);
 };
 
 /*

@@ -88,6 +100,10 @@ struct iomap_ops {
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
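The header changes carry everything the new code paths key off: IOMAP_F_BUFFER_HEAD tells the generic buffered-I/O code to keep using the legacy buffer_head path (xfs sets it unconditionally for now, as shown above), inline_data points at the in-inode data of an IOMAP_INLINE extent, and page_done is the per-page hook called from iomap_write_end(). A hedged sketch of an ->iomap_begin returning an inline mapping, with invented names and without the filesystem's own locking or bounds checks; note the generic code requires the inline data to fit within one page from offset_in_page(inline_data):

#include <linux/fs.h>
#include <linux/iomap.h>

/* Invented in-memory inode with its file data stored inline. */
struct foo_inode {
	struct inode	vfs_inode;
	void		*inline_buf;
};

static inline struct foo_inode *FOO_I(struct inode *inode)
{
	return container_of(inode, struct foo_inode, vfs_inode);
}

static int foo_inline_iomap_begin(struct inode *inode, loff_t pos,
		loff_t length, unsigned flags, struct iomap *iomap)
{
	iomap->type = IOMAP_INLINE;
	iomap->inline_data = FOO_I(inode)->inline_buf;
	iomap->offset = 0;
	iomap->length = i_size_read(inode);
	return 0;
}

const struct iomap_ops foo_inline_iomap_ops = {
	.iomap_begin	= foo_inline_iomap_begin,
};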