Kirill Smelkov / linux / Commits

Commit b5684579, authored Jun 08, 2018 by Dan Williams

    Merge branch 'for-4.18/dax' into libnvdimm-for-next

Parents: 808c340b, cc4a90ac

Showing 21 changed files with 545 additions and 295 deletions (+545 −295):
  drivers/dax/super.c         +11   -3
  drivers/nvdimm/pfn_devs.c    +0   -2
  drivers/nvdimm/pmem.c       +25   -0
  fs/Kconfig                   +1   -0
  fs/dax.c                    +99  -16
  fs/xfs/xfs_file.c           +66   -6
  fs/xfs/xfs_inode.h          +16   -0
  fs/xfs/xfs_ioctl.c           +2   -6
  fs/xfs/xfs_iops.c           +10   -6
  fs/xfs/xfs_pnfs.c            +8   -7
  fs/xfs/xfs_pnfs.h            +3   -2
  include/linux/dax.h          +7   -0
  include/linux/memremap.h    +10  -26
  include/linux/mm.h          +53  -18
  kernel/Makefile              +2   -1
  kernel/iomem.c             +167   -0  (new file)
  kernel/memremap.c           +30 -180
  mm/Kconfig                   +5   -0
  mm/gup.c                    +26  -10
  mm/hmm.c                     +2  -11
  mm/swap.c                    +2   -1
drivers/dax/super.c

@@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 {
 	struct block_device *bdev = sb->s_bdev;
 	struct dax_device *dax_dev;
+	bool dax_enabled = false;
 	pgoff_t pgoff;
 	int err, id;
 	void *kaddr;
...
@@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 		 * on being able to do (page_address(pfn_to_page())).
 		 */
 		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+		dax_enabled = true;
 	} else if (pfn_t_devmap(pfn)) {
-		/* pass */;
-	} else {
+		struct dev_pagemap *pgmap;
+
+		pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
+		if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+			dax_enabled = true;
+		put_dev_pagemap(pgmap);
+	}
+
+	if (!dax_enabled) {
 		pr_debug("VFS (%s): error: dax support not enabled\n",
 				sb->s_id);
 		return -EOPNOTSUPP;
 	}
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
...
drivers/nvdimm/pfn_devs.c

@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 	res->start += start_pad;
 	res->end -= end_trunc;
 
-	pgmap->type = MEMORY_DEVICE_HOST;
-
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < SZ_8K)
 			return -EINVAL;
...
drivers/nvdimm/pmem.c

@@ -289,6 +289,27 @@ static void pmem_release_disk(void *__pmem)
 	put_disk(pmem->disk);
 }
 
+static void pmem_release_pgmap_ops(void *__pgmap)
+{
+	dev_pagemap_put_ops();
+}
+
+static void fsdax_pagefree(struct page *page, void *data)
+{
+	wake_up_var(&page->_refcount);
+}
+
+static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
+{
+	dev_pagemap_get_ops();
+	if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
+		return -ENOMEM;
+	pgmap->type = MEMORY_DEVICE_FS_DAX;
+	pgmap->page_free = fsdax_pagefree;
+
+	return 0;
+}
+
 static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns)
 {
...
@@ -347,6 +368,8 @@ static int pmem_attach_disk(struct device *dev,
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {
+		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+			return -ENOMEM;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pfn_sb = nd_pfn->pfn_sb;
 		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
...
@@ -358,6 +381,8 @@ static int pmem_attach_disk(struct device *dev,
 	} else if (pmem_should_map_pages(dev)) {
 		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
 		pmem->pgmap.altmap_valid = false;
+		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+			return -ENOMEM;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
 		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
...
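The new setup_pagemap_fsdax() helper above pairs dev_pagemap_get_ops() with a devm release action and registers fsdax_pagefree(), so that dropping the last pin on a device page wakes anyone sleeping on page->_refcount. As an illustration only (setup_pagemap_fsdax() is static to pmem.c, and the my_* names below are made up, not part of this commit), a hypothetical fsdax-capable ZONE_DEVICE driver would open-code the same three steps before creating its struct pages:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/wait_bit.h>

static void my_release_pgmap_ops(void *data)
{
	dev_pagemap_put_ops();		/* balance dev_pagemap_get_ops() */
}

static void my_page_free(struct page *page, void *data)
{
	/* Last pin dropped: wake waiters sleeping on this page's refcount. */
	wake_up_var(&page->_refcount);
}

static int my_attach(struct device *dev, struct dev_pagemap *pgmap)
{
	void *addr;

	dev_pagemap_get_ops();		/* enable ->page_free() callbacks */
	if (devm_add_action_or_reset(dev, my_release_pgmap_ops, pgmap))
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;
	pgmap->page_free = my_page_free;

	addr = devm_memremap_pages(dev, pgmap);	/* create struct pages */
	return IS_ERR(addr) ? PTR_ERR(addr) : 0;
}

With this wiring in place, a filesystem waiting in xfs_break_dax_layouts() (see fs/xfs/xfs_file.c below) is woken as soon as the pinned page goes idle.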
fs/Kconfig

@@ -38,6 +38,7 @@ config FS_DAX
 	bool "Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
+	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
 	select FS_IOMAP
 	select DAX
 	help
...
fs/dax.c

@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 	}
 }
 
+static struct page *dax_busy_page(void *entry)
+{
+	unsigned long pfn;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (page_ref_count(page) > 1)
+			return page;
+	}
+	return NULL;
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
...
@@ -492,6 +505,90 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
 	return entry;
 }
 
+/**
+ * dax_layout_busy_page - find first pinned page in @mapping
+ * @mapping: address space to scan for a page with ref count > 1
+ *
+ * DAX requires ZONE_DEVICE mapped pages. These pages are never
+ * 'onlined' to the page allocator so they are considered idle when
+ * page->count == 1. A filesystem uses this interface to determine if
+ * any page in the mapping is busy, i.e. for DMA, or other
+ * get_user_pages() usages.
+ *
+ * It is expected that the filesystem is holding locks to block the
+ * establishment of new mappings in this address_space. I.e. it expects
+ * to be able to run unmap_mapping_range() and subsequently not race
+ * mapping_mapped() becoming true.
+ */
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+	pgoff_t	indices[PAGEVEC_SIZE];
+	struct page *page = NULL;
+	struct pagevec pvec;
+	pgoff_t	index, end;
+	unsigned i;
+
+	/*
+	 * In the 'limited' case get_user_pages() for dax is disabled.
+	 */
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return NULL;
+
+	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+		return NULL;
+
+	pagevec_init(&pvec);
+	index = 0;
+	end = -1;
+
+	/*
+	 * If we race get_user_pages_fast() here either we'll see the
+	 * elevated page count in the pagevec_lookup and wait, or
+	 * get_user_pages_fast() will see that the page it took a reference
+	 * against is no longer mapped in the page tables and bail to the
+	 * get_user_pages() slow path.  The slow path is protected by
+	 * pte_lock() and pmd_lock(). New references are not taken without
+	 * holding those locks, and unmap_mapping_range() will not zero the
+	 * pte or pmd without holding the respective lock, so we are
+	 * guaranteed to either see new references or prevent new
+	 * references from being established.
+	 */
+	unmap_mapping_range(mapping, 0, 0, 1);
+
+	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE),
+				indices)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *pvec_ent = pvec.pages[i];
+			void *entry;
+
+			index = indices[i];
+			if (index >= end)
+				break;
+
+			if (!radix_tree_exceptional_entry(pvec_ent))
+				continue;
+
+			xa_lock_irq(&mapping->i_pages);
+			entry = get_unlocked_mapping_entry(mapping, index, NULL);
+			if (entry)
+				page = dax_busy_page(entry);
+			put_unlocked_mapping_entry(mapping, index, entry);
+			xa_unlock_irq(&mapping->i_pages);
+			if (page)
+				break;
+		}
+		pagevec_remove_exceptionals(&pvec);
+		pagevec_release(&pvec);
+		index++;
+
+		if (page)
+			break;
+	}
+	return page;
+}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+
 static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 					  pgoff_t index, bool trunc)
 {
...
@@ -912,7 +1009,6 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	unsigned long vaddr = vmf->address;
 	int ret = VM_FAULT_NOPAGE;
 	struct page *zero_page;
-	void *entry2;
 	pfn_t pfn;
 
 	zero_page = ZERO_PAGE(0);
...
@@ -922,13 +1018,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	}
 
 	pfn = page_to_pfn_t(zero_page);
-	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_ZERO_PAGE, false);
-	if (IS_ERR(entry2)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out;
-	}
-
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+			RADIX_DAX_ZERO_PAGE, false);
 	vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
 	trace_dax_load_hole(inode, vmf, ret);
...
@@ -1240,10 +1331,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						 0, write && !sync);
-		if (IS_ERR(entry)) {
-			error = PTR_ERR(entry);
-			goto error_finish_iomap;
-		}
 
 		/*
 		 * If we are doing synchronous page fault and inode needs fsync,
...
@@ -1327,8 +1414,6 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
-	if (IS_ERR(ret))
-		goto fallback;
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
...
@@ -1450,8 +1535,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						RADIX_DAX_PMD, write && !sync);
-		if (IS_ERR(entry))
-			goto finish_iomap;
 
 		/*
 		 * If we are doing synchronous page fault and inode needs fsync,
...
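The kernel-doc for dax_layout_busy_page() above describes the expected caller contract: hold the filesystem lock that blocks new mappings, scan for a pinned page, and wait for it to go idle before changing the file layout. The in-tree consumer is xfs_break_dax_layouts() in the next file; the following is only a condensed sketch of that pattern (my_fs_break_dax_layouts is a made-up name, and unlike the xfs version it does not drop and retake the lock while sleeping):

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/wait_bit.h>

static int my_fs_break_dax_layouts(struct inode *inode)
{
	struct page *page;

	/*
	 * Caller already holds the lock that prevents new mappings (for xfs
	 * that is XFS_MMAPLOCK_EXCL), so the unmap_mapping_range() inside
	 * dax_layout_busy_page() cannot race with new page faults.
	 */
	page = dax_layout_busy_page(inode->i_mapping);
	if (!page)
		return 0;	/* no pinned pages, the layout may change */

	/* Sleep until the pin (e.g. in-flight DMA) is dropped. */
	return wait_var_event_interruptible(&page->_refcount,
			atomic_read(&page->_refcount) == 1);
}

The wake-up side is the MEMORY_DEVICE_FS_DAX ->page_free() callback (fsdax_pagefree() in drivers/nvdimm/pmem.c above), which calls wake_up_var(&page->_refcount) when the last reference is dropped.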
fs/xfs/xfs_file.c

@@ -312,7 +312,7 @@ xfs_file_aio_write_checks(
 	if (error <= 0)
 		return error;
 
-	error = xfs_break_layouts(inode, iolock);
+	error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
 	if (error)
 		return error;
...
@@ -718,6 +718,69 @@ xfs_file_write_iter(
 	return ret;
 }
 
+static void
+xfs_wait_dax_page(
+	struct inode		*inode,
+	bool			*did_unlock)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	*did_unlock = true;
+	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+	schedule();
+	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+}
+
+static int
+xfs_break_dax_layouts(
+	struct inode		*inode,
+	uint			iolock,
+	bool			*did_unlock)
+{
+	struct page		*page;
+
+	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
+
+	page = dax_layout_busy_page(inode->i_mapping);
+	if (!page)
+		return 0;
+
+	return ___wait_var_event(&page->_refcount,
+			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
+			0, 0, xfs_wait_dax_page(inode, did_unlock));
+}
+
+int
+xfs_break_layouts(
+	struct inode		*inode,
+	uint			*iolock,
+	enum layout_break_reason reason)
+{
+	bool			retry;
+	int			error;
+
+	ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
+
+	do {
+		retry = false;
+		switch (reason) {
+		case BREAK_UNMAP:
+			error = xfs_break_dax_layouts(inode, *iolock, &retry);
+			if (error || retry)
+				break;
+			/* fall through */
+		case BREAK_WRITE:
+			error = xfs_break_leased_layouts(inode, iolock, &retry);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EINVAL;
+		}
+	} while (error == 0 && retry);
+
+	return error;
+}
+
 #define	XFS_FALLOC_FL_SUPPORTED						\
 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
 		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
...
@@ -734,7 +797,7 @@ xfs_file_fallocate(
 	struct xfs_inode	*ip = XFS_I(inode);
 	long			error;
 	enum xfs_prealloc_flags	flags = 0;
-	uint			iolock = XFS_IOLOCK_EXCL;
+	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	loff_t			new_size = 0;
 	bool			do_file_insert = false;
...
@@ -744,13 +807,10 @@ xfs_file_fallocate(
 		return -EOPNOTSUPP;
 
 	xfs_ilock(ip, iolock);
-	error = xfs_break_layouts(inode, &iolock);
+	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
 	if (error)
 		goto out_unlock;
 
-	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-	iolock |= XFS_MMAPLOCK_EXCL;
-
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		error = xfs_free_file_space(ip, offset, len);
 		if (error)
...
fs/xfs/xfs_inode.h

@@ -378,6 +378,20 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
 #define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) \
 					>> XFS_ILOCK_SHIFT)
 
+/*
+ * Layouts are broken in the BREAK_WRITE case to ensure that
+ * layout-holders do not collide with local writes. Additionally,
+ * layouts are broken in the BREAK_UNMAP case to make sure the
+ * layout-holder has a consistent view of the file's extent map. While
+ * BREAK_WRITE breaks can be satisfied by recalling FL_LAYOUT leases,
+ * BREAK_UNMAP breaks additionally require waiting for busy dax-pages to
+ * go idle.
+ */
+enum layout_break_reason {
+        BREAK_WRITE,
+        BREAK_UNMAP,
+};
+
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
  * directory, group of new file is set to that of the parent, and
...
@@ -443,6 +457,8 @@ enum xfs_prealloc_flags {
 
 int	xfs_update_prealloc_flags(struct xfs_inode *ip,
 				  enum xfs_prealloc_flags flags);
+int	xfs_break_layouts(struct inode *inode, uint *iolock,
+		enum layout_break_reason reason);
 
 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);
...
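The comment above distinguishes the two break reasons: BREAK_WRITE only needs FL_LAYOUT leases recalled, while BREAK_UNMAP additionally waits for busy dax pages. As an illustration only (my_prepare_to_punch_hole is a made-up helper assuming the usual xfs includes; the real call sites changed by this commit are xfs_file_aio_write_checks() for BREAK_WRITE and xfs_file_fallocate()/xfs_ioc_space()/xfs_vn_setattr() for BREAK_UNMAP), a caller that is about to change the extent map would do:

static int my_prepare_to_punch_hole(struct inode *inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	int			error;

	xfs_ilock(ip, iolock);
	/*
	 * The extent map is about to change: BREAK_UNMAP recalls FL_LAYOUT
	 * leases and also waits for busy dax pages to become idle. A plain
	 * local write would pass BREAK_WRITE and skip the dax wait.
	 */
	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
	if (error)
		xfs_iunlock(ip, iolock);
	return error;
}

On success the caller performs the layout change and drops the locks afterwards, exactly as the fallocate and XFS_IOC_*RESVSP paths below do.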
fs/xfs/xfs_ioctl.c

@@ -39,7 +39,6 @@
 #include "xfs_icache.h"
 #include "xfs_symlink.h"
 #include "xfs_trans.h"
-#include "xfs_pnfs.h"
 #include "xfs_acl.h"
 #include "xfs_btree.h"
 #include <linux/fsmap.h>
...
@@ -614,7 +613,7 @@ xfs_ioc_space(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct iattr		iattr;
 	enum xfs_prealloc_flags	flags = 0;
-	uint			iolock = XFS_IOLOCK_EXCL;
+	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	int			error;
 
 	/*
...
@@ -644,13 +643,10 @@ xfs_ioc_space(
 		return error;
 
 	xfs_ilock(ip, iolock);
-	error = xfs_break_layouts(inode, &iolock);
+	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
 	if (error)
 		goto out_unlock;
 
-	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-	iolock |= XFS_MMAPLOCK_EXCL;
-
 	switch (bf->l_whence) {
 	case 0: /*SEEK_SET*/
 		break;
...
fs/xfs/xfs_iops.c

@@ -37,7 +37,6 @@
 #include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
-#include "xfs_pnfs.h"
 #include "xfs_iomap.h"
 
 #include <linux/capability.h>
...
@@ -1030,14 +1029,19 @@ xfs_vn_setattr(
 	int			error;
 
 	if (iattr->ia_valid & ATTR_SIZE) {
-		struct xfs_inode	*ip = XFS_I(d_inode(dentry));
-		uint			iolock = XFS_IOLOCK_EXCL;
+		struct inode		*inode = d_inode(dentry);
+		struct xfs_inode	*ip = XFS_I(inode);
+		uint			iolock;
 
-		error = xfs_break_layouts(d_inode(dentry), &iolock);
-		if (error)
+		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+		iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+
+		error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
+		if (error) {
+			xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
 			return error;
+		}
 
-		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
 		error = xfs_vn_setattr_size(dentry, iattr);
 		xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
 	} else {
...
fs/xfs/xfs_pnfs.c

@@ -31,19 +31,20 @@
  * rules in the page fault path we don't bother.
  */
 int
-xfs_break_layouts(
+xfs_break_leased_layouts(
 	struct inode		*inode,
-	uint			*iolock)
+	uint			*iolock,
+	bool			*did_unlock)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	int			error;
 
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
-
 	while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
 		xfs_iunlock(ip, *iolock);
+		*did_unlock = true;
 		error = break_layout(inode, true);
-		*iolock = XFS_IOLOCK_EXCL;
+		*iolock &= ~XFS_IOLOCK_SHARED;
+		*iolock |= XFS_IOLOCK_EXCL;
 		xfs_ilock(ip, *iolock);
 	}
...
@@ -120,8 +121,8 @@ xfs_fs_map_blocks(
 	 * Lock out any other I/O before we flush and invalidate the pagecache,
 	 * and then hand out a layout to the remote system.  This is very
 	 * similar to direct I/O, except that the synchronization is much more
-	 * complicated.  See the comment near xfs_break_layouts for a detailed
-	 * explanation.
+	 * complicated.  See the comment near xfs_break_leased_layouts
+	 * for a detailed explanation.
 	 */
 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
...
fs/xfs/xfs_pnfs.h

@@ -9,10 +9,11 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
 int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
 		struct iattr *iattr);
 
-int xfs_break_layouts(struct inode *inode, uint *iolock);
+int xfs_break_leased_layouts(struct inode *inode, uint *iolock,
+		bool *did_unlock);
 #else
 static inline int
-xfs_break_layouts(struct inode *inode, uint *iolock)
+xfs_break_leased_layouts(struct inode *inode, uint *iolock, bool *did_unlock)
 {
 	return 0;
 }
...
include/linux/dax.h

@@ -83,6 +83,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
+
+struct page *dax_layout_busy_page(struct address_space *mapping);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
...
@@ -103,6 +105,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 	return NULL;
 }
 
+static inline struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+	return NULL;
+}
+
 static inline int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc)
 {
...
include/linux/memremap.h

 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
...
@@ -30,13 +29,6 @@ struct vmem_altmap {
  * Specialize ZONE_DEVICE memory into multiple types each having differents
  * usage.
  *
- * MEMORY_DEVICE_HOST:
- * Persistent device memory (pmem): struct page might be allocated in different
- * memory and architecture might want to perform special actions. It is similar
- * to regular memory, in that the CPU can access it transparently. However,
- * it is likely to have different bandwidth and latency than regular memory.
- * See Documentation/nvdimm/nvdimm.txt for more information.
- *
  * MEMORY_DEVICE_PRIVATE:
  * Device memory that is not directly addressable by the CPU: CPU can neither
  * read nor write private memory. In this case, we do still have struct pages
...
@@ -53,11 +45,19 @@ struct vmem_altmap {
  * driver can hotplug the device memory using ZONE_DEVICE and with that memory
  * type. Any page of a process can be migrated to such memory. However no one
  * should be allow to pin such memory so that it can always be evicted.
+ *
+ * MEMORY_DEVICE_FS_DAX:
+ * Host memory that has similar access semantics as System RAM i.e. DMA
+ * coherent and supports page pinning. In support of coordinating page
+ * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
+ * wakeup event whenever a page is unpinned and becomes idle. This
+ * wakeup is used to coordinate physical address space management (ex:
+ * fs truncate/hole punch) vs pinned pages (ex: device dma).
  */
 enum memory_type {
-	MEMORY_DEVICE_HOST = 0,
-	MEMORY_DEVICE_PRIVATE,
+	MEMORY_DEVICE_PRIVATE = 1,
 	MEMORY_DEVICE_PUBLIC,
+	MEMORY_DEVICE_FS_DAX,
 };
 
 /*
...
@@ -129,8 +129,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
 		struct dev_pagemap *pgmap)
...
@@ -161,20 +159,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-static inline bool is_device_private_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
...
include/linux/mm.h

@@ -821,27 +821,65 @@ static inline bool is_zone_device_page(const struct page *page)
 }
 #endif
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page);
-DECLARE_STATIC_KEY_FALSE(device_private_key);
-#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
-static inline bool is_device_private_page(const struct page *page);
-static inline bool is_device_public_page(const struct page *page);
-#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-static inline void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void dev_pagemap_get_ops(void);
+void dev_pagemap_put_ops(void);
+void __put_devmap_managed_page(struct page *page);
+DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	if (!static_branch_unlikely(&devmap_managed_key))
+		return false;
+	if (!is_zone_device_page(page))
+		return false;
+	switch (page->pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_PUBLIC:
+	case MEMORY_DEVICE_FS_DAX:
+		__put_devmap_managed_page(page);
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
+
+static inline bool is_device_public_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
+
+#else /* CONFIG_DEV_PAGEMAP_OPS */
+static inline void dev_pagemap_get_ops(void)
 {
 }
-#define IS_HMM_ENABLED 0
+
+static inline void dev_pagemap_put_ops(void)
+{
+}
+
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool is_device_private_page(const struct page *page)
 {
 	return false;
 }
+
 static inline bool is_device_public_page(const struct page *page)
 {
 	return false;
 }
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static inline void get_page(struct page *page)
 {
...
@@ -859,16 +897,13 @@ static inline void put_page(struct page *page)
 	page = compound_head(page);
 
 	/*
-	 * For private device pages we need to catch refcount transition from
-	 * 2 to 1, when refcount reach one it means the private device page is
-	 * free and we need to inform the device driver through callback. See
+	 * For devmap managed pages we need to catch refcount transition from
+	 * 2 to 1, when refcount reach one it means the page is free and we
+	 * need to inform the device driver through callback. See
 	 * include/linux/memremap.h and HMM for details.
 	 */
-	if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
-	    unlikely(is_device_public_page(page)))) {
-		put_zone_device_private_or_public_page(page);
+	if (put_devmap_managed_page(page))
 		return;
-	}
 
 	if (put_page_testzero(page))
 		__put_page(page);
...
kernel/Makefile

@@ -112,7 +112,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
 
-obj-$(CONFIG_HAS_IOMEM) += memremap.o
+obj-$(CONFIG_HAS_IOMEM) += iomem.o
+obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 
 $(obj)/configs.o: $(obj)/config_data.h
...
kernel/iomem.c  (new file, mode 0 → 100644, +167 lines)

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/device.h>
#include <linux/types.h>
#include <linux/io.h>
#include <linux/mm.h>

#ifndef ioremap_cache
/* temporary while we convert existing ioremap_cache users to memremap */
__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
{
	return ioremap(offset, size);
}
#endif

#ifndef arch_memremap_wb
static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
{
	return (__force void *)ioremap_cache(offset, size);
}
#endif

#ifndef arch_memremap_can_ram_remap
static bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
					unsigned long flags)
{
	return true;
}
#endif

static void *try_ram_remap(resource_size_t offset, size_t size,
			   unsigned long flags)
{
	unsigned long pfn = PHYS_PFN(offset);

	/* In the simple case just return the existing linear address */
	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
	    arch_memremap_can_ram_remap(offset, size, flags))
		return __va(offset);

	return NULL; /* fallback to arch_memremap_wb */
}

/**
 * memremap() - remap an iomem_resource as cacheable memory
 * @offset: iomem resource start address
 * @size: size of remap
 * @flags: any of MEMREMAP_WB, MEMREMAP_WT, MEMREMAP_WC,
 *		  MEMREMAP_ENC, MEMREMAP_DEC
 *
 * memremap() is "ioremap" for cases where it is known that the resource
 * being mapped does not have i/o side effects and the __iomem
 * annotation is not applicable. In the case of multiple flags, the different
 * mapping types will be attempted in the order listed below until one of
 * them succeeds.
 *
 * MEMREMAP_WB - matches the default mapping for System RAM on
 * the architecture.  This is usually a read-allocate write-back cache.
 * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
 * memremap() will bypass establishing a new mapping and instead return
 * a pointer into the direct map.
 *
 * MEMREMAP_WT - establish a mapping whereby writes either bypass the
 * cache or are written through to memory and never exist in a
 * cache-dirty state with respect to program visibility.  Attempts to
 * map System RAM with this mapping type will fail.
 *
 * MEMREMAP_WC - establish a writecombine mapping, whereby writes may
 * be coalesced together (e.g. in the CPU's write buffers), but is otherwise
 * uncached. Attempts to map System RAM with this mapping type will fail.
 */
void *memremap(resource_size_t offset, size_t size, unsigned long flags)
{
	int is_ram = region_intersects(offset, size,
				       IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
	void *addr = NULL;

	if (!flags)
		return NULL;

	if (is_ram == REGION_MIXED) {
		WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
				&offset, (unsigned long) size);
		return NULL;
	}

	/* Try all mapping types requested until one returns non-NULL */
	if (flags & MEMREMAP_WB) {
		/*
		 * MEMREMAP_WB is special in that it can be satisifed
		 * from the direct map.  Some archs depend on the
		 * capability of memremap() to autodetect cases where
		 * the requested range is potentially in System RAM.
		 */
		if (is_ram == REGION_INTERSECTS)
			addr = try_ram_remap(offset, size, flags);
		if (!addr)
			addr = arch_memremap_wb(offset, size);
	}

	/*
	 * If we don't have a mapping yet and other request flags are
	 * present then we will be attempting to establish a new virtual
	 * address mapping.  Enforce that this mapping is not aliasing
	 * System RAM.
	 */
	if (!addr && is_ram == REGION_INTERSECTS && flags != MEMREMAP_WB) {
		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
				&offset, (unsigned long) size);
		return NULL;
	}

	if (!addr && (flags & MEMREMAP_WT))
		addr = ioremap_wt(offset, size);

	if (!addr && (flags & MEMREMAP_WC))
		addr = ioremap_wc(offset, size);

	return addr;
}
EXPORT_SYMBOL(memremap);

void memunmap(void *addr)
{
	if (is_vmalloc_addr(addr))
		iounmap((void __iomem *) addr);
}
EXPORT_SYMBOL(memunmap);

static void devm_memremap_release(struct device *dev, void *res)
{
	memunmap(*(void **)res);
}

static int devm_memremap_match(struct device *dev, void *res, void *match_data)
{
	return *(void **)res == match_data;
}

void *devm_memremap(struct device *dev, resource_size_t offset,
		size_t size, unsigned long flags)
{
	void **ptr, *addr;

	ptr = devres_alloc_node(devm_memremap_release, sizeof(*ptr), GFP_KERNEL,
			dev_to_node(dev));
	if (!ptr)
		return ERR_PTR(-ENOMEM);

	addr = memremap(offset, size, flags);
	if (addr) {
		*ptr = addr;
		devres_add(dev, ptr);
	} else {
		devres_free(ptr);
		return ERR_PTR(-ENXIO);
	}

	return addr;
}
EXPORT_SYMBOL(devm_memremap);

void devm_memunmap(struct device *dev, void *addr)
{
	WARN_ON(devres_release(dev, devm_memremap_release,
				devm_memremap_match, addr));
}
EXPORT_SYMBOL(devm_memunmap);
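The memremap() kernel-doc above spells out the flag fallback order: MEMREMAP_WB may be satisfied from the direct map, and MEMREMAP_WT/MEMREMAP_WC are only attempted if an earlier flag failed and the range is not System RAM. As an illustration only (the address, size and function name below are made up, not part of this file), a caller mapping a persistent, side-effect-free region cacheably might do:

#include <linux/io.h>
#include <linux/kernel.h>

static void *my_map_example(resource_size_t phys, size_t len)
{
	/*
	 * MEMREMAP_WB: reuse the direct map when the range is System RAM,
	 * otherwise fall back to arch_memremap_wb(). Passing
	 * MEMREMAP_WB | MEMREMAP_WT | MEMREMAP_WC would try the other
	 * mapping types, in that order, only if the WB attempt failed.
	 */
	void *va = memremap(phys, len, MEMREMAP_WB);

	if (!va)
		pr_warn("memremap of %pa failed\n", &phys);
	return va;	/* release later with memunmap(va) */
}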
kernel/memremap.c

-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
 #include <linux/radix-tree.h>
 #include <linux/device.h>
 #include <linux/types.h>
...
@@ -19,170 +9,8 @@
 #include <linux/memory_hotplug.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/wait_bit.h>
 
[the roughly 160 lines deleted here, ioremap_cache(), arch_memremap_wb(),
 arch_memremap_can_ram_remap(), try_ram_remap(), memremap(), memunmap(),
 devm_memremap() and devm_memunmap(), are identical to the code added in
 kernel/iomem.c above]
 
-#ifdef CONFIG_ZONE_DEVICE
 static DEFINE_MUTEX(pgmap_lock);
 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
...
@@ -473,10 +301,32 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	return pgmap;
 }
-#endif /* CONFIG_ZONE_DEVICE */
 EXPORT_SYMBOL_GPL(get_dev_pagemap);
 
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL_GPL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+void dev_pagemap_get_ops(void)
+{
+	if (atomic_inc_return(&devmap_enable) == 1)
+		static_branch_enable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
+
+void dev_pagemap_put_ops(void)
+{
+	if (atomic_dec_and_test(&devmap_enable))
+		static_branch_disable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
+
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page)
+void __put_devmap_managed_page(struct page *page)
 {
 	int count = page_ref_dec_return(page);
...
@@ -496,5 +346,5 @@ void put_zone_device_private_or_public_page(struct page *page)
 	} else if (!count)
 		__put_page(page);
 }
-EXPORT_SYMBOL(put_zone_device_private_or_public_page);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+EXPORT_SYMBOL_GPL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
mm/Kconfig

@@ -693,6 +693,9 @@ config ARCH_HAS_HMM
 config MIGRATE_VMA_HELPER
 	bool
 
+config DEV_PAGEMAP_OPS
+	bool
+
 config HMM
 	bool
 	select MIGRATE_VMA_HELPER
...
@@ -713,6 +716,7 @@ config DEVICE_PRIVATE
 	bool "Unaddressable device memory (GPU memory, ...)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 	help
 	  Allows creation of struct pages to represent unaddressable device
...
@@ -723,6 +727,7 @@ config DEVICE_PUBLIC
 	bool "Addressable device memory (like GPU memory)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 	help
 	  Allows creation of struct pages to represent addressable device
...
mm/gup.c

@@ -1459,32 +1459,48 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 	return 1;
 }
 
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
 	unsigned long fault_pfn;
+	int nr_start = *nr;
+
+	fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+		return 0;
 
-	fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
+		undo_dev_pagemap(nr, nr_start, pages);
+		return 0;
+	}
+	return 1;
 }
 
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
 	unsigned long fault_pfn;
+	int nr_start = *nr;
+
+	fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+	if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+		return 0;
 
-	fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
+		undo_dev_pagemap(nr, nr_start, pages);
+		return 0;
+	}
+	return 1;
 }
 #else
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
 	BUILD_BUG();
 	return 0;
 }
 
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
 	BUILD_BUG();
...
@@ -1502,7 +1518,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		return 0;
 
 	if (pmd_devmap(orig))
-		return __gup_device_huge_pmd(orig, addr, end, pages, nr);
+		return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
 
 	refs = 0;
 	page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
...
@@ -1540,7 +1556,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		return 0;
 
 	if (pud_devmap(orig))
-		return __gup_device_huge_pud(orig, addr, end, pages, nr);
+		return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
 
 	refs = 0;
 	page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
...
mm/hmm.c

@@ -35,15 +35,6 @@
 #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
- */
-DEFINE_STATIC_KEY_FALSE(device_private_key);
-EXPORT_SYMBOL(device_private_key);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
-
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
...
@@ -1167,7 +1158,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	resource_size_t addr;
 	int ret;
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
...
@@ -1261,7 +1252,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
 		return ERR_PTR(-EINVAL);
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
...
mm/swap.c

@@ -29,6 +29,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/memremap.h>
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>
...
@@ -743,7 +744,7 @@ void release_pages(struct page **pages, int nr)
 						       flags);
 				locked_pgdat = NULL;
 			}
-			put_zone_device_private_or_public_page(page);
+			put_devmap_managed_page(page);
 			continue;
 		}
...