Commit 29a8ea4f authored by Linus Torvalds

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
 "1/ Fixes to the libnvdimm 'pfn' device that establishes a reserved
     area for storing a struct page array.

  2/ Fixes for dax operations on a raw block device to prevent pagecache
     collisions with dax mappings.

  3/ A fix for pfn_t usage in vm_insert_mixed that led to a NULL
     pointer dereference.

  These have received build success notification from the kbuild robot
  across 153 configs and pass the latest ndctl tests"
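
With BLKDAXSET removed below, BLKDAXGET remains as a read-only query of the S_DAX flag. A minimal userspace sketch, assuming <linux/fs.h> from a kernel that carries this series (BLKDAXGET was short-lived) and a hypothetical /dev/pmem0 device:

/*
 * Sketch: query a block device's S_DAX state via BLKDAXGET.
 * Kernel side is put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX)),
 * so the ioctl writes an int back through the pointer argument.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
        int fd = open("/dev/pmem0", O_RDONLY);  /* hypothetical device */
        int dax = 0;

        if (fd < 0)
                return 1;
        if (ioctl(fd, BLKDAXGET, &dax) == 0)
                printf("S_DAX is %s\n", dax ? "set" : "clear");
        close(fd);
        return 0;
}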

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  phys_to_pfn_t: use phys_addr_t
  mm: fix pfn_t to page conversion in vm_insert_mixed
  block: use DAX for partition table reads
  block: revert runtime dax control of the raw block device
  fs, block: force direct-I/O for dax-enabled block devices
  devm_memremap_pages: fix vmem_altmap lifetime + alignment handling
  libnvdimm, pfn: fix restoring memmap location
  libnvdimm: fix mode determination for e820 devices
parents 36f90b0a 76e9f0ee
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
 	return true;
 }
-
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
-	unsigned long arg;
-	int rc = 0;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (get_user(arg, (int __user *)(argp)))
-		return -EFAULT;
-	arg = !!arg;
-	if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
-		return 0;
-
-	if (arg)
-		arg = S_DAX;
-
-	if (arg && !blkdev_dax_capable(bdev))
-		return -ENOTTY;
-
-	inode_lock(bdev->bd_inode);
-	if (bdev->bd_map_count == 0)
-		inode_set_flags(bdev->bd_inode, arg, S_DAX);
-	else
-		rc = -EBUSY;
-	inode_unlock(bdev->bd_inode);
-	return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
-	if (arg)
-		return -ENOTTY;
-	return 0;
-}
 #endif
 
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
-	case BLKDAXSET:
-		return blkdev_daxset(bdev, arg);
 	case BLKDAXGET:
 		return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
 		break;
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -16,6 +16,7 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 	return 0;
 }
 
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+				 NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
 	struct page *page;
 
-	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-				 NULL);
+	/* don't populate page cache for dax capable devices */
+	if (IS_DAX(bdev->bd_inode))
+		page = read_dax_sector(bdev, n);
+	else
+		page = read_pagecache_sector(bdev, n);
+
	if (!IS_ERR(page)) {
 		if (PageError(page))
 			goto fail;
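
The page-cache index math in read_pagecache_sector() is easier to see with numbers: pgoff = n >> (PAGE_CACHE_SHIFT - 9) divides the 512-byte sector number by sectors-per-page. A standalone sketch (not kernel code), assuming 4K pages, i.e. PAGE_CACHE_SHIFT == 12:

/* Sector-to-page-cache-index conversion, 8 sectors per 4K page. */
#include <stdio.h>

int main(void)
{
        unsigned long page_cache_shift = 12;    /* assumed 4K pages */
        unsigned long n;

        for (n = 0; n <= 17; n += 7)    /* sectors 0, 7, 14 */
                printf("sector %2lu -> page index %lu\n",
                       n, n >> (page_cache_shift - 9));
        return 0;       /* prints indexes 0, 0, 1 */
}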
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
 	device_lock(dev);
 	claim = ndns->claim;
-	if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
-		mode = "memory";
-	else if (claim && is_nd_btt(claim))
+	if (claim && is_nd_btt(claim))
 		mode = "safe";
+	else if (claim && is_nd_pfn(claim))
+		mode = "memory";
+	else if (!claim && pmem_should_map_pages(dev))
+		mode = "memory";
 	else
 		mode = "raw";
 	rc = sprintf(buf, "%s\n", mode);
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 	switch (le32_to_cpu(pfn_sb->mode)) {
 	case PFN_MODE_RAM:
-		break;
 	case PFN_MODE_PMEM:
-		/* TODO: allocate from PMEM support */
-		return -ENOTTY;
+		break;
 	default:
 		return -ENXIO;
 	}
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1736,37 +1736,13 @@ static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
-static void blkdev_vm_open(struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
-	inode_unlock(bd_inode);
-}
-
-static void blkdev_vm_close(struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count--;
-	inode_unlock(bd_inode);
-}
-
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= blkdev_dax_fault,
 	.pmd_fault	= blkdev_dax_pmd_fault,
 	.pfn_mkwrite	= blkdev_dax_fault,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 };
@@ -1774,18 +1750,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *bd_inode = bdev_file_inode(file);
-	struct block_device *bdev = I_BDEV(bd_inode);
 
 	file_accessed(file);
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
 	if (IS_DAX(bd_inode)) {
 		vma->vm_ops = &blkdev_dax_vm_ops;
 		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
 	} else {
 		vma->vm_ops = &blkdev_default_vm_ops;
 	}
-	inode_unlock(bd_inode);
 
 	return 0;
 }
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
 	blk_queue_exit(bdev->bd_queue);
 }
 
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, 0);
+	struct blk_dax_ctl dax = {
+		.size = PAGE_SIZE,
+		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+	};
+	long rc;
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	rc = dax_map_atomic(bdev, &dax);
+	if (rc < 0)
+		return ERR_PTR(rc);
+	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+	dax_unmap_atomic(bdev, &dax);
+	return page;
+}
+
 /*
  * dax_clear_blocks() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
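
The .sector initializer in read_dax_sector() rounds the requested sector down to its page boundary before mapping one full page out of the device. A standalone sketch of that mask arithmetic, again assuming 4K pages (8 sectors of 512 bytes per page):

/* n & ~((PAGE_SIZE / 512) - 1): align a sector number down to a page. */
#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096; /* assumed */
        unsigned long mask = ~((page_size / 512) - 1);
        unsigned long n;

        for (n = 0; n <= 15; n += 5)    /* sectors 0, 5, 10, 15 */
                printf("sector %2lu -> page-aligned sector %2lu\n",
                       n, n & mask);
        return 0;       /* prints 0, 0, 8, 8 */
}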
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+		sector_t n)
+{
+	return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
 		unsigned int flags, get_block_t, dax_iodone_t);
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -484,9 +484,6 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-	int			bd_map_count;
-#endif
 };
 
 /*
@@ -2907,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-	return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
 	return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
 	return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
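
The dma_addr_t to phys_addr_t change guards against truncation on configurations where dma_addr_t is narrower than a physical address (e.g. 32-bit with PAE but without 64-bit DMA addressing): a truncated address would yield a bogus pfn after the shift. A standalone illustration with explicit integer widths:

/* Model of the truncation the phys_addr_t signature avoids. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t phys = 0x120000000ULL;         /* 4.5GB, valid under PAE */
        uint32_t narrow = (uint32_t)phys;       /* a 32-bit dma_addr_t truncates */

        printf("pfn via 64-bit addr: %#llx\n",
               (unsigned long long)(phys >> 12));       /* 0x120000 */
        printf("pfn via 32-bit addr: %#x\n", narrow >> 12);     /* 0x20000 */
        return 0;
}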
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,7 +222,6 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
 	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
 
 static void pgmap_radix_release(struct resource *res)
 {
-	resource_size_t key;
+	resource_size_t key, align_start, align_size, align_end;
+
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_end = align_start + align_size - 1;
 
 	mutex_lock(&pgmap_lock);
-	for (key = res->start; key <= res->end; key += SECTION_SIZE)
+	for (key = align_start; key <= align_end; key += SECTION_SIZE)
@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 		percpu_ref_put(pgmap->ref);
 	}
 
-	pgmap_radix_release(res);
-
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	arch_remove_memory(align_start, align_size);
+	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
 			"%s: failed to free all reserved pages\n", __func__);
 }
@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 {
 	int is_ram = region_intersects(res->start, resource_size(res),
 			"System RAM");
-	resource_size_t key, align_start, align_size;
+	resource_size_t key, align_start, align_size, align_end;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	unsigned long pfn;
@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
 	mutex_lock(&pgmap_lock);
 	error = 0;
-	for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_end = align_start + align_size - 1;
+	for (key = align_start; key <= align_end; key += SECTION_SIZE) {
 		struct dev_pagemap *dup;
 
 		rcu_read_lock();
@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	error = arch_add_memory(nid, align_start, align_size, true);
 	if (error)
 		goto err_add_memory;
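
The repeated align_start/align_size/align_end computation expands an arbitrary resource to sparsemem section boundaries, so the insert loop in devm_memremap_pages() and the release loop in pgmap_radix_release() walk an identical set of radix-tree keys. A standalone sketch, assuming x86_64's 128MB sections (SECTION_SIZE == 1UL << 27):

/* Section alignment of an unaligned resource; example values only. */
#include <stdio.h>

int main(void)
{
        unsigned long long section = 1ULL << 27;        /* assumed 128M */
        unsigned long long start = 0x10400000ULL;       /* example: 260M */
        unsigned long long size = 0x0c000000ULL;        /* example: 192M */
        unsigned long long align_start, align_size, align_end, key;

        align_start = start & ~(section - 1);                   /* 256M */
        align_size = (size + section - 1) & ~(section - 1);     /* 256M */
        align_end = align_start + align_size - 1;

        for (key = align_start; key <= align_end; key += section)
                printf("section key %#llx\n", key);     /* 0x10000000, 0x18000000 */
        return 0;
}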
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
 	 * without pte special, it would there be refcounted as a normal page.
 	 */
-	if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+	if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
 		struct page *page;
 
-		page = pfn_t_to_page(pfn);
+		/*
+		 * At this point we are committed to insert_page()
+		 * regardless of whether the caller specified flags that
+		 * result in pfn_t_has_page() == false.
+		 */
+		page = pfn_to_page(pfn_t_to_pfn(pfn));
 		return insert_page(vma, addr, page, vma->vm_page_prot);
 	}
 	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
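
The NULL dereference fixed here came from pfn_t values that carry PFN_DEV without PFN_MAP, i.e. device memory with no struct page behind it: the old pfn_t_to_page() call returned NULL, which insert_page() then dereferenced. A standalone model of the flag logic, with bit positions copied from include/linux/pfn_t.h for a 64-bit build:

/* Model of pfn_t_has_page(); not kernel code. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define PFN_DEV (1ULL << 61)    /* device memory, no page unless... */
#define PFN_MAP (1ULL << 60)    /* ...PFN_MAP says a memmap exists */

static bool pfn_t_has_page(uint64_t val)
{
        return (val & PFN_MAP) == PFN_MAP || (val & PFN_DEV) == 0;
}

int main(void)
{
        uint64_t raw_pmem = PFN_DEV | 0x100000;         /* pfn, no memmap */
        uint64_t mapped_pmem = PFN_DEV | PFN_MAP | 0x100000;

        printf("raw pmem has page?    %d\n", pfn_t_has_page(raw_pmem));     /* 0 */
        printf("mapped pmem has page? %d\n", pfn_t_has_page(mapped_pmem)); /* 1 */
        return 0;
}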
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
 	struct nfit_test_resource *nfit_res = get_nfit_res(addr);