Commit 53ef7d0e authored by Linus Torvalds

Merge tag 'libnvdimm-for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "The bulk of this has been in multiple -next releases. There were a few
  late breaking fixes and small features that got added in the last
  couple days, but the whole set has received a build success
  notification from the kbuild robot.

  Change summary:

   - Region media error reporting: A libnvdimm region device is the
     parent to one or more namespaces. To date, media errors have been
     reported via the "badblocks" attribute attached to pmem block
  devices for namespaces in "raw" or "memory" mode. Given that
  namespaces can also be in "device-dax" or "btt-sector" mode, this new
  interface reports media errors generically, i.e. independent of
  namespace modes or state.

     This subsequently allows userspace tooling to craft "ACPI 6.1
     Section 9.20.7.6 Function Index 4 - Clear Uncorrectable Error"
     requests and submit them via the ioctl path for NVDIMM root bus
     devices.
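
     For illustration, a minimal userspace sketch of such a request (the
     /dev/ndctl0 path, the helper name, and the lack of teardown are
     assumptions for brevity, not part of this series):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/ndctl.h>

	/* sketch: ask the root bus to clear a poisoned physical range */
	static int clear_media_error(unsigned long long addr,
			unsigned long long len)
	{
		struct nd_cmd_clear_error clear_err = {
			.address = addr,
			.length = len,
		};
		int fd = open("/dev/ndctl0", O_RDWR);

		if (fd < 0 || ioctl(fd, ND_IOCTL_CLEAR_ERROR, &clear_err) < 0)
			return -1;
		printf("cleared %llu bytes\n",
				(unsigned long long) clear_err.cleared);
		return 0;
	}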

   - Introduce 'struct dax_device' and 'struct dax_operations': Prompted
     by a request from Linus and feedback from Christoph this allows for
     dax capable drivers to publish their own custom dax operations.
     This fixes the broken assumption that all dax operations are
     related to a persistent memory device, and makes it easier for
     other architectures and platforms to add customized persistent
     memory support.
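
     A condensed sketch of the resulting provider pattern, mirroring the
     brd and axonram conversions in the diff below (the foo_* names and
     fields are placeholders, not a real driver):

	static long foo_dax_direct_access(struct dax_device *dax_dev,
			pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
	{
		struct foo_device *foo = dax_get_private(dax_dev);
		resource_size_t offset = pgoff * PAGE_SIZE;

		/* translate pgoff to a kernel address and pfn */
		*kaddr = foo->virt_base + offset;
		*pfn = phys_to_pfn_t(foo->phys_base + offset, PFN_DEV);
		return (foo->size - offset) / PAGE_SIZE;
	}

	static const struct dax_operations foo_dax_ops = {
		.direct_access = foo_dax_direct_access,
	};

	/* at probe time the driver publishes its own operations */
	foo->dax_dev = alloc_dax(foo, foo->disk->disk_name, &foo_dax_ops);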

   - 'libnvdimm' core updates: A new "deep_flush" sysfs attribute is
     available for storage appliance applications to manually trigger
     memory controllers to drain write-pending buffers that would
     otherwise be flushed automatically by the platform ADR
     (asynchronous-DRAM-refresh) mechanism at a power loss event.
     Support for "locked" DIMMs is included to prevent namespaces from
     surfacing when the namespace label data area is locked. Finally,
     fixes for various reported deadlocks and crashes, also tagged for
     -stable.
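
     Triggering the new "deep_flush" manually amounts to writing to the
     region's sysfs attribute; a sketch (region0 is a placeholder
     device name):

	/* request a write-pending-buffer flush from the memory controller */
	int fd = open("/sys/bus/nd/devices/region0/deep_flush", O_WRONLY);

	if (fd >= 0) {
		write(fd, "1", 1);
		close(fd);
	}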

   - ACPI / nfit driver updates: General updates of the nfit driver to
     add DSM command overrides, ACPI 6.1 health state flags support, DSM
     payload debug available by default, and various fixes.

  Acknowledgements that came after the branch was pushed:

   - commit 565851c9 "device-dax: fix sysfs attribute deadlock":
     Tested-by: Yi Zhang <yizhan@redhat.com>

   - commit 23f49844 "libnvdimm: rework region badblocks clearing"
     Tested-by: Toshi Kani <toshi.kani@hpe.com>"

* tag 'libnvdimm-for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (52 commits)
  libnvdimm, pfn: fix 'npfns' vs section alignment
  libnvdimm: handle locked label storage areas
  libnvdimm: convert NDD_ flags to use bitops, introduce NDD_LOCKED
  brd: fix uninitialized use of brd->dax_dev
  block, dax: use correct format string in bdev_dax_supported
  device-dax: fix sysfs attribute deadlock
  libnvdimm: restore "libnvdimm: band aid btt vs clear poison locking"
  libnvdimm: fix nvdimm_bus_lock() vs device_lock() ordering
  libnvdimm: rework region badblocks clearing
  acpi, nfit: kill ACPI_NFIT_DEBUG
  libnvdimm: fix clear length of nvdimm_forget_poison()
  libnvdimm, pmem: fix a NULL pointer BUG in nd_pmem_notify
  libnvdimm, region: sysfs trigger for nvdimm_flush()
  libnvdimm: fix phys_addr for nvdimm_clear_poison
  x86, dax, pmem: remove indirection around memcpy_from_pmem()
  block: remove block_device_operations ->direct_access()
  block, dax: convert bdev_dax_supported() to dax_direct_access()
  filesystem-dax: convert to dax_direct_access()
  Revert "block: use DAX for partition table reads"
  ext2, ext4, xfs: retrieve dax_device for iomap operations
  ...
parents c6a677c6 73616367
@@ -284,6 +284,7 @@ config CPM2
 config AXON_RAM
 	tristate "Axon DDR2 memory device driver"
 	depends on PPC_IBM_CELL_BLADE && BLOCK
+	select DAX
 	default m
 	help
 	  It registers one block device per Axon's DDR2 memory bank found
...
@@ -25,6 +25,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/dax.h>
 #include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -62,6 +63,7 @@ static int azfs_major, azfs_minor;
 struct axon_ram_bank {
 	struct platform_device	*device;
 	struct gendisk		*disk;
+	struct dax_device	*dax_dev;
 	unsigned int		irq_id;
 	unsigned long		ph_addr;
 	unsigned long		io_addr;
@@ -137,25 +139,32 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 	return BLK_QC_T_NONE;
 }

-/**
- * axon_ram_direct_access - direct_access() method for block device
- * @device, @sector, @data: see block_device_operations method
- */
+static const struct block_device_operations axon_ram_devops = {
+	.owner		= THIS_MODULE,
+};
+
 static long
-axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void **kaddr, pfn_t *pfn, long size)
+__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff,
+		       long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct axon_ram_bank *bank = device->bd_disk->private_data;
-	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
+	resource_size_t offset = pgoff * PAGE_SIZE;

 	*kaddr = (void *) bank->io_addr + offset;
 	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
-	return bank->size - offset;
+	return (bank->size - offset) / PAGE_SIZE;
 }

-static const struct block_device_operations axon_ram_devops = {
-	.owner		= THIS_MODULE,
-	.direct_access	= axon_ram_direct_access
+static long
+axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+		       long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct axon_ram_bank *bank = dax_get_private(dax_dev);
+
+	return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations axon_ram_dax_ops = {
+	.direct_access = axon_ram_dax_direct_access,
 };

 /**
@@ -219,6 +228,7 @@ static int axon_ram_probe(struct platform_device *device)
 		goto failed;
 	}

+
 	bank->disk->major = azfs_major;
 	bank->disk->first_minor = azfs_minor;
 	bank->disk->fops = &axon_ram_devops;
@@ -227,6 +237,13 @@ static int axon_ram_probe(struct platform_device *device)
 	sprintf(bank->disk->disk_name, "%s%d",
 			AXON_RAM_DEVICE_NAME, axon_ram_bank_id);

+	bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
+			&axon_ram_dax_ops);
+	if (!bank->dax_dev) {
+		rc = -ENOMEM;
+		goto failed;
+	}
+
 	bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
 	if (bank->disk->queue == NULL) {
 		dev_err(&device->dev, "Cannot register disk queue\n");
@@ -278,6 +295,8 @@ static int axon_ram_probe(struct platform_device *device)
 			del_gendisk(bank->disk);
 			put_disk(bank->disk);
 		}
+		kill_dax(bank->dax_dev);
+		put_dax(bank->dax_dev);
 		device->dev.platform_data = NULL;
 		if (bank->io_addr != 0)
 			iounmap((void __iomem *) bank->io_addr);
@@ -300,6 +319,8 @@ axon_ram_remove(struct platform_device *device)

 	device_remove_file(&device->dev, &dev_attr_ecc);
 	free_irq(bank->irq_id, device);
+	kill_dax(bank->dax_dev);
+	put_dax(bank->dax_dev);
 	del_gendisk(bank->disk);
 	put_disk(bank->disk);
 	iounmap((void __iomem *) bank->io_addr);
...
@@ -44,11 +44,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 		BUG();
 }

-static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
-{
-	return memcpy_mcsafe(dst, src, n);
-}
-
 /**
  * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:	virtual start address
...
@@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct);
 #define memset(s, c, n) __memset(s, c, n)
 #endif

+#define __HAVE_ARCH_MEMCPY_MCSAFE 1
 __must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
 DECLARE_STATIC_KEY_FALSE(mcsafe_key);
...
@@ -6,6 +6,7 @@ menuconfig BLOCK
 	default y
 	select SBITMAP
 	select SRCU
+	select DAX
 	help
 	 Provide block layer support for the kernel.
...
@@ -16,7 +16,6 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
-#include <linux/dax.h>
 #include <linux/blktrace_api.h>

 #include "partitions/check.h"
@@ -630,24 +629,12 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 	return 0;
 }

-static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
-{
-	struct address_space *mapping = bdev->bd_inode->i_mapping;
-
-	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
-				 NULL);
-}
-
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 {
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
 	struct page *page;

-	/* don't populate page cache for dax capable devices */
-	if (IS_DAX(bdev->bd_inode))
-		page = read_dax_sector(bdev, n);
-	else
-		page = read_pagecache_sector(bdev, n);
-
+	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
 	if (!IS_ERR(page)) {
 		if (PageError(page))
 			goto fail;
...
@@ -71,7 +71,7 @@ obj-$(CONFIG_PARPORT) += parport/
 obj-$(CONFIG_NVM) += lightnvm/
 obj-y += base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM) += nvdimm/
-obj-$(CONFIG_DEV_DAX) += dax/
+obj-$(CONFIG_DAX) += dax/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS) += nubus/
 obj-y += macintosh/
...
@@ -12,15 +12,3 @@ config ACPI_NFIT
 	  To compile this driver as a module, choose M here:
 	  the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
-	bool "NFIT DSM debug"
-	depends on ACPI_NFIT
-	depends on DYNAMIC_DEBUG
-	default n
-	help
-	  Enabling this option causes the nfit driver to dump the
-	  input and output buffers of _DSM operations on the ACPI0012
-	  device and its children. This can be very verbose, so leave
-	  it disabled unless you are debugging a hardware / firmware
-	  issue.
This diff is collapsed.
@@ -37,7 +37,7 @@
 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
 		| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
-		| ACPI_NFIT_MEM_NOT_ARMED)
+		| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)

 enum nfit_uuids {
 	/* for simplicity alias the uuid index with the family id */
@@ -163,6 +163,7 @@ struct acpi_nfit_desc {
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
+	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -238,6 +239,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 const u8 *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void acpi_nfit_shutdown(void *data);
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
 void __acpi_nvdimm_notify(struct device *dev, u32 event);
 int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
...
@@ -323,6 +323,7 @@ config BLK_DEV_SX8

 config BLK_DEV_RAM
 	tristate "RAM block device support"
+	select DAX if BLK_DEV_RAM_DAX
 	---help---
 	  Saying Y here will allow you to use a portion of your RAM memory as
 	  a block device, so that you can make file systems on it, read and
...
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 #include <linux/pfn_t.h>
+#include <linux/dax.h>
 #endif

 #include <linux/uaccess.h>
@@ -41,6 +42,9 @@ struct brd_device {
 	struct request_queue	*brd_queue;
 	struct gendisk		*brd_disk;
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+	struct dax_device	*dax_dev;
+#endif
 	struct list_head	brd_list;

 	/*
@@ -326,30 +330,38 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 }

 #ifdef CONFIG_BLK_DEV_RAM_DAX
-static long brd_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct brd_device *brd = bdev->bd_disk->private_data;
 	struct page *page;

 	if (!brd)
 		return -ENODEV;
-	page = brd_insert_page(brd, sector);
+	page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
 	if (!page)
 		return -ENOSPC;
 	*kaddr = page_address(page);
 	*pfn = page_to_pfn_t(page);

-	return PAGE_SIZE;
+	return 1;
 }
-#else
-#define brd_direct_access NULL
+
+static long brd_dax_direct_access(struct dax_device *dax_dev,
+		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct brd_device *brd = dax_get_private(dax_dev);
+
+	return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations brd_dax_ops = {
+	.direct_access = brd_dax_direct_access,
+};
 #endif

 static const struct block_device_operations brd_fops = {
 	.owner =		THIS_MODULE,
 	.rw_page =		brd_rw_page,
-	.direct_access =	brd_direct_access,
 };

 /*
@@ -415,9 +427,6 @@ static struct brd_device *brd_alloc(int i)
 	 * is harmless)
 	 */
 	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
-#ifdef CONFIG_BLK_DEV_RAM_DAX
-	queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
-#endif
 	disk = brd->brd_disk = alloc_disk(max_part);
 	if (!disk)
 		goto out_free_queue;
@@ -430,8 +439,21 @@ static struct brd_device *brd_alloc(int i)
 	sprintf(disk->disk_name, "ram%d", i);
 	set_capacity(disk, rd_size * 2);

+#ifdef CONFIG_BLK_DEV_RAM_DAX
+	queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
+	brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops);
+	if (!brd->dax_dev)
+		goto out_free_inode;
+#endif
+
 	return brd;

+#ifdef CONFIG_BLK_DEV_RAM_DAX
+out_free_inode:
+	kill_dax(brd->dax_dev);
+	put_dax(brd->dax_dev);
+#endif
 out_free_queue:
 	blk_cleanup_queue(brd->brd_queue);
 out_free_dev:
@@ -471,6 +493,10 @@ static struct brd_device *brd_init_one(int i, bool *new)
 static void brd_del_one(struct brd_device *brd)
 {
 	list_del(&brd->brd_list);
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+	kill_dax(brd->dax_dev);
+	put_dax(brd->dax_dev);
+#endif
 	del_gendisk(brd->brd_disk);
 	brd_free(brd);
 }
...
-menuconfig DEV_DAX
+menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
+	select SRCU
 	default m if NVDIMM_DAX
+
+if DAX
+
+config DEV_DAX
+	tristate "Device DAX: direct access mapping device"
 	depends on TRANSPARENT_HUGEPAGE
-	select SRCU
 	help
 	  Support raw access to differentiated (persistence, bandwidth,
 	  latency...) memory via an mmap(2) capable character
@@ -11,7 +16,6 @@ menuconfig DEV_DAX
 	  baseline memory pool. Mappings of a /dev/daxX.Y device impose
 	  restrictions that make the mapping behavior deterministic.

-if DEV_DAX
-
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
...
-obj-$(CONFIG_DEV_DAX) += dax.o
+obj-$(CONFIG_DAX) += dax.o
+obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o

+dax-y := super.o
 dax_pmem-y := pmem.o
+device_dax-y := device.o
/*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __DAX_PRIVATE_H__
#define __DAX_PRIVATE_H__
#include <linux/device.h>
#include <linux/cdev.h>
/**
* struct dax_region - mapping infrastructure for dax devices
* @id: kernel-wide unique region for a memory range
* @base: linear address corresponding to @res
* @kref: to pin while other agents have a need to do lookups
* @dev: parent device backing this region
* @align: allocation and mapping alignment for child dax devices
* @res: physical address range of the region
* @pfn_flags: identify whether the pfns are paged back or not
*/
struct dax_region {
int id;
struct ida ida;
void *base;
struct kref kref;
struct device *dev;
unsigned int align;
struct resource res;
unsigned long pfn_flags;
};
/**
* struct dev_dax - instance data for a subdivision of a dax region
* @region - parent region
* @dax_dev - core dax functionality
* @dev - device core
* @id - child id in the region
* @num_resources - number of physical address extents in this device
* @res - array of physical address ranges
*/
struct dev_dax {
struct dax_region *region;
struct dax_device *dax_dev;
struct device dev;
int id;
int num_resources;
struct resource res[0];
};
#endif
 /*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -12,14 +12,7 @@
  */
 #ifndef __DAX_H__
 #define __DAX_H__
-struct device;
-struct dax_dev;
-struct resource;
-struct dax_region;
-void dax_region_put(struct dax_region *dax_region);
-struct dax_region *alloc_dax_region(struct device *parent,
-		int region_id, struct resource *res, unsigned int align,
-		void *addr, unsigned long flags);
-struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
-		struct resource *res, int count);
+struct dax_device;
+struct dax_device *inode_dax(struct inode *inode);
+struct inode *dax_inode(struct dax_device *dax_dev);
 #endif /* __DAX_H__ */
/*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __DEVICE_DAX_H__
#define __DEVICE_DAX_H__
struct device;
struct dev_dax;
struct resource;
struct dax_region;
void dax_region_put(struct dax_region *dax_region);
struct dax_region *alloc_dax_region(struct device *parent,
int region_id, struct resource *res, unsigned int align,
void *addr, unsigned long flags);
struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
struct resource *res, int count);
#endif /* __DEVICE_DAX_H__ */
This diff is collapsed.
@@ -16,7 +16,7 @@
 #include <linux/pfn_t.h>
 #include "../nvdimm/pfn.h"
 #include "../nvdimm/nd.h"
-#include "dax.h"
+#include "device-dax.h"

 struct dax_pmem {
 	struct device *dev;
@@ -61,8 +61,8 @@ static int dax_pmem_probe(struct device *dev)
 	int rc;
 	void *addr;
 	struct resource res;
-	struct dax_dev *dax_dev;
 	struct nd_pfn_sb *pfn_sb;
+	struct dev_dax *dev_dax;
 	struct dax_pmem *dax_pmem;
 	struct nd_region *nd_region;
 	struct nd_namespace_io *nsio;
@@ -130,12 +130,12 @@ static int dax_pmem_probe(struct device *dev)
 		return -ENOMEM;

 	/* TODO: support for subdividing a dax region... */
-	dax_dev = devm_create_dax_dev(dax_region, &res, 1);
+	dev_dax = devm_create_dev_dax(dax_region, &res, 1);

-	/* child dax_dev instances now own the lifetime of the dax_region */
+	/* child dev_dax instances now own the lifetime of the dax_region */
 	dax_region_put(dax_region);

-	return PTR_ERR_OR_ZERO(dax_dev);
+	return PTR_ERR_OR_ZERO(dev_dax);
 }

 static struct nd_device_driver dax_pmem_driver = {
...
This diff is collapsed.
@@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLK_DEV_DM_BUILTIN
+	select DAX
 	---help---
 	  Device-mapper is a low level volume manager. It works by allowing
 	  people to specify mappings for ranges of logical sectors. Various
...
@@ -58,6 +58,7 @@ struct mapped_device {
 	struct target_type *immutable_target_type;

 	struct gendisk *disk;
+	struct dax_device *dax_dev;
 	char name[16];

 	void *interface_ptr;
...
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
+#include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/device-mapper.h>
@@ -142,22 +143,20 @@ static int linear_iterate_devices(struct dm_target *ti,
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }

-static long linear_direct_access(struct dm_target *ti, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
+	long ret;
 	struct linear_c *lc = ti->private;
 	struct block_device *bdev = lc->dev->bdev;
-	struct blk_dax_ctl dax = {
-		.sector = linear_map_sector(ti, sector),
-		.size = size,
-	};
-	long ret;
+	struct dax_device *dax_dev = lc->dev->dax_dev;
+	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;

-	ret = bdev_direct_access(bdev, &dax);
-	*kaddr = dax.addr;
-	*pfn = dax.pfn;
-
-	return ret;
+	dev_sector = linear_map_sector(ti, sector);
+	ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
+	if (ret)
+		return ret;
+	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }

 static struct target_type linear_target = {
@@ -171,7 +170,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.prepare_ioctl = linear_prepare_ioctl,
 	.iterate_devices = linear_iterate_devices,
-	.direct_access = linear_direct_access,
+	.direct_access = linear_dax_direct_access,
 };

 int __init dm_linear_init(void)
...
@@ -2302,8 +2302,8 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 	return do_origin(o->dev, bio);
 }

-static long origin_direct_access(struct dm_target *ti, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
 	DMWARN("device does not support dax.");
 	return -EIO;
@@ -2368,7 +2368,7 @@ static struct target_type origin_target = {
 	.postsuspend = origin_postsuspend,
 	.status = origin_status,
 	.iterate_devices = origin_iterate_devices,
-	.direct_access = origin_direct_access,
+	.direct_access = origin_dax_direct_access,
 };

 static struct target_type snapshot_target = {
...
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
+#include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/log2.h>
@@ -310,27 +311,25 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }

-static long stripe_direct_access(struct dm_target *ti, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
+	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
 	struct stripe_c *sc = ti->private;
-	uint32_t stripe;
+	struct dax_device *dax_dev;
 	struct block_device *bdev;
-	struct blk_dax_ctl dax = {
-		.size = size,
-	};
+	uint32_t stripe;
 	long ret;

-	stripe_map_sector(sc, sector, &stripe, &dax.sector);
-
-	dax.sector += sc->stripe[stripe].physical_start;
+	stripe_map_sector(sc, sector, &stripe, &dev_sector);
+	dev_sector += sc->stripe[stripe].physical_start;
+	dax_dev = sc->stripe[stripe].dev->dax_dev;
 	bdev = sc->stripe[stripe].dev->bdev;

-	ret = bdev_direct_access(bdev, &dax);
-	*kaddr = dax.addr;
-	*pfn = dax.pfn;
-
-	return ret;
+	ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
+	if (ret)
+		return ret;
+	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }

 /*
@@ -451,7 +450,7 @@ static struct target_type stripe_target = {
 	.status = stripe_status,
 	.iterate_devices = stripe_iterate_devices,
 	.io_hints = stripe_io_hints,
-	.direct_access = stripe_direct_access,
+	.direct_access = stripe_dax_direct_access,
 };

 int __init dm_stripe_init(void)
...
@@ -142,8 +142,8 @@ static void io_err_release_clone_rq(struct request *clone)
 {
 }

-static long io_err_direct_access(struct dm_target *ti, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
 	return -EIO;
 }
@@ -157,7 +157,7 @@ static struct target_type error_target = {
 	.map = io_err_map,
 	.clone_and_map_rq = io_err_clone_and_map_rq,
 	.release_clone_rq = io_err_release_clone_rq,
-	.direct_access = io_err_direct_access,
+	.direct_access = io_err_dax_direct_access,
 };

 int __init dm_target_init(void)
...
@@ -16,6 +16,7 @@
 #include <linux/blkpg.h>
 #include <linux/bio.h>
 #include <linux/mempool.h>
+#include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/hdreg.h>
@@ -629,6 +630,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
 	}

 	td->dm_dev.bdev = bdev;
+	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
 	return 0;
 }
@@ -642,7 +644,9 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
 	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
 	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
+	put_dax(td->dm_dev.dax_dev);
 	td->dm_dev.bdev = NULL;
+	td->dm_dev.dax_dev = NULL;
 }

 static struct table_device *find_table_device(struct list_head *l, dev_t dev,
@@ -920,31 +924,49 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

-static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
+		sector_t sector, int *srcu_idx)
 {
-	struct mapped_device *md = bdev->bd_disk->private_data;
 	struct dm_table *map;
 	struct dm_target *ti;
-	int srcu_idx;
-	long len, ret = -EIO;

-	map = dm_get_live_table(md, &srcu_idx);
+	map = dm_get_live_table(md, srcu_idx);
 	if (!map)
-		goto out;
+		return NULL;

 	ti = dm_table_find_target(map, sector);
 	if (!dm_target_is_valid(ti))
-		goto out;
+		return NULL;
+
+	return ti;
+}
+
+static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct mapped_device *md = dax_get_private(dax_dev);
+	sector_t sector = pgoff * PAGE_SECTORS;
+	struct dm_target *ti;
+	long len, ret = -EIO;
+	int srcu_idx;

-	len = max_io_len(sector, ti) << SECTOR_SHIFT;
-	size = min(len, size);
+	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+	if (!ti)
+		goto out;
+	if (!ti->type->direct_access)
+		goto out;
+
+	len = max_io_len(sector, ti) / PAGE_SECTORS;
+	if (len < 1)
+		goto out;
+	nr_pages = min(len, nr_pages);
 	if (ti->type->direct_access)
-		ret = ti->type->direct_access(ti, sector, kaddr, pfn, size);
+		ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
+
 out:
 	dm_put_live_table(md, srcu_idx);

-	return min(ret, size);
+	return ret;
 }

 /*
@@ -1471,6 +1493,7 @@ static int next_free_minor(int *minor)
 }

 static const struct block_device_operations dm_blk_dops;
+static const struct dax_operations dm_dax_ops;

 static void dm_wq_work(struct work_struct *work);
@@ -1517,6 +1540,12 @@ static void cleanup_mapped_device(struct mapped_device *md)
 	if (md->bs)
 		bioset_free(md->bs);

+	if (md->dax_dev) {
+		kill_dax(md->dax_dev);
+		put_dax(md->dax_dev);
+		md->dax_dev = NULL;
+	}
+
 	if (md->disk) {
 		spin_lock(&_minor_lock);
 		md->disk->private_data = NULL;
@@ -1544,6 +1573,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
 	int r, numa_node_id = dm_get_numa_node();
+	struct dax_device *dax_dev;
 	struct mapped_device *md;
 	void *old_md;
@@ -1608,6 +1638,12 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->queue = md->queue;
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
+
+	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+	if (!dax_dev)
+		goto bad;
+	md->dax_dev = dax_dev;
+
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
@@ -2816,12 +2852,15 @@ static const struct block_device_operations dm_blk_dops = {
 	.open = dm_blk_open,
 	.release = dm_blk_close,
 	.ioctl = dm_blk_ioctl,
-	.direct_access = dm_blk_direct_access,
 	.getgeo = dm_blk_getgeo,
 	.pr_ops = &dm_pr_ops,
 	.owner = THIS_MODULE
 };

+static const struct dax_operations dm_dax_ops = {
+	.direct_access = dm_dax_direct_access,
+};
+
 /*
  * module hooks
  */
...
@@ -20,6 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
+	select DAX
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
...
@@ -314,7 +314,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 	if (rc < 0) {
 		struct nd_btt *nd_btt = to_nd_btt(btt_dev);

-		__nd_detach_ndns(btt_dev, &nd_btt->ndns);
+		nd_detach_ndns(btt_dev, &nd_btt->ndns);
 		put_device(btt_dev);
 	}
...
@@ -27,6 +27,7 @@
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
+#include "pfn.h"

 int nvdimm_major;
 static int nvdimm_bus_major;
@@ -171,6 +172,57 @@ void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event)
 }
 EXPORT_SYMBOL_GPL(nvdimm_region_notify);

+struct clear_badblocks_context {
+	resource_size_t phys, cleared;
+};
+
+static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
+{
+	struct clear_badblocks_context *ctx = data;
+	struct nd_region *nd_region;
+	resource_size_t ndr_end;
+	sector_t sector;
+
+	/* make sure device is a region */
+	if (!is_nd_pmem(dev))
+		return 0;
+
+	nd_region = to_nd_region(dev);
+	ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;
+
+	/* make sure we are in the region */
+	if (ctx->phys < nd_region->ndr_start
+			|| (ctx->phys + ctx->cleared) > ndr_end)
+		return 0;
+
+	sector = (ctx->phys - nd_region->ndr_start) / 512;
+	badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);
+
+	return 0;
+}
+
+static void nvdimm_clear_badblocks_regions(struct nvdimm_bus *nvdimm_bus,
+		phys_addr_t phys, u64 cleared)
+{
+	struct clear_badblocks_context ctx = {
+		.phys = phys,
+		.cleared = cleared,
+	};
+
+	device_for_each_child(&nvdimm_bus->dev, &ctx,
+			nvdimm_clear_badblocks_region);
+}
+
+static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
+		phys_addr_t phys, u64 cleared)
+{
+	if (cleared > 0)
+		nvdimm_forget_poison(nvdimm_bus, phys, cleared);
+
+	if (cleared > 0 && cleared / 512)
+		nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared);
+}
+
 long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 		unsigned int len)
 {
@@ -218,7 +270,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 	if (cmd_rc < 0)
 		return cmd_rc;

-	nvdimm_clear_from_poison_list(nvdimm_bus, phys, len);
+	nvdimm_account_cleared_poison(nvdimm_bus, phys, clear_err.cleared);
+
 	return clear_err.cleared;
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
@@ -286,6 +339,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 	init_waitqueue_head(&nvdimm_bus->probe_wait);
 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
 	mutex_init(&nvdimm_bus->reconfig_mutex);
+	spin_lock_init(&nvdimm_bus->poison_lock);
 	if (nvdimm_bus->id < 0) {
 		kfree(nvdimm_bus);
 		return NULL;
@@ -354,9 +408,9 @@ static int nd_bus_remove(struct device *dev)
 	nd_synchronize();
 	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);

-	nvdimm_bus_lock(&nvdimm_bus->dev);
+	spin_lock(&nvdimm_bus->poison_lock);
 	free_poison_list(&nvdimm_bus->poison_list);
-	nvdimm_bus_unlock(&nvdimm_bus->dev);
+	spin_unlock(&nvdimm_bus->poison_lock);

 	nvdimm_bus_destroy_ndctl(nvdimm_bus);
@@ -769,16 +823,55 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
 	} while (true);
 }

-static int pmem_active(struct device *dev, void *data)
+static int nd_pmem_forget_poison_check(struct device *dev, void *data)
 {
-	if (is_nd_pmem(dev) && dev->driver)
+	struct nd_cmd_clear_error *clear_err =
+		(struct nd_cmd_clear_error *)data;
+	struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
+	struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
+	struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL;
+	struct nd_namespace_common *ndns = NULL;
+	struct nd_namespace_io *nsio;
+	resource_size_t offset = 0, end_trunc = 0, start, end, pstart, pend;
+
+	if (nd_dax || !dev->driver)
+		return 0;
+
+	start = clear_err->address;
+	end = clear_err->address + clear_err->cleared - 1;
+
+	if (nd_btt || nd_pfn || nd_dax) {
+		if (nd_btt)
+			ndns = nd_btt->ndns;
+		else if (nd_pfn)
+			ndns = nd_pfn->ndns;
+		else if (nd_dax)
+			ndns = nd_dax->nd_pfn.ndns;
+
+		if (!ndns)
+			return 0;
+	} else
+		ndns = to_ndns(dev);
+
+	nsio = to_nd_namespace_io(&ndns->dev);
+	pstart = nsio->res.start + offset;
+	pend = nsio->res.end - end_trunc;
+
+	if ((pstart >= start) && (pend <= end))
 		return -EBUSY;
+
 	return 0;
 }

+static int nd_ns_forget_poison_check(struct device *dev, void *data)
+{
+	return device_for_each_child(dev, data, nd_pmem_forget_poison_check);
+}
+
 /* set_config requires an idle interleave set */
 static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,
-		struct nvdimm *nvdimm, unsigned int cmd)
+		struct nvdimm *nvdimm, unsigned int cmd, void *data)
 {
 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
@@ -792,8 +885,8 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,

 	/* require clear error to go through the pmem driver */
 	if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR)
-		return device_for_each_child(&nvdimm_bus->dev, NULL,
-				pmem_active);
+		return device_for_each_child(&nvdimm_bus->dev, data,
+				nd_ns_forget_poison_check);

 	if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
 		return 0;
@@ -820,7 +913,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	const char *cmd_name, *dimm_name;
 	unsigned long cmd_mask;
 	void *buf;
-	int rc, i;
+	int rc, i, cmd_rc;

 	if (nvdimm) {
 		desc = nd_cmd_dimm_desc(cmd);
@@ -927,13 +1020,20 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	}

 	nvdimm_bus_lock(&nvdimm_bus->dev);
-	rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd);
+	rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf);
 	if (rc)
 		goto out_unlock;

-	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
+	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, &cmd_rc);
 	if (rc < 0)
 		goto out_unlock;
+
+	if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR && cmd_rc >= 0) {
+		struct nd_cmd_clear_error *clear_err = buf;
+
+		nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
+				clear_err->cleared);
+	}
+
 	nvdimm_bus_unlock(&nvdimm_bus->dev);

 	if (copy_to_user(p, buf, buf_len))
...
@@ -21,8 +21,13 @@
 void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns)
 {
 	struct nd_namespace_common *ndns = *_ndns;
+	struct nvdimm_bus *nvdimm_bus;

-	lockdep_assert_held(&ndns->dev.mutex);
+	if (!ndns)
+		return;
+
+	nvdimm_bus = walk_to_nvdimm_bus(&ndns->dev);
+	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 	dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__);
 	ndns->claim = NULL;
 	*_ndns = NULL;
@@ -37,18 +42,20 @@ void nd_detach_ndns(struct device *dev,
 	if (!ndns)
 		return;
 	get_device(&ndns->dev);
-	device_lock(&ndns->dev);
+	nvdimm_bus_lock(&ndns->dev);
 	__nd_detach_ndns(dev, _ndns);
-	device_unlock(&ndns->dev);
+	nvdimm_bus_unlock(&ndns->dev);
 	put_device(&ndns->dev);
 }

 bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
 		struct nd_namespace_common **_ndns)
 {
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&attach->dev);
+
 	if (attach->claim)
 		return false;
-	lockdep_assert_held(&attach->dev.mutex);
+	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 	dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__);
 	attach->claim = dev;
 	*_ndns = attach;
@@ -61,9 +68,9 @@ bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
 {
 	bool claimed;

-	device_lock(&attach->dev);
+	nvdimm_bus_lock(&attach->dev);
 	claimed = __nd_attach_ndns(dev, attach, _ndns);
-	device_unlock(&attach->dev);
+	nvdimm_bus_unlock(&attach->dev);
 	return claimed;
 }
@@ -114,7 +121,7 @@ static void nd_detach_and_reset(struct device *dev,
 		struct nd_namespace_common **_ndns)
 {
 	/* detach the namespace and destroy / reset the device */
-	nd_detach_ndns(dev, _ndns);
+	__nd_detach_ndns(dev, _ndns);
 	if (is_idle(dev, *_ndns)) {
 		nd_device_unregister(dev, ND_ASYNC);
 	} else if (is_nd_btt(dev)) {
@@ -184,7 +191,7 @@ ssize_t nd_namespace_store(struct device *dev,
 	}

 	WARN_ON_ONCE(!is_nvdimm_bus_locked(dev));
-	if (!nd_attach_ndns(dev, ndns, _ndns)) {
+	if (!__nd_attach_ndns(dev, ndns, _ndns)) {
 		dev_dbg(dev, "%s already claimed\n",
 				dev_name(&ndns->dev));
 		len = -EBUSY;
@@ -239,22 +246,24 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	if (rw == READ) {
 		if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
 			return -EIO;
-		return memcpy_from_pmem(buf, nsio->addr + offset, size);
+		return memcpy_mcsafe(buf, nsio->addr + offset, size);
 	}

 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
 		/*
 		 * FIXME: nsio_rw_bytes() may be called from atomic
-		 * context in the btt case and nvdimm_clear_poison()
-		 * takes a sleeping lock. Until the locking can be
-		 * reworked this capability requires that the namespace
-		 * is not claimed by btt.
+		 * context in the btt case and the ACPI DSM path for
+		 * clearing the error takes sleeping locks and allocates
+		 * memory. An explicit error clearing path, and support
+		 * for tracking badblocks in BTT metadata is needed to
+		 * work around this collision.
 		 */
 		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
 				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
 			long cleared;

-			cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
+			cleared = nvdimm_clear_poison(&ndns->dev,
+					nsio->res.start + offset, size);
 			if (cleared < size)
 				rc = -EIO;
 			if (cleared > 0 && cleared / 512) {
...
@@ -518,6 +518,15 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
 }
 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);

+static void append_poison_entry(struct nvdimm_bus *nvdimm_bus,
+		struct nd_poison *pl, u64 addr, u64 length)
+{
+	lockdep_assert_held(&nvdimm_bus->poison_lock);
+	pl->start = addr;
+	pl->length = length;
+	list_add_tail(&pl->list, &nvdimm_bus->poison_list);
+}
+
 static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
 		gfp_t flags)
 {
@@ -527,19 +536,24 @@ static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
 	if (!pl)
 		return -ENOMEM;

-	pl->start = addr;
-	pl->length = length;
-	list_add_tail(&pl->list, &nvdimm_bus->poison_list);
+	append_poison_entry(nvdimm_bus, pl, addr, length);
 	return 0;
 }

 static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 {
-	struct nd_poison *pl;
+	struct nd_poison *pl, *pl_new;

-	if (list_empty(&nvdimm_bus->poison_list))
-		return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+	spin_unlock(&nvdimm_bus->poison_lock);
+	pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL);
+	spin_lock(&nvdimm_bus->poison_lock);
+
+	if (list_empty(&nvdimm_bus->poison_list)) {
+		if (!pl_new)
+			return -ENOMEM;
+		append_poison_entry(nvdimm_bus, pl_new, addr, length);
+		return 0;
+	}

 	/*
 	 * There is a chance this is a duplicate, check for those first.
@@ -551,6 +565,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 		/* If length has changed, update this list entry */
 		if (pl->length != length)
 			pl->length = length;
+		kfree(pl_new);
 		return 0;
 	}
@@ -559,29 +574,33 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 	 * as any overlapping ranges will get resolved when the list is consumed
 	 * and converted to badblocks
 	 */
-	return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+	if (!pl_new)
+		return -ENOMEM;
+	append_poison_entry(nvdimm_bus, pl_new, addr, length);
+
+	return 0;
 }

 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 {
 	int rc;

-	nvdimm_bus_lock(&nvdimm_bus->dev);
+	spin_lock(&nvdimm_bus->poison_lock);
 	rc = bus_add_poison(nvdimm_bus, addr, length);
-	nvdimm_bus_unlock(&nvdimm_bus->dev);
+	spin_unlock(&nvdimm_bus->poison_lock);

 	return rc;
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);

-void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
-		phys_addr_t start, unsigned int len)
+void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
+		unsigned int len)
 {
 	struct list_head *poison_list = &nvdimm_bus->poison_list;
 	u64 clr_end = start + len - 1;
 	struct nd_poison *pl, *next;

-	nvdimm_bus_lock(&nvdimm_bus->dev);
+	spin_lock(&nvdimm_bus->poison_lock);
 	WARN_ON_ONCE(list_empty(poison_list));

 	/*
@@ -628,15 +647,15 @@ void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
 			u64 new_len = pl_end - new_start + 1;

 			/* Add new entry covering the right half */
-			add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+			add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT);
 			/* Adjust this entry to cover the left half */
 			pl->length = start - pl->start;
 			continue;
 		}
 	}
-	nvdimm_bus_unlock(&nvdimm_bus->dev);
+	spin_unlock(&nvdimm_bus->poison_lock);
 }
-EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list);
+EXPORT_SYMBOL_GPL(nvdimm_forget_poison);

 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
...
@@ -124,7 +124,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
 	dev_dbg(dev, "%s: dax: %s\n", __func__,
 			rc == 0 ? dev_name(dax_dev) : "<none>");
 	if (rc < 0) {
-		__nd_detach_ndns(dax_dev, &nd_pfn->ndns);
+		nd_detach_ndns(dax_dev, &nd_pfn->ndns);
 		put_device(dax_dev);
 	} else
 		__nd_device_register(dax_dev);
...
drivers/nvdimm/dimm.c
@@ -49,6 +49,8 @@ static int nvdimm_probe(struct device *dev)
 	kref_init(&ndd->kref);

 	rc = nvdimm_init_nsarea(ndd);
+	if (rc == -EACCES)
+		nvdimm_set_locked(dev);
 	if (rc)
 		goto err;
...
drivers/nvdimm/dimm_devs.c
@@ -34,7 +34,7 @@ int nvdimm_check_config_data(struct device *dev)
 	if (!nvdimm->cmd_mask ||
 	    !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
-		if (nvdimm->flags & NDD_ALIASING)
+		if (test_bit(NDD_ALIASING, &nvdimm->flags))
 			return -ENXIO;
 		else
 			return -ENOTTY;
@@ -67,6 +67,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
 	struct nvdimm_bus_descriptor *nd_desc;
 	int rc = validate_dimm(ndd);
+	int cmd_rc = 0;

 	if (rc)
 		return rc;
@@ -76,8 +77,11 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
 	memset(cmd, 0, sizeof(*cmd));
 	nd_desc = nvdimm_bus->nd_desc;
-	return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
-			ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL);
+	rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
+			ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), &cmd_rc);
+	if (rc < 0)
+		return rc;
+	return cmd_rc;
 }

 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
@@ -188,7 +192,14 @@ void nvdimm_set_aliasing(struct device *dev)
 {
 	struct nvdimm *nvdimm = to_nvdimm(dev);

-	nvdimm->flags |= NDD_ALIASING;
+	set_bit(NDD_ALIASING, &nvdimm->flags);
+}
+
+void nvdimm_set_locked(struct device *dev)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+
+	set_bit(NDD_LOCKED, &nvdimm->flags);
 }

 static void nvdimm_release(struct device *dev)
...
drivers/nvdimm/namespace_devs.c
@@ -2236,14 +2236,21 @@ static int init_active_labels(struct nd_region *nd_region)
 		int count, j;

 		/*
-		 * If the dimm is disabled then prevent the region from
-		 * being activated if it aliases DPA.
+		 * If the dimm is disabled then we may need to prevent
+		 * the region from being activated.
 		 */
 		if (!ndd) {
-			if ((nvdimm->flags & NDD_ALIASING) == 0)
+			if (test_bit(NDD_LOCKED, &nvdimm->flags))
+				/* fail, label data may be unreadable */;
+			else if (test_bit(NDD_ALIASING, &nvdimm->flags))
+				/* fail, labels needed to disambiguate dpa */;
+			else
 				return 0;
-			dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n",
-					dev_name(&nd_mapping->nvdimm->dev));
+
+			dev_err(&nd_region->dev, "%s: is %s, failing probe\n",
+					dev_name(&nd_mapping->nvdimm->dev),
+					test_bit(NDD_LOCKED, &nvdimm->flags)
+					? "locked" : "disabled");
 			return -ENXIO;
 		}
 		nd_mapping->ndd = ndd;
...
drivers/nvdimm/nd-core.h
@@ -32,6 +32,7 @@ struct nvdimm_bus {
 	struct list_head poison_list;
 	struct list_head mapping_list;
 	struct mutex reconfig_mutex;
+	spinlock_t poison_lock;
 };

 struct nvdimm {
...
drivers/nvdimm/nd.h
@@ -154,6 +154,7 @@ struct nd_region {
 	u64 ndr_start;
 	int id, num_lanes, ro, numa_node;
 	void *provider_data;
+	struct badblocks bb;
 	struct nd_interleave_set *nd_set;
 	struct nd_percpu_lane __percpu *lane;
 	struct nd_mapping mapping[0];
@@ -239,6 +240,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 		unsigned int len);
 void nvdimm_set_aliasing(struct device *dev);
+void nvdimm_set_locked(struct device *dev);
 struct nd_btt *to_nd_btt(struct device *dev);

 struct nd_gen_sb {
...
drivers/nvdimm/pfn_devs.c
@@ -484,7 +484,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
 	dev_dbg(dev, "%s: pfn: %s\n", __func__,
 			rc == 0 ? dev_name(pfn_dev) : "<none>");
 	if (rc < 0) {
-		__nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
+		nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
 		put_device(pfn_dev);
 	} else
 		__nd_device_register(pfn_dev);
@@ -538,7 +538,8 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
 		altmap = NULL;
 	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
-		nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE;
+		nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
+					- offset) / PAGE_SIZE);
 		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
 			dev_info(&nd_pfn->dev,
 					"number of pfns truncated from %lld to %ld\n",
@@ -625,7 +626,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	 */
 	start += start_pad;
 	size = resource_size(&nsio->res);
-	npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
+	npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K)
+			/ PAGE_SIZE);
 	if (nd_pfn->mode == PFN_MODE_PMEM) {
 		/*
 		 * vmemmap_populate_hugepages() allocates the memmap array in
...
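The 'npfns vs section alignment' fix above rounds the pfn count up to a whole memory section so the reserved memmap always covers full sections. A behavioral sketch of the arithmetic, under the assumption of x86_64 defaults (128 MiB sections, 4 KiB pages); the helper name here is illustrative, not the kernel macro itself:

#define SECTION_PAGES	32768UL		/* assumed: 128 MiB / 4 KiB */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

/* e.g. a 1 GiB namespace minus a 2 MiB metadata offset yields
 * (0x40000000 - 0x200000) / 4096 = 261632 pfns; ALIGN_UP(261632,
 * SECTION_PAGES) == 262144, so the memmap reservation is sized for
 * whole sections rather than stopping mid-section. */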
drivers/nvdimm/pmem.c
@@ -29,6 +29,7 @@
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
+#include <linux/dax.h>
 #include <linux/nd.h>
 #include "pmem.h"
 #include "pfn.h"
@@ -89,7 +90,7 @@ static int read_pmem(struct page *page, unsigned int off,
 	int rc;
 	void *mem = kmap_atomic(page);

-	rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+	rc = memcpy_mcsafe(mem + off, pmem_addr, len);
 	kunmap_atomic(mem);
 	if (rc)
 		return -EIO;
@@ -200,13 +201,13 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 }

 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
-__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	resource_size_t offset = sector * 512 + pmem->data_offset;
+	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;

-	if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+					PFN_PHYS(nr_pages))))
 		return -EIO;
 	*kaddr = pmem->virt_addr + offset;
 	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
@@ -216,17 +217,28 @@ __weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
 	 * requested range.
 	 */
 	if (unlikely(pmem->bb.count))
-		return size;
-	return pmem->size - pmem->pfn_pad - offset;
+		return nr_pages;
+	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
 }

 static const struct block_device_operations pmem_fops = {
 	.owner =		THIS_MODULE,
 	.rw_page =		pmem_rw_page,
-	.direct_access =	pmem_direct_access,
 	.revalidate_disk =	nvdimm_revalidate_disk,
 };

+static long pmem_dax_direct_access(struct dax_device *dax_dev,
+		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct pmem_device *pmem = dax_get_private(dax_dev);
+
+	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations pmem_dax_ops = {
+	.direct_access = pmem_dax_direct_access,
+};
+
 static void pmem_release_queue(void *q)
 {
 	blk_cleanup_queue(q);
@@ -237,10 +249,14 @@ static void pmem_freeze_queue(void *q)
 	blk_freeze_queue_start(q);
 }

-static void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *__pmem)
 {
-	del_gendisk(disk);
-	put_disk(disk);
+	struct pmem_device *pmem = __pmem;
+
+	kill_dax(pmem->dax_dev);
+	put_dax(pmem->dax_dev);
+	del_gendisk(pmem->disk);
+	put_disk(pmem->disk);
 }

 static int pmem_attach_disk(struct device *dev,
@@ -251,6 +267,7 @@ static int pmem_attach_disk(struct device *dev,
 	struct vmem_altmap __altmap, *altmap = NULL;
 	struct resource *res = &nsio->res;
 	struct nd_pfn *nd_pfn = NULL;
+	struct dax_device *dax_dev;
 	int nid = dev_to_node(dev);
 	struct nd_pfn_sb *pfn_sb;
 	struct pmem_device *pmem;
@@ -334,6 +351,7 @@ static int pmem_attach_disk(struct device *dev,
 	disk = alloc_disk_node(0, nid);
 	if (!disk)
 		return -ENOMEM;
+	pmem->disk = disk;

 	disk->fops = &pmem_fops;
 	disk->queue = q;
@@ -345,9 +363,16 @@ static int pmem_attach_disk(struct device *dev,
 		return -ENOMEM;
 	nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
 	disk->bb = &pmem->bb;
-	device_add_disk(dev, disk);

-	if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
+	if (!dax_dev) {
+		put_disk(disk);
+		return -ENOMEM;
+	}
+	pmem->dax_dev = dax_dev;
+
+	device_add_disk(dev, disk);
+	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;

 	revalidate_disk(disk);
@@ -397,12 +422,12 @@ static void nd_pmem_shutdown(struct device *dev)

 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 {
-	struct pmem_device *pmem = dev_get_drvdata(dev);
-	struct nd_region *nd_region = to_region(pmem);
+	struct nd_region *nd_region;
 	resource_size_t offset = 0, end_trunc = 0;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
 	struct resource res;
+	struct badblocks *bb;

 	if (event != NVDIMM_REVALIDATE_POISON)
 		return;
@@ -411,20 +436,33 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 		struct nd_btt *nd_btt = to_nd_btt(dev);

 		ndns = nd_btt->ndns;
-	} else if (is_nd_pfn(dev)) {
-		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
-		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+		nd_region = to_nd_region(ndns->dev.parent);
+		nsio = to_nd_namespace_io(&ndns->dev);
+		bb = &nsio->bb;
+	} else {
+		struct pmem_device *pmem = dev_get_drvdata(dev);

-		ndns = nd_pfn->ndns;
-		offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
-		end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
-	} else
-		ndns = to_ndns(dev);
+		nd_region = to_region(pmem);
+		bb = &pmem->bb;
+
+		if (is_nd_pfn(dev)) {
+			struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+			struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+
+			ndns = nd_pfn->ndns;
+			offset = pmem->data_offset +
+					__le32_to_cpu(pfn_sb->start_pad);
+			end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+		} else {
+			ndns = to_ndns(dev);
+		}
+
+		nsio = to_nd_namespace_io(&ndns->dev);
+	}

-	nsio = to_nd_namespace_io(&ndns->dev);
 	res.start = nsio->res.start + offset;
 	res.end = nsio->res.end - end_trunc;
-	nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
+	nvdimm_badblocks_populate(nd_region, bb, &res);
 }

 MODULE_ALIAS("pmem");
...
drivers/nvdimm/pmem.h
@@ -5,8 +5,6 @@
 #include <linux/pfn_t.h>
 #include <linux/fs.h>

-long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size);
 /* this definition is in it's own header for tools/testing/nvdimm to consume */
 struct pmem_device {
 	/* One contiguous memory region per device */
@@ -20,5 +18,10 @@ struct pmem_device {
 	/* trim size when namespace capacity has been section aligned */
 	u32			pfn_pad;
 	struct badblocks	bb;
+	struct dax_device	*dax_dev;
+	struct gendisk		*disk;
 };

+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn);
 #endif /* __NVDIMM_PMEM_H__ */
drivers/nvdimm/region.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/nd.h>
+#include "nd-core.h"
 #include "nd.h"

 static int nd_region_probe(struct device *dev)
@@ -52,6 +53,17 @@ static int nd_region_probe(struct device *dev)
 	if (rc && err && rc == err)
 		return -ENODEV;

+	if (is_nd_pmem(&nd_region->dev)) {
+		struct resource ndr_res;
+
+		if (devm_init_badblocks(dev, &nd_region->bb))
+			return -ENODEV;
+		ndr_res.start = nd_region->ndr_start;
+		ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
+		nvdimm_badblocks_populate(nd_region,
+				&nd_region->bb, &ndr_res);
+	}
+
 	nd_region->btt_seed = nd_btt_create(nd_region);
 	nd_region->pfn_seed = nd_pfn_create(nd_region);
 	nd_region->dax_seed = nd_dax_create(nd_region);
@@ -104,6 +116,18 @@ static int child_notify(struct device *dev, void *data)

 static void nd_region_notify(struct device *dev, enum nvdimm_event event)
 {
+	if (event == NVDIMM_REVALIDATE_POISON) {
+		struct nd_region *nd_region = to_nd_region(dev);
+		struct resource res;
+
+		if (is_nd_pmem(&nd_region->dev)) {
+			res.start = nd_region->ndr_start;
+			res.end = nd_region->ndr_start +
+				nd_region->ndr_size - 1;
+			nvdimm_badblocks_populate(nd_region,
+					&nd_region->bb, &res);
+		}
+	}
 	device_for_each_child(dev, &event, child_notify);
 }
...
drivers/nvdimm/region_devs.c
@@ -222,7 +222,7 @@ int nd_region_to_nstype(struct nd_region *nd_region)
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 		struct nvdimm *nvdimm = nd_mapping->nvdimm;

-		if (nvdimm->flags & NDD_ALIASING)
+		if (test_bit(NDD_ALIASING, &nvdimm->flags))
 			alias++;
 	}
 	if (alias)
@@ -255,6 +255,35 @@ static ssize_t size_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(size);

+static ssize_t deep_flush_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	/*
+	 * NOTE: in the nvdimm_has_flush() error case this attribute is
+	 * not visible.
+	 */
+	return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region));
+}
+
+static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t len)
+{
+	bool flush;
+	int rc = strtobool(buf, &flush);
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	if (rc)
+		return rc;
+	if (!flush)
+		return -EINVAL;
+	nvdimm_flush(nd_region);
+
+	return len;
+}
+static DEVICE_ATTR_RW(deep_flush);
+
 static ssize_t mappings_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -448,6 +477,25 @@ static ssize_t read_only_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(read_only);

+static ssize_t region_badblocks_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	return badblocks_show(&nd_region->bb, buf, 0);
+}
+static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);
+
+static ssize_t resource_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	return sprintf(buf, "%#llx\n", nd_region->ndr_start);
+}
+static DEVICE_ATTR_RO(resource);
+
 static struct attribute *nd_region_attributes[] = {
 	&dev_attr_size.attr,
 	&dev_attr_nstype.attr,
@@ -455,11 +503,14 @@ static struct attribute *nd_region_attributes[] = {
 	&dev_attr_btt_seed.attr,
 	&dev_attr_pfn_seed.attr,
 	&dev_attr_dax_seed.attr,
+	&dev_attr_deep_flush.attr,
 	&dev_attr_read_only.attr,
 	&dev_attr_set_cookie.attr,
 	&dev_attr_available_size.attr,
 	&dev_attr_namespace_seed.attr,
 	&dev_attr_init_namespaces.attr,
+	&dev_attr_badblocks.attr,
+	&dev_attr_resource.attr,
 	NULL,
 };

@@ -476,6 +527,23 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
 	if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr)
 		return 0;

+	if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
+		return 0;
+
+	if (!is_nd_pmem(dev) && a == &dev_attr_resource.attr)
+		return 0;
+
+	if (a == &dev_attr_deep_flush.attr) {
+		int has_flush = nvdimm_has_flush(nd_region);
+
+		if (has_flush == 1)
+			return a->mode;
+		else if (has_flush == 0)
+			return 0444;
+		else
+			return 0;
+	}
+
 	if (a != &dev_attr_set_cookie.attr
 			&& a != &dev_attr_available_size.attr)
 		return a->mode;
@@ -813,7 +881,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 			return NULL;
 		}

-		if (nvdimm->flags & NDD_UNARMED)
+		if (test_bit(NDD_UNARMED, &nvdimm->flags))
 			ro = 1;
 	}

@@ -968,17 +1036,20 @@ EXPORT_SYMBOL_GPL(nvdimm_flush);
 */
 int nvdimm_has_flush(struct nd_region *nd_region)
 {
-	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
 	int i;

 	/* no nvdimm == flushing capability unknown */
 	if (nd_region->ndr_mappings == 0)
 		return -ENXIO;

-	for (i = 0; i < nd_region->ndr_mappings; i++)
-		/* flush hints present, flushing required */
-		if (ndrd_get_flush_wpq(ndrd, i, 0))
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+		/* flush hints present / available */
+		if (nvdimm->num_flush)
 			return 1;
+	}

 	/*
 	 * The platform defines dimm devices without hints, assume
...
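Per the deep_flush_store() hunk above, the attribute only accepts a true value (writing "0" returns -EINVAL), and region_visible() hides it entirely when nvdimm_has_flush() errors out, or makes it read-only when no flush hints exist. A small userspace sketch of triggering a write-pending-queue drain; the region0 path is an assumption about where the nd bus exposes region devices:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* assumed sysfs location for an nd region device */
	int fd = open("/sys/bus/nd/devices/region0/deep_flush", O_WRONLY);

	if (fd < 0) {
		perror("open");	/* attribute absent: flush unsupported */
		return 1;
	}
	if (write(fd, "1", 1) != 1)	/* "0" would fail with EINVAL */
		perror("write");
	close(fd);
	return 0;
}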
drivers/s390/block/Kconfig
@@ -14,6 +14,7 @@ config BLK_DEV_XPRAM

 config DCSSBLK
 	def_tristate m
+	select DAX
 	prompt "DCSSBLK support"
 	depends on S390 && BLOCK
 	help
...
drivers/s390/block/dcssblk.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/pfn_t.h>
+#include <linux/dax.h>
 #include <asm/extmem.h>
 #include <asm/io.h>
@@ -30,8 +31,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static blk_qc_t dcssblk_make_request(struct request_queue *q,
 						struct bio *bio);
-static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-			 void **kaddr, pfn_t *pfn, long size);
+static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn);

 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
@@ -40,7 +41,10 @@ static const struct block_device_operations dcssblk_devops = {
 	.owner		= THIS_MODULE,
 	.open		= dcssblk_open,
 	.release	= dcssblk_release,
-	.direct_access	= dcssblk_direct_access,
+};
+
+static const struct dax_operations dcssblk_dax_ops = {
+	.direct_access = dcssblk_dax_direct_access,
 };

 struct dcssblk_dev_info {
@@ -57,6 +61,7 @@ struct dcssblk_dev_info {
 	struct request_queue *dcssblk_queue;
 	int num_of_segments;
 	struct list_head seg_list;
+	struct dax_device *dax_dev;
 };

 struct segment_info {
@@ -389,6 +394,8 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch
 	}
 	list_del(&dev_info->lh);

+	kill_dax(dev_info->dax_dev);
+	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
 	blk_cleanup_queue(dev_info->dcssblk_queue);
 	dev_info->gd->queue = NULL;
@@ -654,6 +661,13 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	if (rc)
 		goto put_dev;

+	dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
+			&dcssblk_dax_ops);
+	if (!dev_info->dax_dev) {
+		rc = -ENOMEM;
+		goto put_dev;
+	}
+
 	get_device(&dev_info->dev);
 	device_add_disk(&dev_info->dev, dev_info->gd);
@@ -752,6 +766,8 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
 	}

 	list_del(&dev_info->lh);
+	kill_dax(dev_info->dax_dev);
+	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
 	blk_cleanup_queue(dev_info->dcssblk_queue);
 	dev_info->gd->queue = NULL;
@@ -883,21 +899,26 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 }

 static long
-dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-			void **kaddr, pfn_t *pfn, long size)
+__dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct dcssblk_dev_info *dev_info;
-	unsigned long offset, dev_sz;
+	resource_size_t offset = pgoff * PAGE_SIZE;
+	unsigned long dev_sz;

-	dev_info = bdev->bd_disk->private_data;
-	if (!dev_info)
-		return -ENODEV;
 	dev_sz = dev_info->end - dev_info->start + 1;
-	offset = secnum * 512;
 	*kaddr = (void *) dev_info->start + offset;
 	*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
-	return dev_sz - offset;
+
+	return (dev_sz - offset) / PAGE_SIZE;
+}
+
+static long
+dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct dcssblk_dev_info *dev_info = dax_get_private(dax_dev);
+
+	return __dcssblk_direct_access(dev_info, pgoff, nr_pages, kaddr, pfn);
 }

 static void
...
fs/block_dev.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/magic.h>
+#include <linux/dax.h>
 #include <linux/buffer_head.h>
 #include <linux/swap.h>
 #include <linux/pagevec.h>
@@ -716,50 +717,18 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);

-/**
- * bdev_direct_access() - Get the address for directly-accessibly memory
- * @bdev: The device containing the memory
- * @dax: control and output parameters for ->direct_access
- *
- * If a block device is made up of directly addressable memory, this function
- * will tell the caller the PFN and the address of the memory. The address
- * may be directly dereferenced within the kernel without the need to call
- * ioremap(), kmap() or similar. The PFN is suitable for inserting into
- * page tables.
- *
- * Return: negative errno if an error occurs, otherwise the number of bytes
- * accessible at this address.
- */
-long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax)
+int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
+		pgoff_t *pgoff)
 {
-	sector_t sector = dax->sector;
-	long avail, size = dax->size;
-	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;

-	/*
-	 * The device driver is allowed to sleep, in order to make the
-	 * memory directly accessible.
-	 */
-	might_sleep();
-
-	if (size < 0)
-		return size;
-	if (!blk_queue_dax(bdev_get_queue(bdev)) || !ops->direct_access)
-		return -EOPNOTSUPP;
-	if ((sector + DIV_ROUND_UP(size, 512)) >
-					part_nr_sects_read(bdev->bd_part))
-		return -ERANGE;
-	sector += get_start_sect(bdev);
-	if (sector % (PAGE_SIZE / 512))
+	if (pgoff)
+		*pgoff = PHYS_PFN(phys_off);
+	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
 		return -EINVAL;
-	avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn, size);
-	if (!avail)
-		return -ERANGE;
-	if (avail > 0 && avail & ~PAGE_MASK)
-		return -ENXIO;
-	return min(avail, size);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(bdev_direct_access);
+EXPORT_SYMBOL(bdev_dax_pgoff);

 /**
  * bdev_dax_supported() - Check if the device supports dax for filesystem
@@ -773,62 +742,46 @@ EXPORT_SYMBOL_GPL(bdev_direct_access);
  */
 int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
-	struct blk_dax_ctl dax = {
-		.sector = 0,
-		.size = PAGE_SIZE,
-	};
-	int err;
+	struct block_device *bdev = sb->s_bdev;
+	struct dax_device *dax_dev;
+	pgoff_t pgoff;
+	int err, id;
+	void *kaddr;
+	pfn_t pfn;
+	long len;

 	if (blocksize != PAGE_SIZE) {
 		vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax");
 		return -EINVAL;
 	}

-	err = bdev_direct_access(sb->s_bdev, &dax);
-	if (err < 0) {
-		switch (err) {
-		case -EOPNOTSUPP:
-			vfs_msg(sb, KERN_ERR,
-				"error: device does not support dax");
-			break;
-		case -EINVAL:
-			vfs_msg(sb, KERN_ERR,
-				"error: unaligned partition for dax");
-			break;
-		default:
-			vfs_msg(sb, KERN_ERR,
-				"error: dax access failed (%d)", err);
-		}
+	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+	if (err) {
+		vfs_msg(sb, KERN_ERR, "error: unaligned partition for dax");
 		return err;
 	}

-	return 0;
-}
-EXPORT_SYMBOL_GPL(bdev_dax_supported);
+	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	if (!dax_dev) {
+		vfs_msg(sb, KERN_ERR, "error: device does not support dax");
+		return -EOPNOTSUPP;
+	}

-/**
- * bdev_dax_capable() - Return if the raw device is capable for dax
- * @bdev: The device for raw block device access
- */
-bool bdev_dax_capable(struct block_device *bdev)
-{
-	struct blk_dax_ctl dax = {
-		.size = PAGE_SIZE,
-	};
+	id = dax_read_lock();
+	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
+	dax_read_unlock(id);

-	if (!IS_ENABLED(CONFIG_FS_DAX))
-		return false;
+	put_dax(dax_dev);

-	dax.sector = 0;
-	if (bdev_direct_access(bdev, &dax) < 0)
-		return false;
-
-	dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512);
-	if (bdev_direct_access(bdev, &dax) < 0)
-		return false;
-
-	return true;
+	if (len < 1) {
+		vfs_msg(sb, KERN_ERR,
+				"error: dax access failed (%ld)", len);
+		return len < 0 ? len : -EIO;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(bdev_dax_supported);

 /*
  * pseudo-fs
...
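The bdev_dax_supported() rewrite above is also the template for the new calling convention: translate a block-device sector to a dax page offset with bdev_dax_pgoff(), look up the dax_device by host name, and call dax_direct_access() inside a dax_read_lock() section (which holds off kill_dax() teardown). A sketch of a generic caller under those assumptions; dax_peek_page() is a hypothetical helper, not part of the series:

static long dax_peek_page(struct block_device *bdev, sector_t sector,
		void **kaddr, pfn_t *pfn)
{
	struct dax_device *dax_dev;
	pgoff_t pgoff;
	long rc;
	int id;

	if (bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff))
		return -EINVAL;		/* partition not page aligned */
	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev)
		return -EOPNOTSUPP;	/* no dax_device registered */

	id = dax_read_lock();		/* keeps the dax_device alive */
	rc = dax_direct_access(dax_dev, pgoff, 1, kaddr, pfn);
	dax_read_unlock(id);
	put_dax(dax_dev);
	return rc;			/* pages available, or -errno */
}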
(one file's diff collapsed in the original view)
fs/ext2/inode.c
@@ -799,6 +799,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		unsigned flags, struct iomap *iomap)
 {
+	struct block_device *bdev;
 	unsigned int blkbits = inode->i_blkbits;
 	unsigned long first_block = offset >> blkbits;
 	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
@@ -812,8 +813,13 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		return ret;

 	iomap->flags = 0;
-	iomap->bdev = inode->i_sb->s_bdev;
+	bdev = inode->i_sb->s_bdev;
+	iomap->bdev = bdev;
 	iomap->offset = (u64)first_block << blkbits;
+	if (blk_queue_dax(bdev->bd_queue))
+		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	else
+		iomap->dax_dev = NULL;

 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
@@ -835,6 +841,7 @@ static int
 ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 		ssize_t written, unsigned flags, struct iomap *iomap)
 {
+	put_dax(iomap->dax_dev);
 	if (iomap->type == IOMAP_MAPPED &&
 	    written < length &&
 	    (flags & IOMAP_WRITE))
...
fs/ext4/inode.c
@@ -3305,6 +3305,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		unsigned flags, struct iomap *iomap)
 {
+	struct block_device *bdev;
 	unsigned int blkbits = inode->i_blkbits;
 	unsigned long first_block = offset >> blkbits;
 	unsigned long last_block = (offset + length - 1) >> blkbits;
@@ -3373,7 +3374,12 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	}

 	iomap->flags = 0;
-	iomap->bdev = inode->i_sb->s_bdev;
+	bdev = inode->i_sb->s_bdev;
+	iomap->bdev = bdev;
+	if (blk_queue_dax(bdev->bd_queue))
+		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	else
+		iomap->dax_dev = NULL;
 	iomap->offset = first_block << blkbits;

 	if (ret == 0) {
@@ -3406,6 +3412,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 	int blkbits = inode->i_blkbits;
 	bool truncate = false;

+	put_dax(iomap->dax_dev);
 	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
 		return 0;
...
fs/iomap.c
@@ -360,7 +360,8 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
 	sector_t sector = iomap->blkno +
 		(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);

-	return __dax_zero_page_range(iomap->bdev, sector, offset, bytes);
+	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
+			offset, bytes);
 }

 static loff_t
...
fs/xfs/xfs_iomap.c
@@ -976,6 +976,7 @@ xfs_file_iomap_begin(
 	int			nimaps = 1, error = 0;
 	bool			shared = false, trimmed = false;
 	unsigned		lockmode;
+	struct block_device	*bdev;

 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -1063,6 +1064,14 @@ xfs_file_iomap_begin(
 	}

 	xfs_bmbt_to_iomap(ip, iomap, &imap);

+	/* optionally associate a dax device with the iomap bdev */
+	bdev = iomap->bdev;
+	if (blk_queue_dax(bdev->bd_queue))
+		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	else
+		iomap->dax_dev = NULL;
+
 	if (shared)
 		iomap->flags |= IOMAP_F_SHARED;
 	return 0;
@@ -1140,6 +1149,7 @@ xfs_file_iomap_end(
 	unsigned		flags,
 	struct iomap		*iomap)
 {
+	put_dax(iomap->dax_dev);
 	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
 		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
 				length, written, iomap);
...
include/linux/blkdev.h
@@ -1923,28 +1923,12 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 #endif /* CONFIG_BLK_DEV_INTEGRITY */

-/**
- * struct blk_dax_ctl - control and output parameters for ->direct_access
- * @sector: (input) offset relative to a block_device
- * @addr: (output) kernel virtual address for @sector populated by driver
- * @pfn: (output) page frame number for @addr populated by driver
- * @size: (input) number of bytes requested
- */
-struct blk_dax_ctl {
-	sector_t sector;
-	void *addr;
-	long size;
-	pfn_t pfn;
-};
-
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
 	int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
-			long);
 	unsigned int (*check_events) (struct gendisk *disk,
 				      unsigned int clearing);
 	/* ->media_changed() is DEPRECATED, use ->check_events() instead */
@@ -1963,9 +1947,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
-extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *);
 extern int bdev_dax_supported(struct super_block *, int);
-extern bool bdev_dax_capable(struct block_device *);
+int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #else /* CONFIG_BLOCK */
 struct block_device;
...
include/linux/dax.h
@@ -7,6 +7,28 @@
 #include <asm/pgtable.h>

 struct iomap_ops;
+struct dax_device;
+struct dax_operations {
+	/*
+	 * direct_access: translate a device-relative
+	 * logical-page-offset into an absolute physical pfn. Return the
+	 * number of pages available for DAX at that pfn.
+	 */
+	long (*direct_access)(struct dax_device *, pgoff_t, long,
+			void **, pfn_t *);
+};
+
+int dax_read_lock(void);
+void dax_read_unlock(int id);
+struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops);
+void put_dax(struct dax_device *dax_dev);
+bool dax_alive(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void *dax_get_private(struct dax_device *dax_dev);
+long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
+		void **kaddr, pfn_t *pfn);

 /*
  * We use lowest available bit in exceptional entry for locking, one bit for
@@ -48,17 +70,13 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all);

 #ifdef CONFIG_FS_DAX
-struct page *read_dax_sector(struct block_device *bdev, sector_t n);
-int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
+int __dax_zero_page_range(struct block_device *bdev,
+		struct dax_device *dax_dev, sector_t sector,
 		unsigned int offset, unsigned int length);
 #else
-static inline struct page *read_dax_sector(struct block_device *bdev,
-		sector_t n)
-{
-	return ERR_PTR(-ENXIO);
-}
 static inline int __dax_zero_page_range(struct block_device *bdev,
-		sector_t sector, unsigned int offset, unsigned int length)
+		struct dax_device *dax_dev, sector_t sector,
+		unsigned int offset, unsigned int length)
 {
 	return -ENXIO;
 }
...
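With the interface above, any driver with directly addressable media can publish its own dax_operations, following the pmem and dcssblk patterns elsewhere in this series. A hedged sketch of a minimal provider; struct my_dev and its fields are hypothetical stand-ins:

#include <linux/dax.h>
#include <linux/pfn_t.h>

struct my_dev {				/* hypothetical driver state */
	struct dax_device *dax_dev;
	void *base;			/* kernel mapping of the media */
	phys_addr_t phys;		/* physical base of the media */
	size_t size;
};

static long my_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct my_dev *dev = dax_get_private(dax_dev);
	resource_size_t offset = PFN_PHYS(pgoff);

	if (offset >= dev->size)
		return -EIO;
	*kaddr = dev->base + offset;
	*pfn = phys_to_pfn_t(dev->phys + offset, PFN_DEV);
	return PHYS_PFN(dev->size - offset);	/* pages available from pgoff */
}

static const struct dax_operations my_dax_ops = {
	.direct_access = my_dax_direct_access,
};

static int my_probe(struct my_dev *dev, struct gendisk *disk)
{
	dev->dax_dev = alloc_dax(dev, disk->disk_name, &my_dax_ops);
	return dev->dax_dev ? 0 : -ENOMEM;
}

static void my_remove(struct my_dev *dev)
{
	kill_dax(dev->dax_dev);	/* fail future dax_read_lock() users */
	put_dax(dev->dax_dev);	/* drop the driver's reference */
}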
include/linux/device-mapper.h
@@ -130,13 +130,15 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
 * < 0 : error
 * >= 0 : the number of bytes accessible at the address
 */
-typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
-				     void **kaddr, pfn_t *pfn, long size);
+typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn);
+#define PAGE_SECTORS (PAGE_SIZE / 512)

 void dm_error(const char *message);

 struct dm_dev {
 	struct block_device *bdev;
+	struct dax_device *dax_dev;
 	fmode_t mode;
 	char name[16];
 };
@@ -178,7 +180,7 @@ struct target_type {
 	dm_busy_fn busy;
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
-	dm_direct_access_fn direct_access;
+	dm_dax_direct_access_fn direct_access;

 	/* For internal device-mapper use. */
 	struct list_head list;
...
include/linux/iomap.h
@@ -41,6 +41,7 @@ struct iomap {
 	u16			type;	/* type of mapping */
 	u16			flags;	/* flags for mapping */
 	struct block_device	*bdev;	/* block device for I/O */
+	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 };

 /*
...
include/linux/libnvdimm.h
@@ -20,9 +20,11 @@
 enum {
 	/* when a dimm supports both PMEM and BLK access a label is required */
-	NDD_ALIASING = 1 << 0,
+	NDD_ALIASING = 0,
 	/* unarmed memory devices may not persist writes */
-	NDD_UNARMED = 1 << 1,
+	NDD_UNARMED = 1,
+	/* locked memory devices should not be accessed */
+	NDD_LOCKED = 2,

 	/* need to set a limit somewhere, but yes, this is likely overkill */
 	ND_IOCTL_MAX_BUFLEN = SZ_4M,
@@ -120,7 +122,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }

 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
 		phys_addr_t start, unsigned int len);
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
...
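Note the semantic shift in the enum above: the NDD_* values change from mask constants to bit numbers, so every user moves from plain read-modify-write on a mask to the atomic bitops, as the dimm_devs.c and region_devs.c hunks show. An illustrative fragment of the before/after, assuming a struct nvdimm with an unsigned long flags field:

	/* old style: flags as masks, non-atomic read-modify-write */
	nvdimm->flags |= NDD_ALIASING;		/* NDD_ALIASING == 1 << 0 */
	if (nvdimm->flags & NDD_ALIASING)
		alias++;

	/* new style: flags as bit numbers, atomic bitops */
	set_bit(NDD_ALIASING, &nvdimm->flags);	/* NDD_ALIASING == 0 */
	if (test_bit(NDD_ALIASING, &nvdimm->flags))
		alias++;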
include/linux/pmem.h
@@ -31,12 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 	BUG();
 }

-static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
-{
-	BUG();
-	return -EFAULT;
-}
-
 static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 		struct iov_iter *i)
 {
@@ -65,23 +59,6 @@ static inline bool arch_has_pmem_api(void)
 	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
 }

-/*
- * memcpy_from_pmem - read from persistent memory with error handling
- * @dst: destination buffer
- * @src: source buffer
- * @size: transfer length
- *
- * Returns 0 on success negative error code on failure.
- */
-static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
-{
-	if (arch_has_pmem_api())
-		return arch_memcpy_from_pmem(dst, src, size);
-	else
-		memcpy(dst, src, size);
-	return 0;
-}
-
 /**
  * memcpy_to_pmem - copy data to persistent memory
  * @dst: destination buffer for the copy
...
include/linux/string.h
@@ -114,6 +114,14 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
 #ifndef __HAVE_ARCH_MEMCHR
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
+#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
+static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
+		size_t cnt)
+{
+	memcpy(dst, src, cnt);
+	return 0;
+}
+#endif

 void *memchr_inv(const void *s, int c, size_t n);
 char *strreplace(char *s, char old, char new);
...
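This generic fallback always succeeds with a plain memcpy(); architectures that define __HAVE_ARCH_MEMCPY_MCSAFE (x86 in this series) override it with a copy that can recover from a machine check on a poisoned source. Callers therefore must check the return value, as the read_pmem() hunk in drivers/nvdimm/pmem.c does. A sketch of a caller modeled on that hunk; copy_from_media() is an illustrative name:

static int copy_from_media(void *dst, const void *media_addr, size_t len)
{
	/* nonzero means the arch copy consumed a machine check and the
	 * destination contents are not valid */
	if (memcpy_mcsafe(dst, media_addr, len))
		return -EIO;
	return 0;
}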
include/uapi/linux/ndctl.h
@@ -169,6 +169,7 @@ enum {
 enum {
 	ND_ARS_VOLATILE = 1,
 	ND_ARS_PERSISTENT = 2,
+	ND_CONFIG_LOCKED = 1,
 };

 static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
...
tools/testing/nvdimm/Makefile
@@ -28,7 +28,10 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_ACPI_NFIT) += nfit.o
-obj-$(CONFIG_DEV_DAX) += dax.o
+ifeq ($(CONFIG_DAX),m)
+obj-$(CONFIG_DAX) += dax.o
+endif
+obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o

 nfit-y := $(ACPI_SRC)/core.o
@@ -48,9 +51,13 @@ nd_blk-y += config_check.o
 nd_e820-y := $(NVDIMM_SRC)/e820.o
 nd_e820-y += config_check.o

-dax-y := $(DAX_SRC)/dax.o
+dax-y := $(DAX_SRC)/super.o
 dax-y += config_check.o

+device_dax-y := $(DAX_SRC)/device.o
+device_dax-y += dax-dev.o
+device_dax-y += config_check.o
+
 dax_pmem-y := $(DAX_SRC)/pmem.o
 dax_pmem-y += config_check.o
...
tools/testing/nvdimm/dax-dev.c (new file)
/*
* Copyright (c) 2016, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include "test/nfit_test.h"
#include <linux/mm.h>
#include "../../../drivers/dax/dax-private.h"
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
{
struct resource *res;
phys_addr_t addr;
int i;
for (i = 0; i < dev_dax->num_resources; i++) {
res = &dev_dax->res[i];
addr = pgoff * PAGE_SIZE + res->start;
if (addr >= res->start && addr <= res->end)
break;
pgoff -= PHYS_PFN(resource_size(res));
}
if (i < dev_dax->num_resources) {
res = &dev_dax->res[i];
if (addr + size - 1 <= res->end) {
if (get_nfit_res(addr)) {
struct page *page;
if (dev_dax->region->align > PAGE_SIZE)
return -1;
page = vmalloc_to_page((void *)addr);
return PFN_PHYS(page_to_pfn(page));
} else
return addr;
}
}
return -1;
}
tools/testing/nvdimm/pmem-dax.c
@@ -15,13 +15,13 @@
 #include <pmem.h>
 #include <nd.h>

-long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	resource_size_t offset = sector * 512 + pmem->data_offset;
+	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;

-	if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+					PFN_PHYS(nr_pages))))
 		return -EIO;

 	/*
@@ -34,11 +34,10 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector,
 		*kaddr = pmem->virt_addr + offset;
 		page = vmalloc_to_page(pmem->virt_addr + offset);
 		*pfn = page_to_pfn_t(page);
-		dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
-				"%s: sector: %#llx pfn: %#lx\n", __func__,
-				(unsigned long long) sector, page_to_pfn(page));
+		pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
+				__func__, pmem, pgoff, page_to_pfn(page));

-		return PAGE_SIZE;
+		return 1;
 	}

 	*kaddr = pmem->virt_addr + offset;
@@ -49,6 +48,6 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector,
 	 * requested range.
 	 */
 	if (unlikely(pmem->bb.count))
-		return size;
-	return pmem->size - pmem->pfn_pad - offset;
+		return nr_pages;
+	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
 }
tools/testing/nvdimm/test/nfit.c
@@ -132,6 +132,7 @@ static u32 handle[] = {
 	[3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
 	[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
 	[5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0),
+	[6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
 };

 static unsigned long dimm_fail_cmd_flags[NUM_DCR];
@@ -728,8 +729,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 static int nfit_test1_alloc(struct nfit_test *t)
 {
 	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
-		+ sizeof(struct acpi_nfit_memory_map)
-		+ offsetof(struct acpi_nfit_control_region, window_size);
+		+ sizeof(struct acpi_nfit_memory_map) * 2
+		+ offsetof(struct acpi_nfit_control_region, window_size) * 2;
 	int i;

 	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -906,6 +907,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;

 	/* mem-region2 (spa1, dimm0) */
 	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
@@ -921,6 +923,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;

 	/* mem-region3 (spa1, dimm1) */
 	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
@@ -951,6 +954,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;

 	/* mem-region5 (spa1, dimm3) */
 	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
@@ -1086,6 +1090,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;

 	offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;

 	/* dcr-descriptor0: blk */
@@ -1384,6 +1389,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;

 	/* mem-region16 (spa/bdw4, dimm4) */
 	memdev = nfit_buf + offset +
@@ -1486,6 +1492,34 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;

+	offset += dcr->header.length;
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[6];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 0;
+	memdev->region_index = 0+2;
+	memdev->region_size = SPA2_SIZE;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+
+	/* dcr-descriptor1 */
+	offset += sizeof(*memdev);
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 0+2;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[6];
+	dcr->code = NFIT_FIC_BYTE;
+	dcr->windows = 0;
+
 	post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE);

 	acpi_desc = &t->acpi_desc;
@@ -1817,6 +1851,10 @@ static int nfit_test_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;

+	rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc);
+	if (rc)
+		return rc;
+
 	if (nfit_test->setup != nfit_test0_setup)
 		return 0;
@@ -1907,7 +1945,7 @@ static __init int nfit_test_init(void)
 		case 1:
 			nfit_test->num_pm = 1;
 			nfit_test->dcr_idx = NUM_DCR;
-			nfit_test->num_dcr = 1;
+			nfit_test->num_dcr = 2;
 			nfit_test->alloc = nfit_test1_alloc;
 			nfit_test->setup = nfit_test1_setup;
 			break;
@@ -1924,6 +1962,7 @@ static __init int nfit_test_init(void)
 			put_device(&pdev->dev);
 			goto err_register;
 		}
+		get_device(&pdev->dev);

 		rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 		if (rc)
@@ -1942,6 +1981,10 @@ static __init int nfit_test_init(void)
 		if (instances[i])
 			platform_device_unregister(&instances[i]->pdev);
 	nfit_test_teardown();
+	for (i = 0; i < NUM_NFITS; i++)
+		if (instances[i])
+			put_device(&instances[i]->pdev.dev);
 	return rc;
 }

@@ -1949,10 +1992,13 @@ static __exit void nfit_test_exit(void)
 {
 	int i;

-	platform_driver_unregister(&nfit_test_driver);
 	for (i = 0; i < NUM_NFITS; i++)
 		platform_device_unregister(&instances[i]->pdev);
+	platform_driver_unregister(&nfit_test_driver);
 	nfit_test_teardown();

+	for (i = 0; i < NUM_NFITS; i++)
+		put_device(&instances[i]->pdev.dev);
 	class_destroy(nfit_test_dimm);
 }
...