Commit 8c2f7e86 authored by Dan Williams

libnvdimm: infrastructure for btt devices

NVDIMM namespaces, in addition to accepting "struct bio" based requests,
also have the capability to perform byte-aligned accesses.  By default
only the bio/block interface is used.  However, if another driver can
make effective use of the byte-aligned capability it can claim the
namespace interface and use the byte-aligned ->rw_bytes() interface.

The BTT driver is the first consumer of this mechanism; it allows
adding atomic sector update semantics to a pmem or blk namespace.  This
patch is the sysfs infrastructure to allow configuring a BTT instance
for a namespace.  Enabling that BTT and performing i/o is in a
subsequent patch.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 0ba1c634
......@@ -33,4 +33,7 @@ config BLK_DEV_PMEM
Say Y if you want to use an NVDIMM
config BTT
def_bool y
endif
......@@ -11,3 +11,4 @@ libnvdimm-y += region_devs.o
libnvdimm-y += region.o
libnvdimm-y += namespace_devs.o
libnvdimm-y += label.o
libnvdimm-$(CONFIG_BTT) += btt_devs.o
/*
* Block Translation Table library
* Copyright (c) 2014-2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _LINUX_BTT_H
#define _LINUX_BTT_H
#include <linux/types.h>
#define BTT_SIG_LEN 16
#define BTT_SIG "BTT_ARENA_INFO\0"
/*
 * struct btt_sb - BTT info block
 *
 * All multi-byte integers are stored with explicit little-endian types
 * (__le16/__le32/__le64), so values must be converted before use on the
 * CPU.  The members add up to exactly 4096 bytes:
 * 16 + 16 + 16 + 4 + 2 + 2 + (6 * 4) + (5 * 8) + 3968 + 8.
 * Do not reorder or resize fields without keeping that total in mind.
 *
 * NOTE(review): presumably this mirrors the on-media BTT arena info
 * layout -- confirm against the BTT format description before changing.
 */
struct btt_sb {
/* BTT_SIG_LEN bytes; presumably holds BTT_SIG -- TODO confirm */
u8 signature[BTT_SIG_LEN];
u8 uuid[16];
u8 parent_uuid[16];
__le32 flags;
__le16 version_major;
__le16 version_minor;
__le32 external_lbasize;
__le32 external_nlba;
__le32 internal_lbasize;
__le32 internal_nlba;
__le32 nfree;
__le32 infosize;
__le64 nextoff;
__le64 dataoff;
__le64 mapoff;
__le64 logoff;
__le64 info2off;
/* pads the structure so that checksum ends at byte 4096 */
u8 padding[3968];
/* last member; presumably covers the preceding bytes -- TODO confirm */
__le64 checksum;
};
#endif
This diff is collapsed.
......@@ -14,8 +14,10 @@
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fcntl.h>
#include <linux/async.h>
#include <linux/genhd.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
......@@ -103,6 +105,7 @@ static int nvdimm_bus_probe(struct device *dev)
dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
dev_name(dev), rc);
if (rc != 0)
module_put(provider);
return rc;
......@@ -163,14 +166,19 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
put_device(dev);
}
void nd_device_register(struct device *dev)
void __nd_device_register(struct device *dev)
{
dev->bus = &nvdimm_bus_type;
device_initialize(dev);
get_device(dev);
async_schedule_domain(nd_async_device_register, dev,
&nd_async_domain);
}
/*
 * nd_device_register() - initialize @dev and queue it for registration
 * @dev: device to register
 *
 * Calls device_initialize() on @dev and then hands it to
 * __nd_device_register().
 */
void nd_device_register(struct device *dev)
{
device_initialize(dev);
__nd_device_register(dev);
}
EXPORT_SYMBOL(nd_device_register);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
......
......@@ -666,7 +666,7 @@ static int __blk_label_update(struct nd_region *nd_region,
/* don't allow updates that consume the last label */
if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
dev_info(&nsblk->dev, "insufficient label space\n");
dev_info(&nsblk->common.dev, "insufficient label space\n");
kfree(victim_map);
return -ENOSPC;
}
......@@ -762,7 +762,8 @@ static int __blk_label_update(struct nd_region *nd_region,
continue;
res = to_resource(ndd, nd_label);
res->flags &= ~DPA_RESOURCE_ADJUSTED;
dev_vdbg(&nsblk->dev, "assign label[%d] slot: %d\n", l, slot);
dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
l, slot);
nd_mapping->labels[l++] = nd_label;
}
nd_mapping->labels[l] = NULL;
......
This diff is collapsed.
......@@ -45,12 +45,14 @@ struct nvdimm {
bool is_nvdimm(struct device *dev);
bool is_nd_blk(struct device *dev);
bool is_nd_pmem(struct device *dev);
struct nd_btt;
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void);
void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
struct nd_region;
void nd_region_create_blk_seed(struct nd_region *nd_region);
void nd_region_create_btt_seed(struct nd_region *nd_region);
void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
......@@ -58,6 +60,7 @@ void nd_synchronize(void);
int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
void __nd_device_register(struct device *dev);
int nd_match_dimm(struct device *dev, void *data);
struct nd_label_id;
char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
......@@ -77,4 +80,5 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region,
resource_size_t start);
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
void get_ndd(struct nvdimm_drvdata *ndd);
resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
#endif /* __ND_CORE_H__ */
......@@ -19,6 +19,10 @@
#include <linux/types.h>
#include "label.h"
enum {
/* shift for byte <-> sector conversion: 1 << 9 == 512 bytes */
SECTOR_SHIFT = 9,
};
struct nvdimm_drvdata {
struct device *dev;
int nsindex_size;
......@@ -74,7 +78,9 @@ static inline struct nd_namespace_index *to_next_namespace_index(
struct nd_region {
struct device dev;
struct ida ns_ida;
struct ida btt_ida;
struct device *ns_seed;
struct device *btt_seed;
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
......@@ -94,6 +100,14 @@ static inline unsigned nd_inc_seq(unsigned seq)
return next[seq & 3];
}
/**
 * struct nd_btt - state for one BTT device
 * @dev: device model node for the BTT device
 * @ndns: namespace associated with this BTT instance
 * @lbasize: presumably the configured sector (LBA) size -- TODO confirm
 * @uuid: presumably the uuid of this BTT instance -- TODO confirm
 * @id: instance id; presumably allocated from the region's btt_ida --
 *      TODO confirm
 */
struct nd_btt {
struct device dev;
struct nd_namespace_common *ndns;
unsigned long lbasize;
u8 *uuid;
int id;
};
enum nd_async_mode {
ND_SYNC,
ND_ASYNC,
......@@ -118,6 +132,30 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
struct nd_btt *to_nd_btt(struct device *dev);
struct btt_sb;
u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
#if IS_ENABLED(CONFIG_BTT)
int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
bool is_nd_btt(struct device *dev);
struct device *nd_btt_create(struct nd_region *nd_region);
#else
/*
 * Stub used when CONFIG_BTT is not enabled: there is never a BTT to
 * probe, so always report -ENODEV.
 *
 * The return type is spelled out explicitly so the stub matches the
 * "int nd_btt_probe(...)" prototype used when CONFIG_BTT is enabled;
 * omitting it relies on implicit int, which is not valid since C99.
 */
static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
{
	return -ENODEV;
}
/* Stub used when CONFIG_BTT is not enabled: no device is a BTT device. */
static inline bool is_nd_btt(struct device *dev)
{
return false;
}
/* Stub used when CONFIG_BTT is not enabled: no BTT device is created. */
static inline struct device *nd_btt_create(struct nd_region *nd_region)
{
return NULL;
}
#endif
struct nd_region *to_nd_region(struct device *dev);
int nd_region_to_nstype(struct nd_region *nd_region);
int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
......@@ -132,4 +170,6 @@ void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res);
struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id, resource_size_t start,
resource_size_t n);
resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev);
#endif /* __ND_H__ */
......@@ -121,44 +121,61 @@ static struct pmem_device *pmem_alloc(struct device *dev,
struct resource *res, int id)
{
struct pmem_device *pmem;
struct gendisk *disk;
int err;
err = -ENOMEM;
pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
if (!pmem)
goto out;
return ERR_PTR(-ENOMEM);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
err = -EINVAL;
if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
&pmem->phys_addr, pmem->size);
goto out_free_dev;
kfree(pmem);
return ERR_PTR(-EBUSY);
}
/*
* Map the memory as non-cachable, as we can't write back the contents
* of the CPU caches in case of a crash.
*/
err = -ENOMEM;
pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
if (!pmem->virt_addr)
goto out_release_region;
if (!pmem->virt_addr) {
release_mem_region(pmem->phys_addr, pmem->size);
kfree(pmem);
return ERR_PTR(-ENXIO);
}
return pmem;
}
/*
 * pmem_detach_disk() - tear down the block device side of @pmem
 *
 * Deletes the gendisk, drops the disk reference, and cleans up the
 * request queue.  The memory mapping and the pmem_device itself are
 * not touched here.
 */
static void pmem_detach_disk(struct pmem_device *pmem)
{
del_gendisk(pmem->pmem_disk);
put_disk(pmem->pmem_disk);
blk_cleanup_queue(pmem->pmem_queue);
}
static int pmem_attach_disk(struct nd_namespace_common *ndns,
struct pmem_device *pmem)
{
struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
struct gendisk *disk;
pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
if (!pmem->pmem_queue)
goto out_unmap;
return -ENOMEM;
blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
disk = alloc_disk(0);
if (!disk)
goto out_free_queue;
if (!disk) {
blk_cleanup_queue(pmem->pmem_queue);
return -ENOMEM;
}
disk->major = pmem_major;
disk->first_minor = 0;
......@@ -166,32 +183,47 @@ static struct pmem_device *pmem_alloc(struct device *dev,
disk->private_data = pmem;
disk->queue = pmem->pmem_queue;
disk->flags = GENHD_FL_EXT_DEVT;
sprintf(disk->disk_name, "pmem%d", id);
disk->driverfs_dev = dev;
sprintf(disk->disk_name, "pmem%d", nd_region->id);
disk->driverfs_dev = &ndns->dev;
set_capacity(disk, pmem->size >> 9);
pmem->pmem_disk = disk;
add_disk(disk);
return pmem;
return 0;
}
out_free_queue:
blk_cleanup_queue(pmem->pmem_queue);
out_unmap:
iounmap(pmem->virt_addr);
out_release_region:
release_mem_region(pmem->phys_addr, pmem->size);
out_free_dev:
kfree(pmem);
out:
return ERR_PTR(err);
/**
 * pmem_rw_bytes() - byte-aligned read/write of a pmem namespace
 * @ndns: namespace being accessed; the pmem_device is taken from the
 *        driver data of @ndns->claim
 * @offset: starting offset into the mapped pmem range
 * @buf: buffer to fill (READ) or to copy from (any other @rw value)
 * @size: number of bytes to transfer
 * @rw: READ to copy from pmem into @buf, otherwise copy @buf into pmem
 *
 * Return: 0 on success, -EFAULT if [@offset, @offset + @size) does not
 * fit inside the pmem range.
 */
static int pmem_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *buf, size_t size, int rw)
{
	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);

	/*
	 * Range-check without computing offset + size: that sum is done
	 * in unsigned arithmetic and can wrap around, which would let an
	 * out-of-range request pass the check.
	 */
	if (unlikely(size > pmem->size || offset > pmem->size - size)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}

	if (rw == READ)
		memcpy(buf, pmem->virt_addr + offset, size);
	else
		memcpy(pmem->virt_addr + offset, buf, size);

	return 0;
}
/*
 * Placeholder: attaching a BTT to @ndns is not implemented yet, so this
 * always fails with -ENXIO.
 */
static int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{
/* TODO */
return -ENXIO;
}
/* Placeholder: detaching a BTT from @ndns is not implemented yet (no-op). */
static void nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns)
{
/* TODO */
}
static void pmem_free(struct pmem_device *pmem)
{
del_gendisk(pmem->pmem_disk);
put_disk(pmem->pmem_disk);
blk_cleanup_queue(pmem->pmem_queue);
iounmap(pmem->virt_addr);
release_mem_region(pmem->phys_addr, pmem->size);
kfree(pmem);
......@@ -200,40 +232,44 @@ static void pmem_free(struct pmem_device *pmem)
static int nd_pmem_probe(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
struct pmem_device *pmem;
int rc;
if (resource_size(&nsio->res) < ND_MIN_NAMESPACE_SIZE) {
resource_size_t size = resource_size(&nsio->res);
dev_dbg(dev, "%s: size: %pa, too small must be at least %#x\n",
__func__, &size, ND_MIN_NAMESPACE_SIZE);
return -ENODEV;
}
if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_PMEM) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
if (!nspm->uuid) {
dev_dbg(dev, "%s: uuid not set\n", __func__);
return -ENODEV;
}
}
ndns = nvdimm_namespace_common_probe(dev);
if (IS_ERR(ndns))
return PTR_ERR(ndns);
nsio = to_nd_namespace_io(&ndns->dev);
pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
if (IS_ERR(pmem))
return PTR_ERR(pmem);
dev_set_drvdata(dev, pmem);
return 0;
ndns->rw_bytes = pmem_rw_bytes;
if (is_nd_btt(dev))
rc = nvdimm_namespace_attach_btt(ndns);
else if (nd_btt_probe(ndns, pmem) == 0) {
/* we'll come back as btt-pmem */
rc = -ENXIO;
} else
rc = pmem_attach_disk(ndns, pmem);
if (rc)
pmem_free(pmem);
return rc;
}
/*
 * nd_pmem_remove() - driver remove callback
 * @dev: device being unbound; its driver data is the pmem_device
 *
 * Undoes the personality-specific attach (BTT or plain disk), then
 * releases the pmem_device.  Always returns 0.
 */
static int nd_pmem_remove(struct device *dev)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);

	if (!is_nd_btt(dev)) {
		/* plain pmem: a gendisk was attached, take it down */
		pmem_detach_disk(pmem);
	} else {
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
	}

	pmem_free(pmem);
	return 0;
}
......
......@@ -33,12 +33,13 @@ static int nd_region_probe(struct device *dev)
num_ns->count = rc + err;
dev_set_drvdata(dev, num_ns);
if (rc && err && rc == err)
return -ENODEV;
nd_region->btt_seed = nd_btt_create(nd_region);
if (err == 0)
return 0;
if (rc == err)
return -ENODEV;
/*
* Given multiple namespaces per region, we do not want to
* disable all the successfully registered peer namespaces upon
......@@ -66,6 +67,7 @@ static int nd_region_remove(struct device *dev)
/* flush attribute readers and disable */
nvdimm_bus_lock(dev);
nd_region->ns_seed = NULL;
nd_region->btt_seed = NULL;
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
......
......@@ -296,10 +296,28 @@ static ssize_t namespace_seed_show(struct device *dev,
}
static DEVICE_ATTR_RO(namespace_seed);
/*
 * btt_seed_show() - sysfs show handler for the region's "btt_seed"
 *
 * Emits the device name of the region's current btt seed followed by a
 * newline, or just a newline when no seed is set.  The seed pointer is
 * read under the nvdimm bus lock.
 */
static ssize_t btt_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	struct device *seed;
	ssize_t len;

	nvdimm_bus_lock(dev);
	seed = nd_region->btt_seed;
	len = seed ? sprintf(buf, "%s\n", dev_name(seed))
		   : sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return len;
}
static DEVICE_ATTR_RO(btt_seed);
static struct attribute *nd_region_attributes[] = {
&dev_attr_size.attr,
&dev_attr_nstype.attr,
&dev_attr_mappings.attr,
&dev_attr_btt_seed.attr,
&dev_attr_set_cookie.attr,
&dev_attr_available_size.attr,
&dev_attr_namespace_seed.attr,
......@@ -345,15 +363,18 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
/*
* Upon successful probe/remove, take/release a reference on the
* associated interleave set (if present)
* associated interleave set (if present), and plant new btt + namespace
* seeds.
*/
static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
struct device *dev, bool probe)
{
struct nd_region *nd_region;
if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) {
struct nd_region *nd_region = to_nd_region(dev);
int i;
nd_region = to_nd_region(dev);
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = nd_mapping->ndd;
......@@ -365,14 +386,21 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
nd_mapping->ndd = NULL;
atomic_dec(&nvdimm->busy);
}
} else if (dev->parent && is_nd_blk(dev->parent) && probe) {
struct nd_region *nd_region = to_nd_region(dev->parent);
}
if (dev->parent && is_nd_blk(dev->parent) && probe) {
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->ns_seed == dev)
nd_region_create_blk_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_btt(dev) && probe) {
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->btt_seed == dev)
nd_region_create_btt_seed(nd_region);
nvdimm_bus_unlock(dev);
}
}
void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
......@@ -546,6 +574,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
nd_region->provider_data = ndr_desc->provider_data;
nd_region->nd_set = ndr_desc->nd_set;
ida_init(&nd_region->ns_ida);
ida_init(&nd_region->btt_ida);
dev = &nd_region->dev;
dev_set_name(dev, "region%d", nd_region->id);
dev->parent = &nvdimm_bus->dev;
......
......@@ -12,6 +12,7 @@
*/
#ifndef __LINUX_ND_H__
#define __LINUX_ND_H__
#include <linux/fs.h>
#include <linux/ndctl.h>
#include <linux/device.h>
......@@ -28,13 +29,33 @@ static inline struct nd_device_driver *to_nd_device_driver(
return container_of(drv, struct nd_device_driver, drv);
};
/**
 * struct nd_namespace_common - core infrastructure of a namespace
 * @force_raw: ignore other personalities for the namespace (e.g. btt)
 * @dev: device model node
 * @claim: when set, another personality has taken ownership of the namespace
 * @rw_bytes: access the raw namespace capacity with byte-aligned transfers
 */
struct nd_namespace_common {
int force_raw;
struct device dev;
struct device *claim;
int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
void *buf, size_t size, int rw);
};
/*
 * to_ndns() - get the nd_namespace_common that embeds @dev
 *
 * @dev must point at the dev member of a struct nd_namespace_common;
 * no type check is performed.
 */
static inline struct nd_namespace_common *to_ndns(struct device *dev)
{
return container_of(dev, struct nd_namespace_common, dev);
}
/**
* struct nd_namespace_io - infrastructure for loading an nd_pmem instance
* @dev: namespace device created by the nd region driver
* @res: struct resource conversion of a NFIT SPA table
*/
struct nd_namespace_io {
struct device dev;
struct nd_namespace_common common;
struct resource res;
};
......@@ -52,7 +73,6 @@ struct nd_namespace_pmem {
/**
* struct nd_namespace_blk - namespace for dimm-bounded persistent memory
* @dev: namespace device creation by the nd region driver
* @alt_name: namespace name supplied in the dimm label
* @uuid: namespace name supplied in the dimm label
* @id: ida allocated id
......@@ -61,7 +81,7 @@ struct nd_namespace_pmem {
* @res: discontiguous dpa extents for given dimm
*/
struct nd_namespace_blk {
struct device dev;
struct nd_namespace_common common;
char *alt_name;
u8 *uuid;
int id;
......@@ -72,7 +92,7 @@ struct nd_namespace_blk {
static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
{
return container_of(dev, struct nd_namespace_io, dev);
return container_of(dev, struct nd_namespace_io, common.dev);
}
static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
......@@ -84,7 +104,40 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
{
return container_of(dev, struct nd_namespace_blk, dev);
return container_of(dev, struct nd_namespace_blk, common.dev);
}
/**
 * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace
 * @ndns: device to read
 * @offset: namespace-relative starting offset
 * @buf: buffer to fill
 * @size: transfer length
 *
 * @buf is up-to-date upon return from this routine.
 *
 * Return: whatever the namespace's ->rw_bytes() implementation returns.
 * @ndns->rw_bytes is called unconditionally, so it must be set.
 */
static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
resource_size_t offset, void *buf, size_t size)
{
return ndns->rw_bytes(ndns, offset, buf, size, READ);
}
/**
 * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace
 * @ndns: device to write
 * @offset: namespace-relative starting offset
 * @buf: buffer to drain
 * @size: transfer length
 *
 * NVDIMM Namespace disks do not implement sectors internally. Depending on
 * the @ndns, the contents of @buf may be in cpu cache, platform buffers,
 * or on backing memory media upon return from this routine. Flushing
 * to media is handled internal to the @ndns driver, if at all.
 *
 * Return: whatever the namespace's ->rw_bytes() implementation returns.
 * @ndns->rw_bytes is called unconditionally, so it must be set.
 */
static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns,
resource_size_t offset, void *buf, size_t size)
{
return ndns->rw_bytes(ndns, offset, buf, size, WRITE);
}
#define MODULE_ALIAS_ND_DEVICE(type) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment