Commit c2356983 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for 6.0:

   - Introduce a 'struct cxl_region' object with support for
     provisioning and assembling persistent memory regions.

   - Introduce alloc_free_mem_region() to accompany the existing
     request_free_mem_region() as a method to allocate physical memory
     capacity out of an existing resource.

   - Export insert_resource_expand_to_fit() for the CXL subsystem to
     late-publish CXL platform windows in iomem_resource.

   - Add a polled mode PCI DOE (Data Object Exchange) driver service and
     use it in cxl_pci to retrieve the CDAT (Coherent Device Attribute
     Table)"

* tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (74 commits)
  cxl/hdm: Fix skip allocations vs multiple pmem allocations
  cxl/region: Disallow region granularity != window granularity
  cxl/region: Fix x1 interleave to greater than x1 interleave routing
  cxl/region: Move HPA setup to cxl_region_attach()
  cxl/region: Fix decoder interleave programming
  Documentation: cxl: remove dangling kernel-doc reference
  cxl/region: describe targets and nr_targets members of cxl_region_params
  cxl/regions: add padding for cxl_rr_ep_add nested lists
  cxl/region: Fix IS_ERR() vs NULL check
  cxl/region: Fix region reference target accounting
  cxl/region: Fix region commit uninitialized variable warning
  cxl/region: Fix port setup uninitialized variable warnings
  cxl/region: Stop initializing interleave granularity
  cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime
  cxl/acpi: Minimize granularity for x1 interleaves
  cxl/region: Delete 'region' attribute from root decoders
  cxl/acpi: Autoload driver for 'cxl_acpi' test devices
  cxl/region: decrement ->nr_targets on error in cxl_region_attach()
  cxl/region: prevent underflow in ways_to_cxl()
  cxl/region: uninitialized variable in alloc_hpa()
  ...
parents 5e2e7383 1cd8a253
......@@ -516,6 +516,7 @@ ForEachMacros:
- 'of_property_for_each_string'
- 'of_property_for_each_u32'
- 'pci_bus_for_each_resource'
- 'pci_doe_for_each_off'
- 'pcl_for_each_chunk'
- 'pcl_for_each_segment'
- 'pcm_for_each_format'
......
This diff is collapsed.
......@@ -362,6 +362,14 @@ CXL Core
.. kernel-doc:: drivers/cxl/core/mbox.c
:doc: cxl mbox
CXL Regions
-----------
.. kernel-doc:: drivers/cxl/core/region.c
:doc: cxl core region
.. kernel-doc:: drivers/cxl/core/region.c
:identifiers:
External Interfaces
===================
......
......@@ -55,6 +55,7 @@ int memory_add_physaddr_to_nid(u64 start)
{
return hot_add_scn_to_nid(start);
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
int __weak create_section_mapping(unsigned long start, unsigned long end,
......
......@@ -2,6 +2,7 @@
menuconfig CXL_BUS
tristate "CXL (Compute Express Link) Devices Support"
depends on PCI
select PCI_DOE
help
CXL is a bus that is electrically compatible with PCI Express, but
layers three protocols on that signalling (CXL.io, CXL.cache, and
......@@ -102,4 +103,12 @@ config CXL_SUSPEND
def_bool y
depends on SUSPEND && CXL_MEM
config CXL_REGION
bool
default CXL_BUS
# For MAX_PHYSMEM_BITS
depends on SPARSEMEM
select MEMREGION
select GET_FREE_REGION
endif
This diff is collapsed.
......@@ -10,3 +10,4 @@ cxl_core-y += memdev.o
cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
......@@ -9,6 +9,36 @@ extern const struct device_type cxl_nvdimm_type;
extern struct attribute_group cxl_base_attribute_group;
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
extern const struct device_type cxl_region_type;
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
#else
static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
}
static inline int cxl_region_init(void)
{
return 0;
}
static inline void cxl_region_exit(void)
{
}
#define CXL_REGION_ATTR(x) NULL
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
#endif
struct cxl_send_command;
struct cxl_mem_query_commands;
int cxl_query_cmd(struct cxl_memdev *cxlmd,
......@@ -17,9 +47,28 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
struct dentry *cxl_debugfs_create_dir(const char *dir);
int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
enum cxl_decoder_mode mode);
int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
extern struct rw_semaphore cxl_dpa_rwsem;
bool is_switch_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
if (!port)
return NULL;
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
void cxl_mbox_exit(void);
#endif /* __CXL_CORE_H__ */
This diff is collapsed.
......@@ -718,12 +718,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
*/
static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
__le64 next_volatile_cap;
__le64 next_persistent_cap;
} __packed pi;
struct cxl_mbox_get_partition_info pi;
int rc;
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
......@@ -773,15 +768,6 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
cxlds->partition_align_bytes =
le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
dev_dbg(cxlds->dev,
"Identify Memory Device\n"
" total_bytes = %#llx\n"
" volatile_only_bytes = %#llx\n"
" persistent_only_bytes = %#llx\n"
" partition_align_bytes = %#llx\n",
cxlds->total_bytes, cxlds->volatile_only_bytes,
cxlds->persistent_only_bytes, cxlds->partition_align_bytes);
cxlds->lsa_size = le32_to_cpu(id.lsa_size);
memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
......@@ -789,42 +775,63 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
{
int rc;
if (cxlds->partition_align_bytes == 0) {
cxlds->ram_range.start = 0;
cxlds->ram_range.end = cxlds->volatile_only_bytes - 1;
cxlds->pmem_range.start = cxlds->volatile_only_bytes;
cxlds->pmem_range.end = cxlds->volatile_only_bytes +
cxlds->persistent_only_bytes - 1;
res->name = type;
res->start = start;
res->end = start + size - 1;
res->flags = IORESOURCE_MEM;
if (resource_size(res) == 0) {
dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
return 0;
}
rc = cxl_mem_get_partition_info(cxlds);
rc = request_resource(parent, res);
if (rc) {
dev_err(cxlds->dev, "Failed to query partition information\n");
dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
res, rc);
return rc;
}
dev_dbg(cxlds->dev,
"Get Partition Info\n"
" active_volatile_bytes = %#llx\n"
" active_persistent_bytes = %#llx\n"
" next_volatile_bytes = %#llx\n"
" next_persistent_bytes = %#llx\n",
cxlds->active_volatile_bytes, cxlds->active_persistent_bytes,
cxlds->next_volatile_bytes, cxlds->next_persistent_bytes);
dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
return 0;
}
cxlds->ram_range.start = 0;
cxlds->ram_range.end = cxlds->active_volatile_bytes - 1;
int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
{
struct device *dev = cxlds->dev;
int rc;
cxlds->pmem_range.start = cxlds->active_volatile_bytes;
cxlds->pmem_range.end =
cxlds->active_volatile_bytes + cxlds->active_persistent_bytes - 1;
cxlds->dpa_res =
(struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
return 0;
if (cxlds->partition_align_bytes == 0) {
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->volatile_only_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->volatile_only_bytes,
cxlds->persistent_only_bytes, "pmem");
}
rc = cxl_mem_get_partition_info(cxlds);
if (rc) {
dev_err(dev, "Failed to query partition information\n");
return rc;
}
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->active_volatile_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->active_volatile_bytes,
cxlds->active_persistent_bytes, "pmem");
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
......@@ -845,19 +852,11 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);
static struct dentry *cxl_debugfs;
void __init cxl_mbox_init(void)
{
struct dentry *mbox_debugfs;
cxl_debugfs = debugfs_create_dir("cxl", NULL);
mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
mbox_debugfs = cxl_debugfs_create_dir("mbox");
debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
&cxl_raw_allow_all);
}
void cxl_mbox_exit(void)
{
debugfs_remove_recursive(cxl_debugfs);
}
......@@ -68,7 +68,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = range_len(&cxlds->ram_range);
unsigned long long len = resource_size(&cxlds->ram_res);
return sysfs_emit(buf, "%#llx\n", len);
}
......@@ -81,7 +81,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = range_len(&cxlds->pmem_range);
unsigned long long len = resource_size(&cxlds->pmem_res);
return sysfs_emit(buf, "%#llx\n", len);
}
......
......@@ -4,6 +4,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
......@@ -225,7 +226,6 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
{
struct range *dev_range = arg;
struct cxl_decoder *cxld;
struct range root_range;
if (!is_root_decoder(dev))
return 0;
......@@ -237,12 +237,7 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
if (!(cxld->flags & CXL_DECODER_F_RAM))
return 0;
root_range = (struct range) {
.start = cxld->platform_res.start,
.end = cxld->platform_res.end,
};
return range_contains(&root_range, dev_range);
return range_contains(&cxld->hpa_range, dev_range);
}
static void disable_hdm(void *_cxlhdm)
......@@ -458,3 +453,175 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff
#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY 0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
static struct pci_doe_mb *find_cdat_doe(struct device *uport)
{
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
unsigned long index;
void *entry;
cxlmd = to_cxl_memdev(uport);
cxlds = cxlmd->cxlds;
xa_for_each(&cxlds->doe_mbs, index, entry) {
struct pci_doe_mb *cur = entry;
if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
CXL_DOE_PROTOCOL_TABLE_ACCESS))
return cur;
}
return NULL;
}
#define CDAT_DOE_REQ(entry_handle) \
(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \
CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \
CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
static void cxl_doe_task_complete(struct pci_doe_task *task)
{
complete(task->private);
}
struct cdat_doe_task {
u32 request_pl;
u32 response_pl[32];
struct completion c;
struct pci_doe_task task;
};
#define DECLARE_CDAT_DOE_TASK(req, cdt) \
struct cdat_doe_task cdt = { \
.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c), \
.request_pl = req, \
.task = { \
.prot.vid = PCI_DVSEC_VENDOR_ID_CXL, \
.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
.request_pl = &cdt.request_pl, \
.request_pl_sz = sizeof(cdt.request_pl), \
.response_pl = cdt.response_pl, \
.response_pl_sz = sizeof(cdt.response_pl), \
.complete = cxl_doe_task_complete, \
.private = &cdt.c, \
} \
}
static int cxl_cdat_get_length(struct device *dev,
struct pci_doe_mb *cdat_doe,
size_t *length)
{
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
if (rc < 0) {
dev_err(dev, "DOE submit failed: %d", rc);
return rc;
}
wait_for_completion(&t.c);
if (t.task.rv < sizeof(u32))
return -EIO;
*length = t.response_pl[1];
dev_dbg(dev, "CDAT length %zu\n", *length);
return 0;
}
static int cxl_cdat_read_table(struct device *dev,
struct pci_doe_mb *cdat_doe,
struct cxl_cdat *cdat)
{
size_t length = cdat->length;
u32 *data = cdat->table;
int entry_handle = 0;
do {
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
size_t entry_dw;
u32 *entry;
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
if (rc < 0) {
dev_err(dev, "DOE submit failed: %d", rc);
return rc;
}
wait_for_completion(&t.c);
/* 1 DW header + 1 DW data min */
if (t.task.rv < (2 * sizeof(u32)))
return -EIO;
/* Get the CXL table access header entry handle */
entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
t.response_pl[0]);
entry = t.response_pl + 1;
entry_dw = t.task.rv / sizeof(u32);
/* Skip Header */
entry_dw -= 1;
entry_dw = min(length / sizeof(u32), entry_dw);
/* Prevent length < 1 DW from causing a buffer overflow */
if (entry_dw) {
memcpy(data, entry, entry_dw * sizeof(u32));
length -= entry_dw * sizeof(u32);
data += entry_dw;
}
} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
return 0;
}
/**
* read_cdat_data - Read the CDAT data on this port
* @port: Port to read data from
*
* This call will sleep waiting for responses from the DOE mailbox.
*/
void read_cdat_data(struct cxl_port *port)
{
struct pci_doe_mb *cdat_doe;
struct device *dev = &port->dev;
struct device *uport = port->uport;
size_t cdat_length;
int rc;
cdat_doe = find_cdat_doe(uport);
if (!cdat_doe) {
dev_dbg(dev, "No CDAT mailbox\n");
return;
}
port->cdat_available = true;
if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
dev_dbg(dev, "No CDAT length\n");
return;
}
port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
if (!port->cdat.table)
return;
port->cdat.length = cdat_length;
rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
if (rc) {
/* Don't leave table data allocated on error */
devm_kfree(dev, port->cdat.table);
port->cdat.table = NULL;
port->cdat.length = 0;
dev_err(dev, "CDAT data read error\n");
}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
......@@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
{
struct cxl_port *port = find_cxl_root(&cxl_nvd->dev);
struct cxl_port *port = find_cxl_root(start);
struct device *dev;
if (!port)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -50,6 +50,24 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
return container_of(dev, struct cxl_memdev, dev);
}
static inline struct cxl_port *cxled_to_port(struct cxl_endpoint_decoder *cxled)
{
return to_cxl_port(cxled->cxld.dev.parent);
}
static inline struct cxl_port *cxlrd_to_port(struct cxl_root_decoder *cxlrd)
{
return to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
}
static inline struct cxl_memdev *
cxled_to_memdev(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = to_cxl_port(cxled->cxld.dev.parent);
return to_cxl_memdev(port->uport);
}
bool is_cxl_memdev(struct device *dev);
static inline bool is_cxl_endpoint(struct cxl_port *port)
{
......@@ -178,8 +196,9 @@ struct cxl_endpoint_dvsec_info {
* @firmware_version: Firmware version for the memory device.
* @enabled_cmds: Hardware commands found enabled in CEL.
* @exclusive_cmds: Commands that are kernel-internal only
* @pmem_range: Active Persistent memory capacity configuration
* @ram_range: Active Volatile memory capacity configuration
* @dpa_res: Overall DPA resource tree for the device
* @pmem_res: Active Persistent memory capacity configuration
* @ram_res: Active Volatile memory capacity configuration
* @total_bytes: sum of all possible capacities
* @volatile_only_bytes: hard volatile capacity
* @persistent_only_bytes: hard persistent capacity
......@@ -191,6 +210,7 @@ struct cxl_endpoint_dvsec_info {
* @component_reg_phys: register base of component registers
* @info: Cached DVSEC information about the device.
* @serial: PCIe Device Serial Number
* @doe_mbs: PCI DOE mailbox array
* @mbox_send: @dev specific transport for transmitting mailbox commands
*
* See section 8.2.9.5.2 Capacity Configuration and Label Storage for
......@@ -209,8 +229,9 @@ struct cxl_dev_state {
DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
struct range pmem_range;
struct range ram_range;
struct resource dpa_res;
struct resource pmem_res;
struct resource ram_res;
u64 total_bytes;
u64 volatile_only_bytes;
u64 persistent_only_bytes;
......@@ -224,6 +245,8 @@ struct cxl_dev_state {
resource_size_t component_reg_phys;
u64 serial;
struct xarray doe_mbs;
int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
......@@ -299,6 +322,13 @@ struct cxl_mbox_identify {
u8 qos_telemetry_caps;
} __packed;
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
__le64 next_volatile_cap;
__le64 next_persistent_cap;
} __packed;
struct cxl_mbox_get_lsa {
__le32 offset;
__le32 length;
......@@ -370,4 +400,8 @@ struct cxl_hdm {
unsigned int interleave_mask;
struct cxl_port *port;
};
struct seq_file;
struct dentry *cxl_debugfs_create_dir(const char *dir);
void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
#endif /* __CXL_MEM_H__ */
......@@ -74,4 +74,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
void read_cdat_data(struct cxl_port *port);
#endif /* __CXL_PCI_H__ */
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/pci.h>
......@@ -24,42 +25,32 @@
* in higher level operations.
*/
static int create_endpoint(struct cxl_memdev *cxlmd,
struct cxl_port *parent_port)
static void enable_suspend(void *data)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_port *endpoint;
int rc;
cxl_mem_active_dec();
}
endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
cxlds->component_reg_phys, parent_port);
if (IS_ERR(endpoint))
return PTR_ERR(endpoint);
static void remove_debugfs(void *dentry)
{
debugfs_remove_recursive(dentry);
}
dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
static int cxl_mem_dpa_show(struct seq_file *file, void *data)
{
struct device *dev = file->private;
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
rc = cxl_endpoint_autoremove(cxlmd, endpoint);
if (rc)
return rc;
if (!endpoint->dev.driver) {
dev_err(&cxlmd->dev, "%s failed probe\n",
dev_name(&endpoint->dev));
return -ENXIO;
}
cxl_dpa_debug(file, cxlmd->cxlds);
return 0;
}
static void enable_suspend(void *data)
{
cxl_mem_active_dec();
}
static int cxl_mem_probe(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_port *parent_port;
struct cxl_dport *dport;
struct dentry *dentry;
int rc;
/*
......@@ -73,11 +64,17 @@ static int cxl_mem_probe(struct device *dev)
if (work_pending(&cxlmd->detach_work))
return -EBUSY;
dentry = cxl_debugfs_create_dir(dev_name(dev));
debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
if (rc)
return rc;
rc = devm_cxl_enumerate_ports(cxlmd);
if (rc)
return rc;
parent_port = cxl_mem_find_port(cxlmd);
parent_port = cxl_mem_find_port(cxlmd, &dport);
if (!parent_port) {
dev_err(dev, "CXL port topology not found\n");
return -ENXIO;
......@@ -91,7 +88,7 @@ static int cxl_mem_probe(struct device *dev)
goto unlock;
}
rc = create_endpoint(cxlmd, parent_port);
rc = devm_cxl_add_endpoint(cxlmd, dport);
unlock:
device_unlock(&parent_port->dev);
put_device(&parent_port->dev);
......
......@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
......@@ -386,6 +387,47 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
return rc;
}
static void cxl_pci_destroy_doe(void *mbs)
{
xa_destroy(mbs);
}
static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
{
struct device *dev = cxlds->dev;
struct pci_dev *pdev = to_pci_dev(dev);
u16 off = 0;
xa_init(&cxlds->doe_mbs);
if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
dev_err(dev, "Failed to create XArray for DOE's\n");
return;
}
/*
* Mailbox creation is best effort. Higher layers must determine if
* the lack of a mailbox for their protocol is a device failure or not.
*/
pci_doe_for_each_off(pdev, off) {
struct pci_doe_mb *doe_mb;
doe_mb = pcim_doe_create_mb(pdev, off);
if (IS_ERR(doe_mb)) {
dev_err(dev, "Failed to create MB object for MB @ %x\n",
off);
continue;
}
if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
dev_err(dev, "xa_insert failed to insert MB @ %x\n",
off);
continue;
}
dev_dbg(dev, "Created DOE mailbox @%x\n", off);
}
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct cxl_register_map map;
......@@ -434,6 +476,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
devm_cxl_pci_create_doe(cxlds);
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
......@@ -454,7 +498,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
return rc;
......
......@@ -7,6 +7,7 @@
#include <linux/ndctl.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "cxlmem.h"
#include "cxl.h"
......@@ -26,7 +27,23 @@ static void clear_exclusive(void *cxlds)
static void unregister_nvdimm(void *nvdimm)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
struct cxl_pmem_region *cxlr_pmem;
device_lock(&cxl_nvb->dev);
cxlr_pmem = cxl_nvd->region;
dev_set_drvdata(&cxl_nvd->dev, NULL);
cxl_nvd->region = NULL;
device_unlock(&cxl_nvb->dev);
if (cxlr_pmem) {
device_release_driver(&cxlr_pmem->dev);
put_device(&cxlr_pmem->dev);
}
nvdimm_delete(nvdimm);
cxl_nvd->bridge = NULL;
}
static int cxl_nvdimm_probe(struct device *dev)
......@@ -39,7 +56,7 @@ static int cxl_nvdimm_probe(struct device *dev)
struct nvdimm *nvdimm;
int rc;
cxl_nvb = cxl_find_nvdimm_bridge(cxl_nvd);
cxl_nvb = cxl_find_nvdimm_bridge(dev);
if (!cxl_nvb)
return -ENXIO;
......@@ -66,6 +83,7 @@ static int cxl_nvdimm_probe(struct device *dev)
}
dev_set_drvdata(dev, nvdimm);
cxl_nvd->bridge = cxl_nvb;
rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
device_unlock(&cxl_nvb->dev);
......@@ -204,15 +222,38 @@ static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
return cxl_nvb->nvdimm_bus != NULL;
}
static int cxl_nvdimm_release_driver(struct device *dev, void *data)
static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
struct cxl_nvdimm *cxl_nvd;
if (!is_cxl_nvdimm(dev))
return 0;
cxl_nvd = to_cxl_nvdimm(dev);
if (cxl_nvd->bridge != cxl_nvb)
return 0;
device_release_driver(dev);
return 0;
}
static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus)
static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
struct cxl_pmem_region *cxlr_pmem;
if (!is_cxl_pmem_region(dev))
return 0;
cxlr_pmem = to_cxl_pmem_region(dev);
if (cxlr_pmem->bridge != cxl_nvb)
return 0;
device_release_driver(dev);
return 0;
}
static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
struct nvdimm_bus *nvdimm_bus)
{
if (!nvdimm_bus)
return;
......@@ -222,7 +263,10 @@ static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus)
* nvdimm_bus_unregister() rips the nvdimm objects out from
* underneath them.
*/
bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_release_driver);
bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
cxl_pmem_region_release_driver);
bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
cxl_nvdimm_release_driver);
nvdimm_bus_unregister(nvdimm_bus);
}
......@@ -260,7 +304,7 @@ static void cxl_nvb_update_state(struct work_struct *work)
dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
}
offline_nvdimm_bus(victim_bus);
offline_nvdimm_bus(cxl_nvb, victim_bus);
put_device(&cxl_nvb->dev);
}
......@@ -315,6 +359,203 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = {
.id = CXL_DEVICE_NVDIMM_BRIDGE,
};
static int match_cxl_nvdimm(struct device *dev, void *data)
{
return is_cxl_nvdimm(dev);
}
static void unregister_nvdimm_region(void *nd_region)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
int i;
cxlr_pmem = nd_region_provider_data(nd_region);
cxl_nvb = cxlr_pmem->bridge;
device_lock(&cxl_nvb->dev);
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
if (cxl_nvd->region) {
put_device(&cxlr_pmem->dev);
cxl_nvd->region = NULL;
}
}
device_unlock(&cxl_nvb->dev);
nvdimm_region_delete(nd_region);
}
static void cxlr_pmem_remove_resource(void *res)
{
remove_resource(res);
}
struct cxl_pmem_region_info {
u64 offset;
u64 serial;
};
static int cxl_pmem_region_probe(struct device *dev)
{
struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
struct cxl_region *cxlr = cxlr_pmem->cxlr;
struct cxl_pmem_region_info *info = NULL;
struct cxl_nvdimm_bridge *cxl_nvb;
struct nd_interleave_set *nd_set;
struct nd_region_desc ndr_desc;
struct cxl_nvdimm *cxl_nvd;
struct nvdimm *nvdimm;
struct resource *res;
int rc, i = 0;
cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
if (!cxl_nvb) {
dev_dbg(dev, "bridge not found\n");
return -ENXIO;
}
cxlr_pmem->bridge = cxl_nvb;
device_lock(&cxl_nvb->dev);
if (!cxl_nvb->nvdimm_bus) {
dev_dbg(dev, "nvdimm bus not found\n");
rc = -ENXIO;
goto err;
}
memset(&mappings, 0, sizeof(mappings));
memset(&ndr_desc, 0, sizeof(ndr_desc));
res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
if (!res) {
rc = -ENOMEM;
goto err;
}
res->name = "Persistent Memory";
res->start = cxlr_pmem->hpa_range.start;
res->end = cxlr_pmem->hpa_range.end;
res->flags = IORESOURCE_MEM;
res->desc = IORES_DESC_PERSISTENT_MEMORY;
rc = insert_resource(&iomem_resource, res);
if (rc)
goto err;
rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
if (rc)
goto err;
ndr_desc.res = res;
ndr_desc.provider_data = cxlr_pmem;
ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
ndr_desc.target_node = phys_to_target_node(res->start);
if (ndr_desc.target_node == NUMA_NO_NODE) {
ndr_desc.target_node = ndr_desc.numa_node;
dev_dbg(&cxlr->dev, "changing target node from %d to %d",
NUMA_NO_NODE, ndr_desc.target_node);
}
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set) {
rc = -ENOMEM;
goto err;
}
ndr_desc.memregion = cxlr->id;
set_bit(ND_REGION_CXL, &ndr_desc.flags);
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto err;
}
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_memdev *cxlmd = m->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *d;
d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
if (!d) {
dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto err;
}
/* safe to drop ref now with bridge lock held */
put_device(d);
cxl_nvd = to_cxl_nvdimm(d);
nvdimm = dev_get_drvdata(&cxl_nvd->dev);
if (!nvdimm) {
dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto err;
}
cxl_nvd->region = cxlr_pmem;
get_device(&cxlr_pmem->dev);
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
.start = m->start,
.size = m->size,
.position = i,
};
info[i].offset = m->start;
info[i].serial = cxlds->serial;
}
ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
ndr_desc.mapping = mappings;
/*
* TODO enable CXL labels which skip the need for 'interleave-set cookie'
*/
nd_set->cookie1 =
nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
nd_set->cookie2 = nd_set->cookie1;
ndr_desc.nd_set = nd_set;
cxlr_pmem->nd_region =
nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
if (!cxlr_pmem->nd_region) {
rc = -ENOMEM;
goto err;
}
rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
cxlr_pmem->nd_region);
out:
kfree(info);
device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
return rc;
err:
dev_dbg(dev, "failed to create nvdimm region\n");
for (i--; i >= 0; i--) {
nvdimm = mappings[i].nvdimm;
cxl_nvd = nvdimm_provider_data(nvdimm);
put_device(&cxl_nvd->region->dev);
cxl_nvd->region = NULL;
}
goto out;
}
static struct cxl_driver cxl_pmem_region_driver = {
.name = "cxl_pmem_region",
.probe = cxl_pmem_region_probe,
.id = CXL_DEVICE_PMEM_REGION,
};
/*
* Return all bridges to the CXL_NVB_NEW state to invalidate any
* ->state_work referring to the now destroyed cxl_pmem_wq.
......@@ -359,8 +600,14 @@ static __init int cxl_pmem_init(void)
if (rc)
goto err_nvdimm;
rc = cxl_driver_register(&cxl_pmem_region_driver);
if (rc)
goto err_region;
return 0;
err_region:
cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
......@@ -370,6 +617,7 @@ static __init int cxl_pmem_init(void)
static __exit void cxl_pmem_exit(void)
{
cxl_driver_unregister(&cxl_pmem_region_driver);
cxl_driver_unregister(&cxl_nvdimm_driver);
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
destroy_cxl_pmem_wq();
......@@ -381,3 +629,4 @@ module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);
......@@ -53,6 +53,9 @@ static int cxl_port_probe(struct device *dev)
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
/* Cache the data early to ensure is_visible() works */
read_cdat_data(port);
get_device(&cxlmd->dev);
rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
if (rc)
......@@ -78,10 +81,60 @@ static int cxl_port_probe(struct device *dev)
return 0;
}
static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_port *port = to_cxl_port(dev);
if (!port->cdat_available)
return -ENXIO;
if (!port->cdat.table)
return 0;
return memory_read_from_buffer(buf, count, &offset,
port->cdat.table,
port->cdat.length);
}
static BIN_ATTR_ADMIN_RO(CDAT, 0);
static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj,
struct bin_attribute *attr, int i)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_port *port = to_cxl_port(dev);
if ((attr == &bin_attr_CDAT) && port->cdat_available)
return attr->attr.mode;
return 0;
}
static struct bin_attribute *cxl_cdat_bin_attributes[] = {
&bin_attr_CDAT,
NULL,
};
static struct attribute_group cxl_cdat_attribute_group = {
.bin_attrs = cxl_cdat_bin_attributes,
.is_bin_visible = cxl_port_bin_attr_is_visible,
};
static const struct attribute_group *cxl_port_attribute_groups[] = {
&cxl_cdat_attribute_group,
NULL,
};
static struct cxl_driver cxl_port_driver = {
.name = "cxl_port",
.probe = cxl_port_probe,
.id = CXL_DEVICE_PORT,
.drv = {
.dev_groups = cxl_port_attribute_groups,
},
};
module_cxl_driver(cxl_port_driver);
......
......@@ -133,6 +133,7 @@ static void nd_region_release(struct device *dev)
put_device(&nvdimm->dev);
}
free_percpu(nd_region->lane);
if (!test_bit(ND_REGION_CXL, &nd_region->flags))
memregion_free(nd_region->id);
kfree(nd_region);
}
......@@ -982,9 +983,14 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (!nd_region)
return NULL;
/* CXL pre-assigns memregion ids before creating nvdimm regions */
if (test_bit(ND_REGION_CXL, &ndr_desc->flags)) {
nd_region->id = ndr_desc->memregion;
} else {
nd_region->id = memregion_alloc(GFP_KERNEL);
if (nd_region->id < 0)
goto err_id;
}
nd_region->lane = alloc_percpu(struct nd_percpu_lane);
if (!nd_region->lane)
......@@ -1043,9 +1049,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
return nd_region;
err_percpu:
err_percpu:
if (!test_bit(ND_REGION_CXL, &ndr_desc->flags))
memregion_free(nd_region->id);
err_id:
err_id:
kfree(nd_region);
return NULL;
}
......@@ -1068,6 +1075,13 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
void nvdimm_region_delete(struct nd_region *nd_region)
{
if (nd_region)
nd_device_unregister(&nd_region->dev, ND_SYNC);
}
EXPORT_SYMBOL_GPL(nvdimm_region_delete);
int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
{
int rc = 0;
......
......@@ -121,6 +121,9 @@ config XEN_PCIDEV_FRONTEND
config PCI_ATS
bool
config PCI_DOE
bool
config PCI_ECAM
bool
......
......@@ -31,6 +31,7 @@ obj-$(CONFIG_PCI_ECAM) += ecam.o
obj-$(CONFIG_PCI_P2PDMA) += p2pdma.o
obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
obj-$(CONFIG_VGA_ARB) += vgaarb.o
obj-$(CONFIG_PCI_DOE) += doe.o
# Endpoint library must be initialized before its users
obj-$(CONFIG_PCI_ENDPOINT) += endpoint/
......
This diff is collapsed.
......@@ -2315,7 +2315,7 @@ EXPORT_SYMBOL(pci_alloc_dev);
static bool pci_bus_crs_vendor_id(u32 l)
{
return (l & 0xffff) == 0x0001;
return (l & 0xffff) == PCI_VENDOR_ID_PCI_SIG;
}
static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l,
......
......@@ -141,6 +141,7 @@ enum {
IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
IORES_DESC_RESERVED = 7,
IORES_DESC_SOFT_RESERVED = 8,
IORES_DESC_CXL = 9,
};
/*
......@@ -329,6 +330,8 @@ struct resource *devm_request_free_mem_region(struct device *dev,
struct resource *base, unsigned long size);
struct resource *request_free_mem_region(struct resource *base,
unsigned long size, const char *name);
struct resource *alloc_free_mem_region(struct resource *base,
unsigned long size, unsigned long align, const char *name);
static inline void irqresource_disabled(struct resource *res, u32 irq)
{
......
......@@ -59,6 +59,9 @@ enum {
/* Platform provides asynchronous flush mechanism */
ND_REGION_ASYNC = 3,
/* Region was created by CXL subsystem */
ND_REGION_CXL = 4,
/* mark newly adjusted resources as requiring a label update */
DPA_RESOURCE_ADJUSTED = 1 << 0,
};
......@@ -122,6 +125,7 @@ struct nd_region_desc {
int numa_node;
int target_node;
unsigned long flags;
int memregion;
struct device_node *of_node;
int (*flush)(struct nd_region *nd_region, struct bio *bio);
};
......@@ -259,6 +263,7 @@ static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus,
cmd_mask, num_flush, flush_wpq, NULL, NULL, NULL);
}
void nvdimm_delete(struct nvdimm *nvdimm);
void nvdimm_region_delete(struct nd_region *nd_region);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Data Object Exchange
* PCIe r6.0, sec 6.30 DOE
*
* Copyright (C) 2021 Huawei
* Jonathan Cameron <Jonathan.Cameron@huawei.com>
*
* Copyright (C) 2022 Intel Corporation
* Ira Weiny <ira.weiny@intel.com>
*/
#ifndef LINUX_PCI_DOE_H
#define LINUX_PCI_DOE_H
struct pci_doe_protocol {
u16 vid;
u8 type;
};
struct pci_doe_mb;
/**
* struct pci_doe_task - represents a single query/response
*
* @prot: DOE Protocol
* @request_pl: The request payload
* @request_pl_sz: Size of the request payload (bytes)
* @response_pl: The response payload
* @response_pl_sz: Size of the response payload (bytes)
* @rv: Return value. Length of received response or error (bytes)
* @complete: Called when task is complete
* @private: Private data for the consumer
* @work: Used internally by the mailbox
* @doe_mb: Used internally by the mailbox
*
* The payload sizes and rv are specified in bytes with the following
* restrictions concerning the protocol.
*
* 1) The request_pl_sz must be a multiple of double words (4 bytes)
* 2) The response_pl_sz must be >= a single double word (4 bytes)
* 3) rv is returned as bytes but it will be a multiple of double words
*
* NOTE there is no need for the caller to initialize work or doe_mb.
*/
struct pci_doe_task {
struct pci_doe_protocol prot;
u32 *request_pl;
size_t request_pl_sz;
u32 *response_pl;
size_t response_pl_sz;
int rv;
void (*complete)(struct pci_doe_task *task);
void *private;
/* No need for the user to initialize these fields */
struct work_struct work;
struct pci_doe_mb *doe_mb;
};
/**
* pci_doe_for_each_off - Iterate each DOE capability
* @pdev: struct pci_dev to iterate
* @off: u16 of config space offset of each mailbox capability found
*/
#define pci_doe_for_each_off(pdev, off) \
for (off = pci_find_next_ext_capability(pdev, off, \
PCI_EXT_CAP_ID_DOE); \
off > 0; \
off = pci_find_next_ext_capability(pdev, off, \
PCI_EXT_CAP_ID_DOE))
struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
#endif
......@@ -151,6 +151,7 @@
#define PCI_CLASS_OTHERS 0xff
/* Vendors and devices. Sort key: vendor first, device next. */
#define PCI_VENDOR_ID_PCI_SIG 0x0001
#define PCI_VENDOR_ID_LOONGSON 0x0014
......
......@@ -235,6 +235,22 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size)
#define BIN_ATTR_RW(_name, _size) \
struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
#define __BIN_ATTR_ADMIN_RO(_name, _size) { \
.attr = { .name = __stringify(_name), .mode = 0400 }, \
.read = _name##_read, \
.size = _size, \
}
#define __BIN_ATTR_ADMIN_RW(_name, _size) \
__BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size)
#define BIN_ATTR_ADMIN_RO(_name, _size) \
struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size)
#define BIN_ATTR_ADMIN_RW(_name, _size) \
struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size)
struct sysfs_ops {
ssize_t (*show)(struct kobject *, struct attribute *, char *);
ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
......
......@@ -737,7 +737,8 @@
#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */
#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
#define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE
#define PCI_EXT_CAP_DSN_SIZEOF 12
#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
......@@ -1103,4 +1104,30 @@
#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0
#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4
/* Data Object Exchange */
#define PCI_DOE_CAP 0x04 /* DOE Capabilities Register */
#define PCI_DOE_CAP_INT_SUP 0x00000001 /* Interrupt Support */
#define PCI_DOE_CAP_INT_MSG_NUM 0x00000ffe /* Interrupt Message Number */
#define PCI_DOE_CTRL 0x08 /* DOE Control Register */
#define PCI_DOE_CTRL_ABORT 0x00000001 /* DOE Abort */
#define PCI_DOE_CTRL_INT_EN 0x00000002 /* DOE Interrupt Enable */
#define PCI_DOE_CTRL_GO 0x80000000 /* DOE Go */
#define PCI_DOE_STATUS 0x0c /* DOE Status Register */
#define PCI_DOE_STATUS_BUSY 0x00000001 /* DOE Busy */
#define PCI_DOE_STATUS_INT_STATUS 0x00000002 /* DOE Interrupt Status */
#define PCI_DOE_STATUS_ERROR 0x00000004 /* DOE Error */
#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */
#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */
#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */
/* DOE Data Object - note not actually registers */
#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff
#define PCI_DOE_DATA_OBJECT_HEADER_1_TYPE 0x00ff0000
#define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH 0x0003ffff
#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000
#endif /* LINUX_PCI_REGS_H */
......@@ -489,8 +489,9 @@ int __weak page_is_ram(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(page_is_ram);
static int __region_intersects(resource_size_t start, size_t size,
unsigned long flags, unsigned long desc)
static int __region_intersects(struct resource *parent, resource_size_t start,
size_t size, unsigned long flags,
unsigned long desc)
{
struct resource res;
int type = 0; int other = 0;
......@@ -499,7 +500,7 @@ static int __region_intersects(resource_size_t start, size_t size,
res.start = start;
res.end = start + size - 1;
for (p = iomem_resource.child; p ; p = p->sibling) {
for (p = parent->child; p ; p = p->sibling) {
bool is_type = (((p->flags & flags) == flags) &&
((desc == IORES_DESC_NONE) ||
(desc == p->desc)));
......@@ -543,7 +544,7 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
int ret;
read_lock(&resource_lock);
ret = __region_intersects(start, size, flags, desc);
ret = __region_intersects(&iomem_resource, start, size, flags, desc);
read_unlock(&resource_lock);
return ret;
......@@ -891,6 +892,13 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
}
write_unlock(&resource_lock);
}
/*
* Not for general consumption, only early boot memory map parsing, PCI
* resource discovery, and late discovery of CXL resources are expected
* to use this interface. The former are built-in and only the latter,
* CXL, is a module.
*/
EXPORT_SYMBOL_NS_GPL(insert_resource_expand_to_fit, CXL);
/**
* remove_resource - Remove a resource in the resource tree
......@@ -1773,39 +1781,95 @@ void resource_list_free(struct list_head *head)
}
EXPORT_SYMBOL(resource_list_free);
#ifdef CONFIG_DEVICE_PRIVATE
static struct resource *__request_free_mem_region(struct device *dev,
struct resource *base, unsigned long size, const char *name)
#ifdef CONFIG_GET_FREE_REGION
#define GFR_DESCENDING (1UL << 0)
#define GFR_REQUEST_REGION (1UL << 1)
#define GFR_DEFAULT_ALIGN (1UL << PA_SECTION_SHIFT)
static resource_size_t gfr_start(struct resource *base, resource_size_t size,
resource_size_t align, unsigned long flags)
{
if (flags & GFR_DESCENDING) {
resource_size_t end;
end = min_t(resource_size_t, base->end,
(1ULL << MAX_PHYSMEM_BITS) - 1);
return end - size + 1;
}
return ALIGN(base->start, align);
}
static bool gfr_continue(struct resource *base, resource_size_t addr,
resource_size_t size, unsigned long flags)
{
if (flags & GFR_DESCENDING)
return addr > size && addr >= base->start;
/*
* In the ascend case be careful that the last increment by
* @size did not wrap 0.
*/
return addr > addr - size &&
addr <= min_t(resource_size_t, base->end,
(1ULL << MAX_PHYSMEM_BITS) - 1);
}
static resource_size_t gfr_next(resource_size_t addr, resource_size_t size,
unsigned long flags)
{
if (flags & GFR_DESCENDING)
return addr - size;
return addr + size;
}
static void remove_free_mem_region(void *_res)
{
struct resource *res = _res;
if (res->parent)
remove_resource(res);
free_resource(res);
}
static struct resource *
get_free_mem_region(struct device *dev, struct resource *base,
resource_size_t size, const unsigned long align,
const char *name, const unsigned long desc,
const unsigned long flags)
{
resource_size_t end, addr;
resource_size_t addr;
struct resource *res;
struct region_devres *dr = NULL;
size = ALIGN(size, 1UL << PA_SECTION_SHIFT);
end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);
addr = end - size + 1UL;
size = ALIGN(size, align);
res = alloc_resource(GFP_KERNEL);
if (!res)
return ERR_PTR(-ENOMEM);
if (dev) {
if (dev && (flags & GFR_REQUEST_REGION)) {
dr = devres_alloc(devm_region_release,
sizeof(struct region_devres), GFP_KERNEL);
if (!dr) {
free_resource(res);
return ERR_PTR(-ENOMEM);
}
} else if (dev) {
if (devm_add_action_or_reset(dev, remove_free_mem_region, res))
return ERR_PTR(-ENOMEM);
}
write_lock(&resource_lock);
for (; addr > size && addr >= base->start; addr -= size) {
if (__region_intersects(addr, size, 0, IORES_DESC_NONE) !=
for (addr = gfr_start(base, size, align, flags);
gfr_continue(base, addr, size, flags);
addr = gfr_next(addr, size, flags)) {
if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE) !=
REGION_DISJOINT)
continue;
if (__request_region_locked(res, &iomem_resource, addr, size,
name, 0))
if (flags & GFR_REQUEST_REGION) {
if (__request_region_locked(res, &iomem_resource, addr,
size, name, 0))
break;
if (dev) {
......@@ -1815,20 +1879,41 @@ static struct resource *__request_free_mem_region(struct device *dev,
devres_add(dev, dr);
}
res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
res->desc = desc;
write_unlock(&resource_lock);
/*
* A driver is claiming this region so revoke any mappings.
* A driver is claiming this region so revoke any
* mappings.
*/
revoke_iomem(res);
} else {
res->start = addr;
res->end = addr + size - 1;
res->name = name;
res->desc = desc;
res->flags = IORESOURCE_MEM;
/*
* Only succeed if the resource hosts an exclusive
* range after the insert
*/
if (__insert_resource(base, res) || res->child)
break;
write_unlock(&resource_lock);
}
return res;
}
write_unlock(&resource_lock);
if (flags & GFR_REQUEST_REGION) {
free_resource(res);
if (dr)
devres_free(dr);
} else if (dev)
devm_release_action(dev, remove_free_mem_region, res);
return ERR_PTR(-ERANGE);
}
......@@ -1847,18 +1932,48 @@ static struct resource *__request_free_mem_region(struct device *dev,
struct resource *devm_request_free_mem_region(struct device *dev,
struct resource *base, unsigned long size)
{
return __request_free_mem_region(dev, base, size, dev_name(dev));
unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
dev_name(dev),
IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
}
EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
struct resource *request_free_mem_region(struct resource *base,
unsigned long size, const char *name)
{
return __request_free_mem_region(NULL, base, size, name);
unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN, name,
IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
}
EXPORT_SYMBOL_GPL(request_free_mem_region);
#endif /* CONFIG_DEVICE_PRIVATE */
/**
* alloc_free_mem_region - find a free region relative to @base
* @base: resource that will parent the new resource
* @size: size in bytes of memory to allocate from @base
* @align: alignment requirements for the allocation
* @name: resource name
*
* Buses like CXL, that can dynamically instantiate new memory regions,
* need a method to allocate physical address space for those regions.
* Allocate and insert a new resource to cover a free, unclaimed by a
* descendant of @base, range in the span of @base.
*/
struct resource *alloc_free_mem_region(struct resource *base,
unsigned long size, unsigned long align,
const char *name)
{
/* Default of ascending direction and insert resource */
unsigned long flags = 0;
return get_free_mem_region(NULL, base, size, align, name,
IORES_DESC_NONE, flags);
}
EXPORT_SYMBOL_NS_GPL(alloc_free_mem_region, CXL);
#endif /* CONFIG_GET_FREE_REGION */
static int __init strict_iomem(char *str)
{
......
......@@ -983,9 +983,14 @@ config HMM_MIRROR
bool
depends on MMU
config GET_FREE_REGION
depends on SPARSEMEM
bool
config DEVICE_PRIVATE
bool "Unaddressable device memory (GPU memory, ...)"
depends on ZONE_DEVICE
select GET_FREE_REGION
help
Allows creation of struct pages to represent unaddressable device
......
......@@ -47,6 +47,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o
cxl_core-y += $(CXL_CORE_SRC)/mbox.o
cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
cxl_core-y += config_check.o
obj-m += test/
This diff is collapsed.
This diff is collapsed.
......@@ -208,13 +208,15 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
struct cxl_hdm *cxlhdm)
{
int rc = 0, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (!ops || !ops->is_mock_dev(cxlds->dev))
if (ops && ops->is_mock_dev(cxlds->dev))
rc = 0;
else
rc = cxl_hdm_decode_init(cxlds, cxlhdm);
put_cxl_mock_ops(index);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment