Commit b8cc56d0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'cxl-for-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull CXL (Compute Express Link) updates from Dan Williams:
 "The main new functionality this time is work to allow Linux to
  natively handle CXL link protocol errors signalled via PCIe AER for
  current generation CXL platforms. This required some enlightenment of
  the PCIe AER core to workaround the fact that current generation RCH
  (Restricted CXL Host) platforms physically hide topology details and
  registers via a mechanism called RCRB (Root Complex Register Block).

  The next major highlight is reworks to address bugs in parsing region
  configurations for next generation VH (Virtual Host) topologies. The
  old broken algorithm is replaced with a simpler one that significantly
  increases the number of region configurations supported by Linux. This
  is again relevant for error handling so that forward and reverse
  address translation of memory errors can be carried out by Linux for
  memory regions instantiated by platform firmware.

  As for other cross-tree work, the ACPI table parsing code has been
  refactored for reuse parsing the "CDAT" structure which is an
  ACPI-like data structure that is reported by CXL devices. That work is
  in preparation for v6.8 support for CXL QoS. Think of this as dynamic
  generation of NUMA node topology information generated by Linux rather
  than platform firmware.

  Lastly, a number of internal object lifetime issues have been resolved
  along with misc. fixes and feature updates (decoders_committed sysfs
  ABI).

  Summary:

   - Add support for RCH (Restricted CXL Host) Error recovery

   - Fix several region assembly bugs

   - Fix mem-device lifetime issues relative to the sanitize command and
     RCH topology.

   - Refactor ACPI table parsing for CDAT parsing re-use in preparation
     for CXL QOS support"

* tag 'cxl-for-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (50 commits)
  lib/fw_table: Remove acpi_parse_entries_array() export
  cxl/pci: Change CXL AER support check to use native AER
  cxl/hdm: Remove broken error path
  cxl/hdm: Fix && vs || bug
  acpi: Move common tables helper functions to common lib
  cxl: Add support for reading CXL switch CDAT table
  cxl: Add checksum verification to CDAT from CXL
  cxl: Export QTG ids from CFMWS to sysfs as qos_class attribute
  cxl: Add decoders_committed sysfs attribute to cxl_port
  cxl: Add cxl_decoders_committed() helper
  cxl/core/regs: Rework cxl_map_pmu_regs() to use map->dev for devm
  cxl/core/regs: Rename phys_addr in cxl_map_component_regs()
  PCI/AER: Unmask RCEC internal errors to enable RCH downstream port error handling
  PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler
  cxl/pci: Disable root port interrupts in RCH mode
  cxl/pci: Add RCH downstream port error logging
  cxl/pci: Map RCH downstream AER registers for logging protocol errors
  cxl/pci: Update CXL error logging to use RAS register address
  PCI/AER: Refactor cper_print_aer() for use by CXL driver module
  cxl/pci: Add RCH downstream port AER register discovery
  ...
parents 5e2cb28d 4b928940
......@@ -178,6 +178,21 @@ Description:
hardware decoder target list.
What: /sys/bus/cxl/devices/portX/decoders_committed
Date: October, 2023
KernelVersion: v6.7
Contact: linux-cxl@vger.kernel.org
Description:
(RO) A memory device is considered active when any of its
decoders are in the "committed" state (See CXL 3.0 8.2.4.19.7
CXL HDM Decoder n Control Register). Hotplug and destructive
operations like "sanitize" are blocked while device is actively
decoding a Host Physical Address range. Note that this number
may be elevated without any regionX objects active or even
enumerated, as this may be due to decoders established by
platform firwmare or a previous kernel (kexec).
What: /sys/bus/cxl/devices/decoderX.Y
Date: June, 2021
KernelVersion: v5.14
......@@ -369,6 +384,21 @@ Description:
provided it is currently idle / not bound to a driver.
What: /sys/bus/cxl/devices/decoderX.Y/qos_class
Date: May, 2023
KernelVersion: v6.5
Contact: linux-cxl@vger.kernel.org
Description:
(RO) For CXL host platforms that support "QoS Telemmetry" this
root-decoder-only attribute conveys a platform specific cookie
that identifies a QoS performance class for the CXL Window.
This class-id can be compared against a similar "qos_class"
published for each memory-type that an endpoint supports. While
it is not required that endpoints map their local memory-class
to a matching platform class, mismatches are not recommended and
there are platform specific side-effects that may result.
What: /sys/bus/cxl/devices/regionZ/uuid
Date: May, 2022
KernelVersion: v6.0
......
......@@ -294,6 +294,8 @@ F: drivers/pnp/pnpacpi/
F: include/acpi/
F: include/linux/acpi.h
F: include/linux/fwnode.h
F: include/linux/fw_table.h
F: lib/fw_table.c
F: tools/power/acpi/
ACPI APEI
......@@ -5244,6 +5246,7 @@ L: linux-cxl@vger.kernel.org
S: Maintained
F: drivers/cxl/
F: include/uapi/linux/cxl_mem.h
F: tools/testing/cxl/
COMPUTE EXPRESS LINK PMU (CPMU)
M: Jonathan Cameron <jonathan.cameron@huawei.com>
......
......@@ -12,6 +12,7 @@ menuconfig ACPI
select PNP
select NLS
select CRC32
select FIRMWARE_TABLE
default y if X86
help
Advanced Configuration and Power Interface (ACPI) support for
......
......@@ -37,18 +37,6 @@ static struct acpi_table_desc initial_tables[ACPI_MAX_TABLES] __initdata;
static int acpi_apic_instance __initdata_or_acpilib;
enum acpi_subtable_type {
ACPI_SUBTABLE_COMMON,
ACPI_SUBTABLE_HMAT,
ACPI_SUBTABLE_PRMT,
ACPI_SUBTABLE_CEDT,
};
struct acpi_subtable_entry {
union acpi_subtable_headers *hdr;
enum acpi_subtable_type type;
};
/*
* Disable table checksum verification for the early stage due to the size
* limitation of the current x86 early mapping implementation.
......@@ -237,167 +225,6 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
}
}
static unsigned long __init_or_acpilib
acpi_get_entry_type(struct acpi_subtable_entry *entry)
{
switch (entry->type) {
case ACPI_SUBTABLE_COMMON:
return entry->hdr->common.type;
case ACPI_SUBTABLE_HMAT:
return entry->hdr->hmat.type;
case ACPI_SUBTABLE_PRMT:
return 0;
case ACPI_SUBTABLE_CEDT:
return entry->hdr->cedt.type;
}
return 0;
}
static unsigned long __init_or_acpilib
acpi_get_entry_length(struct acpi_subtable_entry *entry)
{
switch (entry->type) {
case ACPI_SUBTABLE_COMMON:
return entry->hdr->common.length;
case ACPI_SUBTABLE_HMAT:
return entry->hdr->hmat.length;
case ACPI_SUBTABLE_PRMT:
return entry->hdr->prmt.length;
case ACPI_SUBTABLE_CEDT:
return entry->hdr->cedt.length;
}
return 0;
}
static unsigned long __init_or_acpilib
acpi_get_subtable_header_length(struct acpi_subtable_entry *entry)
{
switch (entry->type) {
case ACPI_SUBTABLE_COMMON:
return sizeof(entry->hdr->common);
case ACPI_SUBTABLE_HMAT:
return sizeof(entry->hdr->hmat);
case ACPI_SUBTABLE_PRMT:
return sizeof(entry->hdr->prmt);
case ACPI_SUBTABLE_CEDT:
return sizeof(entry->hdr->cedt);
}
return 0;
}
static enum acpi_subtable_type __init_or_acpilib
acpi_get_subtable_type(char *id)
{
if (strncmp(id, ACPI_SIG_HMAT, 4) == 0)
return ACPI_SUBTABLE_HMAT;
if (strncmp(id, ACPI_SIG_PRMT, 4) == 0)
return ACPI_SUBTABLE_PRMT;
if (strncmp(id, ACPI_SIG_CEDT, 4) == 0)
return ACPI_SUBTABLE_CEDT;
return ACPI_SUBTABLE_COMMON;
}
static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc)
{
return proc->handler || proc->handler_arg;
}
static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc,
union acpi_subtable_headers *hdr,
unsigned long end)
{
if (proc->handler)
return proc->handler(hdr, end);
if (proc->handler_arg)
return proc->handler_arg(hdr, proc->arg, end);
return -EINVAL;
}
/**
* acpi_parse_entries_array - for each proc_num find a suitable subtable
*
* @id: table id (for debugging purposes)
* @table_size: size of the root table
* @table_header: where does the table start?
* @proc: array of acpi_subtable_proc struct containing entry id
* and associated handler with it
* @proc_num: how big proc is?
* @max_entries: how many entries can we process?
*
* For each proc_num find a subtable with proc->id and run proc->handler
* on it. Assumption is that there's only single handler for particular
* entry id.
*
* The table_size is not the size of the complete ACPI table (the length
* field in the header struct), but only the size of the root table; i.e.,
* the offset from the very first byte of the complete ACPI table, to the
* first byte of the very first subtable.
*
* On success returns sum of all matching entries for all proc handlers.
* Otherwise, -ENODEV or -EINVAL is returned.
*/
static int __init_or_acpilib acpi_parse_entries_array(
char *id, unsigned long table_size,
struct acpi_table_header *table_header, struct acpi_subtable_proc *proc,
int proc_num, unsigned int max_entries)
{
struct acpi_subtable_entry entry;
unsigned long table_end, subtable_len, entry_len;
int count = 0;
int errs = 0;
int i;
table_end = (unsigned long)table_header + table_header->length;
/* Parse all entries looking for a match. */
entry.type = acpi_get_subtable_type(id);
entry.hdr = (union acpi_subtable_headers *)
((unsigned long)table_header + table_size);
subtable_len = acpi_get_subtable_header_length(&entry);
while (((unsigned long)entry.hdr) + subtable_len < table_end) {
if (max_entries && count >= max_entries)
break;
for (i = 0; i < proc_num; i++) {
if (acpi_get_entry_type(&entry) != proc[i].id)
continue;
if (!has_handler(&proc[i]) ||
(!errs &&
call_handler(&proc[i], entry.hdr, table_end))) {
errs++;
continue;
}
proc[i].count++;
break;
}
if (i != proc_num)
count++;
/*
* If entry->length is 0, break from this loop to avoid
* infinite loop.
*/
entry_len = acpi_get_entry_length(&entry);
if (entry_len == 0) {
pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id);
return -EINVAL;
}
entry.hdr = (union acpi_subtable_headers *)
((unsigned long)entry.hdr + entry_len);
}
if (max_entries && count > max_entries) {
pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n",
id, proc->id, count);
}
return errs ? -EINVAL : count;
}
int __init_or_acpilib acpi_table_parse_entries_array(
char *id, unsigned long table_size, struct acpi_subtable_proc *proc,
int proc_num, unsigned int max_entries)
......
......@@ -289,6 +289,9 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
}
}
}
cxlrd->qos_class = cfmws->qtg_id;
rc = cxl_decoder_add(cxld, target_map);
err_xormap:
if (rc)
......
......@@ -73,8 +73,10 @@ struct cxl_rcrb_info;
resource_size_t __rcrb_to_component(struct device *dev,
struct cxl_rcrb_info *ri,
enum cxl_rcrb which);
u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb);
extern struct rw_semaphore cxl_dpa_rwsem;
extern struct rw_semaphore cxl_region_rwsem;
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
......
......@@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
cxlhdm->interleave_mask |= GENMASK(14, 12);
}
static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
struct cxl_component_regs *regs)
{
struct cxl_register_map map = {
.dev = &port->dev,
.resource = port->component_reg_phys,
.base = crb,
.max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
};
cxl_probe_component_regs(&port->dev, crb, &map.component_map);
if (!map.component_map.hdm_decoder.valid) {
dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
/* unique error code to indicate no HDM decoder capability */
return -ENODEV;
}
return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM));
}
static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
{
struct cxl_hdm *cxlhdm;
......@@ -153,9 +133,9 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
struct cxl_register_map *reg_map = &port->reg_map;
struct device *dev = &port->dev;
struct cxl_hdm *cxlhdm;
void __iomem *crb;
int rc;
cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
......@@ -164,19 +144,29 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
cxlhdm->port = port;
dev_set_drvdata(dev, cxlhdm);
crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
if (!crb && info && info->mem_enabled) {
cxlhdm->decoder_count = info->ranges;
return cxlhdm;
} else if (!crb) {
/* Memory devices can configure device HDM using DVSEC range regs. */
if (reg_map->resource == CXL_RESOURCE_NONE) {
if (!info || !info->mem_enabled) {
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs);
iounmap(crb);
if (rc)
cxlhdm->decoder_count = info->ranges;
return cxlhdm;
}
if (!reg_map->component_map.hdm_decoder.valid) {
dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
/* unique error code to indicate no HDM decoder capability */
return ERR_PTR(-ENODEV);
}
rc = cxl_map_component_regs(reg_map, &cxlhdm->regs,
BIT(CXL_CM_CAP_CAP_ID_HDM));
if (rc) {
dev_err(dev, "Failed to map HDM capability.\n");
return ERR_PTR(rc);
}
parse_hdm_decoder_caps(cxlhdm);
if (cxlhdm->decoder_count == 0) {
......@@ -575,17 +565,11 @@ static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl)
CXL_HDM_DECODER0_CTRL_HOSTONLY);
}
static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
static void cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
{
struct cxl_dport **t = &cxlsd->target[0];
int ways = cxlsd->cxld.interleave_ways;
if (dev_WARN_ONCE(&cxlsd->cxld.dev,
ways > 8 || ways > cxlsd->nr_targets,
"ways: %d overflows targets: %d\n", ways,
cxlsd->nr_targets))
return -ENXIO;
*tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id);
if (ways > 1)
*tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id);
......@@ -601,8 +585,6 @@ static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
*tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id);
if (ways > 7)
*tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id);
return 0;
}
/*
......@@ -643,13 +625,33 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
if (cxld->flags & CXL_DECODER_F_ENABLE)
return 0;
if (port->commit_end + 1 != id) {
if (cxl_num_decoders_committed(port) != id) {
dev_dbg(&port->dev,
"%s: out of order commit, expected decoder%d.%d\n",
dev_name(&cxld->dev), port->id, port->commit_end + 1);
dev_name(&cxld->dev), port->id,
cxl_num_decoders_committed(port));
return -EBUSY;
}
/*
* For endpoint decoders hosted on CXL memory devices that
* support the sanitize operation, make sure sanitize is not in-flight.
*/
if (is_endpoint_decoder(&cxld->dev)) {
struct cxl_endpoint_decoder *cxled =
to_cxl_endpoint_decoder(&cxld->dev);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_memdev_state *mds =
to_cxl_memdev_state(cxlmd->cxlds);
if (mds && mds->security.sanitize_active) {
dev_dbg(&cxlmd->dev,
"attempted to commit %s during sanitize\n",
dev_name(&cxld->dev));
return -EBUSY;
}
}
down_read(&cxl_dpa_rwsem);
/* common decoder settings */
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
......@@ -670,13 +672,7 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id);
u64 targets;
rc = cxlsd_set_targets(cxlsd, &targets);
if (rc) {
dev_dbg(&port->dev, "%s: target configuration error\n",
dev_name(&cxld->dev));
goto err;
}
cxlsd_set_targets(cxlsd, &targets);
writel(upper_32_bits(targets), tl_hi);
writel(lower_32_bits(targets), tl_lo);
} else {
......@@ -694,7 +690,6 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
port->commit_end++;
rc = cxld_await_commit(hdm, cxld->id);
err:
if (rc) {
dev_dbg(&port->dev, "%s: error %d committing decoder\n",
dev_name(&cxld->dev), rc);
......@@ -844,7 +839,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
else
cxld->target_type = CXL_DECODER_DEVMEM;
if (cxld->id != port->commit_end + 1) {
if (cxld->id != cxl_num_decoders_committed(port)) {
dev_warn(&port->dev,
"decoder%d.%d: Committed out of order\n",
port->id, cxld->id);
......
......@@ -1125,20 +1125,7 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
/**
* cxl_mem_sanitize() - Send a sanitization command to the device.
* @mds: The device data for the operation
* @cmd: The specific sanitization command opcode
*
* Return: 0 if the command was executed successfully, regardless of
* whether or not the actual security operation is done in the background,
* such as for the Sanitize case.
* Error return values can be the result of the mailbox command, -EINVAL
* when security requirements are not met or invalid contexts.
*
* See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase.
*/
int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
{
int rc;
u32 sec_out = 0;
......@@ -1183,7 +1170,45 @@ int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_sanitize, CXL);
/**
* cxl_mem_sanitize() - Send a sanitization command to the device.
* @cxlmd: The device for the operation
* @cmd: The specific sanitization command opcode
*
* Return: 0 if the command was executed successfully, regardless of
* whether or not the actual security operation is done in the background,
* such as for the Sanitize case.
* Error return values can be the result of the mailbox command, -EINVAL
* when security requirements are not met or invalid contexts, or -EBUSY
* if the sanitize operation is already in flight.
*
* See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase.
*/
int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
{
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_port *endpoint;
int rc;
/* synchronize with cxl_mem_probe() and decoder write operations */
device_lock(&cxlmd->dev);
endpoint = cxlmd->endpoint;
down_read(&cxl_region_rwsem);
/*
* Require an endpoint to be safe otherwise the driver can not
* be sure that the device is unmapped.
*/
if (endpoint && cxl_num_decoders_committed(endpoint) == 0)
rc = __cxl_mem_sanitize(mds, cmd);
else
rc = -EBUSY;
up_read(&cxl_region_rwsem);
device_unlock(&cxlmd->dev);
return rc;
}
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
......@@ -1224,8 +1249,7 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
return 0;
}
cxlds->dpa_res =
(struct resource)DEFINE_RES_MEM(0, mds->total_bytes);
cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes);
if (mds->partition_align_bytes == 0) {
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
......@@ -1377,6 +1401,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->mbox_mutex);
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
return mds;
......
......@@ -125,13 +125,16 @@ static ssize_t security_state_show(struct device *dev,
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
u64 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
u32 pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg);
u16 cmd = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
unsigned long state = mds->security.state;
int rc = 0;
if (cmd == CXL_MBOX_OP_SANITIZE && pct != 100)
return sysfs_emit(buf, "sanitize\n");
/* sync with latest submission state */
mutex_lock(&mds->mbox_mutex);
if (mds->security.sanitize_active)
rc = sysfs_emit(buf, "sanitize\n");
mutex_unlock(&mds->mbox_mutex);
if (rc)
return rc;
if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET))
return sysfs_emit(buf, "disabled\n");
......@@ -152,24 +155,17 @@ static ssize_t security_sanitize_store(struct device *dev,
const char *buf, size_t len)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_port *port = cxlmd->endpoint;
bool sanitize;
ssize_t rc;
if (kstrtobool(buf, &sanitize) || !sanitize)
return -EINVAL;
if (!port || !is_cxl_endpoint(port))
return -EINVAL;
/* ensure no regions are mapped to this memdev */
if (port->commit_end != -1)
return -EBUSY;
rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SANITIZE);
rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SANITIZE);
if (rc)
return rc;
return rc ? rc : len;
return len;
}
static struct device_attribute dev_attr_security_sanitize =
__ATTR(sanitize, 0200, NULL, security_sanitize_store);
......@@ -179,24 +175,17 @@ static ssize_t security_erase_store(struct device *dev,
const char *buf, size_t len)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_port *port = cxlmd->endpoint;
ssize_t rc;
bool erase;
if (kstrtobool(buf, &erase) || !erase)
return -EINVAL;
if (!port || !is_cxl_endpoint(port))
return -EINVAL;
/* ensure no regions are mapped to this memdev */
if (port->commit_end != -1)
return -EBUSY;
rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SECURE_ERASE);
rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SECURE_ERASE);
if (rc)
return rc;
return rc ? rc : len;
return len;
}
static struct device_attribute dev_attr_security_erase =
__ATTR(erase, 0200, NULL, security_erase_store);
......@@ -242,7 +231,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
if (rc)
return rc;
if (port->commit_end == -1) {
if (cxl_num_decoders_committed(port) == 0) {
/* No regions mapped to this memdev */
rc = cxl_get_poison_by_memdev(cxlmd);
} else {
......@@ -293,7 +282,7 @@ static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
.dpa = dpa,
};
port = cxlmd->endpoint;
if (port && is_cxl_endpoint(port) && port->commit_end != -1)
if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port))
device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
return ctx.cxlr;
......@@ -556,21 +545,11 @@ void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
static void cxl_memdev_security_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
if (mds->security.poll)
cancel_delayed_work_sync(&mds->security.poll_dwork);
}
static void cxl_memdev_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
down_write(&cxl_memdev_rwsem);
cxl_memdev_security_shutdown(dev);
cxlmd->cxlds = NULL;
up_write(&cxl_memdev_rwsem);
}
......@@ -580,8 +559,8 @@ static void cxl_memdev_unregister(void *_cxlmd)
struct cxl_memdev *cxlmd = _cxlmd;
struct device *dev = &cxlmd->dev;
cxl_memdev_shutdown(dev);
cdev_device_del(&cxlmd->cdev, dev);
cxl_memdev_shutdown(dev);
put_device(dev);
}
......@@ -961,17 +940,16 @@ static const struct fw_upload_ops cxl_memdev_fw_ops = {
.cleanup = cxl_fw_cleanup,
};
static void devm_cxl_remove_fw_upload(void *fwl)
static void cxl_remove_fw_upload(void *fwl)
{
firmware_upload_unregister(fwl);
}
int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds)
int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
struct device *dev = &cxlds->cxlmd->dev;
struct fw_upload *fwl;
int rc;
if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds))
return 0;
......@@ -979,19 +957,10 @@ int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds)
fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev),
&cxl_memdev_fw_ops, mds);
if (IS_ERR(fwl))
return dev_err_probe(dev, PTR_ERR(fwl),
"Failed to register firmware loader\n");
rc = devm_add_action_or_reset(cxlds->dev, devm_cxl_remove_fw_upload,
fwl);
if (rc)
dev_err(dev,
"Failed to add firmware loader remove action: %d\n",
rc);
return rc;
return PTR_ERR(fwl);
return devm_add_action_or_reset(host, cxl_remove_fw_upload, fwl);
}
EXPORT_SYMBOL_NS_GPL(cxl_memdev_setup_fw_upload, CXL);
EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fw_upload, CXL);
static const struct file_operations cxl_memdev_fops = {
.owner = THIS_MODULE,
......@@ -1002,36 +971,8 @@ static const struct file_operations cxl_memdev_fops = {
.llseek = noop_llseek,
};
static void put_sanitize(void *data)
{
struct cxl_memdev_state *mds = data;
sysfs_put(mds->security.sanitize_node);
}
static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct device *dev = &cxlmd->dev;
struct kernfs_node *sec;
sec = sysfs_get_dirent(dev->kobj.sd, "security");
if (!sec) {
dev_err(dev, "sysfs_get_dirent 'security' failed\n");
return -ENODEV;
}
mds->security.sanitize_node = sysfs_get_dirent(sec, "state");
sysfs_put(sec);
if (!mds->security.sanitize_node) {
dev_err(dev, "sysfs_get_dirent 'state' failed\n");
return -ENODEV;
}
return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds);
}
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
struct cxl_dev_state *cxlds)
{
struct cxl_memdev *cxlmd;
struct device *dev;
......@@ -1059,11 +1000,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
if (rc)
goto err;
rc = cxl_memdev_security_init(cxlmd);
if (rc)
goto err;
rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
if (rc)
return ERR_PTR(rc);
return cxlmd;
......@@ -1079,6 +1016,50 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
static void sanitize_teardown_notifier(void *data)
{
struct cxl_memdev_state *mds = data;
struct kernfs_node *state;
/*
* Prevent new irq triggered invocations of the workqueue and
* flush inflight invocations.
*/
mutex_lock(&mds->mbox_mutex);
state = mds->security.sanitize_node;
mds->security.sanitize_node = NULL;
mutex_unlock(&mds->mbox_mutex);
cancel_delayed_work_sync(&mds->security.poll_dwork);
sysfs_put(state);
}
int devm_cxl_sanitize_setup_notifier(struct device *host,
struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct kernfs_node *sec;
if (!test_bit(CXL_SEC_ENABLED_SANITIZE, mds->security.enabled_cmds))
return 0;
/*
* Note, the expectation is that @cxlmd would have failed to be
* created if these sysfs_get_dirent calls fail.
*/
sec = sysfs_get_dirent(cxlmd->dev.kobj.sd, "security");
if (!sec)
return -ENOENT;
mds->security.sanitize_node = sysfs_get_dirent(sec, "state");
sysfs_put(sec);
if (!mds->security.sanitize_node)
return -ENOENT;
return devm_add_action_or_reset(host, sanitize_teardown_notifier, mds);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_sanitize_setup_notifier, CXL);
__init int cxl_memdev_init(void)
{
dev_t devt;
......
......@@ -5,6 +5,7 @@
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
......@@ -595,6 +596,16 @@ static int cxl_cdat_read_table(struct device *dev,
return 0;
}
static unsigned char cdat_checksum(void *buf, size_t size)
{
unsigned char sum, *data = buf;
size_t i;
for (sum = 0, i = 0; i < size; i++)
sum += data[i];
return sum;
}
/**
* read_cdat_data - Read the CDAT data on this port
* @port: Port to read data from
......@@ -603,18 +614,30 @@ static int cxl_cdat_read_table(struct device *dev,
*/
void read_cdat_data(struct cxl_port *port)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct device *host = cxlmd->dev.parent;
struct device *uport = port->uport_dev;
struct device *dev = &port->dev;
struct pci_doe_mb *cdat_doe;
struct pci_dev *pdev = NULL;
struct cxl_memdev *cxlmd;
size_t cdat_length;
void *cdat_table;
int rc;
if (!dev_is_pci(host))
if (is_cxl_memdev(uport)) {
struct device *host;
cxlmd = to_cxl_memdev(uport);
host = cxlmd->dev.parent;
if (dev_is_pci(host))
pdev = to_pci_dev(host);
} else if (dev_is_pci(uport)) {
pdev = to_pci_dev(uport);
}
if (!pdev)
return;
cdat_doe = pci_find_doe_mailbox(to_pci_dev(host),
PCI_DVSEC_VENDOR_ID_CXL,
cdat_doe = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL,
CXL_DOE_PROTOCOL_TABLE_ACCESS);
if (!cdat_doe) {
dev_dbg(dev, "No CDAT mailbox\n");
......@@ -634,44 +657,54 @@ void read_cdat_data(struct cxl_port *port)
return;
rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);
if (rc) {
/* Don't leave table data allocated on error */
devm_kfree(dev, cdat_table);
dev_err(dev, "CDAT data read error\n");
return;
}
if (rc)
goto err;
port->cdat.table = cdat_table + sizeof(__le32);
cdat_table = cdat_table + sizeof(__le32);
if (cdat_checksum(cdat_table, cdat_length))
goto err;
port->cdat.table = cdat_table;
port->cdat.length = cdat_length;
return;
err:
/* Don't leave table data allocated on error */
devm_kfree(dev, cdat_table);
dev_err(dev, "Failed to read/validate CDAT.\n");
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
void cxl_cor_error_detected(struct pci_dev *pdev)
static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
void __iomem *ras_base)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
void __iomem *addr;
u32 status;
if (!cxlds->regs.ras)
if (!ras_base)
return;
addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
}
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
{
return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
}
/* CXL spec rev3.0 8.2.4.16.1 */
static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
static void header_log_copy(void __iomem *ras_base, u32 *log)
{
void __iomem *addr;
u32 *log_addr;
int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
log_addr = log;
for (i = 0; i < log_u32_size; i++) {
......@@ -685,17 +718,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
* Log the state of the RAS status registers and prepare them to log the
* next error status. Return 1 if reset needed.
*/
static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
void __iomem *ras_base)
{
u32 hl[CXL_HEADERLOG_SIZE_U32];
void __iomem *addr;
u32 status;
u32 fe;
if (!cxlds->regs.ras)
if (!ras_base)
return false;
addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
return false;
......@@ -703,7 +737,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
/* If multiple errors, log header points to first error from ctrl reg */
if (hweight32(status) > 1) {
void __iomem *rcc_addr =
cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
readl(rcc_addr)));
......@@ -711,13 +745,201 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
fe = status;
}
header_log_copy(cxlds, hl);
header_log_copy(ras_base, hl);
trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
return true;
}
static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
{
return __cxl_handle_ras(cxlds, cxlds->regs.ras);
}
#ifdef CONFIG_PCIEAER_CXL
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
struct cxl_rcrb_info *ri = &dport->rcrb;
void __iomem *dport_aer = NULL;
resource_size_t aer_phys;
struct device *host;
if (dport->rch && ri->aer_cap) {
host = dport->reg_map.host;
aer_phys = ri->aer_cap + ri->base;
dport_aer = devm_cxl_iomap_block(host, aer_phys,
sizeof(struct aer_capability_regs));
}
dport->regs.dport_aer = dport_aer;
}
static void cxl_dport_map_regs(struct cxl_dport *dport)
{
struct cxl_register_map *map = &dport->reg_map;
struct device *dev = dport->dport_dev;
if (!map->component_map.ras.valid)
dev_dbg(dev, "RAS registers not found\n");
else if (cxl_map_component_regs(map, &dport->regs.component,
BIT(CXL_CM_CAP_CAP_ID_RAS)))
dev_dbg(dev, "Failed to map RAS capability.\n");
if (dport->rch)
cxl_dport_map_rch_aer(dport);
}
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
void __iomem *aer_base = dport->regs.dport_aer;
struct pci_host_bridge *bridge;
u32 aer_cmd_mask, aer_cmd;
if (!aer_base)
return;
bridge = to_pci_host_bridge(dport->dport_dev);
/*
* Disable RCH root port command interrupts.
* CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
*
* This sequence may not be necessary. CXL spec states disabling
* the root cmd register's interrupts is required. But, PCI spec
* shows these are disabled by default on reset.
*/
if (bridge->native_aer) {
aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
PCI_ERR_ROOT_CMD_NONFATAL_EN |
PCI_ERR_ROOT_CMD_FATAL_EN);
aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
aer_cmd &= ~aer_cmd_mask;
writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
}
}
void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
{
struct device *dport_dev = dport->dport_dev;
struct pci_host_bridge *host_bridge;
host_bridge = to_pci_host_bridge(dport_dev);
if (host_bridge->native_aer)
dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base);
dport->reg_map.host = host;
cxl_dport_map_regs(dport);
if (dport->rch)
cxl_disable_rch_root_ints(dport);
}
EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL);
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
{
return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
}
static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
{
return __cxl_handle_ras(cxlds, dport->regs.ras);
}
/*
* Copy the AER capability registers using 32 bit read accesses.
* This is necessary because RCRB AER capability is MMIO mapped. Clear the
* status after copying.
*
* @aer_base: base address of AER capability block in RCRB
* @aer_regs: destination for copying AER capability
*/
static bool cxl_rch_get_aer_info(void __iomem *aer_base,
struct aer_capability_regs *aer_regs)
{
int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
u32 *aer_regs_buf = (u32 *)aer_regs;
int n;
if (!aer_base)
return false;
/* Use readl() to guarantee 32-bit accesses */
for (n = 0; n < read_cnt; n++)
aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
return true;
}
/* Get AER severity. Return false if there is no error. */
static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
int *severity)
{
if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
*severity = AER_FATAL;
else
*severity = AER_NONFATAL;
return true;
}
if (aer_regs->cor_status & ~aer_regs->cor_mask) {
*severity = AER_CORRECTABLE;
return true;
}
return false;
}
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
{
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
struct aer_capability_regs aer_regs;
struct cxl_dport *dport;
struct cxl_port *port;
int severity;
port = cxl_pci_find_port(pdev, &dport);
if (!port)
return;
put_device(&port->dev);
if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
return;
if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
return;
pci_print_aer(pdev, severity, &aer_regs);
if (severity == AER_CORRECTABLE)
cxl_handle_rdport_cor_ras(cxlds, dport);
else
cxl_handle_rdport_ras(cxlds, dport);
}
#else
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
#endif
void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
cxl_handle_endpoint_cor_ras(cxlds);
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
......@@ -726,13 +948,16 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
struct device *dev = &cxlmd->dev;
bool ue;
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
/*
* A frozen channel indicates an impending reset which is fatal to
* CXL.mem operation, and will likely crash the system. On the off
* chance the situation is recoverable dump the status of the RAS
* capability registers and bounce the active state of the memdev.
*/
ue = cxl_report_and_clear(cxlds);
ue = cxl_handle_endpoint_ras(cxlds);
switch (state) {
case pci_channel_io_normal:
......
......@@ -28,9 +28,22 @@
* instantiated by the core.
*/
/*
* All changes to the interleave configuration occur with this lock held
* for write.
*/
DECLARE_RWSEM(cxl_region_rwsem);
static DEFINE_IDA(cxl_port_ida);
static DEFINE_XARRAY(cxl_root_buses);
int cxl_num_decoders_committed(struct cxl_port *port)
{
lockdep_assert_held(&cxl_region_rwsem);
return port->commit_end + 1;
}
static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
......@@ -278,6 +291,15 @@ static ssize_t interleave_ways_show(struct device *dev,
static DEVICE_ATTR_RO(interleave_ways);
static ssize_t qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
return sysfs_emit(buf, "%d\n", cxlrd->qos_class);
}
static DEVICE_ATTR_RO(qos_class);
static struct attribute *cxl_decoder_base_attrs[] = {
&dev_attr_start.attr,
&dev_attr_size.attr,
......@@ -297,6 +319,7 @@ static struct attribute *cxl_decoder_root_attrs[] = {
&dev_attr_cap_type2.attr,
&dev_attr_cap_type3.attr,
&dev_attr_target_list.attr,
&dev_attr_qos_class.attr,
SET_CXL_REGION_ATTR(create_pmem_region)
SET_CXL_REGION_ATTR(create_ram_region)
SET_CXL_REGION_ATTR(delete_region)
......@@ -521,8 +544,33 @@ static void cxl_port_release(struct device *dev)
kfree(port);
}
static ssize_t decoders_committed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_port *port = to_cxl_port(dev);
int rc;
down_read(&cxl_region_rwsem);
rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
up_read(&cxl_region_rwsem);
return rc;
}
static DEVICE_ATTR_RO(decoders_committed);
static struct attribute *cxl_port_attrs[] = {
&dev_attr_decoders_committed.attr,
NULL,
};
static struct attribute_group cxl_port_attribute_group = {
.attrs = cxl_port_attrs,
};
static const struct attribute_group *cxl_port_attribute_groups[] = {
&cxl_base_attribute_group,
&cxl_port_attribute_group,
NULL,
};
......@@ -619,7 +667,6 @@ static int devm_cxl_link_parent_dport(struct device *host,
static struct lock_class_key cxl_port_key;
static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport)
{
struct cxl_port *port;
......@@ -670,7 +717,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
} else
dev->parent = uport_dev;
port->component_reg_phys = component_reg_phys;
ida_init(&port->decoder_ida);
port->hdm_end = -1;
port->commit_end = -1;
......@@ -691,19 +737,21 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
return ERR_PTR(rc);
}
static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map,
static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map,
resource_size_t component_reg_phys)
{
if (component_reg_phys == CXL_RESOURCE_NONE)
return 0;
*map = (struct cxl_register_map) {
.dev = dev,
.reg_type = CXL_REGLOC_RBI_COMPONENT,
.host = host,
.reg_type = CXL_REGLOC_RBI_EMPTY,
.resource = component_reg_phys,
.max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
};
if (component_reg_phys == CXL_RESOURCE_NONE)
return 0;
map->reg_type = CXL_REGLOC_RBI_COMPONENT;
map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;
return cxl_setup_regs(map);
}
......@@ -712,17 +760,27 @@ static int cxl_port_setup_regs(struct cxl_port *port,
{
if (dev_is_platform(port->uport_dev))
return 0;
return cxl_setup_comp_regs(&port->dev, &port->comp_map,
return cxl_setup_comp_regs(&port->dev, &port->reg_map,
component_reg_phys);
}
static int cxl_dport_setup_regs(struct cxl_dport *dport,
static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
resource_size_t component_reg_phys)
{
int rc;
if (dev_is_platform(dport->dport_dev))
return 0;
return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map,
/*
* use @dport->dport_dev for the context for error messages during
* register probing, and fixup @host after the fact, since @host may be
* NULL.
*/
rc = cxl_setup_comp_regs(dport->dport_dev, &dport->reg_map,
component_reg_phys);
dport->reg_map.host = host;
return rc;
}
static struct cxl_port *__devm_cxl_add_port(struct device *host,
......@@ -734,23 +792,38 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
struct device *dev;
int rc;
port = cxl_port_alloc(uport_dev, component_reg_phys, parent_dport);
port = cxl_port_alloc(uport_dev, parent_dport);
if (IS_ERR(port))
return port;
dev = &port->dev;
if (is_cxl_memdev(uport_dev))
if (is_cxl_memdev(uport_dev)) {
struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
rc = dev_set_name(dev, "endpoint%d", port->id);
else if (parent_dport)
if (rc)
goto err;
/*
* The endpoint driver already enumerated the component and RAS
* registers. Reuse that enumeration while prepping them to be
* mapped by the cxl_port driver.
*/
port->reg_map = cxlds->reg_map;
port->reg_map.host = &port->dev;
} else if (parent_dport) {
rc = dev_set_name(dev, "port%d", port->id);
else
rc = dev_set_name(dev, "root%d", port->id);
if (rc)
goto err;
rc = cxl_port_setup_regs(port, component_reg_phys);
if (rc)
goto err;
} else
rc = dev_set_name(dev, "root%d", port->id);
if (rc)
goto err;
rc = device_add(dev);
if (rc)
......@@ -983,7 +1056,16 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
if (!dport)
return ERR_PTR(-ENOMEM);
if (rcrb != CXL_RESOURCE_NONE) {
dport->dport_dev = dport_dev;
dport->port_id = port_id;
dport->port = port;
if (rcrb == CXL_RESOURCE_NONE) {
rc = cxl_dport_setup_regs(&port->dev, dport,
component_reg_phys);
if (rc)
return ERR_PTR(rc);
} else {
dport->rcrb.base = rcrb;
component_reg_phys = __rcrb_to_component(dport_dev, &dport->rcrb,
CXL_RCRB_DOWNSTREAM);
......@@ -992,6 +1074,14 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
return ERR_PTR(-ENXIO);
}
/*
* RCH @dport is not ready to map until associated with its
* memdev
*/
rc = cxl_dport_setup_regs(NULL, dport, component_reg_phys);
if (rc)
return ERR_PTR(rc);
dport->rch = true;
}
......@@ -999,14 +1089,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
dev_dbg(dport_dev, "Component Registers found for dport: %pa\n",
&component_reg_phys);
dport->dport_dev = dport_dev;
dport->port_id = port_id;
dport->port = port;
rc = cxl_dport_setup_regs(dport, component_reg_phys);
if (rc)
return ERR_PTR(rc);
cond_cxl_root_lock(port);
rc = add_dport(port, dport);
cond_cxl_root_unlock(port);
......@@ -1217,35 +1299,39 @@ static struct device *grandparent(struct device *dev)
return NULL;
}
static struct device *endpoint_host(struct cxl_port *endpoint)
{
struct cxl_port *port = to_cxl_port(endpoint->dev.parent);
if (is_cxl_root(port))
return port->uport_dev;
return &port->dev;
}
static void delete_endpoint(void *data)
{
struct cxl_memdev *cxlmd = data;
struct cxl_port *endpoint = cxlmd->endpoint;
struct cxl_port *parent_port;
struct device *parent;
struct device *host = endpoint_host(endpoint);
parent_port = cxl_mem_find_port(cxlmd, NULL);
if (!parent_port)
goto out;
parent = &parent_port->dev;
device_lock(parent);
if (parent->driver && !endpoint->dead) {
devm_release_action(parent, cxl_unlink_parent_dport, endpoint);
devm_release_action(parent, cxl_unlink_uport, endpoint);
devm_release_action(parent, unregister_port, endpoint);
device_lock(host);
if (host->driver && !endpoint->dead) {
devm_release_action(host, cxl_unlink_parent_dport, endpoint);
devm_release_action(host, cxl_unlink_uport, endpoint);
devm_release_action(host, unregister_port, endpoint);
}
cxlmd->endpoint = NULL;
device_unlock(parent);
put_device(parent);
out:
device_unlock(host);
put_device(&endpoint->dev);
put_device(host);
}
int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
{
struct device *host = endpoint_host(endpoint);
struct device *dev = &cxlmd->dev;
get_device(host);
get_device(&endpoint->dev);
cxlmd->endpoint = endpoint;
cxlmd->depth = endpoint->depth;
......@@ -1468,7 +1554,11 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
struct cxl_dport *dport;
struct cxl_port *port;
if (!dport_dev)
/*
* The terminal "grandparent" in PCI is NULL and @platform_bus
* for platform devices
*/
if (!dport_dev || dport_dev == &platform_bus)
return 0;
uport_dev = dport_dev->parent;
......@@ -1691,6 +1781,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
}
atomic_set(&cxlrd->region_id, rc);
cxlrd->qos_class = CXL_QOS_CLASS_INVALID;
return cxlrd;
}
EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL);
......@@ -2062,3 +2153,4 @@ static void cxl_core_exit(void)
subsys_initcall(cxl_core_init);
module_exit(cxl_core_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
......@@ -28,12 +28,6 @@
* 3. Decoder targets
*/
/*
* All changes to the interleave configuration occur with this lock held
* for write.
*/
static DECLARE_RWSEM(cxl_region_rwsem);
static struct cxl_region *to_cxl_region(struct device *dev);
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
......@@ -129,7 +123,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
if (!cpu_cache_has_invalidate_memregion()) {
if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
dev_warn_once(
dev_info_once(
&cxlr->dev,
"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
return 0;
......@@ -294,7 +288,7 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
*/
rc = cxl_region_invalidate_memregion(cxlr);
if (rc)
return rc;
goto out;
if (commit) {
rc = cxl_region_decode_commit(cxlr);
......@@ -1133,7 +1127,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
}
if (is_cxl_root(parent_port)) {
parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
/*
* Root decoder IG is always set to value in CFMWS which
* may be different than this region's IG. We can use the
* region's IG here since interleave_granularity_store()
* does not allow interleaved host-bridges with
* root IG != region IG.
*/
parent_ig = p->interleave_granularity;
parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
/*
* For purposes of address bit routing, use power-of-2 math for
......@@ -1195,6 +1196,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
if (iw > 8 || iw > cxlsd->nr_targets) {
dev_dbg(&cxlr->dev,
"%s:%s:%s: ways: %d overflows targets: %d\n",
dev_name(port->uport_dev), dev_name(&port->dev),
dev_name(&cxld->dev), iw, cxlsd->nr_targets);
return -ENXIO;
}
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
if (cxld->interleave_ways != iw ||
cxld->interleave_granularity != ig ||
......@@ -1480,6 +1489,14 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr,
return 0;
}
static int cmp_interleave_pos(const void *a, const void *b)
{
struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
return cxled_a->pos - cxled_b->pos;
}
static struct cxl_port *next_port(struct cxl_port *port)
{
if (!port->parent_dport)
......@@ -1487,119 +1504,127 @@ static struct cxl_port *next_port(struct cxl_port *port)
return port->parent_dport->port;
}
static int decoder_match_range(struct device *dev, void *data)
static int match_switch_decoder_by_range(struct device *dev, void *data)
{
struct cxl_endpoint_decoder *cxled = data;
struct cxl_switch_decoder *cxlsd;
struct range *r1, *r2 = data;
if (!is_switch_decoder(dev))
return 0;
cxlsd = to_cxl_switch_decoder(dev);
return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range);
}
static void find_positions(const struct cxl_switch_decoder *cxlsd,
const struct cxl_port *iter_a,
const struct cxl_port *iter_b, int *a_pos,
int *b_pos)
{
int i;
r1 = &cxlsd->cxld.hpa_range;
for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) {
if (cxlsd->target[i] == iter_a->parent_dport)
*a_pos = i;
else if (cxlsd->target[i] == iter_b->parent_dport)
*b_pos = i;
if (*a_pos >= 0 && *b_pos >= 0)
break;
}
if (is_root_decoder(dev))
return range_contains(r1, r2);
return (r1->start == r2->start && r1->end == r2->end);
}
static int cmp_decode_pos(const void *a, const void *b)
static int find_pos_and_ways(struct cxl_port *port, struct range *range,
int *pos, int *ways)
{
struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
struct cxl_port *port_a = cxled_to_port(cxled_a);
struct cxl_port *port_b = cxled_to_port(cxled_b);
struct cxl_port *iter_a, *iter_b, *port = NULL;
struct cxl_switch_decoder *cxlsd;
struct cxl_port *parent;
struct device *dev;
int a_pos, b_pos;
unsigned int seq;
/* Exit early if any prior sorting failed */
if (cxled_a->pos < 0 || cxled_b->pos < 0)
return 0;
/*
* Walk up the hierarchy to find a shared port, find the decoder that
* maps the range, compare the relative position of those dport
* mappings.
*/
for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
struct cxl_port *next_a, *next_b;
int rc = -ENXIO;
next_a = next_port(iter_a);
if (!next_a)
break;
parent = next_port(port);
if (!parent)
return rc;
for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
next_b = next_port(iter_b);
if (next_a != next_b)
continue;
port = next_a;
break;
dev = device_find_child(&parent->dev, range,
match_switch_decoder_by_range);
if (!dev) {
dev_err(port->uport_dev,
"failed to find decoder mapping %#llx-%#llx\n",
range->start, range->end);
return rc;
}
cxlsd = to_cxl_switch_decoder(dev);
*ways = cxlsd->cxld.interleave_ways;
if (port)
for (int i = 0; i < *ways; i++) {
if (cxlsd->target[i] == port->parent_dport) {
*pos = i;
rc = 0;
break;
}
if (!port) {
dev_err(cxlmd_a->dev.parent,
"failed to find shared port with %s\n",
dev_name(cxlmd_b->dev.parent));
goto err;
}
put_device(dev);
dev = device_find_child(&port->dev, cxled_a, decoder_match_range);
if (!dev) {
struct range *range = &cxled_a->cxld.hpa_range;
return rc;
}
dev_err(port->uport_dev,
"failed to find decoder that maps %#llx-%#llx\n",
range->start, range->end);
goto err;
}
/**
* cxl_calc_interleave_pos() - calculate an endpoint position in a region
* @cxled: endpoint decoder member of given region
*
* The endpoint position is calculated by traversing the topology from
* the endpoint to the root decoder and iteratively applying this
* calculation:
*
* position = position * parent_ways + parent_pos;
*
* ...where @position is inferred from switch and root decoder target lists.
*
* Return: position >= 0 on success
* -ENXIO on failure
*/
static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *iter, *port = cxled_to_port(cxled);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct range *range = &cxled->cxld.hpa_range;
int parent_ways = 0, parent_pos = 0, pos = 0;
int rc;
cxlsd = to_cxl_switch_decoder(dev);
do {
seq = read_seqbegin(&cxlsd->target_lock);
find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
} while (read_seqretry(&cxlsd->target_lock, seq));
/*
* Example: the expected interleave order of the 4-way region shown
* below is: mem0, mem2, mem1, mem3
*
* root_port
* / \
* host_bridge_0 host_bridge_1
* | | | |
* mem0 mem1 mem2 mem3
*
* In the example the calculator will iterate twice. The first iteration
* uses the mem position in the host-bridge and the ways of the host-
* bridge to generate the first, or local, position. The second
* iteration uses the host-bridge position in the root_port and the ways
* of the root_port to refine the position.
*
* A trace of the calculation per endpoint looks like this:
* mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0
* pos = 0 * 2 + 0 pos = 0 * 2 + 1
* pos: 0 pos: 1
*
* mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1
* pos = 1 * 2 + 0 pos = 1 * 2 + 1
* pos: 2 pos = 3
*
* Note that while this example is simple, the method applies to more
* complex topologies, including those with switches.
*/
put_device(dev);
/* Iterate from endpoint to root_port refining the position */
for (iter = port; iter; iter = next_port(iter)) {
if (is_cxl_root(iter))
break;
if (a_pos < 0 || b_pos < 0) {
dev_err(port->uport_dev,
"failed to find shared decoder for %s and %s\n",
dev_name(cxlmd_a->dev.parent),
dev_name(cxlmd_b->dev.parent));
goto err;
rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
if (rc)
return rc;
pos = pos * parent_ways + parent_pos;
}
dev_dbg(port->uport_dev, "%s comes %s %s\n",
dev_name(cxlmd_a->dev.parent),
a_pos - b_pos < 0 ? "before" : "after",
dev_name(cxlmd_b->dev.parent));
dev_dbg(&cxlmd->dev,
"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
dev_name(&port->dev), range->start, range->end, pos);
return a_pos - b_pos;
err:
cxled_a->pos = -1;
return 0;
return pos;
}
static int cxl_region_sort_targets(struct cxl_region *cxlr)
......@@ -1607,22 +1632,21 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr)
struct cxl_region_params *p = &cxlr->params;
int i, rc = 0;
sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
NULL);
for (i = 0; i < p->nr_targets; i++) {
struct cxl_endpoint_decoder *cxled = p->targets[i];
cxled->pos = cxl_calc_interleave_pos(cxled);
/*
* Record that sorting failed, but still continue to restore
* cxled->pos with its ->targets[] position so that follow-on
* code paths can reliably do p->targets[cxled->pos] to
* self-reference their entry.
* Record that sorting failed, but still continue to calc
* cxled->pos so that follow-on code paths can reliably
* do p->targets[cxled->pos] to self-reference their entry.
*/
if (cxled->pos < 0)
rc = -ENXIO;
cxled->pos = i;
}
/* Keep the cxlr target list in interleave position order */
sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
cmp_interleave_pos, NULL);
dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
return rc;
......@@ -1658,6 +1682,12 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -ENXIO;
}
if (p->nr_targets >= p->interleave_ways) {
dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
p->nr_targets);
return -EINVAL;
}
ep_port = cxled_to_port(cxled);
root_port = cxlrd_to_port(cxlrd);
dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
......@@ -1750,7 +1780,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
if (p->nr_targets == p->interleave_ways) {
rc = cxl_region_setup_targets(cxlr);
if (rc)
goto err_decrement;
return rc;
p->state = CXL_CONFIG_ACTIVE;
}
......@@ -1761,13 +1791,27 @@ static int cxl_region_attach(struct cxl_region *cxlr,
.end = p->res->end,
};
if (p->nr_targets != p->interleave_ways)
return 0;
err_decrement:
p->nr_targets--;
cxled->pos = -1;
p->targets[pos] = NULL;
return rc;
/*
* Test the auto-discovery position calculator function
* against this successfully created user-defined region.
* A fail message here means that this interleave config
* will fail when presented as CXL_REGION_F_AUTO.
*/
for (int i = 0; i < p->nr_targets; i++) {
struct cxl_endpoint_decoder *cxled = p->targets[i];
int test_pos;
test_pos = cxl_calc_interleave_pos(cxled);
dev_dbg(&cxled->cxld.dev,
"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
(test_pos == cxled->pos) ? "success" : "fail",
test_pos, cxled->pos);
}
return 0;
}
static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
......@@ -2696,7 +2740,7 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
return rc;
}
static int match_decoder_by_range(struct device *dev, void *data)
static int match_root_decoder_by_range(struct device *dev, void *data)
{
struct range *r1, *r2 = data;
struct cxl_root_decoder *cxlrd;
......@@ -2827,7 +2871,7 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
int rc;
cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
match_decoder_by_range);
match_root_decoder_by_range);
if (!cxlrd_dev) {
dev_err(cxlmd->dev.parent,
"%s:%s no CXL window for range %#llx:%#llx\n",
......
......@@ -204,7 +204,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map,
struct cxl_component_regs *regs,
unsigned long map_mask)
{
struct device *dev = map->dev;
struct device *host = map->host;
struct mapinfo {
const struct cxl_reg_map *rmap;
void __iomem **addr;
......@@ -216,16 +216,16 @@ int cxl_map_component_regs(const struct cxl_register_map *map,
for (i = 0; i < ARRAY_SIZE(mapinfo); i++) {
struct mapinfo *mi = &mapinfo[i];
resource_size_t phys_addr;
resource_size_t addr;
resource_size_t length;
if (!mi->rmap->valid)
continue;
if (!test_bit(mi->rmap->id, &map_mask))
continue;
phys_addr = map->resource + mi->rmap->offset;
addr = map->resource + mi->rmap->offset;
length = mi->rmap->size;
*(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length);
*(mi->addr) = devm_cxl_iomap_block(host, addr, length);
if (!*(mi->addr))
return -ENOMEM;
}
......@@ -237,7 +237,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL);
int cxl_map_device_regs(const struct cxl_register_map *map,
struct cxl_device_regs *regs)
{
struct device *dev = map->dev;
struct device *host = map->host;
resource_size_t phys_addr = map->resource;
struct mapinfo {
const struct cxl_reg_map *rmap;
......@@ -259,7 +259,7 @@ int cxl_map_device_regs(const struct cxl_register_map *map,
addr = phys_addr + mi->rmap->offset;
length = mi->rmap->size;
*(mi->addr) = devm_cxl_iomap_block(dev, addr, length);
*(mi->addr) = devm_cxl_iomap_block(host, addr, length);
if (!*(mi->addr))
return -ENOMEM;
}
......@@ -309,7 +309,7 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
int regloc, i;
*map = (struct cxl_register_map) {
.dev = &pdev->dev,
.host = &pdev->dev,
.resource = CXL_RESOURCE_NONE,
};
......@@ -386,10 +386,9 @@ int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type)
}
EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL);
int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
struct cxl_register_map *map)
int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs)
{
struct device *dev = &pdev->dev;
struct device *dev = map->host;
resource_size_t phys_addr;
phys_addr = map->resource;
......@@ -403,15 +402,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL);
static int cxl_map_regblock(struct cxl_register_map *map)
{
struct device *dev = map->dev;
struct device *host = map->host;
map->base = ioremap(map->resource, map->max_size);
if (!map->base) {
dev_err(dev, "failed to map registers\n");
dev_err(host, "failed to map registers\n");
return -ENOMEM;
}
dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
dev_dbg(host, "Mapped CXL Memory Device resource %pa\n", &map->resource);
return 0;
}
......@@ -425,28 +424,28 @@ static int cxl_probe_regs(struct cxl_register_map *map)
{
struct cxl_component_reg_map *comp_map;
struct cxl_device_reg_map *dev_map;
struct device *dev = map->dev;
struct device *host = map->host;
void __iomem *base = map->base;
switch (map->reg_type) {
case CXL_REGLOC_RBI_COMPONENT:
comp_map = &map->component_map;
cxl_probe_component_regs(dev, base, comp_map);
dev_dbg(dev, "Set up component registers\n");
cxl_probe_component_regs(host, base, comp_map);
dev_dbg(host, "Set up component registers\n");
break;
case CXL_REGLOC_RBI_MEMDEV:
dev_map = &map->device_map;
cxl_probe_device_regs(dev, base, dev_map);
cxl_probe_device_regs(host, base, dev_map);
if (!dev_map->status.valid || !dev_map->mbox.valid ||
!dev_map->memdev.valid) {
dev_err(dev, "registers not found: %s%s%s\n",
dev_err(host, "registers not found: %s%s%s\n",
!dev_map->status.valid ? "status " : "",
!dev_map->mbox.valid ? "mbox " : "",
!dev_map->memdev.valid ? "memdev " : "");
return -ENXIO;
}
dev_dbg(dev, "Probing device registers...\n");
dev_dbg(host, "Probing device registers...\n");
break;
default:
break;
......@@ -470,6 +469,42 @@ int cxl_setup_regs(struct cxl_register_map *map)
}
EXPORT_SYMBOL_NS_GPL(cxl_setup_regs, CXL);
u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb)
{
void __iomem *addr;
u16 offset = 0;
u32 cap_hdr;
if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
return 0;
if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
return 0;
addr = ioremap(rcrb, SZ_4K);
if (!addr)
goto out;
cap_hdr = readl(addr + offset);
while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) {
offset = PCI_EXT_CAP_NEXT(cap_hdr);
/* Offset 0 terminates capability list. */
if (!offset)
break;
cap_hdr = readl(addr + offset);
}
if (offset)
dev_dbg(dev, "found AER extended capability (0x%x)\n", offset);
iounmap(addr);
out:
release_mem_region(rcrb, SZ_4K);
return offset;
}
resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri,
enum cxl_rcrb which)
{
......
......@@ -221,6 +221,14 @@ struct cxl_regs {
struct_group_tagged(cxl_pmu_regs, pmu_regs,
void __iomem *pmu;
);
/*
* RCH downstream port specific RAS register
* @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB
*/
struct_group_tagged(cxl_rch_regs, rch_regs,
void __iomem *dport_aer;
);
};
struct cxl_reg_map {
......@@ -247,7 +255,7 @@ struct cxl_pmu_reg_map {
/**
* struct cxl_register_map - DVSEC harvested register block mapping parameters
* @dev: device for devm operations and logging
* @host: device for devm operations and logging
* @base: virtual base of the register-block-BAR + @block_offset
* @resource: physical resource base of the register block
* @max_size: maximum mapping size to perform register search
......@@ -257,7 +265,7 @@ struct cxl_pmu_reg_map {
* @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
*/
struct cxl_register_map {
struct device *dev;
struct device *host;
void __iomem *base;
resource_size_t resource;
resource_size_t max_size;
......@@ -278,8 +286,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map,
unsigned long map_mask);
int cxl_map_device_regs(const struct cxl_register_map *map,
struct cxl_device_regs *regs);
int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
struct cxl_register_map *map);
int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs);
enum cxl_regloc_type;
int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type);
......@@ -321,6 +328,7 @@ enum cxl_decoder_type {
*/
#define CXL_DECODER_MAX_INTERLEAVE 16
#define CXL_QOS_CLASS_INVALID -1
/**
* struct cxl_decoder - Common CXL HDM Decoder Attributes
......@@ -432,6 +440,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
* @calc_hb: which host bridge covers the n'th position by granularity
* @platform_data: platform specific configuration data
* @range_lock: sync region autodiscovery by address range
* @qos_class: QoS performance class cookie
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
......@@ -440,6 +449,7 @@ struct cxl_root_decoder {
cxl_calc_hb_fn calc_hb;
void *platform_data;
struct mutex range_lock;
int qos_class;
struct cxl_switch_decoder cxlsd;
};
......@@ -572,11 +582,10 @@ struct cxl_dax_region {
* @regions: cxl_region_ref instances, regions mapped by this port
* @parent_dport: dport that points to this port in the parent
* @decoder_ida: allocator for decoder ids
* @comp_map: component register capability mappings
* @reg_map: component and ras register mapping parameters
* @nr_dports: number of entries in @dports
* @hdm_end: track last allocated HDM decoder instance for allocation ordering
* @commit_end: cursor to track highest committed decoder for commit ordering
* @component_reg_phys: component register capability base address (optional)
* @dead: last ep has been removed, force port re-creation
* @depth: How deep this port is relative to the root. depth 0 is the root.
* @cdat: Cached CDAT data
......@@ -592,11 +601,10 @@ struct cxl_port {
struct xarray regions;
struct cxl_dport *parent_dport;
struct ida decoder_ida;
struct cxl_register_map comp_map;
struct cxl_register_map reg_map;
int nr_dports;
int hdm_end;
int commit_end;
resource_size_t component_reg_phys;
bool dead;
unsigned int depth;
struct cxl_cdat {
......@@ -620,19 +628,21 @@ struct cxl_rcrb_info {
/**
* struct cxl_dport - CXL downstream port
* @dport_dev: PCI bridge or firmware device representing the downstream link
* @comp_map: component register capability mappings
* @reg_map: component and ras register mapping parameters
* @port_id: unique hardware identifier for dport in decoder target list
* @rcrb: Data about the Root Complex Register Block layout
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
* @regs: Dport parsed register blocks
*/
struct cxl_dport {
struct device *dport_dev;
struct cxl_register_map comp_map;
struct cxl_register_map reg_map;
int port_id;
struct cxl_rcrb_info rcrb;
bool rch;
struct cxl_port *port;
struct cxl_regs regs;
};
/**
......@@ -679,6 +689,7 @@ static inline bool is_cxl_root(struct cxl_port *port)
return port->uport_dev == port->dev.parent;
}
int cxl_num_decoders_committed(struct cxl_port *port);
bool is_cxl_port(const struct device *dev);
struct cxl_port *to_cxl_port(const struct device *dev);
struct pci_bus;
......@@ -706,6 +717,13 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
struct device *dport_dev, int port_id,
resource_size_t rcrb);
#ifdef CONFIG_PCIEAER_CXL
void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
#else
static inline void cxl_setup_parent_dport(struct device *host,
struct cxl_dport *dport) { }
#endif
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
......
......@@ -84,9 +84,12 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
return is_cxl_memdev(port->uport_dev);
}
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
struct cxl_dev_state *cxlds);
int devm_cxl_sanitize_setup_notifier(struct device *host,
struct cxl_memdev *cxlmd);
struct cxl_memdev_state;
int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds);
int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds);
int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped);
......@@ -360,16 +363,16 @@ struct cxl_fw_state {
*
* @state: state of last security operation
* @enabled_cmds: All security commands enabled in the CEL
* @poll: polling for sanitization is enabled, device has no mbox irq support
* @poll_tmo_secs: polling timeout
* @sanitize_active: sanitize completion pending
* @poll_dwork: polling work item
* @sanitize_node: sanitation sysfs file to notify
*/
struct cxl_security_state {
unsigned long state;
DECLARE_BITMAP(enabled_cmds, CXL_SEC_ENABLED_MAX);
bool poll;
int poll_tmo_secs;
bool sanitize_active;
struct delayed_work poll_dwork;
struct kernfs_node *sanitize_node;
};
......@@ -397,6 +400,7 @@ enum cxl_devtype {
*
* @dev: The device associated with this CXL state
* @cxlmd: The device representing the CXL.mem capabilities of @dev
* @reg_map: component and ras register mapping parameters
* @regs: Parsed register blocks
* @cxl_dvsec: Offset to the PCIe device DVSEC
* @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
......@@ -404,13 +408,13 @@ enum cxl_devtype {
* @dpa_res: Overall DPA resource tree for the device
* @pmem_res: Active Persistent memory capacity configuration
* @ram_res: Active Volatile memory capacity configuration
* @component_reg_phys: register base of component registers
* @serial: PCIe Device Serial Number
* @type: Generic Memory Class device or Vendor Specific Memory device
*/
struct cxl_dev_state {
struct device *dev;
struct cxl_memdev *cxlmd;
struct cxl_register_map reg_map;
struct cxl_regs regs;
int cxl_dvsec;
bool rcd;
......@@ -418,7 +422,6 @@ struct cxl_dev_state {
struct resource dpa_res;
struct resource pmem_res;
struct resource ram_res;
resource_size_t component_reg_phys;
u64 serial;
enum cxl_devtype type;
};
......@@ -883,7 +886,7 @@ static inline void cxl_mem_active_dec(void)
}
#endif
int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd);
int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd);
struct cxl_hdm {
struct cxl_component_regs regs;
......
......@@ -49,7 +49,6 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
struct cxl_dport *parent_dport)
{
struct cxl_port *parent_port = parent_dport->port;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_port *endpoint, *iter, *down;
int rc;
......@@ -65,8 +64,8 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
ep->next = down;
}
endpoint = devm_cxl_add_port(host, &cxlmd->dev,
cxlds->component_reg_phys,
/* Note: endpoint port component registers are derived from @cxlds */
endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE,
parent_dport);
if (IS_ERR(endpoint))
return PTR_ERR(endpoint);
......@@ -158,6 +157,8 @@ static int cxl_mem_probe(struct device *dev)
else
endpoint_parent = &parent_port->dev;
cxl_setup_parent_dport(dev, dport);
device_lock(endpoint_parent);
if (!endpoint_parent->driver) {
dev_err(dev, "CXL port topology %s not enabled\n",
......
......@@ -85,25 +85,28 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
/*
* Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique
* wrapper object for each irq within the same cxlds.
*/
struct cxl_dev_id {
struct cxl_dev_state *cxlds;
};
static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
irq_handler_t handler, irq_handler_t thread_fn)
irq_handler_t thread_fn)
{
struct device *dev = cxlds->dev;
struct cxl_dev_id *dev_id;
/* dev_id must be globally unique and must contain the cxlds */
dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
if (!dev_id)
return -ENOMEM;
dev_id->cxlds = cxlds;
return devm_request_threaded_irq(dev, irq, handler, thread_fn,
IRQF_SHARED | IRQF_ONESHOT,
NULL, dev_id);
return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
IRQF_SHARED | IRQF_ONESHOT, NULL,
dev_id);
}
static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
......@@ -128,10 +131,10 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
if (opcode == CXL_MBOX_OP_SANITIZE) {
mutex_lock(&mds->mbox_mutex);
if (mds->security.sanitize_node)
sysfs_notify_dirent(mds->security.sanitize_node);
dev_dbg(cxlds->dev, "Sanitization operation ended\n");
mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
mutex_unlock(&mds->mbox_mutex);
} else {
/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
rcuwait_wake_up(&mds->mbox_wait);
......@@ -152,18 +155,16 @@ static void cxl_mbox_sanitize_work(struct work_struct *work)
mutex_lock(&mds->mbox_mutex);
if (cxl_mbox_background_complete(cxlds)) {
mds->security.poll_tmo_secs = 0;
put_device(cxlds->dev);
if (mds->security.sanitize_node)
sysfs_notify_dirent(mds->security.sanitize_node);
mds->security.sanitize_active = false;
dev_dbg(cxlds->dev, "Sanitization operation ended\n");
} else {
int timeout = mds->security.poll_tmo_secs + 10;
mds->security.poll_tmo_secs = min(15 * 60, timeout);
queue_delayed_work(system_wq, &mds->security.poll_dwork,
timeout * HZ);
schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
}
mutex_unlock(&mds->mbox_mutex);
}
......@@ -295,18 +296,15 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
* and allow userspace to poll(2) for completion.
*/
if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
if (mds->security.poll) {
/* hold the device throughout */
get_device(cxlds->dev);
if (mds->security.sanitize_active)
return -EBUSY;
/* give first timeout a second */
timeout = 1;
mds->security.poll_tmo_secs = timeout;
queue_delayed_work(system_wq,
&mds->security.poll_dwork,
mds->security.sanitize_active = true;
schedule_delayed_work(&mds->security.poll_dwork,
timeout * HZ);
}
dev_dbg(dev, "Sanitization operation started\n");
goto success;
}
......@@ -389,7 +387,9 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
struct device *dev = cxlds->dev;
unsigned long timeout;
int irq, msgnum;
u64 md_status;
u32 ctrl;
timeout = jiffies + mbox_ready_timeout * HZ;
do {
......@@ -437,33 +437,26 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size);
rcuwait_init(&mds->mbox_wait);
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) {
u32 ctrl;
int irq, msgnum;
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
/* background command interrupts are optional */
if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
return 0;
msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
irq = pci_irq_vector(pdev, msgnum);
irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
if (irq < 0)
goto mbox_poll;
return 0;
if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL))
goto mbox_poll;
if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq))
return 0;
dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n");
/* enable background command mbox irq support */
ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
return 0;
}
mbox_poll:
mds->security.poll = true;
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
return 0;
}
......@@ -484,7 +477,7 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
resource_size_t component_reg_phys;
*map = (struct cxl_register_map) {
.dev = &pdev->dev,
.host = &pdev->dev,
.resource = CXL_RESOURCE_NONE,
};
......@@ -653,7 +646,7 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
if (irq < 0)
return irq;
return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread);
return cxl_request_irq(cxlds, irq, cxl_event_thread);
}
static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
......@@ -834,16 +827,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
* If the component registers can't be found, the cxl_pci driver may
* still be useful for management functions so don't return an error.
*/
cxlds->component_reg_phys = CXL_RESOURCE_NONE;
rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
&cxlds->reg_map);
if (rc)
dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
else if (!map.component_map.ras.valid)
else if (!cxlds->reg_map.component_map.ras.valid)
dev_dbg(&pdev->dev, "RAS registers not found\n");
cxlds->component_reg_phys = map.resource;
rc = cxl_map_component_regs(&map, &cxlds->regs.component,
rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component,
BIT(CXL_CM_CAP_CAP_ID_RAS));
if (rc)
dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
......@@ -882,11 +873,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
cxlmd = devm_cxl_add_memdev(cxlds);
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
rc = cxl_memdev_setup_fw_upload(mds);
rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
if (rc)
return rc;
rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
if (rc)
return rc;
......@@ -900,7 +895,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
break;
}
rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map);
rc = cxl_map_pmu_regs(&map, &pmu_regs);
if (rc) {
dev_dbg(&pdev->dev, "Could not map PMU regs\n");
break;
......
......@@ -62,6 +62,9 @@ static int cxl_switch_port_probe(struct cxl_port *port)
struct cxl_hdm *cxlhdm;
int rc;
/* Cache the data early to ensure is_visible() works */
read_cdat_data(port);
rc = devm_cxl_port_enumerate_dports(port);
if (rc < 0)
return rc;
......
......@@ -49,6 +49,15 @@ config PCIEAER_INJECT
gotten from:
https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
config PCIEAER_CXL
bool "PCI Express CXL RAS support"
default y
depends on PCIEAER && CXL_PCI
help
Enables CXL error handling.
If unsure, say Y.
#
# PCI Express ECRC
#
......
......@@ -760,8 +760,9 @@ int cper_severity_to_aer(int cper_severity)
}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
#endif
void cper_print_aer(struct pci_dev *dev, int aer_severity,
void pci_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer)
{
int layer, agent, tlp_header_valid = 0;
......@@ -801,7 +802,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
trace_aer_event(dev_name(&dev->dev), (status & ~mask),
aer_severity, tlp_header_valid, &aer->header_log);
}
#endif
EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL);
/**
* add_error_device - list device to be handled
......@@ -934,14 +935,153 @@ static bool find_source_device(struct pci_dev *parent,
return true;
}
#ifdef CONFIG_PCIEAER_CXL
/**
* pci_aer_unmask_internal_errors - unmask internal errors
* @dev: pointer to the pcie_dev data structure
*
* Unmasks internal errors in the Uncorrectable and Correctable Error
* Mask registers.
*
* Note: AER must be enabled and supported by the device which must be
* checked in advance, e.g. with pcie_aer_is_native().
*/
static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
{
int aer = dev->aer_cap;
u32 mask;
pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
mask &= ~PCI_ERR_UNC_INTN;
pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);
pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
mask &= ~PCI_ERR_COR_INTERNAL;
pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
}
static bool is_cxl_mem_dev(struct pci_dev *dev)
{
/*
* The capability, status, and control fields in Device 0,
* Function 0 DVSEC control the CXL functionality of the
* entire device (CXL 3.0, 8.1.3).
*/
if (dev->devfn != PCI_DEVFN(0, 0))
return false;
/*
* CXL Memory Devices must have the 502h class code set (CXL
* 3.0, 8.1.12.1).
*/
if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
return false;
return true;
}
static bool cxl_error_is_native(struct pci_dev *dev)
{
struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
return (pcie_ports_native || host->native_aer);
}
static bool is_internal_error(struct aer_err_info *info)
{
if (info->severity == AER_CORRECTABLE)
return info->status & PCI_ERR_COR_INTERNAL;
return info->status & PCI_ERR_UNC_INTN;
}
static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
{
struct aer_err_info *info = (struct aer_err_info *)data;
const struct pci_error_handlers *err_handler;
if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
return 0;
/* protect dev->driver */
device_lock(&dev->dev);
err_handler = dev->driver ? dev->driver->err_handler : NULL;
if (!err_handler)
goto out;
if (info->severity == AER_CORRECTABLE) {
if (err_handler->cor_error_detected)
err_handler->cor_error_detected(dev);
} else if (err_handler->error_detected) {
if (info->severity == AER_NONFATAL)
err_handler->error_detected(dev, pci_channel_io_normal);
else if (info->severity == AER_FATAL)
err_handler->error_detected(dev, pci_channel_io_frozen);
}
out:
device_unlock(&dev->dev);
return 0;
}
static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
/*
* Internal errors of an RCEC indicate an AER error in an
* RCH's downstream port. Check and handle them in the CXL.mem
* device driver.
*/
if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
is_internal_error(info))
pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
}
static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
{
bool *handles_cxl = data;
if (!*handles_cxl)
*handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
/* Non-zero terminates iteration */
return *handles_cxl;
}
static bool handles_cxl_errors(struct pci_dev *rcec)
{
bool handles_cxl = false;
if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
pcie_aer_is_native(rcec))
pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
return handles_cxl;
}
static void cxl_rch_enable_rcec(struct pci_dev *rcec)
{
if (!handles_cxl_errors(rcec))
return;
pci_aer_unmask_internal_errors(rcec);
pci_info(rcec, "CXL: Internal errors unmasked");
}
#else
static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
static inline void cxl_rch_handle_error(struct pci_dev *dev,
struct aer_err_info *info) { }
#endif
/**
* handle_error_source - handle logging error into an event log
* pci_aer_handle_error - handle logging error into an event log
* @dev: pointer to pci_dev data structure of error source device
* @info: comprehensive error information
*
* Invoked when an error being detected by Root Port.
*/
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
int aer = dev->aer_cap;
......@@ -965,6 +1105,12 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)
pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
}
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
cxl_rch_handle_error(dev, info);
pci_aer_handle_error(dev, info);
pci_dev_put(dev);
}
......@@ -997,7 +1143,7 @@ static void aer_recover_work_func(struct work_struct *work)
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
continue;
}
cper_print_aer(pdev, entry.severity, entry.regs);
pci_print_aer(pdev, entry.severity, entry.regs);
/*
* Memory for aer_capability_regs(entry.regs) is being allocated from the
* ghes_estatus_pool to protect it from overwriting when multiple sections
......@@ -1348,6 +1494,7 @@ static int aer_probe(struct pcie_device *dev)
return status;
}
cxl_rch_enable_rcec(port);
aer_enable_rootport(rpc);
pci_info(port, "enabled with IRQ %d\n", dev->irq);
return 0;
......
......@@ -15,6 +15,7 @@
#include <linux/mod_devicetable.h>
#include <linux/property.h>
#include <linux/uuid.h>
#include <linux/fw_table.h>
struct irq_domain;
struct irq_domain_ops;
......@@ -24,6 +25,16 @@ struct irq_domain_ops;
#endif
#include <acpi/acpi.h>
#ifdef CONFIG_ACPI_TABLE_LIB
#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
#define __init_or_acpilib
#define __initdata_or_acpilib
#else
#define EXPORT_SYMBOL_ACPI_LIB(x)
#define __init_or_acpilib __init
#define __initdata_or_acpilib __initdata
#endif
#ifdef CONFIG_ACPI
#include <linux/list.h>
......@@ -119,21 +130,8 @@ enum acpi_address_range_id {
/* Table Handlers */
union acpi_subtable_headers {
struct acpi_subtable_header common;
struct acpi_hmat_structure hmat;
struct acpi_prmt_module_header prmt;
struct acpi_cedt_header cedt;
};
typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header,
const unsigned long end);
typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header,
void *arg, const unsigned long end);
/* Debugger support */
struct acpi_debugger_ops {
......@@ -207,14 +205,6 @@ static inline int acpi_debugger_notify_command_complete(void)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
struct acpi_subtable_proc {
int id;
acpi_tbl_entry_handler handler;
acpi_tbl_entry_handler_arg handler_arg;
void *arg;
int count;
};
void __iomem *__acpi_map_table(unsigned long phys, unsigned long size);
void __acpi_unmap_table(void __iomem *map, unsigned long size);
int early_acpi_boot_init(void);
......@@ -229,16 +219,6 @@ void acpi_reserve_initial_tables (void);
void acpi_table_init_complete (void);
int acpi_table_init (void);
#ifdef CONFIG_ACPI_TABLE_LIB
#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
#define __init_or_acpilib
#define __initdata_or_acpilib
#else
#define EXPORT_SYMBOL_ACPI_LIB(x)
#define __init_or_acpilib __init
#define __initdata_or_acpilib __initdata
#endif
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
int __init_or_acpilib acpi_table_parse_entries(char *id,
unsigned long table_size, int entry_id,
......
......@@ -51,7 +51,7 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
#endif
void cper_print_aer(struct pci_dev *dev, int aer_severity,
void pci_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer);
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* fw_tables.h - Parsing support for ACPI and ACPI-like tables provided by
* platform or device firmware
*
* Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2023 Intel Corp.
*/
#ifndef _FW_TABLE_H_
#define _FW_TABLE_H_
union acpi_subtable_headers;
typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header,
const unsigned long end);
typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header,
void *arg, const unsigned long end);
struct acpi_subtable_proc {
int id;
acpi_tbl_entry_handler handler;
acpi_tbl_entry_handler_arg handler_arg;
void *arg;
int count;
};
#include <linux/acpi.h>
#include <acpi/acpi.h>
union acpi_subtable_headers {
struct acpi_subtable_header common;
struct acpi_hmat_structure hmat;
struct acpi_prmt_module_header prmt;
struct acpi_cedt_header cedt;
};
int acpi_parse_entries_array(char *id, unsigned long table_size,
struct acpi_table_header *table_header,
struct acpi_subtable_proc *proc,
int proc_num, unsigned int max_entries);
#endif
......@@ -772,3 +772,6 @@ config ASN1_ENCODER
config POLYNOMIAL
tristate
config FIRMWARE_TABLE
bool
......@@ -409,6 +409,8 @@ obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
obj-$(CONFIG_FIRMWARE_TABLE) += fw_table.o
# FORTIFY_SOURCE compile-time behavior tests
TEST_FORTIFY_SRCS = $(wildcard $(srctree)/$(src)/test_fortify/*-*.c)
TEST_FORTIFY_LOGS = $(patsubst $(srctree)/$(src)/%.c, %.log, $(TEST_FORTIFY_SRCS))
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* fw_tables.c - Parsing support for ACPI and ACPI-like tables provided by
* platform or device firmware
*
* Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2023 Intel Corp.
*/
#include <linux/errno.h>
#include <linux/fw_table.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
enum acpi_subtable_type {
ACPI_SUBTABLE_COMMON,
ACPI_SUBTABLE_HMAT,
ACPI_SUBTABLE_PRMT,
ACPI_SUBTABLE_CEDT,
};
struct acpi_subtable_entry {
union acpi_subtable_headers *hdr;
enum acpi_subtable_type type;
};
static unsigned long __init_or_acpilib
acpi_get_entry_type(struct acpi_subtable_entry *entry)
{
switch (entry->type) {
case ACPI_SUBTABLE_COMMON:
return entry->hdr->common.type;
case ACPI_SUBTABLE_HMAT:
return entry->hdr->hmat.type;
case ACPI_SUBTABLE_PRMT:
return 0;
case ACPI_SUBTABLE_CEDT:
return entry->hdr->cedt.type;
}
return 0;
}
static unsigned long __init_or_acpilib
acpi_get_entry_length(struct acpi_subtable_entry *entry)
{
switch (entry->type) {
case ACPI_SUBTABLE_COMMON:
return entry->hdr->common.length;
case ACPI_SUBTABLE_HMAT:
return entry->hdr->hmat.length;
case ACPI_SUBTABLE_PRMT:
return entry->hdr->prmt.length;
case ACPI_SUBTABLE_CEDT:
return entry->hdr->cedt.length;
}
return 0;
}
static unsigned long __init_or_acpilib
acpi_get_subtable_header_length(struct acpi_subtable_entry *entry)
{
switch (entry->type) {
case ACPI_SUBTABLE_COMMON:
return sizeof(entry->hdr->common);
case ACPI_SUBTABLE_HMAT:
return sizeof(entry->hdr->hmat);
case ACPI_SUBTABLE_PRMT:
return sizeof(entry->hdr->prmt);
case ACPI_SUBTABLE_CEDT:
return sizeof(entry->hdr->cedt);
}
return 0;
}
static enum acpi_subtable_type __init_or_acpilib
acpi_get_subtable_type(char *id)
{
if (strncmp(id, ACPI_SIG_HMAT, 4) == 0)
return ACPI_SUBTABLE_HMAT;
if (strncmp(id, ACPI_SIG_PRMT, 4) == 0)
return ACPI_SUBTABLE_PRMT;
if (strncmp(id, ACPI_SIG_CEDT, 4) == 0)
return ACPI_SUBTABLE_CEDT;
return ACPI_SUBTABLE_COMMON;
}
static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc)
{
return proc->handler || proc->handler_arg;
}
static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc,
union acpi_subtable_headers *hdr,
unsigned long end)
{
if (proc->handler)
return proc->handler(hdr, end);
if (proc->handler_arg)
return proc->handler_arg(hdr, proc->arg, end);
return -EINVAL;
}
/**
* acpi_parse_entries_array - for each proc_num find a suitable subtable
*
* @id: table id (for debugging purposes)
* @table_size: size of the root table
* @table_header: where does the table start?
* @proc: array of acpi_subtable_proc struct containing entry id
* and associated handler with it
* @proc_num: how big proc is?
* @max_entries: how many entries can we process?
*
* For each proc_num find a subtable with proc->id and run proc->handler
* on it. Assumption is that there's only single handler for particular
* entry id.
*
* The table_size is not the size of the complete ACPI table (the length
* field in the header struct), but only the size of the root table; i.e.,
* the offset from the very first byte of the complete ACPI table, to the
* first byte of the very first subtable.
*
* On success returns sum of all matching entries for all proc handlers.
* Otherwise, -ENODEV or -EINVAL is returned.
*/
int __init_or_acpilib
acpi_parse_entries_array(char *id, unsigned long table_size,
struct acpi_table_header *table_header,
struct acpi_subtable_proc *proc,
int proc_num, unsigned int max_entries)
{
unsigned long table_end, subtable_len, entry_len;
struct acpi_subtable_entry entry;
int count = 0;
int errs = 0;
int i;
table_end = (unsigned long)table_header + table_header->length;
/* Parse all entries looking for a match. */
entry.type = acpi_get_subtable_type(id);
entry.hdr = (union acpi_subtable_headers *)
((unsigned long)table_header + table_size);
subtable_len = acpi_get_subtable_header_length(&entry);
while (((unsigned long)entry.hdr) + subtable_len < table_end) {
if (max_entries && count >= max_entries)
break;
for (i = 0; i < proc_num; i++) {
if (acpi_get_entry_type(&entry) != proc[i].id)
continue;
if (!has_handler(&proc[i]) ||
(!errs &&
call_handler(&proc[i], entry.hdr, table_end))) {
errs++;
continue;
}
proc[i].count++;
break;
}
if (i != proc_num)
count++;
/*
* If entry->length is 0, break from this loop to avoid
* infinite loop.
*/
entry_len = acpi_get_entry_length(&entry);
if (entry_len == 0) {
pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id);
return -EINVAL;
}
entry.hdr = (union acpi_subtable_headers *)
((unsigned long)entry.hdr + entry_len);
}
if (max_entries && count > max_entries) {
pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n",
id, proc->id, count);
}
return errs ? -EINVAL : count;
}
......@@ -831,7 +831,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->interleave_ways = 2;
else
cxld->interleave_ways = 1;
cxld->interleave_granularity = 256;
cxld->interleave_granularity = 4096;
cxld->hpa_range = (struct range) {
.start = base,
.end = base + size - 1,
......
......@@ -89,6 +89,12 @@ static struct cxl_cel_entry mock_cel[] = {
.effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) |
EFFECT(CONF_CHANGE_IMMEDIATE)),
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_SANITIZE),
.effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE) |
EFFECT(SECURITY_CHANGE_IMMEDIATE) |
EFFECT(BACKGROUND_OP)),
},
};
/* See CXL 2.0 Table 181 Get Health Info Output Payload */
......@@ -133,7 +139,6 @@ struct mock_event_log {
};
struct mock_event_store {
struct cxl_memdev_state *mds;
struct mock_event_log mock_logs[CXL_EVENT_TYPE_MAX];
u32 ev_status;
};
......@@ -150,8 +155,10 @@ struct cxl_mockmem_data {
int user_limit;
int master_limit;
struct mock_event_store mes;
struct cxl_memdev_state *mds;
u8 event_buf[SZ_4K];
u64 timestamp;
unsigned long sanitize_timeout;
};
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
......@@ -326,7 +333,7 @@ static void cxl_mock_event_trigger(struct device *dev)
event_reset_log(log);
}
cxl_mem_get_event_records(mes->mds, mes->ev_status);
cxl_mem_get_event_records(mdata->mds, mes->ev_status);
}
struct cxl_event_record_raw maint_needed = {
......@@ -567,9 +574,26 @@ static int mock_partition_info(struct cxl_mbox_cmd *cmd)
return 0;
}
void cxl_mockmem_sanitize_work(struct work_struct *work)
{
struct cxl_memdev_state *mds =
container_of(work, typeof(*mds), security.poll_dwork.work);
mutex_lock(&mds->mbox_mutex);
if (mds->security.sanitize_node)
sysfs_notify_dirent(mds->security.sanitize_node);
mds->security.sanitize_active = false;
mutex_unlock(&mds->mbox_mutex);
dev_dbg(mds->cxlds.dev, "sanitize complete\n");
}
static int mock_sanitize(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct cxl_memdev_state *mds = mdata->mds;
int rc = 0;
if (cmd->size_in != 0)
return -EINVAL;
......@@ -585,7 +609,16 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata,
return -ENXIO;
}
return 0; /* assume less than 2 secs, no bg */
mutex_lock(&mds->mbox_mutex);
if (schedule_delayed_work(&mds->security.poll_dwork,
msecs_to_jiffies(mdata->sanitize_timeout))) {
mds->security.sanitize_active = true;
dev_dbg(mds->cxlds.dev, "sanitize issued\n");
} else
rc = -EBUSY;
mutex_unlock(&mds->mbox_mutex);
return rc;
}
static int mock_secure_erase(struct cxl_mockmem_data *mdata,
......@@ -1237,6 +1270,7 @@ static int mock_transfer_fw(struct cxl_mockmem_data *mdata,
}
memcpy(fw + offset, transfer->data, length);
usleep_range(1500, 2000);
return 0;
}
......@@ -1415,16 +1449,16 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (IS_ERR(mds))
return PTR_ERR(mds);
mdata->mds = mds;
mds->mbox_send = cxl_mock_mbox_send;
mds->payload_size = SZ_4K;
mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
cxlds = &mds->cxlds;
cxlds->serial = pdev->id;
if (is_rcd(pdev)) {
if (is_rcd(pdev))
cxlds->rcd = true;
cxlds->component_reg_phys = CXL_RESOURCE_NONE;
}
rc = cxl_enumerate_cmds(mds);
if (rc)
......@@ -1447,14 +1481,17 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
mdata->mes.mds = mds;
cxl_mock_add_event_logs(&mdata->mes);
cxlmd = devm_cxl_add_memdev(cxlds);
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
rc = cxl_memdev_setup_fw_upload(mds);
rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
if (rc)
return rc;
rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
if (rc)
return rc;
......@@ -1526,10 +1563,38 @@ static ssize_t fw_buf_checksum_show(struct device *dev,
static DEVICE_ATTR_RO(fw_buf_checksum);
static ssize_t sanitize_timeout_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
return sysfs_emit(buf, "%lu\n", mdata->sanitize_timeout);
}
static ssize_t sanitize_timeout_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
unsigned long val;
int rc;
rc = kstrtoul(buf, 0, &val);
if (rc)
return rc;
mdata->sanitize_timeout = val;
return count;
}
static DEVICE_ATTR_RW(sanitize_timeout);
static struct attribute *cxl_mock_mem_attrs[] = {
&dev_attr_security_lock.attr,
&dev_attr_event_trigger.attr,
&dev_attr_fw_buf_checksum.attr,
&dev_attr_sanitize_timeout.attr,
NULL
};
ATTRIBUTE_GROUPS(cxl_mock_mem);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment