Commit d591f680 authored by Bjorn Helgaas's avatar Bjorn Helgaas

PCI: Wait for device readiness with Configuration RRS

After a device reset, delays are required before the device can
successfully complete config accesses.  PCIe r6.0, sec 6.6, specifies some
delays required before software can perform config accesses.  Devices that
require more time after those delays may respond to config accesses with
Configuration Request Retry Status (RRS) completions.

Callers of pci_dev_wait() are responsible for delays until the device can
respond to config accesses.  pci_dev_wait() waits any additional time until
the device can successfully complete config accesses.

Reading config space of devices that are not present or not ready typically
returns ~0 (PCI_ERROR_RESPONSE).  Previously we polled the Command register
until we got a value other than ~0.  This is sometimes a problem because
Root Complex handling of RRS completions may include several retries and
implementation-specific behavior that is invisible to software (see sec
2.3.2), so the exponential backoff in pci_dev_wait() may not work as
intended.

Linux enables Configuration RRS Software Visibility on all Root Ports that
support it.  If it is enabled, read the Vendor ID instead of the Command
register.  RRS completions cause immediate return of the 0x0001 reserved
Vendor ID value, so the pci_dev_wait() backoff works correctly.

When a read of Vendor ID eventually completes successfully by returning a
non-0x0001 value (the Vendor ID or 0xffff for VFs), the device should be
initialized and ready to respond to config requests.

For conventional PCI devices or devices below Root Ports that don't support
Configuration RRS Software Visibility, poll the Command register as before.

This was developed independently, but is very similar to Stanislav
Spassov's previous work at
https://lore.kernel.org/linux-pci/20200223122057.6504-1-stanspas@amazon.com

Link: https://lore.kernel.org/r/20240827234848.4429-2-helgaas@kernel.orgSigned-off-by: default avatarBjorn Helgaas <bhelgaas@google.com>
Tested-by: default avatarDuc Dang <ducdang@google.com>
parent 8400291e
...@@ -1283,7 +1283,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) ...@@ -1283,7 +1283,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
{ {
int delay = 1; int delay = 1;
bool retrain = false; bool retrain = false;
struct pci_dev *bridge; struct pci_dev *root, *bridge;
root = pcie_find_root_port(dev);
if (pci_is_pcie(dev)) { if (pci_is_pcie(dev)) {
bridge = pci_upstream_bridge(dev); bridge = pci_upstream_bridge(dev);
...@@ -1292,16 +1294,23 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) ...@@ -1292,16 +1294,23 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
} }
/* /*
* After reset, the device should not silently discard config * The caller has already waited long enough after a reset that the
* requests, but it may still indicate that it needs more time by * device should respond to config requests, but it may respond
* responding to them with CRS completions. The Root Port will * with Request Retry Status (RRS) if it needs more time to
* generally synthesize ~0 (PCI_ERROR_RESPONSE) data to complete * initialize.
* the read (except when CRS SV is enabled and the read was for the *
* Vendor ID; in that case it synthesizes 0x0001 data). * If the device is below a Root Port with Configuration RRS
* Software Visibility enabled, reading the Vendor ID returns a
* special data value if the device responded with RRS. Read the
* Vendor ID until we get non-RRS status.
* *
* Wait for the device to return a non-CRS completion. Read the * If there's no Root Port or Configuration RRS Software Visibility
* Command register instead of Vendor ID so we don't have to * is not enabled, the device may still respond with RRS, but
* contend with the CRS SV value. * hardware may retry the config request. If no retries receive
* Successful Completion, hardware generally synthesizes ~0
* (PCI_ERROR_RESPONSE) data to complete the read. Reading Vendor
* ID for VFs and non-existent devices also returns ~0, so read the
* Command register until it returns something other than ~0.
*/ */
for (;;) { for (;;) {
u32 id; u32 id;
...@@ -1311,9 +1320,15 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) ...@@ -1311,9 +1320,15 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
return -ENOTTY; return -ENOTTY;
} }
pci_read_config_dword(dev, PCI_COMMAND, &id); if (root && root->config_crs_sv) {
if (!PCI_POSSIBLE_ERROR(id)) pci_read_config_dword(dev, PCI_VENDOR_ID, &id);
break; if (!pci_bus_crs_vendor_id(id))
break;
} else {
pci_read_config_dword(dev, PCI_COMMAND, &id);
if (!PCI_POSSIBLE_ERROR(id))
break;
}
if (delay > timeout) { if (delay > timeout) {
pci_warn(dev, "not ready %dms after %s; giving up\n", pci_warn(dev, "not ready %dms after %s; giving up\n",
......
...@@ -139,6 +139,11 @@ bool pci_bridge_d3_possible(struct pci_dev *dev); ...@@ -139,6 +139,11 @@ bool pci_bridge_d3_possible(struct pci_dev *dev);
void pci_bridge_d3_update(struct pci_dev *dev); void pci_bridge_d3_update(struct pci_dev *dev);
int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type); int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type);
static inline bool pci_bus_crs_vendor_id(u32 l)
{
return (l & 0xffff) == PCI_VENDOR_ID_PCI_SIG;
}
static inline void pci_wakeup_event(struct pci_dev *dev) static inline void pci_wakeup_event(struct pci_dev *dev)
{ {
/* Wait 100 ms before the system can be put into a sleep state. */ /* Wait 100 ms before the system can be put into a sleep state. */
......
...@@ -1209,9 +1209,11 @@ static void pci_enable_crs(struct pci_dev *pdev) ...@@ -1209,9 +1209,11 @@ static void pci_enable_crs(struct pci_dev *pdev)
/* Enable CRS Software Visibility if supported */ /* Enable CRS Software Visibility if supported */
pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &root_cap); pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &root_cap);
if (root_cap & PCI_EXP_RTCAP_CRSVIS) if (root_cap & PCI_EXP_RTCAP_CRSVIS) {
pcie_capability_set_word(pdev, PCI_EXP_RTCTL, pcie_capability_set_word(pdev, PCI_EXP_RTCTL,
PCI_EXP_RTCTL_CRSSVE); PCI_EXP_RTCTL_CRSSVE);
pdev->config_crs_sv = 1;
}
} }
static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus, static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
...@@ -2343,11 +2345,6 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus) ...@@ -2343,11 +2345,6 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
} }
EXPORT_SYMBOL(pci_alloc_dev); EXPORT_SYMBOL(pci_alloc_dev);
static bool pci_bus_crs_vendor_id(u32 l)
{
return (l & 0xffff) == PCI_VENDOR_ID_PCI_SIG;
}
static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l, static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l,
int timeout) int timeout)
{ {
......
...@@ -371,6 +371,7 @@ struct pci_dev { ...@@ -371,6 +371,7 @@ struct pci_dev {
can be generated */ can be generated */
unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int pme_poll:1; /* Poll device's PME status bit */
unsigned int pinned:1; /* Whether this dev is pinned */ unsigned int pinned:1; /* Whether this dev is pinned */
unsigned int config_crs_sv:1; /* Config CRS software visibility */
unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int imm_ready:1; /* Supports Immediate Readiness */
unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d1_support:1; /* Low power state D1 is supported */
unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment