Commit d0cdf900 authored by Jim Snow's avatar Jim Snow Committed by Borislav Petkov

EDAC, sb_edac: Add Knights Landing (Xeon Phi gen 2) support

Knights Landing is the next generation architecture for HPC market.

KNL introduces concept of a tile and CHA - Cache/Home Agent for memory
accesses.

Some things are fixed in KNL:
() There's single DIMM slot per channel
() There's 2 memory controllers with 3 channels each, however,
   from EDAC standpoint, it is presented as single memory controller
   with 6 channels. In order to represent 2 MCs w/ 3 CH, it would
   require major redesign of EDAC core driver.

Basically, two functionalities are added/extended:
() during driver initialization KNL topology is being recognized, i.e.
   which channels are populated with what DIMM sizes
   (knl_get_dimm_capacity function)
() handle MCE errors - channel swizzling
Reviewed-by: default avatarTony Luck <tony.luck@intel.com>
Signed-off-by: default avatarJim Snow <jim.m.snow@intel.com>
Cc: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: lukasz.anaczkowski@intel.com
Link: http://lkml.kernel.org/r/1449136134-23706-5-git-send-email-hubert.chrzaniuk@intel.com
[ Rebase to 4.4-rc3. ]
Signed-off-by: default avatarHubert Chrzaniuk <hubert.chrzaniuk@intel.com>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
parent c1979ba2
......@@ -65,6 +65,14 @@ static const u32 ibridge_dram_rule[] = {
0xd8, 0xe0, 0xe8, 0xf0, 0xf8,
};
static const u32 knl_dram_rule[] = {
0x60, 0x68, 0x70, 0x78, 0x80, /* 0-4 */
0x88, 0x90, 0x98, 0xa0, 0xa8, /* 5-9 */
0xb0, 0xb8, 0xc0, 0xc8, 0xd0, /* 10-14 */
0xd8, 0xe0, 0xe8, 0xf0, 0xf8, /* 15-19 */
0x100, 0x108, 0x110, 0x118, /* 20-23 */
};
#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
#define A7MODE(reg) GET_BITFIELD(reg, 26, 26)
......@@ -94,6 +102,14 @@ static const u32 ibridge_interleave_list[] = {
0xdc, 0xe4, 0xec, 0xf4, 0xfc,
};
static const u32 knl_interleave_list[] = {
0x64, 0x6c, 0x74, 0x7c, 0x84, /* 0-4 */
0x8c, 0x94, 0x9c, 0xa4, 0xac, /* 5-9 */
0xb4, 0xbc, 0xc4, 0xcc, 0xd4, /* 10-14 */
0xdc, 0xe4, 0xec, 0xf4, 0xfc, /* 15-19 */
0x104, 0x10c, 0x114, 0x11c, /* 20-23 */
};
struct interleave_pkg {
unsigned char start;
unsigned char end;
......@@ -131,10 +147,13 @@ static inline int sad_pkg(const struct interleave_pkg *table, u32 reg,
/* Devices 12 Function 7 */
#define TOLM 0x80
#define TOHM 0x84
#define TOHM 0x84
#define HASWELL_TOLM 0xd0
#define HASWELL_TOHM_0 0xd4
#define HASWELL_TOHM_1 0xd8
#define KNL_TOLM 0xd0
#define KNL_TOHM_0 0xd4
#define KNL_TOHM_1 0xd8
#define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff)
#define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)
......@@ -145,6 +164,8 @@ static inline int sad_pkg(const struct interleave_pkg *table, u32 reg,
#define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11)
#define SOURCE_ID_KNL(reg) GET_BITFIELD(reg, 12, 14)
#define SAD_CONTROL 0xf4
/* Device 14 function 0 */
......@@ -167,6 +188,7 @@ static const u32 tad_dram_rule[] = {
/* Device 15, function 0 */
#define MCMTR 0x7c
#define KNL_MCMTR 0x624
#define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2)
#define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1)
......@@ -183,6 +205,8 @@ static const int mtr_regs[] = {
0x80, 0x84, 0x88,
};
static const int knl_mtr_reg = 0xb60;
#define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19)
#define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14)
#define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13)
......@@ -253,6 +277,9 @@ static const u32 correrrthrsld[] = {
#define NUM_CHANNELS 8 /* 2MC per socket, four chan per MC */
#define MAX_DIMMS 3 /* Max DIMMS per channel */
#define KNL_MAX_CHAS 38 /* KNL max num. of Cache Home Agents */
#define KNL_MAX_CHANNELS 6 /* KNL max num. of PCI channels */
#define KNL_MAX_EDCS 8 /* Embedded DRAM controllers */
#define CHANNEL_UNSPECIFIED 0xf /* Intel IA32 SDM 15-14 */
enum type {
......@@ -260,6 +287,7 @@ enum type {
IVY_BRIDGE,
HASWELL,
BROADWELL,
KNIGHTS_LANDING,
};
struct sbridge_pvt;
......@@ -309,6 +337,16 @@ struct sbridge_dev {
struct mem_ctl_info *mci;
};
struct knl_pvt {
struct pci_dev *pci_cha[KNL_MAX_CHAS];
struct pci_dev *pci_channel[KNL_MAX_CHANNELS];
struct pci_dev *pci_mc0;
struct pci_dev *pci_mc1;
struct pci_dev *pci_mc0_misc;
struct pci_dev *pci_mc1_misc;
struct pci_dev *pci_mc_info; /* tolm, tohm */
};
struct sbridge_pvt {
struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
struct pci_dev *pci_sad0, *pci_sad1;
......@@ -337,6 +375,7 @@ struct sbridge_pvt {
/* Memory description */
u64 tolm, tohm;
struct knl_pvt knl;
};
#define PCI_DESCR(device_id, opt) \
......@@ -510,6 +549,50 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
{0,} /* 0 terminated list. */
};
/* Knight's Landing Support */
/*
* KNL's memory channels are swizzled between memory controllers.
* MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2
*/
#define knl_channel_remap(channel) ((channel + 3) % 6)
/* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840
/* DRAM channel stuff; bank addrs, dimmmtr, etc.. 2-8-2 - 2-9-4 (6 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL 0x7843
/* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_TA 0x7844
/* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0 0x782a
/* SAD target - 1-29-1 (1 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1 0x782b
/* Caching / Home Agent */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHA 0x782c
/* Device with TOLM and TOHM, 0-5-0 (1 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM 0x7810
/*
* KNL differs from SB, IB, and Haswell in that it has multiple
* instances of the same device with the same device ID, so we handle that
* by creating as many copies in the table as we expect to find.
* (Like device ID must be grouped together.)
*/
static const struct pci_id_descr pci_dev_descr_knl[] = {
[0] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0) },
[1] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0) },
[2 ... 3] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0)},
[4 ... 41] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0) },
[42 ... 47] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL, 0) },
[48] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0) },
[49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0) },
};
static const struct pci_id_table pci_dev_descr_knl_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_knl),
{0,}
};
/*
* Broadwell support
*
......@@ -586,6 +669,7 @@ static const struct pci_device_id sbridge_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)},
{0,} /* 0 terminated list. */
};
......@@ -599,7 +683,7 @@ static inline int numrank(enum type type, u32 mtr)
int ranks = (1 << RANK_CNT_BITS(mtr));
int max = 4;
if (type == HASWELL || type == BROADWELL)
if (type == HASWELL || type == BROADWELL || type == KNIGHTS_LANDING)
max = 8;
if (ranks > max) {
......@@ -748,6 +832,47 @@ static u32 dram_attr(u32 reg)
return GET_BITFIELD(reg, 2, 3);
}
static u64 knl_sad_limit(u32 reg)
{
return (GET_BITFIELD(reg, 7, 26) << 26) | 0x3ffffff;
}
static u32 knl_interleave_mode(u32 reg)
{
return GET_BITFIELD(reg, 1, 2);
}
static char *knl_show_interleave_mode(u32 reg)
{
char *s;
switch (knl_interleave_mode(reg)) {
case 0:
s = "use address bits [8:6]";
break;
case 1:
s = "use address bits [10:8]";
break;
case 2:
s = "use address bits [14:12]";
break;
case 3:
s = "use address bits [32:30]";
break;
default:
WARN_ON(1);
break;
}
return s;
}
static u32 dram_attr_knl(u32 reg)
{
return GET_BITFIELD(reg, 3, 4);
}
static enum mem_type get_memory_type(struct sbridge_pvt *pvt)
{
u32 reg;
......@@ -842,6 +967,12 @@ static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr)
return __ibridge_get_width(GET_BITFIELD(mtr, 8, 9));
}
static enum mem_type knl_get_memory_type(struct sbridge_pvt *pvt)
{
/* DDR4 RDIMMS and LRDIMMS are supported */
return MEM_RDDR4;
}
static u8 get_node_id(struct sbridge_pvt *pvt)
{
u32 reg;
......@@ -857,6 +988,15 @@ static u8 haswell_get_node_id(struct sbridge_pvt *pvt)
return GET_BITFIELD(reg, 0, 3);
}
static u8 knl_get_node_id(struct sbridge_pvt *pvt)
{
u32 reg;
pci_read_config_dword(pvt->pci_sad1, SAD_CONTROL, &reg);
return GET_BITFIELD(reg, 0, 2);
}
static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
{
u32 reg;
......@@ -878,6 +1018,26 @@ static u64 haswell_get_tohm(struct sbridge_pvt *pvt)
return rc | 0x1ffffff;
}
static u64 knl_get_tolm(struct sbridge_pvt *pvt)
{
u32 reg;
pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOLM, &reg);
return (GET_BITFIELD(reg, 26, 31) << 26) | 0x3ffffff;
}
static u64 knl_get_tohm(struct sbridge_pvt *pvt)
{
u64 rc;
u32 reg_lo, reg_hi;
pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOHM_0, &reg_lo);
pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOHM_1, &reg_hi);
rc = ((u64)reg_hi << 32) | reg_lo;
return rc | 0x3ffffff;
}
static u64 haswell_rir_limit(u32 reg)
{
return (((u64)GET_BITFIELD(reg, 1, 11) + 1) << 29) - 1;
......@@ -935,11 +1095,22 @@ static int check_if_ecc_is_active(const u8 bus, enum type type)
case BROADWELL:
id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA;
break;
case KNIGHTS_LANDING:
/*
* KNL doesn't group things by bus the same way
* SB/IB/Haswell does.
*/
id = PCI_DEVICE_ID_INTEL_KNL_IMC_TA;
break;
default:
return -ENODEV;
}
pdev = get_pdev_same_bus(bus, id);
if (type != KNIGHTS_LANDING)
pdev = get_pdev_same_bus(bus, id);
else
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, 0);
if (!pdev) {
sbridge_printk(KERN_ERR, "Couldn't find PCI device "
"%04x:%04x! on bus %02d\n",
......@@ -947,7 +1118,8 @@ static int check_if_ecc_is_active(const u8 bus, enum type type)
return -ENODEV;
}
pci_read_config_dword(pdev, MCMTR, &mcmtr);
pci_read_config_dword(pdev,
type == KNIGHTS_LANDING ? KNL_MCMTR : MCMTR, &mcmtr);
if (!IS_ECC_ENABLED(mcmtr)) {
sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
return -ENODEV;
......@@ -955,6 +1127,476 @@ static int check_if_ecc_is_active(const u8 bus, enum type type)
return 0;
}
/* Low bits of TAD limit, and some metadata. */
static const u32 knl_tad_dram_limit_lo[] = {
0x400, 0x500, 0x600, 0x700,
0x800, 0x900, 0xa00, 0xb00,
};
/* Low bits of TAD offset. */
static const u32 knl_tad_dram_offset_lo[] = {
0x404, 0x504, 0x604, 0x704,
0x804, 0x904, 0xa04, 0xb04,
};
/* High 16 bits of TAD limit and offset. */
static const u32 knl_tad_dram_hi[] = {
0x408, 0x508, 0x608, 0x708,
0x808, 0x908, 0xa08, 0xb08,
};
/* Number of ways a tad entry is interleaved. */
static const u32 knl_tad_ways[] = {
8, 6, 4, 3, 2, 1,
};
/*
* Retrieve the n'th Target Address Decode table entry
* from the memory controller's TAD table.
*
* @pvt: driver private data
* @entry: which entry you want to retrieve
* @mc: which memory controller (0 or 1)
* @offset: output tad range offset
* @limit: output address of first byte above tad range
* @ways: output number of interleave ways
*
* The offset value has curious semantics. It's a sort of running total
* of the sizes of all the memory regions that aren't mapped in this
* tad table.
*/
static int knl_get_tad(const struct sbridge_pvt *pvt,
const int entry,
const int mc,
u64 *offset,
u64 *limit,
int *ways)
{
u32 reg_limit_lo, reg_offset_lo, reg_hi;
struct pci_dev *pci_mc;
int way_id;
switch (mc) {
case 0:
pci_mc = pvt->knl.pci_mc0;
break;
case 1:
pci_mc = pvt->knl.pci_mc1;
break;
default:
WARN_ON(1);
return -EINVAL;
}
pci_read_config_dword(pci_mc,
knl_tad_dram_limit_lo[entry], &reg_limit_lo);
pci_read_config_dword(pci_mc,
knl_tad_dram_offset_lo[entry], &reg_offset_lo);
pci_read_config_dword(pci_mc,
knl_tad_dram_hi[entry], &reg_hi);
/* Is this TAD entry enabled? */
if (!GET_BITFIELD(reg_limit_lo, 0, 0))
return -ENODEV;
way_id = GET_BITFIELD(reg_limit_lo, 3, 5);
if (way_id < ARRAY_SIZE(knl_tad_ways)) {
*ways = knl_tad_ways[way_id];
} else {
*ways = 0;
sbridge_printk(KERN_ERR,
"Unexpected value %d in mc_tad_limit_lo wayness field\n",
way_id);
return -ENODEV;
}
/*
* The least significant 6 bits of base and limit are truncated.
* For limit, we fill the missing bits with 1s.
*/
*offset = ((u64) GET_BITFIELD(reg_offset_lo, 6, 31) << 6) |
((u64) GET_BITFIELD(reg_hi, 0, 15) << 32);
*limit = ((u64) GET_BITFIELD(reg_limit_lo, 6, 31) << 6) | 63 |
((u64) GET_BITFIELD(reg_hi, 16, 31) << 32);
return 0;
}
/* Determine which memory controller is responsible for a given channel. */
static int knl_channel_mc(int channel)
{
WARN_ON(channel < 0 || channel >= 6);
return channel < 3 ? 1 : 0;
}
/*
* Get the Nth entry from EDC_ROUTE_TABLE register.
* (This is the per-tile mapping of logical interleave targets to
* physical EDC modules.)
*
* entry 0: 0:2
* 1: 3:5
* 2: 6:8
* 3: 9:11
* 4: 12:14
* 5: 15:17
* 6: 18:20
* 7: 21:23
* reserved: 24:31
*/
static u32 knl_get_edc_route(int entry, u32 reg)
{
WARN_ON(entry >= KNL_MAX_EDCS);
return GET_BITFIELD(reg, entry*3, (entry*3)+2);
}
/*
* Get the Nth entry from MC_ROUTE_TABLE register.
* (This is the per-tile mapping of logical interleave targets to
* physical DRAM channels modules.)
*
* entry 0: mc 0:2 channel 18:19
* 1: mc 3:5 channel 20:21
* 2: mc 6:8 channel 22:23
* 3: mc 9:11 channel 24:25
* 4: mc 12:14 channel 26:27
* 5: mc 15:17 channel 28:29
* reserved: 30:31
*
* Though we have 3 bits to identify the MC, we should only see
* the values 0 or 1.
*/
static u32 knl_get_mc_route(int entry, u32 reg)
{
int mc, chan;
WARN_ON(entry >= KNL_MAX_CHANNELS);
mc = GET_BITFIELD(reg, entry*3, (entry*3)+2);
chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1);
return knl_channel_remap(mc*3 + chan);
}
/*
* Render the EDC_ROUTE register in human-readable form.
* Output string s should be at least KNL_MAX_EDCS*2 bytes.
*/
static void knl_show_edc_route(u32 reg, char *s)
{
int i;
for (i = 0; i < KNL_MAX_EDCS; i++) {
s[i*2] = knl_get_edc_route(i, reg) + '0';
s[i*2+1] = '-';
}
s[KNL_MAX_EDCS*2 - 1] = '\0';
}
/*
* Render the MC_ROUTE register in human-readable form.
* Output string s should be at least KNL_MAX_CHANNELS*2 bytes.
*/
static void knl_show_mc_route(u32 reg, char *s)
{
int i;
for (i = 0; i < KNL_MAX_CHANNELS; i++) {
s[i*2] = knl_get_mc_route(i, reg) + '0';
s[i*2+1] = '-';
}
s[KNL_MAX_CHANNELS*2 - 1] = '\0';
}
#define KNL_EDC_ROUTE 0xb8
#define KNL_MC_ROUTE 0xb4
/* Is this dram rule backed by regular DRAM in flat mode? */
#define KNL_EDRAM(reg) GET_BITFIELD(reg, 29, 29)
/* Is this dram rule cached? */
#define KNL_CACHEABLE(reg) GET_BITFIELD(reg, 28, 28)
/* Is this rule backed by edc ? */
#define KNL_EDRAM_ONLY(reg) GET_BITFIELD(reg, 29, 29)
/* Is this rule backed by DRAM, cacheable in EDRAM? */
#define KNL_CACHEABLE(reg) GET_BITFIELD(reg, 28, 28)
/* Is this rule mod3? */
#define KNL_MOD3(reg) GET_BITFIELD(reg, 27, 27)
/*
* Figure out how big our RAM modules are.
*
* The DIMMMTR register in KNL doesn't tell us the size of the DIMMs, so we
* have to figure this out from the SAD rules, interleave lists, route tables,
* and TAD rules.
*
* SAD rules can have holes in them (e.g. the 3G-4G hole), so we have to
* inspect the TAD rules to figure out how large the SAD regions really are.
*
* When we know the real size of a SAD region and how many ways it's
* interleaved, we know the individual contribution of each channel to
* TAD is size/ways.
*
* Finally, we have to check whether each channel participates in each SAD
* region.
*
* Fortunately, KNL only supports one DIMM per channel, so once we know how
* much memory the channel uses, we know the DIMM is at least that large.
* (The BIOS might possibly choose not to map all available memory, in which
* case we will underreport the size of the DIMM.)
*
* In theory, we could try to determine the EDC sizes as well, but that would
* only work in flat mode, not in cache mode.
*
* @mc_sizes: Output sizes of channels (must have space for KNL_MAX_CHANNELS
* elements)
*/
static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
{
u64 sad_base, sad_size, sad_limit = 0;
u64 tad_base, tad_size, tad_limit, tad_deadspace, tad_livespace;
int sad_rule = 0;
int tad_rule = 0;
int intrlv_ways, tad_ways;
u32 first_pkg, pkg;
int i;
u64 sad_actual_size[2]; /* sad size accounting for holes, per mc */
u32 dram_rule, interleave_reg;
u32 mc_route_reg[KNL_MAX_CHAS];
u32 edc_route_reg[KNL_MAX_CHAS];
int edram_only;
char edc_route_string[KNL_MAX_EDCS*2];
char mc_route_string[KNL_MAX_CHANNELS*2];
int cur_reg_start;
int mc;
int channel;
int way;
int participants[KNL_MAX_CHANNELS];
int participant_count = 0;
for (i = 0; i < KNL_MAX_CHANNELS; i++)
mc_sizes[i] = 0;
/* Read the EDC route table in each CHA. */
cur_reg_start = 0;
for (i = 0; i < KNL_MAX_CHAS; i++) {
pci_read_config_dword(pvt->knl.pci_cha[i],
KNL_EDC_ROUTE, &edc_route_reg[i]);
if (i > 0 && edc_route_reg[i] != edc_route_reg[i-1]) {
knl_show_edc_route(edc_route_reg[i-1],
edc_route_string);
if (cur_reg_start == i-1)
edac_dbg(0, "edc route table for CHA %d: %s\n",
cur_reg_start, edc_route_string);
else
edac_dbg(0, "edc route table for CHA %d-%d: %s\n",
cur_reg_start, i-1, edc_route_string);
cur_reg_start = i;
}
}
knl_show_edc_route(edc_route_reg[i-1], edc_route_string);
if (cur_reg_start == i-1)
edac_dbg(0, "edc route table for CHA %d: %s\n",
cur_reg_start, edc_route_string);
else
edac_dbg(0, "edc route table for CHA %d-%d: %s\n",
cur_reg_start, i-1, edc_route_string);
/* Read the MC route table in each CHA. */
cur_reg_start = 0;
for (i = 0; i < KNL_MAX_CHAS; i++) {
pci_read_config_dword(pvt->knl.pci_cha[i],
KNL_MC_ROUTE, &mc_route_reg[i]);
if (i > 0 && mc_route_reg[i] != mc_route_reg[i-1]) {
knl_show_mc_route(mc_route_reg[i-1], mc_route_string);
if (cur_reg_start == i-1)
edac_dbg(0, "mc route table for CHA %d: %s\n",
cur_reg_start, mc_route_string);
else
edac_dbg(0, "mc route table for CHA %d-%d: %s\n",
cur_reg_start, i-1, mc_route_string);
cur_reg_start = i;
}
}
knl_show_mc_route(mc_route_reg[i-1], mc_route_string);
if (cur_reg_start == i-1)
edac_dbg(0, "mc route table for CHA %d: %s\n",
cur_reg_start, mc_route_string);
else
edac_dbg(0, "mc route table for CHA %d-%d: %s\n",
cur_reg_start, i-1, mc_route_string);
/* Process DRAM rules */
for (sad_rule = 0; sad_rule < pvt->info.max_sad; sad_rule++) {
/* previous limit becomes the new base */
sad_base = sad_limit;
pci_read_config_dword(pvt->pci_sad0,
pvt->info.dram_rule[sad_rule], &dram_rule);
if (!DRAM_RULE_ENABLE(dram_rule))
break;
edram_only = KNL_EDRAM_ONLY(dram_rule);
sad_limit = pvt->info.sad_limit(dram_rule)+1;
sad_size = sad_limit - sad_base;
pci_read_config_dword(pvt->pci_sad0,
pvt->info.interleave_list[sad_rule], &interleave_reg);
/*
* Find out how many ways this dram rule is interleaved.
* We stop when we see the first channel again.
*/
first_pkg = sad_pkg(pvt->info.interleave_pkg,
interleave_reg, 0);
for (intrlv_ways = 1; intrlv_ways < 8; intrlv_ways++) {
pkg = sad_pkg(pvt->info.interleave_pkg,
interleave_reg, intrlv_ways);
if ((pkg & 0x8) == 0) {
/*
* 0 bit means memory is non-local,
* which KNL doesn't support
*/
edac_dbg(0, "Unexpected interleave target %d\n",
pkg);
return -1;
}
if (pkg == first_pkg)
break;
}
if (KNL_MOD3(dram_rule))
intrlv_ways *= 3;
edac_dbg(3, "dram rule %d (base 0x%llx, limit 0x%llx), %d way interleave%s\n",
sad_rule,
sad_base,
sad_limit,
intrlv_ways,
edram_only ? ", EDRAM" : "");
/*
* Find out how big the SAD region really is by iterating
* over TAD tables (SAD regions may contain holes).
* Each memory controller might have a different TAD table, so
* we have to look at both.
*
* Livespace is the memory that's mapped in this TAD table,
* deadspace is the holes (this could be the MMIO hole, or it
* could be memory that's mapped by the other TAD table but
* not this one).
*/
for (mc = 0; mc < 2; mc++) {
sad_actual_size[mc] = 0;
tad_livespace = 0;
for (tad_rule = 0;
tad_rule < ARRAY_SIZE(
knl_tad_dram_limit_lo);
tad_rule++) {
if (knl_get_tad(pvt,
tad_rule,
mc,
&tad_deadspace,
&tad_limit,
&tad_ways))
break;
tad_size = (tad_limit+1) -
(tad_livespace + tad_deadspace);
tad_livespace += tad_size;
tad_base = (tad_limit+1) - tad_size;
if (tad_base < sad_base) {
if (tad_limit > sad_base)
edac_dbg(0, "TAD region overlaps lower SAD boundary -- TAD tables may be configured incorrectly.\n");
} else if (tad_base < sad_limit) {
if (tad_limit+1 > sad_limit) {
edac_dbg(0, "TAD region overlaps upper SAD boundary -- TAD tables may be configured incorrectly.\n");
} else {
/* TAD region is completely inside SAD region */
edac_dbg(3, "TAD region %d 0x%llx - 0x%llx (%lld bytes) table%d\n",
tad_rule, tad_base,
tad_limit, tad_size,
mc);
sad_actual_size[mc] += tad_size;
}
}
tad_base = tad_limit+1;
}
}
for (mc = 0; mc < 2; mc++) {
edac_dbg(3, " total TAD DRAM footprint in table%d : 0x%llx (%lld bytes)\n",
mc, sad_actual_size[mc], sad_actual_size[mc]);
}
/* Ignore EDRAM rule */
if (edram_only)
continue;
/* Figure out which channels participate in interleave. */
for (channel = 0; channel < KNL_MAX_CHANNELS; channel++)
participants[channel] = 0;
/* For each channel, does at least one CHA have
* this channel mapped to the given target?
*/
for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) {
for (way = 0; way < intrlv_ways; way++) {
int target;
int cha;
if (KNL_MOD3(dram_rule))
target = way;
else
target = 0x7 & sad_pkg(
pvt->info.interleave_pkg, interleave_reg, way);
for (cha = 0; cha < KNL_MAX_CHAS; cha++) {
if (knl_get_mc_route(target,
mc_route_reg[cha]) == channel
&& participants[channel]) {
participant_count++;
participants[channel] = 1;
break;
}
}
}
}
if (participant_count != intrlv_ways)
edac_dbg(0, "participant_count (%d) != interleave_ways (%d): DIMM size may be incorrect\n",
participant_count, intrlv_ways);
for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) {
mc = knl_channel_mc(channel);
if (participants[channel]) {
edac_dbg(4, "mc channel %d contributes %lld bytes via sad entry %d\n",
channel,
sad_actual_size[mc]/intrlv_ways,
sad_rule);
mc_sizes[channel] +=
sad_actual_size[mc]/intrlv_ways;
}
}
}
return 0;
}
static int get_dimm_config(struct mem_ctl_info *mci)
{
struct sbridge_pvt *pvt = mci->pvt_info;
......@@ -964,13 +1606,20 @@ static int get_dimm_config(struct mem_ctl_info *mci)
u32 reg;
enum edac_type mode;
enum mem_type mtype;
int channels = pvt->info.type == KNIGHTS_LANDING ?
KNL_MAX_CHANNELS : NUM_CHANNELS;
u64 knl_mc_sizes[KNL_MAX_CHANNELS];
if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL)
if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
pvt->info.type == KNIGHTS_LANDING)
pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, &reg);
else
pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
pvt->sbridge_dev->source_id = SOURCE_ID(reg);
if (pvt->info.type == KNIGHTS_LANDING)
pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg);
else
pvt->sbridge_dev->source_id = SOURCE_ID(reg);
pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
......@@ -978,31 +1627,42 @@ static int get_dimm_config(struct mem_ctl_info *mci)
pvt->sbridge_dev->node_id,
pvt->sbridge_dev->source_id);
pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
if (IS_MIRROR_ENABLED(reg)) {
edac_dbg(0, "Memory mirror is enabled\n");
pvt->is_mirrored = true;
} else {
edac_dbg(0, "Memory mirror is disabled\n");
/* KNL doesn't support mirroring or lockstep,
* and is always closed page
*/
if (pvt->info.type == KNIGHTS_LANDING) {
mode = EDAC_S4ECD4ED;
pvt->is_mirrored = false;
}
pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
edac_dbg(0, "Lockstep is enabled\n");
mode = EDAC_S8ECD8ED;
pvt->is_lockstep = true;
if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
return -1;
} else {
edac_dbg(0, "Lockstep is disabled\n");
mode = EDAC_S4ECD4ED;
pvt->is_lockstep = false;
}
if (IS_CLOSE_PG(pvt->info.mcmtr)) {
edac_dbg(0, "address map is on closed page mode\n");
pvt->is_close_pg = true;
} else {
edac_dbg(0, "address map is on open page mode\n");
pvt->is_close_pg = false;
pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
if (IS_MIRROR_ENABLED(reg)) {
edac_dbg(0, "Memory mirror is enabled\n");
pvt->is_mirrored = true;
} else {
edac_dbg(0, "Memory mirror is disabled\n");
pvt->is_mirrored = false;
}
pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
edac_dbg(0, "Lockstep is enabled\n");
mode = EDAC_S8ECD8ED;
pvt->is_lockstep = true;
} else {
edac_dbg(0, "Lockstep is disabled\n");
mode = EDAC_S4ECD4ED;
pvt->is_lockstep = false;
}
if (IS_CLOSE_PG(pvt->info.mcmtr)) {
edac_dbg(0, "address map is on closed page mode\n");
pvt->is_close_pg = true;
} else {
edac_dbg(0, "address map is on open page mode\n");
pvt->is_close_pg = false;
}
}
mtype = pvt->info.get_memory_type(pvt);
......@@ -1018,23 +1678,46 @@ static int get_dimm_config(struct mem_ctl_info *mci)
else
banks = 8;
for (i = 0; i < NUM_CHANNELS; i++) {
for (i = 0; i < channels; i++) {
u32 mtr;
if (!pvt->pci_tad[i])
continue;
for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
int max_dimms_per_channel;
if (pvt->info.type == KNIGHTS_LANDING) {
max_dimms_per_channel = 1;
if (!pvt->knl.pci_channel[i])
continue;
} else {
max_dimms_per_channel = ARRAY_SIZE(mtr_regs);
if (!pvt->pci_tad[i])
continue;
}
for (j = 0; j < max_dimms_per_channel; j++) {
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
i, j, 0);
pci_read_config_dword(pvt->pci_tad[i],
mtr_regs[j], &mtr);
if (pvt->info.type == KNIGHTS_LANDING) {
pci_read_config_dword(pvt->knl.pci_channel[i],
knl_mtr_reg, &mtr);
} else {
pci_read_config_dword(pvt->pci_tad[i],
mtr_regs[j], &mtr);
}
edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
if (IS_DIMM_PRESENT(mtr)) {
pvt->channel[i].dimms++;
ranks = numrank(pvt->info.type, mtr);
rows = numrow(mtr);
cols = numcol(mtr);
if (pvt->info.type == KNIGHTS_LANDING) {
/* For DDR4, this is fixed. */
cols = 1 << 10;
rows = knl_mc_sizes[i] /
((u64) cols * ranks * banks * 8);
} else {
rows = numrow(mtr);
cols = numcol(mtr);
}
size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
......@@ -1131,6 +1814,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
}
}
if (pvt->info.type == KNIGHTS_LANDING)
return;
/*
* Step 3) Get TAD range
*/
......@@ -1727,6 +2413,8 @@ static int sbridge_get_all_devices_full(u8 *num_mc,
#define sbridge_get_all_devices(num_mc, table) \
sbridge_get_all_devices_full(num_mc, table, 0, 0)
#define sbridge_get_all_devices_knl(num_mc, table) \
sbridge_get_all_devices_full(num_mc, table, 1, 1)
static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
struct sbridge_dev *sbridge_dev)
......@@ -2083,6 +2771,131 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
return -ENODEV;
}
static int knl_mci_bind_devs(struct mem_ctl_info *mci,
struct sbridge_dev *sbridge_dev)
{
struct sbridge_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev;
int dev, func;
int i;
int devidx;
for (i = 0; i < sbridge_dev->n_devs; i++) {
pdev = sbridge_dev->pdev[i];
if (!pdev)
continue;
/* Extract PCI device and function. */
dev = (pdev->devfn >> 3) & 0x1f;
func = pdev->devfn & 0x7;
switch (pdev->device) {
case PCI_DEVICE_ID_INTEL_KNL_IMC_MC:
if (dev == 8)
pvt->knl.pci_mc0 = pdev;
else if (dev == 9)
pvt->knl.pci_mc1 = pdev;
else {
sbridge_printk(KERN_ERR,
"Memory controller in unexpected place! (dev %d, fn %d)\n",
dev, func);
continue;
}
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
pvt->pci_sad0 = pdev;
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1:
pvt->pci_sad1 = pdev;
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_CHA:
/* There are one of these per tile, and range from
* 1.14.0 to 1.18.5.
*/
devidx = ((dev-14)*8)+func;
if (devidx < 0 || devidx >= KNL_MAX_CHAS) {
sbridge_printk(KERN_ERR,
"Caching and Home Agent in unexpected place! (dev %d, fn %d)\n",
dev, func);
continue;
}
WARN_ON(pvt->knl.pci_cha[devidx] != NULL);
pvt->knl.pci_cha[devidx] = pdev;
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL:
devidx = -1;
/*
* MC0 channels 0-2 are device 9 function 2-4,
* MC1 channels 3-5 are device 8 function 2-4.
*/
if (dev == 9)
devidx = func-2;
else if (dev == 8)
devidx = 3 + (func-2);
if (devidx < 0 || devidx >= KNL_MAX_CHANNELS) {
sbridge_printk(KERN_ERR,
"DRAM Channel Registers in unexpected place! (dev %d, fn %d)\n",
dev, func);
continue;
}
WARN_ON(pvt->knl.pci_channel[devidx] != NULL);
pvt->knl.pci_channel[devidx] = pdev;
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM:
pvt->knl.pci_mc_info = pdev;
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_TA:
pvt->pci_ta = pdev;
break;
default:
sbridge_printk(KERN_ERR, "Unexpected device %d\n",
pdev->device);
break;
}
}
if (!pvt->knl.pci_mc0 || !pvt->knl.pci_mc1 ||
!pvt->pci_sad0 || !pvt->pci_sad1 ||
!pvt->pci_ta) {
goto enodev;
}
for (i = 0; i < KNL_MAX_CHANNELS; i++) {
if (!pvt->knl.pci_channel[i]) {
sbridge_printk(KERN_ERR, "Missing channel %d\n", i);
goto enodev;
}
}
for (i = 0; i < KNL_MAX_CHAS; i++) {
if (!pvt->knl.pci_cha[i]) {
sbridge_printk(KERN_ERR, "Missing CHA %d\n", i);
goto enodev;
}
}
return 0;
enodev:
sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
return -ENODEV;
}
/****************************************************************************
Error check routines
****************************************************************************/
......@@ -2172,8 +2985,36 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
if (!GET_BITFIELD(m->status, 58, 58))
return;
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
&channel_mask, &rank, &area_type, msg);
if (pvt->info.type == KNIGHTS_LANDING) {
if (channel == 14) {
edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable)
? " recoverable" : "",
mscod, errcode,
m->bank);
} else {
char A = *("A");
channel = knl_channel_remap(channel);
channel_mask = 1 << channel;
snprintf(msg, sizeof(msg),
"%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable)
? " recoverable" : " ",
mscod, errcode, channel, A + channel);
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
channel, 0, -1,
optype, msg);
}
return;
} else {
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
&channel_mask, &rank, &area_type, msg);
}
if (rc < 0)
goto err_parsing;
new_mci = get_mci_for_node_id(socket);
......@@ -2404,10 +3245,11 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
/* allocate a new MC control structure */
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].size = type == KNIGHTS_LANDING ?
KNL_MAX_CHANNELS : NUM_CHANNELS;
layers[0].is_virt_csrow = false;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = MAX_DIMMS;
layers[1].size = type == KNIGHTS_LANDING ? 1 : MAX_DIMMS;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
sizeof(*pvt));
......@@ -2425,7 +3267,8 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->sbridge_dev = sbridge_dev;
sbridge_dev->mci = mci;
mci->mtype_cap = MEM_FLAG_DDR3;
mci->mtype_cap = type == KNIGHTS_LANDING ?
MEM_FLAG_DDR4 : MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = "sbridge_edac.c";
......@@ -2534,6 +3377,30 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
if (unlikely(rc < 0))
goto fail0;
break;
case KNIGHTS_LANDING:
/* pvt->info.rankcfgr == ??? */
pvt->info.get_tolm = knl_get_tolm;
pvt->info.get_tohm = knl_get_tohm;
pvt->info.dram_rule = knl_dram_rule;
pvt->info.get_memory_type = knl_get_memory_type;
pvt->info.get_node_id = knl_get_node_id;
pvt->info.rir_limit = NULL;
pvt->info.sad_limit = knl_sad_limit;
pvt->info.interleave_mode = knl_interleave_mode;
pvt->info.show_interleave_mode = knl_show_interleave_mode;
pvt->info.dram_attr = dram_attr_knl;
pvt->info.max_sad = ARRAY_SIZE(knl_dram_rule);
pvt->info.interleave_list = knl_interleave_list;
pvt->info.max_interleave = ARRAY_SIZE(knl_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
pvt->info.get_width = ibridge_get_width;
mci->ctl_name = kasprintf(GFP_KERNEL,
"Knights Landing Socket#%d", mci->mc_idx);
rc = knl_mci_bind_devs(mci, sbridge_dev);
if (unlikely(rc < 0))
goto fail0;
break;
}
/* Get dimm basic config and the memory layout */
......@@ -2588,20 +3455,29 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
switch (pdev->device) {
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table);
rc = sbridge_get_all_devices(&num_mc,
pci_dev_descr_ibridge_table);
type = IVY_BRIDGE;
break;
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table);
rc = sbridge_get_all_devices(&num_mc,
pci_dev_descr_sbridge_table);
type = SANDY_BRIDGE;
break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_haswell_table);
rc = sbridge_get_all_devices(&num_mc,
pci_dev_descr_haswell_table);
type = HASWELL;
break;
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_broadwell_table);
rc = sbridge_get_all_devices(&num_mc,
pci_dev_descr_broadwell_table);
type = BROADWELL;
break;
case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
rc = sbridge_get_all_devices_knl(&num_mc,
pci_dev_descr_knl_table);
type = KNIGHTS_LANDING;
break;
}
if (unlikely(rc < 0)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment