Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
0f959e19
Commit
0f959e19
authored
Jun 22, 2020
by
Borislav Petkov
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'edac-ghes' into edac-for-next
parents
17ed808a
e370f886
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
200 additions
and
152 deletions
+200
-152
drivers/edac/ghes_edac.c
drivers/edac/ghes_edac.c
+193
-130
include/linux/edac.h
include/linux/edac.h
+7
-22
No files found.
drivers/edac/ghes_edac.c
View file @
0f959e19
...
@@ -15,9 +15,7 @@
...
@@ -15,9 +15,7 @@
#include "edac_module.h"
#include "edac_module.h"
#include <ras/ras_event.h>
#include <ras/ras_event.h>
struct
ghes_edac_pvt
{
struct
ghes_pvt
{
struct
list_head
list
;
struct
ghes
*
ghes
;
struct
mem_ctl_info
*
mci
;
struct
mem_ctl_info
*
mci
;
/* Buffers for the error handling routine */
/* Buffers for the error handling routine */
...
@@ -32,7 +30,16 @@ static refcount_t ghes_refcount = REFCOUNT_INIT(0);
...
@@ -32,7 +30,16 @@ static refcount_t ghes_refcount = REFCOUNT_INIT(0);
* also provides the necessary (implicit) memory barrier for the SMP
* also provides the necessary (implicit) memory barrier for the SMP
* case to make the pointer visible on another CPU.
* case to make the pointer visible on another CPU.
*/
*/
static
struct
ghes_edac_pvt
*
ghes_pvt
;
static
struct
ghes_pvt
*
ghes_pvt
;
/*
* This driver's representation of the system hardware, as collected
* from DMI.
*/
struct
ghes_hw_desc
{
int
num_dimms
;
struct
dimm_info
*
dimms
;
}
ghes_hw
;
/* GHES registration mutex */
/* GHES registration mutex */
static
DEFINE_MUTEX
(
ghes_reg_mutex
);
static
DEFINE_MUTEX
(
ghes_reg_mutex
);
...
@@ -74,44 +81,35 @@ struct memdev_dmi_entry {
...
@@ -74,44 +81,35 @@ struct memdev_dmi_entry {
u16
conf_mem_clk_speed
;
u16
conf_mem_clk_speed
;
}
__attribute__
((
__packed__
));
}
__attribute__
((
__packed__
));
struct
ghes_edac_dimm_fill
{
static
struct
dimm_info
*
find_dimm_by_handle
(
struct
mem_ctl_info
*
mci
,
u16
handle
)
struct
mem_ctl_info
*
mci
;
unsigned
int
count
;
};
static
void
ghes_edac_count_dimms
(
const
struct
dmi_header
*
dh
,
void
*
arg
)
{
int
*
num_dimm
=
arg
;
if
(
dh
->
type
==
DMI_ENTRY_MEM_DEVICE
)
(
*
num_dimm
)
++
;
}
static
int
get_dimm_smbios_index
(
struct
mem_ctl_info
*
mci
,
u16
handle
)
{
{
struct
dimm_info
*
dimm
;
struct
dimm_info
*
dimm
;
mci_for_each_dimm
(
mci
,
dimm
)
{
mci_for_each_dimm
(
mci
,
dimm
)
{
if
(
dimm
->
smbios_handle
==
handle
)
if
(
dimm
->
smbios_handle
==
handle
)
return
dimm
->
idx
;
return
dimm
;
}
}
return
-
1
;
return
NULL
;
}
}
static
void
ghes_edac_dmidecode
(
const
struct
dmi_header
*
dh
,
void
*
arg
)
static
void
dimm_setup_label
(
struct
dimm_info
*
dimm
,
u16
handle
)
{
{
struct
ghes_edac_dimm_fill
*
dimm_fill
=
arg
;
const
char
*
bank
=
NULL
,
*
device
=
NULL
;
struct
mem_ctl_info
*
mci
=
dimm_fill
->
mci
;
if
(
dh
->
type
==
DMI_ENTRY_MEM_DEVICE
)
{
dmi_memdev_name
(
handle
,
&
bank
,
&
device
);
struct
memdev_dmi_entry
*
entry
=
(
struct
memdev_dmi_entry
*
)
dh
;
struct
dimm_info
*
dimm
=
edac_get_dimm
(
mci
,
dimm_fill
->
count
,
0
,
0
);
/* both strings must be non-zero */
if
(
bank
&&
*
bank
&&
device
&&
*
device
)
snprintf
(
dimm
->
label
,
sizeof
(
dimm
->
label
),
"%s %s"
,
bank
,
device
);
}
static
void
assign_dmi_dimm_info
(
struct
dimm_info
*
dimm
,
struct
memdev_dmi_entry
*
entry
)
{
u16
rdr_mask
=
BIT
(
7
)
|
BIT
(
13
);
u16
rdr_mask
=
BIT
(
7
)
|
BIT
(
13
);
if
(
entry
->
size
==
0xffff
)
{
if
(
entry
->
size
==
0xffff
)
{
pr_info
(
"Can't get DIMM%i size
\n
"
,
pr_info
(
"Can't get DIMM%i size
\n
"
,
dimm
->
idx
);
dimm_fill
->
count
);
dimm
->
nr_pages
=
MiB_TO_PAGES
(
32
);
/* Unknown */
dimm
->
nr_pages
=
MiB_TO_PAGES
(
32
);
/* Unknown */
}
else
if
(
entry
->
size
==
0x7fff
)
{
}
else
if
(
entry
->
size
==
0x7fff
)
{
dimm
->
nr_pages
=
MiB_TO_PAGES
(
entry
->
extended_size
);
dimm
->
nr_pages
=
MiB_TO_PAGES
(
entry
->
extended_size
);
...
@@ -179,13 +177,11 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
...
@@ -179,13 +177,11 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
dimm
->
dtype
=
DEV_UNKNOWN
;
dimm
->
dtype
=
DEV_UNKNOWN
;
dimm
->
grain
=
128
;
/* Likely, worse case */
dimm
->
grain
=
128
;
/* Likely, worse case */
/*
dimm_setup_label
(
dimm
,
entry
->
handle
);
* FIXME: It shouldn't be hard to also fill the DIMM labels
*/
if
(
dimm
->
nr_pages
)
{
if
(
dimm
->
nr_pages
)
{
edac_dbg
(
1
,
"DIMM%i: %s size = %d MB%s
\n
"
,
edac_dbg
(
1
,
"DIMM%i: %s size = %d MB%s
\n
"
,
dimm_fill
->
count
,
edac_mem_types
[
dimm
->
mtype
],
dimm
->
idx
,
edac_mem_types
[
dimm
->
mtype
],
PAGES_TO_MiB
(
dimm
->
nr_pages
),
PAGES_TO_MiB
(
dimm
->
nr_pages
),
(
dimm
->
edac_mode
!=
EDAC_NONE
)
?
"(ECC)"
:
""
);
(
dimm
->
edac_mode
!=
EDAC_NONE
)
?
"(ECC)"
:
""
);
edac_dbg
(
2
,
"
\t
type %d, detail 0x%02x, width %d(total %d)
\n
"
,
edac_dbg
(
2
,
"
\t
type %d, detail 0x%02x, width %d(total %d)
\n
"
,
...
@@ -194,16 +190,56 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
...
@@ -194,16 +190,56 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
}
}
dimm
->
smbios_handle
=
entry
->
handle
;
dimm
->
smbios_handle
=
entry
->
handle
;
}
dimm_fill
->
count
++
;
static
void
enumerate_dimms
(
const
struct
dmi_header
*
dh
,
void
*
arg
)
{
struct
memdev_dmi_entry
*
entry
=
(
struct
memdev_dmi_entry
*
)
dh
;
struct
ghes_hw_desc
*
hw
=
(
struct
ghes_hw_desc
*
)
arg
;
struct
dimm_info
*
d
;
if
(
dh
->
type
!=
DMI_ENTRY_MEM_DEVICE
)
return
;
/* Enlarge the array with additional 16 */
if
(
!
hw
->
num_dimms
||
!
(
hw
->
num_dimms
%
16
))
{
struct
dimm_info
*
new
;
new
=
krealloc
(
hw
->
dimms
,
(
hw
->
num_dimms
+
16
)
*
sizeof
(
struct
dimm_info
),
GFP_KERNEL
);
if
(
!
new
)
{
WARN_ON_ONCE
(
1
);
return
;
}
}
hw
->
dimms
=
new
;
}
d
=
&
hw
->
dimms
[
hw
->
num_dimms
];
d
->
idx
=
hw
->
num_dimms
;
assign_dmi_dimm_info
(
d
,
entry
);
hw
->
num_dimms
++
;
}
static
void
ghes_scan_system
(
void
)
{
static
bool
scanned
;
if
(
scanned
)
return
;
dmi_walk
(
enumerate_dimms
,
&
ghes_hw
);
scanned
=
true
;
}
}
void
ghes_edac_report_mem_error
(
int
sev
,
struct
cper_sec_mem_err
*
mem_err
)
void
ghes_edac_report_mem_error
(
int
sev
,
struct
cper_sec_mem_err
*
mem_err
)
{
{
struct
edac_raw_error_desc
*
e
;
struct
edac_raw_error_desc
*
e
;
struct
mem_ctl_info
*
mci
;
struct
mem_ctl_info
*
mci
;
struct
ghes_
edac_
pvt
*
pvt
;
struct
ghes_pvt
*
pvt
;
unsigned
long
flags
;
unsigned
long
flags
;
char
*
p
;
char
*
p
;
...
@@ -228,7 +264,6 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
...
@@ -228,7 +264,6 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
memset
(
e
,
0
,
sizeof
(
*
e
));
memset
(
e
,
0
,
sizeof
(
*
e
));
e
->
error_count
=
1
;
e
->
error_count
=
1
;
e
->
grain
=
1
;
e
->
grain
=
1
;
strcpy
(
e
->
label
,
"unknown label"
);
e
->
msg
=
pvt
->
msg
;
e
->
msg
=
pvt
->
msg
;
e
->
other_detail
=
pvt
->
other_detail
;
e
->
other_detail
=
pvt
->
other_detail
;
e
->
top_layer
=
-
1
;
e
->
top_layer
=
-
1
;
...
@@ -345,7 +380,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
...
@@ -345,7 +380,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p
+=
sprintf
(
p
,
"bit_pos:%d "
,
mem_err
->
bit_pos
);
p
+=
sprintf
(
p
,
"bit_pos:%d "
,
mem_err
->
bit_pos
);
if
(
mem_err
->
validation_bits
&
CPER_MEM_VALID_MODULE_HANDLE
)
{
if
(
mem_err
->
validation_bits
&
CPER_MEM_VALID_MODULE_HANDLE
)
{
const
char
*
bank
=
NULL
,
*
device
=
NULL
;
const
char
*
bank
=
NULL
,
*
device
=
NULL
;
int
index
=
-
1
;
struct
dimm_info
*
dimm
;
dmi_memdev_name
(
mem_err
->
mem_dev_handle
,
&
bank
,
&
device
);
dmi_memdev_name
(
mem_err
->
mem_dev_handle
,
&
bank
,
&
device
);
if
(
bank
!=
NULL
&&
device
!=
NULL
)
if
(
bank
!=
NULL
&&
device
!=
NULL
)
...
@@ -354,13 +389,18 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
...
@@ -354,13 +389,18 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p
+=
sprintf
(
p
,
"DIMM DMI handle: 0x%.4x "
,
p
+=
sprintf
(
p
,
"DIMM DMI handle: 0x%.4x "
,
mem_err
->
mem_dev_handle
);
mem_err
->
mem_dev_handle
);
index
=
get_dimm_smbios_index
(
mci
,
mem_err
->
mem_dev_handle
);
dimm
=
find_dimm_by_handle
(
mci
,
mem_err
->
mem_dev_handle
);
if
(
index
>=
0
)
if
(
dimm
)
{
e
->
top_layer
=
index
;
e
->
top_layer
=
dimm
->
idx
;
strcpy
(
e
->
label
,
dimm
->
label
);
}
}
}
if
(
p
>
e
->
location
)
if
(
p
>
e
->
location
)
*
(
p
-
1
)
=
'\0'
;
*
(
p
-
1
)
=
'\0'
;
if
(
!*
e
->
label
)
strcpy
(
e
->
label
,
"unknown memory"
);
/* All other fields are mapped on e->other_detail */
/* All other fields are mapped on e->other_detail */
p
=
pvt
->
other_detail
;
p
=
pvt
->
other_detail
;
p
+=
snprintf
(
p
,
sizeof
(
pvt
->
other_detail
),
p
+=
snprintf
(
p
,
sizeof
(
pvt
->
other_detail
),
...
@@ -455,13 +495,12 @@ static struct acpi_platform_list plat_list[] = {
...
@@ -455,13 +495,12 @@ static struct acpi_platform_list plat_list[] = {
int
ghes_edac_register
(
struct
ghes
*
ghes
,
struct
device
*
dev
)
int
ghes_edac_register
(
struct
ghes
*
ghes
,
struct
device
*
dev
)
{
{
bool
fake
=
false
;
bool
fake
=
false
;
int
rc
=
0
,
num_dimm
=
0
;
struct
mem_ctl_info
*
mci
;
struct
mem_ctl_info
*
mci
;
struct
ghes_
edac_
pvt
*
pvt
;
struct
ghes_pvt
*
pvt
;
struct
edac_mc_layer
layers
[
1
];
struct
edac_mc_layer
layers
[
1
];
struct
ghes_edac_dimm_fill
dimm_fill
;
unsigned
long
flags
;
unsigned
long
flags
;
int
idx
=
-
1
;
int
idx
=
-
1
;
int
rc
=
0
;
if
(
IS_ENABLED
(
CONFIG_X86
))
{
if
(
IS_ENABLED
(
CONFIG_X86
))
{
/* Check if safe to enable on this system */
/* Check if safe to enable on this system */
...
@@ -481,20 +520,19 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
...
@@ -481,20 +520,19 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
if
(
refcount_inc_not_zero
(
&
ghes_refcount
))
if
(
refcount_inc_not_zero
(
&
ghes_refcount
))
goto
unlock
;
goto
unlock
;
/* Get the number of DIMMs */
ghes_scan_system
();
dmi_walk
(
ghes_edac_count_dimms
,
&
num_dimm
);
/* Check if we've got a bogus BIOS */
/* Check if we've got a bogus BIOS */
if
(
num_dimm
==
0
)
{
if
(
!
ghes_hw
.
num_dimms
)
{
fake
=
true
;
fake
=
true
;
num_dimm
=
1
;
ghes_hw
.
num_dimms
=
1
;
}
}
layers
[
0
].
type
=
EDAC_MC_LAYER_ALL_MEM
;
layers
[
0
].
type
=
EDAC_MC_LAYER_ALL_MEM
;
layers
[
0
].
size
=
num_dimm
;
layers
[
0
].
size
=
ghes_hw
.
num_dimms
;
layers
[
0
].
is_virt_csrow
=
true
;
layers
[
0
].
is_virt_csrow
=
true
;
mci
=
edac_mc_alloc
(
0
,
ARRAY_SIZE
(
layers
),
layers
,
sizeof
(
struct
ghes_
edac_
pvt
));
mci
=
edac_mc_alloc
(
0
,
ARRAY_SIZE
(
layers
),
layers
,
sizeof
(
struct
ghes_pvt
));
if
(
!
mci
)
{
if
(
!
mci
)
{
pr_info
(
"Can't allocate memory for EDAC data
\n
"
);
pr_info
(
"Can't allocate memory for EDAC data
\n
"
);
rc
=
-
ENOMEM
;
rc
=
-
ENOMEM
;
...
@@ -502,7 +540,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
...
@@ -502,7 +540,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
}
}
pvt
=
mci
->
pvt_info
;
pvt
=
mci
->
pvt_info
;
pvt
->
ghes
=
ghes
;
pvt
->
mci
=
mci
;
pvt
->
mci
=
mci
;
mci
->
pdev
=
dev
;
mci
->
pdev
=
dev
;
...
@@ -523,13 +560,34 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
...
@@ -523,13 +560,34 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
pr_info
(
"So, the end result of using this driver varies from vendor to vendor.
\n
"
);
pr_info
(
"So, the end result of using this driver varies from vendor to vendor.
\n
"
);
pr_info
(
"If you find incorrect reports, please contact your hardware vendor
\n
"
);
pr_info
(
"If you find incorrect reports, please contact your hardware vendor
\n
"
);
pr_info
(
"to correct its BIOS.
\n
"
);
pr_info
(
"to correct its BIOS.
\n
"
);
pr_info
(
"This system has %d DIMM sockets.
\n
"
,
num_dimm
);
pr_info
(
"This system has %d DIMM sockets.
\n
"
,
ghes_hw
.
num_dimms
);
}
}
if
(
!
fake
)
{
if
(
!
fake
)
{
dimm_fill
.
count
=
0
;
struct
dimm_info
*
src
,
*
dst
;
dimm_fill
.
mci
=
mci
;
int
i
=
0
;
dmi_walk
(
ghes_edac_dmidecode
,
&
dimm_fill
);
mci_for_each_dimm
(
mci
,
dst
)
{
src
=
&
ghes_hw
.
dimms
[
i
];
dst
->
idx
=
src
->
idx
;
dst
->
smbios_handle
=
src
->
smbios_handle
;
dst
->
nr_pages
=
src
->
nr_pages
;
dst
->
mtype
=
src
->
mtype
;
dst
->
edac_mode
=
src
->
edac_mode
;
dst
->
dtype
=
src
->
dtype
;
dst
->
grain
=
src
->
grain
;
/*
* If no src->label, preserve default label assigned
* from EDAC core.
*/
if
(
strlen
(
src
->
label
))
memcpy
(
dst
->
label
,
src
->
label
,
sizeof
(
src
->
label
));
i
++
;
}
}
else
{
}
else
{
struct
dimm_info
*
dimm
=
edac_get_dimm
(
mci
,
0
,
0
,
0
);
struct
dimm_info
*
dimm
=
edac_get_dimm
(
mci
,
0
,
0
,
0
);
...
@@ -542,7 +600,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
...
@@ -542,7 +600,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
rc
=
edac_mc_add_mc
(
mci
);
rc
=
edac_mc_add_mc
(
mci
);
if
(
rc
<
0
)
{
if
(
rc
<
0
)
{
pr_info
(
"Can't register
at
EDAC core
\n
"
);
pr_info
(
"Can't register
with the
EDAC core
\n
"
);
edac_mc_free
(
mci
);
edac_mc_free
(
mci
);
rc
=
-
ENODEV
;
rc
=
-
ENODEV
;
goto
unlock
;
goto
unlock
;
...
@@ -556,6 +614,11 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
...
@@ -556,6 +614,11 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
refcount_set
(
&
ghes_refcount
,
1
);
refcount_set
(
&
ghes_refcount
,
1
);
unlock:
unlock:
/* Not needed anymore */
kfree
(
ghes_hw
.
dimms
);
ghes_hw
.
dimms
=
NULL
;
mutex_unlock
(
&
ghes_reg_mutex
);
mutex_unlock
(
&
ghes_reg_mutex
);
return
rc
;
return
rc
;
...
...
include/linux/edac.h
View file @
0f959e19
...
@@ -594,27 +594,6 @@ struct mem_ctl_info {
...
@@ -594,27 +594,6 @@ struct mem_ctl_info {
? (mci)->dimms[(dimm)->idx + 1] \
? (mci)->dimms[(dimm)->idx + 1] \
: NULL)
: NULL)
/**
* edac_get_dimm_by_index - Get DIMM info at @index from a memory
* controller
*
* @mci: MC descriptor struct mem_ctl_info
* @index: index in the memory controller's DIMM array
*
* Returns a struct dimm_info * or NULL on failure.
*/
static
inline
struct
dimm_info
*
edac_get_dimm_by_index
(
struct
mem_ctl_info
*
mci
,
int
index
)
{
if
(
index
<
0
||
index
>=
mci
->
tot_dimms
)
return
NULL
;
if
(
WARN_ON_ONCE
(
mci
->
dimms
[
index
]
->
idx
!=
index
))
return
NULL
;
return
mci
->
dimms
[
index
];
}
/**
/**
* edac_get_dimm - Get DIMM info from a memory controller given by
* edac_get_dimm - Get DIMM info from a memory controller given by
* [layer0,layer1,layer2] position
* [layer0,layer1,layer2] position
...
@@ -650,6 +629,12 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci,
...
@@ -650,6 +629,12 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci,
if
(
mci
->
n_layers
>
2
)
if
(
mci
->
n_layers
>
2
)
index
=
index
*
mci
->
layers
[
2
].
size
+
layer2
;
index
=
index
*
mci
->
layers
[
2
].
size
+
layer2
;
return
edac_get_dimm_by_index
(
mci
,
index
);
if
(
index
<
0
||
index
>=
mci
->
tot_dimms
)
return
NULL
;
if
(
WARN_ON_ONCE
(
mci
->
dimms
[
index
]
->
idx
!=
index
))
return
NULL
;
return
mci
->
dimms
[
index
];
}
}
#endif
/* _LINUX_EDAC_H_ */
#endif
/* _LINUX_EDAC_H_ */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment