Commit b75d91f7 authored by Corey Minyard, committed by Linus Torvalds

ipmi_watchdog: restore settings when BMC reset

If the BMC gets reset, it will return 0x80 response errors.

In less than a week, the kernel log accumulated this many of these errors:
# grep "Error 80 on cmd 22" /var/log/kernel |wc -l
378681

In this case, it is probably a good idea to restore the IPMI settings.
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ff05b6f7
...@@ -139,6 +139,8 @@ ...@@ -139,6 +139,8 @@
#define IPMI_WDOG_SET_TIMER 0x24 #define IPMI_WDOG_SET_TIMER 0x24
#define IPMI_WDOG_GET_TIMER 0x25 #define IPMI_WDOG_GET_TIMER 0x25
#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80
/* These are here until the real ones get into the watchdog.h interface. */ /* These are here until the real ones get into the watchdog.h interface. */
#ifndef WDIOC_GETTIMEOUT #ifndef WDIOC_GETTIMEOUT
#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) #define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
...@@ -596,6 +598,7 @@ static int ipmi_heartbeat(void) ...@@ -596,6 +598,7 @@ static int ipmi_heartbeat(void)
struct kernel_ipmi_msg msg; struct kernel_ipmi_msg msg;
int rv; int rv;
struct ipmi_system_interface_addr addr; struct ipmi_system_interface_addr addr;
int timeout_retries = 0;
if (ipmi_ignore_heartbeat) if (ipmi_ignore_heartbeat)
return 0; return 0;
...@@ -616,6 +619,7 @@ static int ipmi_heartbeat(void) ...@@ -616,6 +619,7 @@ static int ipmi_heartbeat(void)
mutex_lock(&heartbeat_lock); mutex_lock(&heartbeat_lock);
restart:
atomic_set(&heartbeat_tofree, 2); atomic_set(&heartbeat_tofree, 2);
/* /*
...@@ -653,7 +657,33 @@ static int ipmi_heartbeat(void) ...@@ -653,7 +657,33 @@ static int ipmi_heartbeat(void)
/* Wait for the heartbeat to be sent. */ /* Wait for the heartbeat to be sent. */
wait_for_completion(&heartbeat_wait); wait_for_completion(&heartbeat_wait);
if (heartbeat_recv_msg.msg.data[0] != 0) { if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
timeout_retries++;
if (timeout_retries > 3) {
printk(KERN_ERR PFX ": Unable to restore the IPMI"
" watchdog's settings, giving up.\n");
rv = -EIO;
goto out_unlock;
}
/*
* The timer was not initialized, that means the BMC was
* probably reset and lost the watchdog information. Attempt
* to restore the timer's info. Note that we still hold
* the heartbeat lock, to keep a heartbeat from happening
* in this process, so must say no heartbeat to avoid a
* deadlock on this mutex.
*/
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
if (rv) {
printk(KERN_ERR PFX ": Unable to send the command to"
" set the watchdog's settings, giving up.\n");
goto out_unlock;
}
/* We might need a new heartbeat, so do it now */
goto restart;
} else if (heartbeat_recv_msg.msg.data[0] != 0) {
/* /*
* Got an error in the heartbeat response. It was already * Got an error in the heartbeat response. It was already
* reported in ipmi_wdog_msg_handler, but we should return * reported in ipmi_wdog_msg_handler, but we should return
...@@ -662,6 +692,7 @@ static int ipmi_heartbeat(void) ...@@ -662,6 +692,7 @@ static int ipmi_heartbeat(void)
rv = -EINVAL; rv = -EINVAL;
} }
out_unlock:
mutex_unlock(&heartbeat_lock); mutex_unlock(&heartbeat_lock);
return rv; return rv;
...@@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = { ...@@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = {
static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
void *handler_data) void *handler_data)
{ {
if (msg->msg.data[0] != 0) { if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
printk(KERN_INFO PFX "response: The IPMI controller appears"
" to have been reset, will attempt to reinitialize"
" the watchdog timer\n");
else if (msg->msg.data[0] != 0)
printk(KERN_ERR PFX "response: Error %x on cmd %x\n", printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
msg->msg.data[0], msg->msg.data[0],
msg->msg.cmd); msg->msg.cmd);
}
ipmi_free_recv_msg(msg); ipmi_free_recv_msg(msg);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment