Commit 0c7f77af authored by Easwar Hariharan, committed by Doug Ledford

IB/hfi1: Ignore non-temperature warnings on a downed link

QSFP modules can raise an interrupt to inform us of expected conditions
while the link is down, such as RX power low. Actively ignore these
conditions when the link is down as they only add reporting noise.
Continue reporting conditions that are valid at all times, such as
temperature alarms and warnings.
Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 321aebb8
...@@ -6105,7 +6105,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) ...@@ -6105,7 +6105,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
} }
/* this access is valid only when the link is up */ /* this access is valid only when the link is up */
if ((ppd->host_link_state & HLS_UP) == 0) { if (ppd->host_link_state & HLS_DOWN) {
dd_dev_info(dd, "%s: link state %s not up\n", dd_dev_info(dd, "%s: link state %s not up\n",
__func__, link_state_name(ppd->host_link_state)); __func__, link_state_name(ppd->host_link_state));
ret = -EBUSY; ret = -EBUSY;
...@@ -7429,7 +7429,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) ...@@ -7429,7 +7429,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
retry: retry:
mutex_lock(&ppd->hls_lock); mutex_lock(&ppd->hls_lock);
/* only apply if the link is up */ /* only apply if the link is up */
if (!(ppd->host_link_state & HLS_UP)) { if (ppd->host_link_state & HLS_DOWN) {
/* still going up..wait and retry */ /* still going up..wait and retry */
if (ppd->host_link_state & HLS_GOING_UP) { if (ppd->host_link_state & HLS_GOING_UP) {
if (++tries < 1000) { if (++tries < 1000) {
...@@ -9252,6 +9252,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, ...@@ -9252,6 +9252,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
dd_dev_info(dd, "%s: QSFP cable temperature too low\n", dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
__func__); __func__);
/*
* The remaining alarms/warnings don't matter if the link is down.
*/
if (ppd->host_link_state & HLS_DOWN)
return 0;
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
dd_dev_info(dd, "%s: QSFP supply voltage too high\n", dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
...@@ -9346,9 +9352,8 @@ void qsfp_event(struct work_struct *work) ...@@ -9346,9 +9352,8 @@ void qsfp_event(struct work_struct *work)
return; return;
/* /*
* Turn DC back on after cables has been * Turn DC back on after cable has been re-inserted. Up until
* re-inserted. Up until now, the DC has been in * now, the DC has been in reset to save power.
* reset to save power.
*/ */
dc_start(dd); dc_start(dd);
...@@ -10074,7 +10079,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) ...@@ -10074,7 +10079,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
*/ */
u32 driver_logical_state(struct hfi1_pportdata *ppd) u32 driver_logical_state(struct hfi1_pportdata *ppd)
{ {
if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP)) if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN))
return IB_PORT_DOWN; return IB_PORT_DOWN;
switch (ppd->host_link_state & HLS_UP) { switch (ppd->host_link_state & HLS_UP) {
......
...@@ -998,7 +998,7 @@ static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, ...@@ -998,7 +998,7 @@ static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
u16 per_vl_credits; u16 per_vl_credits;
__be16 be_per_vl_credits; __be16 be_per_vl_credits;
if (!(ppd->host_link_state & HLS_UP)) if (ppd->host_link_state & HLS_DOWN)
goto err_exit; goto err_exit;
if (total_credits < vl15_credits) if (total_credits < vl15_credits)
goto err_exit; goto err_exit;
......
...@@ -453,6 +453,7 @@ struct rvt_sge_state; ...@@ -453,6 +453,7 @@ struct rvt_sge_state;
#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
#define HLS_DOWN ~(HLS_UP)
/* use this MTU size if none other is given */ /* use this MTU size if none other is given */
#define HFI1_DEFAULT_ACTIVE_MTU 10240 #define HFI1_DEFAULT_ACTIVE_MTU 10240
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment