Commit ebd11e15, authored by Cliff Wickman and committed by Greg Kroah-Hartman

x86/UV2: Work around BAU bug

commit c5d35d39 upstream.

This patch implements a workaround for a UV2 hardware bug.
The bug is a non-atomic update of a memory-mapped register. When
hardware message delivery and software message acknowledge occur
simultaneously the pending message acknowledge for the arriving
message may be lost.  This causes the sender's message status to
stay busy.

Part of the workaround is to not acknowledge a completed message
until it is verified that no other message is actually using the
resource that is mistakenly recorded in the completed message.

Part of the workaround is to test for long elapsed time in such
a busy condition, then handle it by using a spare sending
descriptor. The stay-busy condition is eventually timed out by
hardware, and then the original sending descriptor can be
re-used. Most of that logic change is in keeping track of the
current descriptor and the state of the spares.

The occurrences of the workaround are added to the BAU
statistics.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120116211947.GC5767@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
parent 5869dc3c
...@@ -167,6 +167,7 @@ ...@@ -167,6 +167,7 @@
#define FLUSH_RETRY_TIMEOUT 2 #define FLUSH_RETRY_TIMEOUT 2
#define FLUSH_GIVEUP 3 #define FLUSH_GIVEUP 3
#define FLUSH_COMPLETE 4 #define FLUSH_COMPLETE 4
#define FLUSH_RETRY_BUSYBUG 5
/* /*
* tuning the action when the numalink network is extremely delayed * tuning the action when the numalink network is extremely delayed
...@@ -463,7 +464,6 @@ struct bau_pq_entry { ...@@ -463,7 +464,6 @@ struct bau_pq_entry {
struct msg_desc { struct msg_desc {
struct bau_pq_entry *msg; struct bau_pq_entry *msg;
int msg_slot; int msg_slot;
int swack_slot;
struct bau_pq_entry *queue_first; struct bau_pq_entry *queue_first;
struct bau_pq_entry *queue_last; struct bau_pq_entry *queue_last;
}; };
...@@ -517,6 +517,9 @@ struct ptc_stats { ...@@ -517,6 +517,9 @@ struct ptc_stats {
unsigned long s_retry_messages; /* retry broadcasts */ unsigned long s_retry_messages; /* retry broadcasts */
unsigned long s_bau_reenabled; /* for bau enable/disable */ unsigned long s_bau_reenabled; /* for bau enable/disable */
unsigned long s_bau_disabled; /* for bau enable/disable */ unsigned long s_bau_disabled; /* for bau enable/disable */
unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
/* destination statistics */ /* destination statistics */
unsigned long d_alltlb; /* times all tlb's on this unsigned long d_alltlb; /* times all tlb's on this
cpu were flushed */ cpu were flushed */
...@@ -593,6 +596,8 @@ struct bau_control { ...@@ -593,6 +596,8 @@ struct bau_control {
short cpus_in_socket; short cpus_in_socket;
short cpus_in_uvhub; short cpus_in_uvhub;
short partition_base_pnode; short partition_base_pnode;
short using_desc; /* an index, like uvhub_cpu */
unsigned int inuse_map;
unsigned short message_number; unsigned short message_number;
unsigned short uvhub_quiesce; unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE]; short socket_acknowledge_count[DEST_Q_SIZE];
...@@ -610,6 +615,7 @@ struct bau_control { ...@@ -610,6 +615,7 @@ struct bau_control {
int cong_response_us; int cong_response_us;
int cong_reps; int cong_reps;
int cong_period; int cong_period;
unsigned long clocks_per_100_usec;
cycles_t period_time; cycles_t period_time;
long period_requests; long period_requests;
struct hub_and_pnode *thp; struct hub_and_pnode *thp;
...@@ -670,6 +676,11 @@ static inline void write_mmr_sw_ack(unsigned long mr) ...@@ -670,6 +676,11 @@ static inline void write_mmr_sw_ack(unsigned long mr)
uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
} }
/*
 * Write 'mr' to the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS register
 * through write_gmmr(), passing 'pnode' as the target.
 *
 * Counterpart of write_mmr_sw_ack(), which writes the same register
 * through uv_write_local_mmr() and therefore takes no pnode.
 * NOTE(review): presumably 'pnode' selects the hub whose register is
 * written (i.e. a remote/global access) - confirm against write_gmmr().
 */
static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
{
write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
}
static inline unsigned long read_mmr_sw_ack(void) static inline unsigned long read_mmr_sw_ack(void)
{ {
return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment