Commit aedf3497 authored by James Smart's avatar James Smart Committed by James Bottomley

[SCSI] FC transport: fixes for workq deadlocks

As previously reported via Michael Reed, the FC transport took a hit
in 2.6.15 (perhaps a little earlier) when we solved a recursion error.
There are 2 deadlocks occurring:
- With scan and the delete items sharing the same workq, flushing the
  workq for the delete code was getting it stalled behind a very long
  running scan code path.
- There's a deadlock where scsi_remove_target() has to sit behind
  scsi_scan_target() due to contention over the scan_lock().

This patch resolves the 1st deadlock and significantly reduces the
odds of the second. So far, we have only replicated the 2nd deadlock
on a highly-parallel SMP system. More on the 2nd deadlock in a following
email.

This patch reworks the transport to:
- Only use the scsi host workq for scanning
- Use 2 other workq's internally. One for deletions, the other for
  scheduled deletions. Originally, we tried this with a single workq,
  but the occassional flushes of the scheduled queues was hitting the
  second deadlock with a slightly higher frequency. In the future, we'll
  look at the LLDD's and the transport to see if we can get rid of this
  extra overhead.
- When moving to the other workq's we tightened up some object states
  and some lock handling.
- Properly syncs adds/deletes
- minor code cleanups
  - directly reference fc_host_attrs, rather than through attribute
    macros
  - flush the right workq on delayed work cancel failures.

Large kudos to Michael Reed who has been working this issue for the last
month.
Signed-off-by: default avatarJames Bottomley <James.Bottomley@SteelEye.com>
parent b0d23648
This diff is collapsed.
...@@ -202,12 +202,19 @@ struct fc_rport { /* aka fc_starget_attrs */ ...@@ -202,12 +202,19 @@ struct fc_rport { /* aka fc_starget_attrs */
/* internal data */ /* internal data */
unsigned int channel; unsigned int channel;
u32 number; u32 number;
u8 flags;
struct list_head peers; struct list_head peers;
struct device dev; struct device dev;
struct work_struct dev_loss_work; struct work_struct dev_loss_work;
struct work_struct scan_work; struct work_struct scan_work;
struct work_struct stgt_delete_work;
struct work_struct rport_delete_work;
} __attribute__((aligned(sizeof(unsigned long)))); } __attribute__((aligned(sizeof(unsigned long))));
/* bit field values for struct fc_rport "flags" field: */
#define FC_RPORT_DEVLOSS_PENDING 0x01
#define FC_RPORT_SCAN_PENDING 0x02
#define dev_to_rport(d) \ #define dev_to_rport(d) \
container_of(d, struct fc_rport, dev) container_of(d, struct fc_rport, dev)
#define transport_class_to_rport(classdev) \ #define transport_class_to_rport(classdev) \
...@@ -327,13 +334,16 @@ struct fc_host_attrs { ...@@ -327,13 +334,16 @@ struct fc_host_attrs {
struct list_head rport_bindings; struct list_head rport_bindings;
u32 next_rport_number; u32 next_rport_number;
u32 next_target_id; u32 next_target_id;
u8 flags;
struct work_struct rport_del_work;
};
/* values for struct fc_host_attrs "flags" field: */ /* work queues for rport state manipulation */
#define FC_SHOST_RPORT_DEL_SCHEDULED 0x01 char work_q_name[KOBJ_NAME_LEN];
struct workqueue_struct *work_q;
char devloss_work_q_name[KOBJ_NAME_LEN];
struct workqueue_struct *devloss_work_q;
};
#define shost_to_fc_host(x) \
((struct fc_host_attrs *)(x)->shost_data)
#define fc_host_node_name(x) \ #define fc_host_node_name(x) \
(((struct fc_host_attrs *)(x)->shost_data)->node_name) (((struct fc_host_attrs *)(x)->shost_data)->node_name)
...@@ -375,10 +385,14 @@ struct fc_host_attrs { ...@@ -375,10 +385,14 @@ struct fc_host_attrs {
(((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number)
#define fc_host_next_target_id(x) \ #define fc_host_next_target_id(x) \
(((struct fc_host_attrs *)(x)->shost_data)->next_target_id) (((struct fc_host_attrs *)(x)->shost_data)->next_target_id)
#define fc_host_flags(x) \ #define fc_host_work_q_name(x) \
(((struct fc_host_attrs *)(x)->shost_data)->flags) (((struct fc_host_attrs *)(x)->shost_data)->work_q_name)
#define fc_host_rport_del_work(x) \ #define fc_host_work_q(x) \
(((struct fc_host_attrs *)(x)->shost_data)->rport_del_work) (((struct fc_host_attrs *)(x)->shost_data)->work_q)
#define fc_host_devloss_work_q_name(x) \
(((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name)
#define fc_host_devloss_work_q(x) \
(((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q)
/* The functions by which the transport class and the driver communicate */ /* The functions by which the transport class and the driver communicate */
...@@ -461,10 +475,15 @@ fc_remote_port_chkready(struct fc_rport *rport) ...@@ -461,10 +475,15 @@ fc_remote_port_chkready(struct fc_rport *rport)
switch (rport->port_state) { switch (rport->port_state) {
case FC_PORTSTATE_ONLINE: case FC_PORTSTATE_ONLINE:
result = 0; if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
result = 0;
else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
result = DID_IMM_RETRY << 16;
else
result = DID_NO_CONNECT << 16;
break; break;
case FC_PORTSTATE_BLOCKED: case FC_PORTSTATE_BLOCKED:
result = DID_BUS_BUSY << 16; result = DID_IMM_RETRY << 16;
break; break;
default: default:
result = DID_NO_CONNECT << 16; result = DID_NO_CONNECT << 16;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment