Commit d19c4643 authored by Mike Christie, committed by Nicholas Bellinger

target: Fix ALUA transition state race between multiple initiators

Multiple threads could be writing to alua_access_state at
the same time, or there could be multiple STPGs in flight
(different initiators sending them or one initiator sending
them to different ports), or a combo of both and the
core_alua_do_transition_tg_pt calls will race with each other.

Because from the last patches we no longer delay running
core_alua_do_transition_tg_pt_work, there does not seem to be
any point in running that in a workqueue. And, we always
wait for it to complete one way or another, so we can sleep
in this code path. So, this patch (made over the target-pending branch) simply
adds a mutex and performs the work that core_alua_do_transition_tg_pt_work was
doing directly in core_alua_do_transition_tg_pt.

There is also no need to use an atomic for the
tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will
test and set it under the transition mutex. And, since it is an int (32 bits),
in the other places where it is read we will never see it partially
updated.
Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
parent 7a56dc88
...@@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) ...@@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
/* /*
* Set the ASYMMETRIC ACCESS State * Set the ASYMMETRIC ACCESS State
*/ */
buf[off++] |= (atomic_read( buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
&tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
/* /*
* Set supported ASYMMETRIC ACCESS State bits * Set supported ASYMMETRIC ACCESS State bits
*/ */
...@@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd) ...@@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd)
spin_lock(&lun->lun_tg_pt_gp_lock); spin_lock(&lun->lun_tg_pt_gp_lock);
tg_pt_gp = lun->lun_tg_pt_gp; tg_pt_gp = lun->lun_tg_pt_gp;
out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
// XXX: keeps using tg_pt_gp without reference after unlock // XXX: keeps using tg_pt_gp without reference after unlock
...@@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata( ...@@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata(
} }
/* /*
* Called with tg_pt_gp->tg_pt_gp_md_mutex held * Called with tg_pt_gp->tg_pt_gp_transition_mutex held
*/ */
static int core_alua_update_tpg_primary_metadata( static int core_alua_update_tpg_primary_metadata(
struct t10_alua_tg_pt_gp *tg_pt_gp) struct t10_alua_tg_pt_gp *tg_pt_gp)
...@@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata( ...@@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata(
"alua_access_state=0x%02x\n" "alua_access_state=0x%02x\n"
"alua_access_status=0x%02x\n", "alua_access_status=0x%02x\n",
tg_pt_gp->tg_pt_gp_id, tg_pt_gp->tg_pt_gp_id,
tg_pt_gp->tg_pt_gp_alua_pending_state, tg_pt_gp->tg_pt_gp_alua_access_state,
tg_pt_gp->tg_pt_gp_alua_access_status); tg_pt_gp->tg_pt_gp_alua_access_status);
snprintf(path, ALUA_METADATA_PATH_LEN, snprintf(path, ALUA_METADATA_PATH_LEN,
...@@ -1013,93 +1012,41 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) ...@@ -1013,93 +1012,41 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
spin_unlock(&tg_pt_gp->tg_pt_gp_lock); spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
} }
/*
 * Workqueue handler (removed by this commit) that finished a primary ALUA
 * access-state transition: it optionally persists the new state to the ALUA
 * metadata file, publishes the pending state as the current state, emits a
 * unit attention to registered initiators, drops the work-item reference,
 * and wakes any explicit-STPG waiter.
 */
static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
{
struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
/* access_status was set by the submitter; EXPLICIT_STPG marks an explicit transition */
bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
/*
 * Update the ALUA metadata buf that has been allocated in
 * core_alua_do_port_transition(), this metadata will be written
 * to struct file.
 *
 * Note that there is the case where we do not want to update the
 * metadata when the saved metadata is being parsed in userspace
 * when setting the existing port access state and access status.
 *
 * Also note that the failure to write out the ALUA metadata to
 * struct file does NOT affect the actual ALUA transition.
 */
if (tg_pt_gp->tg_pt_gp_write_metadata) {
/* md_mutex serializes writers of the on-disk ALUA metadata */
mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
core_alua_update_tpg_primary_metadata(tg_pt_gp);
mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
}
/*
 * Set the current primary ALUA access state to the requested new state
 */
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
tg_pt_gp->tg_pt_gp_alua_pending_state);
pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
" from primary access state %s to %s\n", (explicit) ? "explicit" :
"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
tg_pt_gp->tg_pt_gp_id,
core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
/* Notify initiators of the state change via ASYMMETRIC ACCESS STATE CHANGED UA */
core_alua_queue_state_change_ua(tg_pt_gp);
/* Drop the reference taken when this work item was scheduled in
 * core_alua_do_transition_tg_pt() */
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
/* Wake the explicit-STPG submitter blocked in wait_for_completion(), if any */
if (tg_pt_gp->tg_pt_gp_transition_complete)
complete(tg_pt_gp->tg_pt_gp_transition_complete);
}
static int core_alua_do_transition_tg_pt( static int core_alua_do_transition_tg_pt(
struct t10_alua_tg_pt_gp *tg_pt_gp, struct t10_alua_tg_pt_gp *tg_pt_gp,
int new_state, int new_state,
int explicit) int explicit)
{ {
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; int prev_state;
DECLARE_COMPLETION_ONSTACK(wait);
mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
/* Nothing to be done here */ /* Nothing to be done here */
if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state) if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return 0; return 0;
}
if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return -EAGAIN; return -EAGAIN;
}
/*
* Flush any pending transitions
*/
if (!explicit)
flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/* /*
* Save the old primary ALUA access state, and set the current state * Save the old primary ALUA access state, and set the current state
* to ALUA_ACCESS_STATE_TRANSITION. * to ALUA_ACCESS_STATE_TRANSITION.
*/ */
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
ALUA_ACCESS_STATE_TRANSITION); tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
core_alua_queue_state_change_ua(tg_pt_gp); core_alua_queue_state_change_ua(tg_pt_gp);
if (new_state == ALUA_ACCESS_STATE_TRANSITION) if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return 0; return 0;
}
tg_pt_gp->tg_pt_gp_alua_previous_state =
atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
/* /*
* Check for the optional ALUA primary state transition delay * Check for the optional ALUA primary state transition delay
...@@ -1108,19 +1055,36 @@ static int core_alua_do_transition_tg_pt( ...@@ -1108,19 +1055,36 @@ static int core_alua_do_transition_tg_pt(
msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs); msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
/* /*
* Take a reference for workqueue item * Set the current primary ALUA access state to the requested new state
*/ */
spin_lock(&dev->t10_alua.tg_pt_gps_lock); tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); /*
if (explicit) { * Update the ALUA metadata buf that has been allocated in
tg_pt_gp->tg_pt_gp_transition_complete = &wait; * core_alua_do_port_transition(), this metadata will be written
wait_for_completion(&wait); * to struct file.
tg_pt_gp->tg_pt_gp_transition_complete = NULL; *
* Note that there is the case where we do not want to update the
* metadata when the saved metadata is being parsed in userspace
* when setting the existing port access state and access status.
*
* Also note that the failure to write out the ALUA metadata to
* struct file does NOT affect the actual ALUA transition.
*/
if (tg_pt_gp->tg_pt_gp_write_metadata) {
core_alua_update_tpg_primary_metadata(tg_pt_gp);
} }
pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
" from primary access state %s to %s\n", (explicit) ? "explicit" :
"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
tg_pt_gp->tg_pt_gp_id,
core_alua_dump_state(prev_state),
core_alua_dump_state(new_state));
core_alua_queue_state_change_ua(tg_pt_gp);
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return 0; return 0;
} }
...@@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, ...@@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
} }
INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list); INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list); INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
core_alua_do_transition_tg_pt_work);
tg_pt_gp->tg_pt_gp_dev = dev; tg_pt_gp->tg_pt_gp_dev = dev;
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, tg_pt_gp->tg_pt_gp_alua_access_state =
ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
/* /*
* Enable both explicit and implicit ALUA support by default * Enable both explicit and implicit ALUA support by default
*/ */
...@@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp( ...@@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp(
dev->t10_alua.alua_tg_pt_gps_counter--; dev->t10_alua.alua_tg_pt_gps_counter--;
spin_unlock(&dev->t10_alua.tg_pt_gps_lock); spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/* /*
* Allow a struct t10_alua_tg_pt_gp_member * referenced by * Allow a struct t10_alua_tg_pt_gp_member * referenced by
* core_alua_get_tg_pt_gp_by_name() in * core_alua_get_tg_pt_gp_by_name() in
...@@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page) ...@@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
"Primary Access Status: %s\nTG Port Secondary Access" "Primary Access Status: %s\nTG Port Secondary Access"
" State: %s\nTG Port Secondary Access Status: %s\n", " State: %s\nTG Port Secondary Access Status: %s\n",
config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id, config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
core_alua_dump_state(atomic_read( core_alua_dump_state(
&tg_pt_gp->tg_pt_gp_alua_access_state)), tg_pt_gp->tg_pt_gp_alua_access_state),
core_alua_dump_status( core_alua_dump_status(
tg_pt_gp->tg_pt_gp_alua_access_status), tg_pt_gp->tg_pt_gp_alua_access_status),
atomic_read(&lun->lun_tg_pt_secondary_offline) ? atomic_read(&lun->lun_tg_pt_secondary_offline) ?
......
...@@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item, ...@@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
char *page) char *page)
{ {
return sprintf(page, "%d\n", return sprintf(page, "%d\n",
atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state)); to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
} }
static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item, static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
......
...@@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp { ...@@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp {
u16 tg_pt_gp_id; u16 tg_pt_gp_id;
int tg_pt_gp_valid_id; int tg_pt_gp_valid_id;
int tg_pt_gp_alua_supported_states; int tg_pt_gp_alua_supported_states;
int tg_pt_gp_alua_pending_state;
int tg_pt_gp_alua_previous_state;
int tg_pt_gp_alua_access_status; int tg_pt_gp_alua_access_status;
int tg_pt_gp_alua_access_type; int tg_pt_gp_alua_access_type;
int tg_pt_gp_nonop_delay_msecs; int tg_pt_gp_nonop_delay_msecs;
...@@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp { ...@@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp {
int tg_pt_gp_pref; int tg_pt_gp_pref;
int tg_pt_gp_write_metadata; int tg_pt_gp_write_metadata;
u32 tg_pt_gp_members; u32 tg_pt_gp_members;
atomic_t tg_pt_gp_alua_access_state; int tg_pt_gp_alua_access_state;
atomic_t tg_pt_gp_ref_cnt; atomic_t tg_pt_gp_ref_cnt;
spinlock_t tg_pt_gp_lock; spinlock_t tg_pt_gp_lock;
struct mutex tg_pt_gp_md_mutex; struct mutex tg_pt_gp_transition_mutex;
struct se_device *tg_pt_gp_dev; struct se_device *tg_pt_gp_dev;
struct config_group tg_pt_gp_group; struct config_group tg_pt_gp_group;
struct list_head tg_pt_gp_list; struct list_head tg_pt_gp_list;
struct list_head tg_pt_gp_lun_list; struct list_head tg_pt_gp_lun_list;
struct se_lun *tg_pt_gp_alua_lun; struct se_lun *tg_pt_gp_alua_lun;
struct se_node_acl *tg_pt_gp_alua_nacl; struct se_node_acl *tg_pt_gp_alua_nacl;
struct work_struct tg_pt_gp_transition_work;
struct completion *tg_pt_gp_transition_complete;
}; };
struct t10_vpd { struct t10_vpd {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment