Commit dd5a477c authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Round three of 4.8 rc fixes.

  This is likely the last rdma pull request this cycle.  The new rxe
  driver had a few issues (you probably saw the boot bot bug report) and
  they should be addressed now.  There are a couple other fixes here,
  mainly mlx4.  There are still two outstanding issues that need to be
  resolved, but I don't think their fix will make this kernel cycle.

  Summary:

   - Various fixes to rdmavt, ipoib, mlx5, mlx4, rxe"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/rdmavt: Don't vfree a kzalloc'ed memory region
  IB/rxe: Fix kmem_cache leak
  IB/rxe: Fix race condition between requester and completer
  IB/rxe: Fix duplicate atomic request handling
  IB/rxe: Fix kernel panic in udp_setup_tunnel
  IB/mlx5: Set source mac address in FTE
  IB/mlx5: Enable MAD_IFC commands for IB ports only
  IB/mlx4: Diagnostic HW counters are not supported in slave mode
  IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV
  IB/mlx4: Fix code indentation in QP1 MAD flow
  IB/mlx4: Fix incorrect MC join state bit-masking on SR-IOV
  IB/ipoib: Don't allow MC joins during light MC flush
  IB/rxe: fix GFP_KERNEL in spinlock context
parents 008f08d6 e4618d40
@@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
 		/* Generate GUID changed event */
 		if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+			if (mlx4_is_master(dev->dev)) {
+				union ib_gid gid;
+				int err = 0;
+
+				if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+					err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+				else
+					gid.global.subnet_prefix =
+						eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+				if (err) {
+					pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+						port, err);
+				} else {
+					pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+						 port,
+						 (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+						 be64_to_cpu(gid.global.subnet_prefix));
+					atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+						     be64_to_cpu(gid.global.subnet_prefix));
+				}
+			}
 			mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
 			/*if master, notify all slaves*/
 			if (mlx4_is_master(dev->dev))
@@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
 		if (err)
 			goto demux_err;
 		dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+		atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+			     be64_to_cpu(gid.global.subnet_prefix));
 		err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
 				      &dev->sriov.sqps[i]);
 		if (err)
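
The two hunks above cache the port's subnet prefix in host byte order inside an atomic64_t, so the PF can update it from the port-management event handler without tearing while QP1 MAD construction reads it concurrently. A minimal sketch of the same store/read pattern; the struct below is an illustrative stand-in, not the driver's mlx4_ib_demux_ctx:

#include <linux/atomic.h>
#include <linux/types.h>
#include <asm/byteorder.h>

/* Illustrative stand-in for the per-port demux context. */
struct demo_demux_ctx {
	atomic64_t subnet_prefix;	/* cached in host byte order */
};

/* Writer side: store a big-endian prefix obtained from a GID query. */
static void demo_cache_prefix(struct demo_demux_ctx *ctx, __be64 gid_prefix)
{
	atomic64_set(&ctx->subnet_prefix, be64_to_cpu(gid_prefix));
}

/* Reader side: rebuild the big-endian value when filling a GRH. */
static __be64 demo_read_prefix(struct demo_demux_ctx *ctx)
{
	return cpu_to_be64(atomic64_read(&ctx->subnet_prefix));
}

Keeping the cached copy in CPU order and converting only at the boundaries is what lets the plain 64-bit atomic replace the old __be64 field.
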
@@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
 	bool per_port = !!(ibdev->dev->caps.flags2 &
 		MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
 
+	if (mlx4_is_slave(ibdev->dev))
+		return 0;
+
 	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
 		/* i == 1 means we are building port counters */
 		if (i && !per_port)
@@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
 		if (!group->members[i])
 			leave_state |= (1 << i);
 
-	return leave_state & (group->rec.scope_join_state & 7);
+	return leave_state & (group->rec.scope_join_state & 0xf);
 }
 
 static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
 		} else
 			mcg_warn_group(group, "DRIVER BUG\n");
 	} else if (group->state == MCAST_LEAVE_SENT) {
-		if (group->rec.scope_join_state & 7)
-			group->rec.scope_join_state &= 0xf8;
+		if (group->rec.scope_join_state & 0xf)
+			group->rec.scope_join_state &= 0xf0;
 		group->state = MCAST_IDLE;
 		mutex_unlock(&group->lock);
 		if (release_group(group, 1))
@@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
 static int handle_join_req(struct mcast_group *group, u8 join_mask,
 			   struct mcast_req *req)
 {
-	u8 group_join_state = group->rec.scope_join_state & 7;
+	u8 group_join_state = group->rec.scope_join_state & 0xf;
 	int ref = 0;
 	u16 status;
 	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
 		u8 cur_join_state;
 
 		resp_join_state = ((struct ib_sa_mcmember_data *)
-				group->response_sa_mad.data)->scope_join_state & 7;
-		cur_join_state = group->rec.scope_join_state & 7;
+				group->response_sa_mad.data)->scope_join_state & 0xf;
+		cur_join_state = group->rec.scope_join_state & 0xf;
 
 		if (method == IB_MGMT_METHOD_GET_RESP) {
 			/* successfull join */
@@ -710,7 +710,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
 		req = list_first_entry(&group->pending_list, struct mcast_req,
 				       group_list);
 		sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
-		req_join_state = sa_data->scope_join_state & 0x7;
+		req_join_state = sa_data->scope_join_state & 0xf;
 
 		/* For a leave request, we will immediately answer the VF, and
 		 * update our internal counters. The actual leave will be sent
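
Every hunk above widens the same mask: in the MCMemberRecord scope_join_state byte the scope sits in the high nibble and the join state in the low nibble, and masking with 0x7 dropped bit 3, the send-only full-member join state, so such joins were mishandled by the SR-IOV multicast proxy. A small illustrative helper (not part of the patch) showing the intended split:

#include <linux/types.h>

/* scope_join_state: scope in bits 7..4, join state in bits 3..0.
 * A 0x7 mask would discard bit 3 (send-only full member).
 */
#define DEMO_JOIN_STATE_MASK	0xf
#define DEMO_SCOPE_SHIFT	4

static inline u8 demo_join_state(u8 scope_join_state)
{
	return scope_join_state & DEMO_JOIN_STATE_MASK;
}

static inline u8 demo_scope(u8 scope_join_state)
{
	return scope_join_state >> DEMO_SCOPE_SHIFT;
}
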
@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
 	struct workqueue_struct *wq;
 	struct workqueue_struct *ud_wq;
 	spinlock_t ud_lock;
-	__be64 subnet_prefix;
+	atomic64_t subnet_prefix;
 	__be64 guid_cache[128];
 	struct mlx4_ib_dev *dev;
 	/* the following lock protects both mcg_table and mcg_mgid0_list */
@@ -2493,24 +2493,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 		sqp->ud_header.grh.flow_label =
 			ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
 		sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
-		if (is_eth)
+		if (is_eth) {
 			memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
-		else {
+		} else {
 			if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 				/* When multi-function is enabled, the ib_core gid
 				 * indexes don't necessarily match the hw ones, so
-				 * we must use our own cache */
-				sqp->ud_header.grh.source_gid.global.subnet_prefix =
-					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-						       subnet_prefix;
-				sqp->ud_header.grh.source_gid.global.interface_id =
-					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-						       guid_cache[ah->av.ib.gid_index];
-			} else
+				 * we must use our own cache
+				 */
+				sqp->ud_header.grh.source_gid.global.subnet_prefix =
+					cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+								    demux[sqp->qp.port - 1].
+								    subnet_prefix)));
+				sqp->ud_header.grh.source_gid.global.interface_id =
+					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+					guid_cache[ah->av.ib.gid_index];
+			} else {
 				ib_get_cached_gid(ib_dev,
 						  be32_to_cpu(ah->av.ib.port_pd) >> 24,
 						  ah->av.ib.gid_index,
 						  &sqp->ud_header.grh.source_gid, NULL);
+			}
 		}
 		memcpy(sqp->ud_header.grh.destination_gid.raw,
 		       ah->av.ib.dgid, 16);
@@ -288,7 +288,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+		return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+	return 0;
 }
 
 enum {
@@ -1428,6 +1430,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 					     dmac_47_16),
 				ib_spec->eth.val.dst_mac);
 
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+					     smac_47_16),
+				ib_spec->eth.mask.src_mac);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+					     smac_47_16),
+				ib_spec->eth.val.src_mac);
+
 		if (ib_spec->eth.mask.vlan_tag) {
 			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 				 vlan_tag, 1);
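
The parse_flow_attr() hunk copies the source MAC into both halves of the flow-table entry: the match criteria (mask) and the match value. A hedged, generic sketch of that pairing using plain buffers instead of the mlx5 MLX5_ADDR_OF() accessors; the function name is illustrative:

#include <linux/etherdevice.h>

/* A steering rule carries a mask ("criteria") and a value; programming
 * only one half leaves the source MAC effectively wildcarded.
 */
static void demo_set_src_mac_match(u8 *criteria_smac, u8 *value_smac,
				   const u8 *smac_mask, const u8 *smac_val)
{
	ether_addr_copy(criteria_smac, smac_mask);
	ether_addr_copy(value_smac, smac_val);
}
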
@@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr)
 {
 	rvt_deinit_mregion(&mr->mr);
 	rvt_free_lkey(&mr->mr);
-	vfree(mr);
+	kfree(mr);
 }
 
 /**
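
As the commit title says, the rdmavt memory region is kzalloc'ed, so releasing it with vfree() handed a slab pointer to the vmalloc allocator; kfree() is the matching call. A generic sketch of the pairing rule, with illustrative allocations that are not from rdmavt:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Free with the API that allocated. */
static int demo_alloc_free(void)
{
	void *small = kzalloc(64, GFP_KERNEL);	/* slab allocation    */
	void *large = vzalloc(1 << 20);		/* vmalloc allocation */

	if (!small || !large) {
		kfree(small);			/* kfree(NULL) is a no-op */
		vfree(large);			/* vfree(NULL) is a no-op */
		return -ENOMEM;
	}

	kfree(small);	/* pairs with kmalloc()/kzalloc() */
	vfree(large);	/* pairs with vmalloc()/vzalloc() */
	return 0;
}
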
@@ -362,15 +362,34 @@ static int __init rxe_module_init(void)
 		return err;
 	}
 
-	err = rxe_net_init();
+	err = rxe_net_ipv4_init();
 	if (err) {
-		pr_err("rxe: unable to init\n");
+		pr_err("rxe: unable to init ipv4 tunnel\n");
 		rxe_cache_exit();
-		return err;
+		goto exit;
+	}
+
+	err = rxe_net_ipv6_init();
+	if (err) {
+		pr_err("rxe: unable to init ipv6 tunnel\n");
+		rxe_cache_exit();
+		goto exit;
 	}
+
+	err = register_netdevice_notifier(&rxe_net_notifier);
+	if (err) {
+		pr_err("rxe: Failed to rigister netdev notifier\n");
+		goto exit;
+	}
+
 	pr_info("rxe: loaded\n");
 
 	return 0;
+
+exit:
+	rxe_release_udp_tunnel(recv_sockets.sk4);
+	rxe_release_udp_tunnel(recv_sockets.sk6);
+	return err;
 }
 
 static void __exit rxe_module_exit(void)
@@ -689,7 +689,14 @@ int rxe_completer(void *arg)
 					qp->req.need_retry = 1;
 					rxe_run_task(&qp->req.task, 1);
 				}
+
+				if (pkt) {
+					rxe_drop_ref(pkt->qp);
+					kfree_skb(skb);
+				}
+
 				goto exit;
 			} else {
 				wqe->status = IB_WC_RETRY_EXC_ERR;
 				state = COMPST_ERROR;
@@ -716,6 +723,12 @@ int rxe_completer(void *arg)
 		case COMPST_ERROR:
 			do_complete(qp, wqe);
 			rxe_qp_error(qp);
+
+			if (pkt) {
+				rxe_drop_ref(pkt->qp);
+				kfree_skb(skb);
+			}
+
 			goto exit;
 		}
 	}
@@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 	return sock;
 }
 
-static void rxe_release_udp_tunnel(struct socket *sk)
+void rxe_release_udp_tunnel(struct socket *sk)
 {
-	udp_tunnel_sock_release(sk);
+	if (sk)
+		udp_tunnel_sock_release(sk);
 }
 
 static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
@@ -658,51 +659,45 @@ static int rxe_notify(struct notifier_block *not_blk,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block rxe_net_notifier = {
+struct notifier_block rxe_net_notifier = {
 	.notifier_call = rxe_notify,
 };
 
-int rxe_net_init(void)
+int rxe_net_ipv4_init(void)
 {
-	int err;
-
 	spin_lock_init(&dev_list_lock);
 
-	recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
-			htons(ROCE_V2_UDP_DPORT), true);
-	if (IS_ERR(recv_sockets.sk6)) {
-		recv_sockets.sk6 = NULL;
-		pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
-		return -1;
-	}
-
 	recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
 			htons(ROCE_V2_UDP_DPORT), false);
 	if (IS_ERR(recv_sockets.sk4)) {
-		rxe_release_udp_tunnel(recv_sockets.sk6);
 		recv_sockets.sk4 = NULL;
-		recv_sockets.sk6 = NULL;
 		pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
 		return -1;
 	}
 
-	err = register_netdevice_notifier(&rxe_net_notifier);
-	if (err) {
-		rxe_release_udp_tunnel(recv_sockets.sk6);
-		rxe_release_udp_tunnel(recv_sockets.sk4);
-		pr_err("rxe: Failed to rigister netdev notifier\n");
-	}
-
-	return err;
+	return 0;
 }
 
-void rxe_net_exit(void)
+int rxe_net_ipv6_init(void)
 {
-	if (recv_sockets.sk6)
-		rxe_release_udp_tunnel(recv_sockets.sk6);
+#if IS_ENABLED(CONFIG_IPV6)
 
-	if (recv_sockets.sk4)
-		rxe_release_udp_tunnel(recv_sockets.sk4);
+	spin_lock_init(&dev_list_lock);
 
+	recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
+						htons(ROCE_V2_UDP_DPORT), true);
+	if (IS_ERR(recv_sockets.sk6)) {
+		recv_sockets.sk6 = NULL;
+		pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
+		return -1;
+	}
+#endif
+	return 0;
+}
+
+void rxe_net_exit(void)
+{
+	rxe_release_udp_tunnel(recv_sockets.sk6);
+	rxe_release_udp_tunnel(recv_sockets.sk4);
 	unregister_netdevice_notifier(&rxe_net_notifier);
 }
@@ -44,10 +44,13 @@ struct rxe_recv_sockets {
 };
 
 extern struct rxe_recv_sockets recv_sockets;
+extern struct notifier_block rxe_net_notifier;
+void rxe_release_udp_tunnel(struct socket *sk);
 
 struct rxe_dev *rxe_net_add(struct net_device *ndev);
 
-int rxe_net_init(void);
+int rxe_net_ipv4_init(void);
+int rxe_net_ipv6_init(void);
 void rxe_net_exit(void);
 
 #endif /* RXE_NET_H */
@@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 		 * make a copy of the skb to post to the next qp
 		 */
 		skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
-				skb_clone(skb, GFP_KERNEL) : NULL;
+				skb_clone(skb, GFP_ATOMIC) : NULL;
 
 		pkt->qp = qp;
 		rxe_add_ref(qp);
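
Per the "fix GFP_KERNEL in spinlock context" commit, rxe_rcv_mcast_pkt() clones the skb while a spinlock is held, a context in which GFP_KERNEL may sleep but GFP_ATOMIC may not; the trade-off is that an atomic allocation can fail, so a NULL clone still has to be tolerated. A condensed sketch of the rule, not the rxe path itself:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* Cloning an skb while a spinlock is held must not sleep. */
static struct sk_buff *demo_clone_under_lock(struct sk_buff *skb)
{
	struct sk_buff *copy;

	spin_lock_bh(&demo_lock);
	/* GFP_KERNEL could sleep here; GFP_ATOMIC never sleeps but can fail. */
	copy = skb_clone(skb, GFP_ATOMIC);
	spin_unlock_bh(&demo_lock);

	return copy;	/* may be NULL; the caller must handle that */
}
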
@@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 }
 
 static void update_wqe_state(struct rxe_qp *qp,
 			     struct rxe_send_wqe *wqe,
-			     struct rxe_pkt_info *pkt,
-			     enum wqe_state *prev_state)
+			     struct rxe_pkt_info *pkt)
 {
-	enum wqe_state prev_state_ = wqe->state;
-
 	if (pkt->mask & RXE_END_MASK) {
 		if (qp_type(qp) == IB_QPT_RC)
 			wqe->state = wqe_state_pending;
 	} else {
 		wqe->state = wqe_state_processing;
 	}
-
-	*prev_state = prev_state_;
 }
 
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			 struct rxe_pkt_info *pkt, int payload)
+static void update_wqe_psn(struct rxe_qp *qp,
+			   struct rxe_send_wqe *wqe,
+			   struct rxe_pkt_info *pkt,
+			   int payload)
 {
 	/* number of packets left to send including current one */
 	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
 	else
 		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+}
 
-	qp->req.opcode = pkt->opcode;
+static void save_state(struct rxe_send_wqe *wqe,
+		       struct rxe_qp *qp,
+		       struct rxe_send_wqe *rollback_wqe,
+		       struct rxe_qp *rollback_qp)
+{
+	rollback_wqe->state = wqe->state;
+	rollback_wqe->first_psn = wqe->first_psn;
+	rollback_wqe->last_psn = wqe->last_psn;
+	rollback_qp->req.psn = qp->req.psn;
+}
+
+static void rollback_state(struct rxe_send_wqe *wqe,
+			   struct rxe_qp *qp,
+			   struct rxe_send_wqe *rollback_wqe,
+			   struct rxe_qp *rollback_qp)
+{
+	wqe->state = rollback_wqe->state;
+	wqe->first_psn = rollback_wqe->first_psn;
+	wqe->last_psn = rollback_wqe->last_psn;
+	qp->req.psn = rollback_qp->req.psn;
+}
+
+static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+			 struct rxe_pkt_info *pkt, int payload)
+{
+	qp->req.opcode = pkt->opcode;
 
 	if (pkt->mask & RXE_END_MASK)
 		qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
@@ -571,7 +593,8 @@ int rxe_requester(void *arg)
 	int mtu;
 	int opcode;
 	int ret;
-	enum wqe_state prev_state;
+	struct rxe_qp rollback_qp;
+	struct rxe_send_wqe rollback_wqe;
 
 next_wqe:
 	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -688,13 +711,21 @@ int rxe_requester(void *arg)
 		goto err;
 	}
 
-	update_wqe_state(qp, wqe, &pkt, &prev_state);
+	/*
+	 * To prevent a race on wqe access between requester and completer,
+	 * wqe members state and psn need to be set before calling
+	 * rxe_xmit_packet().
+	 * Otherwise, completer might initiate an unjustified retry flow.
+	 */
+	save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+	update_wqe_state(qp, wqe, &pkt);
+	update_wqe_psn(qp, wqe, &pkt, payload);
 	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
 	if (ret) {
 		qp->need_req_skb = 1;
 		kfree_skb(skb);
 
-		wqe->state = prev_state;
+		rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
 
 		if (ret == -EAGAIN) {
 			rxe_run_task(&qp->req.task, 1);
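
The requester hunks are about ordering: wqe->state and the PSNs must be published before rxe_xmit_packet(), because once the packet is out the completer may run and read them, and the saved copies restore the pre-send view if the transmit fails. A compressed, self-contained sketch of that save/publish/rollback ordering; the types and the injected xmit callback are stand-ins, not rxe's:

#include <linux/types.h>

/* Stand-in state; the real rxe structures carry much more. */
struct demo_wqe { int state; u32 first_psn; u32 last_psn; };
struct demo_qp  { u32 req_psn; struct demo_wqe wqe; };

static int demo_send_one(struct demo_qp *qp, u32 next_psn, int new_state,
			 int (*xmit)(struct demo_qp *qp))
{
	struct demo_wqe saved_wqe = qp->wqe;	/* save_state()       */
	u32 saved_psn = qp->req_psn;
	int ret;

	qp->wqe.state = new_state;		/* update_wqe_state() */
	qp->wqe.last_psn = next_psn;		/* update_wqe_psn()   */
	qp->req_psn = next_psn + 1;

	ret = xmit(qp);				/* completer may run from here on */
	if (ret) {				/* rollback_state() on failure    */
		qp->wqe = saved_wqe;
		qp->req_psn = saved_psn;
	}
	return ret;
}
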
@@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	free_rd_atomic_resource(qp, res);
 	rxe_advance_resp_resource(qp);
 
+	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+
 	res->type = RXE_ATOMIC_MASK;
 	res->atomic.skb = skb;
-	res->first_psn = qp->resp.psn;
-	res->last_psn = qp->resp.psn;
-	res->cur_psn = qp->resp.psn;
+	res->first_psn = ack_pkt.psn;
+	res->last_psn = ack_pkt.psn;
+	res->cur_psn = ack_pkt.psn;
 
 	rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
 	if (rc) {
@@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 			rc = RESPST_CLEANUP;
 			goto out;
 		}
-		bth_set_psn(SKB_TO_PKT(skb_copy),
-			    qp->resp.psn - 1);
+
 		/* Resend the result. */
 		rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
 				     pkt, skb_copy);
@@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 	}
 
 	if (level == IPOIB_FLUSH_LIGHT) {
+		int oper_up;
 		ipoib_mark_paths_invalid(dev);
+		/* Set IPoIB operation as down to prevent races between:
+		 * the flush flow which leaves MCG and on the fly joins
+		 * which can happen during that time. mcast restart task
+		 * should deal with join requests we missed.
+		 */
+		oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 		ipoib_mcast_dev_flush(dev);
+		if (oper_up)
+			set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 		ipoib_flush_ah(dev);
 	}
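
The IPoIB hunk brackets the light-flush multicast teardown with test_and_clear_bit()/set_bit() on IPOIB_FLAG_OPER_UP, so joins racing with the flush see the interface as not operational and back off, and the bit is restored afterwards only if it had been set. A self-contained sketch of that bracket pattern; the flag bit and the callback are illustrative:

#include <linux/bitops.h>

#define DEMO_FLAG_OPER_UP	0	/* illustrative bit, not IPoIB's definition */

/* Drop the "operational" bit around a flush so concurrent joins back off,
 * then restore it only if it had actually been set beforehand.
 */
static void demo_flush_oper_down(unsigned long *flags, void (*flush)(void))
{
	int oper_up = test_and_clear_bit(DEMO_FLAG_OPER_UP, flags);

	flush();	/* joins that test the bit in this window are refused */

	if (oper_up)
		set_bit(DEMO_FLAG_OPER_UP, flags);
}
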