Commit ca8d0fa7 authored by Daniel Borkmann

Merge branch 'bpf-fix-xsk-wakeup'

Maxim Mikityanskiy says:

====================
This series addresses the issue described in the commit message of the
first patch: lack of synchronization between XSK wakeup and destroying
the resources used by XSK wakeup. The idea is similar to napi_synchronize.
The series contains fixes for the drivers that implement XSK.

v2 incorporates changes suggested by Björn:

1. Call synchronize_rcu in Intel drivers only if the XDP program is
   being unloaded.
2. Don't forget rcu_read_lock when wakeup is called from xsk_poll.
3. Use xs->zc as the condition to call ndo_xsk_wakeup.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents e4730423 c0fdccfd
...@@ -1152,7 +1152,7 @@ void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags); ...@@ -1152,7 +1152,7 @@ void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{ {
return !!vsi->xdp_prog; return !!READ_ONCE(vsi->xdp_prog);
} }
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
......
...@@ -6823,8 +6823,8 @@ void i40e_down(struct i40e_vsi *vsi) ...@@ -6823,8 +6823,8 @@ void i40e_down(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_queue_pairs; i++) { for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_clean_tx_ring(vsi->tx_rings[i]); i40e_clean_tx_ring(vsi->tx_rings[i]);
if (i40e_enabled_xdp_vsi(vsi)) { if (i40e_enabled_xdp_vsi(vsi)) {
/* Make sure that in-progress ndo_xdp_xmit /* Make sure that in-progress ndo_xdp_xmit and
* calls are completed. * ndo_xsk_wakeup calls are completed.
*/ */
synchronize_rcu(); synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[i]); i40e_clean_tx_ring(vsi->xdp_rings[i]);
...@@ -12546,8 +12546,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, ...@@ -12546,8 +12546,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
old_prog = xchg(&vsi->xdp_prog, prog); old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) if (need_reset) {
if (!prog)
/* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
i40e_reset_and_rebuild(pf, true, true); i40e_reset_and_rebuild(pf, true, true);
}
for (i = 0; i < vsi->num_queue_pairs; i++) for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
......
...@@ -787,8 +787,12 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) ...@@ -787,8 +787,12 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{ {
struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi; struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
struct i40e_ring *ring; struct i40e_ring *ring;
if (test_bit(__I40E_CONFIG_BUSY, pf->state))
return -ENETDOWN;
if (test_bit(__I40E_VSI_DOWN, vsi->state)) if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN; return -ENETDOWN;
......
...@@ -10261,7 +10261,12 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) ...@@ -10261,7 +10261,12 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
/* If transitioning XDP modes reconfigure rings */ /* If transitioning XDP modes reconfigure rings */
if (need_reset) { if (need_reset) {
int err = ixgbe_setup_tc(dev, adapter->hw_tcs); int err;
if (!prog)
/* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
err = ixgbe_setup_tc(dev, adapter->hw_tcs);
if (err) { if (err) {
rcu_assign_pointer(adapter->xdp_prog, old_prog); rcu_assign_pointer(adapter->xdp_prog, old_prog);
......
...@@ -709,10 +709,14 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) ...@@ -709,10 +709,14 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (qid >= adapter->num_xdp_queues) if (qid >= adapter->num_xdp_queues)
return -ENXIO; return -ENXIO;
if (!adapter->xdp_ring[qid]->xsk_umem) ring = adapter->xdp_ring[qid];
if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
return -ENETDOWN;
if (!ring->xsk_umem)
return -ENXIO; return -ENXIO;
ring = adapter->xdp_ring[qid];
if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
u64 eics = BIT_ULL(ring->q_vector->v_idx); u64 eics = BIT_ULL(ring->q_vector->v_idx);
......
...@@ -760,7 +760,7 @@ enum { ...@@ -760,7 +760,7 @@ enum {
MLX5E_STATE_OPENED, MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING, MLX5E_STATE_DESTROYING,
MLX5E_STATE_XDP_TX_ENABLED, MLX5E_STATE_XDP_TX_ENABLED,
MLX5E_STATE_XDP_OPEN, MLX5E_STATE_XDP_ACTIVE,
}; };
struct mlx5e_rqt { struct mlx5e_rqt {
......
...@@ -75,12 +75,18 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, ...@@ -75,12 +75,18 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
{ {
set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
if (priv->channels.params.xdp_prog)
set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
} }
static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
{ {
if (priv->channels.params.xdp_prog)
clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* let other device's napi(s) see our new state */ /* Let other device's napi(s) and XSK wakeups see our new state. */
synchronize_rcu(); synchronize_rcu();
} }
...@@ -89,19 +95,9 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) ...@@ -89,19 +95,9 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
} }
static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
}
static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
{
clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
}
static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
{ {
return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
} }
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
......
...@@ -144,6 +144,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) ...@@ -144,6 +144,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
{ {
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
napi_synchronize(&c->napi); napi_synchronize(&c->napi);
synchronize_rcu(); /* Sync with the XSK wakeup. */
mlx5e_close_rq(&c->xskrq); mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq); mlx5e_close_cq(&c->xskrq.cq);
......
...@@ -14,7 +14,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) ...@@ -14,7 +14,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
struct mlx5e_channel *c; struct mlx5e_channel *c;
u16 ix; u16 ix;
if (unlikely(!mlx5e_xdp_is_open(priv))) if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN; return -ENETDOWN;
if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
......
...@@ -3000,12 +3000,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) ...@@ -3000,12 +3000,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
int mlx5e_open_locked(struct net_device *netdev) int mlx5e_open_locked(struct net_device *netdev)
{ {
struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_priv *priv = netdev_priv(netdev);
bool is_xdp = priv->channels.params.xdp_prog;
int err; int err;
set_bit(MLX5E_STATE_OPENED, &priv->state); set_bit(MLX5E_STATE_OPENED, &priv->state);
if (is_xdp)
mlx5e_xdp_set_open(priv);
err = mlx5e_open_channels(priv, &priv->channels); err = mlx5e_open_channels(priv, &priv->channels);
if (err) if (err)
...@@ -3020,8 +3017,6 @@ int mlx5e_open_locked(struct net_device *netdev) ...@@ -3020,8 +3017,6 @@ int mlx5e_open_locked(struct net_device *netdev)
return 0; return 0;
err_clear_state_opened_flag: err_clear_state_opened_flag:
if (is_xdp)
mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state); clear_bit(MLX5E_STATE_OPENED, &priv->state);
return err; return err;
} }
...@@ -3053,8 +3048,6 @@ int mlx5e_close_locked(struct net_device *netdev) ...@@ -3053,8 +3048,6 @@ int mlx5e_close_locked(struct net_device *netdev)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0; return 0;
if (priv->channels.params.xdp_prog)
mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state); clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev); netif_carrier_off(priv->netdev);
...@@ -4371,16 +4364,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) ...@@ -4371,16 +4364,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return 0; return 0;
} }
static int mlx5e_xdp_update_state(struct mlx5e_priv *priv)
{
if (priv->channels.params.xdp_prog)
mlx5e_xdp_set_open(priv);
else
mlx5e_xdp_set_closed(priv);
return 0;
}
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{ {
struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_priv *priv = netdev_priv(netdev);
...@@ -4415,7 +4398,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) ...@@ -4415,7 +4398,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mlx5e_set_rq_type(priv->mdev, &new_channels.params); mlx5e_set_rq_type(priv->mdev, &new_channels.params);
old_prog = priv->channels.params.xdp_prog; old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (err) if (err)
goto unlock; goto unlock;
} else { } else {
......
...@@ -334,12 +334,21 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) ...@@ -334,12 +334,21 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
} }
EXPORT_SYMBOL(xsk_umem_consume_tx); EXPORT_SYMBOL(xsk_umem_consume_tx);
static int xsk_zc_xmit(struct xdp_sock *xs) static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{ {
struct net_device *dev = xs->dev; struct net_device *dev = xs->dev;
int err;
rcu_read_lock();
err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
rcu_read_unlock();
return err;
}
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, static int xsk_zc_xmit(struct xdp_sock *xs)
XDP_WAKEUP_TX); {
return xsk_wakeup(xs, XDP_WAKEUP_TX);
} }
static void xsk_destruct_skb(struct sk_buff *skb) static void xsk_destruct_skb(struct sk_buff *skb)
...@@ -453,19 +462,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, ...@@ -453,19 +462,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__poll_t mask = datagram_poll(file, sock, wait); __poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk); struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
struct xdp_umem *umem; struct xdp_umem *umem;
if (unlikely(!xsk_is_bound(xs))) if (unlikely(!xsk_is_bound(xs)))
return mask; return mask;
dev = xs->dev;
umem = xs->umem; umem = xs->umem;
if (umem->need_wakeup) { if (umem->need_wakeup) {
if (dev->netdev_ops->ndo_xsk_wakeup) if (xs->zc)
dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, xsk_wakeup(xs, umem->need_wakeup);
umem->need_wakeup);
else else
/* Poll needs to drive Tx also in copy mode */ /* Poll needs to drive Tx also in copy mode */
__xsk_sendmsg(sk); __xsk_sendmsg(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment