Commit 69088822 authored by Sowmini Varadhan, committed by David S. Miller

sunvnet: NAPIfy sunvnet

Move Rx packet processing to the NAPI poll callback.
Disable the VIO interrupt and unconditionally go into NAPI
context from vnet_event.
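
The interrupt handler now just latches the event bits and defers
all work to the poll loop. A condensed sketch of the handoff,
paraphrased from the vnet_event/vnet_poll hunks in the diff below
(the actual patch goes through a local vio pointer; error handling
omitted):

    /* hard-irq side: record the event, mask RX interrupt, punt to NAPI */
    static void vnet_event(void *arg, int event)
    {
            struct vnet_port *port = arg;

            port->rx_event |= event;
            vio_set_intr(port->vio.vdev->rx_ino, HV_INTR_DISABLED);
            napi_schedule(&port->napi);
    }

    /* poll side: drain up to 'budget' packets, then unmask and re-arm */
    static int vnet_poll(struct napi_struct *napi, int budget)
    {
            struct vnet_port *port = container_of(napi, struct vnet_port,
                                                  napi);
            int processed = vnet_event_napi(port, budget);

            if (processed < budget) {       /* ring fully drained */
                    napi_complete(napi);
                    vio_set_intr(port->vio.vdev->rx_ino, HV_INTR_ENABLED);
            }
            return processed;
    }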

Note that we want to minimize the number of LDC
STOP/START messages sent. Specifically, do not send a STOP
message if vnet_walk_rx does not read all the available descriptors
because of the NAPI budget limitation. Instead, note the end index
as part of port state, and resume from this index when the
next poll callback is triggered.
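
A condensed sketch of that bookkeeping, paraphrased from the
vnet_walk_rx hunk in the diff below (dr is the RX descriptor-ring
state, ack_end the last index processed this pass):

    if (send_ack) {                 /* ring fully drained */
            port->napi_resume = false;
            return vnet_send_ack(port, dr, ack_start, ack_end,
                                 VIO_DRING_STOPPED);
    } else {                        /* budget exhausted: defer the STOP */
            port->napi_resume = true;
            port->napi_stop_idx = ack_end;  /* remember where we left off */
            return 1;
    }

On the next poll, vnet_event_napi() sees napi_resume set and
synthesizes a VIO_DRING_DATA message whose start_idx is
next_idx(port->napi_stop_idx, dr), so the walk picks up exactly
where it stopped and the peer never sees an extra STOP/START pair.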
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Raghuram Kothakota <raghuram.kothakota@oracle.com>
Acked-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 132fb579
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
@@ -311,9 +311,7 @@ static int vnet_rx_one(struct vnet_port *port, unsigned int len,
 
 	dev->stats.rx_packets++;
 	dev->stats.rx_bytes += len;
-
-	netif_rx(skb);
-
+	napi_gro_receive(&port->napi, skb);
 	return 0;
 
 out_free_skb:
@@ -430,6 +428,7 @@ static int vnet_walk_rx_one(struct vnet_port *port,
 	struct vio_driver_state *vio = &port->vio;
 	int err;
 
+	BUG_ON(desc == NULL);
 	if (IS_ERR(desc))
 		return PTR_ERR(desc);
@@ -456,10 +455,11 @@ static int vnet_walk_rx_one(struct vnet_port *port,
 }
 
 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
-			u32 start, u32 end)
+			u32 start, u32 end, int *npkts, int budget)
 {
 	struct vio_driver_state *vio = &port->vio;
 	int ack_start = -1, ack_end = -1;
+	bool send_ack = true;
 
 	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);
@@ -471,6 +471,7 @@ static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
 			return err;
 		if (err != 0)
 			break;
+		(*npkts)++;
 		if (ack_start == -1)
 			ack_start = start;
 		ack_end = start;
@@ -482,13 +483,26 @@ static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
 				return err;
 			ack_start = -1;
 		}
+		if ((*npkts) >= budget) {
+			send_ack = false;
+			break;
+		}
 	}
 	if (unlikely(ack_start == -1))
 		ack_start = ack_end = prev_idx(start, dr);
-	return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
+	if (send_ack) {
+		port->napi_resume = false;
+		return vnet_send_ack(port, dr, ack_start, ack_end,
+				     VIO_DRING_STOPPED);
+	} else {
+		port->napi_resume = true;
+		port->napi_stop_idx = ack_end;
+		return 1;
+	}
 }
 
-static int vnet_rx(struct vnet_port *port, void *msgbuf)
+static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
+		   int budget)
 {
 	struct vio_dring_data *pkt = msgbuf;
 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
@@ -505,11 +519,13 @@ static int vnet_rx(struct vnet_port *port, void *msgbuf)
 		return 0;
 	}
 
-	dr->rcv_nxt++;
+	if (!port->napi_resume)
+		dr->rcv_nxt++;
 
 	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */
 
-	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
+	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
+			    npkts, budget);
 }
 
 static int idx_is_pending(struct vio_dring_state *dr, u32 end)
@@ -542,9 +558,12 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf)
 	if (unlikely(!idx_is_pending(dr, end)))
 		return 0;
 
+	vp = port->vp;
+	dev = vp->dev;
 	/* sync for race conditions with vnet_start_xmit() and tell xmit it
 	 * is time to send a trigger.
 	 */
+	netif_tx_lock(dev);
 	dr->cons = next_idx(end, dr);
 	desc = vio_dring_entry(dr, dr->cons);
 	if (desc->hdr.state == VIO_DESC_READY && port->start_cons) {
@@ -559,10 +578,8 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf)
 	} else {
 		port->start_cons = true;
 	}
+	netif_tx_unlock(dev);
 
-	vp = port->vp;
-	dev = vp->dev;
 	if (unlikely(netif_queue_stopped(dev) &&
 		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
 		return 1;
@@ -591,9 +608,8 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
 	return 0;
 }
 
-static void maybe_tx_wakeup(unsigned long param)
+static void maybe_tx_wakeup(struct vnet *vp)
 {
-	struct vnet *vp = (struct vnet *)param;
 	struct net_device *dev = vp->dev;
 
 	netif_tx_lock(dev);
@@ -617,32 +633,43 @@ static void maybe_tx_wakeup(unsigned long param)
 	netif_tx_unlock(dev);
 }
 
-static void vnet_event(void *arg, int event)
+static inline bool port_is_up(struct vnet_port *vnet)
+{
+	struct vio_driver_state *vio = &vnet->vio;
+
+	return !!(vio->hs_state & VIO_HS_COMPLETE);
+}
+
+static int vnet_event_napi(struct vnet_port *port, int budget)
 {
-	struct vnet_port *port = arg;
 	struct vio_driver_state *vio = &port->vio;
-	unsigned long flags;
 	int tx_wakeup, err;
+	int npkts = 0;
+	int event = (port->rx_event & LDC_EVENT_RESET);
 
-	spin_lock_irqsave(&vio->lock, flags);
-
+ldc_ctrl:
 	if (unlikely(event == LDC_EVENT_RESET ||
 		     event == LDC_EVENT_UP)) {
 		vio_link_state_change(vio, event);
-		spin_unlock_irqrestore(&vio->lock, flags);
 
 		if (event == LDC_EVENT_RESET) {
 			port->rmtu = 0;
 			vio_port_up(vio);
 		}
-		return;
+		port->rx_event = 0;
+		return 0;
 	}
+	/* We may have multiple LDC events in rx_event. Unroll send_events() */
+	event = (port->rx_event & LDC_EVENT_UP);
+	port->rx_event &= ~(LDC_EVENT_RESET|LDC_EVENT_UP);
+	if (event == LDC_EVENT_UP)
+		goto ldc_ctrl;
+	event = port->rx_event;
+	if (!(event & LDC_EVENT_DATA_READY))
+		return 0;
 
-	if (unlikely(event != LDC_EVENT_DATA_READY)) {
-		pr_warn("Unexpected LDC event %d\n", event);
-		spin_unlock_irqrestore(&vio->lock, flags);
-		return;
-	}
+	/* we dont expect any other bits than RESET, UP, DATA_READY */
+	BUG_ON(event != LDC_EVENT_DATA_READY);
+
 	tx_wakeup = err = 0;
 	while (1) {
@@ -651,6 +678,21 @@ static void vnet_event(void *arg, int event)
 			u64 raw[8];
 		} msgbuf;
 
+		if (port->napi_resume) {
+			struct vio_dring_data *pkt =
+				(struct vio_dring_data *)&msgbuf;
+			struct vio_dring_state *dr =
+				&port->vio.drings[VIO_DRIVER_RX_RING];
+
+			pkt->tag.type = VIO_TYPE_DATA;
+			pkt->tag.stype = VIO_SUBTYPE_INFO;
+			pkt->tag.stype_env = VIO_DRING_DATA;
+			pkt->seq = dr->rcv_nxt;
+			pkt->start_idx = next_idx(port->napi_stop_idx, dr);
+			pkt->end_idx = -1;
+			goto napi_resume;
+		}
+ldc_read:
 		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
 		if (unlikely(err < 0)) {
 			if (err == -ECONNRESET)
@@ -667,10 +709,22 @@ static void vnet_event(void *arg, int event)
 		err = vio_validate_sid(vio, &msgbuf.tag);
 		if (err < 0)
 			break;
-
+napi_resume:
 		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
 			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
-				err = vnet_rx(port, &msgbuf);
+				if (!port_is_up(port)) {
+					/* failures like handshake_failure()
+					 * may have cleaned up dring, but
+					 * NAPI polling may bring us here.
+					 */
+					err = -ECONNRESET;
+					break;
+				}
+				err = vnet_rx(port, &msgbuf, &npkts, budget);
+				if (npkts >= budget)
+					break;
+				if (npkts == 0 && err != -ECONNRESET)
+					goto ldc_read;
 			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
 				err = vnet_ack(port, &msgbuf);
 				if (err > 0)
@@ -691,15 +745,33 @@ static void vnet_event(void *arg, int event)
 		if (err == -ECONNRESET)
 			break;
 	}
-	spin_unlock(&vio->lock);
-	/* Kick off a tasklet to wake the queue.  We cannot call
-	 * maybe_tx_wakeup directly here because we could deadlock on
-	 * netif_tx_lock() with dev_watchdog()
-	 */
 	if (unlikely(tx_wakeup && err != -ECONNRESET))
-		tasklet_schedule(&port->vp->vnet_tx_wakeup);
+		maybe_tx_wakeup(port->vp);
+
+	return npkts;
+}
 
-	local_irq_restore(flags);
+static int vnet_poll(struct napi_struct *napi, int budget)
+{
+	struct vnet_port *port = container_of(napi, struct vnet_port, napi);
+	struct vio_driver_state *vio = &port->vio;
+	int processed = vnet_event_napi(port, budget);
+
+	if (processed < budget) {
+		napi_complete(napi);
+		vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
+	}
+	return processed;
+}
+
+static void vnet_event(void *arg, int event)
+{
+	struct vnet_port *port = arg;
+	struct vio_driver_state *vio = &port->vio;
+
+	port->rx_event |= event;
+	vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
+	napi_schedule(&port->napi);
 }
 
 static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
@@ -746,13 +818,6 @@ static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
 	return err;
 }
 
-static inline bool port_is_up(struct vnet_port *vnet)
-{
-	struct vio_driver_state *vio = &vnet->vio;
-
-	return !!(vio->hs_state & VIO_HS_COMPLETE);
-}
-
 struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
 {
 	unsigned int hash = vnet_hashfn(skb->data);
@@ -1342,6 +1407,21 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
 	return err;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vnet_poll_controller(struct net_device *dev)
+{
+	struct vnet *vp = netdev_priv(dev);
+	struct vnet_port *port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	if (!list_empty(&vp->port_list)) {
+		port = list_entry(vp->port_list.next, struct vnet_port, list);
+		napi_schedule(&port->napi);
+	}
+	spin_unlock_irqrestore(&vp->lock, flags);
+}
+#endif
 static LIST_HEAD(vnet_list);
 static DEFINE_MUTEX(vnet_list_mutex);
 
@@ -1354,6 +1434,9 @@ static const struct net_device_ops vnet_ops = {
 	.ndo_tx_timeout		= vnet_tx_timeout,
 	.ndo_change_mtu		= vnet_change_mtu,
 	.ndo_start_xmit		= vnet_start_xmit,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= vnet_poll_controller,
+#endif
 };
 
 static struct vnet *vnet_new(const u64 *local_mac)
@@ -1374,7 +1457,6 @@ static struct vnet *vnet_new(const u64 *local_mac)
 	vp = netdev_priv(dev);
 
 	spin_lock_init(&vp->lock);
-	tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
 	vp->dev = dev;
 
 	INIT_LIST_HEAD(&vp->port_list);
@@ -1434,7 +1516,6 @@ static void vnet_cleanup(void)
 		vp = list_first_entry(&vnet_list, struct vnet, list);
 		list_del(&vp->list);
 		dev = vp->dev;
-		tasklet_kill(&vp->vnet_tx_wakeup);
 		/* vio_unregister_driver() should have cleaned up port_list */
 		BUG_ON(!list_empty(&vp->port_list));
 		unregister_netdev(dev);
@@ -1536,6 +1617,8 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	if (err)
 		goto err_out_free_port;
 
+	netif_napi_add(port->vp->dev, &port->napi, vnet_poll, NAPI_POLL_WEIGHT);
+
 	err = vnet_port_alloc_tx_bufs(port);
 	if (err)
 		goto err_out_free_ldc;
@@ -1564,6 +1647,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	setup_timer(&port->clean_timer, vnet_clean_timer_expire,
 		    (unsigned long)port);
 
+	napi_enable(&port->napi);
 	vio_port_up(&port->vio);
 
 	mdesc_release(hp);
@@ -1571,6 +1655,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	return 0;
 
 err_out_free_ldc:
+	netif_napi_del(&port->napi);
 	vio_ldc_free(&port->vio);
 
 err_out_free_port:
@@ -1592,11 +1677,13 @@ static int vnet_port_remove(struct vio_dev *vdev)
 		del_timer_sync(&port->vio.timer);
 		del_timer_sync(&port->clean_timer);
 
+		napi_disable(&port->napi);
 		spin_lock_irqsave(&vp->lock, flags);
 		list_del(&port->list);
 		hlist_del(&port->hash);
 		spin_unlock_irqrestore(&vp->lock, flags);
 
+		netif_napi_del(&port->napi);
 		vnet_port_free_tx_bufs(port);
 		vio_ldc_free(&port->vio);
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
@@ -56,6 +56,11 @@ struct vnet_port {
 	struct timer_list	clean_timer;
 
 	u64			rmtu;
+
+	struct napi_struct	napi;
+	u32			napi_stop_idx;
+	bool			napi_resume;
+	int			rx_event;
 };
 
 static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio)
@@ -97,7 +102,6 @@ struct vnet {
 	struct list_head	list;
 	u64			local_mac;
 
-	struct tasklet_struct	vnet_tx_wakeup;
 };
 
 #endif /* _SUNVNET_H */