Commit f53c3fe8 authored by Zoltan Kiss's avatar Zoltan Kiss Committed by David S. Miller

xen-netback: Introduce TX grant mapping

This patch introduces grant mapping on netback TX path. It replaces grant copy
operations, ditching grant copy coalescing along the way. Another solution for
copy coalescing is introduced in "xen-netback: Handle guests with too many
frags", older guests and Windows can broke before that patch applies.
There is a callback (xenvif_zerocopy_callback) from core stack to release the
slots back to the guests when kfree_skb or skb_orphan_frags called. It feeds a
separate dealloc thread, as scheduling NAPI instance from there is inefficient,
therefore we can't do dealloc from the instance.
Signed-off-by: default avatarZoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 3e2234b3
...@@ -79,6 +79,17 @@ struct pending_tx_info { ...@@ -79,6 +79,17 @@ struct pending_tx_info {
* if it is head of one or more tx * if it is head of one or more tx
* reqs * reqs
*/ */
/* Callback data for released SKBs. The callback is always
* xenvif_zerocopy_callback, desc contains the pending_idx, which is
* also an index in pending_tx_info array. It is initialized in
* xenvif_alloc and it never changes.
* skb_shinfo(skb)->destructor_arg points to the first mapped slot's
* callback_struct in this array of struct pending_tx_info's, then ctx
* to the next, or NULL if there is no more slot for this skb.
* ubuf_to_vif is a helper which finds the struct xenvif from a pointer
* to this field.
*/
struct ubuf_info callback_struct;
}; };
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
...@@ -135,13 +146,31 @@ struct xenvif { ...@@ -135,13 +146,31 @@ struct xenvif {
pending_ring_idx_t pending_cons; pending_ring_idx_t pending_cons;
u16 pending_ring[MAX_PENDING_REQS]; u16 pending_ring[MAX_PENDING_REQS];
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
/* Coalescing tx requests before copying makes number of grant /* Coalescing tx requests before copying makes number of grant
* copy ops greater or equal to number of slots required. In * copy ops greater or equal to number of slots required. In
* worst case a tx request consumes 2 gnttab_copy. * worst case a tx request consumes 2 gnttab_copy.
*/ */
struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
/* passed to gnttab_[un]map_refs with pages under (un)mapping */
struct page *pages_to_map[MAX_PENDING_REQS];
struct page *pages_to_unmap[MAX_PENDING_REQS];
/* This prevents zerocopy callbacks to race over dealloc_ring */
spinlock_t callback_lock;
/* This prevents dealloc thread and NAPI instance to race over response
* creation and pending_ring in xenvif_idx_release. In xenvif_tx_err
* it only protect response creation
*/
spinlock_t response_lock;
pending_ring_idx_t dealloc_prod;
pending_ring_idx_t dealloc_cons;
u16 dealloc_ring[MAX_PENDING_REQS];
struct task_struct *dealloc_task;
wait_queue_head_t dealloc_wq;
/* Use kthread for guest RX */ /* Use kthread for guest RX */
struct task_struct *task; struct task_struct *task;
...@@ -228,6 +257,8 @@ int xenvif_tx_action(struct xenvif *vif, int budget); ...@@ -228,6 +257,8 @@ int xenvif_tx_action(struct xenvif *vif, int budget);
int xenvif_kthread_guest_rx(void *data); int xenvif_kthread_guest_rx(void *data);
void xenvif_kick_thread(struct xenvif *vif); void xenvif_kick_thread(struct xenvif *vif);
int xenvif_dealloc_kthread(void *data);
/* Determine whether the needed number of slots (req) are available, /* Determine whether the needed number of slots (req) are available,
* and set req_event if not. * and set req_event if not.
*/ */
...@@ -235,6 +266,12 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed); ...@@ -235,6 +266,12 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
void xenvif_stop_queue(struct xenvif *vif); void xenvif_stop_queue(struct xenvif *vif);
/* Callback from stack when TX packet can be released */
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
/* Unmap a pending page and release it back to the guest */
void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx);
static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
{ {
return MAX_PENDING_REQS - return MAX_PENDING_REQS -
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <xen/events.h> #include <xen/events.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
#include <xen/balloon.h>
#define XENVIF_QUEUE_LENGTH 32 #define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64 #define XENVIF_NAPI_WEIGHT 64
...@@ -87,7 +88,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget) ...@@ -87,7 +88,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
local_irq_save(flags); local_irq_save(flags);
RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
if (!more_to_do) if (!(more_to_do &&
xenvif_tx_pending_slots_available(vif)))
__napi_complete(napi); __napi_complete(napi);
local_irq_restore(flags); local_irq_restore(flags);
...@@ -121,7 +123,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -121,7 +123,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(skb->dev != dev); BUG_ON(skb->dev != dev);
/* Drop the packet if vif is not ready */ /* Drop the packet if vif is not ready */
if (vif->task == NULL || !xenvif_schedulable(vif)) if (vif->task == NULL ||
vif->dealloc_task == NULL ||
!xenvif_schedulable(vif))
goto drop; goto drop;
/* At best we'll need one slot for the header and one for each /* At best we'll need one slot for the header and one for each
...@@ -343,8 +347,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, ...@@ -343,8 +347,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->pending_prod = MAX_PENDING_REQS; vif->pending_prod = MAX_PENDING_REQS;
for (i = 0; i < MAX_PENDING_REQS; i++) for (i = 0; i < MAX_PENDING_REQS; i++)
vif->pending_ring[i] = i; vif->pending_ring[i] = i;
for (i = 0; i < MAX_PENDING_REQS; i++) spin_lock_init(&vif->callback_lock);
vif->mmap_pages[i] = NULL; spin_lock_init(&vif->response_lock);
/* If ballooning is disabled, this will consume real memory, so you
* better enable it. The long term solution would be to use just a
* bunch of valid page descriptors, without dependency on ballooning
*/
err = alloc_xenballooned_pages(MAX_PENDING_REQS,
vif->mmap_pages,
false);
if (err) {
netdev_err(dev, "Could not reserve mmap_pages\n");
return ERR_PTR(-ENOMEM);
}
for (i = 0; i < MAX_PENDING_REQS; i++) {
vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
{ .callback = xenvif_zerocopy_callback,
.ctx = NULL,
.desc = i };
vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
/* /*
* Initialise a dummy MAC address. We choose the numerically * Initialise a dummy MAC address. We choose the numerically
...@@ -382,12 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, ...@@ -382,12 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
BUG_ON(vif->tx_irq); BUG_ON(vif->tx_irq);
BUG_ON(vif->task); BUG_ON(vif->task);
BUG_ON(vif->dealloc_task);
err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref); err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
if (err < 0) if (err < 0)
goto err; goto err;
init_waitqueue_head(&vif->wq); init_waitqueue_head(&vif->wq);
init_waitqueue_head(&vif->dealloc_wq);
if (tx_evtchn == rx_evtchn) { if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */ /* feature-split-event-channels == 0 */
...@@ -431,6 +455,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, ...@@ -431,6 +455,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
vif->task = task; vif->task = task;
task = kthread_create(xenvif_dealloc_kthread,
(void *)vif, "%s-dealloc", vif->dev->name);
if (IS_ERR(task)) {
pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
err = PTR_ERR(task);
goto err_rx_unbind;
}
vif->dealloc_task = task;
rtnl_lock(); rtnl_lock();
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
dev_set_mtu(vif->dev, ETH_DATA_LEN); dev_set_mtu(vif->dev, ETH_DATA_LEN);
...@@ -441,6 +475,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, ...@@ -441,6 +475,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
rtnl_unlock(); rtnl_unlock();
wake_up_process(vif->task); wake_up_process(vif->task);
wake_up_process(vif->dealloc_task);
return 0; return 0;
...@@ -478,6 +513,11 @@ void xenvif_disconnect(struct xenvif *vif) ...@@ -478,6 +513,11 @@ void xenvif_disconnect(struct xenvif *vif)
vif->task = NULL; vif->task = NULL;
} }
if (vif->dealloc_task) {
kthread_stop(vif->dealloc_task);
vif->dealloc_task = NULL;
}
if (vif->tx_irq) { if (vif->tx_irq) {
if (vif->tx_irq == vif->rx_irq) if (vif->tx_irq == vif->rx_irq)
unbind_from_irqhandler(vif->tx_irq, vif); unbind_from_irqhandler(vif->tx_irq, vif);
...@@ -493,6 +533,23 @@ void xenvif_disconnect(struct xenvif *vif) ...@@ -493,6 +533,23 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_free(struct xenvif *vif) void xenvif_free(struct xenvif *vif)
{ {
int i, unmap_timeout = 0;
for (i = 0; i < MAX_PENDING_REQS; ++i) {
if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
unmap_timeout++;
schedule_timeout(msecs_to_jiffies(1000));
if (unmap_timeout > 9 &&
net_ratelimit())
netdev_err(vif->dev,
"Page still granted! Index: %x\n",
i);
i = -1;
}
}
free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
netif_napi_del(&vif->napi); netif_napi_del(&vif->napi);
unregister_netdev(vif->dev); unregister_netdev(vif->dev);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment