Commit fe88e6dd authored by David S. Miller's avatar David S. Miller

Merge branch 'ndo_xmit_flush'

Basic deferred TX queue flushing infrastructure.

Over time, and specifically and more recently at the Networking
Workshop during Kernel SUmmit in Chicago, we have discussed the idea
of having some way to optimize transmits of multiple TX packets at
a time.

There are several areas of overhead that could be amortized with such
schemes.  One has to do with locking and transactional overhead, the
other has to do with device specific costs.

This patch set here is more aimed at device specific costs.

Typically a device queues up a packet in the TX queue and then has to
do something to have the device start processing that new entry.
Sometimes this is composed of doing an MMIO write to a "tail"
register, and in other cases it can involve something as expensive as
a hypervisor call.

The basic setup defined here is that when the driver supports deferred
TX queue flushing, ndo_start_xmit should no longer perform that
operation.  Instead a new operation, ndo_xmit_flush, should do it.

I have converted IGB and virtio_net as example initial users.  The IGB
conversion is tested, virtio_net is not but it does compile :-)

All ndo_start_xmit call sites have been abstracted behind a new helper
called netdev_start_xmit().

This just adds the infrastructure, it does not actually add any
instances of actually doing multiple ndo_start_xmit calls per
ndo_xmit_flush invocation.
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4c83acbc c223a078
......@@ -136,6 +136,7 @@ static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static void igb_xmit_flush(struct net_device *netdev, u16 queue);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
......@@ -2075,6 +2076,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_open = igb_open,
.ndo_stop = igb_close,
.ndo_start_xmit = igb_xmit_frame,
.ndo_xmit_flush = igb_xmit_flush,
.ndo_get_stats64 = igb_get_stats64,
.ndo_set_rx_mode = igb_set_rx_mode,
.ndo_set_mac_address = igb_set_mac,
......@@ -4915,13 +4917,6 @@ static void igb_tx_map(struct igb_ring *tx_ring,
tx_ring->next_to_use = i;
writel(i, tx_ring->tail);
/* we need this if more than one processor can write to our tail
* at a time, it synchronizes IO on IA64/Altix systems
*/
mmiowb();
return;
dma_error:
......@@ -5057,17 +5052,20 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_OK;
}
static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
struct sk_buff *skb)
static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx)
{
unsigned int r_idx = skb->queue_mapping;
if (r_idx >= adapter->num_tx_queues)
r_idx = r_idx % adapter->num_tx_queues;
return adapter->tx_ring[r_idx];
}
static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
struct sk_buff *skb)
{
return __igb_tx_queue_mapping(adapter, skb->queue_mapping);
}
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
struct net_device *netdev)
{
......@@ -5096,6 +5094,21 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
}
static void igb_xmit_flush(struct net_device *netdev, u16 queue)
{
struct igb_adapter *adapter = netdev_priv(netdev);
struct igb_ring *tx_ring;
tx_ring = __igb_tx_queue_mapping(adapter, queue);
writel(tx_ring->next_to_use, tx_ring->tail);
/* we need this if more than one processor can write to our tail
* at a time, it synchronizes IO on IA64/Altix systems
*/
mmiowb();
}
/**
* igb_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
......
......@@ -934,7 +934,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
virtqueue_kick(sq->vq);
/* Don't wait up for transmitted skbs to be freed. */
skb_orphan(skb);
......@@ -957,6 +956,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
static void xmit_flush(struct net_device *dev, u16 qnum)
{
struct virtnet_info *vi = netdev_priv(dev);
struct send_queue *sq = &vi->sq[qnum];
virtqueue_kick(sq->vq);
}
/*
* Send command via the control virtqueue and check status. Commands
* supported by the hypervisor, as indicated by feature bits, should
......@@ -1386,6 +1393,7 @@ static const struct net_device_ops virtnet_netdev = {
.ndo_open = virtnet_open,
.ndo_stop = virtnet_close,
.ndo_start_xmit = start_xmit,
.ndo_xmit_flush = xmit_flush,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = virtnet_set_mac_address,
.ndo_set_rx_mode = virtnet_set_rx_mode,
......
......@@ -193,7 +193,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
struct dlci_local *dlp = netdev_priv(dev);
if (skb)
dlp->slave->netdev_ops->ndo_start_xmit(skb, dlp->slave);
netdev_start_xmit(skb, dlp->slave);
return NETDEV_TX_OK;
}
......
......@@ -1101,7 +1101,7 @@ static void ncm_tx_tasklet(unsigned long data)
/* Only send if data is available. */
if (ncm->skb_tx_data) {
ncm->timer_force_tx = true;
ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev);
netdev_start_xmit(NULL, ncm->netdev);
ncm->timer_force_tx = false;
}
}
......
......@@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
* Required can not be NULL.
*
* void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
* A driver implements this function when it wishes to support
* deferred TX queue flushing. The idea is that the expensive
* operation to trigger TX queue processing can be done after
* N calls to ndo_start_xmit rather than being done every single
* time. In this regime ndo_start_xmit will be called one or more
* times, and then a final ndo_xmit_flush call will be made to
* have the driver tell the device about the new pending TX queue
* entries. The kernel keeps track of which queues need flushing
* by monitoring skb->queue_mapping of the packets it submits to
* ndo_start_xmit. This is the queue value that will be passed
* to ndo_xmit_flush.
*
* u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
* void *accel_priv, select_queue_fallback_t fallback);
* Called to decide which queue to when device supports multiple
......@@ -1005,6 +1018,7 @@ struct net_device_ops {
int (*ndo_stop)(struct net_device *dev);
netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
u16 (*ndo_select_queue)(struct net_device *dev,
struct sk_buff *skb,
void *accel_priv,
......@@ -3430,6 +3444,27 @@ int __init dev_proc_init(void);
#define dev_proc_init() 0
#endif
static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
struct sk_buff *skb, struct net_device *dev)
{
netdev_tx_t ret;
u16 q;
q = skb->queue_mapping;
ret = ops->ndo_start_xmit(skb, dev);
if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
ops->ndo_xmit_flush(dev, q);
return ret;
}
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
return __netdev_start_xmit(ops, skb, dev);
}
int netdev_class_create_file_ns(struct class_attribute *class_attr,
const void *ns);
void netdev_class_remove_file_ns(struct class_attribute *class_attr,
......
......@@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
}
non_ip:
return mpc->old_ops->ndo_start_xmit(skb, dev);
return __netdev_start_xmit(mpc->old_ops, skb, dev);
}
static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
......
......@@ -2602,7 +2602,6 @@ EXPORT_SYMBOL(netif_skb_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
unsigned int skb_len;
......@@ -2667,7 +2666,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb_len = skb->len;
trace_net_dev_start_xmit(skb, dev);
rc = ops->ndo_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
......@@ -2686,7 +2685,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb_len = nskb->len;
trace_net_dev_start_xmit(nskb, dev);
rc = ops->ndo_start_xmit(nskb, dev);
rc = netdev_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
......
......@@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644);
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
int status = NETDEV_TX_OK;
netdev_features_t features;
......@@ -92,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
status = ops->ndo_start_xmit(skb, dev);
status = netdev_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
......
......@@ -3285,8 +3285,6 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
struct net_device *odev = pkt_dev->odev;
netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
= odev->netdev_ops->ndo_start_xmit;
struct netdev_queue *txq;
u16 queue_map;
int ret;
......@@ -3339,7 +3337,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
goto unlock;
}
atomic_inc(&(pkt_dev->skb->users));
ret = (*xmit)(pkt_dev->skb, odev);
ret = netdev_start_xmit(pkt_dev->skb, odev);
switch (ret) {
case NETDEV_TX_OK:
......
......@@ -240,7 +240,6 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
static int packet_direct_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
const struct net_device_ops *ops = dev->netdev_ops;
netdev_features_t features;
struct netdev_queue *txq;
int ret = NETDEV_TX_BUSY;
......@@ -262,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq)) {
ret = ops->ndo_start_xmit(skb, dev);
ret = netdev_start_xmit(skb, dev);
if (ret == NETDEV_TX_OK)
txq_trans_update(txq);
}
......
......@@ -301,7 +301,6 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
do {
struct net_device *slave = qdisc_dev(q);
struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
const struct net_device_ops *slave_ops = slave->netdev_ops;
if (slave_txq->qdisc_sleeping != q)
continue;
......@@ -317,7 +316,7 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int length = qdisc_pkt_len(skb);
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
txq_trans_update(slave_txq);
__netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment