Commit b5de97bc authored by David S. Miller

Merge branch 'dma_rmb_wmb'

Alexander Duyck says:

====================
Replace wmb()/rmb() with dma_wmb()/dma_rmb() where appropriate

This is the start of a side project cleaning up the drivers that can make
use of the dma_wmb() and dma_rmb() calls.  The general idea is to remove
unnecessary wmb()/rmb() calls from a number of drivers and to use the
lighter weight dma_wmb()/dma_rmb() calls instead.  This should give an
overall improvement in performance, since a full barrier can cost a
significant number of cycles and, on architectures such as x86, is
stronger than needed for ordering accesses to coherent DMA memory.

These changes are what I would consider low hanging fruit.  The
likelihood of the changes introducing an error should be low, since the
use of the barriers in these cases is fairly obvious.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 04abac5f 837a1dba
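
For context, the pattern these conversions target looks roughly like the
sketch below. All names in it (struct my_desc, MY_OWN_BIT, MY_DONE_BIT,
my_post_rx_desc, my_clean_rx_desc) are made up for illustration and do not
come from any of the drivers touched here; the point is only to show where
dma_wmb()/dma_rmb() can stand in for wmb()/rmb() when the stores and loads
being ordered all target coherent DMA descriptor memory rather than MMIO.

```c
#include <linux/types.h>	/* __le32, __le64, dma_addr_t, bool */
#include <asm/byteorder.h>	/* cpu_to_le64(), le32_to_cpu() */
#include <asm/barrier.h>	/* dma_wmb(), dma_rmb() */

/* Illustrative descriptor layout; not taken from any driver in this series. */
struct my_desc {
	__le64	buffer;
	__le32	status;
	__le32	length;
};

#define MY_OWN_BIT	0x80000000	/* descriptor owned by the device */
#define MY_DONE_BIT	0x00000001	/* device is finished with the descriptor */

/* Producer side: fill the descriptor, then hand ownership to the device. */
static void my_post_rx_desc(struct my_desc *d, dma_addr_t addr)
{
	d->buffer = cpu_to_le64(addr);

	/*
	 * Order the buffer address store before the ownership flip.  A
	 * dma_wmb() is enough because both stores target the same coherent
	 * DMA descriptor memory; no MMIO write is being ordered here.
	 */
	dma_wmb();

	d->status = cpu_to_le32(MY_OWN_BIT);
}

/* Consumer side: check the done bit, then read the rest of the descriptor. */
static bool my_clean_rx_desc(const struct my_desc *d, u32 *len)
{
	if (!(le32_to_cpu(d->status) & MY_DONE_BIT))
		return false;

	/*
	 * Do not let the descriptor payload be read before the done bit was
	 * observed; dma_rmb() replaces the heavier rmb() for this purpose.
	 */
	dma_rmb();

	*len = le32_to_cpu(d->length);
	return true;
}
```

On x86 dma_wmb() and dma_rmb() reduce to compiler barriers, while wmb() and
rmb() emit sfence/lfence instructions, which is where the cycle savings in
these hot paths come from.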
@@ -3856,7 +3856,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 	while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
 	       (count < tx_ring->count)) {
 		bool cleaned = false;
-		rmb(); /* read buffer_info after eop_desc */
+		dma_rmb(); /* read buffer_info after eop_desc */
 		for ( ; !cleaned; count++) {
 			tx_desc = E1000_TX_DESC(*tx_ring, i);
 			buffer_info = &tx_ring->buffer_info[i];
@@ -4154,7 +4154,7 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
 		if (*work_done >= work_to_do)
 			break;
 		(*work_done)++;
-		rmb(); /* read descriptor and rx_buffer_info after status DD */
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
 		status = rx_desc->status;
@@ -4375,7 +4375,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
 		if (*work_done >= work_to_do)
 			break;
 		(*work_done)++;
-		rmb(); /* read descriptor and rx_buffer_info after status DD */
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
 		status = rx_desc->status;
 		length = le16_to_cpu(rx_desc->length);
...
@@ -947,7 +947,7 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 		if (*work_done >= work_to_do)
 			break;
 		(*work_done)++;
-		rmb(); /* read descriptor and rx_buffer_info after status DD */
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
 		skb = buffer_info->skb;
 		buffer_info->skb = NULL;
@@ -1232,7 +1232,7 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring)
 	       (count < tx_ring->count)) {
 		bool cleaned = false;
-		rmb(); /* read buffer_info after eop_desc */
+		dma_rmb(); /* read buffer_info after eop_desc */
 		for (; !cleaned; count++) {
 			tx_desc = E1000_TX_DESC(*tx_ring, i);
 			buffer_info = &tx_ring->buffer_info[i];
@@ -1332,7 +1332,7 @@ static bool e1000_clean_rx_irq_ps(struct e1000_ring *rx_ring, int *work_done,
 			break;
 		(*work_done)++;
 		skb = buffer_info->skb;
-		rmb(); /* read descriptor and rx_buffer_info after status DD */
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
 		/* in the packet split case this is header only */
 		prefetch(skb->data - NET_IP_ALIGN);
@@ -1536,7 +1536,7 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 		if (*work_done >= work_to_do)
 			break;
 		(*work_done)++;
-		rmb(); /* read descriptor and rx_buffer_info after status DD */
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
 		skb = buffer_info->skb;
 		buffer_info->skb = NULL;
...
@@ -2520,7 +2520,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring,
 				DBG_PRINT(INFO_DBG, "%s: Could not allocate skb\n",
 					  ring->dev->name);
 				if (first_rxdp) {
-					wmb();
+					dma_wmb();
 					first_rxdp->Control_1 |= RXD_OWN_XENA;
 				}
 				swstats->mem_alloc_fail_cnt++;
@@ -2634,7 +2634,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring,
 		rxdp->Control_2 |= SET_RXD_MARKER;
 		if (!(alloc_tab & ((1 << rxsync_frequency) - 1))) {
 			if (first_rxdp) {
-				wmb();
+				dma_wmb();
 				first_rxdp->Control_1 |= RXD_OWN_XENA;
 			}
 			first_rxdp = rxdp;
@@ -2649,7 +2649,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring,
 	 * and other fields are seen by adapter correctly.
 	 */
 	if (first_rxdp) {
-		wmb();
+		dma_wmb();
 		first_rxdp->Control_1 |= RXD_OWN_XENA;
 	}
@@ -6950,7 +6950,7 @@ static int rxd_owner_bit_reset(struct s2io_nic *sp)
 			}
 			set_rxd_buffer_size(sp, rxdp, size);
-			wmb();
+			dma_wmb();
 			/* flip the Ownership bit to Hardware */
 			rxdp->Control_1 |= RXD_OWN_XENA;
 		}
...
@@ -718,7 +718,7 @@ static __inline__ void gem_post_rxds(struct gem *gp, int limit)
 	cluster_start = curr = (gp->rx_new & ~(4 - 1));
 	count = 0;
 	kick = -1;
-	wmb();
+	dma_wmb();
 	while (curr != limit) {
 		curr = NEXT_RX(curr);
 		if (++count == 4) {
@@ -1038,7 +1038,7 @@ static netdev_tx_t gem_start_xmit(struct sk_buff *skb,
 		if (gem_intme(entry))
 			ctrl |= TXDCTRL_INTME;
 		txd->buffer = cpu_to_le64(mapping);
-		wmb();
+		dma_wmb();
 		txd->control_word = cpu_to_le64(ctrl);
 		entry = NEXT_TX(entry);
 	} else {
@@ -1076,7 +1076,7 @@ static netdev_tx_t gem_start_xmit(struct sk_buff *skb,
 			txd = &gp->init_block->txd[entry];
 			txd->buffer = cpu_to_le64(mapping);
-			wmb();
+			dma_wmb();
 			txd->control_word = cpu_to_le64(this_ctrl | len);
 			if (gem_intme(entry))
@@ -1086,7 +1086,7 @@ static netdev_tx_t gem_start_xmit(struct sk_buff *skb,
 		}
 		txd = &gp->init_block->txd[first_entry];
 		txd->buffer = cpu_to_le64(first_mapping);
-		wmb();
+		dma_wmb();
 		txd->control_word =
 			cpu_to_le64(ctrl | TXDCTRL_SOF | intme | first_len);
 	}
@@ -1585,7 +1585,7 @@ static void gem_clean_rings(struct gem *gp)
 				gp->rx_skbs[i] = NULL;
 			}
 			rxd->status_word = 0;
-			wmb();
+			dma_wmb();
 			rxd->buffer = 0;
 		}
@@ -1647,7 +1647,7 @@ static void gem_init_rings(struct gem *gp)
 					       RX_BUF_ALLOC_SIZE(gp),
 					       PCI_DMA_FROMDEVICE);
 		rxd->buffer = cpu_to_le64(dma_addr);
-		wmb();
+		dma_wmb();
 		rxd->status_word = cpu_to_le64(RXDCTRL_FRESH(gp));
 		skb_reserve(skb, RX_OFFSET);
 	}
@@ -1656,7 +1656,7 @@ static void gem_init_rings(struct gem *gp)
 		struct gem_txd *txd = &gb->txd[i];
 		txd->control_word = 0;
-		wmb();
+		dma_wmb();
 		txd->buffer = 0;
 	}
 	wmb();
...
@@ -196,14 +196,14 @@ static u32 sbus_hme_read32(void __iomem *reg)
 static void sbus_hme_write_rxd(struct happy_meal_rxd *rxd, u32 flags, u32 addr)
 {
 	rxd->rx_addr = (__force hme32)addr;
-	wmb();
+	dma_wmb();
 	rxd->rx_flags = (__force hme32)flags;
 }
 static void sbus_hme_write_txd(struct happy_meal_txd *txd, u32 flags, u32 addr)
 {
 	txd->tx_addr = (__force hme32)addr;
-	wmb();
+	dma_wmb();
 	txd->tx_flags = (__force hme32)flags;
 }
@@ -225,14 +225,14 @@ static u32 pci_hme_read32(void __iomem *reg)
 static void pci_hme_write_rxd(struct happy_meal_rxd *rxd, u32 flags, u32 addr)
 {
 	rxd->rx_addr = (__force hme32)cpu_to_le32(addr);
-	wmb();
+	dma_wmb();
 	rxd->rx_flags = (__force hme32)cpu_to_le32(flags);
 }
 static void pci_hme_write_txd(struct happy_meal_txd *txd, u32 flags, u32 addr)
 {
 	txd->tx_addr = (__force hme32)cpu_to_le32(addr);
-	wmb();
+	dma_wmb();
 	txd->tx_flags = (__force hme32)cpu_to_le32(flags);
 }
@@ -268,12 +268,12 @@ static u32 pci_hme_read_desc32(hme32 *p)
 	sbus_readl(__reg)
 #define hme_write_rxd(__hp, __rxd, __flags, __addr) \
 do { (__rxd)->rx_addr = (__force hme32)(u32)(__addr); \
-	wmb(); \
+	dma_wmb(); \
 	(__rxd)->rx_flags = (__force hme32)(u32)(__flags); \
 } while(0)
 #define hme_write_txd(__hp, __txd, __flags, __addr) \
 do { (__txd)->tx_addr = (__force hme32)(u32)(__addr); \
-	wmb(); \
+	dma_wmb(); \
 	(__txd)->tx_flags = (__force hme32)(u32)(__flags); \
 } while(0)
 #define hme_read_desc32(__hp, __p) ((__force u32)(hme32)*(__p))
@@ -293,12 +293,12 @@ do { (__txd)->tx_addr = (__force hme32)(u32)(__addr); \
 	readl(__reg)
 #define hme_write_rxd(__hp, __rxd, __flags, __addr) \
 do { (__rxd)->rx_addr = (__force hme32)cpu_to_le32(__addr); \
-	wmb(); \
+	dma_wmb(); \
 	(__rxd)->rx_flags = (__force hme32)cpu_to_le32(__flags); \
 } while(0)
 #define hme_write_txd(__hp, __txd, __flags, __addr) \
 do { (__txd)->tx_addr = (__force hme32)cpu_to_le32(__addr); \
-	wmb(); \
+	dma_wmb(); \
 	(__txd)->tx_flags = (__force hme32)cpu_to_le32(__flags); \
 } while(0)
 static inline u32 hme_read_desc32(struct happy_meal *hp, hme32 *p)
...
@@ -519,7 +519,7 @@ static int vnet_walk_rx_one(struct vnet_port *port,
 	if (desc->hdr.state != VIO_DESC_READY)
 		return 1;
-	rmb();
+	dma_rmb();
 	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
 	       desc->hdr.state, desc->hdr.ack,
@@ -1380,7 +1380,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* This has to be a non-SMP write barrier because we are writing
 	 * to memory which is shared with the peer LDOM.
 	 */
-	wmb();
+	dma_wmb();
 	d->hdr.state = VIO_DESC_READY;
@@ -1395,7 +1395,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * is marked READY, but start_cons was false.
 	 * If so, vnet_ack() should send out the missed "start" trigger.
 	 *
-	 * Note that the wmb() above makes sure the cookies et al. are
+	 * Note that the dma_wmb() above makes sure the cookies et al. are
 	 * not globally visible before the VIO_DESC_READY, and that the
 	 * stores are ordered correctly by the compiler. The consumer will
 	 * not proceed until the VIO_DESC_READY is visible assuring that
...