Commit 074cc476 authored by Dan Williams's avatar Dan Williams

ioat2,3: convert to producer/consumer locking

Use separate locks for the descriptor prep (producer) and descriptor
cleanup (consumer) paths.  Allows the producer path to run concurrently
with the cleanup path.  Inspired by Documentation/circular-buffer.txt.

Cc: David Howells <dhowells@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent abb12dfd
......@@ -96,6 +96,7 @@ struct ioat_chan_common {
#define IOAT_COMPLETION_ACK 1
#define IOAT_RESET_PENDING 2
#define IOAT_KOBJ_INIT_FAIL 3
#define IOAT_RESHAPE_PENDING 4
struct timer_list timer;
#define COMPLETION_TIMEOUT msecs_to_jiffies(100)
#define IDLE_TIMEOUT msecs_to_jiffies(2000)
......
This diff is collapsed.
......@@ -50,8 +50,9 @@ extern int ioat_ring_alloc_order;
* @tail: cleanup index
* @dmacount: identical to 'head' except for occasionally resetting to zero
* @alloc_order: log2 of the number of allocated descriptors
* @produce: number of descriptors to produce at submit time
* @ring: software ring buffer implementation of hardware ring
* @ring_lock: protects ring attributes
* @prep_lock: serializes descriptor preparation (producers)
*/
struct ioat2_dma_chan {
struct ioat_chan_common base;
......@@ -61,8 +62,9 @@ struct ioat2_dma_chan {
u16 tail;
u16 dmacount;
u16 alloc_order;
u16 produce;
struct ioat_ring_ent **ring;
spinlock_t ring_lock;
spinlock_t prep_lock;
};
static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
......@@ -94,13 +96,6 @@ static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
return ioat2_ring_size(ioat) - ioat2_ring_active(ioat);
}
/* assumes caller already checked space */
static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
{
ioat->head += len;
return ioat->head - len;
}
static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
{
u16 num_descs = len >> ioat->xfercap_log;
......@@ -164,7 +159,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
int ioat2_enumerate_channels(struct ioatdma_device *device);
struct dma_async_tx_descriptor *
ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
......
......@@ -260,8 +260,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
struct ioat_chan_common *chan = &ioat->base;
struct ioat_ring_ent *desc;
bool seen_current = false;
int idx = ioat->tail, i;
u16 active;
int i;
dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
__func__, ioat->head, ioat->tail, ioat->issued);
......@@ -270,13 +270,14 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
for (i = 0; i < active && !seen_current; i++) {
struct dma_async_tx_descriptor *tx;
prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
smp_read_barrier_depends();
prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
desc = ioat2_get_ring_ent(ioat, idx + i);
dump_desc_dbg(ioat, desc);
tx = &desc->txd;
if (tx->cookie) {
chan->completed_cookie = tx->cookie;
ioat3_dma_unmap(ioat, desc, ioat->tail + i);
ioat3_dma_unmap(ioat, desc, idx + i);
tx->cookie = 0;
if (tx->callback) {
tx->callback(tx->callback_param);
......@@ -293,69 +294,30 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
i++;
}
}
ioat->tail += i;
smp_mb(); /* finish all descriptor reads before incrementing tail */
ioat->tail = idx + i;
BUG_ON(active && !seen_current); /* no active descs have written a completion? */
chan->last_completion = phys_complete;
active = ioat2_ring_active(ioat);
if (active == 0) {
if (active - i == 0) {
dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
__func__);
clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
}
/* 5 microsecond delay per pending descriptor */
writew(min((5 * active), IOAT_INTRDELAY_MASK),
writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
}
/* try to cleanup, but yield (via spin_trylock) to incoming submissions
* with the expectation that we will immediately poll again shortly
*/
static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat)
static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
unsigned long phys_complete;
prefetch(chan->completion);
if (!spin_trylock_bh(&chan->cleanup_lock))
return;
if (!ioat_cleanup_preamble(chan, &phys_complete)) {
spin_unlock_bh(&chan->cleanup_lock);
return;
}
if (!spin_trylock_bh(&ioat->ring_lock)) {
spin_unlock_bh(&chan->cleanup_lock);
return;
}
__cleanup(ioat, phys_complete);
spin_unlock_bh(&ioat->ring_lock);
spin_unlock_bh(&chan->cleanup_lock);
}
/* run cleanup now because we already delayed the interrupt via INTRDELAY */
static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
unsigned long phys_complete;
prefetch(chan->completion);
spin_lock_bh(&chan->cleanup_lock);
if (!ioat_cleanup_preamble(chan, &phys_complete)) {
spin_unlock_bh(&chan->cleanup_lock);
return;
}
spin_lock_bh(&ioat->ring_lock);
__cleanup(ioat, phys_complete);
spin_unlock_bh(&ioat->ring_lock);
if (ioat_cleanup_preamble(chan, &phys_complete))
__cleanup(ioat, phys_complete);
spin_unlock_bh(&chan->cleanup_lock);
}
......@@ -363,7 +325,7 @@ static void ioat3_cleanup_event(unsigned long data)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
ioat3_cleanup_sync(ioat);
ioat3_cleanup(ioat);
writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
}
......@@ -384,12 +346,10 @@ static void ioat3_timer_event(unsigned long data)
struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
struct ioat_chan_common *chan = &ioat->base;
spin_lock_bh(&chan->cleanup_lock);
if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
unsigned long phys_complete;
u64 status;
spin_lock_bh(&ioat->ring_lock);
status = ioat_chansts(chan);
/* when halted due to errors check for channel
......@@ -408,26 +368,31 @@ static void ioat3_timer_event(unsigned long data)
* acknowledged a pending completion once, then be more
* forceful with a restart
*/
spin_lock_bh(&chan->cleanup_lock);
if (ioat_cleanup_preamble(chan, &phys_complete))
__cleanup(ioat, phys_complete);
else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
spin_lock_bh(&ioat->prep_lock);
ioat3_restart_channel(ioat);
else {
spin_unlock_bh(&ioat->prep_lock);
} else {
set_bit(IOAT_COMPLETION_ACK, &chan->state);
mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
}
spin_unlock_bh(&ioat->ring_lock);
spin_unlock_bh(&chan->cleanup_lock);
} else {
u16 active;
/* if the ring is idle, empty, and oversized try to step
* down the size
*/
spin_lock_bh(&ioat->ring_lock);
spin_lock_bh(&chan->cleanup_lock);
spin_lock_bh(&ioat->prep_lock);
active = ioat2_ring_active(ioat);
if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
reshape_ring(ioat, ioat->alloc_order-1);
spin_unlock_bh(&ioat->ring_lock);
spin_unlock_bh(&ioat->prep_lock);
spin_unlock_bh(&chan->cleanup_lock);
/* keep shrinking until we get back to our minimum
* default size
......@@ -435,7 +400,6 @@ static void ioat3_timer_event(unsigned long data)
if (ioat->alloc_order > ioat_get_alloc_order())
mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
}
spin_unlock_bh(&chan->cleanup_lock);
}
static enum dma_status
......@@ -447,7 +411,7 @@ ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
return DMA_SUCCESS;
ioat3_cleanup_poll(ioat);
ioat3_cleanup(ioat);
return ioat_is_complete(c, cookie, done, used);
}
......@@ -460,15 +424,12 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
struct ioat_ring_ent *desc;
size_t total_len = len;
struct ioat_fill_descriptor *fill;
int num_descs;
u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
u16 idx;
int i;
int num_descs, idx, i;
num_descs = ioat2_xferlen_to_descs(ioat, len);
if (likely(num_descs) &&
ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
/* pass */;
if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
idx = ioat->head;
else
return NULL;
i = 0;
......@@ -513,11 +474,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
struct ioat_xor_descriptor *xor;
struct ioat_xor_ext_descriptor *xor_ex = NULL;
struct ioat_dma_descriptor *hw;
int num_descs, with_ext, idx, i;
u32 offset = 0;
int num_descs;
int with_ext;
int i;
u16 idx;
u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
BUG_ON(src_cnt < 2);
......@@ -537,9 +495,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
* (legacy) descriptor to ensure all completion writes arrive in
* order.
*/
if (likely(num_descs) &&
ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
/* pass */;
if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
idx = ioat->head;
else
return NULL;
i = 0;
......@@ -657,11 +614,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
struct ioat_pq_ext_descriptor *pq_ex = NULL;
struct ioat_dma_descriptor *hw;
u32 offset = 0;
int num_descs;
int with_ext;
int i, s;
u16 idx;
u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
int i, s, idx, with_ext, num_descs;
dev_dbg(to_dev(chan), "%s\n", __func__);
/* the engine requires at least two sources (we provide
......@@ -687,8 +641,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
* order.
*/
if (likely(num_descs) &&
ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
/* pass */;
ioat2_check_space_lock(ioat, num_descs+1) == 0)
idx = ioat->head;
else
return NULL;
i = 0;
......@@ -851,10 +805,9 @@ ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_ring_ent *desc;
struct ioat_dma_descriptor *hw;
u16 idx;
if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
desc = ioat2_get_ring_ent(ioat, idx);
if (ioat2_check_space_lock(ioat, 1) == 0)
desc = ioat2_get_ring_ent(ioat, ioat->head);
else
return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment