Commit 25733c4e authored by Brian Norris's avatar Brian Norris Committed by Kalle Valo

ath10k: pci: use mutex for diagnostic window CE polling

The DIAG copy engine is only used via polling, but it holds a spinlock
with softirqs disabled. Each iteration of our read/write loops can
theoretically take 20ms (two 10ms timeout loops), and this loop can be
run an unbounded number of times while holding the spinlock -- dependent
on the request size given by the caller.

As of commit 39501ea6 ("ath10k: download firmware via diag Copy
Engine for QCA6174 and QCA9377."), we transfer large chunks of firmware
memory using this mechanism. With large enough firmware segments, this
becomes an exceedingly long period for disabling soft IRQs. For example,
with a 500KiB firmware segment, in testing QCA6174A, I see 200 loop
iterations of about 50-100us each, which can total about 10-20ms.

In reality, we don't really need to block softirqs for this duration.
The DIAG CE is only used in polling mode, and we only need to hold
ce_lock to make sure any CE bookkeeping is done without screwing up
another CE. Otherwise, we only need to ensure exclusion between
ath10k_pci_diag_{read,write}_mem() contexts.

This patch moves to use fine-grained locking for the shared ce_lock,
while adding a new mutex just to ensure mutual exclusion of diag
read/write operations.

Tested on QCA6174A, firmware version WLAN.RM.4.4.1-00132-QCARMSWPZ-1.

Fixes: 39501ea6 ("ath10k: download firmware via diag Copy Engine for QCA6174 and QCA9377.")
Signed-off-by: default avatarBrian Norris <briannorris@chromium.org>
Signed-off-by: default avatarKalle Valo <kvalo@codeaurora.org>
parent c40e448e
...@@ -913,7 +913,6 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data, ...@@ -913,7 +913,6 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
int nbytes) int nbytes)
{ {
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret = 0; int ret = 0;
u32 *buf; u32 *buf;
unsigned int completed_nbytes, alloc_nbytes, remaining_bytes; unsigned int completed_nbytes, alloc_nbytes, remaining_bytes;
...@@ -924,8 +923,7 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data, ...@@ -924,8 +923,7 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
void *data_buf = NULL; void *data_buf = NULL;
int i; int i;
spin_lock_bh(&ce->ce_lock); mutex_lock(&ar_pci->ce_diag_mutex);
ce_diag = ar_pci->ce_diag; ce_diag = ar_pci->ce_diag;
/* /*
...@@ -960,19 +958,17 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data, ...@@ -960,19 +958,17 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
nbytes = min_t(unsigned int, remaining_bytes, nbytes = min_t(unsigned int, remaining_bytes,
DIAG_TRANSFER_LIMIT); DIAG_TRANSFER_LIMIT);
ret = ce_diag->ops->ce_rx_post_buf(ce_diag, &ce_data, ce_data); ret = ath10k_ce_rx_post_buf(ce_diag, &ce_data, ce_data);
if (ret != 0) if (ret != 0)
goto done; goto done;
/* Request CE to send from Target(!) address to Host buffer */ /* Request CE to send from Target(!) address to Host buffer */
ret = ath10k_ce_send_nolock(ce_diag, NULL, (u32)address, nbytes, 0, ret = ath10k_ce_send(ce_diag, NULL, (u32)address, nbytes, 0, 0);
0);
if (ret) if (ret)
goto done; goto done;
i = 0; i = 0;
while (ath10k_ce_completed_send_next_nolock(ce_diag, while (ath10k_ce_completed_send_next(ce_diag, NULL) != 0) {
NULL) != 0) {
udelay(DIAG_ACCESS_CE_WAIT_US); udelay(DIAG_ACCESS_CE_WAIT_US);
i += DIAG_ACCESS_CE_WAIT_US; i += DIAG_ACCESS_CE_WAIT_US;
...@@ -983,10 +979,8 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data, ...@@ -983,10 +979,8 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
} }
i = 0; i = 0;
while (ath10k_ce_completed_recv_next_nolock(ce_diag, while (ath10k_ce_completed_recv_next(ce_diag, (void **)&buf,
(void **)&buf, &completed_nbytes) != 0) {
&completed_nbytes)
!= 0) {
udelay(DIAG_ACCESS_CE_WAIT_US); udelay(DIAG_ACCESS_CE_WAIT_US);
i += DIAG_ACCESS_CE_WAIT_US; i += DIAG_ACCESS_CE_WAIT_US;
...@@ -1019,7 +1013,7 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data, ...@@ -1019,7 +1013,7 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
dma_free_coherent(ar->dev, alloc_nbytes, data_buf, dma_free_coherent(ar->dev, alloc_nbytes, data_buf,
ce_data_base); ce_data_base);
spin_unlock_bh(&ce->ce_lock); mutex_unlock(&ar_pci->ce_diag_mutex);
return ret; return ret;
} }
...@@ -1067,7 +1061,6 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, ...@@ -1067,7 +1061,6 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
const void *data, int nbytes) const void *data, int nbytes)
{ {
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret = 0; int ret = 0;
u32 *buf; u32 *buf;
unsigned int completed_nbytes, alloc_nbytes, remaining_bytes; unsigned int completed_nbytes, alloc_nbytes, remaining_bytes;
...@@ -1076,8 +1069,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, ...@@ -1076,8 +1069,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
dma_addr_t ce_data_base = 0; dma_addr_t ce_data_base = 0;
int i; int i;
spin_lock_bh(&ce->ce_lock); mutex_lock(&ar_pci->ce_diag_mutex);
ce_diag = ar_pci->ce_diag; ce_diag = ar_pci->ce_diag;
/* /*
...@@ -1118,7 +1110,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, ...@@ -1118,7 +1110,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
memcpy(data_buf, data, nbytes); memcpy(data_buf, data, nbytes);
/* Set up to receive directly into Target(!) address */ /* Set up to receive directly into Target(!) address */
ret = ce_diag->ops->ce_rx_post_buf(ce_diag, &address, address); ret = ath10k_ce_rx_post_buf(ce_diag, &address, address);
if (ret != 0) if (ret != 0)
goto done; goto done;
...@@ -1126,14 +1118,12 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, ...@@ -1126,14 +1118,12 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
* Request CE to send caller-supplied data that * Request CE to send caller-supplied data that
* was copied to bounce buffer to Target(!) address. * was copied to bounce buffer to Target(!) address.
*/ */
ret = ath10k_ce_send_nolock(ce_diag, NULL, ce_data_base, ret = ath10k_ce_send(ce_diag, NULL, ce_data_base, nbytes, 0, 0);
nbytes, 0, 0);
if (ret != 0) if (ret != 0)
goto done; goto done;
i = 0; i = 0;
while (ath10k_ce_completed_send_next_nolock(ce_diag, while (ath10k_ce_completed_send_next(ce_diag, NULL) != 0) {
NULL) != 0) {
udelay(DIAG_ACCESS_CE_WAIT_US); udelay(DIAG_ACCESS_CE_WAIT_US);
i += DIAG_ACCESS_CE_WAIT_US; i += DIAG_ACCESS_CE_WAIT_US;
...@@ -1144,10 +1134,8 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, ...@@ -1144,10 +1134,8 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
} }
i = 0; i = 0;
while (ath10k_ce_completed_recv_next_nolock(ce_diag, while (ath10k_ce_completed_recv_next(ce_diag, (void **)&buf,
(void **)&buf, &completed_nbytes) != 0) {
&completed_nbytes)
!= 0) {
udelay(DIAG_ACCESS_CE_WAIT_US); udelay(DIAG_ACCESS_CE_WAIT_US);
i += DIAG_ACCESS_CE_WAIT_US; i += DIAG_ACCESS_CE_WAIT_US;
...@@ -1182,7 +1170,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, ...@@ -1182,7 +1170,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
ath10k_warn(ar, "failed to write diag value at 0x%x: %d\n", ath10k_warn(ar, "failed to write diag value at 0x%x: %d\n",
address, ret); address, ret);
spin_unlock_bh(&ce->ce_lock); mutex_unlock(&ar_pci->ce_diag_mutex);
return ret; return ret;
} }
...@@ -3462,6 +3450,7 @@ int ath10k_pci_setup_resource(struct ath10k *ar) ...@@ -3462,6 +3450,7 @@ int ath10k_pci_setup_resource(struct ath10k *ar)
spin_lock_init(&ce->ce_lock); spin_lock_init(&ce->ce_lock);
spin_lock_init(&ar_pci->ps_lock); spin_lock_init(&ar_pci->ps_lock);
mutex_init(&ar_pci->ce_diag_mutex);
timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0); timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define _PCI_H_ #define _PCI_H_
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/mutex.h>
#include "hw.h" #include "hw.h"
#include "ce.h" #include "ce.h"
...@@ -128,6 +129,8 @@ struct ath10k_pci { ...@@ -128,6 +129,8 @@ struct ath10k_pci {
/* Copy Engine used for Diagnostic Accesses */ /* Copy Engine used for Diagnostic Accesses */
struct ath10k_ce_pipe *ce_diag; struct ath10k_ce_pipe *ce_diag;
/* For protecting ce_diag */
struct mutex ce_diag_mutex;
struct ath10k_ce ce; struct ath10k_ce ce;
struct timer_list rx_post_retry; struct timer_list rx_post_retry;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment