[PATCH] USB: EHCI qh update race fix

This makes the EHCI driver stop trying to update a live QH ... it's not like OHCI, that can't be done safely because of a hardware race. The fix: - Unlinks the QH before updating it; only the tail can safely be updated "live", not the queue head. The async schedule (all control/bulk QHs) and periodic schedule (interrupt QH) work a bit differently ... high bandwidth transfers will hiccup. - Moves "update QH" and "clear toggle" logic into one new qh_refresh() routine, used in several places. The race shows readily enough under load with the right hardware. The controller silicon might be relatively slow, or maybe it's the bus that's slow/busy: Host Controller --- ---------- reads two TD pointers update two TD pointers wmb() activate QH reads rest of QH Net result is that the HC treated old TD pointers as valid, and things started misbehaving. Busy controllers would misbehave worse; some systems wouldn't notice more than a slowdown, especially with light USB loads. This affects behavior in two cases. The uncommon one is when an endpoint gets an error and halts. The more common one happens when the controller runs off the end of its queue and overlays an inactive "dummy" TD into the QH ... something the spec says shouldn't happen, but which more silicon seems to be doing. (Presumably to reduce DMA chatter.) Signed-off-by: David Brownell <dbrownell@users.sourceforge.net> Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>

[PATCH] USB: EHCI qh update race fix
This makes the EHCI driver stop trying to update a live QH ... it's not like OHCI, that can't be done safely because of a hardware race. The fix: - Unlinks the QH before updating it; only the tail can safely be updated "live", not the queue head. The async schedule (all control/bulk QHs) and periodic schedule (interrupt QH) work a bit differently ... high bandwidth transfers will hiccup. - Moves "update QH" and "clear toggle" logic into one new qh_refresh() routine, used in several places. The race shows readily enough under load with the right hardware. The controller silicon might be relatively slow, or maybe it's the bus that's slow/busy: Host Controller --- ---------- reads two TD pointers update two TD pointers wmb() activate QH reads rest of QH Net result is that the HC treated old TD pointers as valid, and things started misbehaving. Busy controllers would misbehave worse; some systems wouldn't notice more than a slowdown, especially with light USB loads. This affects behavior in two cases. The uncommon one is when an endpoint gets an error and halts. The more common one happens when the controller runs off the end of its queue and overlays an inactive "dummy" TD into the QH ... something the spec says shouldn't happen, but which more silicon seems to be doing. (Presumably to reduce DMA chatter.) Signed-off-by: David Brownell <dbrownell@users.sourceforge.net> Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
6679699c · David Brownell · Linus Torvalds · 114c173d · 6679699c · 6679699c
Commit 6679699c authored Dec 08, 2004 by David Brownell Committed by Linus Torvalds Dec 08, 2004
Show whitespace changes
Inline Side-by-side

Showing with 78 additions and 60 deletions

drivers/usb/host/ehci-q.c drivers/usb/host/ehci-q.c +75 -58

drivers/usb/host/ehci-sched.c drivers/usb/host/ehci-sched.c +3 -2

No files found.
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -83,19 +83,59 @@ qtd_fill (struct ehci_qtd *qtd, dma_addr_t buf, size_t len,

 /*-------------------------------------------------------------------------*/

-/* update halted (but potentially linked) qh */
-
 static inline void
 qh_update (struct ehci_hcd *ehci, struct ehci_qh *qh, struct ehci_qtd *qtd)
 {
+	/* writes to an active overlay are unsafe */
+	BUG_ON(qh->qh_state != QH_STATE_IDLE);
+
 	qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma);
 	qh->hw_alt_next = EHCI_LIST_END;

+	/* Except for control endpoints, we make hardware maintain data
+	 * toggle (like OHCI) ... here (re)initialize the toggle in the QH,
+	 * and set the pseudo-toggle in udev. Only usb_clear_halt() will
+	 * ever clear it.
+	 */
+	if (!(qh->hw_info1 & cpu_to_le32(1 << 14))) {
+		unsigned	is_out, epnum;
+
+		is_out = !(qtd->hw_token & cpu_to_le32(1 << 8));
+		epnum = (le32_to_cpup(&qh->hw_info1) >> 8) & 0x0f;
+		if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) {
+			qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE);
+			usb_settoggle (qh->dev, epnum, is_out, 1);
+		}
+	}
+
 	/* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
 	wmb ();
 	qh->hw_token &= __constant_cpu_to_le32 (QTD_TOGGLE | QTD_STS_PING);
 }

+/* if it weren't for a common silicon quirk (writing the dummy into the qh
+ * overlay, so qh->hw_token wrongly becomes inactive/halted), only fault
+ * recovery (including urb dequeue) would need software changes to a QH...
+ */
+static void
+qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh)
+{
+	struct ehci_qtd *qtd;
+
+	if (list_empty (&qh->qtd_list))
+		qtd = qh->dummy;
+	else {
+		qtd = list_entry (qh->qtd_list.next,
+				struct ehci_qtd, qtd_list);
+		/* first qtd may already be partially processed */
+		if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current)
+			qtd = NULL;
+	}
+
+	if (qtd)
+		qh_update (ehci, qh, qtd);
+}
+
 /*-------------------------------------------------------------------------*/

 static void qtd_copy_status (
@@ -226,6 +266,11 @@ __acquires(ehci->lock)
 	spin_lock (&ehci->lock);
 }

+static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
+
+static void intr_deschedule (struct ehci_hcd *ehci,
+				struct ehci_qh *qh, int wait);
+static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh);

 /*
 * Process and free completed qtds for a qh, returning URBs to drivers.
@@ -369,21 +414,27 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh, struct pt_regs *regs)
 	/* restore original state; caller must unlink or relink */
 	qh->qh_state = state;

-	/* update qh after fault cleanup */
-	if (unlikely (stopped != 0)
-			/* some EHCI 0.95 impls will overlay dummy qtds */ 
-			|| qh->hw_qtd_next == EHCI_LIST_END) {
-		if (list_empty (&qh->qtd_list))
-			end = qh->dummy;
-		else {
-			end = list_entry (qh->qtd_list.next,
-					struct ehci_qtd, qtd_list);
-			/* first qtd may already be partially processed */
-			if (cpu_to_le32 (end->qtd_dma) == qh->hw_current)
-				end = NULL;
+	/* be sure the hardware's done with the qh before refreshing
+	 * it after fault cleanup, or recovering from silicon wrongly
+	 * overlaying the dummy qtd (which reduces DMA chatter).
+	 */
+	if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END) {
+		switch (state) {
+		case QH_STATE_IDLE:
+			qh_refresh(ehci, qh);
+			break;
+		case QH_STATE_LINKED:
+			/* should be rare for periodic transfers,
+			 * except maybe high bandwidth ...
+			 */
+			if (qh->period) {
+				intr_deschedule (ehci, qh, 1);
+				(void) qh_schedule (ehci, qh);
+			} else
+				start_unlink_async (ehci, qh);
+			break;
+		/* otherwise, unlink already started */
 		}
-		if (end)
-			qh_update (ehci, qh, end);
 	}

 	return count;
@@ -557,21 +608,6 @@ qh_urb_transaction (

 /*-------------------------------------------------------------------------*/

-/*
- * Hardware maintains data toggle (like OHCI) ... here we (re)initialize
- * the hardware data toggle in the QH, and set the pseudo-toggle in udev
- * so we can see if usb_clear_halt() was called.  NOP for control, since
- * we set up qh->hw_info1 to always use the QTD toggle bits. 
- */
-static inline void
-clear_toggle (struct usb_device *udev, int ep, int is_out, struct ehci_qh *qh)
-{
-	vdbg ("clear toggle, dev %d ep 0x%x-%s",
-		udev->devnum, ep, is_out ? "out" : "in");
-	qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE);
-	usb_settoggle (udev, ep, is_out, 1);
-}
-
 // Would be best to create all qh's from config descriptors,
 // when each interface/altsetting is established.  Unlink
 // any previous qh and cancel its urbs first; endpoints are
@@ -651,10 +687,10 @@ qh_make (

 			qh->period = urb->interval;
 		}
+	}

-		/* support for tt scheduling */
+	/* support for tt scheduling, and access to toggles */
 	qh->dev = usb_get_dev (urb->dev);
-	}

 	/* using TT? */
 	switch (urb->dev->speed) {
@@ -715,8 +751,8 @@ qh_make (
 	qh->qh_state = QH_STATE_IDLE;
 	qh->hw_info1 = cpu_to_le32 (info1);
 	qh->hw_info2 = cpu_to_le32 (info2);
-	qh_update (ehci, qh, qh->dummy);
 	usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1);
+	qh_refresh (ehci, qh);
 	return qh;
 }

@@ -745,7 +781,9 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 		}
 	}

-	qh->hw_token &= ~HALT_BIT;
+	/* clear halt and/or toggle; and maybe recover from silicon quirk */
+	if (qh->qh_state == QH_STATE_IDLE)
+		qh_refresh (ehci, qh);

 	/* splice right after start */
 	qh->qh_next = head->qh_next;
@@ -820,27 +858,8 @@ static struct ehci_qh *qh_append_tds (
                                qh->hw_info1 &= ~QH_ADDR_MASK;
 		}

-		/* usb_clear_halt() means qh data toggle gets reset */
-		if (unlikely (!usb_gettoggle (urb->dev,
-					(epnum & 0x0f), !(epnum & 0x10)))
-				&& !usb_pipecontrol (urb->pipe)) {
-			/* "never happens": drivers do stall cleanup right */
-			if (qh->qh_state != QH_STATE_IDLE
-					&& !list_empty (&qh->qtd_list)
-					&& qh->qh_state != QH_STATE_COMPLETING)
-				ehci_warn (ehci, "clear toggle dev%d "
-						"ep%d%s: not idle\n",
-						usb_pipedevice (urb->pipe),
-						epnum & 0x0f,
-						usb_pipein (urb->pipe)
-							? "in" : "out");
-			/* else we know this overlay write is safe */
-			clear_toggle (urb->dev,
-				epnum & 0x0f, !(epnum & 0x10), qh);
-		}
-
 		/* just one way to queue requests: swap with the dummy qtd.
-		 * only hc or qh_completions() usually modify the overlay.
+		 * only hc or qh_refresh() ever modify the overlay.
 		 */
 		if (likely (qtd != 0)) {
 			struct ehci_qtd		*dummy;
@@ -936,8 +955,6 @@ submit_async (

 /* the async qh for the qtds being reclaimed are now unlinked from the HC */

-static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
-
 static void end_unlink_async (struct ehci_hcd *ehci, struct pt_regs *regs)
 {
 	struct ehci_qh		*qh = ehci->reclaim;

--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -325,7 +325,7 @@ static void intr_deschedule (
 		status = disable_periodic (ehci);
 	else {
 		status = 0;
-		vdbg ("periodic schedule still enabled");
+		ehci_vdbg (ehci, "periodic schedule still enabled\n");
 	}

 	/*
@@ -342,7 +342,7 @@ static void intr_deschedule (
 			 * the race is very short.  then if qh also isn't
 			 * rescheduled soon, it won't matter.  otherwise...
 			 */
-			vdbg ("intr_deschedule...");
+			ehci_vdbg (ehci, "intr_deschedule...\n");
 		}
 	} else
 		qh->hw_next = EHCI_LIST_END;
@@ -450,6 +450,7 @@ static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	__le32		c_mask;
 	unsigned	frame;		/* 0..(qh->period - 1), or NO_FRAME */

+	qh_refresh(ehci, qh);
 	qh->hw_next = EHCI_LIST_END;
 	frame = qh->start;