Commit 71263992 authored by George Amanakis, committed by David S. Miller

sch_cake: Make the dual modes fairer

CAKE host fairness does not work well with TCP flows in dual-srchost and
dual-dsthost setups. The reason is that the ACKs generated by TCP flows
are classified as sparse flows and distort flow isolation between hosts.
Fix this by calculating host_load based only on the bulk flows a host
generates. On a hash collision the host_bulk_flow_count values must be
decremented on the old hosts and incremented on the new ones *if* the
queue is in the bulk set.
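
For context, the fairness arithmetic at stake is the reciprocal divide used by cake_enqueue() and cake_dequeue(): a host's per-flow quantum is scaled by 1/host_load. Below is a minimal userspace sketch of that scaling; quantum_div mirrors the table sch_cake fills in cake_init(), while init_quantum_div() and host_scaled_quantum() are hypothetical helper names and 1514 is just an example flow quantum.

#include <stdio.h>

#define CAKE_QUEUES 1024

/* Reciprocal table, filled the way sch_cake does in cake_init():
 * dividing by multiplying with 65535/i and shifting right by 16
 * avoids an integer division on every packet.
 */
static unsigned short quantum_div[CAKE_QUEUES + 1];

static void init_quantum_div(void)
{
        for (int i = 1; i <= CAKE_QUEUES; i++)
                quantum_div[i] = 65535 / i;
}

/* With this patch, host_load counts only the host's bulk flows, so
 * sparse ACK flows no longer shrink the quantum of the host's bulk
 * traffic.
 */
static unsigned int host_scaled_quantum(unsigned int flow_quantum,
                                        unsigned short bulk_flow_count)
{
        unsigned short host_load = bulk_flow_count ? bulk_flow_count : 1;

        return (flow_quantum * quantum_div[host_load]) >> 16;
}

int main(void)
{
        init_quantum_div();
        /* one bulk flow gets (almost) the full quantum ... */
        printf("%u\n", host_scaled_quantum(1514, 1));   /* -> 1513 */
        /* ... four bulk flows each get about a quarter */
        printf("%u\n", host_scaled_quantum(1514, 4));   /* -> 378 */
        return 0;
}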
Reported-by: Pete Heist <peteheist@gmail.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c21e18a5
@@ -138,8 +138,8 @@ struct cake_flow {
 struct cake_host {
         u32 srchost_tag;
         u32 dsthost_tag;
-        u16 srchost_refcnt;
-        u16 dsthost_refcnt;
+        u16 srchost_bulk_flow_count;
+        u16 dsthost_bulk_flow_count;
 };

 struct cake_heap_entry {
@@ -746,8 +746,10 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
                 * queue, accept the collision, update the host tags.
                 */
                q->way_collisions++;
-               q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
-               q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+               if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+                       q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+                       q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+               }
                allocate_src = cake_dsrc(flow_mode);
                allocate_dst = cake_ddst(flow_mode);
 found:
@@ -767,13 +769,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
                        }
                        for (i = 0; i < CAKE_SET_WAYS;
                             i++, k = (k + 1) % CAKE_SET_WAYS) {
-                               if (!q->hosts[outer_hash + k].srchost_refcnt)
+                               if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
                                        break;
                        }
                        q->hosts[outer_hash + k].srchost_tag = srchost_hash;
 found_src:
                        srchost_idx = outer_hash + k;
-                       q->hosts[srchost_idx].srchost_refcnt++;
+                       if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+                               q->hosts[srchost_idx].srchost_bulk_flow_count++;
                        q->flows[reduced_hash].srchost = srchost_idx;
                }
@@ -789,13 +792,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
                        }
                        for (i = 0; i < CAKE_SET_WAYS;
                             i++, k = (k + 1) % CAKE_SET_WAYS) {
-                               if (!q->hosts[outer_hash + k].dsthost_refcnt)
+                               if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
                                        break;
                        }
                        q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
 found_dst:
                        dsthost_idx = outer_hash + k;
-                       q->hosts[dsthost_idx].dsthost_refcnt++;
+                       if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+                               q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
                        q->flows[reduced_hash].dsthost = dsthost_idx;
                }
        }
@@ -1794,20 +1798,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                b->sparse_flow_count++;

                if (cake_dsrc(q->flow_mode))
-                       host_load = max(host_load, srchost->srchost_refcnt);
+                       host_load = max(host_load, srchost->srchost_bulk_flow_count);

                if (cake_ddst(q->flow_mode))
-                       host_load = max(host_load, dsthost->dsthost_refcnt);
+                       host_load = max(host_load, dsthost->dsthost_bulk_flow_count);

                flow->deficit = (b->flow_quantum *
                                 quantum_div[host_load]) >> 16;
        } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+               struct cake_host *srchost = &b->hosts[flow->srchost];
+               struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
                /* this flow was empty, accounted as a sparse flow, but actually
                 * in the bulk rotation.
                 */
                flow->set = CAKE_SET_BULK;
                b->sparse_flow_count--;
                b->bulk_flow_count++;
+
+               if (cake_dsrc(q->flow_mode))
+                       srchost->srchost_bulk_flow_count++;
+
+               if (cake_ddst(q->flow_mode))
+                       dsthost->dsthost_bulk_flow_count++;
+
        }

        if (q->buffer_used > q->buffer_max_used)
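
The CAKE_SET_SPARSE_WAIT branch above is one of several places (collision handling, enqueue promotion, dequeue promotion, flow teardown) that must keep the per-host counters in lockstep with flow-set transitions. Below is a minimal standalone model of that invariant; promote_to_bulk() and demote_from_bulk() are hypothetical helpers written for illustration and do not exist in sch_cake.

#include <assert.h>
#include <stdio.h>

enum cake_set { SPARSE, SPARSE_WAIT, BULK, DECAYING, NONE };

struct host { unsigned short bulk_flow_count; };
struct flow { enum cake_set set; struct host *src, *dst; };

/* Every transition into or out of the bulk set must touch both hosts'
 * counters, otherwise host_load (and hence the per-flow quantum) drifts.
 */
static void promote_to_bulk(struct flow *f)
{
        assert(f->set != BULK);
        f->set = BULK;
        f->src->bulk_flow_count++;
        f->dst->bulk_flow_count++;
}

static void demote_from_bulk(struct flow *f, enum cake_set target)
{
        assert(f->set == BULK);
        f->set = target;
        f->src->bulk_flow_count--;
        f->dst->bulk_flow_count--;
}

int main(void)
{
        struct host a = { 0 }, b = { 0 };
        struct flow f = { SPARSE_WAIT, &a, &b };

        promote_to_bulk(&f);        /* e.g. the SPARSE_WAIT path above */
        demote_from_bulk(&f, NONE); /* e.g. the teardown path below */
        printf("%u %u\n", a.bulk_flow_count, b.bulk_flow_count); /* 0 0 */
        return 0;
}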
@@ -1975,23 +1989,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
        dsthost = &b->hosts[flow->dsthost];
        host_load = 1;

-       if (cake_dsrc(q->flow_mode))
-               host_load = max(host_load, srchost->srchost_refcnt);
-
-       if (cake_ddst(q->flow_mode))
-               host_load = max(host_load, dsthost->dsthost_refcnt);
-
-       WARN_ON(host_load > CAKE_QUEUES);
-
        /* flow isolation (DRR++) */
        if (flow->deficit <= 0) {
-               /* The shifted prandom_u32() is a way to apply dithering to
-                * avoid accumulating roundoff errors
-                */
-               flow->deficit += (b->flow_quantum * quantum_div[host_load] +
-                                 (prandom_u32() >> 16)) >> 16;
-               list_move_tail(&flow->flowchain, &b->old_flows);
-
                /* Keep all flows with deficits out of the sparse and decaying
                 * rotations. No non-empty flow can go into the decaying
                 * rotation, so they can't get deficits
@@ -2000,6 +1999,13 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
                        if (flow->head) {
                                b->sparse_flow_count--;
                                b->bulk_flow_count++;
+
+                               if (cake_dsrc(q->flow_mode))
+                                       srchost->srchost_bulk_flow_count++;
+
+                               if (cake_ddst(q->flow_mode))
+                                       dsthost->dsthost_bulk_flow_count++;
+
                                flow->set = CAKE_SET_BULK;
                        } else {
                                /* we've moved it to the bulk rotation for
@@ -2009,6 +2015,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
                                flow->set = CAKE_SET_SPARSE_WAIT;
                        }
                }
+
+               if (cake_dsrc(q->flow_mode))
+                       host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+               if (cake_ddst(q->flow_mode))
+                       host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+               WARN_ON(host_load > CAKE_QUEUES);
+
+               /* The shifted prandom_u32() is a way to apply dithering to
+                * avoid accumulating roundoff errors
+                */
+               flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+                                 (prandom_u32() >> 16)) >> 16;
+               list_move_tail(&flow->flowchain, &b->old_flows);
+
                goto retry;
        }
@@ -2029,6 +2051,13 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
                                               &b->decaying_flows);
                        if (flow->set == CAKE_SET_BULK) {
                                b->bulk_flow_count--;
+
+                               if (cake_dsrc(q->flow_mode))
+                                       srchost->srchost_bulk_flow_count--;
+
+                               if (cake_ddst(q->flow_mode))
+                                       dsthost->dsthost_bulk_flow_count--;
+
                                b->decaying_flow_count++;
                        } else if (flow->set == CAKE_SET_SPARSE ||
                                   flow->set == CAKE_SET_SPARSE_WAIT) {
@@ -2042,14 +2071,19 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
                if (flow->set == CAKE_SET_SPARSE ||
                    flow->set == CAKE_SET_SPARSE_WAIT)
                        b->sparse_flow_count--;
-               else if (flow->set == CAKE_SET_BULK)
+               else if (flow->set == CAKE_SET_BULK) {
                        b->bulk_flow_count--;
-               else
+
+                       if (cake_dsrc(q->flow_mode))
+                               srchost->srchost_bulk_flow_count--;
+
+                       if (cake_ddst(q->flow_mode))
+                               dsthost->dsthost_bulk_flow_count--;
+               } else
                        b->decaying_flow_count--;

                flow->set = CAKE_SET_NONE;
-               srchost->srchost_refcnt--;
-               dsthost->dsthost_refcnt--;
        }

        goto begin;
}
...