Commit 77c842ca authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'net-dqs-optimize-if-stall-threshold-is-not-set'

Breno Leitao says:

====================
net: dqs: optimize if stall threshold is not set

Here are four patches aimed at enhancing the Dynamic Queue Limit (DQL)
subsystem within the networking stack.

The first two commits involve code refactoring, while the third patch
introduces the actual change. The fourth patch just improves the cache
locality.

Typically, when DQL is enabled, stall information is always populated
through dql_queue_stall(). However, this information is only necessary
if a stall threshold is set, which is stored in struct dql->stall_thrs.

Although dql_queue_stall() is relatively inexpensive, it is not entirely
free due to memory barriers and similar overheads.

To optimize performance, refrain from calling dql_queue_stall() when no
stall threshold is set, thus avoiding the processing of unnecessary
information.

v1: https://lore.kernel.org/all/20240404145939.3601097-1-leitao@debian.org/
====================

Link: https://lore.kernel.org/r/20240411192241.2498631-1-leitao@debian.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 444cde13 4ba67ef3
...@@ -50,6 +50,9 @@ struct dql { ...@@ -50,6 +50,9 @@ struct dql {
unsigned int adj_limit; /* limit + num_completed */ unsigned int adj_limit; /* limit + num_completed */
unsigned int last_obj_cnt; /* Count at last queuing */ unsigned int last_obj_cnt; /* Count at last queuing */
/* Stall threshold (in jiffies), defined by user */
unsigned short stall_thrs;
unsigned long history_head; /* top 58 bits of jiffies */ unsigned long history_head; /* top 58 bits of jiffies */
/* stall entries, a bit per entry */ /* stall entries, a bit per entry */
unsigned long history[DQL_HIST_LEN]; unsigned long history[DQL_HIST_LEN];
...@@ -71,8 +74,6 @@ struct dql { ...@@ -71,8 +74,6 @@ struct dql {
unsigned int min_limit; /* Minimum limit */ unsigned int min_limit; /* Minimum limit */
unsigned int slack_hold_time; /* Time to measure slack */ unsigned int slack_hold_time; /* Time to measure slack */
/* Stall threshold (in jiffies), defined by user */
unsigned short stall_thrs;
/* Longest stall detected, reported to user */ /* Longest stall detected, reported to user */
unsigned short stall_max; unsigned short stall_max;
unsigned long last_reap; /* Last reap (in jiffies) */ unsigned long last_reap; /* Last reap (in jiffies) */
...@@ -83,27 +84,11 @@ struct dql { ...@@ -83,27 +84,11 @@ struct dql {
#define DQL_MAX_OBJECT (UINT_MAX / 16) #define DQL_MAX_OBJECT (UINT_MAX / 16)
#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT) #define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT)
/* /* Populate the bitmap to be processed later in dql_check_stall() */
* Record number of objects queued. Assumes that caller has already checked static inline void dql_queue_stall(struct dql *dql)
* availability in the queue with dql_avail.
*/
static inline void dql_queued(struct dql *dql, unsigned int count)
{ {
unsigned long map, now, now_hi, i; unsigned long map, now, now_hi, i;
BUG_ON(count > DQL_MAX_OBJECT);
dql->last_obj_cnt = count;
/* We want to force a write first, so that cpu do not attempt
* to get cache line containing last_obj_cnt, num_queued, adj_limit
* in Shared state, but directly does a Request For Ownership
* It is only a hint, we use barrier() only.
*/
barrier();
dql->num_queued += count;
now = jiffies; now = jiffies;
now_hi = now / BITS_PER_LONG; now_hi = now / BITS_PER_LONG;
...@@ -133,6 +118,31 @@ static inline void dql_queued(struct dql *dql, unsigned int count) ...@@ -133,6 +118,31 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
} }
/*
* Record number of objects queued. Assumes that caller has already checked
* availability in the queue with dql_avail.
*/
static inline void dql_queued(struct dql *dql, unsigned int count)
{
if (WARN_ON_ONCE(count > DQL_MAX_OBJECT))
return;
dql->last_obj_cnt = count;
/* We want to force a write first, so that cpu do not attempt
* to get cache line containing last_obj_cnt, num_queued, adj_limit
* in Shared state, but directly does a Request For Ownership
* It is only a hint, we use barrier() only.
*/
barrier();
dql->num_queued += count;
/* Only populate stall information if the threshold is set */
if (READ_ONCE(dql->stall_thrs))
dql_queue_stall(dql);
}
/* Returns how many objects can be queued, < 0 indicates over limit. */ /* Returns how many objects can be queued, < 0 indicates over limit. */
static inline int dql_avail(const struct dql *dql) static inline int dql_avail(const struct dql *dql)
{ {
......
...@@ -15,12 +15,10 @@ ...@@ -15,12 +15,10 @@
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0) #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
static void dql_check_stall(struct dql *dql) static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
{ {
unsigned short stall_thrs;
unsigned long now; unsigned long now;
stall_thrs = READ_ONCE(dql->stall_thrs);
if (!stall_thrs) if (!stall_thrs)
return; return;
...@@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count) ...@@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count)
{ {
unsigned int inprogress, prev_inprogress, limit; unsigned int inprogress, prev_inprogress, limit;
unsigned int ovlimit, completed, num_queued; unsigned int ovlimit, completed, num_queued;
unsigned short stall_thrs;
bool all_prev_completed; bool all_prev_completed;
num_queued = READ_ONCE(dql->num_queued); num_queued = READ_ONCE(dql->num_queued);
/* Read stall_thrs in advance since it belongs to the same (first)
* cache line as ->num_queued. This way, dql_check_stall() does not
* need to touch the first cache line again later, reducing the window
* of possible false sharing.
*/
stall_thrs = READ_ONCE(dql->stall_thrs);
/* Can't complete more than what's in queue */ /* Can't complete more than what's in queue */
BUG_ON(count > num_queued - dql->num_completed); BUG_ON(count > num_queued - dql->num_completed);
...@@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count) ...@@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count)
dql->num_completed = completed; dql->num_completed = completed;
dql->prev_num_queued = num_queued; dql->prev_num_queued = num_queued;
dql_check_stall(dql); dql_check_stall(dql, stall_thrs);
} }
EXPORT_SYMBOL(dql_completed); EXPORT_SYMBOL(dql_completed);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment