Commit 95eabdd2, authored by Antoine Tenart, committed by Florian Westphal

netfilter: conntrack: fix the gc rescheduling delay

Commit 2cfadb76 ("netfilter: conntrack: revisit gc autotuning")
changed the eviction rescheduling to use the average expiry of scanned
entries (within 1-60s) by doing:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += expires;
      next_run /= 2;
  }

The issue is that the above makes the average ('next_run' here) depend
more on the last expiration values than on the first ones (for sets > 2),
because each iteration halves the weight of all earlier samples.
Depending on the expiration values used to compute the average, the
result can be quite different from what is expected. To fix this, compute
an incremental arithmetic mean instead:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += (expires - next_run) / ++count;
  }

Fixes: 2cfadb76 ("netfilter: conntrack: revisit gc autotuning")
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
parent c29b0682
...@@ -67,6 +67,7 @@ struct conntrack_gc_work { ...@@ -67,6 +67,7 @@ struct conntrack_gc_work {
struct delayed_work dwork; struct delayed_work dwork;
u32 next_bucket; u32 next_bucket;
u32 avg_timeout; u32 avg_timeout;
u32 count;
u32 start_time; u32 start_time;
bool exiting; bool exiting;
bool early_drop; bool early_drop;
...@@ -1466,6 +1467,7 @@ static void gc_worker(struct work_struct *work) ...@@ -1466,6 +1467,7 @@ static void gc_worker(struct work_struct *work)
unsigned int expired_count = 0; unsigned int expired_count = 0;
unsigned long next_run; unsigned long next_run;
s32 delta_time; s32 delta_time;
long count;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work); gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
...@@ -1475,10 +1477,12 @@ static void gc_worker(struct work_struct *work) ...@@ -1475,10 +1477,12 @@ static void gc_worker(struct work_struct *work)
if (i == 0) { if (i == 0) {
gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
gc_work->count = 1;
gc_work->start_time = start_time; gc_work->start_time = start_time;
} }
next_run = gc_work->avg_timeout; next_run = gc_work->avg_timeout;
count = gc_work->count;
end_time = start_time + GC_SCAN_MAX_DURATION; end_time = start_time + GC_SCAN_MAX_DURATION;
...@@ -1498,8 +1502,8 @@ static void gc_worker(struct work_struct *work) ...@@ -1498,8 +1502,8 @@ static void gc_worker(struct work_struct *work)
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet; struct nf_conntrack_net *cnet;
unsigned long expires;
struct net *net; struct net *net;
long expires;
tmp = nf_ct_tuplehash_to_ctrack(h); tmp = nf_ct_tuplehash_to_ctrack(h);
...@@ -1513,6 +1517,7 @@ static void gc_worker(struct work_struct *work) ...@@ -1513,6 +1517,7 @@ static void gc_worker(struct work_struct *work)
gc_work->next_bucket = i; gc_work->next_bucket = i;
gc_work->avg_timeout = next_run; gc_work->avg_timeout = next_run;
gc_work->count = count;
delta_time = nfct_time_stamp - gc_work->start_time; delta_time = nfct_time_stamp - gc_work->start_time;
...@@ -1528,8 +1533,8 @@ static void gc_worker(struct work_struct *work) ...@@ -1528,8 +1533,8 @@ static void gc_worker(struct work_struct *work)
} }
expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
expires = (expires - (long)next_run) / ++count;
next_run += expires; next_run += expires;
next_run /= 2u;
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue; continue;
...@@ -1570,6 +1575,7 @@ static void gc_worker(struct work_struct *work) ...@@ -1570,6 +1575,7 @@ static void gc_worker(struct work_struct *work)
delta_time = nfct_time_stamp - end_time; delta_time = nfct_time_stamp - end_time;
if (delta_time > 0 && i < hashsz) { if (delta_time > 0 && i < hashsz) {
gc_work->avg_timeout = next_run; gc_work->avg_timeout = next_run;
gc_work->count = count;
gc_work->next_bucket = i; gc_work->next_bucket = i;
next_run = 0; next_run = 0;
goto early_exit; goto early_exit;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment