Commit 5f56f409, authored by Wei Wang, committed by David S. Miller

net: introduce DST_NOGC in dst_release() to destroy dst based on refcnt

The current mechanism for freeing a dst is a bit complicated. A dst has its
own ref count, and when a user grabs a reference to the dst, the ref count is
properly taken in most cases, except in the IPv4/IPv6/decnet/xfrm routing
code, for historical reasons.

If the reference to dst is always taken properly, we should be able to
simplify the logic in dst_release() to destroy dst when dst->__refcnt
drops from 1 to 0. And this should be the only condition to determine
if we can call dst_destroy().
And as dst is always ref counted, there is no need for a dst garbage
list to hold the dst entries that have already been removed by the routing
code but are still held by other users. The task that periodically
checks the list to free a dst once its ref count becomes 0 is also no
longer needed.

This patch introduces a temporary flag, DST_NOGC (no garbage collector).
If it is set in the dst, dst_release() will call dst_destroy() when
dst->__refcnt drops to 0. dst_hold_safe() will also check for this flag
and use atomic_inc_not_zero(), as it does for DST_NOCACHE, to avoid a
double free.
This temporary flag is mainly used so that we can make the transition
component by component without breaking other parts.
This flag will be removed after all components are properly transitioned.

This patch also introduces a new function dst_release_immediate() which
destroys dst without waiting on the rcu when refcnt drops to 0. It will
be used in later patches.

Follow-up patches will correct all the places to properly take ref count
on dst and mark DST_NOGC. dst_release() or dst_release_immediate() will
be used to release the dst instead of dst_free() and its related
functions.
And final clean-up patch will remove the DST_NOGC flag.
Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1dbe3252
...@@ -58,6 +58,7 @@ struct dst_entry { ...@@ -58,6 +58,7 @@ struct dst_entry {
#define DST_XFRM_TUNNEL 0x0080 #define DST_XFRM_TUNNEL 0x0080
#define DST_XFRM_QUEUE 0x0100 #define DST_XFRM_QUEUE 0x0100
#define DST_METADATA 0x0200 #define DST_METADATA 0x0200
#define DST_NOGC 0x0400
short error; short error;
...@@ -278,6 +279,8 @@ static inline struct dst_entry *dst_clone(struct dst_entry *dst) ...@@ -278,6 +279,8 @@ static inline struct dst_entry *dst_clone(struct dst_entry *dst)
void dst_release(struct dst_entry *dst); void dst_release(struct dst_entry *dst);
void dst_release_immediate(struct dst_entry *dst);
static inline void refdst_drop(unsigned long refdst) static inline void refdst_drop(unsigned long refdst)
{ {
if (!(refdst & SKB_DST_NOREF)) if (!(refdst & SKB_DST_NOREF))
...@@ -334,7 +337,7 @@ static inline void skb_dst_force(struct sk_buff *skb) ...@@ -334,7 +337,7 @@ static inline void skb_dst_force(struct sk_buff *skb)
*/ */
static inline bool dst_hold_safe(struct dst_entry *dst) static inline bool dst_hold_safe(struct dst_entry *dst)
{ {
if (dst->flags & DST_NOCACHE) if (dst->flags & (DST_NOCACHE | DST_NOGC))
return atomic_inc_not_zero(&dst->__refcnt); return atomic_inc_not_zero(&dst->__refcnt);
dst_hold(dst); dst_hold(dst);
return true; return true;
......
...@@ -300,18 +300,34 @@ void dst_release(struct dst_entry *dst) ...@@ -300,18 +300,34 @@ void dst_release(struct dst_entry *dst)
{ {
if (dst) { if (dst) {
int newrefcnt; int newrefcnt;
unsigned short nocache = dst->flags & DST_NOCACHE; unsigned short destroy_after_rcu = dst->flags &
(DST_NOCACHE | DST_NOGC);
newrefcnt = atomic_dec_return(&dst->__refcnt); newrefcnt = atomic_dec_return(&dst->__refcnt);
if (unlikely(newrefcnt < 0)) if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n", net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt); __func__, dst, newrefcnt);
if (!newrefcnt && unlikely(nocache)) if (!newrefcnt && unlikely(destroy_after_rcu))
call_rcu(&dst->rcu_head, dst_destroy_rcu); call_rcu(&dst->rcu_head, dst_destroy_rcu);
} }
} }
EXPORT_SYMBOL(dst_release); EXPORT_SYMBOL(dst_release);
/*
 * dst_release_immediate - drop one reference on @dst, destroying it at once
 * if that was the last reference.
 * @dst: entry to release; a NULL pointer is accepted and ignored.
 *
 * Unlike dst_release(), which defers destruction through call_rcu(), this
 * calls dst_destroy() directly as soon as the ref count reaches 0.
 * A ref count that goes negative is reported with a rate-limited warning.
 */
void dst_release_immediate(struct dst_entry *dst)
{
	int remaining;

	if (!dst)
		return;

	remaining = atomic_dec_return(&dst->__refcnt);

	/* Going below zero means more releases than holds. */
	if (unlikely(remaining < 0))
		net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
				     __func__, dst, remaining);

	/* Last reference gone: tear down now, no RCU deferral. */
	if (remaining == 0)
		dst_destroy(dst);
}
EXPORT_SYMBOL(dst_release_immediate);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{ {
struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment