Commit 24029a36 authored by David S. Miller's avatar David S. Miller

Merge branch 'sock_diag_destruction_events'

Craig Gallek says:

====================
Socket destruction events via netlink sock_diag

This series extends the netlink sock_diag interface to broadcast
socket information as they are being destroyed.  The current
interface is poll based and can not be used to retreive information
about sockets that are destroyed between poll intervals.

Only inet sockets are broadcast in this implementation, but other
families could easily be added as needed in the future.

If this patch set is accepted, a follow-up patch to the ss utility
in the iproute2 suite will also be submitted.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 916035dd 35ac838a
......@@ -24,6 +24,7 @@ struct inet_diag_handler {
struct inet_diag_msg *r,
void *info);
__u16 idiag_type;
__u16 idiag_info_size;
};
struct inet_connection_sock;
......
#ifndef __SOCK_DIAG_H__
#define __SOCK_DIAG_H__
#include <linux/netlink.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
struct sk_buff;
......@@ -11,6 +14,7 @@ struct sock;
struct sock_diag_handler {
__u8 family;
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
int (*get_info)(struct sk_buff *skb, struct sock *sk);
};
int sock_diag_register(const struct sock_diag_handler *h);
......@@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
struct sk_buff *skb, int attrtype);
static inline
enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
{
switch (sk->sk_family) {
case AF_INET:
switch (sk->sk_protocol) {
case IPPROTO_TCP:
return SKNLGRP_INET_TCP_DESTROY;
case IPPROTO_UDP:
return SKNLGRP_INET_UDP_DESTROY;
default:
return SKNLGRP_NONE;
}
case AF_INET6:
switch (sk->sk_protocol) {
case IPPROTO_TCP:
return SKNLGRP_INET6_TCP_DESTROY;
case IPPROTO_UDP:
return SKNLGRP_INET6_UDP_DESTROY;
default:
return SKNLGRP_NONE;
}
default:
return SKNLGRP_NONE;
}
}
static inline
bool sock_diag_has_destroy_listeners(const struct sock *sk)
{
const struct net *n = sock_net(sk);
const enum sknetlink_groups group = sock_diag_destroy_group(sk);
return group != SKNLGRP_NONE && n->diag_nlsk &&
netlink_has_listeners(n->diag_nlsk, group);
}
void sock_diag_broadcast_destroy(struct sock *sk);
#endif
......@@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern);
void sk_free(struct sock *sk);
void sk_destruct(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
......
......@@ -111,9 +111,10 @@ enum {
INET_DIAG_SKMEMINFO,
INET_DIAG_SHUTDOWN,
INET_DIAG_DCTCPINFO,
INET_DIAG_PROTOCOL, /* response attribute only */
};
#define INET_DIAG_MAX INET_DIAG_DCTCPINFO
#define INET_DIAG_MAX INET_DIAG_PROTOCOL
/* INET_DIAG_MEM */
......
......@@ -23,4 +23,14 @@ enum {
SK_MEMINFO_VARS,
};
enum sknetlink_groups {
SKNLGRP_NONE,
SKNLGRP_INET_TCP_DESTROY,
SKNLGRP_INET_UDP_DESTROY,
SKNLGRP_INET6_TCP_DESTROY,
SKNLGRP_INET6_UDP_DESTROY,
__SKNLGRP_MAX,
};
#define SKNLGRP_MAX (__SKNLGRP_MAX - 1)
#endif /* _UAPI__SOCK_DIAG_H__ */
......@@ -131,6 +131,7 @@
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
#include <linux/sock_diag.h>
#include <linux/filter.h>
......@@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
}
EXPORT_SYMBOL(sk_alloc);
static void __sk_free(struct sock *sk)
void sk_destruct(struct sock *sk)
{
struct sk_filter *filter;
......@@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk)
sk_prot_free(sk->sk_prot_creator, sk);
}
static void __sk_free(struct sock *sk)
{
if (unlikely(sock_diag_has_destroy_listeners(sk)))
sock_diag_broadcast_destroy(sk);
else
sk_destruct(sk);
}
void sk_free(struct sock *sk)
{
/*
......
......@@ -5,6 +5,9 @@
#include <net/net_namespace.h>
#include <linux/module.h>
#include <net/sock.h>
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
......@@ -12,6 +15,7 @@
static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
static u64 sock_gen_cookie(struct sock *sk)
{
......@@ -101,6 +105,62 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
}
EXPORT_SYMBOL(sock_diag_put_filterinfo);
struct broadcast_sk {
struct sock *sk;
struct work_struct work;
};
static size_t sock_diag_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+ nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
+ nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
struct broadcast_sk *bsk =
container_of(work, struct broadcast_sk, work);
struct sock *sk = bsk->sk;
const struct sock_diag_handler *hndl;
struct sk_buff *skb;
const enum sknetlink_groups group = sock_diag_destroy_group(sk);
int err = -1;
WARN_ON(group == SKNLGRP_NONE);
skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
if (!skb)
goto out;
mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[sk->sk_family];
if (hndl && hndl->get_info)
err = hndl->get_info(skb, sk);
mutex_unlock(&sock_diag_table_mutex);
if (!err)
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
GFP_KERNEL);
else
kfree_skb(skb);
out:
sk_destruct(sk);
kfree(bsk);
}
void sock_diag_broadcast_destroy(struct sock *sk)
{
/* Note, this function is often called from an interrupt context. */
struct broadcast_sk *bsk =
kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);
if (!bsk)
return sk_destruct(sk);
bsk->sk = sk;
INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
queue_work(broadcast_wq, &bsk->work);
}
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
mutex_lock(&sock_diag_table_mutex);
......@@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb)
mutex_unlock(&sock_diag_mutex);
}
static int sock_diag_bind(struct net *net, int group)
{
switch (group) {
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
NETLINK_SOCK_DIAG, AF_INET);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
NETLINK_SOCK_DIAG, AF_INET);
break;
}
return 0;
}
static int __net_init diag_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.groups = SKNLGRP_MAX,
.input = sock_diag_rcv,
.bind = sock_diag_bind,
.flags = NL_CFG_F_NONROOT_RECV,
};
net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
......@@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = {
static int __init sock_diag_init(void)
{
broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
BUG_ON(!broadcast_wq);
return register_pernet_subsys(&diag_net_ops);
}
static void __exit sock_diag_exit(void)
{
unregister_pernet_subsys(&diag_net_ops);
destroy_workqueue(broadcast_wq);
}
module_init(sock_diag_init);
......
......@@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = {
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
.idiag_type = IPPROTO_DCCP,
.idiag_info_size = sizeof(struct tcp_info),
};
static int __init dccp_diag_init(void)
......
......@@ -200,9 +200,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
}
#undef EXPIRES_IN_MS
if (ext & (1 << (INET_DIAG_INFO - 1))) {
if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
attr = nla_reserve(skb, INET_DIAG_INFO,
sizeof(struct tcp_info));
handler->idiag_info_size);
if (!attr)
goto errout;
......@@ -1078,14 +1078,60 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
return inet_diag_get_exact(skb, h, nlmsg_data(h));
}
static
int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
const struct inet_diag_handler *handler;
struct nlmsghdr *nlh;
struct nlattr *attr;
struct inet_diag_msg *r;
void *info = NULL;
int err = 0;
nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
if (!nlh)
return -ENOMEM;
r = nlmsg_data(nlh);
memset(r, 0, sizeof(*r));
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
nlmsg_cancel(skb, nlh);
return err;
}
handler = inet_diag_lock_handler(sk->sk_protocol);
if (IS_ERR(handler)) {
inet_diag_unlock_handler(handler);
nlmsg_cancel(skb, nlh);
return PTR_ERR(handler);
}
attr = handler->idiag_info_size
? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size)
: NULL;
if (attr)
info = nla_data(attr);
handler->idiag_get_info(sk, r, info);
inet_diag_unlock_handler(handler);
nlmsg_end(skb, nlh);
return 0;
}
static const struct sock_diag_handler inet_diag_handler = {
.family = AF_INET,
.dump = inet_diag_handler_dump,
.get_info = inet_diag_handler_get_info,
};
static const struct sock_diag_handler inet6_diag_handler = {
.family = AF_INET6,
.dump = inet_diag_handler_dump,
.get_info = inet_diag_handler_get_info,
};
int inet_diag_register(const struct inet_diag_handler *h)
......
......@@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
/* Return information about state of tcp endpoint in API format. */
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
u32 rate;
memset(info, 0, sizeof(*info));
if (sk->sk_type != SOCK_STREAM)
return;
info->tcpi_state = sk->sk_state;
info->tcpi_ca_state = icsk->icsk_ca_state;
......
......@@ -19,13 +19,14 @@
static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
void *_info)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_info *info = _info;
if (sk->sk_state == TCP_LISTEN) {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
} else {
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);
r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
r->idiag_wqueue = tp->write_seq - tp->snd_una;
}
......@@ -50,6 +51,7 @@ static const struct inet_diag_handler tcp_diag_handler = {
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
.idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
};
static int __init tcp_diag_init(void)
......
......@@ -170,6 +170,7 @@ static const struct inet_diag_handler udp_diag_handler = {
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDP,
.idiag_info_size = 0,
};
static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
......@@ -190,6 +191,7 @@ static const struct inet_diag_handler udplite_diag_handler = {
.dump_one = udplite_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDPLITE,
.idiag_info_size = 0,
};
static int __init udp_diag_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment