Commit dafcc438 authored by Eliezer Tamir, committed by David S. Miller

net: add socket option for low latency polling

Add a socket option, SO_LL, for low latency polling.
This allows overriding the global sysctl value (sysctl_net_ll_poll) with a per-socket one.
Unexport sysctl_net_ll_poll since, for now, it is not needed in modules.
Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 89bf1b5a
...@@ -81,4 +81,6 @@ ...@@ -81,4 +81,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -74,4 +74,6 @@ ...@@ -74,4 +74,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* __ASM_AVR32_SOCKET_H */ #endif /* __ASM_AVR32_SOCKET_H */
...@@ -76,6 +76,8 @@ ...@@ -76,6 +76,8 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -74,5 +74,7 @@ ...@@ -74,5 +74,7 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -74,4 +74,6 @@ ...@@ -74,4 +74,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -83,4 +83,6 @@ ...@@ -83,4 +83,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_IA64_SOCKET_H */ #endif /* _ASM_IA64_SOCKET_H */
...@@ -74,4 +74,6 @@ ...@@ -74,4 +74,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_M32R_SOCKET_H */ #endif /* _ASM_M32R_SOCKET_H */
...@@ -92,4 +92,6 @@ ...@@ -92,4 +92,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -74,4 +74,6 @@ ...@@ -74,4 +74,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -73,6 +73,8 @@ ...@@ -73,6 +73,8 @@
#define SO_SELECT_ERR_QUEUE 0x4026 #define SO_SELECT_ERR_QUEUE 0x4026
#define SO_LL 0x4027
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we /* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here. * have to define SOCK_NONBLOCK to a different value here.
*/ */
......
...@@ -81,4 +81,6 @@ ...@@ -81,4 +81,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_POWERPC_SOCKET_H */ #endif /* _ASM_POWERPC_SOCKET_H */
...@@ -80,4 +80,6 @@ ...@@ -80,4 +80,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -70,6 +70,8 @@ ...@@ -70,6 +70,8 @@
#define SO_SELECT_ERR_QUEUE 0x0029 #define SO_SELECT_ERR_QUEUE 0x0029
#define SO_LL 0x0030
/* Security levels - as per NRL IPv6 - don't actually do anything */ /* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
......
...@@ -85,4 +85,6 @@ ...@@ -85,4 +85,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* _XTENSA_SOCKET_H */ #endif /* _XTENSA_SOCKET_H */
...@@ -39,12 +39,12 @@ extern unsigned int sysctl_net_ll_poll __read_mostly; ...@@ -39,12 +39,12 @@ extern unsigned int sysctl_net_ll_poll __read_mostly;
/* we can use sched_clock() because we don't care much about precision /* we can use sched_clock() because we don't care much about precision
* we only care that the average is bounded * we only care that the average is bounded
*/ */
static inline u64 ll_end_time(void) static inline u64 ll_end_time(struct sock *sk)
{ {
u64 end_time = ACCESS_ONCE(sysctl_net_ll_poll); u64 end_time = ACCESS_ONCE(sk->sk_ll_usec);
/* we don't mind a ~2.5% imprecision /* we don't mind a ~2.5% imprecision
* sysctl_net_ll_poll is a u_int so this can't overflow * sk->sk_ll_usec is a u_int so this can't overflow
*/ */
end_time = (end_time << 10) + sched_clock(); end_time = (end_time << 10) + sched_clock();
...@@ -53,7 +53,7 @@ static inline u64 ll_end_time(void) ...@@ -53,7 +53,7 @@ static inline u64 ll_end_time(void)
static inline bool sk_valid_ll(struct sock *sk) static inline bool sk_valid_ll(struct sock *sk)
{ {
return sysctl_net_ll_poll && sk->sk_napi_id && return sk->sk_ll_usec && sk->sk_napi_id &&
!need_resched() && !signal_pending(current); !need_resched() && !signal_pending(current);
} }
...@@ -65,7 +65,7 @@ static inline bool can_poll_ll(u64 end_time) ...@@ -65,7 +65,7 @@ static inline bool can_poll_ll(u64 end_time)
static inline bool sk_poll_ll(struct sock *sk, int nonblock) static inline bool sk_poll_ll(struct sock *sk, int nonblock)
{ {
const struct net_device_ops *ops; const struct net_device_ops *ops;
u64 end_time = ll_end_time(); u64 end_time = ll_end_time(sk);
struct napi_struct *napi; struct napi_struct *napi;
int rc = false; int rc = false;
...@@ -118,7 +118,7 @@ static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) ...@@ -118,7 +118,7 @@ static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
#else /* CONFIG_NET_LL_RX_POLL */ #else /* CONFIG_NET_LL_RX_POLL */
static inline u64 ll_end_time(void) static inline u64 ll_end_time(struct sock *sk)
{ {
return 0; return 0;
} }
......
...@@ -230,6 +230,7 @@ struct cg_proto; ...@@ -230,6 +230,7 @@ struct cg_proto;
* @sk_wmem_queued: persistent queue size * @sk_wmem_queued: persistent queue size
* @sk_forward_alloc: space allocated forward * @sk_forward_alloc: space allocated forward
* @sk_napi_id: id of the last napi context to receive data for sk * @sk_napi_id: id of the last napi context to receive data for sk
* @sk_ll_usec: usecs to busypoll when there is no data
* @sk_allocation: allocation mode * @sk_allocation: allocation mode
* @sk_sndbuf: size of send buffer in bytes * @sk_sndbuf: size of send buffer in bytes
* @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
...@@ -328,6 +329,7 @@ struct sock { ...@@ -328,6 +329,7 @@ struct sock {
#endif #endif
#ifdef CONFIG_NET_LL_RX_POLL #ifdef CONFIG_NET_LL_RX_POLL
unsigned int sk_napi_id; unsigned int sk_napi_id;
unsigned int sk_ll_usec;
#endif #endif
atomic_t sk_drops; atomic_t sk_drops;
int sk_rcvbuf; int sk_rcvbuf;
......
...@@ -76,4 +76,6 @@ ...@@ -76,4 +76,6 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_LL 46
#endif /* __ASM_GENERIC_SOCKET_H */ #endif /* __ASM_GENERIC_SOCKET_H */
...@@ -913,6 +913,19 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ...@@ -913,6 +913,19 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
break; break;
#ifdef CONFIG_NET_LL_RX_POLL
case SO_LL:
/* allow unprivileged users to decrease the value */
if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
ret = -EPERM;
else {
if (val < 0)
ret = -EINVAL;
else
sk->sk_ll_usec = val;
}
break;
#endif
default: default:
ret = -ENOPROTOOPT; ret = -ENOPROTOOPT;
break; break;
...@@ -1170,6 +1183,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1170,6 +1183,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
break; break;
#ifdef CONFIG_NET_LL_RX_POLL
case SO_LL:
v.val = sk->sk_ll_usec;
break;
#endif
default: default:
return -ENOPROTOOPT; return -ENOPROTOOPT;
} }
...@@ -2288,6 +2307,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) ...@@ -2288,6 +2307,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_LL_RX_POLL #ifdef CONFIG_NET_LL_RX_POLL
sk->sk_napi_id = 0; sk->sk_napi_id = 0;
sk->sk_ll_usec = sysctl_net_ll_poll;
#endif #endif
/* /*
......
...@@ -108,7 +108,6 @@ ...@@ -108,7 +108,6 @@
#ifdef CONFIG_NET_LL_RX_POLL #ifdef CONFIG_NET_LL_RX_POLL
unsigned int sysctl_net_ll_poll __read_mostly; unsigned int sysctl_net_ll_poll __read_mostly;
EXPORT_SYMBOL_GPL(sysctl_net_ll_poll);
#endif #endif
static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment