Commit 627d2d6b authored by samanthakumar, committed by David S. Miller

udp: enable MSG_PEEK at non-zero offset

Enable peeking at UDP datagrams at the offset specified with socket
option SOL_SOCKET/SO_PEEK_OFF. Peek at any datagram in the queue, up
to the end of the given datagram.

Implement the SO_PEEK_OFF semantics introduced in commit ef64a54f
("sock: Introduce the SO_PEEK_OFF sock option"). Increase the offset
on peek, decrease it on regular reads.

When peeking, always checksum the packet immediately, to avoid
recomputation on subsequent peeks and final read.

The socket lock is not held for the duration of udp_recvmsg, so
peek and read operations can run concurrently. Only the last store
to sk_peek_off is preserved.

Signed-off-by: Sam Kumar <samanthakumar@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e6afc8ac
...@@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, ...@@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from, int len); struct iov_iter *from, int len);
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len);
/*
 * Convenience wrapper around __skb_free_datagram_locked() that always
 * passes a len argument of 0.
 */
static inline void skb_free_datagram_locked(struct sock *sk,
struct sk_buff *skb)
{
__skb_free_datagram_locked(sk, skb, 0);
}
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
......
...@@ -457,6 +457,8 @@ struct sock { ...@@ -457,6 +457,8 @@ struct sock {
#define SK_CAN_REUSE 1 #define SK_CAN_REUSE 1
#define SK_FORCE_REUSE 2 #define SK_FORCE_REUSE 2
int sk_set_peek_off(struct sock *sk, int val);
static inline int sk_peek_offset(struct sock *sk, int flags) static inline int sk_peek_offset(struct sock *sk, int flags)
{ {
if (unlikely(flags & MSG_PEEK)) { if (unlikely(flags & MSG_PEEK)) {
......
...@@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) ...@@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
} }
EXPORT_SYMBOL(skb_free_datagram); EXPORT_SYMBOL(skb_free_datagram);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{ {
bool slow; bool slow;
if (likely(atomic_read(&skb->users) == 1)) if (likely(atomic_read(&skb->users) == 1))
smp_rmb(); smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users))) else if (likely(!atomic_dec_and_test(&skb->users))) {
sk_peek_offset_bwd(sk, len);
return; return;
}
slow = lock_sock_fast(sk); slow = lock_sock_fast(sk);
sk_peek_offset_bwd(sk, len);
skb_orphan(skb); skb_orphan(skb);
sk_mem_reclaim_partial(sk); sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow); unlock_sock_fast(sk, slow);
...@@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) ...@@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
/* skb is now orphaned, can be freed outside of locked section */ /* skb is now orphaned, can be freed outside of locked section */
__kfree_skb(skb); __kfree_skb(skb);
} }
EXPORT_SYMBOL(skb_free_datagram_locked); EXPORT_SYMBOL(__skb_free_datagram_locked);
/** /**
* skb_kill_datagram - Free a datagram skbuff forcibly * skb_kill_datagram - Free a datagram skbuff forcibly
......
...@@ -2187,6 +2187,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount) ...@@ -2187,6 +2187,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
} }
EXPORT_SYMBOL(__sk_mem_reclaim); EXPORT_SYMBOL(__sk_mem_reclaim);
/**
 * sk_set_peek_off - store a new peek offset on a socket
 * @sk: socket whose sk_peek_off field is written
 * @val: requested offset; negative values are rejected
 *
 * Returns 0 after storing @val in sk->sk_peek_off, or -EINVAL when
 * @val is negative (in which case sk_peek_off is left untouched).
 */
int sk_set_peek_off(struct sock *sk, int val)
{
if (val < 0)
return -EINVAL;
sk->sk_peek_off = val;
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
/* /*
* Set of default routines for initialising struct proto_ops when * Set of default routines for initialising struct proto_ops when
......
...@@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = { ...@@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = {
.recvmsg = inet_recvmsg, .recvmsg = inet_recvmsg,
.mmap = sock_no_mmap, .mmap = sock_no_mmap,
.sendpage = inet_sendpage, .sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt, .compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt, .compat_getsockopt = compat_sock_common_getsockopt,
......
...@@ -1294,7 +1294,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, ...@@ -1294,7 +1294,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct sk_buff *skb; struct sk_buff *skb;
unsigned int ulen, copied; unsigned int ulen, copied;
int peeked, off = 0; int peeked, peeking, off;
int err; int err;
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false; bool checksum_valid = false;
...@@ -1304,15 +1304,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, ...@@ -1304,15 +1304,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
return ip_recv_error(sk, msg, len, addr_len); return ip_recv_error(sk, msg, len, addr_len);
try_again: try_again:
peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err); &peeked, &off, &err);
if (!skb) if (!skb)
goto out; return err;
ulen = skb->len; ulen = skb->len;
copied = len; copied = len;
if (copied > ulen) if (copied > ulen - off)
copied = ulen; copied = ulen - off;
else if (copied < ulen) else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC; msg->msg_flags |= MSG_TRUNC;
...@@ -1322,16 +1323,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, ...@@ -1322,16 +1323,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
* coverage checksum (UDP-Lite), do it before the copy. * coverage checksum (UDP-Lite), do it before the copy.
*/ */
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb); checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid) if (!checksum_valid)
goto csum_copy_err; goto csum_copy_err;
} }
if (checksum_valid || skb_csum_unnecessary(skb)) if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, 0, msg, copied); err = skb_copy_datagram_msg(skb, off, msg, copied);
else { else {
err = skb_copy_and_csum_datagram_msg(skb, 0, msg); err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL) if (err == -EINVAL)
goto csum_copy_err; goto csum_copy_err;
...@@ -1344,7 +1345,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, ...@@ -1344,7 +1345,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
UDP_INC_STATS_USER(sock_net(sk), UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite); UDP_MIB_INERRORS, is_udplite);
} }
goto out_free; skb_free_datagram_locked(sk, skb);
return err;
} }
if (!peeked) if (!peeked)
...@@ -1368,9 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, ...@@ -1368,9 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
if (flags & MSG_TRUNC) if (flags & MSG_TRUNC)
err = ulen; err = ulen;
out_free: __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
skb_free_datagram_locked(sk, skb);
out:
return err; return err;
csum_copy_err: csum_copy_err:
......
...@@ -561,6 +561,7 @@ const struct proto_ops inet6_dgram_ops = { ...@@ -561,6 +561,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet_recvmsg, /* ok */ .recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap, .mmap = sock_no_mmap,
.sendpage = sock_no_sendpage, .sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt, .compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt, .compat_getsockopt = compat_sock_common_getsockopt,
......
...@@ -357,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -357,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
unsigned int ulen, copied; unsigned int ulen, copied;
int peeked, off = 0; int peeked, peeking, off;
int err; int err;
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false; bool checksum_valid = false;
...@@ -371,15 +371,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -371,15 +371,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len); return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again: try_again:
peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err); &peeked, &off, &err);
if (!skb) if (!skb)
goto out; return err;
ulen = skb->len; ulen = skb->len;
copied = len; copied = len;
if (copied > ulen) if (copied > ulen - off)
copied = ulen; copied = ulen - off;
else if (copied < ulen) else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC; msg->msg_flags |= MSG_TRUNC;
...@@ -391,16 +392,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -391,16 +392,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
* coverage checksum (UDP-Lite), do it before the copy. * coverage checksum (UDP-Lite), do it before the copy.
*/ */
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb); checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid) if (!checksum_valid)
goto csum_copy_err; goto csum_copy_err;
} }
if (checksum_valid || skb_csum_unnecessary(skb)) if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, 0, msg, copied); err = skb_copy_datagram_msg(skb, off, msg, copied);
else { else {
err = skb_copy_and_csum_datagram_msg(skb, 0, msg); err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL) if (err == -EINVAL)
goto csum_copy_err; goto csum_copy_err;
} }
...@@ -417,7 +418,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -417,7 +418,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
UDP_MIB_INERRORS, UDP_MIB_INERRORS,
is_udplite); is_udplite);
} }
goto out_free; skb_free_datagram_locked(sk, skb);
return err;
} }
if (!peeked) { if (!peeked) {
if (is_udp4) if (is_udp4)
...@@ -465,9 +467,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -465,9 +467,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_TRUNC) if (flags & MSG_TRUNC)
err = ulen; err = ulen;
out_free: __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
skb_free_datagram_locked(sk, skb);
out:
return err; return err;
csum_copy_err: csum_copy_err:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment