Commit 4e1e83be authored by David S. Miller's avatar David S. Miller

Merge branch 'SO_PRIORITY'

Eric Dumazet says:

====================
tcp: provide correct skb->priority

SO_PRIORITY socket option requests TCP egress packets
to contain a user provided value.

TCP manages to send most packets with the requested values,
notably for TCP_ESTABLISHED state, but fails to do so for
few packets.

These packets are control packets sent on behalf
of SYN_RECV or TIME_WAIT states.

Note that to test this with packetdrill, it is a bit
of a hassle, since packetdrill can not verify priority
of egress packets, other than indirect observations,
using for example sch_prio on its tunnel device.

The bad skb priorities cause problems for GCP,
as this field is one of the keys used in routing.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 2b6fd3ea f6c0f5d2
...@@ -71,6 +71,7 @@ struct inet_timewait_sock { ...@@ -71,6 +71,7 @@ struct inet_timewait_sock {
tw_pad : 2, /* 2 bits hole */ tw_pad : 2, /* 2 bits hole */
tw_tos : 8; tw_tos : 8;
u32 tw_txhash; u32 tw_txhash;
u32 tw_priority;
struct timer_list tw_timer; struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb; struct inet_bind_bucket *tw_tb;
}; };
......
...@@ -981,7 +981,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); ...@@ -981,7 +981,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
* upper-layer output functions * upper-layer output functions
*/ */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
__u32 mark, struct ipv6_txoptions *opt, int tclass); __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
......
...@@ -230,7 +230,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req ...@@ -230,7 +230,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt; opt = ireq->ipv6_opt;
if (!opt) if (!opt)
opt = rcu_dereference(np->opt); opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass); err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
sk->sk_priority);
rcu_read_unlock(); rcu_read_unlock();
err = net_xmit_eval(err); err = net_xmit_eval(err);
} }
...@@ -284,7 +285,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) ...@@ -284,7 +285,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) { if (!IS_ERR(dst)) {
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0); ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS); DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS(DCCP_MIB_OUTRSTS); DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return; return;
......
...@@ -1694,7 +1694,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, ...@@ -1694,7 +1694,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
inet_sk(sk)->tos = arg->tos; inet_sk(sk)->tos = arg->tos;
sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_bound_dev_if = arg->bound_dev_if;
sk->sk_sndbuf = sysctl_wmem_default; sk->sk_sndbuf = sysctl_wmem_default;
......
...@@ -771,6 +771,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -771,6 +771,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) { if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark; inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk); transmit_time = tcp_transmit_time(sk);
} }
ip_send_unicast_reply(ctl_sk, ip_send_unicast_reply(ctl_sk,
...@@ -866,6 +868,8 @@ static void tcp_v4_send_ack(const struct sock *sk, ...@@ -866,6 +868,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark; inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk); transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk, ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt, skb, &TCP_SKB_CB(skb)->header.h4.opt,
......
...@@ -266,6 +266,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) ...@@ -266,6 +266,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_transparent = inet->transparent; tw->tw_transparent = inet->transparent;
tw->tw_mark = sk->sk_mark; tw->tw_mark = sk->sk_mark;
tw->tw_priority = sk->sk_priority;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_rcv_nxt = tp->rcv_nxt;
tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_snd_nxt = tp->snd_nxt;
......
...@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused ...@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
fl6.daddr = sk->sk_v6_daddr; fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
np->tclass); np->tclass, sk->sk_priority);
rcu_read_unlock(); rcu_read_unlock();
return res; return res;
} }
......
...@@ -193,7 +193,7 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) ...@@ -193,7 +193,7 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
* which are using proper atomic operations or spinlocks. * which are using proper atomic operations or spinlocks.
*/ */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
__u32 mark, struct ipv6_txoptions *opt, int tclass) __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{ {
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk); const struct ipv6_pinfo *np = inet6_sk(sk);
...@@ -258,7 +258,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, ...@@ -258,7 +258,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hdr->daddr = *first_hop; hdr->daddr = *first_hop;
skb->protocol = htons(ETH_P_IPV6); skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority; skb->priority = priority;
skb->mark = mark; skb->mark = mark;
mtu = dst_mtu(dst); mtu = dst_mtu(dst);
......
...@@ -512,7 +512,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, ...@@ -512,7 +512,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt; opt = ireq->ipv6_opt;
if (!opt) if (!opt)
opt = rcu_dereference(np->opt); opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
sk->sk_priority);
rcu_read_unlock(); rcu_read_unlock();
err = net_xmit_eval(err); err = net_xmit_eval(err);
} }
...@@ -803,7 +804,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { ...@@ -803,7 +804,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst, int oif, struct tcp_md5sig_key *key, int rst,
u8 tclass, __be32 label) u8 tclass, __be32 label, u32 priority)
{ {
const struct tcphdr *th = tcp_hdr(skb); const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1; struct tcphdr *t1;
...@@ -907,7 +908,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 ...@@ -907,7 +908,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) { if (!IS_ERR(dst)) {
skb_dst_set(buff, dst); skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS); TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst) if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS); TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
...@@ -930,6 +932,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -930,6 +932,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
struct sock *sk1 = NULL; struct sock *sk1 = NULL;
#endif #endif
__be32 label = 0; __be32 label = 0;
u32 priority = 0;
struct net *net; struct net *net;
int oif = 0; int oif = 0;
...@@ -990,16 +993,19 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -990,16 +993,19 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb); trace_tcp_send_reset(sk, skb);
if (np->repflow) if (np->repflow)
label = ip6_flowlabel(ipv6h); label = ip6_flowlabel(ipv6h);
priority = sk->sk_priority;
} }
if (sk->sk_state == TCP_TIME_WAIT) if (sk->sk_state == TCP_TIME_WAIT) {
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
priority = inet_twsk(sk)->tw_priority;
}
} else { } else {
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
label = ip6_flowlabel(ipv6h); label = ip6_flowlabel(ipv6h);
} }
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
label); label, priority);
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
out: out:
...@@ -1010,10 +1016,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -1010,10 +1016,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass, struct tcp_md5sig_key *key, u8 tclass,
__be32 label) __be32 label, u32 priority)
{ {
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
tclass, label); tclass, label, priority);
} }
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
...@@ -1025,7 +1031,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) ...@@ -1025,7 +1031,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel)); tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
inet_twsk_put(tw); inet_twsk_put(tw);
} }
...@@ -1048,7 +1054,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, ...@@ -1048,7 +1054,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0); 0, 0, sk->sk_priority);
} }
......
...@@ -215,7 +215,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) ...@@ -215,7 +215,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
rcu_read_lock(); rcu_read_lock();
res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
tclass); tclass, sk->sk_priority);
rcu_read_unlock(); rcu_read_unlock();
return res; return res;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment