Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
79ffeeb9
Commit
79ffeeb9
authored
Nov 10, 2005
by
Linus Torvalds
Browse files
Options
Browse Files
Download
Plain Diff
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
parents
a5aac37f
6a438bbe
Changes
19
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
413 additions
and
199 deletions
+413
-199
Documentation/networking/ip-sysctl.txt
Documentation/networking/ip-sysctl.txt
+5
-0
include/linux/sysctl.h
include/linux/sysctl.h
+1
-0
include/linux/tcp.h
include/linux/tcp.h
+16
-0
include/net/sock.h
include/net/sock.h
+6
-0
include/net/tcp.h
include/net/tcp.h
+65
-6
net/ipv4/sysctl_net_ipv4.c
net/ipv4/sysctl_net_ipv4.c
+8
-0
net/ipv4/tcp.c
net/ipv4/tcp.c
+2
-1
net/ipv4/tcp_bic.c
net/ipv4/tcp_bic.c
+5
-7
net/ipv4/tcp_cong.c
net/ipv4/tcp_cong.c
+24
-16
net/ipv4/tcp_highspeed.c
net/ipv4/tcp_highspeed.c
+5
-6
net/ipv4/tcp_htcp.c
net/ipv4/tcp_htcp.c
+6
-7
net/ipv4/tcp_hybla.c
net/ipv4/tcp_hybla.c
+3
-3
net/ipv4/tcp_input.c
net/ipv4/tcp_input.c
+194
-94
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_ipv4.c
+2
-2
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_minisocks.c
+4
-3
net/ipv4/tcp_output.c
net/ipv4/tcp_output.c
+47
-14
net/ipv4/tcp_scalable.c
net/ipv4/tcp_scalable.c
+7
-7
net/ipv4/tcp_timer.c
net/ipv4/tcp_timer.c
+2
-2
net/ipv4/tcp_vegas.c
net/ipv4/tcp_vegas.c
+11
-31
No files found.
Documentation/networking/ip-sysctl.txt
View file @
79ffeeb9
...
...
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
TCP variables:
tcp_abc - INTEGER
Controls Appropriate Byte Count defined in RFC3465. If set to
0 then does congestion avoid once per ack. 1 is conservative
value, and 2 is more aggressive.
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value
...
...
include/linux/sysctl.h
View file @
79ffeeb9
...
...
@@ -390,6 +390,7 @@ enum
NET_TCP_BIC_BETA
=
108
,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR
=
109
,
NET_TCP_CONG_CONTROL
=
110
,
NET_TCP_ABC
=
111
,
};
enum
{
...
...
include/linux/tcp.h
View file @
79ffeeb9
...
...
@@ -307,6 +307,21 @@ struct tcp_sock {
struct
tcp_sack_block
duplicate_sack
[
1
];
/* D-SACK block */
struct
tcp_sack_block
selective_acks
[
4
];
/* The SACKS themselves*/
struct
tcp_sack_block
recv_sack_cache
[
4
];
/* from STCP, retrans queue hinting */
struct
sk_buff
*
lost_skb_hint
;
struct
sk_buff
*
scoreboard_skb_hint
;
struct
sk_buff
*
retransmit_skb_hint
;
struct
sk_buff
*
forward_skb_hint
;
struct
sk_buff
*
fastpath_skb_hint
;
int
fastpath_cnt_hint
;
int
lost_cnt_hint
;
int
retransmit_cnt_hint
;
int
forward_cnt_hint
;
__u16
advmss
;
/* Advertised MSS */
__u16
prior_ssthresh
;
/* ssthresh saved at recovery start */
__u32
lost_out
;
/* Lost packets */
...
...
@@ -326,6 +341,7 @@ struct tcp_sock {
__u32
snd_up
;
/* Urgent pointer */
__u32
total_retrans
;
/* Total retransmits for entire connection */
__u32
bytes_acked
;
/* Appropriate Byte Counting - RFC3465 */
unsigned
int
keepalive_time
;
/* time before keep alive takes place */
unsigned
int
keepalive_intvl
;
/* time interval between keep alive probes */
...
...
include/net/sock.h
View file @
79ffeeb9
...
...
@@ -1247,6 +1247,12 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
(skb != (struct sk_buff *)&(sk)->sk_write_queue); \
skb = skb->next)
/*
 * From STCP, for fast SACK processing.
 *
 * Walk the retransmit queue starting at the CURRENT value of @skb (the
 * caller must initialise it, e.g. from a cached hint or from
 * sk->sk_write_queue.next).  Iteration stops when @skb reaches
 * (sk)->sk_send_head or wraps around to the queue head sentinel
 * &(sk)->sk_write_queue.  Macro arguments are parenthesised so that
 * arbitrary lvalue expressions may be passed for @skb.
 */
#define sk_stream_for_retrans_queue_from(skb, sk)			\
		for (; ((skb) != (sk)->sk_send_head) &&			\
		     ((skb) != (struct sk_buff *)&(sk)->sk_write_queue);	\
		     (skb) = (skb)->next)
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
...
...
include/net/tcp.h
View file @
79ffeeb9
...
...
@@ -89,10 +89,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
*/
#define TCP_SYN_RETRIES 5
/* number of times to retry active opening a
* connection: ~180sec is RFC min
u
mum */
* connection: ~180sec is RFC min
i
mum */
#define TCP_SYNACK_RETRIES 5
/* number of times to retry passive opening a
* connection: ~180sec is RFC min
u
mum */
* connection: ~180sec is RFC min
i
mum */
#define TCP_ORPHAN_RETRIES 7
/* number of times to retry on an orphaned
...
...
@@ -180,7 +180,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1
/* Nagle's algo is disabled */
#define TCP_NAGLE_CORK 2
/* Socket is corked */
#define TCP_NAGLE_PUSH 4
/* Cork is overriden for already queued data */
#define TCP_NAGLE_PUSH 4
/* Cork is overrid
d
en for already queued data */
extern
struct
inet_timewait_death_row
tcp_death_row
;
...
...
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
extern
int
sysctl_tcp_nometrics_save
;
extern
int
sysctl_tcp_moderate_rcvbuf
;
extern
int
sysctl_tcp_tso_win_divisor
;
extern
int
sysctl_tcp_abc
;
extern
atomic_t
tcp_memory_allocated
;
extern
atomic_t
tcp_sockets_allocated
;
...
...
@@ -551,13 +552,13 @@ extern u32 __tcp_select_window(struct sock *sk);
/* TCP timestamps are only 32-bits, this causes a slight
* complication on 64-bit systems since we store a snapshot
* of jiffies in the buffer control blocks below. We decidely
* of jiffies in the buffer control blocks below. We decide
d
ly
* only use of the low 32-bits of jiffies and hide the ugly
* casts with the following macro.
*/
#define tcp_time_stamp ((__u32)(jiffies))
/* This is what the send packet queu
e
ing engine uses to pass
/* This is what the send packet queuing engine uses to pass
* TCP per-packet control information to the transmission
* code. We also store the host-order sequence numbers in
* here too. This is 36 bytes on 32-bit architectures,
...
...
@@ -597,7 +598,7 @@ struct tcp_skb_cb {
#define TCPCB_EVER_RETRANS 0x80
/* Ever retransmitted frame */
#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
#define TCPCB_URG 0x20
/* Urgent pointer adv
e
nced here */
#define TCPCB_URG 0x20
/* Urgent pointer adv
a
nced here */
#define TCPCB_AT_TAIL (TCPCB_URG)
...
...
@@ -765,6 +766,33 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
(
tp
->
snd_cwnd
>>
2
)));
}
/*
 * Slow start: grow the congestion window by whole segments.
 *
 * When sysctl_tcp_abc is 0, each call raises tp->snd_cwnd by one
 * (never beyond tp->snd_cwnd_clamp) and resets tp->bytes_acked.
 *
 * When sysctl_tcp_abc is non-zero (Appropriate Byte Counting, RFC3465),
 * nothing happens until tp->bytes_acked has reached at least one
 * tp->mss_cache; the counter is then reset and the window grows by one
 * segment.  With sysctl_tcp_abc > 1 the window grows by a second segment
 * when at least two full MSS worth of bytes have been acknowledged.
 */
static inline void tcp_slow_start(struct tcp_sock *tp)
{
	if (sysctl_tcp_abc) {
		/* RFC3465: Slow Start
		 * TCP sender SHOULD increase cwnd by the number of
		 * previously unacknowledged bytes ACKed by each incoming
		 * acknowledgment, provided the increase is not more than L
		 */
		if (tp->bytes_acked < tp->mss_cache)
			return;

		/* We MAY increase by 2 if discovered delayed ack.
		 * The limit L = 2*SMSS is inclusive: an ACK covering exactly
		 * two full segments (the usual delayed-ACK case) must
		 * qualify, hence ">=" rather than ">".
		 */
		if (sysctl_tcp_abc > 1 &&
		    tp->bytes_acked >= 2 * tp->mss_cache) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
		}
	}
	tp->bytes_acked = 0;

	/* The unconditional one-segment increase, bounded by the clamp. */
	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
		tp->snd_cwnd++;
}
static
inline
void
tcp_sync_left_out
(
struct
tcp_sock
*
tp
)
{
if
(
tp
->
rx_opt
.
sack_ok
&&
...
...
@@ -794,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
tp
->
prior_ssthresh
=
0
;
tp
->
bytes_acked
=
0
;
if
(
inet_csk
(
sk
)
->
icsk_ca_state
<
TCP_CA_CWR
)
{
__tcp_enter_cwr
(
sk
);
tcp_set_ca_state
(
sk
,
TCP_CA_CWR
);
...
...
@@ -810,6 +839,27 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
return
3
;
}
/*
 * RFC2861: decide whether the sender is limited by the congestion window
 * (returns non-zero) or by the application (returns 0).
 * This is the inverse of the cwnd check in tcp_tso_should_defer.
 *
 * @sk:        socket being examined
 * @in_flight: number of packets currently in flight
 *
 * - in_flight >= snd_cwnd: always cwnd limited.
 * - Route without NETIF_F_TSO: not cwnd limited.
 * - Otherwise the unused part of the window is compared either against
 *   snd_cwnd scaled by sysctl_tcp_tso_win_divisor (when that sysctl is
 *   non-zero) or against tcp_max_burst().
 */
static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 room;

	if (in_flight >= tp->snd_cwnd)
		return 1;

	if ((sk->sk_route_caps & NETIF_F_TSO) == 0)
		return 0;

	/* Window space not yet occupied by packets in flight. */
	room = tp->snd_cwnd - in_flight;

	if (!sysctl_tcp_tso_win_divisor)
		return room <= tcp_max_burst(tp);

	return room * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
}
static
__inline__
void
tcp_minshall_update
(
struct
tcp_sock
*
tp
,
int
mss
,
const
struct
sk_buff
*
skb
)
{
...
...
@@ -1157,6 +1207,15 @@ static inline void tcp_mib_init(void)
TCP_ADD_STATS_USER
(
TCP_MIB_MAXCONN
,
-
1
);
}
/*
 * From STCP: forget every cached sk_buff position hint kept in @tp
 * (lost, scoreboard, retransmit, forward and fastpath).  Only the
 * pointer hints are reset to NULL; the *_cnt_hint counters are left
 * untouched.
 */
static inline void clear_all_retrans_hints(struct tcp_sock *tp)
{
	tp->fastpath_skb_hint =
	tp->forward_skb_hint =
	tp->retransmit_skb_hint =
	tp->scoreboard_skb_hint =
	tp->lost_skb_hint = NULL;
}
/* /proc */
enum
tcp_seq_states
{
TCP_SEQ_STATE_LISTENING
,
...
...
net/ipv4/sysctl_net_ipv4.c
View file @
79ffeeb9
...
...
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.
proc_handler
=
&
proc_tcp_congestion_control
,
.
strategy
=
&
sysctl_tcp_congestion_control
,
},
{
.
ctl_name
=
NET_TCP_ABC
,
.
procname
=
"tcp_abc"
,
.
data
=
&
sysctl_tcp_abc
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
&
proc_dointvec
,
},
{
.
ctl_name
=
0
}
};
...
...
net/ipv4/tcp.c
View file @
79ffeeb9
...
...
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
}
else
if
(
tcp_need_reset
(
old_state
)
||
(
tp
->
snd_nxt
!=
tp
->
write_seq
&&
(
1
<<
old_state
)
&
(
TCPF_CLOSING
|
TCPF_LAST_ACK
)))
{
/* The last check adjusts for discrepanc
e
of Linux wrt. RFC
/* The last check adjusts for discrepanc
y
of Linux wrt. RFC
* states
*/
tcp_send_active_reset
(
sk
,
gfp_any
());
...
...
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp
->
packets_out
=
0
;
tp
->
snd_ssthresh
=
0x7fffffff
;
tp
->
snd_cwnd_cnt
=
0
;
tp
->
bytes_acked
=
0
;
tcp_set_ca_state
(
sk
,
TCP_CA_Open
);
tcp_clear_retrans
(
tp
);
inet_csk_delack_init
(
sk
);
...
...
net/ipv4/tcp_bic.c
View file @
79ffeeb9
...
...
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
bictcp_low_utilization
(
sk
,
data_acked
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
else
{
bictcp_update
(
ca
,
tp
->
snd_cwnd
);
/* In dangerous area, increase slowly.
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
*/
if
(
tp
->
snd_cwnd_cnt
>=
ca
->
cnt
)
{
...
...
net/ipv4/tcp_cong.c
View file @
79ffeeb9
...
...
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
*/
if
(
tp
->
snd_cwnd_cnt
>=
tp
->
snd_cwnd
)
{
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
tp
->
snd_cwnd_cnt
=
0
;
}
else
tp
->
snd_cwnd_cnt
++
;
}
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
/* In dangerous area, increase slowly. */
else
if
(
sysctl_tcp_abc
)
{
/* RFC3465: Appropriate Byte Count
* increase once for each full cwnd acked
*/
if
(
tp
->
bytes_acked
>=
tp
->
snd_cwnd
*
tp
->
mss_cache
)
{
tp
->
bytes_acked
-=
tp
->
snd_cwnd
*
tp
->
mss_cache
;
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
}
else
{
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
if
(
tp
->
snd_cwnd_cnt
>=
tp
->
snd_cwnd
)
{
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
tp
->
snd_cwnd_cnt
=
0
;
}
else
tp
->
snd_cwnd_cnt
++
;
}
}
EXPORT_SYMBOL_GPL
(
tcp_reno_cong_avoid
);
...
...
net/ipv4/tcp_highspeed.c
View file @
79ffeeb9
...
...
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
}
static
void
hstcp_cong_avoid
(
struct
sock
*
sk
,
u32
adk
,
u32
rtt
,
u32
in_flight
,
int
goo
d
)
u32
in_flight
,
u32
pkts_acke
d
)
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
hstcp
*
ca
=
inet_csk_ca
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
else
{
/* Update AIMD parameters */
if
(
tp
->
snd_cwnd
>
hstcp_aimd_vals
[
ca
->
ai
].
cwnd
)
{
while
(
tp
->
snd_cwnd
>
hstcp_aimd_vals
[
ca
->
ai
].
cwnd
&&
...
...
net/ipv4/tcp_htcp.c
View file @
79ffeeb9
...
...
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
htcp
*
ca
=
inet_csk_ca
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
else
{
measure_rtt
(
sk
);
/* keep track of number of round-trip times since last backoff event */
...
...
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
htcp_alpha_update
(
ca
);
}
/* In dangerous area, increase slowly.
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
if
((
tp
->
snd_cwnd_cnt
++
*
ca
->
alpha
)
>>
7
>=
tp
->
snd_cwnd
)
{
...
...
net/ipv4/tcp_hybla.c
View file @
79ffeeb9
...
...
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
ca
->
minrtt
=
tp
->
srtt
;
}
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
))
return
;
if
(
!
ca
->
hybla_en
)
return
tcp_reno_cong_avoid
(
sk
,
ack
,
rtt
,
in_flight
,
flag
);
if
(
in_flight
<
tp
->
snd_cwnd
)
return
;
if
(
ca
->
rho
==
0
)
hybla_recalc_param
(
sk
);
...
...
net/ipv4/tcp_input.c
View file @
79ffeeb9
This diff is collapsed.
Click to expand it.
net/ipv4/tcp_ipv4.c
View file @
79ffeeb9
...
...
@@ -39,7 +39,7 @@
* request_sock handling and moved
* most of it into the af independent code.
* Added tail drop and some other bugfixes.
* Added new listen sematics.
* Added new listen sema
n
tics.
* Mike McLagan : Routing by source
* Juan Jose Ciarlante: ip_dynaddr bits
* Andi Kleen: various fixes.
...
...
@@ -1210,7 +1210,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is elimin
e
ted.
* provided case of th->doff==0 is elimin
a
ted.
* So, we defer the checks. */
if
((
skb
->
ip_summed
!=
CHECKSUM_UNNECESSARY
&&
tcp_v4_checksum_init
(
skb
)))
...
...
net/ipv4/tcp_minisocks.c
View file @
79ffeeb9
...
...
@@ -158,7 +158,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
/* I am shamed, but failed to make it more elegant.
* Yes, it is direct reference to IP, which is impossible
* to generalize to IPv6. Taking into account that IPv6
* do not under
tsna
d recycling in any case, it not
* do not under
stan
d recycling in any case, it not
* a big problem in practice. --ANK */
if
(
tw
->
tw_family
==
AF_INET
&&
tcp_death_row
.
sysctl_tw_recycle
&&
tcptw
->
tw_ts_recent_stamp
&&
...
...
@@ -194,7 +194,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
/* In window segment, it may be only reset or bare ack. */
if
(
th
->
rst
)
{
/* This is TIME_WAIT assasination, in two flavors.
/* This is TIME_WAIT assas
s
ination, in two flavors.
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
...
...
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/
newtp
->
snd_cwnd
=
2
;
newtp
->
snd_cwnd_cnt
=
0
;
newtp
->
bytes_acked
=
0
;
newtp
->
frto_counter
=
0
;
newtp
->
frto_highmark
=
0
;
...
...
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* RFC793 page 36: "If the connection is in any non-synchronized state ...
* and the incoming segment acknowledges something not yet
* sent (the segment carries an unacc
a
ptable ACK) ...
* sent (the segment carries an unacc
e
ptable ACK) ...
* a reset is sent."
*
* Invalid ACK: reset will be sent by listening socket
...
...
net/ipv4/tcp_output.c
View file @
79ffeeb9
...
...
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
u16
flags
;
BUG_ON
(
len
>
skb
->
len
);
clear_all_retrans_hints
(
tp
);
nsize
=
skb_headlen
(
skb
)
-
len
;
if
(
nsize
<
0
)
nsize
=
0
;
...
...
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
for TCP options, but includes only bare TCP header.
tp->rx_opt.mss_clamp is mss negotiated at connection setup.
It is min
u
mum of user_mss and mss received with SYN.
It is min
i
mum of user_mss and mss received with SYN.
It also does not include TCP options.
tp->pmtu_cookie is last pmtu, seen by this function.
...
...
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
{
struct
inet_connection_sock
*
icsk
=
inet_csk
(
sk
);
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
/* MSS for the peer's data. Previous verions used mss_clamp
/* MSS for the peer's data. Previous ver
s
ions used mss_clamp
* here. I don't know if the value based on our guesses
* of peer's MSS is better for the performance. It's more correct
* but may be worse for the performance because of rcv_mss
...
...
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
BUG_ON
(
tcp_skb_pcount
(
skb
)
!=
1
||
tcp_skb_pcount
(
next_skb
)
!=
1
);
/* Ok. We will be able to collapse the packet. */
/* changing transmit queue under us so clear hints */
clear_all_retrans_hints
(
tp
);
/* Ok. We will be able to collapse the packet. */
__skb_unlink
(
next_skb
,
&
sk
->
sk_write_queue
);
memcpy
(
skb_put
(
skb
,
next_skb_size
),
next_skb
->
data
,
next_skb_size
);
...
...
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
}
}
clear_all_retrans_hints
(
tp
);
if
(
!
lost
)
return
;
...
...
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int
err
;
/* Do not send more than we queued. 1/4 is reserved for possible
* copying overhead: fr
g
agmentation, tunneling, mangling etc.
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if
(
atomic_read
(
&
sk
->
sk_wmem_alloc
)
>
min
(
sk
->
sk_wmem_queued
+
(
sk
->
sk_wmem_queued
>>
2
),
sk
->
sk_sndbuf
))
...
...
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
const
struct
inet_connection_sock
*
icsk
=
inet_csk
(
sk
);
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
sk_buff
*
skb
;
int
packet_cnt
=
tp
->
lost_out
;
int
packet_cnt
;
if
(
tp
->
retransmit_skb_hint
)
{
skb
=
tp
->
retransmit_skb_hint
;
packet_cnt
=
tp
->
retransmit_cnt_hint
;
}
else
{
skb
=
sk
->
sk_write_queue
.
next
;
packet_cnt
=
0
;
}
/* First pass: retransmit lost packets. */
if
(
packet_cn
t
)
{
sk_stream_for_retrans_queue
(
skb
,
sk
)
{
if
(
tp
->
lost_ou
t
)
{
sk_stream_for_retrans_queue
_from
(
skb
,
sk
)
{
__u8
sacked
=
TCP_SKB_CB
(
skb
)
->
sacked
;
/* we could do better than to assign each time */
tp
->
retransmit_skb_hint
=
skb
;
tp
->
retransmit_cnt_hint
=
packet_cnt
;
/* Assume this retransmit will generate
* only one packet for congestion window
* calculation purposes. This works because
...
...
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if
(
tcp_packets_in_flight
(
tp
)
>=
tp
->
snd_cwnd
)
return
;
if
(
sacked
&
TCPCB_LOST
)
{
if
(
sacked
&
TCPCB_LOST
)
{
if
(
!
(
sacked
&
(
TCPCB_SACKED_ACKED
|
TCPCB_SACKED_RETRANS
)))
{
if
(
tcp_retransmit_skb
(
sk
,
skb
))
if
(
tcp_retransmit_skb
(
sk
,
skb
))
{
tp
->
retransmit_skb_hint
=
NULL
;
return
;
}
if
(
icsk
->
icsk_ca_state
!=
TCP_CA_Loss
)
NET_INC_STATS_BH
(
LINUX_MIB_TCPFASTRETRANS
);
else
...
...
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
TCP_RTO_MAX
);
}
packet_cnt
-
=
tcp_skb_pcount
(
skb
);
if
(
packet_cnt
<=
0
)
packet_cnt
+
=
tcp_skb_pcount
(
skb
);
if
(
packet_cnt
>=
tp
->
lost_out
)
break
;
}
}
...
...
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if
(
tcp_may_send_now
(
sk
,
tp
))
return
;
packet_cnt
=
0
;
if
(
tp
->
forward_skb_hint
)
{
skb
=
tp
->
forward_skb_hint
;
packet_cnt
=
tp
->
forward_cnt_hint
;
}
else
{
skb
=
sk
->
sk_write_queue
.
next
;
packet_cnt
=
0
;
}
sk_stream_for_retrans_queue_from
(
skb
,
sk
)
{
tp
->
forward_cnt_hint
=
packet_cnt
;
tp
->
forward_skb_hint
=
skb
;
sk_stream_for_retrans_queue
(
skb
,
sk
)
{
/* Similar to the retransmit loop above we
* can pretend that the retransmitted SKB
* we send out here will be composed of one
...
...
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
continue
;
/* Ok, retransmit it. */
if
(
tcp_retransmit_skb
(
sk
,
skb
))
if
(
tcp_retransmit_skb
(
sk
,
skb
))
{
tp
->
forward_skb_hint
=
NULL
;
break
;
}
if
(
skb
==
skb_peek
(
&
sk
->
sk_write_queue
))
inet_csk_reset_xmit_timer
(
sk
,
ICSK_TIME_RETRANS
,
...
...
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL
(
tcp_make_synack
);
EXPORT_SYMBOL
(
tcp_simple_retransmit
);
EXPORT_SYMBOL
(
tcp_sync_mss
);
EXPORT_SYMBOL
(
sysctl_tcp_tso_win_divisor
);
net/ipv4/tcp_scalable.c
View file @
79ffeeb9
...
...
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32
in_flight
,
int
flag
)
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
))
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
t
p
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
t
cp_slow_start
(
tp
)
;
else
{
tp
->
snd_cwnd_cnt
++
;
if
(
tp
->
snd_cwnd_cnt
>
min
(
tp
->
snd_cwnd
,
TCP_SCALABLE_AI_CNT
)){
tp
->
snd_cwnd
++
;
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
tp
->
snd_cwnd_cnt
=
0
;
}
}
tp
->
snd_cwnd
=
min_t
(
u32
,
tp
->
snd_cwnd
,
tp
->
snd_cwnd_clamp
);
tp
->
snd_cwnd_stamp
=
tcp_time_stamp
;
}
static
u32
tcp_scalable_ssthresh
(
struct
sock
*
sk
)
...
...
net/ipv4/tcp_timer.c
View file @
79ffeeb9
...
...
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
* Criteri
um
is still not confirmed experimentally and may change.
* Criteri
a
is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
...
...
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
hole detection. :-(
It is place to make it. It is not made. I do not want
to make it. It is disgu
i
sting. It does not work in any
to make it. It is disgusting. It does not work in any
case. Let me to cite the same draft, which requires for
us to implement this:
...
...
net/ipv4/tcp_vegas.c
View file @
79ffeeb9
...
...
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
/* We don't have enough RTT samples to do the Vegas
* calculation, so we'll behave like Reno.
*/
if
(
tp
->
snd_cwnd
>
tp
->
snd_ssthresh
)
tp
->
snd_cwnd
++
;
tcp_reno_cong_avoid
(
sk
,
ack
,
seq_rtt
,
in_flight
,
cnt
);
}
else
{
u32
rtt
,
target_cwnd
,
diff
;
...
...
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
*/
diff
=
(
old_wnd
<<
V_PARAM_SHIFT
)
-
target_cwnd
;
if
(
tp
->
snd_cwnd
<
tp
->
snd_ssthresh
)
{
if
(
tp
->
snd_cwnd
<
=
tp
->
snd_ssthresh
)
{
/* Slow start. */
if
(
diff
>
gamma
)
{
/* Going too fast. Time to slow down
...
...
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
V_PARAM_SHIFT
)
+
1
);
}
tcp_slow_start
(
tp
);
}
else
{
/* Congestion avoidance. */
u32
next_snd_cwnd
;
...
...
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
else
if
(
next_snd_cwnd
<
tp
->
snd_cwnd
)
tp
->
snd_cwnd
--
;
}
}
/* Wipe the slate clean for the next RTT. */
vegas
->
cntRTT
=
0
;
vegas
->
minRTT
=
0x7fffffff
;
if
(
tp
->
snd_cwnd
<
2
)
tp
->
snd_cwnd
=
2
;
else
if
(
tp
->
snd_cwnd
>
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
=
tp
->
snd_cwnd_clamp
;
}
}
/* The following code is executed for every ack we receive,
* except for conditions checked in should_advance_cwnd()
* before the call to tcp_cong_avoid(). Mainly this means that
* we only execute this code if the ack actually acked some
* data.
*/
/* If we are in slow start, increase our cwnd in response to this ACK.
* (If we are not in slow start then we are in congestion avoidance,
* and adjust our congestion window only once per RTT. See the code
* above.)
*/
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tp
->
snd_cwnd
++
;
/* to keep cwnd from growing without bound */
tp
->
snd_cwnd
=
min_t
(
u32
,
tp
->
snd_cwnd
,
tp
->
snd_cwnd_clamp
);
/* Make sure that we are never so timid as to reduce our cwnd below
* 2 MSS.
*
* Going below 2 MSS would risk huge delayed ACKs from our receiver.
*/
tp
->
snd_cwnd
=
max
(
tp
->
snd_cwnd
,
2U
);
/* Wipe the slate clean for the next RTT. */
vegas
->
cntRTT
=
0
;
vegas
->
minRTT
=
0x7fffffff
;
}
/* Extract info for Tcp socket info provided via netlink. */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment