Commit 7c657876 authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[DCCP]: Initial implementation

Development to this point was done on a subversion repository at:

http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/

This repository will be kept at this site for the foreseeable future,
so that interested parties can see the history of this code,
attributions, etc.

If I ever decide to take this offline I'll provide the full history at
some other suitable place.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c4365c92
#ifndef _LINUX_DCCP_H
#define _LINUX_DCCP_H
#include <linux/in.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/uio.h>
#include <linux/workqueue.h>
#include <net/inet_connection_sock.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/tcp.h>
/* FIXME: this is utterly wrong */
struct sockaddr_dccp {
struct sockaddr_in in;
unsigned int service;
};
enum dccp_state {
DCCP_OPEN = TCP_ESTABLISHED,
DCCP_REQUESTING = TCP_SYN_SENT,
DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME:
This mapping is horrible, but TCP has
no matching state for DCCP_PARTOPEN,
as TCP_SYN_RECV is already used by
DCCP_RESPOND. Why not stop using the TCP
mapping of states? Now that we no longer
use sk_stream_sendmsg, there doesn't
seem to be any reason left for us to
do the TCP mapping here */
DCCP_LISTEN = TCP_LISTEN,
DCCP_RESPOND = TCP_SYN_RECV,
DCCP_CLOSING = TCP_CLOSING,
DCCP_TIME_WAIT = TCP_TIME_WAIT,
DCCP_CLOSED = TCP_CLOSE,
DCCP_MAX_STATES = TCP_MAX_STATES,
};
#define DCCP_STATE_MASK 0xf
#define DCCP_ACTION_FIN (1<<7)
enum {
DCCPF_OPEN = TCPF_ESTABLISHED,
DCCPF_REQUESTING = TCPF_SYN_SENT,
DCCPF_PARTOPEN = TCPF_FIN_WAIT1,
DCCPF_LISTEN = TCPF_LISTEN,
DCCPF_RESPOND = TCPF_SYN_RECV,
DCCPF_CLOSING = TCPF_CLOSING,
DCCPF_TIME_WAIT = TCPF_TIME_WAIT,
DCCPF_CLOSED = TCPF_CLOSE,
};
/**
* struct dccp_hdr - generic part of DCCP packet header
*
* @dccph_sport - Relevant port on the endpoint that sent this packet
* @dccph_dport - Relevant port on the other endpoint
* @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words
* @dccph_ccval - Used by the HC-Sender CCID
* @dccph_cscov - Parts of the packet that are covered by the Checksum field
* @dccph_checksum - Internet checksum, depends on dccph_cscov
* @dccph_x - 0 = 24 bit sequence number, 1 = 48
* @dccph_type - packet type, see DCCP_PKT_ prefixed macros
* @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x
*/
struct dccp_hdr {
__u16 dccph_sport,
dccph_dport;
__u8 dccph_doff;
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 dccph_cscov:4,
dccph_ccval:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 dccph_ccval:4,
dccph_cscov:4;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
__u16 dccph_checksum;
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u32 dccph_x:1,
dccph_type:4,
dccph_reserved:3,
dccph_seq:24;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u32 dccph_reserved:3,
dccph_type:4,
dccph_x:1,
dccph_seq:24;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
};
static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
{
return (struct dccp_hdr *)skb->h.raw;
}
/**
* struct dccp_hdr_ext - the low bits of a 48 bit seq packet
*
* @dccph_seq_low - low 24 bits of a 48 bit seq packet
*/
struct dccp_hdr_ext {
__u32 dccph_seq_low;
};
static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
{
return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
}
static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
}
static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 seq_nr = ntohl(dh->dccph_seq << 8);
#elif defined(__BIG_ENDIAN_BITFIELD)
__u64 seq_nr = ntohl(dh->dccph_seq);
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
if (dh->dccph_x != 0)
seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low);
return seq_nr;
}
/**
* struct dccp_hdr_request - Connection initiation request header
*
* @dccph_req_service - Service to which the client app wants to connect
* @dccph_req_options - list of options (must be a multiple of 32 bits)
*/
struct dccp_hdr_request {
__u32 dccph_req_service;
};
static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
{
return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
/**
* struct dccp_hdr_ack_bits - acknowledgment bits common to most packets
*
* @dccph_ack_nr_high - 48 bit ack number high order bits, contains GSR
* @dccph_ack_nr_low - 48 bit ack number low order bits, contains GSR
*/
struct dccp_hdr_ack_bits {
__u32 dccph_reserved1:8,
dccph_ack_nr_high:24;
__u32 dccph_ack_nr_low;
};
static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
{
return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
{
const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
#if defined(__LITTLE_ENDIAN_BITFIELD)
return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low);
#elif defined(__BIG_ENDIAN_BITFIELD)
return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low);
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
}
/**
* struct dccp_hdr_response - Connection initiation response header
*
* @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR
* @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR
* @dccph_resp_service - Echoes the Service Code on a received DCCP-Request
* @dccph_resp_options - list of options (must be a multiple of 32 bits)
*/
struct dccp_hdr_response {
struct dccp_hdr_ack_bits dccph_resp_ack;
__u32 dccph_resp_service;
};
static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
{
return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
/**
* struct dccp_hdr_reset - Unconditionally shut down a connection
*
* @dccph_reset_code - one of %dccp_reset_codes
* @dccph_reset_data - Data 1 ... Data 3, content depends on the Reset Code
* @dccph_reset_options - list of options (must be a multiple of 32 bits)
*/
struct dccp_hdr_reset {
struct dccp_hdr_ack_bits dccph_reset_ack;
__u8 dccph_reset_code,
dccph_reset_data[3];
};
static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
{
return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
enum dccp_pkt_type {
DCCP_PKT_REQUEST = 0,
DCCP_PKT_RESPONSE,
DCCP_PKT_DATA,
DCCP_PKT_ACK,
DCCP_PKT_DATAACK,
DCCP_PKT_CLOSEREQ,
DCCP_PKT_CLOSE,
DCCP_PKT_RESET,
DCCP_PKT_SYNC,
DCCP_PKT_SYNCACK,
DCCP_PKT_INVALID,
};
#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID
static inline unsigned int dccp_packet_hdr_len(const __u8 type)
{
if (type == DCCP_PKT_DATA)
return 0;
if (type == DCCP_PKT_DATAACK ||
type == DCCP_PKT_ACK ||
type == DCCP_PKT_SYNC ||
type == DCCP_PKT_SYNCACK ||
type == DCCP_PKT_CLOSE ||
type == DCCP_PKT_CLOSEREQ)
return sizeof(struct dccp_hdr_ack_bits);
if (type == DCCP_PKT_REQUEST)
return sizeof(struct dccp_hdr_request);
if (type == DCCP_PKT_RESPONSE)
return sizeof(struct dccp_hdr_response);
return sizeof(struct dccp_hdr_reset);
}
static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
{
return dccp_basic_hdr_len(skb) +
dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type);
}
enum dccp_reset_codes {
DCCP_RESET_CODE_UNSPECIFIED = 0,
DCCP_RESET_CODE_CLOSED,
DCCP_RESET_CODE_ABORTED,
DCCP_RESET_CODE_NO_CONNECTION,
DCCP_RESET_CODE_PACKET_ERROR,
DCCP_RESET_CODE_OPTION_ERROR,
DCCP_RESET_CODE_MANDATORY_ERROR,
DCCP_RESET_CODE_CONNECTION_REFUSED,
DCCP_RESET_CODE_BAD_SERVICE_CODE,
DCCP_RESET_CODE_TOO_BUSY,
DCCP_RESET_CODE_BAD_INIT_COOKIE,
DCCP_RESET_CODE_AGGRESSION_PENALTY,
};
/* DCCP options */
enum {
DCCPO_PADDING = 0,
DCCPO_MANDATORY = 1,
DCCPO_MIN_RESERVED = 3,
DCCPO_MAX_RESERVED = 31,
DCCPO_NDP_COUNT = 37,
DCCPO_ACK_VECTOR_0 = 38,
DCCPO_ACK_VECTOR_1 = 39,
DCCPO_TIMESTAMP = 41,
DCCPO_TIMESTAMP_ECHO = 42,
DCCPO_ELAPSED_TIME = 43,
DCCPO_MAX = 45,
DCCPO_MIN_CCID_SPECIFIC = 128,
DCCPO_MAX_CCID_SPECIFIC = 255,
};
/* DCCP features */
enum {
DCCPF_RESERVED = 0,
DCCPF_SEQUENCE_WINDOW = 3,
DCCPF_SEND_ACK_VECTOR = 6,
DCCPF_SEND_NDP_COUNT = 7,
/* 10-127 reserved */
DCCPF_MIN_CCID_SPECIFIC = 128,
DCCPF_MAX_CCID_SPECIFIC = 255,
};
/* initial values for each feature */
#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
/* FIXME: for now we're using CCID 3 (TFRC) */
#define DCCPF_INITIAL_CCID 3
#define DCCPF_INITIAL_SEND_ACK_VECTOR 0
/* FIXME: for now we default to 1 but it should really be 0 */
#define DCCPF_INITIAL_SEND_NDP_COUNT 1
#define DCCP_NDP_LIMIT 0xFFFFFF
/**
* struct dccp_options - option values for a DCCP connection
* @dccpo_sequence_window - Sequence Window Feature (section 7.5.2)
* @dccpo_ccid - Congestion Control Id (CCID) (section 10)
* @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5)
* @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2)
*/
struct dccp_options {
__u64 dccpo_sequence_window;
__u8 dccpo_ccid;
__u8 dccpo_send_ack_vector;
__u8 dccpo_send_ndp_count;
};
extern void __dccp_options_init(struct dccp_options *dccpo);
extern void dccp_options_init(struct dccp_options *dccpo);
extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);
struct dccp_request_sock {
struct inet_request_sock dreq_inet_rsk;
__u64 dreq_iss;
__u64 dreq_isr;
__u32 dreq_service;
};
static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
{
return (struct dccp_request_sock *)req;
}
/* Read about the ECN nonce to see why it is 253 */
#define DCCP_MAX_ACK_VECTOR_LEN 253
struct dccp_options_received {
u32 dccpor_ndp:24,
dccpor_ack_vector_len:8;
u32 dccpor_ack_vector_idx:10;
/* 22 bits hole, try to pack */
u32 dccpor_timestamp;
u32 dccpor_timestamp_echo;
u32 dccpor_elapsed_time;
};
struct ccid;
enum dccp_role {
DCCP_ROLE_UNDEFINED,
DCCP_ROLE_LISTEN,
DCCP_ROLE_CLIENT,
DCCP_ROLE_SERVER,
};
/**
* struct dccp_sock - DCCP socket state
*
* @dccps_swl - sequence number window low
* @dccps_swh - sequence number window high
* @dccps_awl - acknowledgement number window low
* @dccps_awh - acknowledgement number window high
* @dccps_iss - initial sequence number sent
* @dccps_isr - initial sequence number received
* @dccps_osr - first OPEN sequence number received
* @dccps_gss - greatest sequence number sent
* @dccps_gsr - greatest valid sequence number received
* @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
* @dccps_timestamp_time - time of latest TIMESTAMP option
* @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
* @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
* @dccps_pmtu_cookie - Last pmtu seen by socket
* @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it
* @dccps_role - Role of this sock, one of %dccp_role
* @dccps_ndp_count - number of Non Data Packets since last data packet
* @dccps_hc_rx_ackpkts - receiver half connection acked packets
*/
struct dccp_sock {
/* inet_connection_sock has to be the first member of dccp_sock */
struct inet_connection_sock dccps_inet_connection;
__u64 dccps_swl;
__u64 dccps_swh;
__u64 dccps_awl;
__u64 dccps_awh;
__u64 dccps_iss;
__u64 dccps_isr;
__u64 dccps_osr;
__u64 dccps_gss;
__u64 dccps_gsr;
__u64 dccps_gar;
unsigned long dccps_service;
unsigned long dccps_timestamp_time;
__u32 dccps_timestamp_echo;
__u32 dccps_avg_packet_size;
unsigned long dccps_ndp_count;
__u16 dccps_ext_header_len;
__u32 dccps_pmtu_cookie;
__u32 dccps_mss_cache;
struct dccp_options dccps_options;
struct dccp_ackpkts *dccps_hc_rx_ackpkts;
void *dccps_hc_rx_ccid_private;
void *dccps_hc_tx_ccid_private;
struct ccid *dccps_hc_rx_ccid;
struct ccid *dccps_hc_tx_ccid;
struct dccp_options_received dccps_options_received;
enum dccp_role dccps_role:2;
};
static inline struct dccp_sock *dccp_sk(const struct sock *sk)
{
return (struct dccp_sock *)sk;
}
static inline const char *dccp_role(const struct sock *sk)
{
switch (dccp_sk(sk)->dccps_role) {
case DCCP_ROLE_UNDEFINED: return "undefined";
case DCCP_ROLE_LISTEN: return "listen";
case DCCP_ROLE_SERVER: return "server";
case DCCP_ROLE_CLIENT: return "client";
}
return NULL;
}
#endif /* _LINUX_DCCP_H */
@@ -32,6 +32,7 @@ enum {
IPPROTO_PUP = 12, /* PUP protocol */
IPPROTO_UDP = 17, /* User Datagram Protocol */
IPPROTO_IDP = 22, /* XNS IDP protocol */
IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
IPPROTO_RSVP = 46, /* RSVP protocol */
IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
@@ -84,6 +84,7 @@ enum sock_type {
SOCK_RAW = 3,
SOCK_RDM = 4,
SOCK_SEQPACKET = 5,
SOCK_DCCP = 6,
SOCK_PACKET = 10,
};
@@ -271,6 +271,7 @@ struct ucred {
#define SOL_IRDA 266
#define SOL_NETBEUI 267
#define SOL_LLC 268
#define SOL_DCCP 269
/* IPX options */
#define IPX_TYPE 1
@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig"
endif
source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/atm/Kconfig"
source "net/bridge/Kconfig"
@@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_ECONET) += econet/
obj-$(CONFIG_VLAN_8021Q) += 8021q/
obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/
ifeq ($(CONFIG_NET),y)
menu "DCCP Configuration (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
config IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
---help---
Datagram Congestion Control Protocol
From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
congestion-controlled, unreliable datagrams. It should be suitable
for use by applications such as streaming media, Internet telephony,
and on-line games.
To compile this protocol support as a module, choose M here: the
module will be called dccp.
If in doubt, say N.
source "net/dccp/ccids/Kconfig"
endmenu
obj-$(CONFIG_IP_DCCP) += dccp.o
dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o
obj-y += ccids/
/*
* net/dccp/ccid.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "ccid.h"
static struct ccid *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t ccids_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ccids_lock);
/*
* The strategy is: modifications to the ccids vector are short, do not
* sleep and are veeery rare, but read access should be free of any
* exclusive locks.
*/
static void ccids_write_lock(void)
{
spin_lock(&ccids_lock);
while (atomic_read(&ccids_lockct) != 0) {
spin_unlock(&ccids_lock);
yield();
spin_lock(&ccids_lock);
}
}
static inline void ccids_write_unlock(void)
{
spin_unlock(&ccids_lock);
}
static inline void ccids_read_lock(void)
{
atomic_inc(&ccids_lockct);
spin_unlock_wait(&ccids_lock);
}
static inline void ccids_read_unlock(void)
{
atomic_dec(&ccids_lockct);
}
#else
#define ccids_write_lock() do { } while(0)
#define ccids_write_unlock() do { } while(0)
#define ccids_read_lock() do { } while(0)
#define ccids_read_unlock() do { } while(0)
#endif
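/*
* Note on the pairing above: a reader bumps ccids_lockct and then waits for
* any writer holding ccids_lock to release it, while a writer takes
* ccids_lock and spins (yielding) until the reader count drains to zero.
* This is the classic "big reader" scheme: reads avoid exclusive locking at
* the cost of making the rare writes expensive.
*/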
int ccid_register(struct ccid *ccid)
{
int err;
if (ccid->ccid_init == NULL)
return -1;
ccids_write_lock();
err = -EEXIST;
if (ccids[ccid->ccid_id] == NULL) {
ccids[ccid->ccid_id] = ccid;
err = 0;
}
ccids_write_unlock();
if (err == 0)
pr_info("CCID: Registered CCID %d (%s)\n",
ccid->ccid_id, ccid->ccid_name);
return err;
}
EXPORT_SYMBOL_GPL(ccid_register);
int ccid_unregister(struct ccid *ccid)
{
ccids_write_lock();
ccids[ccid->ccid_id] = NULL;
ccids_write_unlock();
pr_info("CCID: Unregistered CCID %d (%s)\n",
ccid->ccid_id, ccid->ccid_name);
return 0;
}
EXPORT_SYMBOL_GPL(ccid_unregister);
struct ccid *ccid_init(unsigned char id, struct sock *sk)
{
struct ccid *ccid;
#ifdef CONFIG_KMOD
if (ccids[id] == NULL)
request_module("net-dccp-ccid-%d", id);
#endif
ccids_read_lock();
ccid = ccids[id];
if (ccid == NULL)
goto out;
if (!try_module_get(ccid->ccid_owner))
goto out_err;
if (ccid->ccid_init(sk) != 0)
goto out_module_put;
out:
ccids_read_unlock();
return ccid;
out_module_put:
module_put(ccid->ccid_owner);
out_err:
ccid = NULL;
goto out;
}
EXPORT_SYMBOL_GPL(ccid_init);
void ccid_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid == NULL)
return;
ccids_read_lock();
if (ccids[ccid->ccid_id] != NULL) {
if (ccid->ccid_exit != NULL)
ccid->ccid_exit(sk);
module_put(ccid->ccid_owner);
}
ccids_read_unlock();
}
EXPORT_SYMBOL_GPL(ccid_exit);
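/*
* A minimal sketch (hypothetical, not part of this patch) of how a CCID
* module would plug into the infrastructure above; the ccid3 module later
* in this commit follows this pattern. The id, name and functions below
* are made up purely for illustration.
*/
#if 0
static int example_ccid_init(struct sock *sk)
{
return 0;
}
static struct ccid ccid_example = {
.ccid_id = 254, /* hypothetical CCID number */
.ccid_name = "example",
.ccid_owner = THIS_MODULE,
.ccid_init = example_ccid_init, /* required, ccid_register() fails without it */
};
static int __init example_module_init(void)
{
return ccid_register(&ccid_example);
}
static void __exit example_module_exit(void)
{
ccid_unregister(&ccid_example);
}
module_init(example_module_init);
module_exit(example_module_exit);
MODULE_ALIAS("net-dccp-ccid-254"); /* lets ccid_init() request_module() us */
#endif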
#ifndef _CCID_H
#define _CCID_H
/*
* net/dccp/ccid.h
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <net/sock.h>
#include <linux/dccp.h>
#include <linux/list.h>
#include <linux/module.h>
#define CCID_MAX 255
struct ccid {
unsigned char ccid_id;
const char *ccid_name;
struct module *ccid_owner;
int (*ccid_init)(struct sock *sk);
void (*ccid_exit)(struct sock *sk);
int (*ccid_hc_rx_init)(struct sock *sk);
int (*ccid_hc_tx_init)(struct sock *sk);
void (*ccid_hc_rx_exit)(struct sock *sk);
void (*ccid_hc_tx_exit)(struct sock *sk);
void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb);
int (*ccid_hc_rx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb);
void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb);
int (*ccid_hc_tx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
int (*ccid_hc_tx_send_packet)(struct sock *sk,
struct sk_buff *skb, int len,
long *delay);
void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len);
};
extern int ccid_register(struct ccid *ccid);
extern int ccid_unregister(struct ccid *ccid);
extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
extern void ccid_exit(struct ccid *ccid, struct sock *sk);
static inline void __ccid_get(struct ccid *ccid)
{
__module_get(ccid->ccid_owner);
}
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb, int len,
long *delay)
{
int rc = 0;
if (ccid->ccid_hc_tx_send_packet != NULL)
rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay);
return rc;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
int more, int len)
{
if (ccid->ccid_hc_tx_packet_sent != NULL)
ccid->ccid_hc_tx_packet_sent(sk, more, len);
}
static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
int rc = 0;
if (ccid->ccid_hc_rx_init != NULL)
rc = ccid->ccid_hc_rx_init(sk);
return rc;
}
static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
int rc = 0;
if (ccid->ccid_hc_tx_init != NULL)
rc = ccid->ccid_hc_tx_init(sk);
return rc;
}
static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid->ccid_hc_rx_exit != NULL)
ccid->ccid_hc_rx_exit(sk);
}
static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid->ccid_hc_tx_exit != NULL)
ccid->ccid_hc_tx_exit(sk);
}
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_rx_packet_recv != NULL)
ccid->ccid_hc_rx_packet_recv(sk, skb);
}
static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_tx_packet_recv != NULL)
ccid->ccid_hc_tx_packet_recv(sk, skb);
}
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value)
{
int rc = 0;
if (ccid->ccid_hc_tx_parse_options != NULL)
rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
return rc;
}
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value)
{
int rc = 0;
if (ccid->ccid_hc_rx_parse_options != NULL)
rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
return rc;
}
static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_tx_insert_options != NULL)
ccid->ccid_hc_tx_insert_options(sk, skb);
}
static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_rx_insert_options != NULL)
ccid->ccid_hc_rx_insert_options(sk, skb);
}
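/*
* Note: all of the ccid_hc_* wrappers above treat the corresponding
* operation as optional, so a CCID only fills in the hooks it actually
* needs; the NULL checks keep the call sites in the core unconditional.
*/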
#endif /* _CCID_H */
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on IP_DCCP && EXPERIMENTAL
config IP_DCCP_CCID3
tristate "CCID3 (TFRC) (EXPERIMENTAL)"
depends on IP_DCCP
---help---
CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
where a flow is "reasonably fair" if its sending rate is generally
within a factor of two of the sending rate of a TCP flow under the
same conditions. However, TFRC has a much lower variation of
throughput over time compared with TCP, which makes CCID 3 more
suitable than CCID 2 for applications such as streaming media where a
relatively smooth sending rate is of importance.
CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
congestion control algorithms were initially described in RFC 3448.
This text was extracted from draft-ietf-dccp-spec-11.txt.
If in doubt, say M.
endmenu
obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
dccp_ccid3-y := ccid3.o
/*
* net/dccp/ccids/ccid3.c
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "../ccid.h"
#include "../dccp.h"
#include "ccid3.h"
#ifdef CCID3_DEBUG
extern int ccid3_debug;
#define ccid3_pr_debug(format, a...) \
do { if (ccid3_debug) \
printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
} while (0)
#else
#define ccid3_pr_debug(format, a...)
#endif
#define TFRC_MIN_PACKET_SIZE 16
#define TFRC_STD_PACKET_SIZE 256
#define TFRC_MAX_PACKET_SIZE 65535
#define USEC_IN_SEC 1000000
#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC)
/* two seconds as per CCID3 spec 11 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ))
/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_WIN_COUNT_PER_RTT 4
#define TFRC_WIN_COUNT_LIMIT 16
#define TFRC_MAX_BACK_OFF_TIME 64
/* above is in seconds */
#define TFRC_SMALLEST_P 40
#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */
/* Number of later packets received before one is considered lost */
#define TFRC_RECV_NUM_LATE_LOSS 3
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
TFRC_OPT_LOSS_INTERVALS = 193,
TFRC_OPT_RECEIVE_RATE = 194,
};
static int ccid3_debug;
static kmem_cache_t *ccid3_tx_hist_slab;
static kmem_cache_t *ccid3_rx_hist_slab;
static kmem_cache_t *ccid3_loss_interval_hist_slab;
static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio)
{
struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio);
if (entry != NULL)
entry->ccid3htx_sent = 0;
return entry;
}
static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry)
{
if (entry != NULL)
kmem_cache_free(ccid3_tx_hist_slab, entry);
}
static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk,
struct sk_buff *skb,
int prio)
{
struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio);
if (entry != NULL) {
const struct dccp_hdr *dh = dccp_hdr(skb);
entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
entry->ccid3hrx_win_count = dh->dccph_ccval;
entry->ccid3hrx_type = dh->dccph_type;
entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
do_gettimeofday(&(entry->ccid3hrx_tstamp));
}
return entry;
}
static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry)
{
if (entry != NULL)
kmem_cache_free(ccid3_rx_hist_slab, entry);
}
static void ccid3_rx_history_delete(struct list_head *hist)
{
struct ccid3_rx_hist_entry *entry, *next;
list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) {
list_del_init(&entry->ccid3hrx_node);
kmem_cache_free(ccid3_rx_hist_slab, entry);
}
}
static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio)
{
return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio);
}
static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry)
{
if (entry != NULL)
kmem_cache_free(ccid3_loss_interval_hist_slab, entry);
}
static void ccid3_loss_interval_history_delete(struct list_head *hist)
{
struct ccid3_loss_interval_hist_entry *entry, *next;
list_for_each_entry_safe(entry, next, hist, ccid3lih_node) {
list_del_init(&entry->ccid3lih_node);
kmem_cache_free(ccid3_loss_interval_hist_slab, entry);
}
}
static int ccid3_init(struct sock *sk)
{
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
return 0;
}
static void ccid3_exit(struct sock *sk)
{
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
}
/* TFRC sender states */
enum ccid3_hc_tx_states {
TFRC_SSTATE_NO_SENT = 1,
TFRC_SSTATE_NO_FBACK,
TFRC_SSTATE_FBACK,
TFRC_SSTATE_TERM,
};
#ifdef CCID3_DEBUG
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
static char *ccid3_state_names[] = {
[TFRC_SSTATE_NO_SENT] = "NO_SENT",
[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
[TFRC_SSTATE_FBACK] = "FBACK",
[TFRC_SSTATE_TERM] = "TERM",
};
return ccid3_state_names[state];
}
#endif
static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state));
WARN_ON(state == oldstate);
hctx->ccid3hctx_state = state;
}
static void timeval_sub(struct timeval large, struct timeval small,
struct timeval *result)
{
result->tv_sec = large.tv_sec - small.tv_sec;
if (large.tv_usec < small.tv_usec) {
(result->tv_sec)--;
result->tv_usec = USEC_IN_SEC + large.tv_usec - small.tv_usec;
} else
result->tv_usec = large.tv_usec - small.tv_usec;
}
static inline void timeval_fix(struct timeval *tv)
{
if (tv->tv_usec >= USEC_IN_SEC) {
tv->tv_sec++;
tv->tv_usec -= USEC_IN_SEC;
}
}
/* returns the difference in usecs between the timeval passed in and the current time */
static inline u32 now_delta(struct timeval tv)
{
struct timeval now;
do_gettimeofday(&now);
return (now.tv_sec - tv.tv_sec) * USEC_IN_SEC + now.tv_usec - tv.tv_usec;
}
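/*
* Note: now_delta() returns a u32 in usecs, so it wraps once the interval
* exceeds 2^32 usecs (roughly 71 minutes); that is fine for the RTT and
* timeout scale intervals it is used on here.
*/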
#define CALCX_ARRSIZE 500
#define CALCX_SPLIT 50000
/* equivalent to 0.05 */
static const u32 calcx_lookup[CALCX_ARRSIZE][2] = {
{ 37172 , 8172 },
{ 53499 , 11567 },
{ 66664 , 14180 },
{ 78298 , 16388 },
{ 89021 , 18339 },
{ 99147 , 20108 },
{ 108858 , 21738 },
{ 118273 , 23260 },
{ 127474 , 24693 },
{ 136520 , 26052 },
{ 145456 , 27348 },
{ 154316 , 28589 },
{ 163130 , 29783 },
{ 171919 , 30935 },
{ 180704 , 32049 },
{ 189502 , 33130 },
{ 198328 , 34180 },
{ 207194 , 35202 },
{ 216114 , 36198 },
{ 225097 , 37172 },
{ 234153 , 38123 },
{ 243294 , 39055 },
{ 252527 , 39968 },
{ 261861 , 40864 },
{ 271305 , 41743 },
{ 280866 , 42607 },
{ 290553 , 43457 },
{ 300372 , 44293 },
{ 310333 , 45117 },
{ 320441 , 45929 },
{ 330705 , 46729 },
{ 341131 , 47518 },
{ 351728 , 48297 },
{ 362501 , 49066 },
{ 373460 , 49826 },
{ 384609 , 50577 },
{ 395958 , 51320 },
{ 407513 , 52054 },
{ 419281 , 52780 },
{ 431270 , 53499 },
{ 443487 , 54211 },
{ 455940 , 54916 },
{ 468635 , 55614 },
{ 481581 , 56306 },
{ 494785 , 56991 },
{ 508254 , 57671 },
{ 521996 , 58345 },
{ 536019 , 59014 },
{ 550331 , 59677 },
{ 564939 , 60335 },
{ 579851 , 60988 },
{ 595075 , 61636 },
{ 610619 , 62279 },
{ 626491 , 62918 },
{ 642700 , 63553 },
{ 659253 , 64183 },
{ 676158 , 64809 },
{ 693424 , 65431 },
{ 711060 , 66050 },
{ 729073 , 66664 },
{ 747472 , 67275 },
{ 766266 , 67882 },
{ 785464 , 68486 },
{ 805073 , 69087 },
{ 825103 , 69684 },
{ 845562 , 70278 },
{ 866460 , 70868 },
{ 887805 , 71456 },
{ 909606 , 72041 },
{ 931873 , 72623 },
{ 954614 , 73202 },
{ 977839 , 73778 },
{ 1001557 , 74352 },
{ 1025777 , 74923 },
{ 1050508 , 75492 },
{ 1075761 , 76058 },
{ 1101544 , 76621 },
{ 1127867 , 77183 },
{ 1154739 , 77741 },
{ 1182172 , 78298 },
{ 1210173 , 78852 },
{ 1238753 , 79405 },
{ 1267922 , 79955 },
{ 1297689 , 80503 },
{ 1328066 , 81049 },
{ 1359060 , 81593 },
{ 1390684 , 82135 },
{ 1422947 , 82675 },
{ 1455859 , 83213 },
{ 1489430 , 83750 },
{ 1523671 , 84284 },
{ 1558593 , 84817 },
{ 1594205 , 85348 },
{ 1630518 , 85878 },
{ 1667543 , 86406 },
{ 1705290 , 86932 },
{ 1743770 , 87457 },
{ 1782994 , 87980 },
{ 1822973 , 88501 },
{ 1863717 , 89021 },
{ 1905237 , 89540 },
{ 1947545 , 90057 },
{ 1990650 , 90573 },
{ 2034566 , 91087 },
{ 2079301 , 91600 },
{ 2124869 , 92111 },
{ 2171279 , 92622 },
{ 2218543 , 93131 },
{ 2266673 , 93639 },
{ 2315680 , 94145 },
{ 2365575 , 94650 },
{ 2416371 , 95154 },
{ 2468077 , 95657 },
{ 2520707 , 96159 },
{ 2574271 , 96660 },
{ 2628782 , 97159 },
{ 2684250 , 97658 },
{ 2740689 , 98155 },
{ 2798110 , 98651 },
{ 2856524 , 99147 },
{ 2915944 , 99641 },
{ 2976382 , 100134 },
{ 3037850 , 100626 },
{ 3100360 , 101117 },
{ 3163924 , 101608 },
{ 3228554 , 102097 },
{ 3294263 , 102586 },
{ 3361063 , 103073 },
{ 3428966 , 103560 },
{ 3497984 , 104045 },
{ 3568131 , 104530 },
{ 3639419 , 105014 },
{ 3711860 , 105498 },
{ 3785467 , 105980 },
{ 3860253 , 106462 },
{ 3936229 , 106942 },
{ 4013410 , 107422 },
{ 4091808 , 107902 },
{ 4171435 , 108380 },
{ 4252306 , 108858 },
{ 4334431 , 109335 },
{ 4417825 , 109811 },
{ 4502501 , 110287 },
{ 4588472 , 110762 },
{ 4675750 , 111236 },
{ 4764349 , 111709 },
{ 4854283 , 112182 },
{ 4945564 , 112654 },
{ 5038206 , 113126 },
{ 5132223 , 113597 },
{ 5227627 , 114067 },
{ 5324432 , 114537 },
{ 5422652 , 115006 },
{ 5522299 , 115474 },
{ 5623389 , 115942 },
{ 5725934 , 116409 },
{ 5829948 , 116876 },
{ 5935446 , 117342 },
{ 6042439 , 117808 },
{ 6150943 , 118273 },
{ 6260972 , 118738 },
{ 6372538 , 119202 },
{ 6485657 , 119665 },
{ 6600342 , 120128 },
{ 6716607 , 120591 },
{ 6834467 , 121053 },
{ 6953935 , 121514 },
{ 7075025 , 121976 },
{ 7197752 , 122436 },
{ 7322131 , 122896 },
{ 7448175 , 123356 },
{ 7575898 , 123815 },
{ 7705316 , 124274 },
{ 7836442 , 124733 },
{ 7969291 , 125191 },
{ 8103877 , 125648 },
{ 8240216 , 126105 },
{ 8378321 , 126562 },
{ 8518208 , 127018 },
{ 8659890 , 127474 },
{ 8803384 , 127930 },
{ 8948702 , 128385 },
{ 9095861 , 128840 },
{ 9244875 , 129294 },
{ 9395760 , 129748 },
{ 9548529 , 130202 },
{ 9703198 , 130655 },
{ 9859782 , 131108 },
{ 10018296 , 131561 },
{ 10178755 , 132014 },
{ 10341174 , 132466 },
{ 10505569 , 132917 },
{ 10671954 , 133369 },
{ 10840345 , 133820 },
{ 11010757 , 134271 },
{ 11183206 , 134721 },
{ 11357706 , 135171 },
{ 11534274 , 135621 },
{ 11712924 , 136071 },
{ 11893673 , 136520 },
{ 12076536 , 136969 },
{ 12261527 , 137418 },
{ 12448664 , 137867 },
{ 12637961 , 138315 },
{ 12829435 , 138763 },
{ 13023101 , 139211 },
{ 13218974 , 139658 },
{ 13417071 , 140106 },
{ 13617407 , 140553 },
{ 13819999 , 140999 },
{ 14024862 , 141446 },
{ 14232012 , 141892 },
{ 14441465 , 142339 },
{ 14653238 , 142785 },
{ 14867346 , 143230 },
{ 15083805 , 143676 },
{ 15302632 , 144121 },
{ 15523842 , 144566 },
{ 15747453 , 145011 },
{ 15973479 , 145456 },
{ 16201939 , 145900 },
{ 16432847 , 146345 },
{ 16666221 , 146789 },
{ 16902076 , 147233 },
{ 17140429 , 147677 },
{ 17381297 , 148121 },
{ 17624696 , 148564 },
{ 17870643 , 149007 },
{ 18119154 , 149451 },
{ 18370247 , 149894 },
{ 18623936 , 150336 },
{ 18880241 , 150779 },
{ 19139176 , 151222 },
{ 19400759 , 151664 },
{ 19665007 , 152107 },
{ 19931936 , 152549 },
{ 20201564 , 152991 },
{ 20473907 , 153433 },
{ 20748982 , 153875 },
{ 21026807 , 154316 },
{ 21307399 , 154758 },
{ 21590773 , 155199 },
{ 21876949 , 155641 },
{ 22165941 , 156082 },
{ 22457769 , 156523 },
{ 22752449 , 156964 },
{ 23049999 , 157405 },
{ 23350435 , 157846 },
{ 23653774 , 158287 },
{ 23960036 , 158727 },
{ 24269236 , 159168 },
{ 24581392 , 159608 },
{ 24896521 , 160049 },
{ 25214642 , 160489 },
{ 25535772 , 160929 },
{ 25859927 , 161370 },
{ 26187127 , 161810 },
{ 26517388 , 162250 },
{ 26850728 , 162690 },
{ 27187165 , 163130 },
{ 27526716 , 163569 },
{ 27869400 , 164009 },
{ 28215234 , 164449 },
{ 28564236 , 164889 },
{ 28916423 , 165328 },
{ 29271815 , 165768 },
{ 29630428 , 166208 },
{ 29992281 , 166647 },
{ 30357392 , 167087 },
{ 30725779 , 167526 },
{ 31097459 , 167965 },
{ 31472452 , 168405 },
{ 31850774 , 168844 },
{ 32232445 , 169283 },
{ 32617482 , 169723 },
{ 33005904 , 170162 },
{ 33397730 , 170601 },
{ 33792976 , 171041 },
{ 34191663 , 171480 },
{ 34593807 , 171919 },
{ 34999428 , 172358 },
{ 35408544 , 172797 },
{ 35821174 , 173237 },
{ 36237335 , 173676 },
{ 36657047 , 174115 },
{ 37080329 , 174554 },
{ 37507197 , 174993 },
{ 37937673 , 175433 },
{ 38371773 , 175872 },
{ 38809517 , 176311 },
{ 39250924 , 176750 },
{ 39696012 , 177190 },
{ 40144800 , 177629 },
{ 40597308 , 178068 },
{ 41053553 , 178507 },
{ 41513554 , 178947 },
{ 41977332 , 179386 },
{ 42444904 , 179825 },
{ 42916290 , 180265 },
{ 43391509 , 180704 },
{ 43870579 , 181144 },
{ 44353520 , 181583 },
{ 44840352 , 182023 },
{ 45331092 , 182462 },
{ 45825761 , 182902 },
{ 46324378 , 183342 },
{ 46826961 , 183781 },
{ 47333531 , 184221 },
{ 47844106 , 184661 },
{ 48358706 , 185101 },
{ 48877350 , 185541 },
{ 49400058 , 185981 },
{ 49926849 , 186421 },
{ 50457743 , 186861 },
{ 50992759 , 187301 },
{ 51531916 , 187741 },
{ 52075235 , 188181 },
{ 52622735 , 188622 },
{ 53174435 , 189062 },
{ 53730355 , 189502 },
{ 54290515 , 189943 },
{ 54854935 , 190383 },
{ 55423634 , 190824 },
{ 55996633 , 191265 },
{ 56573950 , 191706 },
{ 57155606 , 192146 },
{ 57741621 , 192587 },
{ 58332014 , 193028 },
{ 58926806 , 193470 },
{ 59526017 , 193911 },
{ 60129666 , 194352 },
{ 60737774 , 194793 },
{ 61350361 , 195235 },
{ 61967446 , 195677 },
{ 62589050 , 196118 },
{ 63215194 , 196560 },
{ 63845897 , 197002 },
{ 64481179 , 197444 },
{ 65121061 , 197886 },
{ 65765563 , 198328 },
{ 66414705 , 198770 },
{ 67068508 , 199213 },
{ 67726992 , 199655 },
{ 68390177 , 200098 },
{ 69058085 , 200540 },
{ 69730735 , 200983 },
{ 70408147 , 201426 },
{ 71090343 , 201869 },
{ 71777343 , 202312 },
{ 72469168 , 202755 },
{ 73165837 , 203199 },
{ 73867373 , 203642 },
{ 74573795 , 204086 },
{ 75285124 , 204529 },
{ 76001380 , 204973 },
{ 76722586 , 205417 },
{ 77448761 , 205861 },
{ 78179926 , 206306 },
{ 78916102 , 206750 },
{ 79657310 , 207194 },
{ 80403571 , 207639 },
{ 81154906 , 208084 },
{ 81911335 , 208529 },
{ 82672880 , 208974 },
{ 83439562 , 209419 },
{ 84211402 , 209864 },
{ 84988421 , 210309 },
{ 85770640 , 210755 },
{ 86558080 , 211201 },
{ 87350762 , 211647 },
{ 88148708 , 212093 },
{ 88951938 , 212539 },
{ 89760475 , 212985 },
{ 90574339 , 213432 },
{ 91393551 , 213878 },
{ 92218133 , 214325 },
{ 93048107 , 214772 },
{ 93883493 , 215219 },
{ 94724314 , 215666 },
{ 95570590 , 216114 },
{ 96422343 , 216561 },
{ 97279594 , 217009 },
{ 98142366 , 217457 },
{ 99010679 , 217905 },
{ 99884556 , 218353 },
{ 100764018 , 218801 },
{ 101649086 , 219250 },
{ 102539782 , 219698 },
{ 103436128 , 220147 },
{ 104338146 , 220596 },
{ 105245857 , 221046 },
{ 106159284 , 221495 },
{ 107078448 , 221945 },
{ 108003370 , 222394 },
{ 108934074 , 222844 },
{ 109870580 , 223294 },
{ 110812910 , 223745 },
{ 111761087 , 224195 },
{ 112715133 , 224646 },
{ 113675069 , 225097 },
{ 114640918 , 225548 },
{ 115612702 , 225999 },
{ 116590442 , 226450 },
{ 117574162 , 226902 },
{ 118563882 , 227353 },
{ 119559626 , 227805 },
{ 120561415 , 228258 },
{ 121569272 , 228710 },
{ 122583219 , 229162 },
{ 123603278 , 229615 },
{ 124629471 , 230068 },
{ 125661822 , 230521 },
{ 126700352 , 230974 },
{ 127745083 , 231428 },
{ 128796039 , 231882 },
{ 129853241 , 232336 },
{ 130916713 , 232790 },
{ 131986475 , 233244 },
{ 133062553 , 233699 },
{ 134144966 , 234153 },
{ 135233739 , 234608 },
{ 136328894 , 235064 },
{ 137430453 , 235519 },
{ 138538440 , 235975 },
{ 139652876 , 236430 },
{ 140773786 , 236886 },
{ 141901190 , 237343 },
{ 143035113 , 237799 },
{ 144175576 , 238256 },
{ 145322604 , 238713 },
{ 146476218 , 239170 },
{ 147636442 , 239627 },
{ 148803298 , 240085 },
{ 149976809 , 240542 },
{ 151156999 , 241000 },
{ 152343890 , 241459 },
{ 153537506 , 241917 },
{ 154737869 , 242376 },
{ 155945002 , 242835 },
{ 157158929 , 243294 },
{ 158379673 , 243753 },
{ 159607257 , 244213 },
{ 160841704 , 244673 },
{ 162083037 , 245133 },
{ 163331279 , 245593 },
{ 164586455 , 246054 },
{ 165848586 , 246514 },
{ 167117696 , 246975 },
{ 168393810 , 247437 },
{ 169676949 , 247898 },
{ 170967138 , 248360 },
{ 172264399 , 248822 },
{ 173568757 , 249284 },
{ 174880235 , 249747 },
{ 176198856 , 250209 },
{ 177524643 , 250672 },
{ 178857621 , 251136 },
{ 180197813 , 251599 },
{ 181545242 , 252063 },
{ 182899933 , 252527 },
{ 184261908 , 252991 },
{ 185631191 , 253456 },
{ 187007807 , 253920 },
{ 188391778 , 254385 },
{ 189783129 , 254851 },
{ 191181884 , 255316 },
{ 192588065 , 255782 },
{ 194001698 , 256248 },
{ 195422805 , 256714 },
{ 196851411 , 257181 },
{ 198287540 , 257648 },
{ 199731215 , 258115 },
{ 201182461 , 258582 },
{ 202641302 , 259050 },
{ 204107760 , 259518 },
{ 205581862 , 259986 },
{ 207063630 , 260454 },
{ 208553088 , 260923 },
{ 210050262 , 261392 },
{ 211555174 , 261861 },
{ 213067849 , 262331 },
{ 214588312 , 262800 },
{ 216116586 , 263270 },
{ 217652696 , 263741 },
{ 219196666 , 264211 },
{ 220748520 , 264682 },
{ 222308282 , 265153 },
{ 223875978 , 265625 },
{ 225451630 , 266097 },
{ 227035265 , 266569 },
{ 228626905 , 267041 },
{ 230226576 , 267514 },
{ 231834302 , 267986 },
{ 233450107 , 268460 },
{ 235074016 , 268933 },
{ 236706054 , 269407 },
{ 238346244 , 269881 },
{ 239994613 , 270355 },
{ 241651183 , 270830 },
{ 243315981 , 271305 }
};
/* Calculate the send rate as per section 3.1 of RFC3448
Returns send rate in bytes per second
Integer maths and lookups are used as floating point is not allowed in the kernel
The function for Xcalc as per section 3.1 of RFC3448 is:
X = s
-------------------------------------------------------------
R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
where
X is the transmit rate in bytes/second
s is the packet size in bytes
R is the round trip time in seconds
p is the loss event rate, between 0 and 1.0, of the number of loss events
as a fraction of the number of packets transmitted
t_RTO is the TCP retransmission timeout value in seconds
b is the number of packets acknowledged by a single TCP acknowledgement
we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
X = s
-----------------------------------------------------------------------
R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
which we can break down into:
X = s
--------
R * f(p)
where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
Function parameters:
s - bytes
R - RTT in usecs
p - loss rate (decimal fraction multiplied by 1,000,000)
Returns Xcalc in bytes per second
DON'T alter this code unless you run test cases against it as the code
has been manipulated to stop underflow/overflow.
*/
static u32 ccid3_calc_x(u16 s, u32 R, u32 p)
{
int index;
u32 f;
u64 tmp1, tmp2;
if (p < CALCX_SPLIT)
index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1;
else
index = (p / (1000000 / CALCX_ARRSIZE)) - 1;
if (index < 0)
/* p should be 0 unless there is a bug in my code */
index = 0;
if (R == 0)
R = 1; /* RTT can't be zero or else divide by zero */
BUG_ON(index >= CALCX_ARRSIZE);
if (p >= CALCX_SPLIT)
f = calcx_lookup[index][0];
else
f = calcx_lookup[index][1];
tmp1 = ((u64)s * 100000000);
tmp2 = ((u64)R * (u64)f);
do_div(tmp2,10000);
do_div(tmp1,tmp2);
/* don't alter above math unless you test due to overflow on 32 bit */
return (u32)tmp1;
}
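/*
* Worked example of the fixed point maths above (values illustrative only):
* s = 256 bytes, R = 100000 usecs (100ms), p = 100000 (i.e. 0.1).
* As p >= CALCX_SPLIT, index = (100000 / (1000000 / 500)) - 1 = 49 and
* f = calcx_lookup[49][0] = 564939, i.e. f(0.1) ~= 0.565 scaled by 1E6.
* Then tmp1 = 256 * 1E8 = 2.56E10 and tmp2 = (100000 * 564939) / 10000
* ~= 5.65E6, giving X = tmp1 / tmp2 ~= 4531 bytes/s, which matches
* s / (R * f(p)) with R taken in seconds.
*/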
/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
{
if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
return;
/* if no feedback is received the spec says t_ipi is 1 second (set
* elsewhere) and then doubles after every no feedback timer expiry
* (separate function) */
if (hctx->ccid3hctx_x < 10) {
ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n");
hctx->ccid3hctx_x = 10;
}
hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000)
/ (hctx->ccid3hctx_x / 10);
/* the 100000 and 10 in the maths above are there to avoid 32 bit
* overflow for jumbo packets */
}
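/*
* Example of the scaling above: with s = 256 bytes and X = 4531 bytes/s,
* t_ipi = (256 * 100000) / (4531 / 10) = 25600000 / 453 ~= 56512 usecs,
* i.e. effectively s * USEC_IN_SEC / X with both operands kept well
* inside 32 bits.
*/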
/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
{
hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN);
}
/*
* Update X by
* If (p > 0)
* x_calc = calcX(s, R, p);
* X = max(min(X_calc, 2 * X_recv), s / t_mbi);
* Else
* If (now - tld >= R)
* X = max(min(2 * X, 2 * X_recv), s / R);
* tld = now;
*/
static void ccid3_hc_tx_update_x(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */
hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s,
hctx->ccid3hctx_rtt,
hctx->ccid3hctx_p);
hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv),
hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME);
} else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) {
u32 rtt = hctx->ccid3hctx_rtt;
if (rtt < 10) {
rtt = 10;
} /* avoid divide by zero below */
hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x),
(hctx->ccid3hctx_s * 100000) / (rtt / 10));
/* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */
do_gettimeofday(&hctx->ccid3hctx_t_ld);
}
if (hctx->ccid3hctx_x == 0) {
ccid3_pr_debug("ccid3hctx_x = 0!\n");
hctx->ccid3hctx_x = 1;
}
}
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct dccp_sock *dp = dccp_sk(sk);
unsigned long next_tmout = 0;
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
u32 rtt;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
/* XXX: set some sensible MIB */
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5);
goto out;
}
ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
if (hctx->ccid3hctx_x < 10) {
ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n");
hctx->ccid3hctx_x = 10;
}
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_TERM:
goto out;
case TFRC_SSTATE_NO_FBACK:
/* Halve send rate */
hctx->ccid3hctx_x /= 2;
if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME))
hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME;
ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n",
dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state),
hctx->ccid3hctx_x);
next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000)
/ (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT);
/* do above maths with 100000 and 10 to prevent overflow on 32 bit */
/* FIXME: not sure the above calculation is correct. See section 5 of CCID3
* draft 11; we should really adjust tx_t_ipi and double that to achieve it */
break;
case TFRC_SSTATE_FBACK:
/* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */
rtt = hctx->ccid3hctx_rtt;
if (rtt < 10)
rtt = 10;
/* stop divide by zero below */
if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >=
4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) {
ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
/* Halve sending rate */
/* If (X_calc > 2 * X_recv)
* X_recv = max(X_recv / 2, s / (2 * t_mbi));
* Else
* X_recv = X_calc / 4;
*/
BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0);
/* check also if p is zero -> x_calc is infinity? */
if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
else
hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
/* Update sending rate */
ccid3_hc_tx_update_x(sk);
}
if (hctx->ccid3hctx_x == 0) {
ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n");
hctx->ccid3hctx_x = 10;
}
/* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
next_tmout = max_t(u32, inet_csk(sk)->icsk_rto,
2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10));
break;
default:
printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
__FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
dump_stack();
goto out;
}
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
hctx->ccid3hctx_idle = 1;
out:
bh_unlock_sock(sk);
sock_put(sk);
}
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb,
int len, long *delay)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
struct ccid3_tx_hist_entry *new_packet = NULL;
struct timeval now;
int rc = -ENOTCONN;
// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len);
/* check if pure ACK or Terminating */
/* XXX: we only call this function for DATA and DATAACK, so these packets
* can have zero length, but then why the comment about "pure ACK"?
*/
if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
goto out;
/* See if last packet allocated was not sent */
if (!list_empty(&hctx->ccid3hctx_hist))
new_packet = list_entry(hctx->ccid3hctx_hist.next,
struct ccid3_tx_hist_entry, ccid3htx_node);
if (new_packet == NULL || new_packet->ccid3htx_sent) {
new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC);
rc = -ENOBUFS;
if (new_packet == NULL) {
ccid3_pr_debug("%s, sk=%p, not enough mem to add "
"to history, send refused\n", dccp_role(sk), sk);
goto out;
}
list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist);
}
do_gettimeofday(&now);
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_SENT:
ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk,
dp->dccps_gss);
hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
hctx->ccid3hctx_last_win_count = 0;
hctx->ccid3hctx_t_last_win_count = now;
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;
/* Set nominal send time for initial packet */
hctx->ccid3hctx_t_nom = now;
(hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
timeval_fix(&(hctx->ccid3hctx_t_nom));
ccid3_calc_new_delta(hctx);
rc = 0;
break;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
*delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta);
ccid3_pr_debug("send_packet delay=%ld\n",*delay);
*delay /= -1000;
/* divide by -1000 is to convert to ms and get sign right */
rc = *delay > 0 ? -EAGAIN : 0;
break;
default:
printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
__FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
dump_stack();
rc = -EINVAL;
break;
}
/* Can we send? if so add options and add to packet history */
if (rc == 0)
new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
out:
return rc;
}
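/*
* Note: a return of -EAGAIN together with a positive *delay (in msecs) is
* the signal for the caller in the DCCP output path to hold the packet
* back and retry once the nominal send time has been reached; a return of
* 0 means the packet may be sent right away.
*/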
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
struct ccid3_tx_hist_entry *packet = NULL;
struct timeval now;
// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len);
BUG_ON(hctx == NULL);
if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
dccp_role(sk), sk);
return;
}
do_gettimeofday(&now);
/* check if we have sent a data packet */
if (len > 0) {
unsigned long quarter_rtt;
if (list_empty(&hctx->ccid3hctx_hist)) {
printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__);
return;
}
packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node);
if (packet->ccid3htx_sent) {
printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__);
return;
}
packet->ccid3htx_tstamp = now;
packet->ccid3htx_seqno = dp->dccps_gss;
// ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno);
/*
* Check if win_count has changed */
/* COMPLIANCE_BEGIN
* Algorithm in "8.1. Window Counter Value" in draft-ietf-dccp-ccid3-11.txt
*/
quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4);
if (quarter_rtt > 0) {
hctx->ccid3hctx_t_last_win_count = now;
hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
min_t(unsigned long, quarter_rtt, 5)) % 16;
ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n",
dccp_role(sk), sk,
packet->ccid3htx_win_count,
hctx->ccid3hctx_last_win_count);
}
/* COMPLIANCE_END */
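/*
* Worked example of the window counter update above: with an RTT of
* 40000 usecs and 25000 usecs elapsed since t_last_win_count,
* quarter_rtt = 25000 / 10000 = 2, so last_win_count advances by
* min(2, 5) modulo 16.
*/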
#if 0
ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n",
dccp_role(sk), sk,
packet->ccid3htx_seqno,
packet->ccid3htx_win_count);
#endif
hctx->ccid3hctx_idle = 0;
packet->ccid3htx_sent = 1;
} else
ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
dccp_role(sk), sk, dp->dccps_gss);
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_SENT:
/* if first wasn't pure ack */
if (len != 0)
printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n",
__FUNCTION__, dccp_role(sk));
return;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
if (len > 0) {
hctx->ccid3hctx_t_nom = now;
ccid3_calc_new_t_ipi(hctx);
ccid3_calc_new_delta(hctx);
(hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
timeval_fix(&(hctx->ccid3hctx_t_nom));
}
break;
default:
printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
__FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
dump_stack();
break;
}
}
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
struct ccid3_options_received *opt_recv;
struct ccid3_tx_hist_entry *entry, *next, *packet;
unsigned long next_tmout;
u16 t_elapsed;
u32 pinv;
u32 x_recv;
u32 r_sample;
#if 0
ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state),
skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
#endif
if (hctx == NULL)
return;
if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk);
return;
}
/* we are only interested in ACKs */
if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
return;
opt_recv = &hctx->ccid3hctx_options_received;
t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
x_recv = opt_recv->ccid3or_receive_rate;
pinv = opt_recv->ccid3or_loss_event_rate;
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_SENT:
/* FIXME: what to do here? */
return;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
/* Calculate new round trip sample by
* R_sample = (now - t_recvdata) - t_delay */
/* get t_recvdata from history */
packet = NULL;
list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node)
if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) {
packet = entry;
break;
}
if (packet == NULL) {
ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n",
dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq,
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
return;
}
/* Update RTT */
r_sample = now_delta(packet->ccid3htx_tstamp);
/* FIXME: */
// r_sample -= usecs_to_jiffies(t_elapsed * 10);
/* Update RTT estimate by
* If (No feedback recv)
* R = R_sample;
* Else
* R = q * R + (1 - q) * R_sample;
*
* q is a constant, RFC 3448 recommends 0.9
*/
if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
hctx->ccid3hctx_rtt = r_sample;
} else
hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10;
/*
* XXX: this is to avoid a division by zero in the ccid3_hc_tx_packet_sent
* implementation of the new window count.
*/
if (hctx->ccid3hctx_rtt < 4)
hctx->ccid3hctx_rtt = 4;
ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n",
dccp_role(sk), sk,
hctx->ccid3hctx_rtt,
r_sample);
/* Update timeout interval */
inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC);
/* Update receive rate */
hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */
/* Update loss event rate */
if (pinv == ~0 || pinv == 0)
hctx->ccid3hctx_p = 0;
else {
hctx->ccid3hctx_p = 1000000 / pinv;
if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
hctx->ccid3hctx_p = TFRC_SMALLEST_P;
ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk);
}
}
/* unschedule no feedback timer */
sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
/* Update sending rate */
ccid3_hc_tx_update_x(sk);
/* Update next send time */
if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) {
(hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC;
(hctx->ccid3hctx_t_nom).tv_sec--;
}
/* FIXME - if no feedback then t_ipi can go > 1 second */
(hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi;
ccid3_calc_new_t_ipi(hctx);
(hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
timeval_fix(&(hctx->ccid3hctx_t_nom));
ccid3_calc_new_delta(hctx);
/* remove all packets older than the one acked from history */
#if 0
FIXME!
list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) {
list_del_init(&entry->ccid3htx_node);
ccid3_tx_hist_entry_delete(entry);
}
#endif
/* to prevent divide by zero below */
if (hctx->ccid3hctx_x < 10) {
ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n");
hctx->ccid3hctx_x = 10;
}
/* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
next_tmout = max(inet_csk(sk)->icsk_rto,
2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10));
/* maths with 100000 and 10 is to prevent overflow with 32 bit */
ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n",
dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout);
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout)));
/* set idle flag */
hctx->ccid3hctx_idle = 1;
break;
default:
printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
__FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
dump_stack();
break;
}
}
static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
return;
DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
}
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
unsigned char len, u16 idx, unsigned char *value)
{
int rc = 0;
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
struct ccid3_options_received *opt_recv;
if (hctx == NULL)
return 0;
opt_recv = &hctx->ccid3hctx_options_received;
if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
opt_recv->ccid3or_seqno = dp->dccps_gsr;
opt_recv->ccid3or_loss_event_rate = ~0;
opt_recv->ccid3or_loss_intervals_idx = 0;
opt_recv->ccid3or_loss_intervals_len = 0;
opt_recv->ccid3or_receive_rate = 0;
}
switch (option) {
case TFRC_OPT_LOSS_EVENT_RATE:
if (len != 4) {
ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n",
dccp_role(sk), sk);
rc = -EINVAL;
} else {
opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_event_rate);
}
break;
case TFRC_OPT_LOSS_INTERVALS:
opt_recv->ccid3or_loss_intervals_idx = idx;
opt_recv->ccid3or_loss_intervals_len = len;
ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_intervals_idx,
opt_recv->ccid3or_loss_intervals_len);
break;
case TFRC_OPT_RECEIVE_RATE:
if (len != 4) {
ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n",
dccp_role(sk), sk);
rc = -EINVAL;
} else {
opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_receive_rate);
}
break;
}
return rc;
}
static int ccid3_hc_tx_init(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx;
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any());
if (hctx == NULL)
return -ENOMEM;
memset(hctx, 0, sizeof(*hctx));
if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE &&
dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE)
hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size;
else
hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */
	hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculation */
inet_csk(sk)->icsk_rto = USEC_IN_SEC;
hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
init_timer(&hctx->ccid3hctx_no_feedback_timer);
return 0;
}
static void ccid3_hc_tx_exit(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
struct ccid3_tx_hist_entry *entry, *next;
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
BUG_ON(hctx == NULL);
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
/* Empty packet history */
list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) {
list_del_init(&entry->ccid3htx_node);
ccid3_tx_hist_entry_delete(entry);
}
kfree(dp->dccps_hc_tx_ccid_private);
dp->dccps_hc_tx_ccid_private = NULL;
}
/*
* RX Half Connection methods
*/
/* TFRC receiver states */
enum ccid3_hc_rx_states {
TFRC_RSTATE_NO_DATA = 1,
TFRC_RSTATE_DATA,
TFRC_RSTATE_TERM = 127,
};
#ifdef CCID3_DEBUG
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static const char *ccid3_rx_state_names[] = {
[TFRC_RSTATE_NO_DATA] = "NO_DATA",
[TFRC_RSTATE_DATA] = "DATA",
[TFRC_RSTATE_TERM] = "TERM",
};
return ccid3_rx_state_names[state];
}
#endif
static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state));
WARN_ON(state == oldstate);
hcrx->ccid3hcrx_state = state;
}
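/*
 * Insert @packet into the receive history, which is kept sorted by sequence
 * number, newest first. Returns 1 (freeing @packet) when
 * TFRC_RECV_NUM_LATE_LOSS or more newer data packets have already been seen,
 * i.e. the packet arrived too late and was already counted as lost; returns
 * 0 otherwise. The history is trimmed before returning 0.
 */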
static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_rx_hist_entry *entry, *next;
u8 num_later = 0;
if (list_empty(&hcrx->ccid3hcrx_hist))
list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
else {
u64 seqno = packet->ccid3hrx_seqno;
struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next,
struct ccid3_rx_hist_entry,
ccid3hrx_node);
if (after48(seqno, iter->ccid3hrx_seqno))
list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
else {
if (iter->ccid3hrx_type == DCCP_PKT_DATA ||
iter->ccid3hrx_type == DCCP_PKT_DATAACK)
num_later = 1;
list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
if (after48(seqno, iter->ccid3hrx_seqno)) {
list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node);
goto trim_history;
}
if (iter->ccid3hrx_type == DCCP_PKT_DATA ||
iter->ccid3hrx_type == DCCP_PKT_DATAACK)
num_later++;
if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
ccid3_rx_hist_entry_delete(packet);
ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n",
dccp_role(sk), sk, seqno);
return 1;
}
}
if (num_later < TFRC_RECV_NUM_LATE_LOSS)
list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
/* FIXME: else what? should we destroy the packet like above? */
}
}
trim_history:
/* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */
num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
if (num_later == 0) {
list_del_init(&entry->ccid3hrx_node);
ccid3_rx_hist_entry_delete(entry);
} else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
entry->ccid3hrx_type == DCCP_PKT_DATAACK)
--num_later;
}
} else {
int step = 0;
		u8 win_count = 0; /* Not needed, but let's shut up gcc */
int tmp;
		/*
		 * We have no loss interval history, so we need to keep at
		 * least one RTT's worth of data packets around to be able to
		 * approximate the RTT: skip two data packets, then walk back
		 * until the window counter is more than one RTT old, and
		 * delete the remainder.
		 */
list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
if (num_later == 0) {
switch (step) {
case 0:
step = 1;
/* OK, find next data packet */
num_later = 1;
break;
case 1:
step = 2;
/* OK, find next data packet */
num_later = 1;
win_count = entry->ccid3hrx_win_count;
break;
case 2:
tmp = win_count - entry->ccid3hrx_win_count;
if (tmp < 0)
tmp += TFRC_WIN_COUNT_LIMIT;
if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
					/* we have found a packet older than
					 * one RTT; remove the rest */
step = 3;
} else /* OK, find next data packet */
num_later = 1;
break;
case 3:
list_del_init(&entry->ccid3hrx_node);
ccid3_rx_hist_entry_delete(entry);
break;
}
} else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
entry->ccid3hrx_type == DCCP_PKT_DATAACK)
--num_later;
}
}
return 0;
}
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_rx_hist_entry *entry, *packet;
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
switch (hcrx->ccid3hcrx_state) {
case TFRC_RSTATE_NO_DATA:
hcrx->ccid3hcrx_x_recv = 0;
break;
case TFRC_RSTATE_DATA: {
u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
if (delta == 0)
delta = 1; /* to prevent divide by zero */
hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta;
}
break;
default:
printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
__FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
dump_stack();
return;
}
packet = NULL;
list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node)
if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
packet = entry;
break;
}
if (packet == NULL) {
printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
__FUNCTION__, dccp_role(sk), sk);
dump_stack();
return;
}
do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback));
hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count;
hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno;
hcrx->ccid3hcrx_bytes_recv = 0;
/* Convert to multiples of 10us */
hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10;
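	/*
	 * Send the inverse of the loss event rate: p is kept scaled by
	 * 1000000, so pinv = 1000000 / p is the raw inverse, with ~0
	 * encoding "no loss observed". The sender undoes this mapping in
	 * ccid3_hc_tx_packet_recv.
	 */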
if (hcrx->ccid3hcrx_p == 0)
hcrx->ccid3hcrx_pinv = ~0;
else
hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
dccp_send_ack(sk);
}
static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
return;
if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb))
dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time);
if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv);
const u32 pinv = htonl(hcrx->ccid3hcrx_pinv);
dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv));
dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv));
}
DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
}
/* Weights used to calculate the loss event rate */
/*
 * These are integers as per section 8 of RFC 3448. We can then divide
 * by 4 when we use them.
 */
const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, };
/*
* args: fvalue - function value to match
* returns: p closest to that value
*
* both fvalue and p are multiplied by 1,000,000 to use ints
*/
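/*
 * The table presumably holds precomputed values of the TFRC function
 * f(p) = sqrt(2p/3) + 12 * sqrt(3p/8) * p * (1 + 32p^2) from RFC 3448,
 * scaled by 1,000,000, with a finer-grained column for small p (the
 * CALCX_SPLIT range). This routine scans for the nearest entry and maps
 * its index back to p.
 */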
u32 calcx_reverse_lookup(u32 fvalue)
{
int ctr = 0;
int small;
if (fvalue < calcx_lookup[0][1])
return 0;
if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1])
small = 1;
else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0])
return 1000000;
else
small = 0;
while (fvalue > calcx_lookup[ctr][small])
ctr++;
if (small)
return (CALCX_SPLIT * ctr / CALCX_ARRSIZE);
else
		return (1000000 * ctr / CALCX_ARRSIZE);
}
/* calculate first loss interval
*
* returns estimated loss interval in usecs */
static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_rx_hist_entry *entry, *next, *tail = NULL;
u32 rtt, delta, x_recv, fval, p, tmp2;
struct timeval tstamp, tmp_tv;
int interval = 0;
int win_count = 0;
int step = 0;
u64 tmp1;
list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
tail = entry;
switch (step) {
case 0:
tstamp = entry->ccid3hrx_tstamp;
win_count = entry->ccid3hrx_win_count;
step = 1;
break;
case 1:
interval = win_count - entry->ccid3hrx_win_count;
if (interval < 0)
interval += TFRC_WIN_COUNT_LIMIT;
if (interval > 4)
goto found;
break;
}
}
}
if (step == 0) {
printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n",
__FUNCTION__, dccp_role(sk), sk);
return ~0;
}
if (interval == 0) {
ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n",
dccp_role(sk), sk);
interval = 1;
}
found:
	timeval_sub(tstamp, tail->ccid3hrx_tstamp, &tmp_tv);
rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval;
ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
dccp_role(sk), sk, rtt);
if (rtt == 0)
rtt = 1;
delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
if (delta == 0)
delta = 1;
x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta;
tmp1 = (u64)x_recv * (u64)rtt;
do_div(tmp1,10000000);
tmp2 = (u32)tmp1;
fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
/* do not alter order above or you will get overflow on 32 bit */
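	/*
	 * Note that the two-step scaling above computes
	 * fval = (s * 10^12) / (x_recv * rtt) = 10^6 * s / (X * R),
	 * i.e. the TFRC throughput function f(p) = s / (X * R) (with R
	 * converted from microseconds to seconds), in the same fixed-point
	 * format (scaled by 10^6) as the lookup table.
	 */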
p = calcx_reverse_lookup(fval);
ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\
dccp_role(sk), sk, x_recv, p);
if (p == 0)
return ~0;
else
return 1000000 / p;
}
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_loss_interval_hist_entry *li_entry;
if (seq_loss != DCCP_MAX_SEQNO + 1) {
ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n",
dccp_role(sk), sk, seq_loss, win_loss);
if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
struct ccid3_loss_interval_hist_entry *li_tail = NULL;
int i;
ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk);
for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) {
li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC);
if (li_entry == NULL) {
ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n",
dccp_role(sk), sk);
return;
}
if (li_tail == NULL)
li_tail = li_entry;
list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist);
}
li_entry->ccid3lih_seqno = seq_loss;
li_entry->ccid3lih_win_count = win_loss;
li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk);
}
}
/* FIXME: find end of interval */
}
static void ccid3_hc_rx_detect_loss(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet;
struct ccid3_rx_hist_entry *a_loss = NULL;
struct ccid3_rx_hist_entry *b_loss = NULL;
u64 seq_loss = DCCP_MAX_SEQNO + 1;
u8 win_loss = 0;
u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
if (num_later == 0) {
b_loss = entry;
break;
} else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
entry->ccid3hrx_type == DCCP_PKT_DATAACK)
--num_later;
}
if (b_loss == NULL)
goto out_update_li;
a_next = b_next;
num_later = 1;
#if 0
FIXME MERGE GIT!
list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
if (num_later == 0) {
a_loss = entry;
break;
} else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
entry->ccid3hrx_type == DCCP_PKT_DATAACK)
--num_later;
}
#endif
if (a_loss == NULL) {
if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
			/* no loss events have occurred yet */
ccid3_pr_debug("%s, sk=%p, TODO: find a lost data "
"packet by comparing to initial seqno\n",
dccp_role(sk), sk);
goto out_update_li;
} else {
pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history",
__FUNCTION__, dccp_role(sk), sk);
return;
}
}
/* Locate a lost data packet */
entry = packet = b_loss;
#if 0
FIXME MERGE GIT!
list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno);
if (delta != 0) {
if (packet->ccid3hrx_type == DCCP_PKT_DATA ||
packet->ccid3hrx_type == DCCP_PKT_DATAACK)
--delta;
			/*
			 * FIXME: check this; this % usage is probably because
			 * in earlier drafts the ndp count was just 8 bits
			 * long, but now it can be up to 24 bits long.
			 */
#if 0
if (delta % DCCP_NDP_LIMIT !=
(packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT)
#endif
if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) {
seq_loss = entry->ccid3hrx_seqno;
dccp_inc_seqno(&seq_loss);
}
}
packet = entry;
if (packet == a_loss)
break;
}
#endif
if (seq_loss != DCCP_MAX_SEQNO + 1)
win_loss = a_loss->ccid3hrx_win_count;
out_update_li:
ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
}
static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_loss_interval_hist_entry *li_entry, *li_next;
int i = 0;
u32 i_tot;
u32 i_tot0 = 0;
u32 i_tot1 = 0;
u32 w_tot = 0;
list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) {
if (i < TFRC_RECV_IVAL_F_LENGTH) {
i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i];
w_tot += ccid3_hc_rx_w[i];
}
if (i != 0)
i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1];
if (++i > TFRC_RECV_IVAL_F_LENGTH)
break;
}
if (i != TFRC_RECV_IVAL_F_LENGTH) {
pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n",
__FUNCTION__, dccp_role(sk), sk);
return 0;
}
i_tot = max(i_tot0, i_tot1);
/* FIXME: Why do we do this? -Ian McDonald */
if (i_tot * 4 < w_tot)
i_tot = w_tot * 4;
return i_tot * 4 / w_tot;
}
static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
struct ccid3_rx_hist_entry *packet;
struct timeval now;
u8 win_count;
u32 p_prev;
int ins;
#if 0
ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state),
skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
#endif
if (hcrx == NULL)
return;
BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
switch (DCCP_SKB_CB(skb)->dccpd_type) {
case DCCP_PKT_ACK:
if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
return;
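		/* fall through */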
case DCCP_PKT_DATAACK:
if (dp->dccps_options_received.dccpor_timestamp_echo == 0)
break;
p_prev = hcrx->ccid3hcrx_rtt;
do_gettimeofday(&now);
		/*
		 * FIXME: the calculation below is broken - have to look at the
		 * options more; the pr_debug below will also need fixing.
		 *
		 * hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo -
		 *	usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10);
		 */
if (p_prev != hcrx->ccid3hcrx_rtt)
ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n",
dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
dp->dccps_options_received.dccpor_timestamp_echo,
dp->dccps_options_received.dccpor_elapsed_time);
break;
case DCCP_PKT_DATA:
break;
default:
ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
dccp_role(sk), sk,
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
return;
}
packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC);
if (packet == NULL) {
ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!",
dccp_role(sk), sk);
return;
}
win_count = packet->ccid3hrx_win_count;
ins = ccid3_hc_rx_add_hist(sk, packet);
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
return;
switch (hcrx->ccid3hcrx_state) {
case TFRC_RSTATE_NO_DATA:
ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
ccid3_hc_rx_send_feedback(sk);
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
return;
case TFRC_RSTATE_DATA:
hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4;
if (ins == 0) {
do_gettimeofday(&now);
if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) {
hcrx->ccid3hcrx_tstamp_last_ack = now;
ccid3_hc_rx_send_feedback(sk);
}
return;
}
break;
default:
printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
__FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
dump_stack();
return;
}
/* Dealing with packet loss */
ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
ccid3_hc_rx_detect_loss(sk);
p_prev = hcrx->ccid3hcrx_p;
/* Calculate loss event rate */
	if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
		/* Scaling up by 1000000 as fixed decimal; guard against the
		 * zero i_mean returned on a malformed interval history */
		const u32 i_mean = ccid3_hc_rx_calc_i_mean(sk);
		if (i_mean != 0)
			hcrx->ccid3hcrx_p = 1000000 / i_mean;
	}
if (hcrx->ccid3hcrx_p > p_prev) {
ccid3_hc_rx_send_feedback(sk);
return;
}
}
static int ccid3_hc_rx_init(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx;
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
if (hcrx == NULL)
return -ENOMEM;
memset(hcrx, 0, sizeof(*hcrx));
if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE &&
dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE)
hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size;
else
hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist);
return 0;
}
static void ccid3_hc_rx_exit(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
if (hcrx == NULL)
return;
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
/* Empty packet history */
ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist);
/* Empty loss interval history */
ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
kfree(dp->dccps_hc_rx_ccid_private);
dp->dccps_hc_rx_ccid_private = NULL;
}
static struct ccid ccid3 = {
.ccid_id = 3,
.ccid_name = "ccid3",
.ccid_owner = THIS_MODULE,
.ccid_init = ccid3_init,
.ccid_exit = ccid3_exit,
.ccid_hc_tx_init = ccid3_hc_tx_init,
.ccid_hc_tx_exit = ccid3_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
.ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
.ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
.ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
.ccid_hc_rx_init = ccid3_hc_rx_init,
.ccid_hc_rx_exit = ccid3_hc_rx_exit,
.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
.ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
};
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
static __init int ccid3_module_init(void)
{
int rc = -ENOMEM;
ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history",
sizeof(struct ccid3_tx_hist_entry), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (ccid3_tx_hist_slab == NULL)
goto out;
ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history",
sizeof(struct ccid3_rx_hist_entry), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (ccid3_rx_hist_slab == NULL)
goto out_free_tx_history;
ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history",
sizeof(struct ccid3_loss_interval_hist_entry), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (ccid3_loss_interval_hist_slab == NULL)
goto out_free_rx_history;
rc = ccid_register(&ccid3);
if (rc != 0)
goto out_free_loss_interval_history;
out:
return rc;
out_free_loss_interval_history:
kmem_cache_destroy(ccid3_loss_interval_hist_slab);
ccid3_loss_interval_hist_slab = NULL;
out_free_rx_history:
kmem_cache_destroy(ccid3_rx_hist_slab);
ccid3_rx_hist_slab = NULL;
out_free_tx_history:
kmem_cache_destroy(ccid3_tx_hist_slab);
ccid3_tx_hist_slab = NULL;
goto out;
}
module_init(ccid3_module_init);
static __exit void ccid3_module_exit(void)
{
ccid_unregister(&ccid3);
if (ccid3_tx_hist_slab != NULL) {
kmem_cache_destroy(ccid3_tx_hist_slab);
ccid3_tx_hist_slab = NULL;
}
if (ccid3_rx_hist_slab != NULL) {
kmem_cache_destroy(ccid3_rx_hist_slab);
ccid3_rx_hist_slab = NULL;
}
if (ccid3_loss_interval_hist_slab != NULL) {
kmem_cache_destroy(ccid3_loss_interval_hist_slab);
ccid3_loss_interval_hist_slab = NULL;
}
}
module_exit(ccid3_module_exit);
MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz> & Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-3");
/*
* net/dccp/ccids/ccid3.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _DCCP_CCID3_H_
#define _DCCP_CCID3_H_
#include <linux/types.h>
#include <linux/list.h>
#include <linux/timer.h>
struct ccid3_tx_hist_entry {
struct list_head ccid3htx_node;
u64 ccid3htx_seqno:48,
ccid3htx_win_count:8,
ccid3htx_sent:1;
struct timeval ccid3htx_tstamp;
};
struct ccid3_options_received {
u64 ccid3or_seqno:48,
ccid3or_loss_intervals_idx:16;
u16 ccid3or_loss_intervals_len;
u32 ccid3or_loss_event_rate;
u32 ccid3or_receive_rate;
};
/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block
*
* @ccid3hctx_state - Sender state
* @ccid3hctx_x - Current sending rate
* @ccid3hctx_x_recv - Receive rate
* @ccid3hctx_x_calc - Calculated send (?) rate
* @ccid3hctx_s - Packet size
* @ccid3hctx_rtt - Estimate of current round trip time in usecs
 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
* @ccid3hctx_last_win_count - Last window counter sent
* @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent
* @ccid3hctx_no_feedback_timer - Handle to no feedback timer
* @ccid3hctx_idle - FIXME
* @ccid3hctx_t_ld - Time last doubled during slow start
* @ccid3hctx_t_nom - Nominal send time of next packet
* @ccid3hctx_t_ipi - Interpacket (send) interval
* @ccid3hctx_delta - Send timer delta
* @ccid3hctx_hist - Packet history
*/
struct ccid3_hc_tx_sock {
u32 ccid3hctx_x;
u32 ccid3hctx_x_recv;
u32 ccid3hctx_x_calc;
u16 ccid3hctx_s;
u32 ccid3hctx_rtt;
u32 ccid3hctx_p;
u8 ccid3hctx_state;
u8 ccid3hctx_last_win_count;
u8 ccid3hctx_idle;
struct timeval ccid3hctx_t_last_win_count;
struct timer_list ccid3hctx_no_feedback_timer;
struct timeval ccid3hctx_t_ld;
struct timeval ccid3hctx_t_nom;
u32 ccid3hctx_t_ipi;
u32 ccid3hctx_delta;
struct list_head ccid3hctx_hist;
struct ccid3_options_received ccid3hctx_options_received;
};
struct ccid3_loss_interval_hist_entry {
struct list_head ccid3lih_node;
u64 ccid3lih_seqno:48,
ccid3lih_win_count:4;
u32 ccid3lih_interval;
};
struct ccid3_rx_hist_entry {
struct list_head ccid3hrx_node;
u64 ccid3hrx_seqno:48,
ccid3hrx_win_count:4,
ccid3hrx_type:4;
u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */
struct timeval ccid3hrx_tstamp;
};
struct ccid3_hc_rx_sock {
u64 ccid3hcrx_seqno_last_counter:48,
ccid3hcrx_state:8,
ccid3hcrx_last_counter:4;
unsigned long ccid3hcrx_rtt;
u32 ccid3hcrx_p;
u32 ccid3hcrx_bytes_recv;
struct timeval ccid3hcrx_tstamp_last_feedback;
struct timeval ccid3hcrx_tstamp_last_ack;
struct list_head ccid3hcrx_hist;
struct list_head ccid3hcrx_loss_interval_hist;
u16 ccid3hcrx_s;
u32 ccid3hcrx_pinv;
u32 ccid3hcrx_elapsed_time;
u32 ccid3hcrx_x_recv;
};
#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
#endif /* _DCCP_CCID3_H_ */
#ifndef _DCCP_H
#define _DCCP_H
/*
* net/dccp/dccp.h
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/dccp.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
#define DCCP_DEBUG
#ifdef DCCP_DEBUG
extern int dccp_debug;
#define dccp_pr_debug(format, a...) \
do { if (dccp_debug) \
printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
} while (0)
#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
#endif
extern struct inet_hashinfo dccp_hashinfo;
extern atomic_t dccp_orphan_count;
extern int dccp_tw_count;
extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
/* FIXME: Right size this */
#define DCCP_MAX_OPT_LEN 128
#define DCCP_MAX_PACKET_HDR 32
#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
/* draft-ietf-dccp-spec-11.txt initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
extern struct proto dccp_v4_prot;
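/*
 * 48-bit sequence number comparisons: the << 16 shifts place the 48-bit
 * numbers in the top bits of a 64-bit word, so the subtraction wraps
 * modulo 2^48 and the signed comparison copes with wraparound. As an
 * illustrative example, before48(0xFFFFFFFFFFFFULL, 0) is true, the
 * shifted difference being the small negative value -65536.
 */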
/* is seq1 < seq2 ? */
static inline const int before48(const u64 seq1, const u64 seq2)
{
return (const s64)((seq1 << 16) - (seq2 << 16)) < 0;
}
/* is seq1 > seq2 ? */
static inline const int after48(const u64 seq1, const u64 seq2)
{
return (const s64)((seq2 << 16) - (seq1 << 16)) < 0;
}
/* is seq2 <= seq1 <= seq3 ? */
static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3)
{
return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
}
static inline u64 max48(const u64 seq1, const u64 seq2)
{
return after48(seq1, seq2) ? seq1 : seq2;
}
enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
DCCP_MIB_ESTABRESETS, /* EstabResets */
DCCP_MIB_CURRESTAB, /* CurrEstab */
DCCP_MIB_OUTSEGS, /* OutSegs */
DCCP_MIB_OUTRSTS,
DCCP_MIB_ABORTONTIMEOUT,
DCCP_MIB_TIMEOUTS,
DCCP_MIB_ABORTFAILED,
DCCP_MIB_PASSIVEOPENS,
DCCP_MIB_ATTEMPTFAILS,
DCCP_MIB_OUTDATAGRAMS,
DCCP_MIB_INERRS,
DCCP_MIB_OPTMANDATORYERROR,
DCCP_MIB_INVALIDOPT,
__DCCP_MIB_MAX
};
#define DCCP_MIB_MAX __DCCP_MIB_MAX
struct dccp_mib {
unsigned long mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val)
#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val)
extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
extern int dccp_send_response(struct sock *sk);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
extern void dccp_send_sync(struct sock *sk, u64 seq);
extern void dccp_init_xmit_timers(struct sock *sk);
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
inet_csk_clear_xmit_timers(sk);
}
extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
extern const char *dccp_packet_name(const int type);
extern const char *dccp_state_name(const int state);
static inline void dccp_set_state(struct sock *sk, const int state)
{
const int oldstate = sk->sk_state;
dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
dccp_role(sk), sk,
dccp_state_name(oldstate), dccp_state_name(state));
WARN_ON(state == oldstate);
switch (state) {
case DCCP_OPEN:
if (oldstate != DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
break;
case DCCP_CLOSED:
if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
sk->sk_prot->unhash(sk);
if (inet_csk(sk)->icsk_bind_hash != NULL &&
!(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
inet_put_port(&dccp_hashinfo, sk);
/* fall through */
default:
if (oldstate == DCCP_OPEN)
DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
}
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
sk->sk_state = state;
}
static inline void dccp_done(struct sock *sk)
{
dccp_set_state(sk, DCCP_CLOSED);
dccp_clear_xmit_timers(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
else
inet_csk_destroy_sock(sk);
}
static inline void dccp_openreq_init(struct request_sock *req,
struct dccp_sock *dp,
struct sk_buff *skb)
{
/*
* FIXME: fill in the other req fields from the DCCP options
* received
*/
inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
inet_rsk(req)->acked = 0;
req->rcv_wnd = 0;
}
extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len,
struct sk_buff *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
extern struct sock *dccp_create_openreq_child(struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb);
extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
extern void dccp_v4_err(struct sk_buff *skb, u32);
extern int dccp_v4_rcv(struct sk_buff *skb);
extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev);
extern int dccp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len);
extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len);
extern void dccp_close(struct sock *sk, long timeout);
extern struct sk_buff *dccp_make_response(struct sock *sk,
struct dst_entry *dst,
struct request_sock *req);
extern int dccp_connect(struct sock *sk);
extern int dccp_disconnect(struct sock *sk, int flags);
extern int dccp_getsockopt(struct sock *sk, int level, int optname,
char *optval, int *optlen);
extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size);
extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
extern int dccp_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen);
extern void dccp_shutdown(struct sock *sk, int how);
extern int dccp_v4_checksum(struct sk_buff *skb);
extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk);
struct dccp_skb_cb {
__u8 dccpd_type;
__u8 dccpd_reset_code;
__u8 dccpd_service;
__u8 dccpd_ccval;
__u64 dccpd_seq;
__u64 dccpd_ack_seq;
int dccpd_opt_len;
};
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
static inline int dccp_non_data_packet(const struct sk_buff *skb)
{
const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
return type == DCCP_PKT_ACK ||
type == DCCP_PKT_CLOSE ||
type == DCCP_PKT_CLOSEREQ ||
type == DCCP_PKT_RESET ||
type == DCCP_PKT_SYNC ||
type == DCCP_PKT_SYNCACK;
}
static inline int dccp_packet_without_ack(const struct sk_buff *skb)
{
const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
}
#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
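/* DCCP_PKT_WITHOUT_ACK_SEQ is a sentinel stored in dccpd_ack_seq for packet
 * types that carry no Acknowledgement Number; any value above DCCP_MAX_SEQNO
 * would do. */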
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
if (value > DCCP_MAX_SEQNO)
value -= DCCP_MAX_SEQNO + 1;
*seqno = value;
}
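/*
 * Distance from seqno1 to seqno2, modulo 2^48. Illustrative example:
 * dccp_delta_seqno(DCCP_MAX_SEQNO, 1) == 2, as the shifted subtraction
 * wraps before being shifted back down.
 */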
static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
{
return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
}
static inline void dccp_inc_seqno(u64 *seqno)
{
if (++*seqno > DCCP_MAX_SEQNO)
*seqno = 0;
}
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh));
#if defined(__LITTLE_ENDIAN_BITFIELD)
dh->dccph_seq = htonl((gss >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
dh->dccph_seq = htonl((gss >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr)
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
dhack->dccph_ack_nr_high = htonl((gsr >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
}
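/*
 * Sequence window update as in the DCCP draft: around the greatest sequence
 * number received (S.GSR), the valid window is S.SWL = S.GSR + 1 - W/4 and
 * S.SWH = S.GSR + 3*W/4, where W is the Sequence Window feature value; S.SWL
 * is additionally bounded below by the initial sequence number received
 * (S.ISR).
 */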
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
u64 tmp_gsr;
dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4));
dp->dccps_gsr = seq;
dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr));
dccp_set_seqno(&dp->dccps_swh,
dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4);
}
static inline void dccp_update_gss(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
u64 tmp_gss;
dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1);
dp->dccps_awl = max48(tmp_gss, dp->dccps_iss);
dp->dccps_awh = dp->dccps_gss = seq;
}
extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
extern void dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
extern struct socket *dccp_ctl_socket;
#define DCCP_ACKPKTS_STATE_RECEIVED 0
#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
/** struct dccp_ackpkts - acknowledgeable packets
*
* This data structure is the one defined in the DCCP draft
* Appendix A.
*
* @dccpap_buf_head - circular buffer head
* @dccpap_buf_tail - circular buffer tail
 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the buffer (i.e. %dccpap_buf_head)
* @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0
*
* Additionally, the HC-Receiver must keep some information about the
* Ack Vectors it has recently sent. For each packet sent carrying an
* Ack Vector, it remembers four variables:
*
* @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno)
* @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
* @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno)
* @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
*
* @dccpap_buf_len - circular buffer length
* @dccpap_buf - circular buffer of acknowledgeable packets
*/
struct dccp_ackpkts {
unsigned int dccpap_buf_head;
unsigned int dccpap_buf_tail;
u64 dccpap_buf_ackno;
u64 dccpap_ack_seqno;
u64 dccpap_ack_ackno;
unsigned int dccpap_ack_ptr;
unsigned int dccpap_buf_vector_len;
unsigned int dccpap_ack_vector_len;
unsigned int dccpap_buf_len;
unsigned long dccpap_time;
u8 dccpap_buf_nonce;
u8 dccpap_ack_nonce;
u8 dccpap_buf[0];
};
extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority);
extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
struct sock *sk, u64 ackno);
#ifdef DCCP_DEBUG
extern void dccp_ackvector_print(const u64 ackno,
const unsigned char *vector, int len);
extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
#else
static inline void dccp_ackvector_print(const u64 ackno,
const unsigned char *vector,
int len) { }
static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
#endif
#endif /* _DCCP_H */
/*
* net/dccp/input.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include "ccid.h"
#include "dccp.h"
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
sk->sk_data_ready(sk, 0);
}
static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
{
switch (sk->sk_state) {
case DCCP_PARTOPEN:
case DCCP_OPEN:
dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
dccp_fin(sk, skb);
dccp_set_state(sk, DCCP_CLOSED);
break;
}
}
static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
{
/*
* Step 7: Check for unexpected packet types
* If (S.is_server and P.type == CloseReq)
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
return;
}
switch (sk->sk_state) {
case DCCP_PARTOPEN:
case DCCP_OPEN:
dccp_set_state(sk, DCCP_CLOSING);
dccp_send_close(sk);
break;
}
}
static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
if (dp->dccps_options.dccpo_send_ack_vector)
dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
struct dccp_sock *dp = dccp_sk(sk);
u64 lswl = dp->dccps_swl;
u64 lawl = dp->dccps_awl;
/*
* Step 5: Prepare sequence numbers for Sync
* If P.type == Sync or P.type == SyncAck,
* If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
* / * P is valid, so update sequence number variables
* accordingly. After this update, P will pass the tests
* in Step 6. A SyncAck is generated if necessary in
* Step 15 * /
* Update S.GSR, S.SWL, S.SWH
* Otherwise,
* Drop packet and return
*/
if (dh->dccph_type == DCCP_PKT_SYNC ||
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) &&
!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
else
return -1;
/*
* Step 6: Check sequence numbers
* Let LSWL = S.SWL and LAWL = S.AWL
* If P.type == CloseReq or P.type == Close or P.type == Reset,
* LSWL := S.GSR + 1, LAWL := S.GAR
* If LSWL <= P.seqno <= S.SWH
* and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
* Update S.GSR, S.SWL, S.SWH
* If P.type != Sync,
* Update S.GAR
* Otherwise,
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
dh->dccph_type == DCCP_PKT_CLOSE ||
dh->dccph_type == DCCP_PKT_RESET) {
lswl = dp->dccps_gsr;
dccp_inc_seqno(&lswl);
lawl = dp->dccps_gar;
}
if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
(DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) {
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
if (dh->dccph_type != DCCP_PKT_SYNC &&
DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
} else {
dccp_pr_debug("Step 6 failed, sending SYNC...\n");
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
return -1;
}
return 0;
}
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
if (dccp_check_seqno(sk, skb))
goto discard;
if (dccp_parse_options(sk, skb))
goto discard;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
/*
* FIXME: check ECN to see if we should use
* DCCP_ACKPKTS_STATE_ECN_MARKED
*/
if (dp->dccps_options.dccpo_send_ack_vector) {
struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKPKTS_STATE_RECEIVED)) {
LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n"));
ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
goto discard;
}
/*
* FIXME: this activation is probably wrong, have to study more
* TCP delack machinery and how it fits into DCCP draft, but
* for now it kinda "works" 8)
*/
if (!inet_csk_ack_scheduled(sk)) {
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX);
}
}
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
switch (dccp_hdr(skb)->dccph_type) {
case DCCP_PKT_DATAACK:
case DCCP_PKT_DATA:
/*
* FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option
* if it is.
*/
__skb_pull(skb, dh->dccph_doff * 4);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
sk->sk_data_ready(sk, 0);
return 0;
case DCCP_PKT_ACK:
goto discard;
case DCCP_PKT_RESET:
/*
* Step 9: Process Reset
* If P.type == Reset,
* Tear down connection
* S.state := TIMEWAIT
* Set TIMEWAIT timer
* Drop packet and return
*/
dccp_fin(sk, skb);
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
return 0;
case DCCP_PKT_CLOSEREQ:
dccp_rcv_closereq(sk, skb);
goto discard;
case DCCP_PKT_CLOSE:
dccp_rcv_close(sk, skb);
return 0;
case DCCP_PKT_REQUEST:
/* Step 7
* or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state >= OPEN and P.type == Request
* and P.seqno >= S.OSR)
* or (S.state >= OPEN and P.type == Response
* and P.seqno >= S.OSR)
* or (S.state == RESPOND and P.type == Data),
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
if (dp->dccps_role != DCCP_ROLE_LISTEN)
goto send_sync;
goto check_seq;
case DCCP_PKT_RESPONSE:
if (dp->dccps_role != DCCP_ROLE_CLIENT)
goto send_sync;
check_seq:
if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
send_sync:
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
}
break;
}
DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
discard:
__kfree_skb(skb);
return 0;
}
static int dccp_rcv_request_sent_state_process(struct sock *sk,
struct sk_buff *skb,
const struct dccp_hdr *dh,
const unsigned len)
{
/*
* Step 4: Prepare sequence numbers in REQUEST
* If S.state == REQUEST,
* If (P.type == Response or P.type == Reset)
* and S.AWL <= P.ackno <= S.AWH,
* / * Set sequence number variables corresponding to the
* other endpoint, so P will pass the tests in Step 6 * /
* Set S.GSR, S.ISR, S.SWL, S.SWH
* / * Response processing continues in Step 10; Reset
* processing continues in Step 9 * /
*/
if (dh->dccph_type == DCCP_PKT_RESPONSE) {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct dccp_sock *dp = dccp_sk(sk);
/* Stop the REQUEST timer */
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
BUG_TRAP(sk->sk_send_head != NULL);
__kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) {
dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n",
dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh);
goto out_invalid_packet;
}
dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
/* FIXME: send appropriate RESET code */
goto out_invalid_packet;
}
dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
/*
* Step 10: Process REQUEST state (second part)
* If S.state == REQUEST,
* / * If we get here, P is a valid Response from the server (see
* Step 4), and we should move to PARTOPEN state. PARTOPEN
* means send an Ack, don't send Data packets, retransmit
* Acks periodically, and always include any Init Cookie from
* the Response * /
* S.state := PARTOPEN
* Set PARTOPEN timer
* Continue with S.state == PARTOPEN
* / * Step 12 will send the Ack completing the three-way
* handshake * /
*/
dccp_set_state(sk, DCCP_PARTOPEN);
/* Make sure socket is routed, for correct metrics. */
inet_sk_rebuild_header(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
sk_wake_async(sk, 0, POLL_OUT);
}
if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
icsk->icsk_accept_queue.rskq_defer_accept) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
* It may be deleted, but with this feature tcpdumps
* look so _wonderfully_ clever, that I was not able
* to stand against the temptation 8) --ANK
*/
			/*
			 * OK, in DCCP we can do a similar trick as well; it's
			 * even in the draft. But there is no need for us to
			 * schedule an ack here, as dccp_sendmsg does this for
			 * us, as also stated in the draft. -acme
			 */
__kfree_skb(skb);
return 0;
}
dccp_send_ack(sk);
return -1;
}
out_invalid_packet:
return 1; /* dccp_v4_do_rcv will send a reset, but...
FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
struct sk_buff *skb,
const struct dccp_hdr *dh,
const unsigned len)
{
int queued = 0;
switch (dh->dccph_type) {
case DCCP_PKT_RESET:
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
break;
case DCCP_PKT_DATAACK:
case DCCP_PKT_ACK:
		/*
		 * FIXME: we should be resetting the PARTOPEN (DELACK) timer
		 * here, but only if we haven't used the DELACK timer for
		 * something else, like sending a delayed ack for a TIMESTAMP
		 * echo, etc. For now we're not clearing it, and sending an
		 * extra ACK when there is nothing else to do in DELACK is
		 * not a big deal after all.
		 */
/* Stop the PARTOPEN timer */
if (sk->sk_state == DCCP_PARTOPEN)
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
dccp_set_state(sk, DCCP_OPEN);
if (dh->dccph_type == DCCP_PKT_DATAACK) {
dccp_rcv_established(sk, skb, dh, len);
queued = 1; /* packet was queued (by dccp_rcv_established) */
}
break;
}
return queued;
}
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
const int old_state = sk->sk_state;
int queued = 0;
if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) {
if (dccp_check_seqno(sk, skb))
goto discard;
/*
* Step 8: Process options and mark acknowledgeable
*/
if (dccp_parse_options(sk, skb))
goto discard;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
/*
* FIXME: check ECN to see if we should use
* DCCP_ACKPKTS_STATE_ECN_MARKED
*/
if (dp->dccps_options.dccpo_send_ack_vector) {
if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKPKTS_STATE_RECEIVED))
goto discard;
/*
* FIXME: this activation is probably wrong, have to study more
* TCP delack machinery and how it fits into DCCP draft, but
* for now it kinda "works" 8)
*/
if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 &&
!inet_csk_ack_scheduled(sk)) {
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
}
}
}
/*
* Step 9: Process Reset
* If P.type == Reset,
* Tear down connection
* S.state := TIMEWAIT
* Set TIMEWAIT timer
* Drop packet and return
*/
if (dh->dccph_type == DCCP_PKT_RESET) {
		/* Queue the equivalent of a TCP FIN so that dccp_recvmsg exits its loop */
dccp_fin(sk, skb);
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
return 0;
/*
* Step 7: Check for unexpected packet types
* If (S.is_server and P.type == CloseReq)
* or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state == RESPOND and P.type == Data),
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
(dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
(dp->dccps_role == DCCP_ROLE_CLIENT &&
dh->dccph_type == DCCP_PKT_REQUEST) ||
(sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
goto discard;
}
switch (sk->sk_state) {
case DCCP_CLOSED:
return 1;
case DCCP_LISTEN:
if (dh->dccph_type == DCCP_PKT_ACK ||
dh->dccph_type == DCCP_PKT_DATAACK)
return 1;
if (dh->dccph_type == DCCP_PKT_RESET)
goto discard;
if (dh->dccph_type == DCCP_PKT_REQUEST) {
if (dccp_v4_conn_request(sk, skb) < 0)
return 1;
/* FIXME: do congestion control initialization */
goto discard;
}
goto discard;
case DCCP_REQUESTING:
/* FIXME: do congestion control initialization */
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
if (queued >= 0)
return queued;
__kfree_skb(skb);
return 0;
case DCCP_RESPOND:
case DCCP_PARTOPEN:
queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len);
break;
}
if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) {
switch (old_state) {
case DCCP_PARTOPEN:
sk->sk_state_change(sk);
sk_wake_async(sk, 0, POLL_OUT);
break;
}
}
if (!queued) {
discard:
__kfree_skb(skb);
}
return 0;
}
/*
* net/dccp/ipv4.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/icmp.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/random.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include "ccid.h"
#include "dccp.h"
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
.lhash_lock = RW_LOCK_UNLOCKED,
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
.portalloc_lock = SPIN_LOCK_UNLOCKED,
.port_rover = 1024 - 1,
};
static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
{
return inet_csk_get_port(&dccp_hashinfo, sk, snum);
}
static void dccp_v4_hash(struct sock *sk)
{
inet_hash(&dccp_hashinfo, sk);
}
static void dccp_v4_unhash(struct sock *sk)
{
inet_unhash(&dccp_hashinfo, sk);
}
/* called with local bh disabled */
static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
struct inet_timewait_sock **twp)
{
struct inet_sock *inet = inet_sk(sk);
const u32 daddr = inet->rcv_saddr;
const u32 saddr = inet->daddr;
const int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size);
struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
const struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
write_lock(&head->lock);
/* Check TIME-WAIT sockets first. */
sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
tw = inet_twsk(sk2);
if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
goto not_unique;
}
tw = NULL;
/* And established part... */
sk_for_each(sk2, node, &head->chain) {
if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
goto not_unique;
}
	/* Must record num and sport now. Otherwise we will see the socket in
	 * the hash table with a funny identity. */
inet->num = lport;
inet->sport = htons(lport);
sk->sk_hashent = hash;
BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain);
sock_prot_inc_use(sk->sk_prot);
write_unlock(&head->lock);
if (twp != NULL) {
*twp = tw;
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw != NULL) {
/* Silly. Should hash-dance instead... */
dccp_tw_deschedule(tw);
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
return 0;
not_unique:
write_unlock(&head->lock);
return -EADDRNOTAVAIL;
}
/*
* Bind a port for a connect operation and hash it.
*/
static int dccp_v4_hash_connect(struct sock *sk)
{
const unsigned short snum = inet_sk(sk)->num;
struct inet_bind_hashbucket *head;
struct inet_bind_bucket *tb;
int ret;
if (snum == 0) {
int rover;
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
struct hlist_node *node;
struct inet_timewait_sock *tw = NULL;
local_bh_disable();
		/* TODO. Actually it is not such a bad idea to remove
		 * dccp_hashinfo.portalloc_lock before the next submission to
		 * Linus. As soon as we touch this place at all it is time to
		 * think.
		 *
		 * Now it protects a single _advisory_ variable,
		 * dccp_hashinfo.port_rover, hence it is mostly useless.
		 * The code will work nicely if we just delete it, but
		 * I am afraid that in the contended case it will work no
		 * better, or even worse: another cpu will just hit the same
		 * bucket and spin there.
		 * So some cpu salt could remove both the contention and
		 * the memory pingpong. Any ideas how to do this in a nice way?
		 */
spin_lock(&dccp_hashinfo.portalloc_lock);
rover = dccp_hashinfo.port_rover;
do {
rover++;
if ((rover < low) || (rover > high))
rover = low;
head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)];
spin_lock(&head->lock);
/* Does not bother with rcv_saddr checks,
* because the established check is already
* unique enough.
*/
inet_bind_bucket_for_each(tb, node, &head->chain) {
if (tb->port == rover) {
BUG_TRAP(!hlist_empty(&tb->owners));
if (tb->fastreuse >= 0)
goto next_port;
if (!__dccp_v4_check_established(sk,
rover,
&tw))
goto ok;
goto next_port;
}
}
tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover);
if (tb == NULL) {
spin_unlock(&head->lock);
break;
}
tb->fastreuse = -1;
goto ok;
next_port:
spin_unlock(&head->lock);
} while (--remaining > 0);
dccp_hashinfo.port_rover = rover;
spin_unlock(&dccp_hashinfo.portalloc_lock);
local_bh_enable();
return -EADDRNOTAVAIL;
ok:
/* All locks still held and bhs disabled */
dccp_hashinfo.port_rover = rover;
spin_unlock(&dccp_hashinfo.portalloc_lock);
inet_bind_hash(sk, tb, rover);
if (sk_unhashed(sk)) {
inet_sk(sk)->sport = htons(rover);
__inet_hash(&dccp_hashinfo, sk, 0);
}
spin_unlock(&head->lock);
if (tw != NULL) {
dccp_tw_deschedule(tw);
inet_twsk_put(tw);
}
ret = 0;
goto out;
}
head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)];
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
__inet_hash(&dccp_hashinfo, sk, 0);
spin_unlock_bh(&head->lock);
return 0;
} else {
spin_unlock(&head->lock);
/* No definite answer... Walk to established hash table */
ret = __dccp_v4_check_established(sk, snum, NULL);
out:
local_bh_enable();
return ret;
}
}
static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct rtable *rt;
u32 daddr, nexthop;
int tmp;
int err;
dp->dccps_role = DCCP_ROLE_CLIENT;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
nexthop = daddr = usin->sin_addr.s_addr;
if (inet->opt != NULL && inet->opt->srr) {
if (daddr == 0)
return -EINVAL;
nexthop = inet->opt->faddr;
}
tmp = ip_route_connect(&rt, nexthop, inet->saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_DCCP,
inet->sport, usin->sin_port, sk);
if (tmp < 0)
return tmp;
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
if (inet->opt == NULL || !inet->opt->srr)
daddr = rt->rt_dst;
if (inet->saddr == 0)
inet->saddr = rt->rt_src;
inet->rcv_saddr = inet->saddr;
inet->dport = usin->sin_port;
inet->daddr = daddr;
dp->dccps_ext_header_len = 0;
if (inet->opt != NULL)
dp->dccps_ext_header_len = inet->opt->optlen;
/*
* Socket identity is still unknown (sport may be zero).
* However we set state to DCCP_REQUESTING and, without releasing the
* socket lock, select a source port, enter ourselves into the hash tables
* and complete the initialization after this.
*/
dccp_set_state(sk, DCCP_REQUESTING);
err = dccp_v4_hash_connect(sk);
if (err != 0)
goto failure;
err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
if (err != 0)
goto failure;
/* OK, now commit destination to socket. */
sk_setup_caps(sk, &rt->u.dst);
dp->dccps_gar =
dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
inet->daddr,
inet->sport,
usin->sin_port);
dccp_update_gss(sk, dp->dccps_iss);
inet->id = dp->dccps_iss ^ jiffies;
err = dccp_connect(sk);
rt = NULL;
if (err != 0)
goto failure;
out:
return err;
failure:
/* This unhashes the socket and releases the local port, if necessary. */
dccp_set_state(sk, DCCP_CLOSED);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->dport = 0;
goto out;
}
/*
* This routine does path mtu discovery as defined in RFC1191.
*/
static inline void dccp_do_pmtu_discovery(struct sock *sk,
const struct iphdr *iph,
u32 mtu)
{
struct dst_entry *dst;
const struct inet_sock *inet = inet_sk(sk);
const struct dccp_sock *dp = dccp_sk(sk);
/* We are not interested in DCCP_LISTEN and request sockets (RESPONSEs
* sent out by Linux are always < 576 bytes, so they should go through
* unfragmented).
*/
if (sk->sk_state == DCCP_LISTEN)
return;
/* We don't check in the destentry if pmtu discovery is forbidden
* on this route. We just assume that no packet-too-big ICMPs
* are sent back when pmtu discovery is not active.
* There is a small race when the user changes this flag in the
* route, but I think that's acceptable.
*/
if ((dst = __sk_dst_check(sk, 0)) == NULL)
return;
dst->ops->update_pmtu(dst, mtu);
/* Something is about to go wrong... Remember the soft error
* in case this connection is unable to recover.
*/
if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
sk->sk_err_soft = EMSGSIZE;
mtu = dst_mtu(dst);
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
dp->dccps_pmtu_cookie > mtu) {
dccp_sync_mss(sk, mtu);
/*
* From: draft-ietf-dccp-spec-11.txt
*
* DCCP-Sync packets are the best choice for upward probing,
* since DCCP-Sync probes do not risk application data loss.
*/
dccp_send_sync(sk, dp->dccps_gsr);
} /* else let the usual retransmit timer handle it */
}
static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_ack_bits);
struct sk_buff *skb;
if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
return;
skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
if (skb == NULL)
return;
/* Reserve space for headers. */
skb_reserve(skb, MAX_DCCP_HEADER);
skb->dst = dst_clone(rxskb->dst);
skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
dh = dccp_hdr(skb);
memset(dh, 0, dccp_hdr_ack_len);
/* Build DCCP header and checksum it. */
dh->dccph_type = DCCP_PKT_ACK;
dh->dccph_sport = rxdh->dccph_dport;
dh->dccph_dport = rxdh->dccph_sport;
dh->dccph_doff = dccp_hdr_ack_len / 4;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
bh_lock_sock(dccp_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
bh_unlock_sock(dccp_ctl_socket->sk);
if (err == NET_XMIT_CN || err == 0) {
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
}
}
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
dccp_v4_ctl_send_ack(skb);
}
static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
struct dst_entry *dst)
{
int err = -1;
struct sk_buff *skb;
/* First, grab a route. */
if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
goto out;
skb = dccp_make_response(sk, dst, req);
if (skb != NULL) {
const struct inet_request_sock *ireq = inet_rsk(req);
err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
ireq->rmt_addr,
ireq->opt);
if (err == NET_XMIT_CN)
err = 0;
}
out:
dst_release(dst);
return err;
}
/*
* This routine is called by the ICMP module when it gets some sort of error
* condition. If err < 0 then the socket should be closed and the error
* returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
* After adjustment header points to the first 8 bytes of the DCCP header. We
* need to find the appropriate port.
*
* The locking strategy used here is very "optimistic". When someone else
* accesses the socket the ICMP is just dropped and for some paths there is no
* check at all. A more general error queue to queue errors for later handling
* is probably better.
*/
void dccp_v4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (struct iphdr *)skb->data;
const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2));
struct dccp_sock *dp;
struct inet_sock *inet;
const int type = skb->h.icmph->type;
const int code = skb->h.icmph->code;
struct sock *sk;
__u64 seq;
int err;
if (skb->len < (iph->ihl << 2) + 8) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
iph->saddr, dh->dccph_sport, inet_iif(skb));
if (sk == NULL) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == DCCP_TIME_WAIT) {
inet_twsk_put((struct inet_timewait_sock *)sk);
return;
}
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == DCCP_CLOSED)
goto out;
dp = dccp_sk(sk);
seq = dccp_hdr_seq(skb);
if (sk->sk_state != DCCP_LISTEN &&
!between48(seq, dp->dccps_swl, dp->dccps_swh)) {
NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
switch (type) {
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
break;
case ICMP_DEST_UNREACH:
if (code > NR_ICMP_UNREACH)
goto out;
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
if (!sock_owned_by_user(sk))
dccp_do_pmtu_discovery(sk, iph, info);
goto out;
}
err = icmp_err_convert[code].errno;
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
break;
default:
goto out;
}
switch (sk->sk_state) {
struct request_sock *req, **prev;
case DCCP_LISTEN:
if (sock_owned_by_user(sk))
goto out;
req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
iph->daddr, iph->saddr);
if (!req)
goto out;
/*
* ICMPs are not backlogged, hence we cannot get an established
* socket here.
*/
BUG_TRAP(!req->sk);
if (seq != dccp_rsk(req)->dreq_iss) {
NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
/*
* Still in RESPOND, just remove it silently.
* There is no good way to pass the error to the newly
* created socket, and POSIX does not want network
* errors returned from accept().
*/
inet_csk_reqsk_queue_drop(sk, req, prev);
goto out;
case DCCP_REQUESTING:
case DCCP_RESPOND:
if (!sock_owned_by_user(sk)) {
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk);
dccp_done(sk);
} else
sk->sk_err_soft = err;
goto out;
}
/* If we've already connected we will keep trying
* until we time out, or the user gives up.
*
* rfc1122 4.2.3.9 allows us to consider as hard errors
* only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
* but it is obsoleted by pmtu discovery).
*
* Note that in the modern internet, where routing is unreliable
* and broken firewalls sit in every dark corner sending random
* errors ordered by their masters, even these two messages have
* finally lost their original sense (even Linux sends invalid PORT_UNREACHs).
*
* Now we are in compliance with RFCs.
* --ANK (980905)
*/
inet = inet_sk(sk);
if (!sock_owned_by_user(sk) && inet->recverr) {
sk->sk_err = err;
sk->sk_error_report(sk);
} else /* Only an error on timeout */
sk->sk_err_soft = err;
out:
bh_unlock_sock(sk);
sock_put(sk);
}
extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code);
int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
struct sk_buff *skb;
/*
* FIXME: what if rebuild_header fails?
* Should we be doing a rebuild_header here?
*/
int err = inet_sk_rebuild_header(sk);
if (err != 0)
return err;
skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
if (skb != NULL) {
const struct dccp_sock *dp = dccp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
err = ip_build_and_send_pkt(skb, sk,
inet->saddr, inet->daddr, NULL);
if (err == NET_XMIT_CN)
err = 0;
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}
return err;
}
static inline u64 dccp_v4_init_sequence(const struct sock *sk,
const struct sk_buff *skb)
{
return secure_dccp_sequence_number(skb->nh.iph->daddr,
skb->nh.iph->saddr,
dccp_hdr(skb)->dccph_dport,
dccp_hdr(skb)->dccph_sport);
}
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct inet_request_sock *ireq;
struct dccp_sock dp;
struct request_sock *req;
struct dccp_request_sock *dreq;
const __u32 saddr = skb->nh.iph->saddr;
const __u32 daddr = skb->nh.iph->daddr;
struct dst_entry *dst = NULL;
/* Never answer DCCP_PKT_REQUESTs sent to broadcast or multicast */
if (((struct rtable *)skb->dst)->rt_flags &
(RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
/*
* TW buckets are converted to open requests without
* limitations: they conserve resources and the peer is
* evidently a real one.
*/
if (inet_csk_reqsk_queue_is_full(sk))
goto drop;
/*
* The accept backlog is full. If we have already queued enough
* warm entries in the syn queue, drop this request. That is better than
* clogging the syn queue with openreqs with exponentially increasing
* timeout.
*/
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
req = reqsk_alloc(sk->sk_prot->rsk_prot);
if (req == NULL)
goto drop;
/* FIXME: process options */
dccp_openreq_init(req, &dp, skb);
ireq = inet_rsk(req);
ireq->loc_addr = daddr;
ireq->rmt_addr = saddr;
/* FIXME: Merge Aristeu's option parsing code when ready */
req->rcv_wnd = 100; /* Fake, option parsing will get the right value */
ireq->opt = NULL;
/*
* Step 3: Process LISTEN state
*
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*
* In fact we defer setting S.GSR, S.SWL, S.SWH to
* dccp_create_openreq_child.
*/
dreq = dccp_rsk(req);
dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
if (dccp_v4_send_response(sk, req, dst))
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
return 0;
drop_and_free:
/*
* FIXME: should be reqsk_free after implementing req->rsk_ops
*/
__reqsk_free(req);
drop:
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
return -1;
}
/*
* The three way handshake has completed - we got a valid ACK or DATAACK -
* now create the new socket.
*
* This is the equivalent of TCP's tcp_v4_syn_recv_sock
*/
struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
struct inet_request_sock *ireq;
struct inet_sock *newinet;
struct dccp_sock *newdp;
struct sock *newsk;
if (sk_acceptq_is_full(sk))
goto exit_overflow;
if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
goto exit;
newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL)
goto exit;
sk_setup_caps(newsk, dst);
newdp = dccp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
newinet->daddr = ireq->rmt_addr;
newinet->rcv_saddr = ireq->loc_addr;
newinet->saddr = ireq->loc_addr;
newinet->opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = skb->nh.iph->ttl;
newinet->id = jiffies;
dccp_sync_mss(newsk, dst_mtu(dst));
__inet_hash(&dccp_hashinfo, newsk, 0);
__inet_inherit_port(&dccp_hashinfo, sk, newsk);
return newsk;
exit_overflow:
NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
const struct iphdr *iph = skb->nh.iph;
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
struct request_sock *req = inet_csk_search_req(sk, &prev,
dh->dccph_sport,
iph->saddr, iph->daddr);
if (req != NULL)
return dccp_check_req(sk, skb, req, prev);
nsk = __inet_lookup_established(&dccp_hashinfo,
iph->saddr, dh->dccph_sport,
iph->daddr, ntohs(dh->dccph_dport),
inet_iif(skb));
if (nsk != NULL) {
if (nsk->sk_state != DCCP_TIME_WAIT) {
bh_lock_sock(nsk);
return nsk;
}
inet_twsk_put((struct inet_timewait_sock *)nsk);
return NULL;
}
return sk;
}
int dccp_v4_checksum(struct sk_buff *skb)
{
struct dccp_hdr *dh = dccp_hdr(skb);
int checksum_len;
u32 tmp;
if (dh->dccph_cscov == 0)
checksum_len = skb->len;
else {
checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
checksum_len = checksum_len < skb->len ? checksum_len : skb->len;
}
tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
return csum_fold(tmp);
}
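/*
* A worked example of the coverage computed above (illustrative only):
* with dccph_cscov == 3 and dccph_x == 1, checksum_len = (3 + 1) * 4 ==
* 16 bytes, capped at skb->len; dccph_cscov == 0 covers the whole
* packet. Note that, as written, no IPv4 pseudo-header is folded into
* this sum.
*/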
static int dccp_v4_verify_checksum(struct sk_buff *skb)
{
struct dccp_hdr *th = dccp_hdr(skb);
const u16 remote_checksum = th->dccph_checksum;
u16 local_checksum;
/* FIXME: don't mess with skb payload */
th->dccph_checksum = 0; /* zero it for computation */
local_checksum = dccp_v4_checksum(skb);
/* FIXME: don't mess with skb payload */
th->dccph_checksum = remote_checksum; /* put it back */
return remote_checksum == local_checksum ? 0 : -1;
}
static struct dst_entry *dccp_v4_route_skb(struct sock *sk,
struct sk_buff *skb)
{
struct rtable *rt;
struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
.nl_u = { .ip4_u =
{ .daddr = skb->nh.iph->saddr,
.saddr = skb->nh.iph->daddr,
.tos = RT_CONN_FLAGS(sk) } },
.proto = sk->sk_protocol,
.uli_u = { .ports =
{ .sport = dccp_hdr(skb)->dccph_dport,
.dport = dccp_hdr(skb)->dccph_sport } } };
if (ip_route_output_flow(&rt, &fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
return &rt->u.dst;
}
void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct dst_entry *dst;
/* Never send a reset in response to a reset. */
if (rxdh->dccph_type == DCCP_PKT_RESET)
return;
if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
return;
dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
if (dst == NULL)
return;
skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
if (skb == NULL)
goto out;
/* Reserve space for headers. */
skb_reserve(skb, MAX_DCCP_HEADER);
skb->dst = dst_clone(dst);
skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
dh = dccp_hdr(skb);
memset(dh, 0, dccp_hdr_reset_len);
/* Build DCCP header and checksum it. */
dh->dccph_type = DCCP_PKT_RESET;
dh->dccph_sport = rxdh->dccph_dport;
dh->dccph_dport = rxdh->dccph_sport;
dh->dccph_doff = dccp_hdr_reset_len / 4;
dh->dccph_x = 1;
dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code;
dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
dh->dccph_checksum = dccp_v4_checksum(skb);
bh_lock_sock(dccp_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
bh_unlock_sock(dccp_ctl_socket->sk);
if (err == NET_XMIT_CN || err == 0) {
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
}
out:
dst_release(dst);
}
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_hdr *dh = dccp_hdr(skb);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dh, skb->len))
goto reset;
return 0;
}
/*
* Step 3: Process LISTEN state
* If S.state == LISTEN,
* If P.type == Request or P contains a valid Init Cookie option,
* * Must scan the packet's options to check for an Init
* Cookie. Only the Init Cookie is processed here,
* however; other options are processed in Step 8. This
* scan need only be performed if the endpoint uses Init
* Cookies *
* * Generate a new socket and switch to that socket *
* Set S := new socket for this port pair
* S.state = RESPOND
* Choose S.ISS (initial seqno) or set from Init Cookie
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
* Continue with S.state == RESPOND
* * A Response packet will be generated in Step 11 *
* Otherwise,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*
* NOTE: the check for the packet types is done in dccp_rcv_state_process
*/
if (sk->sk_state == DCCP_LISTEN) {
struct sock *nsk = dccp_v4_hnd_req(sk, skb);
if (nsk == NULL)
goto discard;
if (nsk != sk) {
if (dccp_child_process(sk, nsk, skb))
goto reset;
return 0;
}
}
if (dccp_rcv_state_process(sk, skb, dh, skb->len))
goto reset;
return 0;
reset:
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
dccp_v4_ctl_send_reset(skb);
discard:
kfree_skb(skb);
return 0;
}
static inline int dccp_invalid_packet(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
if (skb->pkt_type != PACKET_HOST)
return 1;
if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
dccp_pr_debug("pskb_may_pull failed\n");
return 1;
}
dh = dccp_hdr(skb);
/* If the packet type is not understood, drop packet and return */
if (dh->dccph_type >= DCCP_PKT_INVALID) {
dccp_pr_debug("invalid packet type\n");
return 1;
}
/*
* If P.Data Offset is too small for packet type, or too large for
* packet, drop packet and return
*/
if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff);
return 1;
}
if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff);
return 1;
}
dh = dccp_hdr(skb);
/*
* If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
* has short sequence numbers), drop packet and return
*/
if (dh->dccph_x == 0 &&
dh->dccph_type != DCCP_PKT_DATA &&
dh->dccph_type != DCCP_PKT_ACK &&
dh->dccph_type != DCCP_PKT_DATAACK) {
dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n",
dccp_packet_name(dh->dccph_type));
return 1;
}
/* If the header checksum is incorrect, drop packet and return */
if (dccp_v4_verify_checksum(skb) < 0) {
dccp_pr_debug("header checksum is incorrect\n");
return 1;
}
return 0;
}
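/*
* Illustration of the Data Offset checks above: with 48-bit sequence
* numbers the generic header is sizeof(struct dccp_hdr) +
* sizeof(struct dccp_hdr_ext) == 12 + 4 == 16 bytes, so dccph_doff
* (which counts 32-bit words) must be at least 4 even before
* dccp_hdr_len() accounts for the type-specific fields.
*/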
/* this is called when real data arrives */
int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
struct sock *sk;
int rc;
/* Step 1: Check header basics: */
if (dccp_invalid_packet(skb))
goto discard_it;
dh = dccp_hdr(skb);
#if 0
/*
* Use something like this to simulate some DATA/DATAACK loss to test
* dccp_ackpkts_add, you'll get something like this on a session that
* sends 10 DATA/DATAACK packets:
*
* dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
*
* 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
* 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state
* 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
*
* So...
*
* 281473596467422 was received
* 281473596467421 was not received
* 281473596467420 was received
* 281473596467419 was not received
* 281473596467418 was received
* 281473596467417 was not received
* 281473596467416 was received
* 281473596467415 was not received
* 281473596467414 was received
* 281473596467413 was received (this one was the 3way handshake RESPONSE)
*
*/
if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) {
static int discard = 0;
if (discard) {
discard = 0;
goto discard_it;
}
discard = 1;
}
#endif
DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
dccp_pr_debug("%8.8s "
"src=%u.%u.%u.%u@%-5d "
"dst=%u.%u.%u.%u@%-5d seq=%llu",
dccp_packet_name(dh->dccph_type),
NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
DCCP_SKB_CB(skb)->dccpd_seq);
if (dccp_packet_without_ack(skb)) {
DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
dccp_pr_debug_cat("\n");
} else {
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
/* Step 2:
* Look up flow ID in table and get corresponding socket */
sk = __inet_lookup(&dccp_hashinfo,
skb->nh.iph->saddr, dh->dccph_sport,
skb->nh.iph->daddr, ntohs(dh->dccph_dport),
inet_iif(skb));
/*
* Step 2:
* If no socket ...
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (sk == NULL) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
goto no_dccp_socket;
}
/*
* Step 2:
* ... or S.state == TIMEWAIT,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (sk->sk_state == DCCP_TIME_WAIT) {
dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n");
goto discard_and_relse;
}
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
dccp_pr_debug("xfrm4_policy_check failed\n");
goto discard_and_relse;
}
if (sk_filter(sk, skb, 0)) {
dccp_pr_debug("sk_filter failed\n");
goto discard_and_relse;
}
skb->dev = NULL;
bh_lock_sock(sk);
rc = 0;
if (!sock_owned_by_user(sk))
rc = dccp_v4_do_rcv(sk, skb);
else
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
sock_put(sk);
return rc;
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
/*
* Step 2:
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
dccp_v4_ctl_send_reset(skb);
}
discard_it:
/* Discard frame. */
kfree_skb(skb);
return 0;
discard_and_relse:
sock_put(sk);
goto discard_it;
}
static int dccp_v4_init_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
static int dccp_ctl_socket_init = 1;
dccp_options_init(&dp->dccps_options);
if (dp->dccps_options.dccpo_send_ack_vector) {
dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
GFP_KERNEL);
if (dp->dccps_hc_rx_ackpkts == NULL)
return -ENOMEM;
}
/*
* FIXME: We're hardcoding the CCID, and doing this at this point makes
* the listening (master) sock get CCID control blocks, which is not
* necessary, but for now, to not mess with the test userspace apps,
* let's leave it here; later the real solution is to do this in a
* setsockopt(CCIDs-I-want/accept). -acme
*/
if (likely(!dccp_ctl_socket_init)) {
dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk);
dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk);
if (dp->dccps_hc_rx_ccid == NULL ||
dp->dccps_hc_tx_ccid == NULL) {
ccid_exit(dp->dccps_hc_rx_ccid, sk);
ccid_exit(dp->dccps_hc_tx_ccid, sk);
dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
dp->dccps_hc_rx_ackpkts = NULL;
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
return -ENOMEM;
}
} else
dccp_ctl_socket_init = 0;
dccp_init_xmit_timers(sk);
sk->sk_state = DCCP_CLOSED;
dp->dccps_mss_cache = 536;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
return 0;
}
int dccp_v4_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
/*
* DCCP doesn't use sk_write_queue, just sk_send_head
* for retransmissions
*/
if (sk->sk_send_head != NULL) {
kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
}
/* Clean up a referenced DCCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash != NULL)
inet_put_port(&dccp_hashinfo, sk);
dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
dp->dccps_hc_rx_ackpkts = NULL;
ccid_exit(dp->dccps_hc_rx_ccid, sk);
ccid_exit(dp->dccps_hc_tx_ccid, sk);
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
return 0;
}
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->opt);
}
static struct request_sock_ops dccp_request_sock_ops = {
.family = PF_INET,
.obj_size = sizeof(struct dccp_request_sock),
.rtx_syn_ack = dccp_v4_send_response,
.send_ack = dccp_v4_reqsk_send_ack,
.destructor = dccp_v4_reqsk_destructor,
.send_reset = dccp_v4_ctl_send_reset,
};
struct proto dccp_v4_prot = {
.name = "DCCP",
.owner = THIS_MODULE,
.close = dccp_close,
.connect = dccp_v4_connect,
.disconnect = dccp_disconnect,
.ioctl = dccp_ioctl,
.init = dccp_v4_init_sock,
.setsockopt = dccp_setsockopt,
.getsockopt = dccp_getsockopt,
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v4_do_rcv,
.hash = dccp_v4_hash,
.unhash = dccp_v4_unhash,
.accept = inet_csk_accept,
.get_port = dccp_v4_get_port,
.shutdown = dccp_shutdown,
.destroy = dccp_v4_destroy_sock,
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
.rsk_prot = &dccp_request_sock_ops,
.twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */
};
/*
* net/dccp/minisocks.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
#include "ccid.h"
#include "dccp.h"
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
/* FIXME: Implement */
dccp_pr_debug("Want to help? Start here\n");
dccp_set_state(sk, state);
}
/* This is for handling early-kills of TIME_WAIT sockets. */
void dccp_tw_deschedule(struct inet_timewait_sock *tw)
{
dccp_pr_debug("Want to help? Start here\n");
__inet_twsk_kill(tw, &dccp_hashinfo);
}
struct sock *dccp_create_openreq_child(struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb)
{
/*
* Step 3: Process LISTEN state
*
* // Generate a new socket and switch to that socket
* Set S := new socket for this port pair
*/
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
const struct dccp_request_sock *dreq = dccp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct dccp_sock *newdp = dccp_sk(newsk);
newdp->dccps_hc_rx_ackpkts = NULL;
newdp->dccps_role = DCCP_ROLE_SERVER;
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
if (newdp->dccps_options.dccpo_send_ack_vector) {
newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
GFP_ATOMIC);
/*
* XXX: We're using the same CCIDs set on the parent, i.e. sk_clone
* copied the master sock and left the CCID pointers for this child,
* that is why we do the __ccid_get calls.
*/
if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
goto out_free;
}
if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 ||
ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) {
dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
out_free:
/* It is still a raw copy of the parent, so invalidate
* the destructor and do a plain sk_free() */
newsk->sk_destruct = NULL;
sk_free(newsk);
return NULL;
}
__ccid_get(newdp->dccps_hc_rx_ccid);
__ccid_get(newdp->dccps_hc_tx_ccid);
/*
* Step 3: Process LISTEN state
*
* Choose S.ISS (initial seqno) or set from Init Cookie
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*/
/* See dccp_v4_conn_request */
newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
dccp_update_gsr(newsk, dreq->dreq_isr);
newdp->dccps_iss = dreq->dreq_iss;
dccp_update_gss(newsk, dreq->dreq_iss);
dccp_init_xmit_timers(newsk);
DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
}
return newsk;
}
/*
* Process an incoming packet for RESPOND sockets represented
* as a request_sock.
*/
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
struct sock *child = NULL;
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) {
struct dccp_request_sock *dreq = dccp_rsk(req);
dccp_pr_debug("Retransmitted REQUEST\n");
/* Send another RESPONSE packet */
dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq);
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
}
/* Network Duplicate, discard packet */
return NULL;
}
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
goto drop;
/* Invalid ACK */
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n",
DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss);
goto drop;
}
child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
/* FIXME: deal with options */
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
out:
return child;
listen_overflow:
dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
req->rsk_ops->send_reset(skb);
inet_csk_reqsk_queue_drop(sk, req, prev);
goto out;
}
/*
* Queue the segment on the new socket if the new socket is active,
* otherwise we just short-circuit this and continue with
* the new socket.
*/
int dccp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb)
{
int ret = 0;
const int state = child->sk_state;
if (!sock_owned_by_user(child)) {
ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len);
/* Wakeup parent, send SIGIO */
if (state == DCCP_RESPOND && child->sk_state != state)
parent->sk_data_ready(parent, 0);
} else {
/* Alas, it is possible again, because we do the lookup
* in the main socket hash table and the lock on the listening
* socket no longer protects us.
*/
sk_add_backlog(child, skb);
}
bh_unlock_sock(child);
sock_put(child);
return ret;
}
/*
* net/dccp/options.c
*
* An implementation of the DCCP protocol
* Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
* Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include "ccid.h"
#include "dccp.h"
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
struct sock *sk,
const u64 ackno,
const unsigned char len,
const unsigned char *vector);
/* Stores the default values for a new connection. May be changed with sysctl. */
static const struct dccp_options dccpo_default_values = {
.dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
.dccpo_ccid = DCCPF_INITIAL_CCID,
.dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
.dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
};
void dccp_options_init(struct dccp_options *dccpo)
{
memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
}
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
{
u32 value = 0;
if (len > 3)
value += *bf++ << 24;
if (len > 2)
value += *bf++ << 16;
if (len > 1)
value += *bf++ << 8;
if (len > 0)
value += *bf;
return value;
}
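/*
* For illustration: bf == { 0x01, 0x02, 0x03 } with len == 3 decodes
* big-endian style to (0x01 << 16) + (0x02 << 8) + 0x03 == 0x010203.
*/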
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
#ifdef DCCP_DEBUG
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " :
"server rx opt: ";
#endif
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
unsigned char *value;
memset(opt_recv, 0, sizeof(*opt_recv));
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
value = NULL;
/* Check if this isn't a single byte option */
if (opt > DCCPO_MAX_RESERVED) {
if (opt_ptr == opt_end)
goto out_invalid_option;
len = *opt_ptr++;
if (len < 3)
goto out_invalid_option;
/*
* Remove the type and len fields, leaving
* just the value size
*/
len -= 2;
value = opt_ptr;
opt_ptr += len;
if (opt_ptr > opt_end)
goto out_invalid_option;
}
switch (opt) {
case DCCPO_PADDING:
break;
case DCCPO_NDP_COUNT:
if (len > 3)
goto out_invalid_option;
opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp);
break;
case DCCPO_ACK_VECTOR_0:
if (len > DCCP_MAX_ACK_VECTOR_LEN)
goto out_invalid_option;
if (pkt_type == DCCP_PKT_DATA)
continue;
opt_recv->dccpor_ack_vector_len = len;
opt_recv->dccpor_ack_vector_idx = value - options;
dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq);
dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
value, len);
dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq,
len, value);
break;
case DCCPO_TIMESTAMP:
if (len != 4)
goto out_invalid_option;
opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
dp->dccps_timestamp_time = jiffies;
dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
debug_prefix, opt_recv->dccpor_timestamp,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
break;
case DCCPO_TIMESTAMP_ECHO:
if (len < 4 || len > 8)
goto out_invalid_option;
opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n",
debug_prefix, opt_recv->dccpor_timestamp_echo,
len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq,
tcp_time_stamp - opt_recv->dccpor_timestamp_echo);
opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4);
dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix,
opt_recv->dccpor_elapsed_time);
break;
case DCCPO_ELAPSED_TIME:
if (len > 4)
goto out_invalid_option;
if (pkt_type == DCCP_PKT_DATA)
continue;
opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len);
dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
opt_recv->dccpor_elapsed_time);
break;
/*
* From draft-ietf-dccp-spec-11.txt:
*
* Option numbers 128 through 191 are for options sent from the HC-
* Sender to the HC-Receiver; option numbers 192 through 255 are for
* options sent from the HC-Receiver to the HC-Sender.
*/
case 128 ... 191: {
const u16 idx = value - options;
if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0)
goto out_invalid_option;
}
break;
case 192 ... 255: {
const u16 idx = value - options;
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0)
goto out_invalid_option;
}
break;
default:
pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n",
sk, opt, len);
break;
}
}
return 0;
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
return -1;
}
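/*
* A worked parse, assuming DCCPO_NDP_COUNT is above DCCPO_MAX_RESERVED
* (i.e. it is a multi-byte option carrying a length octet): the bytes
* { DCCPO_NDP_COUNT, 3, 0x02 } yield len = 3 - 2 == 1 value byte, and
* dccp_decode_value_var() above turns that into dccpor_ndp == 2.
*/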
static void dccp_encode_value_var(const u32 value, unsigned char *to,
const unsigned int len)
{
if (len > 3)
*to++ = (value & 0xFF000000) >> 24;
if (len > 2)
*to++ = (value & 0xFF0000) >> 16;
if (len > 1)
*to++ = (value & 0xFF00) >> 8;
if (len > 0)
*to++ = (value & 0xFF);
}
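/*
* This is the mirror image of dccp_decode_value_var(): e.g.
* dccp_encode_value_var(0x010203, to, 3) stores { 0x01, 0x02, 0x03 },
* which dccp_decode_value_var(to, 3) turns back into 0x010203.
*/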
static inline int dccp_ndp_len(const int ndp)
{
return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
}
void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
const unsigned char option,
const void *value, const unsigned char len)
{
unsigned char *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option));
return;
}
DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
to = skb_push(skb, len + 2);
*to++ = option;
*to++ = len + 2;
memcpy(to, value, len);
}
EXPORT_SYMBOL_GPL(dccp_insert_option);
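/*
* The wire format produced above is the usual TLV layout; e.g. a 4 byte
* value becomes | type | 6 | v0 v1 v2 v3 |, since the length octet
* counts the type and length octets as well.
*/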
static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
int ndp = dp->dccps_ndp_count;
if (dccp_non_data_packet(skb))
++dp->dccps_ndp_count;
else
dp->dccps_ndp_count = 0;
if (ndp > 0) {
unsigned char *ptr;
const int ndp_len = dccp_ndp_len(ndp);
const int len = ndp_len + 2;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
return;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
ptr = skb_push(skb, len);
*ptr++ = DCCPO_NDP_COUNT;
*ptr++ = len;
dccp_encode_value_var(ndp, ptr, ndp_len);
}
}
static inline int dccp_elapsed_time_len(const u32 elapsed_time)
{
return elapsed_time == 0 ? 0 :
elapsed_time <= 0xFF ? 1 :
elapsed_time <= 0xFFFF ? 2 :
elapsed_time <= 0xFFFFFF ? 3 : 4;
}
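/*
* Worked examples for the ladder above: an elapsed time of 200 needs 1
* byte and 70000 (> 0xFFFF) needs 3; 0 needs none, in which case
* dccp_insert_option_elapsed_time() sends no option at all.
*/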
void dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time)
{
#ifdef DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
"server TX opt: ";
#endif
const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
const int len = 2 + elapsed_time_len;
unsigned char *to;
/* If elapsed_time == 0 there is nothing to transmit */
if (elapsed_time_len == 0)
return;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n"));
return;
}
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_ELAPSED_TIME;
*to++ = len;
dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
debug_prefix, elapsed_time,
len, DCCP_SKB_CB(skb)->dccpd_seq);
}
EXPORT_SYMBOL(dccp_insert_option_elapsed_time);
static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
#ifdef DCCP_DEBUG
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
"server TX opt: ";
#endif
struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
int len = ap->dccpap_buf_vector_len + 2;
const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10;
unsigned char *to, *from;
if (elapsed_time != 0)
dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n"));
return;
}
/*
* XXX: now we have just one ack vector sent record, so
* we have to wait for it to be cleared.
*
* Of course this is not acceptable, but this is just for
* basic testing now.
*/
if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
return;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_ACK_VECTOR_0;
*to++ = len;
len = ap->dccpap_buf_vector_len;
from = ap->dccpap_buf + ap->dccpap_buf_head;
/* Check if buf_head wraps */
if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head;
memcpy(to, from, tailsize);
to += tailsize;
len -= tailsize;
from = ap->dccpap_buf;
}
memcpy(to, from, len);
/*
* From draft-ietf-dccp-spec-11.txt:
*
* For each acknowledgement it sends, the HC-Receiver will add an
* acknowledgement record. ack_seqno will equal the HC-Receiver
* sequence number it used for the ack packet; ack_ptr will equal
* buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal
* buf_nonce.
*
* This implementation uses just one ack record for now.
*/
ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
ap->dccpap_ack_ptr = ap->dccpap_buf_head;
ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n",
debug_prefix, ap->dccpap_ack_vector_len,
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
}
static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
{
const u32 now = htonl(tcp_time_stamp);
dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
}
static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
#ifdef DCCP_DEBUG
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
"server TX opt: ";
#endif
u32 tstamp_echo;
const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10;
const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
const int len = 6 + elapsed_time_len;
unsigned char *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n"));
return;
}
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_TIMESTAMP_ECHO;
*to++ = len;
tstamp_echo = htonl(dp->dccps_timestamp_echo);
memcpy(to, &tstamp_echo, 4);
to += 4;
dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
debug_prefix, dp->dccps_timestamp_echo,
len, DCCP_SKB_CB(skb)->dccpd_seq);
dp->dccps_timestamp_echo = 0;
dp->dccps_timestamp_time = 0;
}
void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
if (dp->dccps_options.dccpo_send_ndp_count)
dccp_insert_option_ndp(sk, skb);
if (!dccp_packet_without_ack(skb)) {
if (dp->dccps_options.dccpo_send_ack_vector &&
dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)
dccp_insert_option_ack_vector(sk, skb);
dccp_insert_option_timestamp(sk, skb);
if (dp->dccps_timestamp_echo != 0)
dccp_insert_option_timestamp_echo(sk, skb);
}
ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
/* XXX: insert other options when appropriate */
if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
/* The length of all options has to be a multiple of 4 */
int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
if (padding != 0) {
padding = 4 - padding;
memset(skb_push(skb, padding), 0, padding);
DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
}
}
}
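/*
* Padding example: if the options above add up to 7 bytes then
* dccpd_opt_len % 4 == 3, so one zeroed DCCPO_PADDING byte is pushed
* and dccpd_opt_len becomes 8, keeping the header a multiple of 32 bits.
*/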
struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority)
{
struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
if (ap != NULL) {
#ifdef DCCP_DEBUG
memset(ap->dccpap_buf, 0xFF, len);
#endif
ap->dccpap_buf_len = len;
ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1;
ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
ap->dccpap_buf_nonce = ap->dccpap_ack_nonce = 0;
ap->dccpap_ack_ptr = 0;
ap->dccpap_time = 0;
ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
}
return ap;
}
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
if (ap != NULL) {
#ifdef DCCP_DEBUG
memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
kfree(ap);
}
}
static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
const unsigned int index)
{
return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
}
static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
const unsigned int index)
{
return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
}
/*
* If several packets are missing, the HC-Receiver may prefer to enter multiple
* bytes with run length 0, rather than a single byte with a larger run length;
* this simplifies table updates if one of the missing packets arrives.
*/
static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
const unsigned int packets,
const unsigned char state)
{
unsigned int gap;
signed long new_head;
if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
return -ENOBUFS;
gap = packets - 1;
new_head = ap->dccpap_buf_head - packets;
if (new_head < 0) {
if (gap > 0) {
memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
gap + new_head + 1);
/* the low-index gap cells are filled above; only
* -new_head - 1 cells remain at the tail end of the buffer */
gap = -new_head - 1;
}
new_head += ap->dccpap_buf_len;
}
ap->dccpap_buf_head = new_head;
if (gap > 0)
memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
ap->dccpap_buf[ap->dccpap_buf_head] = state;
ap->dccpap_buf_vector_len += packets;
return 0;
}
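/*
* A wrap-around example for the arithmetic above: with buf_len == 8,
* buf_head == 2 and packets == 5, the new head lands on index 5;
* indexes 0-1 (first memset) and 6-7 (second memset) become
* DCCP_ACKPKTS_STATE_NOT_RECEIVED gap cells with run length 0, and
* buf[5] receives the new state byte.
*/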
/*
* Implements the draft-ietf-dccp-spec-11.txt Appendix A
*/
int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
{
/*
* Check at the right places if the buffer is full, if it is, tell the
* caller to start dropping packets till the HC-Sender acks our ACK
* vectors, when we will free up space in dccpap_buf.
*
* We may well decide to do buffer compression, etc, but for now let's
* just drop.
*
* From Appendix A:
*
* Of course, the circular buffer may overflow, either when the HC-
* Sender is sending data at a very high rate, when the HC-Receiver's
* acknowledgements are not reaching the HC-Sender, or when the HC-
* Sender is forgetting to acknowledge those acks (so the HC-Receiver
* is unable to clean up old state). In this case, the HC-Receiver
* should either compress the buffer (by increasing run lengths when
* possible), transfer its state to a larger buffer, or, as a last
* resort, drop all received packets, without processing them
* whatsoever, until its buffer shrinks again.
*/
/* See if this is the first ackno being inserted */
if (ap->dccpap_buf_vector_len == 0) {
ap->dccpap_buf[ap->dccpap_buf_head] = state;
ap->dccpap_buf_vector_len = 1;
} else if (after48(ackno, ap->dccpap_buf_ackno)) {
const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno);
/*
* Check if the state of this packet is the same as that of the previous
* ackno and, if so, whether we can just bump the head run length.
*/
if (delta == 1 &&
dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK)
ap->dccpap_buf[ap->dccpap_buf_head]++;
else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
return -ENOBUFS;
} else {
/*
* A.1.2. Old Packets
*
* When a packet with Sequence Number S arrives, and S <= buf_ackno,
* the HC-Receiver will scan the table for the byte corresponding to S.
* (Indexing structures could reduce the complexity of this scan.)
*/
u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
unsigned int index = ap->dccpap_buf_head;
while (1) {
const u8 len = dccp_ackpkts_len(ap, index);
const u8 state = dccp_ackpkts_state(ap, index);
/*
* valid packets not yet in dccpap_buf have a reserved entry, with
* a len equal to 0
*/
if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
len == 0 && delta == 0) { /* Found our reserved seat! */
dccp_pr_debug("Found %llu reserved seat!\n", ackno);
ap->dccpap_buf[index] = state;
goto out;
}
/* len == 0 means one packet */
if (delta < len + 1)
goto out_duplicate;
delta -= len + 1;
if (++index == ap->dccpap_buf_len)
index = 0;
}
}
ap->dccpap_buf_ackno = ackno;
ap->dccpap_time = jiffies;
out:
dccp_pr_debug("");
dccp_ackpkts_print(ap);
return 0;
out_duplicate:
/* Duplicate packet */
dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno);
return -EILSEQ;
}
#ifdef DCCP_DEBUG
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
{
if (!dccp_debug)
return;
printk("ACK vector len=%d, ackno=%llu |", len, ackno);
while (len--) {
const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
printk("%d,%d|", state, rl);
++vector;
}
printk("\n");
}
void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
{
dccp_ackvector_print(ap->dccpap_buf_ackno,
ap->dccpap_buf + ap->dccpap_buf_head,
ap->dccpap_buf_vector_len);
}
#endif
static void dccp_ackpkts_throw_away_ack_record(struct dccp_ackpkts *ap)
{
/*
* As we're keeping track of the ack vector size
* (dccpap_buf_vector_len) and the sent ack vector size
* (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
* keep this code here as in the future we'll implement a vector of ack
* records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme
*/
#if 0
ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
ap->dccpap_buf_tail -= ap->dccpap_buf_len;
#endif
ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
}
void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
u64 ackno)
{
/* Check if we actually sent an ACK vector */
if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
return;
if (ackno == ap->dccpap_ack_seqno) {
#ifdef DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
"server rx ack: ";
#endif
dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
debug_prefix, 1,
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
dccp_ackpkts_throw_away_ack_record(ap);
ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
}
}
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
struct sock *sk, u64 ackno,
const unsigned char len,
const unsigned char *vector)
{
unsigned char i;
/* Check if we actually sent an ACK vector */
if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
return;
/*
* We're in the receiver half connection, so if the received ACK vector's
* ackno (e.g. 50) is before dccpap_ack_seqno (e.g. 52), we're not interested.
*
* Extra explanation with example:
*
* if we received an ACK vector with ackno 50, it can only be acking
* 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
*/
// dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno);
if (before48(ackno, ap->dccpap_ack_seqno)) {
// dccp_pr_debug_cat("yes\n");
return;
}
// dccp_pr_debug_cat("no\n");
i = len;
while (i--) {
const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
u64 ackno_end_rl;
dccp_set_seqno(&ackno_end_rl, ackno - rl);
// dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno);
if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
// dccp_pr_debug_cat("yes\n");
if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
#ifdef DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
"server rx ack: ";
#endif
dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
debug_prefix, len,
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
dccp_ackpkts_throw_away_ack_record(ap);
}
/*
* If dccpap_ack_seqno was not received, no problem we'll
* send another ACK vector.
*/
ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
break;
}
// dccp_pr_debug_cat("no\n");
dccp_set_seqno(&ackno, ackno_end_rl - 1);
++vector;
}
}
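/*
* Walk-through of the loop above: a vector byte with run length rl
* covers the rl + 1 sequence numbers ackno, ackno - 1, ..., ackno - rl;
* e.g. with ackno == 50 and rl == 3 it covers 50 down to 47, so an
* ack_seqno of 48 matches the between48() test for that byte.
*/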
/*
* net/dccp/output.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include "ccid.h"
#include "dccp.h"
static inline void dccp_event_ack_sent(struct sock *sk)
{
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
/*
* All SKBs seen here are completely headerless. It is our
* job to build the DCCP header and pass the packet down to
* IP so it can do the same, plus pass the packet off to the
* device.
*/
int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (likely(skb != NULL)) {
const struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
struct dccp_hdr *dh;
/* XXX For now we're using only 48 bits sequence numbers */
const int dccp_header_size = sizeof(*dh) +
sizeof(struct dccp_hdr_ext) +
dccp_packet_hdr_len(dcb->dccpd_type);
int err, set_ack = 1;
u64 ackno = dp->dccps_gsr;
/*
* FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing
* to do here...
*/
dccp_inc_seqno(&dp->dccps_gss);
dcb->dccpd_seq = dp->dccps_gss;
dccp_insert_options(sk, skb);
switch (dcb->dccpd_type) {
case DCCP_PKT_DATA:
set_ack = 0;
break;
case DCCP_PKT_SYNC:
case DCCP_PKT_SYNCACK:
ackno = dcb->dccpd_seq;
break;
}
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
/* Data packets are not cloned as they are never retransmitted */
if (skb_cloned(skb))
skb_set_owner_w(skb, sk);
/* Build DCCP header and checksum it. */
memset(dh, 0, dccp_header_size);
dh->dccph_type = dcb->dccpd_type;
dh->dccph_sport = inet->sport;
dh->dccph_dport = inet->dport;
dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
dh->dccph_ccval = dcb->dccpd_ccval;
/* XXX For now we're using only 48 bits sequence numbers */
dh->dccph_x = 1;
dp->dccps_awh = dp->dccps_gss;
dccp_hdr_set_seq(dh, dp->dccps_gss);
if (set_ack)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
switch (dcb->dccpd_type) {
case DCCP_PKT_REQUEST:
dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service;
break;
case DCCP_PKT_RESET:
dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code;
break;
}
dh->dccph_checksum = dccp_v4_checksum(skb);
if (dcb->dccpd_type == DCCP_PKT_ACK ||
dcb->dccpd_type == DCCP_PKT_DATAACK)
dccp_event_ack_sent(sk);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
err = ip_queue_xmit(skb, 0);
if (err <= 0)
return err;
/* NET_XMIT_CN is special. It does not guarantee
* that this packet is lost. It tells us that the device
* is about to start dropping packets, or already drops
* some packets of the same priority, and invites us to
* send less aggressively.
*/
return err == NET_XMIT_CN ? 0 : err;
}
return -ENOBUFS;
}
unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
struct dccp_sock *dp = dccp_sk(sk);
int mss_now;
/*
* FIXME: we really should be using the af_specific thing to support IPv6.
* mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
*/
mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
/* Now subtract optional transport overhead */
mss_now -= dp->dccps_ext_header_len;
/*
* FIXME: this should come from the CCID infrastructure, where, say,
 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now let's
* put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
* TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
* make it a multiple of 4
*/
mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
/* And store cached results */
dp->dccps_pmtu_cookie = pmtu;
dp->dccps_mss_cache = mss_now;
return mss_now;
}
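/*
 * Worked example of the arithmetic above (hypothetical numbers): with
 * a 1500 byte IPv4 PMTU and dccps_ext_header_len == 0,
 *
 *	1500 - 20 (iphdr) - 12 (dccp_hdr) - 4 (dccp_hdr_ext) = 1464
 *	option estimate: ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4 = 40
 *	mss_now = 1464 - 40 = 1424
 */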
int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (inet_sk_rebuild_header(sk) != 0)
return -EHOSTUNREACH; /* Routing failure or similar. */
return dccp_transmit_skb(sk, (skb_cloned(skb) ?
pskb_copy(skb, GFP_ATOMIC):
skb_clone(skb, GFP_ATOMIC)));
}
struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
struct request_sock *req)
{
struct dccp_hdr *dh;
const int dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response);
struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
dccp_header_size, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
skb->dst = dst_clone(dst);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
dccp_insert_options(sk, skb);
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
memset(dh, 0, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_rsk(req)->rmt_port;
dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
dh->dccph_checksum = dccp_v4_checksum(skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
const enum dccp_reset_codes code)
{
struct dccp_hdr *dh;
struct dccp_sock *dp = dccp_sk(sk);
const int dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
dccp_header_size, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
skb->dst = dst_clone(dst);
skb->csum = 0;
dccp_inc_seqno(&dp->dccps_gss);
DCCP_SKB_CB(skb)->dccpd_reset_code = code;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
dccp_insert_options(sk, skb);
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
memset(dh, 0, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_sk(sk)->dport;
dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESET;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dp->dccps_gss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
dccp_hdr_reset(skb)->dccph_reset_code = code;
dh->dccph_checksum = dccp_v4_checksum(skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
/*
* Do all connect socket setups that can be done AF independent.
*/
static inline void dccp_connect_init(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
sk->sk_err = 0;
sock_reset_flag(sk, SOCK_DONE);
dccp_sync_mss(sk, dst_mtu(dst));
/*
* FIXME: set dp->{dccps_swh,dccps_swl}, with
* something like dccp_inc_seq
*/
icsk->icsk_retransmits = 0;
}
int dccp_connect(struct sock *sk)
{
struct sk_buff *skb;
struct inet_connection_sock *icsk = inet_csk(sk);
dccp_connect_init(sk);
skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
if (unlikely(skb == NULL))
return -ENOBUFS;
/* Reserve space for headers. */
skb_reserve(skb, MAX_DCCP_HEADER);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
/* FIXME: set service to something meaningful, coming
* from userspace*/
DCCP_SKB_CB(skb)->dccpd_service = 0;
skb->csum = 0;
skb_set_owner_w(skb, sk);
BUG_TRAP(sk->sk_send_head == NULL);
sk->sk_send_head = skb;
dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
/* Timer for repeating the REQUEST until an answer. */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
return 0;
}
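/*
 * A sketch of the client handshake dccp_connect() starts (assuming the
 * straight RFC flow, no losses): the REQUEST above is retransmitted by
 * the ICSK_TIME_RETRANS timer until a RESPONSE arrives; the socket
 * then enters DCCP_PARTOPEN and acknowledges the RESPONSE, reaching
 * DCCP_OPEN once the server confirms (see 8.1.5. Handshake Completion
 * in dccp_sendmsg() below).
 */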
void dccp_send_ack(struct sock *sk)
{
/* If we have been reset, we may not send again. */
if (sk->sk_state != DCCP_CLOSED) {
struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
if (skb == NULL) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX);
return;
}
/* Reserve space for headers */
skb_reserve(skb, MAX_DCCP_HEADER);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
skb_set_owner_w(skb, sk);
dccp_transmit_skb(sk, skb);
}
}
EXPORT_SYMBOL_GPL(dccp_send_ack);
void dccp_send_delayed_ack(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/*
* FIXME: tune this timer. elapsed time fixes the skew, so no problem
* with using 2s, and active senders also piggyback the ACK into a
* DATAACK packet, so this is really for quiescent senders.
*/
unsigned long timeout = jiffies + 2 * HZ;
/* Use the new timeout only if there isn't an older one already pending. */
if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
/* If delack timer was blocked or is about to expire,
* send ACK now.
*
* FIXME: check the "about to expire" part
*/
if (icsk->icsk_ack.blocked) {
dccp_send_ack(sk);
return;
}
if (!time_before(timeout, icsk->icsk_ack.timeout))
timeout = icsk->icsk_ack.timeout;
}
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
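/*
 * Example with hypothetical numbers: with HZ == 1000 the timeout above
 * is jiffies + 2000. If a delack timer is already pending and due to
 * expire sooner, the earlier deadline is kept; if that timer was
 * blocked while the socket was owned by the user, the ACK is sent
 * immediately instead.
 */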
void dccp_send_sync(struct sock *sk, u64 seq)
{
/*
* We are not putting this on the write queue, so
* dccp_transmit_skb() will set the ownership to this
* sock.
*/
struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
if (skb == NULL)
/* FIXME: how to make sure the sync is sent? */
return;
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_DCCP_HEADER);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC;
DCCP_SKB_CB(skb)->dccpd_seq = seq;
skb_set_owner_w(skb, sk);
dccp_transmit_skb(sk, skb);
}
/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be
* allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances.
*/
void dccp_send_close(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
/* Socket is locked, keep trying until memory is available. */
for (;;) {
skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL);
if (skb != NULL)
break;
yield();
}
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
			       DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
skb_set_owner_w(skb, sk);
dccp_transmit_skb(sk, skb);
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}
/*
* net/dccp/proto.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>
#include <linux/dccp.h>
#include "ccid.h"
#include "dccp.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);
atomic_t dccp_orphan_count = ATOMIC_INIT(0);
static struct net_protocol dccp_protocol = {
.handler = dccp_v4_rcv,
.err_handler = dccp_v4_err,
};
const char *dccp_packet_name(const int type)
{
static const char *dccp_packet_names[] = {
[DCCP_PKT_REQUEST] = "REQUEST",
[DCCP_PKT_RESPONSE] = "RESPONSE",
[DCCP_PKT_DATA] = "DATA",
[DCCP_PKT_ACK] = "ACK",
[DCCP_PKT_DATAACK] = "DATAACK",
[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
[DCCP_PKT_CLOSE] = "CLOSE",
[DCCP_PKT_RESET] = "RESET",
[DCCP_PKT_SYNC] = "SYNC",
[DCCP_PKT_SYNCACK] = "SYNCACK",
};
if (type >= DCCP_NR_PKT_TYPES)
return "INVALID";
else
return dccp_packet_names[type];
}
EXPORT_SYMBOL_GPL(dccp_packet_name);
const char *dccp_state_name(const int state)
{
static char *dccp_state_names[] = {
[DCCP_OPEN] = "OPEN",
[DCCP_REQUESTING] = "REQUESTING",
[DCCP_PARTOPEN] = "PARTOPEN",
[DCCP_LISTEN] = "LISTEN",
[DCCP_RESPOND] = "RESPOND",
[DCCP_CLOSING] = "CLOSING",
[DCCP_TIME_WAIT] = "TIME_WAIT",
[DCCP_CLOSED] = "CLOSED",
};
if (state >= DCCP_MAX_STATES)
return "INVALID STATE!";
else
return dccp_state_names[state];
}
EXPORT_SYMBOL_GPL(dccp_state_name);
static inline int dccp_listen_start(struct sock *sk)
{
dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
int dccp_disconnect(struct sock *sk, int flags)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
int err = 0;
const int old_state = sk->sk_state;
if (old_state != DCCP_CLOSED)
dccp_set_state(sk, DCCP_CLOSED);
/* ABORT function of RFC793 */
if (old_state == DCCP_LISTEN) {
inet_csk_listen_stop(sk);
/* FIXME: do the active reset thing */
} else if (old_state == DCCP_REQUESTING)
sk->sk_err = ECONNRESET;
dccp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
if (sk->sk_send_head != NULL) {
__kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
}
inet->dport = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
icsk->icsk_backoff = 0;
inet_csk_delack_init(sk);
__sk_dst_reset(sk);
BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
sk->sk_error_report(sk);
return err;
}
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
dccp_pr_debug("entry\n");
return -ENOIOCTLCMD;
}
int dccp_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen)
{
dccp_pr_debug("entry\n");
if (level != SOL_DCCP)
return ip_setsockopt(sk, level, optname, optval, optlen);
return -EOPNOTSUPP;
}
int dccp_getsockopt(struct sock *sk, int level, int optname,
char *optval, int *optlen)
{
dccp_pr_debug("entry\n");
if (level != SOL_DCCP)
return ip_getsockopt(sk, level, optname, optval, optlen);
return -EOPNOTSUPP;
}
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
const struct dccp_sock *dp = dccp_sk(sk);
const int flags = msg->msg_flags;
const int noblock = flags & MSG_DONTWAIT;
struct sk_buff *skb;
int rc, size;
long timeo;
if (len > dp->dccps_mss_cache)
return -EMSGSIZE;
lock_sock(sk);
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
/*
 * We have to use sk_stream_wait_connect here to set sk_write_pending,
 * so that the trick in dccp_rcv_request_sent_state_process works.
 */
/* Wait for a connection to finish. */
if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
goto out_err;
size = sk->sk_prot->max_header + len;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
if (skb == NULL)
goto out_release;
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
if (rc == 0) {
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
long delay;
/*
 * XXX: This is just to match the Waikato tree CA interaction
 * points; after the CCID3 code is stable and I have a better
 * understanding of its behaviour, I'll change this to look
 * more like TCP.
 */
while (1) {
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk,
skb, len, &delay);
if (rc == 0)
break;
if (rc != -EAGAIN)
goto out_discard;
if (delay > timeo)
goto out_discard;
release_sock(sk);
delay = schedule_timeout(delay);
lock_sock(sk);
timeo -= delay;
if (signal_pending(current))
goto out_interrupted;
rc = -EPIPE;
if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN))
goto out_discard;
}
if (sk->sk_state == DCCP_PARTOPEN) {
/* See 8.1.5. Handshake Completion */
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
dcb->dccpd_type = DCCP_PKT_DATAACK;
/* FIXME: we really should have a dccps_ack_pending or use icsk */
} else if (inet_csk_ack_scheduled(sk) ||
(dp->dccps_options.dccpo_send_ack_vector &&
ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
dcb->dccpd_type = DCCP_PKT_DATAACK;
else
dcb->dccpd_type = DCCP_PKT_DATA;
dccp_transmit_skb(sk, skb);
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
} else {
out_discard:
kfree_skb(skb);
}
out_release:
release_sock(sk);
return rc ? : len;
out_err:
rc = sk_stream_error(sk, flags, rc);
goto out_release;
out_interrupted:
rc = sock_intr_errno(timeo);
goto out_discard;
}
EXPORT_SYMBOL(dccp_sendmsg);
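/*
 * Userspace usage sketch (hypothetical, for illustration only): DCCP
 * is datagram oriented, and dccp_sendmsg() returns -EMSGSIZE for
 * anything larger than the cached MSS, so an application sends one
 * datagram per call:
 *
 *	int fd = socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, buf, len, 0);	(len must be <= the current MSS)
 */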
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len)
{
const struct dccp_hdr *dh;
int copied = 0;
unsigned long used;
int err;
int target; /* Read at least this many bytes */
long timeo;
lock_sock(sk);
err = -ENOTCONN;
if (sk->sk_state == DCCP_LISTEN)
goto out;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
if (flags & MSG_OOB)
goto recv_urg;
/* FIXME */
#if 0
seq = &tp->copied_seq;
if (flags & MSG_PEEK) {
peek_seq = tp->copied_seq;
seq = &peek_seq;
}
#endif
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
do {
struct sk_buff *skb;
u32 offset;
/* FIXME */
#if 0
/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
if (tp->urg_data && tp->urg_seq == *seq) {
if (copied)
break;
if (signal_pending(current)) {
copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
break;
}
}
#endif
/* Next get a buffer. */
skb = skb_peek(&sk->sk_receive_queue);
do {
if (!skb)
break;
offset = 0;
dh = dccp_hdr(skb);
if (dh->dccph_type == DCCP_PKT_DATA ||
dh->dccph_type == DCCP_PKT_DATAACK)
goto found_ok_skb;
if (dh->dccph_type == DCCP_PKT_RESET ||
dh->dccph_type == DCCP_PKT_CLOSE) {
dccp_pr_debug("found fin ok!\n");
goto found_fin_ok;
}
dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type));
BUG_TRAP(flags & MSG_PEEK);
skb = skb->next;
} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
/* Well, if we have backlog, try to process it now. */
if (copied >= target && !sk->sk_backlog.tail)
break;
if (copied) {
if (sk->sk_err ||
sk->sk_state == DCCP_CLOSED ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
!timeo ||
signal_pending(current) ||
(flags & MSG_PEEK))
break;
} else {
if (sock_flag(sk, SOCK_DONE))
break;
if (sk->sk_err) {
copied = sock_error(sk);
break;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
if (sk->sk_state == DCCP_CLOSED) {
if (!sock_flag(sk, SOCK_DONE)) {
/* This occurs when the user tries to read
 * from a never-connected socket.
 */
copied = -ENOTCONN;
break;
}
break;
}
if (!timeo) {
copied = -EAGAIN;
break;
}
if (signal_pending(current)) {
copied = sock_intr_errno(timeo);
break;
}
}
/* FIXME: cleanup_rbuf(sk, copied); */
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
lock_sock(sk);
} else
sk_wait_data(sk, &timeo);
continue;
found_ok_skb:
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
used = len;
if (!(flags & MSG_TRUNC)) {
err = skb_copy_datagram_iovec(skb, offset,
msg->msg_iov, used);
if (err) {
/* Exception. Bailout! */
if (!copied)
copied = -EFAULT;
break;
}
}
copied += used;
len -= used;
/* FIXME: tcp_rcv_space_adjust(sk); */
/* skip_copy: */
if (used + offset < skb->len)
continue;
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
continue;
found_fin_ok:
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
break;
} while (len > 0);
/* According to UNIX98, msg_name/msg_namelen are ignored
 * on a connected socket. I was just happy when I found this 8) --ANK
 */
/* Clean up data we have read: This will do ACK frames. */
/* FIXME: cleanup_rbuf(sk, copied); */
release_sock(sk);
return copied;
out:
release_sock(sk);
return err;
recv_urg:
/* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */
goto out;
}
static int inet_dccp_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err;
lock_sock(sk);
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
goto out;
old_state = sk->sk_state;
if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
goto out;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
if (old_state != DCCP_LISTEN) {
/*
* FIXME: here it probably should be sk->sk_prot->listen_start
* see tcp_listen_start
*/
err = dccp_listen_start(sk);
if (err)
goto out;
}
sk->sk_max_ack_backlog = backlog;
err = 0;
out:
release_sock(sk);
return err;
}
static const unsigned char dccp_new_state[] = {
/* current state: new state: action: */
[0] = DCCP_CLOSED,
[DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_REQUESTING] = DCCP_CLOSED,
[DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_LISTEN] = DCCP_CLOSED,
[DCCP_RESPOND] = DCCP_CLOSED,
[DCCP_CLOSING] = DCCP_CLOSED,
[DCCP_TIME_WAIT] = DCCP_CLOSED,
[DCCP_CLOSED] = DCCP_CLOSED,
};
static int dccp_close_state(struct sock *sk)
{
const int next = dccp_new_state[sk->sk_state];
const int ns = next & DCCP_STATE_MASK;
if (ns != sk->sk_state)
dccp_set_state(sk, ns);
return next & DCCP_ACTION_FIN;
}
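/*
 * Worked example: closing a socket in DCCP_OPEN looks up
 * dccp_new_state[DCCP_OPEN] == (DCCP_CLOSING | DCCP_ACTION_FIN), so
 * the state machine moves to DCCP_CLOSING and the returned
 * DCCP_ACTION_FIN bit tells dccp_close() below to emit a
 * CLOSE/CLOSEREQ via dccp_send_close().
 */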
void dccp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
if (sk->sk_state == DCCP_LISTEN) {
dccp_set_state(sk, DCCP_CLOSED);
/* Special case. */
inet_csk_listen_stop(sk);
goto adjudge_to_death;
}
/*
 * We need to flush the recv. buffs. We do this only on the
 * descriptor close, not protocol-sourced closes, because the
 * reader process may not have drained the data yet!
 */
/* FIXME: check for unread data */
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
__kfree_skb(skb);
}
if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
} else if (dccp_close_state(sk)) {
dccp_send_close(sk);
}
sk_stream_wait_close(sk, timeout);
adjudge_to_death:
release_sock(sk);
/*
* Now socket is owned by kernel and we acquire BH lock
* to finish close. No need to check for user refs.
*/
local_bh_disable();
bh_lock_sock(sk);
BUG_TRAP(!sock_owned_by_user(sk));
sock_hold(sk);
sock_orphan(sk);
if (sk->sk_state != DCCP_CLOSED)
dccp_set_state(sk, DCCP_CLOSED);
atomic_inc(&dccp_orphan_count);
if (sk->sk_state == DCCP_CLOSED)
inet_csk_destroy_sock(sk);
/* Otherwise, socket is reprieved until protocol close. */
bh_unlock_sock(sk);
local_bh_enable();
sock_put(sk);
}
void dccp_shutdown(struct sock *sk, int how)
{
dccp_pr_debug("entry\n");
}
struct proto_ops inet_dccp_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll = sock_no_poll,
.ioctl = inet_ioctl,
.listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
extern struct net_proto_family inet_family_ops;
static struct inet_protosw dccp_v4_protosw = {
.type = SOCK_DCCP,
.protocol = IPPROTO_DCCP,
.prot = &dccp_v4_prot,
.ops = &inet_dccp_ops,
.capability = -1,
.no_check = 0,
.flags = 0,
};
/*
* This is the global socket data structure used for responding to
* the Out-of-the-blue (OOTB) packets. A control sock will be created
* for this socket at the initialization time.
*/
struct socket *dccp_ctl_socket;
static char dccp_ctl_socket_err_msg[] __initdata =
KERN_ERR "DCCP: Failed to create the control socket.\n";
static int __init dccp_ctl_sock_init(void)
{
int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
&dccp_ctl_socket);
if (rc < 0)
printk(dccp_ctl_socket_err_msg);
else {
dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
/* Unhash it so that IP input processing does not even
* see it, we do not wish this socket to see incoming
* packets.
*/
dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
}
return rc;
}
static void __exit dccp_ctl_sock_exit(void)
{
if (dccp_ctl_socket != NULL)
sock_release(dccp_ctl_socket);
}
static int __init init_dccp_v4_mibs(void)
{
int rc = -ENOMEM;
dccp_statistics[0] = alloc_percpu(struct dccp_mib);
if (dccp_statistics[0] == NULL)
goto out;
dccp_statistics[1] = alloc_percpu(struct dccp_mib);
if (dccp_statistics[1] == NULL)
goto out_free_one;
rc = 0;
out:
return rc;
out_free_one:
free_percpu(dccp_statistics[0]);
dccp_statistics[0] = NULL;
goto out;
}
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
int rc = proto_register(&dccp_v4_prot, 1);
if (rc)
goto out;
dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_proto_unregister;
/*
* Size and allocate the main established and bind bucket
* hash tables.
*
* The methodology is similar to that of the buffer cache.
*/
if (num_physpages >= (128 * 1024))
goal = num_physpages >> (21 - PAGE_SHIFT);
else
goal = num_physpages >> (23 - PAGE_SHIFT);
if (thash_entries)
goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
;
do {
dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
sizeof(struct inet_ehash_bucket);
dccp_hashinfo.ehash_size >>= 1;
while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1))
dccp_hashinfo.ehash_size--;
dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
__get_free_pages(GFP_ATOMIC, ehash_order);
} while (!dccp_hashinfo.ehash && --ehash_order > 0);
if (!dccp_hashinfo.ehash) {
printk(KERN_CRIT "Failed to allocate DCCP "
"established hash table\n");
goto out_free_bind_bucket_cachep;
}
for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
rwlock_init(&dccp_hashinfo.ehash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
}
bhash_order = ehash_order;
do {
dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
sizeof(struct inet_bind_hashbucket);
if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0)
continue;
dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
__get_free_pages(GFP_ATOMIC, bhash_order);
} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
if (!dccp_hashinfo.bhash) {
printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
goto out_free_dccp_ehash;
}
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
}
if (init_dccp_v4_mibs())
goto out_free_dccp_bhash;
rc = -EAGAIN;
if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
goto out_free_dccp_v4_mibs;
inet_register_protosw(&dccp_v4_protosw);
rc = dccp_ctl_sock_init();
if (rc)
goto out_unregister_protosw;
out:
return rc;
out_unregister_protosw:
inet_unregister_protosw(&dccp_v4_protosw);
inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
free_percpu(dccp_statistics[0]);
free_percpu(dccp_statistics[1]);
dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
proto_unregister(&dccp_v4_prot);
goto out;
}
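/*
 * A worked sizing example for dccp_init() (hypothetical machine): with
 * 4KB pages and 256MB of RAM, num_physpages == 64K < 128K, so
 * goal = 64K >> (23 - 12) = 32 pages and ehash_order settles at 5
 * (1 << 5 == 32). The bucket count is then halved and rounded down to
 * a power of two; the established hash is initialized with twice
 * ehash_size chains (see the ehash_size << 1 loop above).
 */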
static const char dccp_del_proto_err_msg[] __exitdata =
KERN_ERR "can't remove dccp net_protocol\n";
static void __exit dccp_fini(void)
{
dccp_ctl_sock_exit();
inet_unregister_protosw(&dccp_v4_protosw);
if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
printk(dccp_del_proto_err_msg);
/* Free the control endpoint. */
sock_release(dccp_ctl_socket);
proto_unregister(&dccp_v4_prot);
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
}
module_init(dccp_init);
module_exit(dccp_fini);
/* __stringify doesn't like enums, so use the SOCK_DCCP (6) value directly */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
/*
* net/dccp/timer.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include "dccp.h"
static void dccp_write_timer(unsigned long data);
static void dccp_keepalive_timer(unsigned long data);
static void dccp_delack_timer(unsigned long data);
void dccp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
static void dccp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_done(sk);
DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
/* A write timeout has occurred. Process the after effects. */
static int dccp_write_timeout(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
if (icsk->icsk_retransmits != 0)
dst_negative_advice(&sk->sk_dst_cache);
retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
} else {
if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires PMTU black
   hole detection. :-(
   This is the place to implement it. It is not implemented;
   I do not want to implement it. It is disgusting and does
   not work in any case. Let me cite the same draft, which
   requires us to implement this:
   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages. It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols. The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it. It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."
   Golden words :-).
*/
dst_negative_advice(&sk->sk_dst_cache);
}
retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
/*
 * FIXME: see tcp_write_timeout and tcp_out_of_resources
 */
}
if (icsk->icsk_retransmits >= retry_until) {
/* Has it gone just too far? */
dccp_write_err(sk);
return 1;
}
return 0;
}
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
static void dccp_delack_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct inet_connection_sock *icsk = inet_csk(sk);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
icsk->icsk_ack.blocked = 1;
NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
goto out;
}
if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
goto out;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
goto out;
}
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) {
/* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
*/
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
dccp_send_ack(sk);
NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
}
out:
bh_unlock_sock(sk);
sock_put(sk);
}
/*
* The DCCP retransmit timer.
*/
static void dccp_retransmit_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/*
 * sk->sk_send_head has to have one skb with
 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
 * packet types (REQUEST, RESPONSE, the ACK in the 3-way handshake
 * (PARTOPEN timer), etc).
 */
BUG_TRAP(sk->sk_send_head != NULL);
/*
 * If more than 4MSL (8 minutes) have passed, a RESET(aborted) was
 * sent and there is no need to retransmit: this sock is dead.
 */
if (dccp_write_timeout(sk))
goto out;
/*
* We want to know the number of packets retransmitted, not the
* total number of retransmissions of clones of original packets.
*/
if (icsk->icsk_retransmits == 0)
DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
/*
* Retransmission failed because of local congestion,
* do not backoff.
*/
if (icsk->icsk_retransmits == 0)
icsk->icsk_retransmits = 1;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
min(icsk->icsk_rto,
TCP_RESOURCE_PROBE_INTERVAL),
TCP_RTO_MAX);
goto out;
}
icsk->icsk_backoff++;
icsk->icsk_retransmits++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
__sk_dst_reset(sk);
out:;
}
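/*
 * A sketch of the backoff this produces (assuming no successful
 * retransmission and no local congestion): the RTO doubles on every
 * expiry, RTO, 2*RTO, 4*RTO, ..., clamped at DCCP_RTO_MAX, until
 * dccp_write_timeout() decides the connection is dead and resets it.
 */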
static void dccp_write_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct inet_connection_sock *icsk = inet_csk(sk);
int event = 0;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later */
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
goto out;
}
if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
goto out;
if (time_after(icsk->icsk_timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
goto out;
}
event = icsk->icsk_pending;
icsk->icsk_pending = 0;
switch (event) {
case ICSK_TIME_RETRANS:
dccp_retransmit_timer(sk);
break;
}
out:
bh_unlock_sock(sk);
sock_put(sk);
}
/*
* Timer for listening sockets
*/
static void dccp_response_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
}
static void dccp_keepalive_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
/* Only process if socket is not in use. */
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
inet_csk_reset_keepalive_timer(sk, HZ / 20);
goto out;
}
if (sk->sk_state == DCCP_LISTEN) {
dccp_response_timer(sk);
goto out;
}
out:
bh_unlock_sock(sk);
sock_put(sk);
}