Commit e27cca96 authored by Sabrina Dubroca's avatar Sabrina Dubroca Committed by Steffen Klassert

xfrm: add espintcp (RFC 8229)

TCP encapsulation of IKE and IPsec messages (RFC 8229) is implemented
as a TCP ULP, overriding in particular the sendmsg and recvmsg
operations. A Stream Parser is used to extract messages out of the TCP
stream using the first 2 bytes as length marker. Received IKE messages
are put on "ike_queue", waiting to be dequeued by the custom recvmsg
implementation. Received ESP messages are sent to XFRM, like with UDP
encapsulation.

Some of this code is taken from the original submission by Herbert
Xu. Currently, only IPv4 is supported, like for UDP encapsulation.
Co-developed-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarSabrina Dubroca <sd@queasysnail.net>
Acked-by: default avatarDavid S. Miller <davem@davemloft.net>
Signed-off-by: default avatarSteffen Klassert <steffen.klassert@secunet.com>
parent eecd227a
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_ESPINTCP_H
#define _NET_ESPINTCP_H
#include <net/strparser.h>
#include <linux/skmsg.h>
void __init espintcp_init(void);
int espintcp_push_skb(struct sock *sk, struct sk_buff *skb);
int espintcp_queue_out(struct sock *sk, struct sk_buff *skb);
bool tcp_is_ulp_esp(struct sock *sk);
struct espintcp_msg {
struct sk_buff *skb;
struct sk_msg skmsg;
int offset;
int len;
};
struct espintcp_ctx {
struct strparser strp;
struct sk_buff_head ike_queue;
struct sk_buff_head out_queue;
struct espintcp_msg partial;
void (*saved_data_ready)(struct sock *sk);
void (*saved_write_space)(struct sock *sk);
struct work_struct work;
bool tx_running;
};
static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/* RCU is only needed for diag */
return (__force void *)icsk->icsk_ulp_data;
}
#endif
...@@ -193,6 +193,7 @@ struct xfrm_state { ...@@ -193,6 +193,7 @@ struct xfrm_state {
/* Data for encapsulator */ /* Data for encapsulator */
struct xfrm_encap_tmpl *encap; struct xfrm_encap_tmpl *encap;
struct sock __rcu *encap_sk;
/* Data for care-of address */ /* Data for care-of address */
xfrm_address_t *coaddr; xfrm_address_t *coaddr;
......
...@@ -42,5 +42,6 @@ struct udphdr { ...@@ -42,5 +42,6 @@ struct udphdr {
#define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */
#define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */
#define UDP_ENCAP_RXRPC 6 #define UDP_ENCAP_RXRPC 6
#define TCP_ENCAP_ESPINTCP 7 /* Yikes, this is really xfrm encap types. */
#endif /* _UAPI_LINUX_UDP_H */ #endif /* _UAPI_LINUX_UDP_H */
...@@ -378,6 +378,17 @@ config INET_ESP_OFFLOAD ...@@ -378,6 +378,17 @@ config INET_ESP_OFFLOAD
If unsure, say N. If unsure, say N.
config INET_ESPINTCP
bool "IP: ESP in TCP encapsulation (RFC 8229)"
depends on XFRM && INET_ESP
select STREAM_PARSER
select NET_SOCK_MSG
help
Support for RFC 8229 encapsulation of ESP and IKE over
TCP/IPv4 sockets.
If unsure, say N.
config INET_IPCOMP config INET_IPCOMP
tristate "IP: IPComp transformation" tristate "IP: IPComp transformation"
select INET_XFRM_TUNNEL select INET_XFRM_TUNNEL
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/protocol.h> #include <net/protocol.h>
#include <net/udp.h> #include <net/udp.h>
#include <net/tcp.h>
#include <net/espintcp.h>
#include <linux/highmem.h> #include <linux/highmem.h>
...@@ -117,6 +119,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) ...@@ -117,6 +119,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
put_page(sg_page(sg)); put_page(sg_page(sg));
} }
#ifdef CONFIG_INET_ESPINTCP
struct esp_tcp_sk {
struct sock *sk;
struct rcu_head rcu;
};
static void esp_free_tcp_sk(struct rcu_head *head)
{
struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
sock_put(esk->sk);
kfree(esk);
}
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct esp_tcp_sk *esk;
__be16 sport, dport;
struct sock *nsk;
struct sock *sk;
sk = rcu_dereference(x->encap_sk);
if (sk && sk->sk_state == TCP_ESTABLISHED)
return sk;
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
nsk = rcu_dereference_protected(x->encap_sk,
lockdep_is_held(&x->lock));
if (sk && sk == nsk) {
esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
if (!esk) {
spin_unlock_bh(&x->lock);
return ERR_PTR(-ENOMEM);
}
RCU_INIT_POINTER(x->encap_sk, NULL);
esk->sk = sk;
call_rcu(&esk->rcu, esp_free_tcp_sk);
}
spin_unlock_bh(&x->lock);
sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
dport, x->props.saddr.a4, sport, 0);
if (!sk)
return ERR_PTR(-ENOENT);
if (!tcp_is_ulp_esp(sk)) {
sock_put(sk);
return ERR_PTR(-EINVAL);
}
spin_lock_bh(&x->lock);
nsk = rcu_dereference_protected(x->encap_sk,
lockdep_is_held(&x->lock));
if (encap->encap_sport != sport ||
encap->encap_dport != dport) {
sock_put(sk);
sk = nsk ?: ERR_PTR(-EREMCHG);
} else if (sk == nsk) {
sock_put(sk);
} else {
rcu_assign_pointer(x->encap_sk, sk);
}
spin_unlock_bh(&x->lock);
return sk;
}
static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
{
struct sock *sk;
int err;
rcu_read_lock();
sk = esp_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
if (err)
goto out;
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
err = espintcp_queue_out(sk, skb);
else
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
out:
rcu_read_unlock();
return err;
}
static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
return esp_output_tcp_finish(x, skb);
}
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
local_bh_disable();
err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
local_bh_enable();
/* EINPROGRESS just happens to do the right thing. It
* actually means that the skb has been consumed and
* isn't coming back.
*/
return err ?: -EINPROGRESS;
}
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
kfree_skb(skb);
return -EOPNOTSUPP;
}
#endif
static void esp_output_done(struct crypto_async_request *base, int err) static void esp_output_done(struct crypto_async_request *base, int err)
{ {
struct sk_buff *skb = base->data; struct sk_buff *skb = base->data;
...@@ -147,6 +275,10 @@ static void esp_output_done(struct crypto_async_request *base, int err) ...@@ -147,6 +275,10 @@ static void esp_output_done(struct crypto_async_request *base, int err)
secpath_reset(skb); secpath_reset(skb);
xfrm_dev_resume(skb); xfrm_dev_resume(skb);
} else { } else {
if (!err &&
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
xfrm_output_resume(skb, err); xfrm_output_resume(skb, err);
} }
} }
...@@ -236,7 +368,7 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, ...@@ -236,7 +368,7 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
unsigned int len; unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb); len = skb->len + esp->tailen - skb_transport_offset(skb);
if (len + sizeof(struct iphdr) >= IP_MAX_MTU) if (len + sizeof(struct iphdr) > IP_MAX_MTU)
return ERR_PTR(-EMSGSIZE); return ERR_PTR(-EMSGSIZE);
uh = (struct udphdr *)esp->esph; uh = (struct udphdr *)esp->esph;
...@@ -256,6 +388,41 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, ...@@ -256,6 +388,41 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
return (struct ip_esp_hdr *)(uh + 1); return (struct ip_esp_hdr *)(uh + 1);
} }
#ifdef CONFIG_INET_ESPINTCP
static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
struct sk_buff *skb,
struct esp_info *esp)
{
__be16 *lenp = (void *)esp->esph;
struct ip_esp_hdr *esph;
unsigned int len;
struct sock *sk;
len = skb->len + esp->tailen - skb_transport_offset(skb);
if (len > IP_MAX_MTU)
return ERR_PTR(-EMSGSIZE);
rcu_read_lock();
sk = esp_find_tcp_sk(x);
rcu_read_unlock();
if (IS_ERR(sk))
return ERR_CAST(sk);
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
return esph;
}
#else
static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
struct sk_buff *skb,
struct esp_info *esp)
{
return ERR_PTR(-EOPNOTSUPP);
}
#endif
static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
struct esp_info *esp) struct esp_info *esp)
{ {
...@@ -276,6 +443,9 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, ...@@ -276,6 +443,9 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
case UDP_ENCAP_ESPINUDP_NON_IKE: case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport); esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
break; break;
case TCP_ENCAP_ESPINTCP:
esph = esp_output_tcp_encap(x, skb, esp);
break;
} }
if (IS_ERR(esph)) if (IS_ERR(esph))
...@@ -296,7 +466,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * ...@@ -296,7 +466,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
struct sk_buff *trailer; struct sk_buff *trailer;
int tailen = esp->tailen; int tailen = esp->tailen;
/* this is non-NULL only with UDP Encapsulation */ /* this is non-NULL only with TCP/UDP Encapsulation */
if (x->encap) { if (x->encap) {
int err = esp_output_encap(x, skb, esp); int err = esp_output_encap(x, skb, esp);
...@@ -491,6 +661,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * ...@@ -491,6 +661,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
if (sg != dsg) if (sg != dsg)
esp_ssg_unref(x, tmp); esp_ssg_unref(x, tmp);
if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
err = esp_output_tail_tcp(x, skb);
error_free: error_free:
kfree(tmp); kfree(tmp);
error: error:
...@@ -617,10 +790,14 @@ int esp_input_done2(struct sk_buff *skb, int err) ...@@ -617,10 +790,14 @@ int esp_input_done2(struct sk_buff *skb, int err)
if (x->encap) { if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap; struct xfrm_encap_tmpl *encap = x->encap;
struct tcphdr *th = (void *)(skb_network_header(skb) + ihl);
struct udphdr *uh = (void *)(skb_network_header(skb) + ihl); struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
__be16 source; __be16 source;
switch (x->encap->encap_type) { switch (x->encap->encap_type) {
case TCP_ENCAP_ESPINTCP:
source = th->source;
break;
case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP:
case UDP_ENCAP_ESPINUDP_NON_IKE: case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source; source = uh->source;
...@@ -1017,6 +1194,14 @@ static int esp_init_state(struct xfrm_state *x) ...@@ -1017,6 +1194,14 @@ static int esp_init_state(struct xfrm_state *x)
case UDP_ENCAP_ESPINUDP_NON_IKE: case UDP_ENCAP_ESPINUDP_NON_IKE:
x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
break; break;
#ifdef CONFIG_INET_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
* the socket
*/
x->props.header_len += 2;
break;
#endif
} }
} }
......
...@@ -11,3 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o ...@@ -11,3 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_INET_ESPINTCP) += espintcp.o
This diff is collapsed.
...@@ -39,6 +39,9 @@ ...@@ -39,6 +39,9 @@
#ifdef CONFIG_XFRM_STATISTICS #ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h> #include <net/snmp.h>
#endif #endif
#ifdef CONFIG_INET_ESPINTCP
#include <net/espintcp.h>
#endif
#include "xfrm_hash.h" #include "xfrm_hash.h"
...@@ -4157,6 +4160,10 @@ void __init xfrm_init(void) ...@@ -4157,6 +4160,10 @@ void __init xfrm_init(void)
seqcount_init(&xfrm_policy_hash_generation); seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init(); xfrm_input_init();
#ifdef CONFIG_INET_ESPINTCP
espintcp_init();
#endif
RCU_INIT_POINTER(xfrm_if_cb, NULL); RCU_INIT_POINTER(xfrm_if_cb, NULL);
synchronize_rcu(); synchronize_rcu();
} }
......
...@@ -670,6 +670,9 @@ int __xfrm_state_delete(struct xfrm_state *x) ...@@ -670,6 +670,9 @@ int __xfrm_state_delete(struct xfrm_state *x)
net->xfrm.state_num--; net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock); spin_unlock(&net->xfrm.xfrm_state_lock);
if (x->encap_sk)
sock_put(rcu_dereference_raw(x->encap_sk));
xfrm_dev_state_delete(x); xfrm_dev_state_delete(x);
/* All xfrm_state objects are created by xfrm_state_alloc. /* All xfrm_state objects are created by xfrm_state_alloc.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment