Commit a7b4f989 authored by Jozsef Kadlecsik's avatar Jozsef Kadlecsik Committed by Patrick McHardy

netfilter: ipset: IP set core support

The patch adds the IP set core support to the kernel.

The IP set core implements a netlink (nfnetlink) based protocol by which
one can create, destroy, flush, rename, swap, list, save, restore sets,
and add, delete, test elements from userspace. For simplicity (and backward
compatibilty and for not to force ip(6)tables to be linked with a netlink
library) reasons a small getsockopt-based protocol is also kept in order
to communicate with the ip(6)tables match and target.

The netlink protocol passes all u16, etc values in network order with
NLA_F_NET_BYTEORDER flag. The protocol enforces the proper use of the
NLA_F_NESTED and NLA_F_NET_BYTEORDER flags.

For other kernel subsystems (netfilter match and target) the API contains
the functions to add, delete and test elements in sets and the required calls
to get/put refereces to the sets before those operations can be performed.

The set types (which are implemented in independent modules) are stored
in a simple RCU protected list. A set type may have variants: for example
without timeout or with timeout support, for IPv4 or for IPv6. The sets
(i.e. the pointers to the sets) are stored in an array. The sets are
identified by their index in the array, which makes possible easy and
fast swapping of sets. The array is protected indirectly by the nfnl
mutex from nfnetlink. The content of the sets are protected by the rwlock
of the set.

There are functional differences between the add/del/test functions
for the kernel and userspace:

- kernel add/del/test: works on the current packet (i.e. one element)
- kernel test: may trigger an "add" operation  in order to fill
  out unspecified parts of the element from the packet (like MAC address)
- userspace add/del: works on the netlink message and thus possibly
  on multiple elements from the IPSET_ATTR_ADT container attribute.
- userspace add: may trigger resizing of a set
Signed-off-by: default avatarJozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: default avatarPatrick McHardy <kaber@trash.net>
parent f703651e
This diff is collapsed.
#ifndef _IP_SET_GETPORT_H
#define _IP_SET_GETPORT_H
extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto);
extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto);
extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
__be16 *port);
#endif /*_IP_SET_GETPORT_H*/
#ifndef _PFXLEN_H
#define _PFXLEN_H
#include <asm/byteorder.h>
#include <linux/netfilter.h>
/* Prefixlen maps, by Jan Engelhardt */
extern const union nf_inet_addr ip_set_netmask_map[];
extern const union nf_inet_addr ip_set_hostmask_map[];
static inline __be32
ip_set_netmask(u8 pfxlen)
{
return ip_set_netmask_map[pfxlen].ip;
}
static inline const __be32 *
ip_set_netmask6(u8 pfxlen)
{
return &ip_set_netmask_map[pfxlen].ip6[0];
}
static inline u32
ip_set_hostmask(u8 pfxlen)
{
return (__force u32) ip_set_hostmask_map[pfxlen].ip;
}
static inline const __be32 *
ip_set_hostmask6(u8 pfxlen)
{
return &ip_set_hostmask_map[pfxlen].ip6[0];
}
#endif /*_PFXLEN_H */
......@@ -1052,4 +1052,6 @@ endif # NETFILTER_XTABLES
endmenu
source "net/netfilter/ipset/Kconfig"
source "net/netfilter/ipvs/Kconfig"
......@@ -105,5 +105,8 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
# ipset
obj-$(CONFIG_IP_SET) += ipset/
# IPVS
obj-$(CONFIG_IP_VS) += ipvs/
menuconfig IP_SET
tristate "IP set support"
depends on INET && NETFILTER
help
This option adds IP set support to the kernel.
In order to define and use the sets, you need the userspace utility
ipset(8). You can use the sets in netfilter via the "set" match
and "SET" target.
To compile it as a module, choose M here. If unsure, say N.
if IP_SET
config IP_SET_MAX
int "Maximum number of IP sets"
default 256
range 2 65534
depends on IP_SET
help
You can define here default value of the maximum number
of IP sets for the kernel.
The value can be overriden by the 'max_sets' module
parameter of the 'ip_set' module.
endif # IP_SET
#
# Makefile for the ipset modules
#
ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o
# ipset core
obj-$(CONFIG_IP_SET) += ip_set.o
This diff is collapsed.
/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/* Get Layer-4 data from the packets */
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/ip.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
/* We must handle non-linear skbs */
static bool
get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
bool src, __be16 *port, u8 *proto)
{
switch (protocol) {
case IPPROTO_TCP: {
struct tcphdr _tcph;
const struct tcphdr *th;
th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
if (th == NULL)
/* No choice either */
return false;
*port = src ? th->source : th->dest;
break;
}
case IPPROTO_UDP: {
struct udphdr _udph;
const struct udphdr *uh;
uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
if (uh == NULL)
/* No choice either */
return false;
*port = src ? uh->source : uh->dest;
break;
}
case IPPROTO_ICMP: {
struct icmphdr _ich;
const struct icmphdr *ic;
ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
if (ic == NULL)
return false;
*port = (__force __be16)htons((ic->type << 8) | ic->code);
break;
}
case IPPROTO_ICMPV6: {
struct icmp6hdr _ich;
const struct icmp6hdr *ic;
ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
if (ic == NULL)
return false;
*port = (__force __be16)
htons((ic->icmp6_type << 8) | ic->icmp6_code);
break;
}
default:
break;
}
*proto = protocol;
return true;
}
bool
ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto)
{
const struct iphdr *iph = ip_hdr(skb);
unsigned int protooff = ip_hdrlen(skb);
int protocol = iph->protocol;
/* See comments at tcp_match in ip_tables.c */
if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET))
return false;
return get_port(skb, protocol, protooff, src, port, proto);
}
EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
bool
ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto)
{
unsigned int protooff = 0;
int protocol;
unsigned short fragoff;
protocol = ipv6_find_hdr(skb, &protooff, -1, &fragoff);
if (protocol <= 0 || fragoff)
return false;
return get_port(skb, protocol, protooff, src, port, proto);
}
EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
bool
ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
{
bool ret;
u8 proto;
switch (pf) {
case AF_INET:
ret = ip_set_get_ip4_port(skb, src, port, &proto);
case AF_INET6:
ret = ip_set_get_ip6_port(skb, src, port, &proto);
default:
return false;
}
if (!ret)
return ret;
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
return true;
default:
return false;
}
}
EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment