Commit 92390790 authored by David S. Miller

Merge branch 'net_next_ovs' of git://git.kernel.org/pub/scm/linux/kernel/git/pshelar/openvswitch

Pravin B Shelar says:

====================
Open vSwitch

The following patches add three features to OVS:
1. Add fairness to upcall processing.
2. Hash action.
3. Enable Tunnel GSO features.
The rest of the patches are bug fixes related to patches from the same series.

v2 series changes first patch according to comment from Dave Miller.
v3 series changes first patch according to comment from Nikolay Aleksandrov.
v4 series update recirc patch commit msg.
v5 series resolves conflict with net-next, updated recirc action patch.
v6 series sends all patches.
v7 series drop recirc patches.
v8 series checkpatch fix
v9 series drop HASH action patch. update sample action commit msg.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 10f29808 d9e0ecb8
......@@ -176,4 +176,12 @@ enum {
NETIF_F_HW_VLAN_STAG_RX | \
NETIF_F_HW_VLAN_STAG_TX)
/* Union of the encapsulation GSO feature bits listed below
 * (GRE, GRE with checksum, IPIP, SIT, UDP tunnel, UDP tunnel with
 * checksum, MPLS).
 */
#define NETIF_F_GSO_ENCAP_ALL (NETIF_F_GSO_GRE | \
NETIF_F_GSO_GRE_CSUM | \
NETIF_F_GSO_IPIP | \
NETIF_F_GSO_SIT | \
NETIF_F_GSO_UDP_TUNNEL | \
NETIF_F_GSO_UDP_TUNNEL_CSUM | \
NETIF_F_GSO_MPLS)
#endif /* _LINUX_NETDEV_FEATURES_H */
......@@ -118,6 +118,9 @@ struct ovs_vport_stats {
/* Allow last Netlink attribute to be unaligned */
#define OVS_DP_F_UNALIGNED (1 << 0)
/* Allow datapath to associate multiple Netlink PIDs to each vport */
#define OVS_DP_F_VPORT_PIDS (1 << 1)
/* Fixed logical ports. */
#define OVSP_LOCAL ((__u32)0)
......@@ -203,9 +206,10 @@ enum ovs_vport_type {
* this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
* plus a null terminator.
* @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
* @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
* OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
* this port. A value of zero indicates that upcalls should not be sent.
* @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
* among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
* received on this port. If this is a single-element array of value 0,
* upcalls should not be sent.
* @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
* packets sent or received through the vport.
*
......@@ -228,7 +232,8 @@ enum ovs_vport_attr {
OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */
OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */
OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */
/* receiving upcalls */
OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */
__OVS_VPORT_ATTR_MAX
};
......
......@@ -38,7 +38,7 @@
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len, bool keep_skb);
const struct nlattr *attr, int len);
static int make_writable(struct sk_buff *skb, int write_len)
{
......@@ -434,11 +434,17 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, &upcall);
}
/* Report whether attribute 'a' accounts for all of the 'rem' bytes that are
 * still left in the attribute list being walked, i.e. whether its recorded
 * length equals the remaining length.
 */
static bool last_action(const struct nlattr *a, int rem)
{
	int attr_len = a->nla_len;

	return rem == attr_len;
}
static int sample(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
struct sk_buff *sample_skb;
int rem;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
......@@ -455,8 +461,34 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
}
}
return do_execute_actions(dp, skb, nla_data(acts_list),
nla_len(acts_list), true);
rem = nla_len(acts_list);
a = nla_data(acts_list);
/* Actions list is either empty or only contains a single user-space
* action, the latter being a special case as it is the only known
* usage of the sample action.
* In these special cases don't clone the skb as there are no
* side-effects in the nested actions.
* Otherwise, clone in case the nested actions have side effects.
*/
if (likely(rem == 0 || (nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
last_action(a, rem)))) {
sample_skb = skb;
skb_get(skb);
} else {
sample_skb = skb_clone(skb, GFP_ATOMIC);
if (!sample_skb) /* Skip sample action when out of memory. */
return 0;
}
/* Note that do_execute_actions() never consumes skb.
* In the case where skb has been cloned above it is the clone that
* is consumed. Otherwise the skb_get(skb) call prevents
* consumption by do_execute_actions(). Thus, it is safe to simply
* return the error code and let the caller (also
* do_execute_actions()) free skb on error.
*/
return do_execute_actions(dp, sample_skb, a, rem);
}
static int execute_set_action(struct sk_buff *skb,
......@@ -507,7 +539,7 @@ static int execute_set_action(struct sk_buff *skb,
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len, bool keep_skb)
const struct nlattr *attr, int len)
{
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
......@@ -562,12 +594,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
}
}
if (prev_port != -1) {
if (keep_skb)
skb = skb_clone(skb, GFP_ATOMIC);
if (prev_port != -1)
do_output(dp, skb, prev_port);
} else if (!keep_skb)
else
consume_skb(skb);
return 0;
......@@ -579,6 +608,5 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
OVS_CB(skb)->tun_key = NULL;
return do_execute_actions(dp, skb, acts->actions,
acts->actions_len, false);
return do_execute_actions(dp, skb, acts->actions, acts->actions_len);
}
......@@ -266,7 +266,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.key = &key;
upcall.userdata = NULL;
upcall.portid = p->upcall_portid;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
ovs_dp_upcall(dp, skb, &upcall);
consume_skb(skb);
stats_counter = &stats->n_missed;
......@@ -464,7 +464,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
BUG_ON(err);
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
......@@ -1373,7 +1374,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.options = NULL;
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
ovs_dp_change(dp, a);
......@@ -1632,8 +1633,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
nla_put_string(skb, OVS_VPORT_ATTR_NAME,
vport->ops->get_name(vport)))
goto nla_put_failure;
ovs_vport_get_stats(vport, &vport_stats);
......@@ -1641,6 +1642,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
&vport_stats))
goto nla_put_failure;
if (ovs_vport_get_upcall_portids(vport, skb))
goto nla_put_failure;
err = ovs_vport_get_options(vport, skb);
if (err == -EMSGSIZE)
goto error;
......@@ -1762,7 +1766,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.options = a[OVS_VPORT_ATTR_OPTIONS];
parms.dp = dp;
parms.port_no = port_no;
parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
vport = new_vport(&parms);
err = PTR_ERR(vport);
......@@ -1812,8 +1816,14 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
}
if (a[OVS_VPORT_ATTR_UPCALL_PID])
vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
err = ovs_vport_set_upcall_portids(vport, ids);
if (err)
goto exit_unlock_free;
}
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
......
......@@ -140,11 +140,14 @@ static void do_setup(struct net_device *netdev)
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
eth_hw_addr_random(netdev);
}
......
......@@ -134,10 +134,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->dp = parms->dp;
vport->port_no = parms->port_no;
vport->upcall_portid = parms->upcall_portid;
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids))
return ERR_PTR(-EINVAL);
vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vport->percpu_stats) {
kfree(vport);
......@@ -161,6 +163,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
*/
void ovs_vport_free(struct vport *vport)
{
	struct vport_portids *portids;

	/* This runs either from an RCU callback or on an error path, so
	 * the upcall portid array can be fetched with a raw dereference.
	 */
	portids = rcu_dereference_raw(vport->upcall_portids);

	/* Release the portid array, then the per-cpu stats, then the vport. */
	kfree(portids);
	free_percpu(vport->percpu_stats);
	kfree(vport);
}
......@@ -326,6 +332,99 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
return 0;
}
/**
 * ovs_vport_set_upcall_portids - replace the upcall portids of @vport.
 *
 * @vport: vport to modify.
 * @ids: netlink attribute whose payload is an array of u32 port ids.
 *
 * Builds a new 'struct vport_portids' from the payload of @ids, publishes
 * it as the vport's upcall_portids and hands the previous array, if there
 * was one, to kfree_rcu().
 *
 * Returns 0 if successful, -EINVAL if the payload of @ids is empty or its
 * length is not a multiple of sizeof(u32), and -ENOMEM if the new array
 * cannot be allocated.
 *
 * Must be called with ovs_mutex.
 */
int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids)
{
	struct vport_portids *prev_ids, *new_ids;
	int payload_len = nla_len(ids);

	/* The payload must hold a whole, non-zero number of u32 entries. */
	if (payload_len == 0 || payload_len % sizeof(u32) != 0)
		return -EINVAL;

	prev_ids = ovsl_dereference(vport->upcall_portids);

	new_ids = kmalloc(sizeof(*new_ids) + payload_len, GFP_KERNEL);
	if (!new_ids)
		return -ENOMEM;

	new_ids->n_ids = payload_len / sizeof(u32);
	/* Cache the reciprocal of the entry count for later index lookups. */
	new_ids->rn_ids = reciprocal_value(new_ids->n_ids);
	nla_memcpy(new_ids->ids, ids, payload_len);

	rcu_assign_pointer(vport->upcall_portids, new_ids);

	if (prev_ids)
		kfree_rcu(prev_ids, rcu);

	return 0;
}
/**
 * ovs_vport_get_upcall_portids - append the upcall portids of @vport to @skb.
 *
 * @vport: vport from which to retrieve the portids.
 * @skb: sk_buff to which the portids should be appended.
 *
 * Appends an %OVS_VPORT_ATTR_UPCALL_PID attribute to @skb.  When the
 * datapath has %OVS_DP_F_VPORT_PIDS set in its user features, the attribute
 * carries the whole array of upcall portids; otherwise it carries only the
 * first portid as a single u32.
 *
 * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
 * If an error occurs, @skb is left unmodified.  Must be called with
 * ovs_mutex or rcu_read_lock.
 */
int ovs_vport_get_upcall_portids(const struct vport *vport,
				 struct sk_buff *skb)
{
	struct vport_portids *portids;

	portids = rcu_dereference_ovsl(vport->upcall_portids);

	/* Without the multi-pid feature only the first portid is reported. */
	if (!(vport->dp->user_features & OVS_DP_F_VPORT_PIDS))
		return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID,
				   portids->ids[0]);

	return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
		       portids->n_ids * sizeof(u32), portids->ids);
}
/**
 * ovs_vport_find_upcall_portid - pick the portid that receives an upcall.
 *
 * @p: vport on which the missed packet was received.
 * @skb: the missed packet.
 *
 * Uses skb_get_hash() on @skb to select one entry of the vport's upcall
 * portid array.
 *
 * Returns the portid of the target socket, or 0 when the array consists of
 * a single zero entry.  Must be called with rcu_read_lock.
 */
u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
{
	struct vport_portids *portids = rcu_dereference(p->upcall_portids);
	u32 pkt_hash;
	u32 quotient;

	/* A lone zero portid: return 0 without hashing the packet. */
	if (portids->n_ids == 1 && portids->ids[0] == 0)
		return 0;

	pkt_hash = skb_get_hash(skb);

	/* Index is hash - n_ids * (hash / n_ids), with the division done
	 * through the reciprocal cached in rn_ids.
	 */
	quotient = reciprocal_divide(pkt_hash, portids->rn_ids);

	return portids->ids[pkt_hash - portids->n_ids * quotient];
}
/**
* ovs_vport_receive - pass up received packet to the datapath for processing
*
......
......@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/reciprocal_div.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
......@@ -52,6 +53,10 @@ void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
int ovs_vport_send(struct vport *, struct sk_buff *);
/* The following definitions are for implementers of vport devices: */
......@@ -62,13 +67,27 @@ struct vport_err_stats {
u64 tx_dropped;
u64 tx_errors;
};
/**
 * struct vport_portids - array of netlink portids of a vport.
 * Instances are published through an RCU-protected pointer
 * (vport->upcall_portids) and must be accessed under RCU protection.
 * @rn_ids: The reciprocal value of @n_ids, computed once with
 * reciprocal_value() when the array is set and used with
 * reciprocal_divide() when selecting an entry.
 * @rcu: RCU callback head for deferred destruction (used by kfree_rcu()).
 * @n_ids: Number of u32 entries in the @ids array.
 * @ids: Array storing the Netlink socket pids to be used for packets received
 * on this port that miss the flow table.  Flexible array member; its
 * storage is allocated together with the structure.
 */
struct vport_portids {
struct reciprocal_value rn_ids;
struct rcu_head rcu;
u32 n_ids;
u32 ids[];
};
/**
* struct vport - one port within a datapath
* @rcu: RCU callback head for deferred destruction.
* @dp: Datapath to which this port belongs.
* @upcall_portid: The Netlink port to use for packets received on this port that
* miss the flow table.
* @upcall_portids: RCU protected 'struct vport_portids'.
* @port_no: Index into @dp's @ports array.
* @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
......@@ -80,7 +99,7 @@ struct vport_err_stats {
struct vport {
struct rcu_head rcu;
struct datapath *dp;
u32 upcall_portid;
struct vport_portids __rcu *upcall_portids;
u16 port_no;
struct hlist_node hash_node;
......@@ -111,7 +130,7 @@ struct vport_parms {
/* For ovs_vport_alloc(). */
struct datapath *dp;
u16 port_no;
u32 upcall_portid;
struct nlattr *upcall_portids;
};
/**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment