Commit 39839107 authored by Parav Pandit's avatar Parav Pandit Committed by Jason Gunthorpe

IB/cm: Replace members of sa_path_rec with 'struct sgid_attr *'

While processing a path record entry in CM messages the associated GID
attribute is now also supplied.

Currently for RoCE a netdevice's net namespace pointer and ifindex are
stored in path record entry. Both of these fields of the netdev can change
anytime while processing CM messages. Additionally storing net namespace
without holding reference will lead to use-after-free crash. Therefore it
is removed. Netdevice information for RoCE is instead provided via
referenced gid attribute in ib_cm requests.

Such a design leads to a situation where the kernel can crash when the net
pointer becomes invalid. However today it is always initialized to
init_net, which cannot become invalid. In order to support processing
packets in any arbitrary namespace of the received packet, it is necessary
to avoid such conditions.

This patch removes the dependency on the net pointer and ifindex; instead
it will rely on SGID attribute which contains a pointer to netdev.
Signed-off-by: default avatarParav Pandit <parav@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
parent 815d456e
......@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
return ret;
}
static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
u8 p;
struct net_device *ndev = ib_get_ndev_from_path(path);
if (attr) {
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
if (cm_dev->ib_device == attr->device) {
port = cm_dev->port[attr->port_num - 1];
break;
}
}
read_unlock_irqrestore(&cm.device_lock, flags);
} else {
/* SGID attribute can be NULL in following
* conditions.
* (a) Alternative path
* (b) IB link layer without GRH
* (c) LAP send messages
*/
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
attr = rdma_find_gid(cm_dev->ib_device,
&path->sgid,
sa_conv_pathrec_to_gid_type(path),
ndev, &p, NULL)) {
port = cm_dev->port[p - 1];
NULL);
if (!IS_ERR(attr)) {
port = cm_dev->port[attr->port_num - 1];
break;
}
}
read_unlock_irqrestore(&cm.device_lock, flags);
if (ndev)
dev_put(ndev);
if (port)
rdma_put_gid_attr(attr);
}
return port;
}
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
static int cm_init_av_by_path(struct sa_path_rec *path,
const struct ib_gid_attr *sgid_attr,
struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct rdma_ah_attr new_ah_attr;
......@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_port *port;
int ret;
port = get_cm_port_from_path(path);
port = get_cm_port_from_path(path, sgid_attr);
if (!port)
return -EINVAL;
cm_dev = port->cm_dev;
......@@ -562,7 +581,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
* can be used to return an error response.
*/
ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
&new_ah_attr);
&new_ah_attr, sgid_attr);
if (ret)
return ret;
......@@ -1420,12 +1439,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
goto out;
}
ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
ret = cm_init_av_by_path(param->primary_path,
param->ppath_sgid_attr, &cm_id_priv->av,
cm_id_priv);
if (ret)
goto error1;
if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path,
ret = cm_init_av_by_path(param->alternate_path, NULL,
&cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto error1;
......@@ -1980,10 +2000,6 @@ static int cm_req_handler(struct cm_work *work)
if (gid_attr.ndev) {
work->path[0].rec_type =
sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
sa_path_set_ifindex(&work->path[0],
gid_attr.ndev->ifindex);
sa_path_set_ndev(&work->path[0],
dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
} else {
cm_path_set_rec_type(work->port->cm_dev->ib_device,
......@@ -1999,7 +2015,7 @@ static int cm_req_handler(struct cm_work *work)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
work->path[0].hop_limit = grh->hop_limit;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
ret = cm_init_av_by_path(&work->path[0], &gid_attr, &cm_id_priv->av,
cm_id_priv);
if (ret) {
int err;
......@@ -2018,8 +2034,8 @@ static int cm_req_handler(struct cm_work *work)
goto rejected;
}
if (cm_req_has_alt_path(req_msg)) {
ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
cm_id_priv);
ret = cm_init_av_by_path(&work->path[1], NULL,
&cm_id_priv->alt_av, cm_id_priv);
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
......@@ -3142,7 +3158,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
goto out;
}
ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
if (ret)
goto out;
......@@ -3285,7 +3301,7 @@ static int cm_lap_handler(struct cm_work *work)
if (ret)
goto unlock;
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
......@@ -3487,7 +3503,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
ret = cm_init_av_by_path(param->path, param->sgid_attr,
&cm_id_priv->av,
cm_id_priv);
if (ret)
goto out;
......
......@@ -2583,8 +2583,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
route->path_rec->roce.route_resolved = true;
sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
return ndev;
}
......@@ -3510,7 +3508,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
ib_init_ah_attr_from_path(id_priv->id.device,
id_priv->id.port_num,
id_priv->id.route.path_rec,
&event.param.ud.ah_attr);
&event.param.ud.ah_attr,
rep->sgid_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
......
......@@ -1229,18 +1229,12 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
static int
roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec)
struct sa_path_rec *rec,
const struct ib_gid_attr *attr)
{
struct net_device *resolved_dev;
struct net_device *ndev;
struct net_device *idev;
struct rdma_dev_addr dev_addr = {
.bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
sa_path_get_ifindex(rec) : 0),
.net = sa_path_get_ndev(rec) ?
sa_path_get_ndev(rec) :
&init_net
};
struct rdma_dev_addr dev_addr = {};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
......@@ -1250,6 +1244,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
if (rec->roce.route_resolved)
return 0;
if (!attr || !attr->ndev)
return -EINVAL;
dev_addr.bound_dev_if = attr->ndev->ifindex;
/* TODO: Use net from the ib_gid_attr once it is added to it,
* until than, limit itself to init_net.
*/
dev_addr.net = &init_net;
if (!device->get_netdev)
return -EOPNOTSUPP;
......@@ -1278,16 +1280,13 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
ret = -ENODEV;
goto done;
}
ndev = ib_get_ndev_from_path(rec);
rcu_read_lock();
if ((ndev && ndev != resolved_dev) ||
if (attr->ndev != resolved_dev ||
(resolved_dev != idev &&
!rdma_is_upper_dev_rcu(idev, resolved_dev)))
ret = -EHOSTUNREACH;
rcu_read_unlock();
dev_put(resolved_dev);
if (ndev)
dev_put(ndev);
done:
dev_put(idev);
if (!ret)
......@@ -1297,19 +1296,18 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr)
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr)
{
enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
struct net_device *ndev;
const struct ib_gid_attr *gid_attr;
ndev = ib_get_ndev_from_path(rec);
gid_attr =
rdma_find_gid_by_port(device, &rec->sgid, type, port_num, ndev);
if (ndev)
dev_put(ndev);
if (!gid_attr) {
gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
port_num, NULL);
if (IS_ERR(gid_attr))
return PTR_ERR(gid_attr);
} else
rdma_hold_gid_attr(gid_attr);
rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
be32_to_cpu(rec->flow_label),
......@@ -1318,9 +1316,26 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
return 0;
}
/**
* ib_init_ah_attr_from_path - Initialize address handle attributes based on
* an SA path record.
* @device: Device associated ah attributes initialization.
* @port_num: Port on the specified device.
* @rec: path record entry to use for ah attributes initialization.
* @ah_attr: address handle attributes to initialization from path record.
* @sgid_attr: SGID attribute to consider during initialization.
*
* When ib_init_ah_attr_from_path() returns success,
* (a) for IB link layer it optionally contains a reference to SGID attribute
* when GRH is present for IB link layer.
* (b) for RoCE link layer it contains a reference to SGID attribute.
* User must invoke rdma_destroy_ah_attr() to release reference to SGID
* attributes which are initialized using ib_init_ah_attr_from_path().
*/
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr)
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr)
{
int ret = 0;
......@@ -1331,7 +1346,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
rdma_ah_set_static_rate(ah_attr, rec->rate);
if (sa_path_is_roce(rec)) {
ret = roce_resolve_route_from_path(device, port_num, rec);
ret = roce_resolve_route_from_path(device, port_num, rec,
gid_attr);
if (ret)
return ret;
......@@ -1348,7 +1364,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || sa_path_is_roce(rec))
ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
ret = init_ah_attr_grh_fields(device, port_num,
rec, ah_attr, gid_attr);
return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
......@@ -1556,8 +1573,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ARRAY_SIZE(path_rec_table),
mad->data, &rec);
rec.rec_type = SA_PATH_REC_TYPE_IB;
sa_path_set_ndev(&rec, NULL);
sa_path_set_ifindex(&rec, 0);
sa_path_set_dmac_zero(&rec);
if (query->conv_pr) {
......
......@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
/* TODO: No need to set this */
sa_path_set_dmac_zero(dst);
sa_path_set_ndev(dst, NULL);
sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
......@@ -770,7 +770,7 @@ static void path_rec_completion(int status,
struct rdma_ah_attr av;
if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
pathrec, &av)) {
pathrec, &av, NULL)) {
ah = ipoib_create_ah(dev, priv->pd, &av);
rdma_destroy_ah_attr(&av);
}
......
......@@ -173,11 +173,6 @@ struct sa_path_rec_ib {
struct sa_path_rec_roce {
bool route_resolved;
u8 dmac[ETH_ALEN];
/* ignored in IB */
int ifindex;
/* ignored in IB */
struct net *net;
};
struct sa_path_rec_opa {
......@@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
enum ib_gid_type gid_type,
struct rdma_ah_attr *ah_attr);
/**
* ib_init_ah_attr_from_path - Initialize address handle attributes based on
* an SA path record.
*/
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr);
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *sgid_attr);
/**
* ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
......@@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
eth_zero_addr(rec->roce.dmac);
}
static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
{
if (sa_path_is_roce(rec))
rec->roce.ifindex = ifindex;
}
static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
{
if (sa_path_is_roce(rec))
rec->roce.net = net;
}
static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
{
if (sa_path_is_roce(rec))
return rec->roce.dmac;
return NULL;
}
static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
{
if (sa_path_is_roce(rec))
return rec->roce.ifindex;
return 0;
}
static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
{
if (sa_path_is_roce(rec))
return rec->roce.net;
return NULL;
}
static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
{
return sa_path_get_ndev(rec) ?
dev_get_by_index(sa_path_get_ndev(rec),
sa_path_get_ifindex(rec))
: NULL;
}
#endif /* IB_SA_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment