Commit f427f4d6 authored by Divya Indi's avatar Divya Indi Committed by Jason Gunthorpe

IB/sa: Resolv use-after-free in ib_nl_make_request()

There is a race condition where ib_nl_make_request() inserts the request
data into the linked list but the timer in ib_nl_request_timeout() can see
it and destroy it before ib_nl_send_msg() is done touching it. This could
happen, for instance, if there is a long delay allocating memory during
nlmsg_new()

This causes a use-after-free in the send_mad() thread:

  [<ffffffffa02f43cb>] ? ib_pack+0x17b/0x240 [ib_core]
  [ <ffffffffa032aef1>] ib_sa_path_rec_get+0x181/0x200 [ib_sa]
  [<ffffffffa0379db0>] rdma_resolve_route+0x3c0/0x8d0 [rdma_cm]
  [<ffffffffa0374450>] ? cma_bind_port+0xa0/0xa0 [rdma_cm]
  [<ffffffffa040f850>] ? rds_rdma_cm_event_handler_cmn+0x850/0x850 [rds_rdma]
  [<ffffffffa040f22c>] rds_rdma_cm_event_handler_cmn+0x22c/0x850 [rds_rdma]
  [<ffffffffa040f860>] rds_rdma_cm_event_handler+0x10/0x20 [rds_rdma]
  [<ffffffffa037778e>] addr_handler+0x9e/0x140 [rdma_cm]
  [<ffffffffa026cdb4>] process_req+0x134/0x190 [ib_addr]
  [<ffffffff810a02f9>] process_one_work+0x169/0x4a0
  [<ffffffff810a0b2b>] worker_thread+0x5b/0x560
  [<ffffffff810a0ad0>] ? flush_delayed_work+0x50/0x50
  [<ffffffff810a68fb>] kthread+0xcb/0xf0
  [<ffffffff816ec49a>] ? __schedule+0x24a/0x810
  [<ffffffff816ec49a>] ? __schedule+0x24a/0x810
  [<ffffffff810a6830>] ? kthread_create_on_node+0x180/0x180
  [<ffffffff816f25a7>] ret_from_fork+0x47/0x90
  [<ffffffff810a6830>] ? kthread_create_on_node+0x180/0x180

The ownership rule is once the request is on the list, ownership transfers
to the list and the local thread can't touch it any more, just like for
the normal MAD case in send_mad().

Thus, instead of adding before send and then trying to delete after on
errors, move the entire thing under the spinlock so that the send and
update of the lists are atomic to the conurrent threads. Lightly reoganize
things so spinlock safe memory allocations are done in the final NL send
path and the rest of the setup work is done before and outside the lock.

Fixes: 3ebd2fd0 ("IB/sa: Put netlink request into the request list before sending")
Link: https://lore.kernel.org/r/1592964789-14533-1-git-send-email-divya.indi@oracle.comSigned-off-by: default avatarDivya Indi <divya.indi@oracle.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent 2315ec12
...@@ -829,13 +829,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) ...@@ -829,13 +829,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len; return len;
} }
static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{ {
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
void *data; void *data;
struct ib_sa_mad *mad; struct ib_sa_mad *mad;
int len; int len;
unsigned long flags;
unsigned long delay;
gfp_t gfp_flag;
int ret;
INIT_LIST_HEAD(&query->list);
query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
mad = query->mad_buf->mad; mad = query->mad_buf->mad;
len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask); len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
...@@ -860,36 +867,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) ...@@ -860,36 +867,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */ /* Repair the nlmsg header length */
nlmsg_end(skb, nlh); nlmsg_end(skb, nlh);
return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask); gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
} GFP_NOWAIT;
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) spin_lock_irqsave(&ib_nl_request_lock, flags);
{ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
unsigned long flags;
unsigned long delay;
int ret;
INIT_LIST_HEAD(&query->list); if (ret)
query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); goto out;
/* Put the request on the list first.*/ /* Put the request on the list.*/
spin_lock_irqsave(&ib_nl_request_lock, flags);
delay = msecs_to_jiffies(sa_local_svc_timeout_ms); delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies; query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list); list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */ /* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list) if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
ret = ib_nl_send_msg(query, gfp_mask); out:
if (ret) { spin_unlock_irqrestore(&ib_nl_request_lock, flags);
ret = -EIO;
/* Remove the request */
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_del(&query->list);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment