Commit ac40916a authored by Li RongQing, committed by Jakub Kicinski

rtnetlink: introduce nlmsg_new_large and use it in rtnl_getlink

If a PF has 256 or more VFs, the ip link command will request an order-3
or larger memory allocation, which may trigger the OOM killer due to memory
fragmentation; the memory size needed for the VFs is computed in
rtnl_vfinfo_size.

So introduce nlmsg_new_large, which calls netlink_alloc_large_skb, in
which vmalloc is used for large allocations, to avoid memory allocation
failures.

    ip invoked oom-killer: gfp_mask=0xc2cc0(GFP_KERNEL|__GFP_NOWARN|\
	__GFP_COMP|__GFP_NOMEMALLOC), order=3, oom_score_adj=0
    CPU: 74 PID: 204414 Comm: ip Kdump: loaded Tainted: P           OE
    Call Trace:
    dump_stack+0x57/0x6a
    dump_header+0x4a/0x210
    oom_kill_process+0xe4/0x140
    out_of_memory+0x3e8/0x790
    __alloc_pages_slowpath.constprop.116+0x953/0xc50
    __alloc_pages_nodemask+0x2af/0x310
    kmalloc_large_node+0x38/0xf0
    __kmalloc_node_track_caller+0x417/0x4d0
    __kmalloc_reserve.isra.61+0x2e/0x80
    __alloc_skb+0x82/0x1c0
    rtnl_getlink+0x24f/0x370
    rtnetlink_rcv_msg+0x12c/0x350
    netlink_rcv_skb+0x50/0x100
    netlink_unicast+0x1b2/0x280
    netlink_sendmsg+0x355/0x4a0
    sock_sendmsg+0x5b/0x60
    ____sys_sendmsg+0x1ea/0x250
    ___sys_sendmsg+0x88/0xd0
    __sys_sendmsg+0x5e/0xa0
    do_syscall_64+0x33/0x40
    entry_SYSCALL_64_after_hwframe+0x44/0xa9
    RIP: 0033:0x7f95a65a5b70

Cc: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Link: https://lore.kernel.org/r/20231115120108.3711-1-lirongqing@baidu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 459a70ba
...@@ -351,5 +351,6 @@ bool netlink_ns_capable(const struct sk_buff *skb, ...@@ -351,5 +351,6 @@ bool netlink_ns_capable(const struct sk_buff *skb,
struct user_namespace *ns, int cap); struct user_namespace *ns, int cap);
bool netlink_capable(const struct sk_buff *skb, int cap); bool netlink_capable(const struct sk_buff *skb, int cap);
bool netlink_net_capable(const struct sk_buff *skb, int cap); bool netlink_net_capable(const struct sk_buff *skb, int cap);
struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast);
#endif /* __LINUX_NETLINK_H */ #endif /* __LINUX_NETLINK_H */
...@@ -1010,6 +1010,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) ...@@ -1010,6 +1010,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
return alloc_skb(nlmsg_total_size(payload), flags); return alloc_skb(nlmsg_total_size(payload), flags);
} }
/**
 * nlmsg_new_large - Allocate a netlink message skb that may be backed by
 * physically non-contiguous memory
 * @payload: size of the message payload
 *
 * Thin wrapper that hands nlmsg_total_size(@payload) to
 * netlink_alloc_large_skb() with the broadcast argument set to 0.
 *
 * An skb obtained this way cannot carry frag pages in shinfo->frags*:
 * netlink_skb_destructor() sets skb->head to NULL, which bypasses most of
 * the handling in skb_release_data().
 */
static inline struct sk_buff *nlmsg_new_large(size_t payload)
{
	unsigned int total = nlmsg_total_size(payload);

	return netlink_alloc_large_skb(total, 0);
}
/** /**
* nlmsg_end - Finalize a netlink message * nlmsg_end - Finalize a netlink message
* @skb: socket buffer the message is stored in * @skb: socket buffer the message is stored in
......
...@@ -3849,7 +3849,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -3849,7 +3849,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out; goto out;
err = -ENOBUFS; err = -ENOBUFS;
nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask));
if (nskb == NULL) if (nskb == NULL)
goto out; goto out;
......
...@@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) ...@@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock; return sock;
} }
static struct sk_buff *netlink_alloc_large_skb(unsigned int size, struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
int broadcast)
{ {
struct sk_buff *skb; struct sk_buff *skb;
void *data; void *data;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment