Commit 341ac980 authored by Stanislav Fomichev, committed by Alexei Starovoitov

xsk: Support tx_metadata_len

For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is no way currently to populate skb metadata.

Introduce new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. Metadata bytes come prior to tx_desc address
(same as in RX case).

The size of the metadata has mostly the same constraints as XDP:
- less than 256 bytes
- 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte
  timestamp in the completion)
- non-zero

This data is not interpreted in any way right now.

Reviewed-by: Song Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 40d0eb02
...@@ -30,6 +30,7 @@ struct xdp_umem { ...@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user; struct user_struct *user;
refcount_t users; refcount_t users;
u8 flags; u8 flags;
u8 tx_metadata_len;
bool zc; bool zc;
struct page **pgs; struct page **pgs;
int id; int id;
......
...@@ -77,6 +77,7 @@ struct xsk_buff_pool { ...@@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size; u32 chunk_size;
u32 chunk_shift; u32 chunk_shift;
u32 frame_len; u32 frame_len;
u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup; u8 cached_need_wakeup;
bool uses_need_wakeup; bool uses_need_wakeup;
bool dma_need_sync; bool dma_need_sync;
......
...@@ -76,6 +76,7 @@ struct xdp_umem_reg { ...@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size; __u32 chunk_size;
__u32 headroom; __u32 headroom;
__u32 flags; __u32 flags;
__u32 tx_metadata_len;
}; };
struct xdp_statistics { struct xdp_statistics {
......
...@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) ...@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM) if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL; return -EINVAL;
if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
return -EINVAL;
umem->size = size; umem->size = size;
umem->headroom = headroom; umem->headroom = headroom;
umem->chunk_size = chunk_size; umem->chunk_size = chunk_size;
...@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) ...@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL; umem->pgs = NULL;
umem->user = NULL; umem->user = NULL;
umem->flags = mr->flags; umem->flags = mr->flags;
umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list); INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1); refcount_set(&umem->users, 1);
......
...@@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 { ...@@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom; __u32 headroom;
}; };
/*
 * Shorter variant of the umem registration request used only for size
 * comparison: xsk_setsockopt() checks optlen against sizeof() of this
 * struct to decide how many bytes to copy from userspace. It appears to
 * mirror struct xdp_umem_reg without the trailing tx_metadata_len field.
 *
 * NOTE(review): with 8-byte alignment of __u64 this struct presumably
 * pads from 28 to 32 bytes, which would equal sizeof(struct xdp_umem_reg)
 * and make the "optlen < sizeof(mr)" branch unreachable on such ABIs --
 * confirm, and check whether callers that leave the padding
 * uninitialized could end up supplying a garbage tx_metadata_len.
 */
struct xdp_umem_reg_v2 {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
__u32 flags;
};
static int xsk_setsockopt(struct socket *sock, int level, int optname, static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen) sockptr_t optval, unsigned int optlen)
{ {
...@@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, ...@@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1)) if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL; return -EINVAL;
else if (optlen < sizeof(mr)) else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1); mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size)) if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT; return -EFAULT;
......
...@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, ...@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM; XDP_PACKET_HEADROOM;
pool->umem = umem; pool->umem = umem;
pool->addrs = umem->addrs; pool->addrs = umem->addrs;
pool->tx_metadata_len = umem->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list); INIT_LIST_HEAD(&pool->xsk_tx_list);
......
...@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) ...@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc) struct xdp_desc *desc)
{ {
u64 offset = desc->addr & (pool->chunk_size - 1); u64 addr = desc->addr - pool->tx_metadata_len;
u64 len = desc->len + pool->tx_metadata_len;
u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len) if (!desc->len)
return false; return false;
if (offset + desc->len > pool->chunk_size) if (offset + len > pool->chunk_size)
return false; return false;
if (desc->addr >= pool->addrs_cnt) if (addr >= pool->addrs_cnt)
return false; return false;
if (xp_unused_options_set(desc->options)) if (xp_unused_options_set(desc->options))
...@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, ...@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc) struct xdp_desc *desc)
{ {
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len) if (!desc->len)
return false; return false;
if (desc->len > pool->chunk_size) if (len > pool->chunk_size)
return false; return false;
if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) xp_desc_crosses_non_contig_pg(pool, addr, len))
return false; return false;
if (xp_unused_options_set(desc->options)) if (xp_unused_options_set(desc->options))
......
...@@ -76,6 +76,7 @@ struct xdp_umem_reg { ...@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size; __u32 chunk_size;
__u32 headroom; __u32 headroom;
__u32 flags; __u32 flags;
__u32 tx_metadata_len;
}; };
struct xdp_statistics { struct xdp_statistics {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment