Commit a60ebcb6 authored by Jiri Pirko's avatar Jiri Pirko Committed by Stephen Hemminger

devlink: implement shared buffer occupancy control

Use kernel shared buffer occupancy control commands to make snapshot and
clear occupancy watermarks. Also, allow to show occupancy values in a
nice way.
Signed-off-by: default avatarJiri Pirko <jiri@mellanox.com>
parent b56700bf
...@@ -27,6 +27,12 @@ ...@@ -27,6 +27,12 @@
#define pr_err(args...) fprintf(stderr, ##args) #define pr_err(args...) fprintf(stderr, ##args)
#define pr_out(args...) fprintf(stdout, ##args) #define pr_out(args...) fprintf(stdout, ##args)
#define pr_out_sp(num, args...) \
do { \
int ret = fprintf(stdout, ##args); \
if (ret < num) \
fprintf(stdout, "%*s", num - ret, ""); \
} while (0)
static int _mnlg_socket_recv_run(struct mnlg_socket *nlg, static int _mnlg_socket_recv_run(struct mnlg_socket *nlg,
mnl_cb_t data_cb, void *data) mnl_cb_t data_cb, void *data)
...@@ -275,6 +281,12 @@ static int attr_cb(const struct nlattr *attr, void *data) ...@@ -275,6 +281,12 @@ static int attr_cb(const struct nlattr *attr, void *data)
if (type == DEVLINK_ATTR_SB_TC_INDEX && if (type == DEVLINK_ATTR_SB_TC_INDEX &&
mnl_attr_validate(attr, MNL_TYPE_U16) < 0) mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
return MNL_CB_ERROR; return MNL_CB_ERROR;
if (type == DEVLINK_ATTR_SB_OCC_CUR &&
mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
return MNL_CB_ERROR;
if (type == DEVLINK_ATTR_SB_OCC_MAX &&
mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
return MNL_CB_ERROR;
tb[type] = attr; tb[type] = attr;
return MNL_CB_OK; return MNL_CB_OK;
} }
...@@ -864,6 +876,7 @@ static bool dl_dump_filter(struct dl *dl, struct nlattr **tb) ...@@ -864,6 +876,7 @@ static bool dl_dump_filter(struct dl *dl, struct nlattr **tb)
struct nlattr *attr_bus_name = tb[DEVLINK_ATTR_BUS_NAME]; struct nlattr *attr_bus_name = tb[DEVLINK_ATTR_BUS_NAME];
struct nlattr *attr_dev_name = tb[DEVLINK_ATTR_DEV_NAME]; struct nlattr *attr_dev_name = tb[DEVLINK_ATTR_DEV_NAME];
struct nlattr *attr_port_index = tb[DEVLINK_ATTR_PORT_INDEX]; struct nlattr *attr_port_index = tb[DEVLINK_ATTR_PORT_INDEX];
struct nlattr *attr_sb_index = tb[DEVLINK_ATTR_SB_INDEX];
if (opts->present & DL_OPT_HANDLE && if (opts->present & DL_OPT_HANDLE &&
attr_bus_name && attr_dev_name) { attr_bus_name && attr_dev_name) {
...@@ -885,6 +898,12 @@ static bool dl_dump_filter(struct dl *dl, struct nlattr **tb) ...@@ -885,6 +898,12 @@ static bool dl_dump_filter(struct dl *dl, struct nlattr **tb)
port_index != opts->port_index) port_index != opts->port_index)
return false; return false;
} }
if (opts->present & DL_OPT_SB && attr_sb_index) {
uint32_t sb_index = mnl_attr_get_u32(attr_sb_index);
if (sb_index != opts->sb_index)
return false;
}
return true; return true;
} }
...@@ -1168,6 +1187,9 @@ static void cmd_sb_help(void) ...@@ -1168,6 +1187,9 @@ static void cmd_sb_help(void)
pr_out(" devlink sb tc bind set DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX\n"); pr_out(" devlink sb tc bind set DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX\n");
pr_out(" type { ingress | egress } pool POOL_INDEX\n"); pr_out(" type { ingress | egress } pool POOL_INDEX\n");
pr_out(" th THRESHOLD\n"); pr_out(" th THRESHOLD\n");
pr_out(" devlink sb occupancy show { DEV | DEV/PORT_INDEX } [ sb SB_INDEX ]\n");
pr_out(" devlink sb occupancy snapshot DEV [ sb SB_INDEX ]\n");
pr_out(" devlink sb occupancy clearmax DEV [ sb SB_INDEX ]\n");
} }
static void pr_out_sb(struct nlattr **tb) static void pr_out_sb(struct nlattr **tb)
...@@ -1504,6 +1526,330 @@ static int cmd_sb_tc(struct dl *dl) ...@@ -1504,6 +1526,330 @@ static int cmd_sb_tc(struct dl *dl)
return -ENOENT; return -ENOENT;
} }
struct occ_item {
struct list_head list;
uint32_t index;
uint32_t cur;
uint32_t max;
uint32_t bound_pool_index;
};
struct occ_port {
struct list_head list;
char *bus_name;
char *dev_name;
uint32_t port_index;
uint32_t sb_index;
struct list_head pool_list;
struct list_head ing_tc_list;
struct list_head eg_tc_list;
};
struct occ_show {
struct dl *dl;
int err;
struct list_head port_list;
};
static struct occ_item *occ_item_alloc(void)
{
return calloc(1, sizeof(struct occ_item));
}
static void occ_item_free(struct occ_item *occ_item)
{
free(occ_item);
}
static struct occ_port *occ_port_alloc(uint32_t port_index)
{
struct occ_port *occ_port;
occ_port = calloc(1, sizeof(*occ_port));
if (!occ_port)
return NULL;
occ_port->port_index = port_index;
INIT_LIST_HEAD(&occ_port->pool_list);
INIT_LIST_HEAD(&occ_port->ing_tc_list);
INIT_LIST_HEAD(&occ_port->eg_tc_list);
return occ_port;
}
static void occ_port_free(struct occ_port *occ_port)
{
struct occ_item *occ_item, *tmp;
list_for_each_entry_safe(occ_item, tmp, &occ_port->pool_list, list)
occ_item_free(occ_item);
list_for_each_entry_safe(occ_item, tmp, &occ_port->ing_tc_list, list)
occ_item_free(occ_item);
list_for_each_entry_safe(occ_item, tmp, &occ_port->eg_tc_list, list)
occ_item_free(occ_item);
}
static struct occ_show *occ_show_alloc(struct dl *dl)
{
struct occ_show *occ_show;
occ_show = calloc(1, sizeof(*occ_show));
if (!occ_show)
return NULL;
occ_show->dl = dl;
INIT_LIST_HEAD(&occ_show->port_list);
return occ_show;
}
static void occ_show_free(struct occ_show *occ_show)
{
struct occ_port *occ_port, *tmp;
list_for_each_entry_safe(occ_port, tmp, &occ_show->port_list, list)
occ_port_free(occ_port);
}
static struct occ_port *occ_port_get(struct occ_show *occ_show,
struct nlattr **tb)
{
struct occ_port *occ_port;
uint32_t port_index;
port_index = mnl_attr_get_u32(tb[DEVLINK_ATTR_PORT_INDEX]);
list_for_each_entry_reverse(occ_port, &occ_show->port_list, list) {
if (occ_port->port_index == port_index)
return occ_port;
}
occ_port = occ_port_alloc(port_index);
if (!occ_port)
return NULL;
list_add_tail(&occ_port->list, &occ_show->port_list);
return occ_port;
}
static void pr_out_occ_show_item_list(const char *label, struct list_head *list,
bool bound_pool)
{
struct occ_item *occ_item;
int i = 1;
pr_out_sp(7, " %s:", label);
list_for_each_entry(occ_item, list, list) {
if ((i - 1) % 4 == 0 && i != 1)
pr_out_sp(7, " ");
if (bound_pool)
pr_out_sp(7, "%2u(%u):", occ_item->index,
occ_item->bound_pool_index);
else
pr_out_sp(7, "%2u:", occ_item->index);
pr_out_sp(15, "%7u/%u", occ_item->cur, occ_item->max);
if (i++ % 4 == 0)
pr_out("\n");
}
if ((i - 1) % 4 != 0)
pr_out("\n");
}
static void pr_out_occ_show_port(struct occ_port *occ_port)
{
pr_out_occ_show_item_list("pool", &occ_port->pool_list, false);
pr_out_occ_show_item_list("itc", &occ_port->ing_tc_list, true);
pr_out_occ_show_item_list("etc", &occ_port->eg_tc_list, true);
}
static void pr_out_occ_show(struct occ_show *occ_show)
{
struct dl *dl = occ_show->dl;
struct dl_opts *opts = &dl->opts;
struct occ_port *occ_port;
list_for_each_entry(occ_port, &occ_show->port_list, list) {
__pr_out_port_handle_nice(dl, opts->bus_name, opts->dev_name,
occ_port->port_index);
pr_out(":\n");
pr_out_occ_show_port(occ_port);
}
}
static void cmd_sb_occ_port_pool_process(struct occ_show *occ_show,
struct nlattr **tb)
{
struct occ_port *occ_port;
struct occ_item *occ_item;
if (occ_show->err || !dl_dump_filter(occ_show->dl, tb))
return;
occ_port = occ_port_get(occ_show, tb);
if (!occ_port) {
occ_show->err = -ENOMEM;
return;
}
occ_item = occ_item_alloc();
if (!occ_item) {
occ_show->err = -ENOMEM;
return;
}
occ_item->index = mnl_attr_get_u16(tb[DEVLINK_ATTR_SB_POOL_INDEX]);
occ_item->cur = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_CUR]);
occ_item->max = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_MAX]);
list_add_tail(&occ_item->list, &occ_port->pool_list);
}
static int cmd_sb_occ_port_pool_process_cb(const struct nlmsghdr *nlh, void *data)
{
struct occ_show *occ_show = data;
struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {};
struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] ||
!tb[DEVLINK_ATTR_PORT_INDEX] || !tb[DEVLINK_ATTR_SB_INDEX] ||
!tb[DEVLINK_ATTR_SB_POOL_INDEX] ||
!tb[DEVLINK_ATTR_SB_OCC_CUR] || !tb[DEVLINK_ATTR_SB_OCC_MAX])
return MNL_CB_ERROR;
cmd_sb_occ_port_pool_process(occ_show, tb);
return MNL_CB_OK;
}
static void cmd_sb_occ_tc_pool_process(struct occ_show *occ_show,
struct nlattr **tb)
{
struct occ_port *occ_port;
struct occ_item *occ_item;
uint8_t pool_type;
if (occ_show->err || !dl_dump_filter(occ_show->dl, tb))
return;
occ_port = occ_port_get(occ_show, tb);
if (!occ_port) {
occ_show->err = -ENOMEM;
return;
}
occ_item = occ_item_alloc();
if (!occ_item) {
occ_show->err = -ENOMEM;
return;
}
occ_item->index = mnl_attr_get_u16(tb[DEVLINK_ATTR_SB_TC_INDEX]);
occ_item->cur = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_CUR]);
occ_item->max = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_MAX]);
occ_item->bound_pool_index =
mnl_attr_get_u16(tb[DEVLINK_ATTR_SB_POOL_INDEX]);
pool_type = mnl_attr_get_u8(tb[DEVLINK_ATTR_SB_POOL_TYPE]);
if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS)
list_add_tail(&occ_item->list, &occ_port->ing_tc_list);
else if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS)
list_add_tail(&occ_item->list, &occ_port->eg_tc_list);
else
occ_item_free(occ_item);
}
static int cmd_sb_occ_tc_pool_process_cb(const struct nlmsghdr *nlh, void *data)
{
struct occ_show *occ_show = data;
struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {};
struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] ||
!tb[DEVLINK_ATTR_PORT_INDEX] || !tb[DEVLINK_ATTR_SB_INDEX] ||
!tb[DEVLINK_ATTR_SB_TC_INDEX] || !tb[DEVLINK_ATTR_SB_POOL_TYPE] ||
!tb[DEVLINK_ATTR_SB_POOL_INDEX] ||
!tb[DEVLINK_ATTR_SB_OCC_CUR] || !tb[DEVLINK_ATTR_SB_OCC_MAX])
return MNL_CB_ERROR;
cmd_sb_occ_tc_pool_process(occ_show, tb);
return MNL_CB_OK;
}
static int cmd_sb_occ_show(struct dl *dl)
{
struct nlmsghdr *nlh;
struct occ_show *occ_show;
uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP;
int err;
err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_HANDLEP, DL_OPT_SB);
if (err)
return err;
occ_show = occ_show_alloc(dl);
if (!occ_show)
return -ENOMEM;
nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_PORT_POOL_GET, flags);
err = _mnlg_socket_sndrcv(dl->nlg, nlh,
cmd_sb_occ_port_pool_process_cb, occ_show);
if (err)
goto out;
nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_TC_POOL_BIND_GET, flags);
err = _mnlg_socket_sndrcv(dl->nlg, nlh,
cmd_sb_occ_tc_pool_process_cb, occ_show);
if (err)
goto out;
pr_out_occ_show(occ_show);
out:
occ_show_free(occ_show);
return err;
}
static int cmd_sb_occ_snapshot(struct dl *dl)
{
struct nlmsghdr *nlh;
int err;
nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_OCC_SNAPSHOT,
NLM_F_REQUEST | NLM_F_ACK);
err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, DL_OPT_SB);
if (err)
return err;
return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
}
static int cmd_sb_occ_clearmax(struct dl *dl)
{
struct nlmsghdr *nlh;
int err;
nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_OCC_MAX_CLEAR,
NLM_F_REQUEST | NLM_F_ACK);
err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, DL_OPT_SB);
if (err)
return err;
return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
}
static int cmd_sb_occ(struct dl *dl)
{
if (dl_argv_match(dl, "help") || dl_no_arg(dl)) {
cmd_sb_help();
return 0;
} else if (dl_argv_match(dl, "show") ||
dl_argv_match(dl, "list")) {
dl_arg_inc(dl);
return cmd_sb_occ_show(dl);
} else if (dl_argv_match(dl, "snapshot")) {
dl_arg_inc(dl);
return cmd_sb_occ_snapshot(dl);
} else if (dl_argv_match(dl, "clearmax")) {
dl_arg_inc(dl);
return cmd_sb_occ_clearmax(dl);
}
pr_err("Command \"%s\" not found\n", dl_argv(dl));
return -ENOENT;
}
static int cmd_sb(struct dl *dl) static int cmd_sb(struct dl *dl)
{ {
if (dl_argv_match(dl, "help")) { if (dl_argv_match(dl, "help")) {
...@@ -1522,6 +1868,9 @@ static int cmd_sb(struct dl *dl) ...@@ -1522,6 +1868,9 @@ static int cmd_sb(struct dl *dl)
} else if (dl_argv_match(dl, "tc")) { } else if (dl_argv_match(dl, "tc")) {
dl_arg_inc(dl); dl_arg_inc(dl);
return cmd_sb_tc(dl); return cmd_sb_tc(dl);
} else if (dl_argv_match(dl, "occupancy")) {
dl_arg_inc(dl);
return cmd_sb_occ(dl);
} }
pr_err("Command \"%s\" not found\n", dl_argv(dl)); pr_err("Command \"%s\" not found\n", dl_argv(dl));
return -ENOENT; return -ENOENT;
......
...@@ -53,6 +53,10 @@ enum devlink_command { ...@@ -53,6 +53,10 @@ enum devlink_command {
DEVLINK_CMD_SB_TC_POOL_BIND_NEW, DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
DEVLINK_CMD_SB_TC_POOL_BIND_DEL, DEVLINK_CMD_SB_TC_POOL_BIND_DEL,
/* Shared buffer occupancy monitoring commands */
DEVLINK_CMD_SB_OCC_SNAPSHOT,
DEVLINK_CMD_SB_OCC_MAX_CLEAR,
/* add new commands above here */ /* add new commands above here */
__DEVLINK_CMD_MAX, __DEVLINK_CMD_MAX,
...@@ -119,6 +123,8 @@ enum devlink_attr { ...@@ -119,6 +123,8 @@ enum devlink_attr {
DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, /* u8 */ DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, /* u8 */
DEVLINK_ATTR_SB_THRESHOLD, /* u32 */ DEVLINK_ATTR_SB_THRESHOLD, /* u32 */
DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_TC_INDEX, /* u16 */
DEVLINK_ATTR_SB_OCC_CUR, /* u32 */
DEVLINK_ATTR_SB_OCC_MAX, /* u32 */
/* add new attributes above here, update the policy in devlink.c */ /* add new attributes above here, update the policy in devlink.c */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment