diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 134bd038ae8af8749070060f36fc3a4fbb4683f4..fcfc0b1149852f94af9991191d7fb7ca8c54c3d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + fw_reset.o qos.o # # Netdev basic @@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ - en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ + en/qos.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 055baf3b6cb1050779684e546a0a9b2721adfc4e..26e578a973e57e396806c9fc07cf326899aed371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -55,6 +55,7 @@ #include "en_stats.h" #include "en/dcbnl.h" #include "en/fs.h" +#include "en/qos.h" #include "lib/hv_vhca.h" extern const struct net_device_ops mlx5e_netdev_ops; @@ -161,6 +162,9 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlx5e_state_dereference(priv, p) \ + rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) + enum mlx5e_rq_group { MLX5E_RQ_GROUP_REGULAR, MLX5E_RQ_GROUP_XSK, @@ -663,11 +667,13 @@ struct mlx5e_channel { struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ + struct mlx5e_txqsq __rcu * __rcu *qos_sqs; bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; __be32 mkey_be; + u16 qos_sqs_size; u8 num_tc; u8 lag_port; @@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param { int next_state; int rl_update; int rl_index; + bool qos_update; + u16 qos_queue_group_id; }; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) @@ -788,10 +796,20 @@ struct mlx5e_scratchpad { cpumask_var_t cpumask; }; +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5e_sq_stats **qos_sq_stats; + u16 max_qos_sqs; + u16 maj_id; + u16 defcls; +}; + struct mlx5e_priv { /* priv data path fields - start */ /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC]; + struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + + MLX5E_QOS_MAX_LEAF_NODES]; int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -859,6 +877,7 @@ struct mlx5e_priv { struct mlx5e_hv_vhca_stats_agent stats_agent; #endif struct mlx5e_scratchpad scratchpad; + struct mlx5e_htb htb; }; struct mlx5e_rx_handlers { @@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, void *context); +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); @@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p); +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); @@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param; int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn); void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 807147d97a0fa48e8cc8c08121208dde5cadc6cf..ea2cfb04b31adb5ad982f2e0d8a9c8347eb1aec3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param); void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, struct mlx5e_sq_param *param); +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param); void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 2a2bac30daaa712ecccec93be7aad2ee5ec4306f..eeddd1137dda5838a0cb13eab53cc4f849d4513d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, csp.min_inline_mode = txqsq->min_inline_mode; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; - err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); if (err) goto err_free_txqsq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c new file mode 100644 index 0000000000000000000000000000000000000000..12d7ad0612375a3b97cc2accb35f78025947f288 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -0,0 +1,984 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en.h" +#include "params.h" +#include "../qos.h" + +#define BYTES_IN_MBIT 125000 + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); +} + +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv) +{ + int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev)); + + return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1; +} + +/* Software representation of the QoS tree (internal to this file) */ + +static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) +{ + int size = mlx5e_qos_max_leaf_nodes(priv->mdev); + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(priv->htb.qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct rcu_head rcu; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +static struct mlx5e_qos_node * +mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, priv->htb.qos_used_qids); + + node->classid = classid; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, priv->htb.qos_used_qids); + mlx5e_update_tx_netdev_queues(priv); + } + kfree_rcu(node, rcu); +} + +/* TX datapath API */ + +static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) +{ + /* These channel params are safe to access from the datapath, because: + * 1. This function is called only after checking priv->htb.maj_id != 0, + * and the number of queues can't change while HTB offload is active. + * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for + * mlx5e_select_queue to finish while holding priv->state_lock, + * preventing other code from changing the number of queues. + */ + bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); + + return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid; +} + +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_sw_node_find_rcu(priv, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_channel *c; + int ix; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + return mlx5e_state_dereference(priv, qos_sqs[qid]); +} + +/* SQ lifecycle */ + +static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + struct mlx5e_qos_node *node) +{ + struct mlx5e_create_cq_param ccp = {}; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_sq_param param_sq; + struct mlx5e_cq_param param_cq; + int txq_ix, ix, qid, err = 0; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + + params = &chs->params; + + txq_ix = mlx5e_qid_from_qos(chs, node->qid); + + WARN_ON(node->qid > priv->htb.max_qos_sqs); + if (node->qid == priv->htb.max_qos_sqs) { + struct mlx5e_sq_stats *stats, **stats_list = NULL; + + if (priv->htb.max_qos_sqs == 0) { + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), + GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + } + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + kvfree(stats_list); + return -ENOMEM; + } + if (stats_list) + WRITE_ONCE(priv->htb.qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats); + /* Order max_qos_sqs increment after writing the array pointer. + * Pairs with smp_load_acquire in en_stats.c. + */ + smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1); + } + + ix = node->qid % params->num_channels; + qid = node->qid / params->num_channels; + c = chs->c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + + if (!sq) + return -ENOMEM; + + mlx5e_build_create_cq_param(&ccp, c); + + memset(¶m_sq, 0, sizeof(param_sq)); + memset(¶m_cq, 0, sizeof(param_cq)); + mlx5e_build_sq_param(priv, params, ¶m_sq); + mlx5e_build_tx_cq_param(priv, params, ¶m_cq); + err = mlx5e_open_cq(priv, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); + if (err) + goto err_free_sq; + err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, + ¶m_sq, sq, 0, node->hw_id, node->qid); + if (err) + goto err_close_cq; + + rcu_assign_pointer(qos_sqs[qid], sq); + + return 0; + +err_close_cq: + mlx5e_close_cq(&sq->cq); +err_free_sq: + kfree(sq); + return err; +} + +static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, node->qid); + + WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); + + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid); + mlx5e_activate_txqsq(sq); +} + +static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, qid); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; +} + +static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int ix; + + params = &priv->channels.params; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock)); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + synchronize_rcu(); /* Sync with NAPI. */ + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); +} + +void mlx5e_qos_close_queues(struct mlx5e_channel *c) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock)); + if (!qos_sqs) + return; + synchronize_rcu(); /* Sync with NAPI. */ + + for (i = 0; i < c->qos_sqs_size; i++) { + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); + } + + kvfree(qos_sqs); +} + +static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_close_queues(chs->c[i]); +} + +static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + u16 qos_sqs_size; + int i; + + qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num); + + for (i = 0; i < chs->num; i++) { + struct mlx5e_txqsq **sqs; + + sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL); + if (!sqs) + goto err_free; + + WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size); + smp_wmb(); /* Pairs with mlx5e_napi_poll. */ + rcu_assign_pointer(chs->c[i]->qos_sqs, sqs); + } + + return 0; + +err_free: + while (--i >= 0) { + struct mlx5e_txqsq **sqs; + + sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL, + lockdep_is_held(&priv->state_lock)); + + synchronize_rcu(); /* Sync with NAPI. */ + kvfree(sqs); + } + return -ENOMEM; +} + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + if (!priv->htb.maj_id) + return 0; + + err = mlx5e_qos_alloc_queues(priv, chs); + if (err) + return err; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = mlx5e_open_qos_sq(priv, chs, node); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; + } + } + + return 0; +} + +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + mlx5e_activate_qos_sq(priv, node); + } +} + +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) +{ + struct mlx5e_params *params = &c->priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs); + if (!qos_sqs) + return; + + for (i = 0; i < c->qos_sqs_size; i++) { + u16 qid = params->num_channels * i + c->ix; + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL; + } +} + +static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_deactivate_queues(chs->c[i]); +} + +/* HTB API */ + +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + return err; + } + + root = mlx5e_sw_node_create_root(priv); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + WRITE_ONCE(priv->htb.defcls, htb_defcls); + /* Order maj_id after defcls - pairs with + * mlx5e_select_queue/mlx5e_select_htb_queues. + */ + smp_store_release(&priv->htb.maj_id, htb_maj_id); + + return 0; + +err_sw_node_delete: + mlx5e_sw_node_delete(priv, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); + return err; +} + +int mlx5e_htb_root_del(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *root; + int err; + + qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + + WRITE_ONCE(priv->htb.maj_id, 0); + synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ + + root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(priv->mdev, root->hw_id); + if (err) + qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_sw_node_delete(priv, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 0 : share; + + qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) +{ + *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); + + qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + int qid; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_find_unused_qos_qid(priv); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_sw_node_find(priv, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_sw_node_delete(priv, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. */ + + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_sw_node_delete(priv, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +{ + qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); + netdev_tx_reset_queue(txq); + netif_tx_start_queue(txq); +} + +static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid); + + *old_qid = *new_qid = 0; + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + mlx5e_sw_node_delete(priv, node); + + moved_qid = mlx5e_qos_cur_leaf_nodes(priv); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. */ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_sw_node_find_by_qid(priv, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb.qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, moved_qid); + + __set_bit(qid, priv->htb.qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid); + *new_qid = mlx5e_qid_from_qos(&priv->channels, qid); + return 0; +} + +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, qid); + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_sw_node_delete(priv, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. + */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + err = mlx5_qos_destroy_node(priv->mdev, old_hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw); + + err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_qos_update_children(priv, node, extack); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h new file mode 100644 index 0000000000000000000000000000000000000000..5af7991fcd19482f7b57068aa8daa63ecc52cc11 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_QOS_H +#define __MLX5E_EN_QOS_H + +#include <linux/mlx5/driver.h> + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_priv; +struct mlx5e_channels; +struct mlx5e_channel; + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); + +/* TX datapath API */ +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid); +struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid); + +/* SQ lifecycle */ +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_queues(struct mlx5e_channel *c); + +/* HTB API */ +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack); +int mlx5e_htb_root_del(struct mlx5e_priv *priv); +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack); +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d37742a888c1514a8506a1adf6a87ca0b6ec8f3..2e5a0696374acedb1a2737f63dba16d8974aaa5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } + /* Don't allow changing the number of channels if HTB offload is active, + * because the numeration of the QoS SQs will change, while per-queue + * qdiscs are attached. + */ + if (priv->htb.maj_id) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; @@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) return -EOPNOTSUPP; + /* Don't allow changing the PTP state if HTB offload is active, because + * the numeration of the QoS SQs will change, while per-queue qdiscs are + * attached. + */ + if (priv->htb.maj_id) { + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", + __func__); + return -EINVAL; + } + new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f33c38629886a5248a2b80751690629553dcaa4f..b9a175982801b8ee07e2f1009e4ef8cffc9e105e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -65,6 +65,7 @@ #include "en/devlink.h" #include "lib/mlx5.h" #include "en/ptp.h" +#include "qos.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -1143,7 +1144,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); @@ -1233,6 +1233,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p) { + u64 bitmask = 0; void *in; void *sqc; int inlen; @@ -1248,9 +1249,14 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); MLX5_SET(sqc, sqc, state, p->next_state); if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { - MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + bitmask |= 1; + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } + if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1 << 2; + MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); + } + MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); err = mlx5_core_modify_sq(mdev, sqn, in); @@ -1267,6 +1273,7 @@ static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn) { struct mlx5e_modify_sq_param msp = {0}; @@ -1278,6 +1285,10 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, msp.curr_state = MLX5_SQC_STATE_RST; msp.next_state = MLX5_SQC_STATE_RDY; + if (qos_queue_group_id) { + msp.qos_update = true; + msp.qos_queue_group_id = qos_queue_group_id; + } err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) mlx5e_destroy_sq(mdev, *sqn); @@ -1288,13 +1299,9 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, static int mlx5e_set_sq_maxrate(struct net_device *dev, struct mlx5e_txqsq *sq, u32 rate); -static int mlx5e_open_txqsq(struct mlx5e_channel *c, - u32 tisn, - int txq_ix, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_txqsq *sq, - int tc) +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid) { struct mlx5e_create_sq_param csp = {}; u32 tx_rate; @@ -1304,12 +1311,17 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, if (err) return err; + if (qos_queue_group_id) + sq->stats = c->priv->htb.qos_sq_stats[qos_qid]; + else + sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + csp.tisn = tisn; csp.tis_lst_sz = 1; csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); if (err) goto err_free_txqsq; @@ -1366,7 +1378,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) } } -static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) { struct mlx5_core_dev *mdev = sq->mdev; struct mlx5_rate_limit rl = {0}; @@ -1403,7 +1415,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_icosq; @@ -1452,7 +1464,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; @@ -1703,7 +1715,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, int txq_ix = c->ix + tc * params->num_channels; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, - params, &cparam->txq_sq, &c->sq[tc], tc); + params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0); if (err) goto err_close_sqs; } @@ -2044,6 +2056,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); + + mlx5e_qos_deactivate_queues(c); } static void mlx5e_close_channel(struct mlx5e_channel *c) @@ -2051,6 +2065,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_close_xsk(c); mlx5e_close_queues(c); + mlx5e_qos_close_queues(c); netif_napi_del(&c->napi); kvfree(c); @@ -2198,9 +2213,8 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2379,10 +2393,18 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + mlx5e_health_channels_update(priv); kvfree(cparam); return 0; +err_close_ptp: + if (chs->port_ptp) + mlx5e_port_ptp_close(chs->port_ptp); + err_close_channels: for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); @@ -2915,11 +2937,31 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc) netdev_set_tc_queue(netdev, tc, nch, 0); } +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) +{ + int qos_queues, nch, ntc, num_txqs, err; + + qos_queues = mlx5e_qos_cur_leaf_nodes(priv); + + nch = priv->channels.params.num_channels; + ntc = priv->channels.params.num_tc; + num_txqs = nch * ntc + qos_queues; + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) + num_txqs += ntc; + + mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); + if (err) + netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + + return err; +} + static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; - int num_txqs, num_rxqs, nch, ntc; int old_num_txqs, old_ntc; + int num_rxqs, nch, ntc; int err; old_num_txqs = netdev->real_num_tx_queues; @@ -2927,18 +2969,13 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; - num_txqs = nch * ntc; - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) - num_txqs += ntc; num_rxqs = nch * priv->profile->rq_groups; mlx5e_netdev_set_tcs(netdev, nch, ntc); - err = netif_set_real_num_tx_queues(netdev, num_txqs); - if (err) { - netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + err = mlx5e_update_tx_netdev_queues(priv); + if (err) goto err_tcs; - } err = netif_set_real_num_rx_queues(netdev, num_rxqs); if (err) { netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); @@ -3042,6 +3079,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); @@ -3608,6 +3646,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); + /* MQPRIO is another toplevel qdisc that can't be attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(priv->htb.maj_id)) { + err = -EINVAL; + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_tc = tc ? tc : 1; @@ -3628,12 +3674,55 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, return err; } +static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb) +{ + int res; + + switch (htb->command) { + case TC_HTB_CREATE: + return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid, + htb->extack); + case TC_HTB_DESTROY: + return mlx5e_htb_root_del(priv); + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid, + htb->rate, htb->ceil, htb->extack); + if (res < 0) + return res; + htb->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid, + htb->rate, htb->ceil, htb->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid, + htb->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(priv, htb->classid, + htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil, + htb->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_get_txq_by_classid(priv, htb->classid); + if (res < 0) + return res; + htb->qid = res; + return 0; + default: + return -EOPNOTSUPP; + } +} + static LIST_HEAD(mlx5e_block_cb_list); static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + int err; switch (type) { case TC_SETUP_BLOCK: { @@ -3647,6 +3736,11 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); + case TC_SETUP_QDISC_HTB: + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_htb(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; default: return -EOPNOTSUPP; } @@ -3811,20 +3905,25 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) return 0; } -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) -static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +static int set_feature_hw_tc(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } +#endif + + if (!enable && priv->htb.maj_id) { + netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); + return -EINVAL; + } return 0; } -#endif static int set_feature_rx_all(struct net_device *netdev, bool enable) { @@ -3922,9 +4021,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); -#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); @@ -5028,6 +5125,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } + if (mlx5_qos_is_supported(mdev)) + netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; @@ -5333,6 +5432,7 @@ int mlx5e_netdev_init(struct net_device *netdev, return -ENOMEM; mutex_init(&priv->state_lock); + hash_init(priv->htb.qos_tc2node); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); @@ -5355,8 +5455,14 @@ int mlx5e_netdev_init(struct net_device *netdev, void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { + int i; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); + + for (i = 0; i < priv->htb.max_qos_sqs; i++) + kfree(priv->htb.qos_sq_stats[i]); + kvfree(priv->htb.qos_sq_stats); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5366,13 +5472,17 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, { struct net_device *netdev; unsigned int ptp_txqs = 0; + int qos_sqs = 0; int err; if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) ptp_txqs = profile->max_tc; + if (mlx5_qos_is_supported(mdev)) + qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * profile->max_tc + ptp_txqs, + nch * profile->max_tc + ptp_txqs + qos_sqs, nch * profile->rq_groups); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 2cf2042b37c76d766426451d5381a96cd06b5d2d..92c5b81427b971f81817f4149c662fb9e92e71e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, } } +static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (i = 0; i < max_qos_sqs; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) } } mlx5e_stats_grp_sw_update_stats_ptp(priv, s); + mlx5e_stats_grp_sw_update_stats_qos(priv, s); } static const struct counter_desc q_stats_desc[] = { @@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, }; +static const struct counter_desc qos_sq_stats_desc[] = { + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) @@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = { #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + int i, qid; + + for (qid = 0; qid < max_qos_sqs; qid++) + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + qos_sq_stats_desc[i].format, qid); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i, qid; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (qid = 0; qid < max_qos_sqs; qid++) { + struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); + + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { @@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0); static MLX5E_DEFINE_STATS_GRP(ptp, 0); +static MLX5E_DEFINE_STATS_GRP(qos, 0); /* The stats groups order is opposite to the update_stats() order calls */ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { @@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(ptp), + &MLX5E_STATS_GRP(qos), }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index e41fc11f2ce78875fd2929da91336066d3b3ea26..93c41312fb037ac36b00e229fadeafbe708bcb99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -55,6 +55,8 @@ #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) + struct counter_desc { char format[ETH_GSTRING_LEN]; size_t offset; /* Byte offset */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 74f233eece5494065f9121b2bc2d75d7345c9329..da6a358a8a10324c0f441a02092a8092f780f57b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -106,28 +106,53 @@ static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) return priv->port_ptp_tc2realtxq[up]; } +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + u16 htb_maj_id) +{ + u16 classid; + + if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct mlx5e_priv *priv = netdev_priv(dev); + int num_tc_x_num_ch; int txq_ix; int up = 0; int ch_ix; - if (unlikely(priv->channels.port_ptp)) { - int num_tc_x_num_ch; + /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ + num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - mlx5e_use_ptpsq(skb)) - return mlx5e_select_ptpsq(dev, skb); + if (unlikely(htb_maj_id)) { + txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); + if (txq_ix > 0) + return txq_ix; + } - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(priv->channels.port_ptp)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel txqs. + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq(). + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). */ if (unlikely(txq_ix >= num_tc_x_num_ch)) txq_ix %= num_tc_x_num_ch; @@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + if (unlikely(!sq)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } /* May send SKBs and WQEs. */ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a3cfe06d511691206a76e7cc6c8eb8a31a81c923..d54da3797c30b1909e54c4a37fe08658e259d46e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) napi); struct mlx5e_ch_stats *ch_stats = c->stats; struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_txqsq __rcu **qos_sqs; struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; bool aff_change = false; bool busy_xsk = false; bool busy = false; int work_done = 0; + u16 qos_sqs_size; bool xsk_open; int i; rcu_read_lock(); + qos_sqs = rcu_dereference(c->qos_sqs); + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); ch_stats->poll++; @@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (unlikely(qos_sqs)) { + smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */ + qos_sqs_size = READ_ONCE(c->qos_sqs_size); + + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) + busy |= mlx5e_poll_tx_cq(&sq->cq, budget); + } + } + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) @@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_cq_arm(&c->sq[i].cq); } + if (unlikely(qos_sqs)) { + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) { + mlx5e_handle_tx_dim(sq); + mlx5e_cq_arm(&sq->cq); + } + } + } mlx5e_handle_rx_dim(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c new file mode 100644 index 0000000000000000000000000000000000000000..0777be24a3074a7a078b091cb30b93d4d33df9e1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "qos.h" + +#define MLX5_QOS_DEFAULT_DWRR_UID 0 + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, qos)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_bw_share)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_rate_limit)) + return false; + return true; +} + +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group); +} + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + void *attr; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id) +{ + return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id); +} + +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + u32 bitmask = 0; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id, bitmask); +} + +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id) +{ + return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h new file mode 100644 index 0000000000000000000000000000000000000000..125e4e47e6f71f2ecf31a960d5eca467ab8a5b92 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_QOS_H +#define __MLX5_QOS_H + +#include "mlx5_core.h" + +#define MLX5_DEBUG_QOS_MASK BIT(4) + +#define qos_err(mdev, fmt, ...) \ + mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_warn(mdev, fmt, ...) \ + mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_dbg(mdev, fmt, ...) \ + mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__) + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev); +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id); +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share, + u32 max_avg_bw, u32 id); +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id); + +#endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 823411e288c0163391c7284bbcce48e2ead0476c..71ae6aac34109889f40bdb627cbb1bb0861f0dc9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x4]; + u8 reserved_at_7[0x1]; + u8 nic_sq_scheduling[0x1]; + u8 nic_bw_share[0x1]; + u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; u8 reserved_at_c[0x14]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0xb]; + u8 log_max_qos_nic_queue_group[0x5]; + u8 reserved_at_30[0x10]; u8 packet_pacing_max_rate[0x20]; @@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; - u8 reserved_at_110[0x10]; + u8 qos_queue_group_id[0x10]; u8 reserved_at_120[0x40]; @@ -3362,6 +3367,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, }; enum { @@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits { enum { SCHEDULING_HIERARCHY_E_SWITCH = 0x2, + SCHEDULING_HIERARCHY_NIC = 0x3, }; struct mlx5_ifc_query_scheduling_element_in_bits { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef517254367d93eb24763b2e2d8a74150d0bb1b8..9e8572533d8e695b2545b0d88bdb37cb30704d42 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -858,6 +858,7 @@ enum tc_setup_type { TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, + TC_SETUP_QDISC_HTB, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f2a9c44171c6c63a8cba87f8ed031ea61e93a6f..255e4f4b521f4095f2ab1e60e3b262366b5ff95f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -783,6 +783,42 @@ struct tc_mq_qopt_offload { }; }; +enum tc_htb_command { + /* Root */ + TC_HTB_CREATE, /* Initialize HTB offload. */ + TC_HTB_DESTROY, /* Destroy HTB offload. */ + + /* Classes */ + /* Allocate qid and create leaf. */ + TC_HTB_LEAF_ALLOC_QUEUE, + /* Convert leaf to inner, preserve and return qid, create new leaf. */ + TC_HTB_LEAF_TO_INNER, + /* Delete leaf, while siblings remain. */ + TC_HTB_LEAF_DEL, + /* Delete leaf, convert parent to leaf, preserving qid. */ + TC_HTB_LEAF_DEL_LAST, + /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */ + TC_HTB_LEAF_DEL_LAST_FORCE, + /* Modify parameters of a node. */ + TC_HTB_NODE_MODIFY, + + /* Class qdisc */ + TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. */ +}; + +struct tc_htb_qopt_offload { + struct netlink_ext_ack *extack; + enum tc_htb_command command; + u16 classid; + u32 parent_classid; + u16 qid; + u16 moved_qid; + u64 rate; + u64 ceil; +}; + +#define TC_HTB_CLASSID_ROOT U32_MAX + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7bee99aebce427aa839d595101f566f298aeef2..070f01bf17eb9f878853b52c625901f19b9f9ac7 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -210,7 +210,8 @@ struct Qdisc_class_ops { int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); - int (*delete)(struct Qdisc *, unsigned long); + int (*delete)(struct Qdisc *, unsigned long, + struct netlink_ext_ack *); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ @@ -552,14 +553,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) return qdisc->dev_queue->dev; } -static inline void sch_tree_lock(const struct Qdisc *q) +static inline void sch_tree_lock(struct Qdisc *q) { - spin_lock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_lock_bh(qdisc_lock(q)); + else + spin_lock_bh(qdisc_root_sleeping_lock(q)); } -static inline void sch_tree_unlock(const struct Qdisc *q) +static inline void sch_tree_unlock(struct Qdisc *q) { - spin_unlock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_unlock_bh(qdisc_lock(q)); + else + spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9e7c2c6078456bdc8dad240e844cfd42144bb024..79a699f106b14ef36afe459b955ab136326e36a0 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -434,6 +434,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6fe4e5cc807c90b046a16f014df43bfe841cbc43..e2e4353db8a70d5fe3d0f41d93b446c7b5ff628f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1866,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl) + struct Qdisc *q, unsigned long cl, + struct netlink_ext_ack *extack) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct sk_buff *skb; @@ -1885,7 +1886,7 @@ static int tclass_del_notify(struct net *net, return -EINVAL; } - err = cops->delete(q, cl); + err = cops->delete(q, cl, extack); if (err) { kfree_skb(skb); return err; @@ -2088,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, goto out; break; case RTM_DELTCLASS: - err = tclass_del_notify(net, cops, skb, n, q, cl); + err = tclass_del_notify(net, cops, skb, n, q, cl, extack); /* Unbind the class with flilters with 0 */ tc_bind_tclass(q, portid, clid, 0); goto out; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 007bd2d9f1ff970bfdfce45331b892936987c956..d0c9a57398fc2619aae98c452fd643330e508332 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -320,7 +320,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, return error; } -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)arg; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 53d45e029c36d46b9e4f0d1005ef98c468fdd085..320b3d31fa97f4a9f6c06205b7aa370081e34356 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1675,7 +1675,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t return err; } -static int cbq_delete(struct Qdisc *sch, unsigned long arg) +static int cbq_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index dde564670ad8ca17128e45401b369144df9231a3..fc1e47069593679927bccdc4e966d96e89de05d3 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) kfree(cl); } -static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +static int drr_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl = (struct drr_class *)arg; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2b88710994d71e5339f82ef3fe53ecce961ea42f..cd2748e2d4a2057e327e61478618a1ca8b1bf5b7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -150,7 +150,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, return err; } -static int dsmark_delete(struct Qdisc *sch, unsigned long arg) +static int dsmark_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d1902fca98447f0073e98947ae0cc7f969728022..bf0034c66e359674322845b7b09959c047a6d248 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) } static int -hfsc_delete_class(struct Qdisc *sch, unsigned long arg) +hfsc_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cd70dbcbd72fdddcda75d98d918f70554af60214..dff3adf5a9156c2412c64a10ad1b2ce9e1367433 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -114,6 +114,7 @@ struct htb_class { * Written often fields */ struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_packed bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -174,6 +175,11 @@ struct htb_sched { int row_mask[TC_HTB_MAXDEPTH]; struct htb_level hlevel[TC_HTB_MAXDEPTH]; + + struct Qdisc **direct_qdiscs; + unsigned int num_direct_qdiscs; + + bool offload; }; /* find class in global hash table using given handle */ @@ -957,7 +963,7 @@ static void htb_reset(struct Qdisc *sch) if (cl->level) memset(&cl->inner, 0, sizeof(cl->inner)); else { - if (cl->leaf.q) + if (cl->leaf.q && !q->offload) qdisc_reset(cl->leaf.q); } cl->prio_activity = 0; @@ -980,6 +986,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, [TCA_HTB_RATE64] = { .type = NLA_U64 }, [TCA_HTB_CEIL64] = { .type = NLA_U64 }, + [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG }, }; static void htb_work_func(struct work_struct *work) @@ -992,12 +999,27 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } +static void htb_set_lockdep_class_child(struct Qdisc *q) +{ + static struct lock_class_key child_key; + + lockdep_set_class(qdisc_lock(q), &child_key); +} + +static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) +{ + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); +} + static int htb_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; + unsigned int ntx; int err; qdisc_watchdog_init(&q->watchdog, sch); @@ -1022,9 +1044,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (gopt->version != HTB_VER >> 16) return -EINVAL; + q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]); + + if (q->offload) { + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + q->num_direct_qdiscs = dev->real_num_tx_queues; + q->direct_qdiscs = kcalloc(q->num_direct_qdiscs, + sizeof(*q->direct_qdiscs), + GFP_KERNEL); + if (!q->direct_qdiscs) + return -ENOMEM; + } + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - return err; + goto err_free_direct_qdiscs; qdisc_skb_head_init(&q->direct_queue); @@ -1037,7 +1076,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, q->rate2quantum = 1; q->defcls = gopt->defcls; + if (!q->offload) + return 0; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *qdisc; + + qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 0), extack); + if (!qdisc) { + err = -ENOMEM; + goto err_free_qdiscs; + } + + htb_set_lockdep_class_child(qdisc); + q->direct_qdiscs[ntx] = qdisc; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + } + + sch->flags |= TCQ_F_MQROOT; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_CREATE, + .parent_classid = TC_H_MAJ(sch->handle) >> 16, + .classid = TC_H_MIN(q->defcls), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + goto err_free_qdiscs; + return 0; + +err_free_qdiscs: + /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */ + q->offload = false; + + for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx]; + ntx++) + qdisc_put(q->direct_qdiscs[ntx]); + + qdisc_class_hash_destroy(&q->clhash); + /* Prevent use-after-free and double-free when htb_destroy gets called. + */ + q->clhash.hash = NULL; + q->clhash.hashsize = 0; + +err_free_direct_qdiscs: + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; + return err; +} + +static void htb_attach_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct htb_sched *q = qdisc_priv(sch); + unsigned int ntx; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; + + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + qdisc_put(old); + qdisc_hash_add(qdisc, false); + } + for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); + + qdisc_put(old); + } + + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; +} + +static void htb_attach_software(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + /* Resemble qdisc_graft behavior. */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); + + qdisc_refcount_inc(sch); + + qdisc_put(old); + } +} + +static void htb_attach(struct Qdisc *sch) +{ + struct htb_sched *q = qdisc_priv(sch); + + if (q->offload) + htb_attach_offload(sch); + else + htb_attach_software(sch); } static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -1046,6 +1185,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + if (q->offload) + sch->flags |= TCQ_F_OFFLOADED; + else + sch->flags &= ~TCQ_F_OFFLOADED; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. @@ -1063,6 +1207,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; return nla_nest_end(skb, nest); @@ -1075,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_opt opt; @@ -1101,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, TCA_HTB_PAD)) @@ -1117,10 +1266,39 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, return -1; } +static void htb_offload_aggregate_stats(struct htb_sched *q, + struct htb_class *cl) +{ + struct htb_class *c; + unsigned int i; + + memset(&cl->bstats, 0, sizeof(cl->bstats)); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { + struct htb_class *p = c; + + while (p && p->level < cl->level) + p = p->parent; + + if (p != cl) + continue; + + cl->bstats.bytes += c->bstats_bias.bytes; + cl->bstats.packets += c->bstats_bias.packets; + if (c->level == 0) { + cl->bstats.bytes += c->leaf.q->bstats.bytes; + cl->bstats.packets += c->leaf.q->bstats.packets; + } + } + } +} + static int htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct gnet_stats_queue qs = { .drops = cl->drops, .overlimits = cl->overlimits, @@ -1135,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), INT_MIN, INT_MAX); + if (q->offload) { + if (!cl->level) { + if (cl->leaf.q) + cl->bstats = cl->leaf.q->bstats; + else + memset(&cl->bstats, 0, sizeof(cl->bstats)); + cl->bstats.bytes += cl->bstats_bias.bytes; + cl->bstats.packets += cl->bstats_bias.packets; + } else { + htb_offload_aggregate_stats(q, cl); + } + } + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || @@ -1144,19 +1335,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); } +static struct netdev_queue * +htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; + int err; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_QUERY_QUEUE, + .classid = TC_H_MIN(tcm->tcm_parent), + }; + err = htb_offload(dev, &offload_opt); + if (err || offload_opt.qid >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, offload_opt.qid); +} + +static struct Qdisc * +htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) +{ + struct net_device *dev = dev_queue->dev; + struct Qdisc *old_q; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + old_q = dev_graft_qdisc(dev_queue, new_q); + if (new_q) + new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + if (dev->flags & IFF_UP) + dev_activate(dev); + + return old_q; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new) +{ + struct netdev_queue *queue_old, *queue_new; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + + queue_old = netdev_get_tx_queue(dev, qid_old); + queue_new = netdev_get_tx_queue(dev, qid_new); + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + qdisc->dev_queue = queue_new; + qdisc = dev_graft_qdisc(queue_new, qdisc); + if (dev->flags & IFF_UP) + dev_activate(dev); + + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); +} + static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { + struct netdev_queue *dev_queue = sch->dev_queue; struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); + struct Qdisc *old_q; if (cl->level) return -EINVAL; - if (new == NULL && - (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack)) == NULL) - return -ENOBUFS; + + if (q->offload) { + dev_queue = new->dev_queue; + WARN_ON(dev_queue != cl->leaf.q->dev_queue); + } + + if (!new) { + new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + cl->common.classid, extack); + if (!new) + return -ENOBUFS; + } + + if (q->offload) { + htb_set_lockdep_class_child(new); + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new); + old_q = htb_graft_helper(dev_queue, new); + } *old = qdisc_replace(sch, new, &cl->leaf.q); + + if (q->offload) { + WARN_ON(old_q != *old); + qdisc_put(old_q); + } + return 0; } @@ -1184,9 +1453,10 @@ static inline int htb_parent_last_child(struct htb_class *cl) return 1; } -static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, +static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, struct Qdisc *new_q) { + struct htb_sched *q = qdisc_priv(sch); struct htb_class *parent = cl->parent; WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); @@ -1204,6 +1474,76 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->cmode = HTB_CAN_SEND; } +static void htb_parent_to_leaf_offload(struct Qdisc *sch, + struct netdev_queue *dev_queue, + struct Qdisc *new_q) +{ + struct Qdisc *old_q; + + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new_q); + old_q = htb_graft_helper(dev_queue, new_q); + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); +} + +static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + bool last_child, bool destroying, + struct netlink_ext_ack *extack) +{ + struct tc_htb_qopt_offload offload_opt; + struct Qdisc *q = cl->leaf.q; + struct Qdisc *old = NULL; + int err; + + if (cl->level) + return -EINVAL; + + WARN_ON(!q); + if (!destroying) { + /* On destroy of HTB, two cases are possible: + * 1. q is a normal qdisc, but q->dev_queue has noop qdisc. + * 2. q is a noop qdisc (for nodes that were inner), + * q->dev_queue is noop_netdev_queue. + */ + old = htb_graft_helper(q->dev_queue, NULL); + WARN_ON(!old); + WARN_ON(old != q); + } + + if (cl->parent) { + cl->parent->bstats_bias.bytes += q->bstats.bytes; + cl->parent->bstats_bias.packets += q->bstats.packets; + } + + offload_opt = (struct tc_htb_qopt_offload) { + .command = !last_child ? TC_HTB_LEAF_DEL : + destroying ? TC_HTB_LEAF_DEL_LAST_FORCE : + TC_HTB_LEAF_DEL_LAST, + .classid = cl->common.classid, + .extack = extack, + }; + err = htb_offload(qdisc_dev(sch), &offload_opt); + + if (!err || destroying) + qdisc_put(old); + else + htb_graft_helper(q->dev_queue, old); + + if (last_child) + return err; + + if (!err && offload_opt.moved_qid != 0) { + if (destroying) + q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch), + offload_opt.qid); + else + htb_offload_move_qdisc(sch, offload_opt.moved_qid, + offload_opt.qid); + } + + return err; +} + static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { @@ -1217,8 +1557,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct hlist_node *next; + bool nonempty, changed; struct htb_class *cl; unsigned int i; @@ -1237,21 +1580,68 @@ static void htb_destroy(struct Qdisc *sch) cl->block = NULL; } } - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], - common.hnode) - htb_destroy_class(sch, cl); - } + + do { + nonempty = false; + changed = false; + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], + common.hnode) { + bool last_child; + + if (!q->offload) { + htb_destroy_class(sch, cl); + continue; + } + + nonempty = true; + + if (cl->level) + continue; + + changed = true; + + last_child = htb_parent_last_child(cl); + htb_destroy_class_offload(sch, cl, last_child, + true, NULL); + qdisc_class_hash_remove(&q->clhash, + &cl->common); + if (cl->parent) + cl->parent->children--; + if (last_child) + htb_parent_to_leaf(sch, cl, NULL); + htb_destroy_class(sch, cl); + } + } + } while (changed); + WARN_ON(nonempty); + qdisc_class_hash_destroy(&q->clhash); __qdisc_reset_queue(&q->direct_queue); + + if (!q->offload) + return; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_DESTROY, + }; + htb_offload(dev, &offload_opt); + + if (!q->direct_qdiscs) + return; + for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) + qdisc_put(q->direct_qdiscs[i]); + kfree(q->direct_qdiscs); } -static int htb_delete(struct Qdisc *sch, unsigned long arg) +static int htb_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; struct Qdisc *new_q = NULL; int last_child = 0; + int err; /* TODO: why don't allow to delete subtree ? references ? does * tc subsys guarantee us that in htb_destroy it holds no class @@ -1260,11 +1650,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (cl->children || cl->filter_cnt) return -EBUSY; - if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!cl->level && htb_parent_last_child(cl)) + last_child = 1; + + if (q->offload) { + err = htb_destroy_class_offload(sch, cl, last_child, false, + extack); + if (err) + return err; + } + + if (last_child) { + struct netdev_queue *dev_queue; + + dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue; + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - last_child = 1; + if (q->offload) { + if (new_q) + htb_set_lockdep_class_child(new_q); + htb_parent_to_leaf_offload(sch, dev_queue, new_q); + } } sch_tree_lock(sch); @@ -1285,7 +1692,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) &q->hlevel[cl->level].wait_pq); if (last_child) - htb_parent_to_leaf(q, cl, new_q); + htb_parent_to_leaf(sch, cl, new_q); sch_tree_unlock(sch); @@ -1300,9 +1707,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct tc_htb_qopt_offload offload_opt; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_HTB_MAX + 1]; struct Qdisc *parent_qdisc = NULL; + struct netdev_queue *dev_queue; struct tc_htb_opt *hopt; u64 rate64, ceil64; int warn = 0; @@ -1335,8 +1744,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], NULL)); + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + if (!cl) { /* new class */ - struct Qdisc *new_q; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *new_q, *old_q; int prio; struct { struct nlattr nla; @@ -1379,11 +1792,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); - if (err) { - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } + if (err) + goto err_block_put; } cl->children = 0; @@ -1392,12 +1802,76 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) RB_CLEAR_NODE(&cl->node[prio]); + cl->common.classid = classid; + + /* Make sure nothing interrupts us in between of two + * ndo_setup_tc calls. + */ + ASSERT_RTNL(); + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) * so that can't be used inside of sch_tree_lock * -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!q->offload) { + dev_queue = sch->dev_queue; + } else if (!(parent && !parent->level)) { + /* Assign a dev_queue to this classid. */ + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_ALLOC_QUEUE, + .classid = cl->common.classid, + .parent_classid = parent ? + TC_H_MIN(parent->common.classid) : + TC_HTB_CLASSID_ROOT, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n", + err); + goto err_kill_estimator; + } + dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); + } else { /* First child. */ + dev_queue = parent->leaf.q->dev_queue; + old_q = htb_graft_helper(dev_queue, NULL); + WARN_ON(old_q != parent->leaf.q); + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_TO_INNER, + .classid = cl->common.classid, + .parent_classid = + TC_H_MIN(parent->common.classid), + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n", + err); + htb_graft_helper(dev_queue, old_q); + goto err_kill_estimator; + } + parent->bstats_bias.bytes += old_q->bstats.bytes; + parent->bstats_bias.packets += old_q->bstats.packets; + qdisc_put(old_q); + } + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); + if (q->offload) { + if (new_q) { + htb_set_lockdep_class_child(new_q); + /* One ref for cl->leaf.q, the other for + * dev_queue->qdisc. + */ + qdisc_refcount_inc(new_q); + } + old_q = htb_graft_helper(dev_queue, new_q); + /* No qdisc_put needed. */ + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); + } sch_tree_lock(sch); if (parent && !parent->level) { /* turn parent into inner node */ @@ -1415,10 +1889,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, : TC_HTB_MAXDEPTH) - 1; memset(&parent->inner, 0, sizeof(parent->inner)); } + /* leaf (we) needs elementary qdisc */ cl->leaf.q = new_q ? new_q : &noop_qdisc; - cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1444,12 +1918,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (err) return err; } - sch_tree_lock(sch); - } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + if (q->offload) { + struct net_device *dev = qdisc_dev(sch); + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_NODE_MODIFY, + .classid = cl->common.classid, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + /* Estimator was replaced, and rollback may fail + * as well, so we don't try to recover it, and + * the estimator won't work property with the + * offload anyway, because bstats are updated + * only when the stats are queried. + */ + return err; + } - ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + sch_tree_lock(sch); + } psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); @@ -1492,6 +1984,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, *arg = (unsigned long)cl; return 0; +err_kill_estimator: + gen_kill_estimator(&cl->rate_est); +err_block_put: + tcf_block_put(cl->block); + kfree(cl); failure: return err; } @@ -1557,6 +2054,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) } static const struct Qdisc_class_ops htb_class_ops = { + .select_queue = htb_select_queue, .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, @@ -1579,6 +2077,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, .init = htb_init, + .attach = htb_attach, .reset = htb_reset, .destroy = htb_destroy, .dump = htb_dump, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6335230a971e21233bb1758b57ae72fff41ce6fc..1db9d4a2ef5efcd6ace03fc22d4ea1c69a2847be 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) kfree(cl); } -static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index da047a37a3bf3e86b3e463cfb9f79bf986720d07..dde829d4b9f8349147a14b3366e1b420df815a9c 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOSYS; } -static int sfb_delete(struct Qdisc *sch, unsigned long cl) +static int sfb_delete(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { return -ENOSYS; } diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc8a845857461b7baa3522636dbf31..5c903abc9fa52a52477547b8ca0ce52dcab88b62 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -414,6 +414,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, };