Commit cf19e5e2 authored by David S. Miller

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
40GbE Intel Wired LAN Driver Updates 2018-02-12

This series contains updates to i40e and i40evf.

Alan fixes a spelling mistake in code comments.  He also fixes an issue
where older firmware versions or NPAR enabled PFs, which do not support
the I40E_FLAG_DISABLE_FW_LLDP flag, would get into a situation where any
attempt to change any priv flag was forbidden.

Alex got busy with the ITR code and made several cleanups and fixes so
that we can more easily understand what is going on.  The fixes include a
correction to the computation of the register offset, as well as no
longer needlessly toggling the CLEARPBA bit, which could lead to lost
events if auto-masking is not enabled.  (A brief sketch of the new ITR
register encoding follows the commit metadata below.)

Filip adds a necessary delay to recover after an EMP reset when using
firmware version 4.33.

Paweł adds a warning message for MFP devices when the link-down-on-close
flag is set because it may affect other partitions.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9b2c45d4 a0073a4b
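
Before the per-file diffs, here is a minimal standalone sketch of the new ITR
register encoding the series moves to: ITR settings are now stored in
microseconds rather than 2-usec register units, and the register write shifts
by one less than the INTERVAL field offset to perform the divide by two. The
shift value and macro names below are illustrative stand-ins for the driver's
register definitions, not the real ones.

    /* Illustrative sketch (not driver code): the new ITR encoding. */
    #include <stdio.h>

    #define ITR_DYNAMIC    0x8000 /* top bit flags "adaptive" mode */
    #define ITR_MASK       0x1FFE /* usable microsecond range, always even */
    #define INTERVAL_SHIFT 5      /* assumed stand-in for the hardware's
                                   * xxINT_DYN_CTLx INTERVAL field offset */

    static unsigned int buildreg_interval(unsigned short itr_usecs)
    {
        /* Mask off flag bits, then shift by one less than the field
         * offset: the missing shift is the divide-by-2 from
         * microseconds to 2-usec hardware units.
         */
        itr_usecs &= ITR_MASK;
        return itr_usecs << (INTERVAL_SHIFT - 1);
    }

    int main(void)
    {
        /* 50 usecs (the 20K ints/sec default) lands in the INTERVAL
         * field as 800 >> 5 = 25 two-usec units, the same delay the
         * old register define 0x0019 (25) described.
         */
        printf("0x%x\n", buildreg_interval(50 | ITR_DYNAMIC));
        return 0;
    }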
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -824,6 +824,7 @@ struct i40e_q_vector {
     struct i40e_ring_container rx;
     struct i40e_ring_container tx;
+    u8 itr_countdown;    /* when 0 should adjust adaptive ITR */
     u8 num_ringpairs;    /* total number of ring pairs in vector */
     cpumask_t affinity_mask;
@@ -832,8 +833,6 @@ struct i40e_q_vector {
     struct rcu_head rcu;    /* to avoid race with update stats on free */
     char name[I40E_INT_NAME_STR_LEN];
     bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
-    u8 itr_countdown;    /* when 0 should adjust ITR */
 } ____cacheline_internodealigned_in_smp;
 /* lan device */
...
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -315,9 +315,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
             i, rx_ring->vsi,
             rx_ring->q_vector);
         dev_info(&pf->pdev->dev,
-             "    rx_rings[%i]: rx_itr_setting = %d (%s)\n",
-             i, rx_ring->rx_itr_setting,
-             ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
+             "    rx_rings[%i]: itr_setting = %d (%s)\n",
+             i, rx_ring->itr_setting,
+             ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
     }
     for (i = 0; i < vsi->num_queue_pairs; i++) {
         struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
@@ -366,9 +366,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
             "    tx_rings[%i]: DCB tc = %d\n",
             i, tx_ring->dcb_tc);
         dev_info(&pf->pdev->dev,
-             "    tx_rings[%i]: tx_itr_setting = %d (%s)\n",
-             i, tx_ring->tx_itr_setting,
-             ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
+             "    tx_rings[%i]: itr_setting = %d (%s)\n",
+             i, tx_ring->itr_setting,
+             ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
     }
     rcu_read_unlock();
     dev_info(&pf->pdev->dev,
...
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev,
     rx_ring = vsi->rx_rings[queue];
     tx_ring = vsi->tx_rings[queue];
-    if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+    if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
         ec->use_adaptive_rx_coalesce = 1;
-    if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+    if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
         ec->use_adaptive_tx_coalesce = 1;
-    ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-    ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+    ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+    ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
     /* we use the _usecs_high to store/set the interrupt rate limit
      * that the hardware supports, that almost but not quite
@@ -2311,34 +2311,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
     struct i40e_pf *pf = vsi->back;
     struct i40e_hw *hw = &pf->hw;
     struct i40e_q_vector *q_vector;
-    u16 vector, intrl;
+    u16 intrl;
     intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
-    rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
-    tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+    rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+    tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
     if (ec->use_adaptive_rx_coalesce)
-        rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+        rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
     else
-        rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+        rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
     if (ec->use_adaptive_tx_coalesce)
-        tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+        tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
     else
-        tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+        tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
     q_vector = rx_ring->q_vector;
-    q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
-    vector = vsi->base_vector + q_vector->v_idx;
-    wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+    q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
     q_vector = tx_ring->q_vector;
-    q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
-    vector = vsi->base_vector + q_vector->v_idx;
-    wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
-    wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
+    q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+
+    /* The interrupt handler itself will take care of programming
+     * the Tx and Rx ITR values based on the values we have entered
+     * into the q_vector, no need to write the values now.
+     */
+    wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
     i40e_flush(hw);
 }
@@ -2364,11 +2365,11 @@ static int __i40e_set_coalesce(struct net_device *netdev,
     vsi->work_limit = ec->tx_max_coalesced_frames_irq;
     if (queue < 0) {
-        cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
-        cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+        cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+        cur_tx_itr = vsi->tx_rings[0]->itr_setting;
     } else if (queue < vsi->num_queue_pairs) {
-        cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
-        cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+        cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+        cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
     } else {
         netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
                vsi->num_queue_pairs - 1);
@@ -2396,7 +2397,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
         return -EINVAL;
     }
-    if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+    if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
         netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
         return -EINVAL;
     }
@@ -2407,16 +2408,16 @@ static int __i40e_set_coalesce(struct net_device *netdev,
         return -EINVAL;
     }
-    if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+    if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
         netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
         return -EINVAL;
     }
     if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
-        ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+        ec->rx_coalesce_usecs = I40E_MIN_ITR;
     if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
-        ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+        ec->tx_coalesce_usecs = I40E_MIN_ITR;
     intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
     vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
@@ -4406,6 +4407,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
     }
 flags_complete:
+    changed_flags = orig_flags ^ new_flags;
+
     /* Before we finalize any flag changes, we need to perform some
      * checks to ensure that the changes are supported and safe.
      */
@@ -4415,13 +4418,17 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
         !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
         return -EOPNOTSUPP;
-    /* Disable FW LLDP not supported if NPAR active or if FW
-     * API version < 1.7
+    /* If the driver detected FW LLDP was disabled on init, this flag could
+     * be set, however we do not support _changing_ the flag if NPAR is
+     * enabled or FW API version < 1.7. There are situations where older
+     * FW versions/NPAR enabled PFs could disable LLDP, however we _must_
+     * not allow the user to enable/disable LLDP with this flag on
+     * unsupported FW versions.
      */
-    if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+    if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
         if (pf->hw.func_caps.npar_enable) {
             dev_warn(&pf->pdev->dev,
-                 "Unable to stop FW LLDP if NPAR active\n");
+                 "Unable to change FW LLDP if NPAR active\n");
             return -EOPNOTSUPP;
         }
@@ -4429,7 +4436,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
             (pf->hw.aq.api_maj_ver == 1 &&
              pf->hw.aq.api_min_ver < 7)) {
             dev_warn(&pf->pdev->dev,
-                 "FW ver does not support stopping FW LLDP\n");
+                 "FW ver does not support changing FW LLDP\n");
             return -EOPNOTSUPP;
         }
     }
@@ -4439,6 +4446,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
      * something else has modified the flags variable since we copied it
      * originally. We'll just punt with an error and log something in the
      * message buffer.
+     *
+     * This is the point of no return for this function.  We need to have
+     * checked any discrepancies or misconfigurations and returned
+     * EOPNOTSUPP before updating pf->flags here.
      */
     if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
         dev_warn(&pf->pdev->dev,
@@ -4446,8 +4457,6 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
         return -EAGAIN;
     }
-    changed_flags = orig_flags ^ new_flags;
-
     /* Process any additional changes needed as a result of flag changes.
      * The changed_flags value reflects the list of bits that were
      * changed in the code above.
@@ -4479,6 +4488,12 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
         }
     }
+    if ((changed_flags & pf->flags &
+         I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+        (pf->flags & I40E_FLAG_MFP_ENABLED))
+        dev_warn(&pf->pdev->dev,
+             "Turning on link-down-on-close flag may affect other partitions\n");
+
     if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
         if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
             struct i40e_dcbx_config *dcbcfg;
...
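
With the switch to microsecond-based settings, __i40e_set_coalesce stores the
user's rx/tx-usecs through ITR_REG_ALIGN, which rounds odd requests up to the
register's 2-usec resolution. A small standalone sketch of that rounding,
assuming __ALIGN_MASK(x, m) expands to ((x + m) & ~m) as in the kernel
headers of that era:

    /* Illustrative sketch (not driver code): ITR_REG_ALIGN rounding. */
    #include <stdio.h>

    #define ITR_MASK 0x1FFEu                      /* even values, 0..8190 */
    #define REG_ALIGN(x) (((x) + ~ITR_MASK) & ITR_MASK)

    int main(void)
    {
        unsigned int samples[] = { 5, 6, 25, 8160 };
        unsigned int i;

        for (i = 0; i < 4; i++)
            printf("%u usecs -> stored as %u\n",
                   samples[i], REG_ALIGN(samples[i]));
        /* odd requests round up to the next even value: 5 -> 6, 25 -> 26;
         * even values, including the 8160 maximum, pass through unchanged.
         */
        return 0;
    }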
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3449,15 +3449,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
     for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
         struct i40e_q_vector *q_vector = vsi->q_vectors[i];
-        q_vector->itr_countdown = ITR_COUNTDOWN_START;
-        q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
-        q_vector->rx.latency_range = I40E_LOW_LATENCY;
+        q_vector->rx.next_update = jiffies + 1;
+        q_vector->rx.target_itr =
+            ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
         wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
-             q_vector->rx.itr);
-        q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
-        q_vector->tx.latency_range = I40E_LOW_LATENCY;
+             q_vector->rx.target_itr);
+        q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+        q_vector->tx.next_update = jiffies + 1;
+        q_vector->tx.target_itr =
+            ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
         wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
-             q_vector->tx.itr);
+             q_vector->tx.target_itr);
+        q_vector->tx.current_itr = q_vector->tx.target_itr;
+
         wr32(hw, I40E_PFINT_RATEN(vector - 1),
              i40e_intrl_usec_to_reg(vsi->int_rate_limit));
@@ -3558,13 +3563,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
     u32 val;
     /* set the ITR configuration */
-    q_vector->itr_countdown = ITR_COUNTDOWN_START;
-    q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
-    q_vector->rx.latency_range = I40E_LOW_LATENCY;
-    wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
-    q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
-    q_vector->tx.latency_range = I40E_LOW_LATENCY;
-    wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+    q_vector->rx.next_update = jiffies + 1;
+    q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+    wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+    q_vector->rx.current_itr = q_vector->rx.target_itr;
+    q_vector->tx.next_update = jiffies + 1;
+    q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+    wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+    q_vector->tx.current_itr = q_vector->tx.target_itr;
     i40e_enable_misc_int_causes(pf);
@@ -9215,6 +9221,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
     }
     i40e_get_oem_version(&pf->hw);
+    if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+        ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+         hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+        /* The following delay is necessary for 4.33 firmware and older
+         * to recover after EMP reset. 200 ms should suffice but we
+         * put here 300 ms to be sure that FW is ready to operate
+         * after reset.
+         */
+        mdelay(300);
+    }
+
     /* re-verify the eeprom if we just had an EMP reset */
     if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
         i40e_verify_eeprom(pf);
@@ -10018,7 +10035,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
         ring->dcb_tc = 0;
         if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
             ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
-        ring->tx_itr_setting = pf->tx_itr_default;
+        ring->itr_setting = pf->tx_itr_default;
         vsi->tx_rings[i] = ring++;
         if (!i40e_enabled_xdp_vsi(vsi))
@@ -10036,7 +10053,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
         if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
             ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
         set_ring_xdp(ring);
-        ring->tx_itr_setting = pf->tx_itr_default;
+        ring->itr_setting = pf->tx_itr_default;
         vsi->xdp_rings[i] = ring++;
 setup_rx:
@@ -10049,7 +10066,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
         ring->count = vsi->num_desc;
         ring->size = 0;
         ring->dcb_tc = 0;
-        ring->rx_itr_setting = pf->rx_itr_default;
+        ring->itr_setting = pf->rx_itr_default;
         vsi->rx_rings[i] = ring;
     }
@@ -10328,9 +10345,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
         netif_napi_add(vsi->netdev, &q_vector->napi,
                    i40e_napi_poll, NAPI_POLL_WEIGHT);
-    q_vector->rx.latency_range = I40E_LOW_LATENCY;
-    q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
     /* tie q_vector and vsi together */
     vsi->q_vectors[v_idx] = q_vector;
...
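
A sketch of the initialization invariant the i40e_main.c hunks establish: the
ITR register is written once at setup and current_itr is set equal to
target_itr, so the interrupt path sees no pending update until the adaptive
algorithm moves target_itr; next_update = jiffies + 1 arms the per-jiffy
statistics window. This is a minimal standalone illustration, not driver code:

    #include <stdbool.h>
    #include <stdio.h>

    struct ring_container {
        unsigned long next_update;   /* jiffies-style deadline */
        unsigned short target_itr;   /* what we want programmed */
        unsigned short current_itr;  /* what the hardware has */
    };

    static bool needs_write(const struct ring_container *rc)
    {
        return rc->current_itr != rc->target_itr;
    }

    int main(void)
    {
        struct ring_container rx = { .next_update = 1 };

        rx.target_itr = 50;              /* from the ring's itr_setting */
        rx.current_itr = rx.target_itr;  /* register was just written */
        printf("pending write? %s\n", needs_write(&rx) ? "yes" : "no");

        rx.target_itr = 26;  /* adaptive algorithm wants less delay */
        printf("pending write? %s\n", needs_write(&rx) ? "yes" : "no");
        return 0;
    }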
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -995,99 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
     }
 }
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+                    struct i40e_ring_container *rc)
+{
+    return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+    unsigned int divisor;
+
+    switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+    case I40E_LINK_SPEED_40GB:
+        divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+        break;
+    case I40E_LINK_SPEED_25GB:
+    case I40E_LINK_SPEED_20GB:
+        divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+        break;
+    default:
+    case I40E_LINK_SPEED_10GB:
+        divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+        break;
+    case I40E_LINK_SPEED_1GB:
+    case I40E_LINK_SPEED_100MB:
+        divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+        break;
+    }
+
+    return divisor;
+}
+
 /**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
  * @rc: structure containing ring performance data
  *
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt.  The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern.  Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
-{
-    enum i40e_latency_range new_latency_range = rc->latency_range;
-    u32 new_itr = rc->itr;
-    int bytes_per_usec;
-    unsigned int usecs, estimated_usecs;
-
-    if (rc->total_packets == 0 || !rc->itr)
-        return false;
-
-    usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
-    bytes_per_usec = rc->total_bytes / usecs;
-
-    /* The calculations in this algorithm depend on interrupts actually
-     * firing at the ITR rate. This may not happen if the packet rate is
-     * really low, or if we've been napi polling. Check to make sure
-     * that's not the case before we continue.
-     */
-    estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
-    if (estimated_usecs > usecs) {
-        new_latency_range = I40E_LOW_LATENCY;
-        goto reset_latency;
-    }
-
-    /* simple throttlerate management
-     *   0-10MB/s   lowest (50000 ints/s)
-     *  10-20MB/s   low    (20000 ints/s)
-     *  20-1249MB/s bulk   (18000 ints/s)
-     *
-     * The math works out because the divisor is in 10^(-6) which
-     * turns the bytes/us input value into MB/s values, but
-     * make sure to use usecs, as the register values written
-     * are in 2 usec increments in the ITR registers, and make sure
-     * to use the smoothed values that the countdown timer gives us.
-     */
-    switch (new_latency_range) {
-    case I40E_LOWEST_LATENCY:
-        if (bytes_per_usec > 10)
-            new_latency_range = I40E_LOW_LATENCY;
-        break;
-    case I40E_LOW_LATENCY:
-        if (bytes_per_usec > 20)
-            new_latency_range = I40E_BULK_LATENCY;
-        else if (bytes_per_usec <= 10)
-            new_latency_range = I40E_LOWEST_LATENCY;
-        break;
-    case I40E_BULK_LATENCY:
-    default:
-        if (bytes_per_usec <= 20)
-            new_latency_range = I40E_LOW_LATENCY;
-        break;
-    }
-
-reset_latency:
-    rc->latency_range = new_latency_range;
-
-    switch (new_latency_range) {
-    case I40E_LOWEST_LATENCY:
-        new_itr = I40E_ITR_50K;
-        break;
-    case I40E_LOW_LATENCY:
-        new_itr = I40E_ITR_20K;
-        break;
-    case I40E_BULK_LATENCY:
-        new_itr = I40E_ITR_18K;
-        break;
-    default:
-        break;
-    }
-
-    rc->total_bytes = 0;
-    rc->total_packets = 0;
-    rc->last_itr_update = jiffies;
-
-    if (new_itr != rc->itr) {
-        rc->itr = new_itr;
-        return true;
-    }
-
-    return false;
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+                struct i40e_ring_container *rc)
+{
+    unsigned int avg_wire_size, packets, bytes, itr;
+    unsigned long next_update = jiffies;
+
+    /* If we don't have any rings just leave ourselves set for maximum
+     * possible latency so we take ourselves out of the equation.
+     */
+    if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+        return;
+
+    /* For Rx we want to push the delay up and default to low latency.
+     * for Tx we want to pull the delay down and default to high latency.
+     */
+    itr = i40e_container_is_rx(q_vector, rc) ?
+          I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+          I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+    /* If we didn't update within up to 1 - 2 jiffies we can assume
+     * that either packets are coming in so slow there hasn't been
+     * any work, or that there is so much work that NAPI is dealing
+     * with interrupt moderation and we don't need to do anything.
+     */
+    if (time_after(next_update, rc->next_update))
+        goto clear_counts;
+
+    /* If itr_countdown is set it means we programmed an ITR within
+     * the last 4 interrupt cycles. This has a side effect of us
+     * potentially firing an early interrupt. In order to work around
+     * this we need to throw out any data received for a few
+     * interrupts following the update.
+     */
+    if (q_vector->itr_countdown) {
+        itr = rc->target_itr;
+        goto clear_counts;
+    }
+
+    packets = rc->total_packets;
+    bytes = rc->total_bytes;
+
+    if (i40e_container_is_rx(q_vector, rc)) {
+        /* If Rx there are 1 to 4 packets and bytes are less than
+         * 9000 assume insufficient data to use bulk rate limiting
+         * approach unless Tx is already in bulk rate limiting. We
+         * are likely latency driven.
+         */
+        if (packets && packets < 4 && bytes < 9000 &&
+            (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+            itr = I40E_ITR_ADAPTIVE_LATENCY;
+            goto adjust_by_size;
+        }
+    } else if (packets < 4) {
+        /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+         * bulk mode and we are receiving 4 or fewer packets just
+         * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+         * that the Rx can relax.
+         */
+        if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+            (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+             I40E_ITR_ADAPTIVE_MAX_USECS)
+            goto clear_counts;
+    } else if (packets > 32) {
+        /* If we have processed over 32 packets in a single interrupt
+         * for Tx assume we need to switch over to "bulk" mode.
+         */
+        rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+    }
+
+    /* We have no packets to actually measure against. This means
+     * either one of the other queues on this vector is active or
+     * we are a Tx queue doing TSO with too high of an interrupt rate.
+     *
+     * Between 4 and 56 we can assume that our current interrupt delay
+     * is only slightly too low. As such we should increase it by a small
+     * fixed amount.
+     */
+    if (packets < 56) {
+        itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+        if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+            itr &= I40E_ITR_ADAPTIVE_LATENCY;
+            itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+        }
+        goto clear_counts;
+    }
+
+    if (packets <= 256) {
+        itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+        itr &= I40E_ITR_MASK;
+
+        /* Between 56 and 112 is our "goldilocks" zone where we are
+         * working out "just right". Just report that our current
+         * ITR is good for us.
+         */
+        if (packets <= 112)
+            goto clear_counts;
+
+        /* If packet count is 128 or greater we are likely looking
+         * at a slight overrun of the delay we want. Try halving
+         * our delay to see if that will cut the number of packets
+         * in half per interrupt.
+         */
+        itr /= 2;
+        itr &= I40E_ITR_MASK;
+        if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+            itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+        goto clear_counts;
+    }
+
+    /* The paths below assume we are dealing with a bulk ITR since
+     * number of packets is greater than 256. We are just going to have
+     * to compute a value and try to bring the count under control,
+     * though for smaller packet sizes there isn't much we can do as
+     * NAPI polling will likely be kicking in sooner rather than later.
+     */
+    itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+    /* If packet counts are 256 or greater we can assume we have a gross
+     * overestimation of what the rate should be. Instead of trying to fine
+     * tune it just use the formula below to try and dial in an exact value
+     * give the current packet size of the frame.
+     */
+    avg_wire_size = bytes / packets;
+
+    /* The following is a crude approximation of:
+     *  wmem_default / (size + overhead) = desired_pkts_per_int
+     *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+     *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+     *
+     * Assuming wmem_default is 212992 and overhead is 640 bytes per
+     * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+     * formula down to
+     *
+     *  (170 * (size + 24)) / (size + 640) = ITR
+     *
+     * We first do some math on the packet size and then finally bitshift
+     * by 8 after rounding up. We also have to account for PCIe link speed
+     * difference as ITR scales based on this.
+     */
+    if (avg_wire_size <= 60) {
+        /* Start at 250k ints/sec */
+        avg_wire_size = 4096;
+    } else if (avg_wire_size <= 380) {
+        /* 250K ints/sec to 60K ints/sec */
+        avg_wire_size *= 40;
+        avg_wire_size += 1696;
+    } else if (avg_wire_size <= 1084) {
+        /* 60K ints/sec to 36K ints/sec */
+        avg_wire_size *= 15;
+        avg_wire_size += 11452;
+    } else if (avg_wire_size <= 1980) {
+        /* 36K ints/sec to 30K ints/sec */
+        avg_wire_size *= 5;
+        avg_wire_size += 22420;
+    } else {
+        /* plateau at a limit of 30K ints/sec */
+        avg_wire_size = 32256;
+    }
+
+    /* If we are in low latency mode halve our delay which doubles the
+     * rate to somewhere between 100K to 16K ints/sec
+     */
+    if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+        avg_wire_size /= 2;
+
+    /* Resultant value is 256 times larger than it needs to be. This
+     * gives us room to adjust the value as needed to either increase
+     * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+     *
+     * Use addition as we have already recorded the new latency flag
+     * for the ITR value.
+     */
+    itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+           I40E_ITR_ADAPTIVE_MIN_INC;
+
+    if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+        itr &= I40E_ITR_ADAPTIVE_LATENCY;
+        itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+    }
+
+clear_counts:
+    /* write back value */
+    rc->target_itr = itr;
+
+    /* next update should occur within next jiffy */
+    rc->next_update = next_update + 1;
+
+    rc->total_bytes = 0;
+    rc->total_packets = 0;
 }
 /**
@@ -1991,7 +2133,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * @rx_buffer: rx buffer to pull data from
  *
  * This function will clean up the contents of the rx_buffer.  It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
  */
 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
                    struct i40e_rx_buffer *rx_buffer)
@@ -2274,29 +2416,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
     return failure ? budget : (int)total_rx_packets;
 }
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 {
     u32 val;
+    /* We don't bother with setting the CLEARPBA bit as the data sheet
+     * points out doing so is "meaningless since it was already
+     * auto-cleared". The auto-clearing happens when the interrupt is
+     * asserted.
+     *
+     * Hardware errata 28 for also indicates that writing to a
+     * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+     * an event in the PBA anyway so we need to rely on the automask
+     * to hold pending events for us until the interrupt is re-enabled
+     *
+     * The itr value is reported in microseconds, and the register
+     * value is recorded in 2 microsecond units. For this reason we
+     * only need to shift by the interval shift - 1 instead of the
+     * full value.
+     */
+    itr &= I40E_ITR_MASK;
+
     val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-          I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
           (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-          (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+          (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
     return val;
 }
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_PFINT_DYN_CTLN
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
-    return vsi->rx_rings[idx]->rx_itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
-    return vsi->tx_rings[idx]->tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -2308,10 +2466,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
                       struct i40e_q_vector *q_vector)
 {
     struct i40e_hw *hw = &vsi->back->hw;
-    bool rx = false, tx = false;
-    u32 rxval, txval;
-    int idx = q_vector->v_idx;
-    int rx_itr_setting, tx_itr_setting;
+    u32 intval;
     /* If we don't have MSIX, then we only need to re-enable icr0 */
     if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2319,65 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
         return;
     }
-    /* avoid dynamic calculation if in countdown mode OR if
-     * all dynamic is disabled
-     */
-    rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
-    rx_itr_setting = get_rx_itr(vsi, idx);
-    tx_itr_setting = get_tx_itr(vsi, idx);
-
-    if (q_vector->itr_countdown > 0 ||
-        (!ITR_IS_DYNAMIC(rx_itr_setting) &&
-         !ITR_IS_DYNAMIC(tx_itr_setting))) {
-        goto enable_int;
-    }
-
-    if (ITR_IS_DYNAMIC(rx_itr_setting)) {
-        rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-        rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
-    }
-
-    if (ITR_IS_DYNAMIC(tx_itr_setting)) {
-        tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-        txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
-    }
-
-    if (rx || tx) {
-        /* get the higher of the two ITR adjustments and
-         * use the same value for both ITR registers
-         * when in adaptive mode (Rx and/or Tx)
-         */
-        u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
-        q_vector->tx.itr = q_vector->rx.itr = itr;
-        txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
-        tx = true;
-        rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
-        rx = true;
-    }
-
-    /* only need to enable the interrupt once, but need
-     * to possibly update both ITR values
-     */
-    if (rx) {
-        /* set the INTENA_MSK_MASK so that this first write
-         * won't actually enable the interrupt, instead just
-         * updating the ITR (it's bit 31 PF and VF)
-         */
-        rxval |= BIT(31);
-        /* don't check _DOWN because interrupt isn't being enabled */
-        wr32(hw, INTREG(q_vector->reg_idx), rxval);
-    }
+    /* These will do nothing if dynamic updates are not enabled */
+    i40e_update_itr(q_vector, &q_vector->tx);
+    i40e_update_itr(q_vector, &q_vector->rx);
+
+    /* This block of logic allows us to get away with only updating
+     * one ITR value with each interrupt. The idea is to perform a
+     * pseudo-lazy update with the following criteria.
+     *
+     * 1. Rx is given higher priority than Tx if both are in same state
+     * 2. If we must reduce an ITR that is given highest priority.
+     * 3. We then give priority to increasing ITR based on amount.
+     */
+    if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+        /* Rx ITR needs to be reduced, this is highest priority */
+        intval = i40e_buildreg_itr(I40E_RX_ITR,
+                       q_vector->rx.target_itr);
+        q_vector->rx.current_itr = q_vector->rx.target_itr;
+        q_vector->itr_countdown = ITR_COUNTDOWN_START;
+    } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+           ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+            (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+        /* Tx ITR needs to be reduced, this is second priority
+         * Tx ITR needs to be increased more than Rx, fourth priority
+         */
+        intval = i40e_buildreg_itr(I40E_TX_ITR,
+                       q_vector->tx.target_itr);
+        q_vector->tx.current_itr = q_vector->tx.target_itr;
+        q_vector->itr_countdown = ITR_COUNTDOWN_START;
+    } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+        /* Rx ITR needs to be increased, third priority */
+        intval = i40e_buildreg_itr(I40E_RX_ITR,
+                       q_vector->rx.target_itr);
+        q_vector->rx.current_itr = q_vector->rx.target_itr;
+        q_vector->itr_countdown = ITR_COUNTDOWN_START;
+    } else {
+        /* No ITR update, lowest priority */
+        intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+        if (q_vector->itr_countdown)
+            q_vector->itr_countdown--;
+    }
-enable_int:
     if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-        wr32(hw, INTREG(q_vector->reg_idx), txval);
-
-    if (q_vector->itr_countdown)
-        q_vector->itr_countdown--;
-    else
-        q_vector->itr_countdown = ITR_COUNTDOWN_START;
+        wr32(hw, INTREG(q_vector->reg_idx), intval);
 }
 /**
...
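
The adjust_by_size path above converts an average wire size into an ITR in
microseconds. A standalone sketch, evaluating the same piecewise
approximation with an assumed 40GbE divisor of
I40E_ITR_ADAPTIVE_MIN_INC * 1024 = 2048 (the rates quoted in the code
comments line up at that link speed); this is an illustration, not the
driver function:

    #include <stdio.h>

    #define MIN_INC 2
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    static unsigned int itr_for_size(unsigned int avg_wire_size, int latency)
    {
        if (avg_wire_size <= 60)
            avg_wire_size = 4096;                    /* ~250K ints/sec */
        else if (avg_wire_size <= 380)
            avg_wire_size = avg_wire_size * 40 + 1696;
        else if (avg_wire_size <= 1084)
            avg_wire_size = avg_wire_size * 15 + 11452;
        else if (avg_wire_size <= 1980)
            avg_wire_size = avg_wire_size * 5 + 22420;
        else
            avg_wire_size = 32256;                   /* ~30K ints/sec cap */

        if (latency)
            avg_wire_size /= 2;  /* low latency mode doubles the rate */

        /* value is 256x too large; scale by the link-speed divisor */
        return DIV_ROUND_UP(avg_wire_size, 2048) * MIN_INC;
    }

    int main(void)
    {
        /* 64-byte frames in bulk mode -> 6 usecs (~167K ints/sec);
         * 1514-byte frames -> 30 usecs (~33K ints/sec).
         */
        printf("%u %u\n", itr_for_size(64, 0), itr_for_size(1514, 0));
        return 0;
    }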
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -30,32 +30,37 @@
 #include <net/xdp.h>
 /* Interrupt Throttling and Rate Limiting Goodies */
-#define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR               0x0001  /* reg uses 2 usec resolution */
-#define I40E_ITR_100K              0x0005
-#define I40E_ITR_50K               0x000A
-#define I40E_ITR_20K               0x0019
-#define I40E_ITR_18K               0x001B
-#define I40E_ITR_8K                0x003E
-#define I40E_ITR_4K                0x007A
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-                    I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-                    I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
-#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
 #define I40E_DEFAULT_IRQ_WORK      256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC    0x8000  /* use top bit as a flag */
+#define I40E_ITR_MASK       0x1FFE  /* mask for ITR register value */
+#define I40E_MIN_ITR             2  /* reg uses 2 usec resolution */
+#define I40E_ITR_100K           10  /* all values below must be even */
+#define I40E_ITR_50K            20
+#define I40E_ITR_20K            50
+#define I40E_ITR_18K            60
+#define I40E_ITR_8K            122
+#define I40E_MAX_ITR          8160  /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF     (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF     (I40E_ITR_20K | I40E_ITR_DYNAMIC)
 /* 0x40 is the enable bit for interrupt rate limiting, and must be set if
  * the value of the rate limit is non-zero
  */
 #define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
 /**
  * i40e_intrl_usec_to_reg - convert interrupt rate limit to register
  * @intrl: interrupt rate limit to convert
@@ -382,8 +387,7 @@ struct i40e_ring {
      * these values always store the USER setting, and must be converted
      * before programming to a register.
      */
-    u16 rx_itr_setting;
-    u16 tx_itr_setting;
+    u16 itr_setting;
     u16 count;            /* Number of descriptors */
     u16 reg_idx;          /* HW register index of the ring */
@@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
     ring->flags |= I40E_TXR_FLAGS_XDP;
 }
-enum i40e_latency_range {
-    I40E_LOWEST_LATENCY = 0,
-    I40E_LOW_LATENCY = 1,
-    I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC   0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY   0x8000
+#define I40E_ITR_ADAPTIVE_BULK      0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
 struct i40e_ring_container {
-    /* array of pointers to rings */
-    struct i40e_ring *ring;
+    struct i40e_ring *ring;       /* pointer to linked list of ring(s) */
+    unsigned long next_update;    /* jiffies value of next update */
     unsigned int total_bytes;     /* total bytes processed this int */
     unsigned int total_packets;   /* total packets processed this int */
-    unsigned long last_itr_update;    /* jiffies of last ITR update */
     u16 count;
-    enum i40e_latency_range latency_range;
-    u16 itr;
+    u16 target_itr;               /* target ITR setting for ring(s) */
+    u16 current_itr;              /* current ITR setting for ring(s) */
 };
 /* iterator for handling rings in ring container */
...
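
The header rework stores the throttle defines in microseconds instead of
2-usec register units. Multiplying each old define by two shows that
100K/50K/20K map over exactly, while I40E_ITR_18K and I40E_ITR_8K were
re-derived as nearby even microsecond values (54 -> 60 and 124 -> 122). A
quick standalone check:

    #include <stdio.h>

    int main(void)
    {
        /* old define (2 usec units) vs new define (microseconds) */
        struct { const char *name; unsigned int old_reg, new_usecs; } t[] = {
            { "I40E_ITR_100K", 0x0005, 10 },
            { "I40E_ITR_50K",  0x000A, 20 },
            { "I40E_ITR_20K",  0x0019, 50 },
            { "I40E_ITR_18K",  0x001B, 60 },
            { "I40E_ITR_8K",   0x003E, 122 },
        };
        unsigned int i;

        for (i = 0; i < sizeof(t) / sizeof(t[0]); i++)
            printf("%-14s old=%3u usecs new=%3u usecs\n",
                   t[i].name, t[i].old_reg * 2, t[i].new_usecs);
        return 0;
    }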
...@@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) ...@@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
val); val);
} }
static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
struct i40e_ring_container *rc)
{
return &q_vector->rx == rc;
}
static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
{
unsigned int divisor;
switch (q_vector->adapter->link_speed) {
case I40E_LINK_SPEED_40GB:
divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
break;
case I40E_LINK_SPEED_25GB:
case I40E_LINK_SPEED_20GB:
divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
break;
default:
case I40E_LINK_SPEED_10GB:
divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
break;
case I40E_LINK_SPEED_1GB:
case I40E_LINK_SPEED_100MB:
divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
break;
}
return divisor;
}
/** /**
* i40e_set_new_dynamic_itr - Find new ITR level * i40e_update_itr - update the dynamic ITR value based on statistics
* @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data * @rc: structure containing ring performance data
* *
* Returns true if ITR changed, false if not * Stores a new ITR value based on packets and byte
* * counts during the last interrupt. The advantage of per interrupt
* Stores a new ITR value based on packets and byte counts during * computation is faster updates and more accurate ITR for the current
* the last interrupt. The advantage of per interrupt computation * traffic pattern. Constants in this function were computed
* is faster updates and more accurate ITR for the current traffic * based on theoretical maximum wire speed and thresholds were set based
* pattern. Constants in this function were computed based on * on testing data as well as attempting to minimize response time
* theoretical maximum wire speed and thresholds were set based on
* testing data as well as attempting to minimize response time
* while increasing bulk throughput. * while increasing bulk throughput.
**/ **/
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) static void i40e_update_itr(struct i40e_q_vector *q_vector,
struct i40e_ring_container *rc)
{ {
enum i40e_latency_range new_latency_range = rc->latency_range; unsigned int avg_wire_size, packets, bytes, itr;
u32 new_itr = rc->itr; unsigned long next_update = jiffies;
int bytes_per_usec;
unsigned int usecs, estimated_usecs;
if (rc->total_packets == 0 || !rc->itr) /* If we don't have any rings just leave ourselves set for maximum
return false; * possible latency so we take ourselves out of the equation.
*/
if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
return;
/* For Rx we want to push the delay up and default to low latency.
* for Tx we want to pull the delay down and default to high latency.
*/
itr = i40e_container_is_rx(q_vector, rc) ?
I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
/* If we didn't update within up to 1 - 2 jiffies we can assume
* that either packets are coming in so slow there hasn't been
* any work, or that there is so much work that NAPI is dealing
* with interrupt moderation and we don't need to do anything.
*/
if (time_after(next_update, rc->next_update))
goto clear_counts;
/* If itr_countdown is set it means we programmed an ITR within
* the last 4 interrupt cycles. This has a side effect of us
* potentially firing an early interrupt. In order to work around
* this we need to throw out any data received for a few
* interrupts following the update.
*/
if (q_vector->itr_countdown) {
itr = rc->target_itr;
goto clear_counts;
}
packets = rc->total_packets;
bytes = rc->total_bytes;
usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; if (i40e_container_is_rx(q_vector, rc)) {
bytes_per_usec = rc->total_bytes / usecs; /* If Rx there are 1 to 4 packets and bytes are less than
* 9000 assume insufficient data to use bulk rate limiting
* approach unless Tx is already in bulk rate limiting. We
* are likely latency driven.
*/
if (packets && packets < 4 && bytes < 9000 &&
(q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
itr = I40E_ITR_ADAPTIVE_LATENCY;
goto adjust_by_size;
}
} else if (packets < 4) {
/* If we have Tx and Rx ITR maxed and Tx ITR is running in
* bulk mode and we are receiving 4 or fewer packets just
* reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
* that the Rx can relax.
*/
if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
(q_vector->rx.target_itr & I40E_ITR_MASK) ==
I40E_ITR_ADAPTIVE_MAX_USECS)
goto clear_counts;
} else if (packets > 32) {
/* If we have processed over 32 packets in a single interrupt
* for Tx assume we need to switch over to "bulk" mode.
*/
rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
}
/* The calculations in this algorithm depend on interrupts actually /* We have no packets to actually measure against. This means
* firing at the ITR rate. This may not happen if the packet rate is * either one of the other queues on this vector is active or
* really low, or if we've been napi polling. Check to make sure * we are a Tx queue doing TSO with too high of an interrupt rate.
* that's not the case before we continue. *
* Between 4 and 56 we can assume that our current interrupt delay
* is only slightly too low. As such we should increase it by a small
* fixed amount.
*/ */
estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); if (packets < 56) {
if (estimated_usecs > usecs) { itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
new_latency_range = I40E_LOW_LATENCY; if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
goto reset_latency; itr &= I40E_ITR_ADAPTIVE_LATENCY;
itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
goto clear_counts;
}
if (packets <= 256) {
itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
itr &= I40E_ITR_MASK;
/* Between 56 and 112 is our "goldilocks" zone where we are
* working out "just right". Just report that our current
* ITR is good for us.
*/
if (packets <= 112)
goto clear_counts;
/* If packet count is 128 or greater we are likely looking
* at a slight overrun of the delay we want. Try halving
* our delay to see if that will cut the number of packets
* in half per interrupt.
*/
itr /= 2;
itr &= I40E_ITR_MASK;
if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
itr = I40E_ITR_ADAPTIVE_MIN_USECS;
goto clear_counts;
} }
/* simple throttlerate management /* The paths below assume we are dealing with a bulk ITR since
* 0-10MB/s lowest (50000 ints/s) * number of packets is greater than 256. We are just going to have
* 10-20MB/s low (20000 ints/s) * to compute a value and try to bring the count under control,
* 20-1249MB/s bulk (18000 ints/s) * though for smaller packet sizes there isn't much we can do as
* NAPI polling will likely be kicking in sooner rather than later.
*/
itr = I40E_ITR_ADAPTIVE_BULK;
adjust_by_size:
/* If packet counts are 256 or greater we can assume we have a gross
* overestimation of what the rate should be. Instead of trying to fine
* tune it just use the formula below to try and dial in an exact value
* give the current packet size of the frame.
*/
avg_wire_size = bytes / packets;
/* The following is a crude approximation of:
* wmem_default / (size + overhead) = desired_pkts_per_int
* rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
* (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
* *
* The math works out because the divisor is in 10^(-6) which * Assuming wmem_default is 212992 and overhead is 640 bytes per
* turns the bytes/us input value into MB/s values, but * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
* make sure to use usecs, as the register values written * formula down to
* are in 2 usec increments in the ITR registers, and make sure *
* to use the smoothed values that the countdown timer gives us. * (170 * (size + 24)) / (size + 640) = ITR
*
* We first do some math on the packet size and then finally bitshift
* by 8 after rounding up. We also have to account for PCIe link speed
* difference as ITR scales based on this.
*/ */
switch (new_latency_range) { if (avg_wire_size <= 60) {
case I40E_LOWEST_LATENCY: /* Start at 250k ints/sec */
if (bytes_per_usec > 10) avg_wire_size = 4096;
new_latency_range = I40E_LOW_LATENCY; } else if (avg_wire_size <= 380) {
break; /* 250K ints/sec to 60K ints/sec */
case I40E_LOW_LATENCY: avg_wire_size *= 40;
if (bytes_per_usec > 20) avg_wire_size += 1696;
new_latency_range = I40E_BULK_LATENCY; } else if (avg_wire_size <= 1084) {
else if (bytes_per_usec <= 10) /* 60K ints/sec to 36K ints/sec */
new_latency_range = I40E_LOWEST_LATENCY; avg_wire_size *= 15;
break; avg_wire_size += 11452;
case I40E_BULK_LATENCY: } else if (avg_wire_size <= 1980) {
default: /* 36K ints/sec to 30K ints/sec */
if (bytes_per_usec <= 20) avg_wire_size *= 5;
new_latency_range = I40E_LOW_LATENCY; avg_wire_size += 22420;
break; } else {
/* plateau at a limit of 30K ints/sec */
avg_wire_size = 32256;
} }
reset_latency: /* If we are in low latency mode halve our delay which doubles the
rc->latency_range = new_latency_range; * rate to somewhere between 100K to 16K ints/sec
*/
if (itr & I40E_ITR_ADAPTIVE_LATENCY)
avg_wire_size /= 2;
switch (new_latency_range) { /* Resultant value is 256 times larger than it needs to be. This
case I40E_LOWEST_LATENCY: * gives us room to adjust the value as needed to either increase
new_itr = I40E_ITR_50K; * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
break; *
case I40E_LOW_LATENCY: * Use addition as we have already recorded the new latency flag
new_itr = I40E_ITR_20K; * for the ITR value.
break; */
case I40E_BULK_LATENCY: itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
new_itr = I40E_ITR_18K; I40E_ITR_ADAPTIVE_MIN_INC;
break;
default: if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
break; itr &= I40E_ITR_ADAPTIVE_LATENCY;
itr += I40E_ITR_ADAPTIVE_MAX_USECS;
} }
clear_counts:
/* write back value */
rc->target_itr = itr;
/* next update should occur within next jiffy */
rc->next_update = next_update + 1;
rc->total_bytes = 0; rc->total_bytes = 0;
rc->total_packets = 0; rc->total_packets = 0;
rc->last_itr_update = jiffies;
if (new_itr != rc->itr) {
rc->itr = new_itr;
return true;
}
return false;
} }
/** /**
...@@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, ...@@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* @rx_buffer: rx buffer to pull data from * @rx_buffer: rx buffer to pull data from
* *
* This function will clean up the contents of the rx_buffer. It will * This function will clean up the contents of the rx_buffer. It will
* either recycle the bufer or unmap it and free the associated resources. * either recycle the buffer or unmap it and free the associated resources.
*/ */
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer) struct i40e_rx_buffer *rx_buffer)
...@@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) ...@@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets; return failure ? budget : (int)total_rx_packets;
} }
static u32 i40e_buildreg_itr(const int type, const u16 itr) static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{ {
u32 val; u32 val;
/* We don't bother with setting the CLEARPBA bit as the data sheet
* points out doing so is "meaningless since it was already
* auto-cleared". The auto-clearing happens when the interrupt is
* asserted.
*
* Hardware errata 28 for also indicates that writing to a
* xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
* an event in the PBA anyway so we need to rely on the automask
* to hold pending events for us until the interrupt is re-enabled
*
* The itr value is reported in microseconds, and the register
* value is recorded in 2 microsecond units. For this reason we
* only need to shift by the interval shift - 1 instead of the
* full value.
*/
itr &= I40E_ITR_MASK;
val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
(type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
(itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
return val; return val;
} }
/* a small macro to shorten up some long lines */ /* a small macro to shorten up some long lines */
#define INTREG I40E_VFINT_DYN_CTLN1 #define INTREG I40E_VFINT_DYN_CTLN1
static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
{
struct i40evf_adapter *adapter = vsi->back;
return adapter->rx_rings[idx].rx_itr_setting; /* The act of updating the ITR will cause it to immediately trigger. In order
} * to prevent this from throwing off adaptive update statistics we defer the
* update so that it can only happen so often. So after either Tx or Rx are
static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) * updated we make the adaptive scheme wait until either the ITR completely
{ * expires via the next_update expiration or we have been through at least
struct i40evf_adapter *adapter = vsi->back; * 3 interrupts.
*/
return adapter->tx_rings[idx].tx_itr_setting; #define ITR_COUNTDOWN_START 3
}
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
...
@@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 					  struct i40e_q_vector *q_vector)
 {
 	struct i40e_hw *hw = &vsi->back->hw;
-	bool rx = false, tx = false;
-	u32 rxval, txval;
-	int idx = q_vector->v_idx;
-	int rx_itr_setting, tx_itr_setting;
-
-	/* avoid dynamic calculation if in countdown mode OR if
-	 * all dynamic is disabled
-	 */
-	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
-	rx_itr_setting = get_rx_itr(vsi, idx);
-	tx_itr_setting = get_tx_itr(vsi, idx);
+	u32 intval;
 
-	if (q_vector->itr_countdown > 0 ||
-	    (!ITR_IS_DYNAMIC(rx_itr_setting) &&
-	     !ITR_IS_DYNAMIC(tx_itr_setting))) {
-		goto enable_int;
-	}
-
-	if (ITR_IS_DYNAMIC(rx_itr_setting)) {
-		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
-	}
+	/* These will do nothing if dynamic updates are not enabled */
+	i40e_update_itr(q_vector, &q_vector->tx);
+	i40e_update_itr(q_vector, &q_vector->rx);
 
-	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
-		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
-	}
-
-	if (rx || tx) {
-		/* get the higher of the two ITR adjustments and
-		 * use the same value for both ITR registers
-		 * when in adaptive mode (Rx and/or Tx)
-		 */
-		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
-		q_vector->tx.itr = q_vector->rx.itr = itr;
-		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
-		tx = true;
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
-		rx = true;
-	}
-
-	/* only need to enable the interrupt once, but need
-	 * to possibly update both ITR values
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR, that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
 	 */
-	if (rx) {
-		/* set the INTENA_MSK_MASK so that this first write
-		 * won't actually enable the interrupt, instead just
-		 * updating the ITR (it's bit 31 PF and VF)
-		 */
-		rxval |= BIT(31);
-		/* don't check _DOWN because interrupt isn't being enabled */
-		wr32(hw, INTREG(q_vector->reg_idx), rxval);
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		intval = i40e_buildreg_itr(I40E_TX_ITR,
+					   q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else {
+		/* No ITR update, lowest priority */
+		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
 	}
 
-enable_int:
 	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-		wr32(hw, INTREG(q_vector->reg_idx), txval);
-
-	if (q_vector->itr_countdown)
-		q_vector->itr_countdown--;
-	else
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+		wr32(hw, INTREG(q_vector->reg_idx), intval);
 }
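
[Editor's note] To make the priority ladder concrete, this small sketch replays the same comparisons outside the driver. Given Rx current=50/target=20 and Tx current=20/target=60, the Rx reduction wins (priority 1), so only the Rx ITR register is written on this interrupt and Tx catches up later. The enum and function name are hypothetical.

#include <assert.h>

enum itr_pick { PICK_RX, PICK_TX, PICK_NONE };

static enum itr_pick pick_itr_update(unsigned short rx_cur, unsigned short rx_tgt,
				     unsigned short tx_cur, unsigned short tx_tgt)
{
	if (rx_tgt < rx_cur)
		return PICK_RX;		/* Rx reduction: highest priority */
	if (tx_tgt < tx_cur ||
	    (unsigned short)(rx_tgt - rx_cur) < (unsigned short)(tx_tgt - tx_cur))
		return PICK_TX;		/* Tx reduction, or Tx grows more than Rx */
	if (rx_cur != rx_tgt)
		return PICK_RX;		/* Rx increase: third priority */
	return PICK_NONE;		/* nothing to write this interrupt */
}

int main(void)
{
	/* Rx wants to drop 50 -> 20 while Tx wants to rise 20 -> 60:
	 * the reduction wins, so only the Rx ITR is programmed now
	 */
	assert(pick_itr_update(50, 20, 20, 60) == PICK_RX);
	return 0;
}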
 /**
...
...
@@ -28,31 +28,35 @@
 #define _I40E_TXRX_H_
 
 /* Interrupt Throttling and Rate Limiting Goodies */
-#define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR               0x0001  /* reg uses 2 usec resolution */
-#define I40E_ITR_100K              0x0005
-#define I40E_ITR_50K               0x000A
-#define I40E_ITR_20K               0x0019
-#define I40E_ITR_18K               0x001B
-#define I40E_ITR_8K                0x003E
-#define I40E_ITR_4K                0x007A
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-				    I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-				    I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
-#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
 #define I40E_DEFAULT_IRQ_WORK      256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheet for the X710 and XL710 indicates that the maximum value for
+ * the ITR is 8160 usec, which is called out as 0xFF0 with a 2 usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value, which is divided by 2, let's use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define I40E_MIN_ITR		     2	/* reg uses 2 usec resolution */
+#define I40E_ITR_100K		    10	/* all values below must be even */
+#define I40E_ITR_50K		    20
+#define I40E_ITR_20K		    50
+#define I40E_ITR_18K		    60
+#define I40E_ITR_8K		   122
+#define I40E_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF	(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF	(I40E_ITR_20K | I40E_ITR_DYNAMIC)
 
 /* 0x40 is the enable bit for interrupt rate limiting, and must be set if
  * the value of the rate limit is non-zero
  */
 #define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
 #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
 #define I40E_INTRL_8K              125     /* 8000 ints/sec */
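
[Editor's note] A few worked values for the conversion macros above may help. The sketch replicates __ALIGN_MASK from the kernel's kernel.h so it compiles on its own; the printed results are easy to verify by hand.

#include <stdio.h>

#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))	/* as in kernel.h */

#define I40E_ITR_DYNAMIC	0x8000
#define I40E_ITR_MASK		0x1FFE
#define ITR_TO_REG(setting)	((setting) & ~I40E_ITR_DYNAMIC)
#define ITR_REG_ALIGN(setting)	__ALIGN_MASK(setting, ~I40E_ITR_MASK)

#define INTRL_ENA		(1 << 6)
#define INTRL_USEC_TO_REG(set)	((set) ? ((set) >> 2) | INTRL_ENA : 0)

int main(void)
{
	printf("%d\n", ITR_REG_ALIGN(5));	/* 6: odd usecs round up to even */
	printf("%d\n", ITR_TO_REG(50 | I40E_ITR_DYNAMIC));	/* 50: flag stripped */
	printf("0x%x\n", INTRL_USEC_TO_REG(125));	/* 0x5f: 4 usec units + enable bit */
	return 0;
}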
...
@@ -362,8 +366,7 @@ struct i40e_ring {
 	 * these values always store the USER setting, and must be converted
 	 * before programming to a register.
 	 */
-	u16 rx_itr_setting;
-	u16 tx_itr_setting;
+	u16 itr_setting;
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
...
@@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
 	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
 }
 
-enum i40e_latency_range {
-	I40E_LOWEST_LATENCY = 0,
-	I40E_LOW_LATENCY = 1,
-	I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
+#define I40E_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x)		(!((x) & I40E_ITR_ADAPTIVE_LATENCY))
 
 struct i40e_ring_container {
-	/* array of pointers to rings */
-	struct i40e_ring *ring;
+	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
-	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
-	enum i40e_latency_range latency_range;
-	u16 itr;
+	u16 target_itr;			/* target ITR setting for ring(s) */
+	u16 current_itr;		/* current ITR setting for ring(s) */
 };
 
 /* iterator for handling rings in ring container */
...
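
[Editor's note] The adaptive constants above reuse the top bit of the microsecond value as a latency-mode tag (set for latency mode, clear for bulk), which ITR_IS_BULK tests. A minimal sketch of that convention, with a hypothetical helper name, showing the tag being stripped before the value could be programmed:

#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
#define ITR_IS_BULK(x)	(!((x) & I40E_ITR_ADAPTIVE_LATENCY))

/* strip the mode tag to recover the programmable usec value */
static unsigned short adaptive_itr_usecs(unsigned short tagged)
{
	return tagged & ~I40E_ITR_ADAPTIVE_LATENCY;
}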
...
@@ -117,9 +117,8 @@ struct i40e_q_vector {
 	struct i40e_ring_container rx;
 	struct i40e_ring_container tx;
 	u32 ring_mask;
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
-	u8 itr_countdown;	/* when 0 or 1 update ITR */
 	u16 v_idx;		/* index in the vsi->q_vector array. */
 	u16 reg_idx;		/* register index of the interrupt */
 	char name[IFNAMSIZ + 15];
...
...
@@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev,
 	rx_ring = &adapter->rx_rings[queue];
 	tx_ring = &adapter->tx_rings[queue];
 
-	if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
 		ec->use_adaptive_rx_coalesce = 1;
 
-	if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
 		ec->use_adaptive_tx_coalesce = 1;
 
-	ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-	ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
 
 	return 0;
 }
...
@@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
 /**
  * i40evf_set_itr_per_queue - set ITR values for specific queue
- * @vsi: the VSI to set values for
+ * @adapter: the VF adapter struct to set values for
  * @ec: coalesce settings from ethtool
  * @queue: the queue to modify
  *
...
@@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 {
 	struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
 	struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
-	struct i40e_vsi *vsi = &adapter->vsi;
-	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_q_vector *q_vector;
-	u16 vector;
 
-	rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
-	tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
 
-	rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+	rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	if (!ec->use_adaptive_rx_coalesce)
-		rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
+		rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
-	tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+	tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	if (!ec->use_adaptive_tx_coalesce)
-		tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
+		tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
 	q_vector = rx_ring->q_vector;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
-	vector = vsi->base_vector + q_vector->v_idx;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 
 	q_vector = tx_ring->q_vector;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
-	vector = vsi->base_vector + q_vector->v_idx;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
 
-	i40e_flush(hw);
+	/* The interrupt handler itself will take care of programming
+	 * the Tx and Rx ITR values based on the values we have entered
+	 * into the q_vector, no need to write the values now.
+	 */
 }
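
[Editor's note] A sketch of the normalization path an ethtool request now takes under this scheme, assuming the macros from i40e_txrx.h above (redefined here so the snippet stands alone); for example rx-usecs 37 is aligned up to 38 before the dynamic flag is applied. The function name is hypothetical.

#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define I40E_ITR_DYNAMIC	0x8000
#define I40E_ITR_MASK		0x1FFE
#define ITR_REG_ALIGN(setting)	__ALIGN_MASK(setting, ~I40E_ITR_MASK)

/* e.g. rx-usecs 37 -> 38, then tagged dynamic only if adaptive was requested */
static unsigned short normalize_itr(unsigned short usecs, int adaptive)
{
	unsigned short setting = ITR_REG_ALIGN(usecs);

	return adaptive ? (setting | I40E_ITR_DYNAMIC) : setting;
}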
 /**
...
@@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
 	if (ec->rx_coalesce_usecs == 0) {
 		if (ec->use_adaptive_rx_coalesce)
 			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
-	} else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		   (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+	} else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+		   (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
 		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}
...
@@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
 	if (ec->tx_coalesce_usecs == 0) {
 		if (ec->use_adaptive_tx_coalesce)
 			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
-	} else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		   (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+	} else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+		   (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
 		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}
...
...
@@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 	rx_ring->vsi = &adapter->vsi;
 	q_vector->rx.ring = rx_ring;
 	q_vector->rx.count++;
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+	q_vector->rx.next_update = jiffies + 1;
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 	q_vector->ring_mask |= BIT(r_idx);
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+	     q_vector->rx.current_itr);
+	q_vector->rx.current_itr = q_vector->rx.target_itr;
 }
 /**
...
@@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 	tx_ring->vsi = &adapter->vsi;
 	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	q_vector->tx.next_update = jiffies + 1;
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
 	q_vector->num_ringpairs++;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+	     q_vector->tx.target_itr);
+	q_vector->tx.current_itr = q_vector->tx.target_itr;
 }
 /**
...
@@ -1169,7 +1171,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		tx_ring->netdev = adapter->netdev;
 		tx_ring->dev = &adapter->pdev->dev;
 		tx_ring->count = adapter->tx_desc_count;
-		tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
+		tx_ring->itr_setting = I40E_ITR_TX_DEF;
 		if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
 			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
...
@@ -1178,7 +1180,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		rx_ring->netdev = adapter->netdev;
 		rx_ring->dev = &adapter->pdev->dev;
 		rx_ring->count = adapter->rx_desc_count;
-		rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
+		rx_ring->itr_setting = I40E_ITR_RX_DEF;
 	}
 
 	adapter->num_active_queues = num_active_queues;
...
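
[Editor's note] Since the *_DEF values now store real microseconds, the default I40E_ITR_20K | I40E_ITR_DYNAMIC reads directly as an adaptive 50 usec interval, i.e. roughly 20,000 interrupts per second. A trivial standalone check:

#include <assert.h>

#define I40E_ITR_DYNAMIC	0x8000
#define I40E_ITR_20K		50	/* usecs, per the header above */

int main(void)
{
	unsigned short def = I40E_ITR_20K | I40E_ITR_DYNAMIC;
	unsigned short usecs = def & ~I40E_ITR_DYNAMIC;

	assert(usecs == 50);
	assert(1000000 / usecs == 20000);	/* ~20K interrupts/sec */
	return 0;
}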
...
@@ -344,6 +344,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
 void i40evf_map_queues(struct i40evf_adapter *adapter)
 {
 	struct virtchnl_irq_map_info *vimi;
+	struct virtchnl_vector_map *vecmap;
 	int v_idx, q_vectors, len;
 	struct i40e_q_vector *q_vector;
...
@@ -367,17 +368,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
 	vimi->num_vectors = adapter->num_msix_vectors;
 	/* Queue vectors first */
 	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
-		q_vector = adapter->q_vectors + v_idx;
-		vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
-		vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
-		vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
-		vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask;
+		q_vector = &adapter->q_vectors[v_idx];
+		vecmap = &vimi->vecmap[v_idx];
+
+		vecmap->vsi_id = adapter->vsi_res->vsi_id;
+		vecmap->vector_id = v_idx + NONQ_VECS;
+		vecmap->txq_map = q_vector->ring_mask;
+		vecmap->rxq_map = q_vector->ring_mask;
+		vecmap->rxitr_idx = I40E_RX_ITR;
+		vecmap->txitr_idx = I40E_TX_ITR;
 	}
 	/* Misc vector last - this is only for AdminQ messages */
-	vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
-	vimi->vecmap[v_idx].vector_id = 0;
-	vimi->vecmap[v_idx].txq_map = 0;
-	vimi->vecmap[v_idx].rxq_map = 0;
+	vecmap = &vimi->vecmap[v_idx];
+	vecmap->vsi_id = adapter->vsi_res->vsi_id;
+	vecmap->vector_id = 0;
+	vecmap->txq_map = 0;
+	vecmap->rxq_map = 0;
 
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
 	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
...
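
[Editor's note] For readers unfamiliar with the virtchnl handshake, this sketch mirrors how each per-vector map is now filled, using only the fields visible in this diff. The struct is a local stand-in for virtchnl_vector_map, and the NONQ_VECS and ITR index values are assumptions noted in the comments, not quoted from the headers.

/* local stand-in for struct virtchnl_vector_map; field names follow the diff */
struct vecmap_sketch {
	unsigned short vsi_id;
	unsigned short vector_id;
	unsigned short rxq_map;
	unsigned short txq_map;
	unsigned char rxitr_idx;
	unsigned char txitr_idx;
};

#define NONQ_VECS	1	/* assumed: one non-queue (AdminQ) vector */
#define RX_ITR_IDX	0	/* assumed value of I40E_RX_ITR */
#define TX_ITR_IDX	1	/* assumed value of I40E_TX_ITR */

static void fill_queue_vecmap(struct vecmap_sketch *m, unsigned short vsi_id,
			      int v_idx, unsigned short ring_mask)
{
	m->vsi_id = vsi_id;
	m->vector_id = v_idx + NONQ_VECS;	/* queue vectors follow the misc vector */
	m->rxq_map = ring_mask;
	m->txq_map = ring_mask;
	m->rxitr_idx = RX_ITR_IDX;	/* tell the PF which ITR index each direction uses */
	m->txitr_idx = TX_ITR_IDX;
}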