Commit 5d22d47b authored by David S. Miller

Merge branch 'sfc-filter-locking'

Edward Cree says:

====================
sfc: rework locking around filter management

The use of a spinlock to protect filter state, combined with the need for a
sleeping operation (MCDI) to apply that state to the NIC (on EF10), led to
unfixable race conditions around the handling of filter restoration after
an MC reboot.
So, this patch series removes the requirement to be able to modify the SW
filter table from atomic context, by using a workqueue to request
asynchronous filter operations (which are needed for ARFS).  Then, the
filter table locks are changed to mutexes, replacing the dance of spinlocks
and 'busy' flags.  Also, a mutex is added to protect the RSS context state,
since otherwise a similar race is possible around restoring that after an
MC reboot.  While we're at it, fix a couple of other related bugs.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c709002c a8e8fbeb
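
The core of the rework is visible in the rx.c hunks below: an ARFS insertion
request is captured in a small heap-allocated structure from atomic context
and replayed from a workqueue, where sleeping (MCDI, mutexes) is allowed.
A minimal sketch of that pattern, using made-up names (my_dev, my_req,
my_req_work) rather than the driver's own:

#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
	struct mutex state_mutex;	/* protects SW filter state */
};

struct my_req {
	struct work_struct work;
	struct my_dev *dev;
	/* ... parameters copied out of the atomic caller ... */
};

static void my_req_work(struct work_struct *work)
{
	struct my_req *req = container_of(work, struct my_req, work);

	/* Process context: taking a mutex and issuing MCDI is allowed */
	mutex_lock(&req->dev->state_mutex);
	/* ... update SW filter state, then push it to the NIC ... */
	mutex_unlock(&req->dev->state_mutex);
	kfree(req);
}

/* Callable from atomic context (e.g. the ndo_rx_flow_steer path) */
static int my_request_async(struct my_dev *dev)
{
	struct my_req *req = kmalloc(sizeof(*req), GFP_ATOMIC);

	if (!req)
		return -ENOMEM;
	req->dev = dev;
	INIT_WORK(&req->work, my_req_work);
	schedule_work(&req->work);
	return 0;
}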
......@@ -340,7 +340,10 @@ static int efx_poll(struct napi_struct *napi, int budget)
efx_update_irq_mod(efx, channel);
}
efx_filter_rfs_expire(channel);
#ifdef CONFIG_RFS_ACCEL
/* Perhaps expire some ARFS filters */
schedule_work(&channel->filter_work);
#endif
/* There is no race here; although napi_disable() will
* only wait for napi_complete(), this isn't a problem
......@@ -470,6 +473,10 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
tx_queue->channel = channel;
}
#ifdef CONFIG_RFS_ACCEL
INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif
rx_queue = &channel->rx_queue;
rx_queue->efx = efx;
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
......@@ -512,6 +519,9 @@ efx_copy_channel(const struct efx_channel *old_channel)
rx_queue->buffer = NULL;
memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
#ifdef CONFIG_RFS_ACCEL
INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif
return channel;
}
......@@ -1773,7 +1783,6 @@ static int efx_probe_filters(struct efx_nic *efx)
{
int rc;
spin_lock_init(&efx->filter_lock);
init_rwsem(&efx->filter_sem);
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
......@@ -2648,6 +2657,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
efx_disable_interrupts(efx);
mutex_lock(&efx->mac_lock);
mutex_lock(&efx->rss_lock);
if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
method != RESET_TYPE_DATAPATH)
efx->phy_op->fini(efx);
......@@ -2703,6 +2713,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
if (efx->type->rx_restore_rss_contexts)
efx->type->rx_restore_rss_contexts(efx);
mutex_unlock(&efx->rss_lock);
down_read(&efx->filter_sem);
efx_restore_filters(efx);
up_read(&efx->filter_sem);
......@@ -2721,6 +2732,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
fail:
efx->port_initialized = false;
mutex_unlock(&efx->rss_lock);
mutex_unlock(&efx->mac_lock);
return rc;
......@@ -3007,11 +3019,15 @@ static int efx_init_struct(struct efx_nic *efx,
efx->rx_packet_ts_offset =
efx->type->rx_ts_offset - efx->type->rx_prefix_size;
INIT_LIST_HEAD(&efx->rss_context.list);
mutex_init(&efx->rss_lock);
spin_lock_init(&efx->stats_lock);
efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
efx->num_mac_stats = MC_CMD_MAC_NSTATS;
BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
mutex_init(&efx->mac_lock);
#ifdef CONFIG_RFS_ACCEL
mutex_init(&efx->rps_mutex);
#endif
efx->phy_op = &efx_dummy_phy_operations;
efx->mdio.dev = net_dev;
INIT_WORK(&efx->mac_work, efx_mac_work);
......@@ -3079,11 +3095,14 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
* (a) this is an infrequent control-plane operation and (b) n is small (max 64)
*/
struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
{
struct list_head *head = &efx->rss_context.list;
struct efx_rss_context *ctx, *new;
u32 id = 1; /* Don't use zero, that refers to the master RSS context */
WARN_ON(!mutex_is_locked(&efx->rss_lock));
/* Search for first gap in the numbering */
list_for_each_entry(ctx, head, list) {
if (ctx->user_id != id)
......@@ -3109,10 +3128,13 @@ struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
return new;
}
struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *head)
struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
{
struct list_head *head = &efx->rss_context.list;
struct efx_rss_context *ctx;
WARN_ON(!mutex_is_locked(&efx->rss_lock));
list_for_each_entry(ctx, head, list)
if (ctx->user_id == id)
return ctx;
......
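
Note the new WARN_ON(!mutex_is_locked(...)) in both helpers above: by taking
the efx_nic instead of a bare list_head, they can assert that the caller
holds rss_lock. The contract looks like this in isolation (a sketch with
illustrative names, not the driver code):

#include <linux/bug.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct ctx_table {
	struct mutex lock;		/* serializes all list access */
	struct list_head ctxs;
};

struct ctx_entry {
	struct list_head list;
	u32 user_id;
};

/* Caller must hold tbl->lock; the WARN_ON documents and checks that */
static struct ctx_entry *ctx_find_locked(struct ctx_table *tbl, u32 id)
{
	struct ctx_entry *ctx;

	WARN_ON(!mutex_is_locked(&tbl->lock));
	list_for_each_entry(ctx, &tbl->ctxs, list)
		if (ctx->user_id == id)
			return ctx;
	return NULL;
}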
......@@ -170,22 +170,25 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota);
static inline void efx_filter_rfs_expire(struct efx_channel *channel)
static inline void efx_filter_rfs_expire(struct work_struct *data)
{
struct efx_channel *channel = container_of(data, struct efx_channel,
filter_work);
if (channel->rfs_filters_added >= 60 &&
__efx_filter_rfs_expire(channel->efx, 100))
channel->rfs_filters_added -= 60;
}
#define efx_filter_rfs_enabled() 1
#else
static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
static inline void efx_filter_rfs_expire(struct work_struct *data) {}
#define efx_filter_rfs_enabled() 0
#endif
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
/* RSS contexts */
struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *list);
struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *list);
struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
void efx_free_rss_context_entry(struct efx_rss_context *ctx);
static inline bool efx_rss_active(struct efx_rss_context *ctx)
{
......
......@@ -979,7 +979,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
{
struct efx_nic *efx = netdev_priv(net_dev);
u32 rss_context = 0;
s32 rc;
s32 rc = 0;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
......@@ -989,15 +989,17 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
case ETHTOOL_GRXFH: {
struct efx_rss_context *ctx = &efx->rss_context;
mutex_lock(&efx->rss_lock);
if (info->flow_type & FLOW_RSS && info->rss_context) {
ctx = efx_find_rss_context_entry(info->rss_context,
&efx->rss_context.list);
if (!ctx)
return -ENOENT;
ctx = efx_find_rss_context_entry(efx, info->rss_context);
if (!ctx) {
rc = -ENOENT;
goto out_unlock;
}
}
info->data = 0;
if (!efx_rss_active(ctx)) /* No RSS */
return 0;
goto out_unlock;
switch (info->flow_type & ~FLOW_RSS) {
case UDP_V4_FLOW:
if (ctx->rx_hash_udp_4tuple)
......@@ -1024,7 +1026,9 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
default:
break;
}
return 0;
out_unlock:
mutex_unlock(&efx->rss_lock);
return rc;
}
case ETHTOOL_GRXCLSRLCNT:
......@@ -1084,6 +1088,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
u32 flow_type = rule->flow_type & ~(FLOW_EXT | FLOW_RSS);
struct ethhdr *mac_entry = &rule->h_u.ether_spec;
struct ethhdr *mac_mask = &rule->m_u.ether_spec;
enum efx_filter_flags flags = 0;
......@@ -1117,14 +1122,14 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
if (rule->flow_type & FLOW_RSS)
spec.rss_context = rss_context;
switch (rule->flow_type & ~(FLOW_EXT | FLOW_RSS)) {
switch (flow_type) {
case TCP_V4_FLOW:
case UDP_V4_FLOW:
spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
EFX_FILTER_MATCH_IP_PROTO);
spec.ether_type = htons(ETH_P_IP);
spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V4_FLOW ?
IPPROTO_TCP : IPPROTO_UDP);
spec.ip_proto = flow_type == TCP_V4_FLOW ? IPPROTO_TCP
: IPPROTO_UDP;
if (ip_mask->ip4dst) {
if (ip_mask->ip4dst != IP4_ADDR_FULL_MASK)
return -EINVAL;
......@@ -1158,8 +1163,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
EFX_FILTER_MATCH_IP_PROTO);
spec.ether_type = htons(ETH_P_IPV6);
spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ?
IPPROTO_TCP : IPPROTO_UDP);
spec.ip_proto = flow_type == TCP_V6_FLOW ? IPPROTO_TCP
: IPPROTO_UDP;
if (!ip6_mask_is_empty(ip6_mask->ip6dst)) {
if (!ip6_mask_is_full(ip6_mask->ip6dst))
return -EINVAL;
......@@ -1366,16 +1371,20 @@ static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_rss_context *ctx;
int rc;
int rc = 0;
if (!efx->type->rx_pull_rss_context_config)
return -EOPNOTSUPP;
ctx = efx_find_rss_context_entry(rss_context, &efx->rss_context.list);
if (!ctx)
return -ENOENT;
mutex_lock(&efx->rss_lock);
ctx = efx_find_rss_context_entry(efx, rss_context);
if (!ctx) {
rc = -ENOENT;
goto out_unlock;
}
rc = efx->type->rx_pull_rss_context_config(efx, ctx);
if (rc)
return rc;
goto out_unlock;
if (hfunc)
*hfunc = ETH_RSS_HASH_TOP;
......@@ -1383,7 +1392,9 @@ static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table));
if (key)
memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size);
return 0;
out_unlock:
mutex_unlock(&efx->rss_lock);
return rc;
}
static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
......@@ -1401,23 +1412,31 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
/* Hash function is Toeplitz, cannot be changed */
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
mutex_lock(&efx->rss_lock);
if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
if (delete)
if (delete) {
/* alloc + delete == Nothing to do */
return -EINVAL;
ctx = efx_alloc_rss_context_entry(&efx->rss_context.list);
if (!ctx)
return -ENOMEM;
rc = -EINVAL;
goto out_unlock;
}
ctx = efx_alloc_rss_context_entry(efx);
if (!ctx) {
rc = -ENOMEM;
goto out_unlock;
}
ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
/* Initialise indir table and key to defaults */
efx_set_default_rx_indir_table(efx, ctx);
netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
allocated = true;
} else {
ctx = efx_find_rss_context_entry(*rss_context,
&efx->rss_context.list);
if (!ctx)
return -ENOENT;
ctx = efx_find_rss_context_entry(efx, *rss_context);
if (!ctx) {
rc = -ENOENT;
goto out_unlock;
}
}
if (delete) {
......@@ -1425,7 +1444,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
if (!rc)
efx_free_rss_context_entry(ctx);
return rc;
goto out_unlock;
}
if (!key)
......@@ -1438,6 +1457,8 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
efx_free_rss_context_entry(ctx);
else
*rss_context = ctx->user_id;
out_unlock:
mutex_unlock(&efx->rss_lock);
return rc;
}
......
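
All of the ethtool paths above are converted to the same shape: take
rss_lock once, route every failure through an out_unlock label, and return
a single rc. Schematically (hypothetical names; find_entry stands in for
efx_find_rss_context_entry):

#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct my_dev {
	struct mutex rss_lock;
};

static void *find_entry(struct my_dev *dev, u32 id);	/* assumed lookup */

static int my_get_op(struct my_dev *dev, u32 id)
{
	int rc = 0;

	mutex_lock(&dev->rss_lock);
	if (!find_entry(dev, id)) {
		rc = -ENOENT;		/* no early return: fall through... */
		goto out_unlock;
	}
	/* ... read or modify the entry while still holding the lock ... */
out_unlock:
	mutex_unlock(&dev->rss_lock);	/* ...so the unlock happens exactly once */
	return rc;
}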
......@@ -1878,6 +1878,7 @@ struct efx_farch_filter_table {
};
struct efx_farch_filter_state {
struct rw_semaphore lock; /* Protects table contents */
struct efx_farch_filter_table table[EFX_FARCH_FILTER_TABLE_COUNT];
};
......@@ -2397,9 +2398,13 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
if (rc)
return rc;
down_write(&state->lock);
table = &state->table[efx_farch_filter_spec_table_id(&spec)];
if (table->size == 0)
return -EINVAL;
if (table->size == 0) {
rc = -EINVAL;
goto out_unlock;
}
netif_vdbg(efx, hw, efx->net_dev,
"%s: type %d search_limit=%d", __func__, spec.type,
......@@ -2412,8 +2417,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
EFX_FARCH_FILTER_MC_DEF - EFX_FARCH_FILTER_UC_DEF);
rep_index = spec.type - EFX_FARCH_FILTER_UC_DEF;
ins_index = rep_index;
spin_lock_bh(&efx->filter_lock);
} else {
/* Search concurrently for
* (1) a filter to be replaced (rep_index): any filter
......@@ -2443,8 +2446,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
ins_index = -1;
depth = 1;
spin_lock_bh(&efx->filter_lock);
for (;;) {
if (!test_bit(i, table->used_bitmap)) {
if (ins_index < 0)
......@@ -2463,7 +2464,7 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
/* Case (b) */
if (ins_index < 0) {
rc = -EBUSY;
goto out;
goto out_unlock;
}
rep_index = -1;
break;
......@@ -2483,11 +2484,11 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
if (spec.priority == saved_spec->priority && !replace_equal) {
rc = -EEXIST;
goto out;
goto out_unlock;
}
if (spec.priority < saved_spec->priority) {
rc = -EPERM;
goto out;
goto out_unlock;
}
if (saved_spec->priority == EFX_FILTER_PRI_AUTO ||
saved_spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)
......@@ -2528,8 +2529,8 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
__func__, spec.type, ins_index, spec.dmaq_id);
rc = efx_farch_filter_make_id(&spec, ins_index);
out:
spin_unlock_bh(&efx->filter_lock);
out_unlock:
up_write(&state->lock);
return rc;
}
......@@ -2604,11 +2605,11 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx,
filter_idx = efx_farch_filter_id_index(filter_id);
if (filter_idx >= table->size)
return -ENOENT;
down_write(&state->lock);
spec = &table->spec[filter_idx];
spin_lock_bh(&efx->filter_lock);
rc = efx_farch_filter_remove(efx, table, filter_idx, priority);
spin_unlock_bh(&efx->filter_lock);
up_write(&state->lock);
return rc;
}
......@@ -2622,30 +2623,28 @@ int efx_farch_filter_get_safe(struct efx_nic *efx,
struct efx_farch_filter_table *table;
struct efx_farch_filter_spec *spec;
unsigned int filter_idx;
int rc;
int rc = -ENOENT;
down_read(&state->lock);
table_id = efx_farch_filter_id_table_id(filter_id);
if ((unsigned int)table_id >= EFX_FARCH_FILTER_TABLE_COUNT)
return -ENOENT;
goto out_unlock;
table = &state->table[table_id];
filter_idx = efx_farch_filter_id_index(filter_id);
if (filter_idx >= table->size)
return -ENOENT;
goto out_unlock;
spec = &table->spec[filter_idx];
spin_lock_bh(&efx->filter_lock);
if (test_bit(filter_idx, table->used_bitmap) &&
spec->priority == priority) {
efx_farch_filter_to_gen_spec(spec_buf, spec);
rc = 0;
} else {
rc = -ENOENT;
}
spin_unlock_bh(&efx->filter_lock);
out_unlock:
up_read(&state->lock);
return rc;
}
......@@ -2658,13 +2657,13 @@ efx_farch_filter_table_clear(struct efx_nic *efx,
struct efx_farch_filter_table *table = &state->table[table_id];
unsigned int filter_idx;
spin_lock_bh(&efx->filter_lock);
down_write(&state->lock);
for (filter_idx = 0; filter_idx < table->size; ++filter_idx) {
if (table->spec[filter_idx].priority != EFX_FILTER_PRI_AUTO)
efx_farch_filter_remove(efx, table,
filter_idx, priority);
}
spin_unlock_bh(&efx->filter_lock);
up_write(&state->lock);
}
int efx_farch_filter_clear_rx(struct efx_nic *efx,
......@@ -2688,7 +2687,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
unsigned int filter_idx;
u32 count = 0;
spin_lock_bh(&efx->filter_lock);
down_read(&state->lock);
for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
......@@ -2701,7 +2700,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
}
}
spin_unlock_bh(&efx->filter_lock);
up_read(&state->lock);
return count;
}
......@@ -2716,7 +2715,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
unsigned int filter_idx;
s32 count = 0;
spin_lock_bh(&efx->filter_lock);
down_read(&state->lock);
for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
......@@ -2735,7 +2734,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
}
}
out:
spin_unlock_bh(&efx->filter_lock);
up_read(&state->lock);
return count;
}
......@@ -2749,7 +2748,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx)
efx_oword_t filter;
unsigned int filter_idx;
spin_lock_bh(&efx->filter_lock);
down_write(&state->lock);
for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
table = &state->table[table_id];
......@@ -2770,7 +2769,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx)
efx_farch_filter_push_rx_config(efx);
efx_farch_filter_push_tx_limits(efx);
spin_unlock_bh(&efx->filter_lock);
up_write(&state->lock);
}
void efx_farch_filter_table_remove(struct efx_nic *efx)
......@@ -2864,7 +2863,7 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
efx_oword_t filter;
unsigned int filter_idx;
spin_lock_bh(&efx->filter_lock);
down_write(&state->lock);
for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
......@@ -2896,33 +2895,30 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
efx_farch_filter_push_rx_config(efx);
spin_unlock_bh(&efx->filter_lock);
up_write(&state->lock);
}
#ifdef CONFIG_RFS_ACCEL
s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
struct efx_filter_spec *gen_spec)
{
return efx_farch_filter_insert(efx, gen_spec, true);
}
bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int index)
{
struct efx_farch_filter_state *state = efx->filter_state;
struct efx_farch_filter_table *table =
&state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
struct efx_farch_filter_table *table;
bool ret = false;
down_write(&state->lock);
table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
if (test_bit(index, table->used_bitmap) &&
table->spec[index].priority == EFX_FILTER_PRI_HINT &&
rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
flow_id, index)) {
efx_farch_filter_table_clear_entry(efx, table, index);
return true;
ret = true;
}
return false;
up_write(&state->lock);
return ret;
}
#endif /* CONFIG_RFS_ACCEL */
......
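
The farch changes above replace efx->filter_lock (a BH spinlock) with a
per-state rw_semaphore: read-mostly paths (get, count, get_rx_ids) take it
shared, while insert/remove/clear/restore take it exclusively, and both
sides may now sleep. The split, reduced to its essentials with illustrative
names:

#include <linux/bitops.h>
#include <linux/rwsem.h>
#include <linux/types.h>

#define MY_TABLE_SIZE 256

struct my_filter_state {
	struct rw_semaphore lock;	/* protects table contents */
	DECLARE_BITMAP(used_bitmap, MY_TABLE_SIZE);
};

/* Readers can run concurrently with each other */
static bool my_filter_in_use(struct my_filter_state *st, unsigned int idx)
{
	bool used;

	down_read(&st->lock);
	used = test_bit(idx, st->used_bitmap);
	up_read(&st->lock);
	return used;
}

/* Writers get exclusive access, excluding readers too */
static void my_filter_clear(struct my_filter_state *st, unsigned int idx)
{
	down_write(&st->lock);
	clear_bit(idx, st->used_bitmap);
	up_write(&st->lock);
}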
......@@ -430,6 +430,7 @@ enum efx_sync_events_state {
* @event_test_cpu: Last CPU to handle interrupt or test event for this channel
* @irq_count: Number of IRQs since last adaptive moderation decision
* @irq_mod_score: IRQ moderation score
* @filter_work: Work item for efx_filter_rfs_expire()
* @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
* indexed by filter ID
* @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
......@@ -475,6 +476,7 @@ struct efx_channel {
unsigned int irq_mod_score;
#ifdef CONFIG_RFS_ACCEL
unsigned int rfs_filters_added;
struct work_struct filter_work;
#define RPS_FLOW_ID_INVALID 0xFFFFFFFF
u32 *rps_flow_id;
#endif
......@@ -794,6 +796,7 @@ struct efx_rss_context {
* @rx_scatter: Scatter mode enabled for receives
* @rss_context: Main RSS context. Its @list member is the head of the list of
* RSS contexts created by user requests
* @rss_lock: Protects custom RSS context software state in @rss_context.list
* @int_error_count: Number of internal errors seen recently
* @int_error_expire: Time at which error count will be expired
* @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
......@@ -841,9 +844,9 @@ struct efx_rss_context {
* @loopback_mode: Loopback status
* @loopback_modes: Supported loopback mode bitmask
* @loopback_selftest: Offline self-test private state
* @filter_sem: Filter table rw_semaphore, for freeing the table
* @filter_lock: Filter table lock, for mere content changes
* @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
* @filter_state: Architecture-dependent filter table state
* @rps_mutex: Protects RPS state of all channels
* @rps_expire_channel: Next channel to check for expiry
* @rps_expire_index: Next index to check for expiry in
* @rps_expire_channel's @rps_flow_id
......@@ -938,6 +941,7 @@ struct efx_nic {
int rx_packet_ts_offset;
bool rx_scatter;
struct efx_rss_context rss_context;
struct mutex rss_lock;
unsigned int_error_count;
unsigned long int_error_expire;
......@@ -995,9 +999,9 @@ struct efx_nic {
void *loopback_selftest;
struct rw_semaphore filter_sem;
spinlock_t filter_lock;
void *filter_state;
#ifdef CONFIG_RFS_ACCEL
struct mutex rps_mutex;
unsigned int rps_expire_channel;
unsigned int rps_expire_index;
#endif
......@@ -1152,10 +1156,6 @@ struct efx_udp_tunnel {
* @filter_count_rx_used: Get the number of filters in use at a given priority
* @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
* @filter_get_rx_ids: Get list of RX filters at a given priority
* @filter_rfs_insert: Add or replace a filter for RFS. This must be
* atomic. The hardware change may be asynchronous but should
* not be delayed for long. It may fail if this can't be done
* atomically.
* @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
* This must check whether the specified table entry is used by RFS
* and that rps_may_expire_flow() returns true for it.
......@@ -1306,8 +1306,6 @@ struct efx_nic_type {
enum efx_filter_priority priority,
u32 *buf, u32 size);
#ifdef CONFIG_RFS_ACCEL
s32 (*filter_rfs_insert)(struct efx_nic *efx,
struct efx_filter_spec *spec);
bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id,
unsigned int index);
#endif
......
......@@ -365,6 +365,8 @@ enum {
* @vi_base: Absolute index of first VI in this function
* @n_allocated_vis: Number of VIs allocated to this function
* @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
* @must_restore_rss_contexts: Flag: RSS contexts have yet to be restored after
* MC reboot
* @must_restore_filters: Flag: filters have yet to be restored after MC reboot
* @n_piobufs: Number of PIO buffers allocated to this function
* @wc_membase: Base address of write-combining mapping of the memory BAR
......@@ -407,6 +409,7 @@ struct efx_ef10_nic_data {
unsigned int vi_base;
unsigned int n_allocated_vis;
bool must_realloc_vis;
bool must_restore_rss_contexts;
bool must_restore_filters;
unsigned int n_piobufs;
void __iomem *wc_membase, *pio_write_base;
......@@ -601,8 +604,6 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
enum efx_filter_priority priority, u32 *buf,
u32 size);
#ifdef CONFIG_RFS_ACCEL
s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
struct efx_filter_spec *spec);
bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int index);
#endif
......
......@@ -827,14 +827,67 @@ MODULE_PARM_DESC(rx_refill_threshold,
#ifdef CONFIG_RFS_ACCEL
/**
* struct efx_async_filter_insertion - Request to asynchronously insert a filter
* @net_dev: Reference to the netdevice
* @spec: The filter to insert
* @work: Workitem for this request
* @rxq_index: Identifies the channel for which this request was made
* @flow_id: Identifies the kernel-side flow for which this request was made
*/
struct efx_async_filter_insertion {
struct net_device *net_dev;
struct efx_filter_spec spec;
struct work_struct work;
u16 rxq_index;
u32 flow_id;
};
static void efx_filter_rfs_work(struct work_struct *data)
{
struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
work);
struct efx_nic *efx = netdev_priv(req->net_dev);
struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
int rc;
rc = efx->type->filter_insert(efx, &req->spec, false);
if (rc >= 0) {
/* Remember this so we can check whether to expire the filter
* later.
*/
mutex_lock(&efx->rps_mutex);
channel->rps_flow_id[rc] = req->flow_id;
++channel->rfs_filters_added;
mutex_unlock(&efx->rps_mutex);
if (req->spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc);
}
/* Release references */
dev_put(req->net_dev);
kfree(req);
}
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_channel *channel;
struct efx_filter_spec spec;
struct efx_async_filter_insertion *req;
struct flow_keys fk;
int rc;
if (flow_id == RPS_FLOW_ID_INVALID)
return -EINVAL;
......@@ -847,50 +900,39 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
return -EPROTONOSUPPORT;
efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT,
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return -ENOMEM;
efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
spec.match_flags =
req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
spec.ether_type = fk.basic.n_proto;
spec.ip_proto = fk.basic.ip_proto;
req->spec.ether_type = fk.basic.n_proto;
req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
spec.rem_host[0] = fk.addrs.v4addrs.src;
spec.loc_host[0] = fk.addrs.v4addrs.dst;
req->spec.rem_host[0] = fk.addrs.v4addrs.src;
req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
sizeof(struct in6_addr));
memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
sizeof(struct in6_addr));
}
spec.rem_port = fk.ports.src;
spec.loc_port = fk.ports.dst;
rc = efx->type->filter_rfs_insert(efx, &spec);
if (rc < 0)
return rc;
req->spec.rem_port = fk.ports.src;
req->spec.loc_port = fk.ports.dst;
/* Remember this so we can check whether to expire the filter later */
channel = efx_get_channel(efx, rxq_index);
channel->rps_flow_id[rc] = flow_id;
++channel->rfs_filters_added;
if (spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
(spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
ntohs(spec.loc_port), rxq_index, flow_id, rc);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
(spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
ntohs(spec.loc_port), rxq_index, flow_id, rc);
return rc;
dev_hold(req->net_dev = net_dev);
INIT_WORK(&req->work, efx_filter_rfs_work);
req->rxq_index = rxq_index;
req->flow_id = flow_id;
schedule_work(&req->work);
return 0;
}
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
......@@ -899,9 +941,8 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
unsigned int channel_idx, index, size;
u32 flow_id;
if (!spin_trylock_bh(&efx->filter_lock))
if (!mutex_trylock(&efx->rps_mutex))
return false;
expire_one = efx->type->filter_rfs_expire_one;
channel_idx = efx->rps_expire_channel;
index = efx->rps_expire_index;
......@@ -926,7 +967,7 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
efx->rps_expire_channel = channel_idx;
efx->rps_expire_index = index;
spin_unlock_bh(&efx->filter_lock);
mutex_unlock(&efx->rps_mutex);
return true;
}
......
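
One subtlety in __efx_filter_rfs_expire() above: spin_trylock_bh() becomes
mutex_trylock(), keeping the opportunistic behaviour — if the lock is
contended, the expiry pass is simply skipped and retried on a later poll
rather than blocking the channel's work item. In outline (hypothetical
names):

#include <linux/mutex.h>

struct my_dev {
	struct mutex rps_mutex;		/* protects RPS expiry state */
};

static bool my_rfs_expire(struct my_dev *dev, unsigned int quota)
{
	if (!mutex_trylock(&dev->rps_mutex))
		return false;		/* contended: try again next poll */
	while (quota--) {
		/* ... consider one filter, e.g. via rps_may_expire_flow() ... */
	}
	mutex_unlock(&dev->rps_mutex);
	return true;
}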
......@@ -1035,7 +1035,6 @@ const struct efx_nic_type siena_a0_nic_type = {
.filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_farch_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
.filter_rfs_insert = efx_farch_filter_rfs_insert,
.filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
......