Commit f3334447 authored by Christoph Hellwig

nvme: take node locality into account when selecting a path

Make current_path an array with an entry for every possible node, and
cache the best path on a per-node basis.  Take the node distance into
account when selecting it.  This is primarily useful for dual-ported PCIe
devices which are connected to PCIe root ports on different sockets.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
parent 73383adf
...@@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, ...@@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
unsigned nsid, struct nvme_id_ns *id) unsigned nsid, struct nvme_id_ns *id)
{ {
struct nvme_ns_head *head; struct nvme_ns_head *head;
size_t size = sizeof(*head);
int ret = -ENOMEM; int ret = -ENOMEM;
head = kzalloc(sizeof(*head), GFP_KERNEL); #ifdef CONFIG_NVME_MULTIPATH
size += num_possible_nodes() * sizeof(struct nvme_ns *);
#endif
head = kzalloc(size, GFP_KERNEL);
if (!head) if (!head)
goto out; goto out;
ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
......
...@@ -117,29 +117,55 @@ static const char *nvme_ana_state_names[] = { ...@@ -117,29 +117,55 @@ static const char *nvme_ana_state_names[] = {
[NVME_ANA_CHANGE] = "change", [NVME_ANA_CHANGE] = "change",
}; };
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{ {
struct nvme_ns *ns, *fallback = NULL; struct nvme_ns_head *head = ns->head;
int node;
if (!head)
return;
for_each_node(node) {
if (ns == rcu_access_pointer(head->current_path[node]))
rcu_assign_pointer(head->current_path[node], NULL);
}
}
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
struct nvme_ns *found = NULL, *fallback = NULL, *ns;
list_for_each_entry_rcu(ns, &head->list, siblings) { list_for_each_entry_rcu(ns, &head->list, siblings) {
if (ns->ctrl->state != NVME_CTRL_LIVE || if (ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags)) test_bit(NVME_NS_ANA_PENDING, &ns->flags))
continue; continue;
distance = node_distance(node, dev_to_node(ns->ctrl->dev));
switch (ns->ana_state) { switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED: case NVME_ANA_OPTIMIZED:
rcu_assign_pointer(head->current_path, ns); if (distance < found_distance) {
return ns; found_distance = distance;
found = ns;
}
break;
case NVME_ANA_NONOPTIMIZED: case NVME_ANA_NONOPTIMIZED:
fallback = ns; if (distance < fallback_distance) {
fallback_distance = distance;
fallback = ns;
}
break; break;
default: default:
break; break;
} }
} }
if (fallback) if (!found)
rcu_assign_pointer(head->current_path, fallback); found = fallback;
return fallback; if (found)
rcu_assign_pointer(head->current_path[node], found);
return found;
} }
static inline bool nvme_path_is_optimized(struct nvme_ns *ns) static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
...@@ -150,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) ...@@ -150,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{ {
struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); int node = numa_node_id();
struct nvme_ns *ns;
ns = srcu_dereference(head->current_path[node], &head->srcu);
if (unlikely(!ns || !nvme_path_is_optimized(ns))) if (unlikely(!ns || !nvme_path_is_optimized(ns)))
ns = __nvme_find_path(head); ns = __nvme_find_path(head, node);
return ns; return ns;
} }
...@@ -200,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) ...@@ -200,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
int srcu_idx; int srcu_idx;
srcu_idx = srcu_read_lock(&head->srcu); srcu_idx = srcu_read_lock(&head->srcu);
ns = srcu_dereference(head->current_path, &head->srcu); ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
if (likely(ns && nvme_path_is_optimized(ns))) if (likely(ns && nvme_path_is_optimized(ns)))
found = ns->queue->poll_fn(q, qc); found = ns->queue->poll_fn(q, qc);
srcu_read_unlock(&head->srcu, srcu_idx); srcu_read_unlock(&head->srcu, srcu_idx);
......
...@@ -277,14 +277,6 @@ struct nvme_ns_ids { ...@@ -277,14 +277,6 @@ struct nvme_ns_ids {
* only ever has a single entry for private namespaces. * only ever has a single entry for private namespaces.
*/ */
struct nvme_ns_head { struct nvme_ns_head {
#ifdef CONFIG_NVME_MULTIPATH
struct gendisk *disk;
struct nvme_ns __rcu *current_path;
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
struct mutex lock;
#endif
struct list_head list; struct list_head list;
struct srcu_struct srcu; struct srcu_struct srcu;
struct nvme_subsystem *subsys; struct nvme_subsystem *subsys;
...@@ -293,6 +285,14 @@ struct nvme_ns_head { ...@@ -293,6 +285,14 @@ struct nvme_ns_head {
struct list_head entry; struct list_head entry;
struct kref ref; struct kref ref;
int instance; int instance;
#ifdef CONFIG_NVME_MULTIPATH
struct gendisk *disk;
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
struct mutex lock;
struct nvme_ns __rcu *current_path[];
#endif
}; };
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
...@@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); ...@@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl);
void nvme_mpath_clear_current_path(struct nvme_ns *ns);
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
if (head && ns == rcu_access_pointer(head->current_path))
rcu_assign_pointer(head->current_path, NULL);
}
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment