Commit e29c6a13 authored by Linus Torvalds

Merge tag 'block-5.6-2020-02-16' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Not a lot here, which is great, basically just three small bcache
  fixes from Coly, and four NVMe fixes via Keith"

* tag 'block-5.6-2020-02-16' of git://git.kernel.dk/linux-block:
  nvme: fix the parameter order for nvme_get_log in nvme_get_fw_slot_info
  nvme/pci: move cqe check after device shutdown
  nvme: prevent warning triggered by nvme_stop_keep_alive
  nvme/tcp: fix bug on double requeue when send fails
  bcache: remove macro nr_to_fifo_front()
  bcache: Revert "bcache: shrink btree node cache after bch_btree_check()"
  bcache: ignore pending signals when creating gc and allocator thread
parents 713db356 f25372ff
...@@ -67,6 +67,7 @@ ...@@ -67,6 +67,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/sched/signal.h>
#include <trace/events/bcache.h> #include <trace/events/bcache.h>
#define MAX_OPEN_BUCKETS 128 #define MAX_OPEN_BUCKETS 128
...@@ -733,8 +734,21 @@ int bch_open_buckets_alloc(struct cache_set *c) ...@@ -733,8 +734,21 @@ int bch_open_buckets_alloc(struct cache_set *c)
int bch_cache_allocator_start(struct cache *ca) int bch_cache_allocator_start(struct cache *ca)
{ {
struct task_struct *k = kthread_run(bch_allocator_thread, struct task_struct *k;
ca, "bcache_allocator");
/*
* The previous btree check operation may have occupied so much
* system memory for the bcache btree node cache that the
* registering process was selected by the OOM killer. Ignore
* the SIGKILL sent by the OOM killer here, if there is one, so
* that kthread_run() does not fail because of pending signals.
* The bcache registering process will exit after the
* registration is done.
*/
if (signal_pending(current))
flush_signals(current);
k = kthread_run(bch_allocator_thread, ca, "bcache_allocator");
if (IS_ERR(k)) if (IS_ERR(k))
return PTR_ERR(k); return PTR_ERR(k);
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <linux/random.h> #include <linux/random.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/sched/clock.h> #include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <trace/events/bcache.h> #include <trace/events/bcache.h>
...@@ -1913,6 +1914,18 @@ static int bch_gc_thread(void *arg) ...@@ -1913,6 +1914,18 @@ static int bch_gc_thread(void *arg)
int bch_gc_thread_start(struct cache_set *c) int bch_gc_thread_start(struct cache_set *c)
{ {
/*
* The previous btree check operation may have occupied so much
* system memory for the bcache btree node cache that the
* registering process was selected by the OOM killer. Ignore
* the SIGKILL sent by the OOM killer here, if there is one, so
* that kthread_run() does not fail because of pending signals.
* The bcache registering process will exit after the
* registration is done.
*/
if (signal_pending(current))
flush_signals(current);
c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc"); c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
return PTR_ERR_OR_ZERO(c->gc_thread); return PTR_ERR_OR_ZERO(c->gc_thread);
} }
......
...@@ -417,8 +417,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) ...@@ -417,8 +417,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
/* Journalling */ /* Journalling */
#define nr_to_fifo_front(p, front_p, mask) (((p) - (front_p)) & (mask))
static void btree_flush_write(struct cache_set *c) static void btree_flush_write(struct cache_set *c)
{ {
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR]; struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
...@@ -510,8 +508,7 @@ static void btree_flush_write(struct cache_set *c) ...@@ -510,8 +508,7 @@ static void btree_flush_write(struct cache_set *c)
* journal entry can be reclaimed). These selected nodes * journal entry can be reclaimed). These selected nodes
* will be ignored and skipped in the following for-loop. * will be ignored and skipped in the following for-loop.
*/ */
if (nr_to_fifo_front(btree_current_write(b)->journal, if (((btree_current_write(b)->journal - fifo_front_p) &
fifo_front_p,
mask) != 0) { mask) != 0) {
mutex_unlock(&b->write_lock); mutex_unlock(&b->write_lock);
continue; continue;
......
...@@ -1917,23 +1917,6 @@ static int run_cache_set(struct cache_set *c) ...@@ -1917,23 +1917,6 @@ static int run_cache_set(struct cache_set *c)
if (bch_btree_check(c)) if (bch_btree_check(c))
goto err; goto err;
/*
* bch_btree_check() may occupy too much system memory, which
* has negative effects on user space application (e.g. data
* base) performance. Shrink the mca cache memory proactively
* here to avoid competing for memory with user space workloads.
*/
if (!c->shrinker_disabled) {
struct shrink_control sc;
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = c->btree_cache_used * c->btree_pages;
/* first run to clear b->accessed tag */
c->shrink.scan_objects(&c->shrink, &sc);
/* second run to reap non-accessed nodes */
c->shrink.scan_objects(&c->shrink, &sc);
}
bch_journal_mark(c, &journal); bch_journal_mark(c, &journal);
bch_initial_gc_finish(c); bch_initial_gc_finish(c);
pr_debug("btree_check() done"); pr_debug("btree_check() done");
......
...@@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); ...@@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
* nvme_reset_wq - hosts nvme reset works * nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works * nvme_delete_wq - hosts nvme delete works
* *
* nvme_wq will host works such are scan, aen handling, fw activation, * nvme_wq will host works such as scan, aen handling, fw activation,
* keep-alive error recovery, periodic reconnects etc. nvme_reset_wq * keep-alive, periodic reconnects etc. nvme_reset_wq
* runs reset works which also flush works hosted on nvme_wq for * runs reset works which also flush works hosted on nvme_wq for
* serialization purposes. nvme_delete_wq host controller deletion * serialization purposes. nvme_delete_wq host controller deletion
* works which flush reset works for serialization. * works which flush reset works for serialization.
...@@ -976,7 +976,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) ...@@ -976,7 +976,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
startka = true; startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags); spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka) if (startka)
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
} }
static int nvme_keep_alive(struct nvme_ctrl *ctrl) static int nvme_keep_alive(struct nvme_ctrl *ctrl)
...@@ -1006,7 +1006,7 @@ static void nvme_keep_alive_work(struct work_struct *work) ...@@ -1006,7 +1006,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
dev_dbg(ctrl->device, dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n"); "reschedule traffic based keep-alive timer\n");
ctrl->comp_seen = false; ctrl->comp_seen = false;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
return; return;
} }
...@@ -1023,7 +1023,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ...@@ -1023,7 +1023,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0)) if (unlikely(ctrl->kato == 0))
return; return;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
} }
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
...@@ -3867,7 +3867,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) ...@@ -3867,7 +3867,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log) if (!log)
return; return;
if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log, if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
sizeof(*log), 0)) sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log); kfree(log);
......
...@@ -1401,6 +1401,23 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) ...@@ -1401,6 +1401,23 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
nvme_poll_irqdisable(nvmeq, -1); nvme_poll_irqdisable(nvmeq, -1);
} }
/*
 * Called only on a device that has been disabled and after all other threads
 * that can check this device's completion queues have synced. This is the
 * last chance for the driver to see a natural completion before
 * nvme_cancel_request() terminates all incomplete requests.
 *
 * Walks dev->queues from index queue_count - 1 down to index 1; the queue at
 * index 0 is not visited by this function.
 */
static void nvme_reap_pending_cqes(struct nvme_dev *dev)
{
u16 start, end;
int i;
/* Highest-numbered queue first; the loop ends before reaching index 0. */
for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
/*
 * start/end are filled in through the pointers passed here and then
 * handed to nvme_complete_cqes() for the same queue.
 * NOTE(review): presumably they delimit the range of newly found
 * completion entries -- confirm against nvme_process_cq().
 */
nvme_process_cq(&dev->queues[i], &start, &end, -1);
nvme_complete_cqes(&dev->queues[i], start, end);
}
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
int entry_size) int entry_size)
{ {
...@@ -2235,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) ...@@ -2235,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
if (timeout == 0) if (timeout == 0)
return false; return false;
/* handle any remaining CQEs */
if (opcode == nvme_admin_delete_cq &&
!test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags))
nvme_poll_irqdisable(nvmeq, -1);
sent--; sent--;
if (nr_queues) if (nr_queues)
goto retry; goto retry;
...@@ -2428,6 +2440,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) ...@@ -2428,6 +2440,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_suspend_io_queues(dev); nvme_suspend_io_queues(dev);
nvme_suspend_queue(&dev->queues[0]); nvme_suspend_queue(&dev->queues[0]);
nvme_pci_disable(dev); nvme_pci_disable(dev);
nvme_reap_pending_cqes(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
......
...@@ -1088,7 +1088,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) ...@@ -1088,7 +1088,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return; return;
queue_work(nvme_wq, &ctrl->err_work); queue_work(nvme_reset_wq, &ctrl->err_work);
} }
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
......
...@@ -422,7 +422,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) ...@@ -422,7 +422,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return; return;
queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work); queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
} }
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
...@@ -1054,7 +1054,12 @@ static void nvme_tcp_io_work(struct work_struct *w) ...@@ -1054,7 +1054,12 @@ static void nvme_tcp_io_work(struct work_struct *w)
} else if (unlikely(result < 0)) { } else if (unlikely(result < 0)) {
dev_err(queue->ctrl->ctrl.device, dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", result); "failed to send request %d\n", result);
if (result != -EPIPE)
/*
* Fail the request unless peer closed the connection,
* in which case error recovery flow will complete all.
*/
if ((result != -EPIPE) && (result != -ECONNRESET))
nvme_tcp_fail_request(queue->request); nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue); nvme_tcp_done_send_req(queue);
return; return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment