Commit 80081ec3 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-3.16/drivers' of git://git.kernel.dk/linux-block into next

Pull block driver changes from Jens Axboe:
 "Now that the core bits are in, here's the pull request for the driver
  related changes for 3.16.  Nothing out of the ordinary here, mostly
  business as usual.  There are a few pulls of for-3.16/core into this
  branch, which were done when the blk-mq was modified after the
  mtip32xx conversion was put in.

  The pull request contains:

   - skd and cciss converted to use pci_enable_msix_exact().  From
     Alexander Gordeev.

   - A few mtip32xx fixes from Asai @ Micron.

   - The conversion of mtip32xx from make_request_fn to blk-mq, and a
     later small fix for that conversion on quiescing for non-queued IO.
     From me.

   - A fix for bsg to use an exported function to check whether this
     driver is request based or not.  Needed updating for blk-mq, which
     is request based, but does not have a request_fn hook.  From me.

   - Small floppy bug fix from Jiri.

   - A series of cleanups for the cdrom uniform layer from Joe Perches.
     Gets rid of various old ugly macros, making the code conform more
     to the modern coding style.

   - A series of patches for drbd from the drbd crew (Lars Ellenberg and
     Philipp Reisner).

   - A use-after-free fix for null_blk from Ming Lei.

   - Also from Ming Lei is a performance patch for virtio-blk, which can
     net us a 3x win on kvm platforms where world notification is
     expensive.

   - Ming Lei also fixed a stall issue in virtio-blk, due to a race
     between queue start/stop and resource limits.

   - A small batch of fixes for xen-blk{back,front} from Olaf Hering and
     Valentin Priescu"

* 'for-3.16/drivers' of git://git.kernel.dk/linux-block: (54 commits)
  block: virtio_blk: don't hold spin lock during world switch
  xen-blkback: defer freeing blkif to avoid blocking xenwatch
  xen blkif.h: fix comment typo in discard-alignment
  xen/blkback: disable discard feature if requested by toolstack
  xen-blkfront: remove type check from blkfront_setup_discard
  floppy: do not corrupt bio.bi_flags when reading block 0
  mtip32xx: move error handling to service thread
  virtio_blk: fix race between start and stop queue
  mtip32xx: stop block hardware queues before quiescing IO
  mtip32xx: blk_mq_init_queue() returns an ERR_PTR
  mtip32xx: convert to use blk-mq
  cdrom: Remove unnecessary prototype for cdrom_get_disc_info
  cdrom: Remove unnecessary prototype for cdrom_mrw_exit
  cdrom: Remove cdrom_count_tracks prototype
  cdrom: Remove cdrom_get_next_writeable prototype
  cdrom: Remove cdrom_get_last_written prototype
  cdrom: Move mmc_ioctls above cdrom_ioctl to remove unnecessary prototype
  cdrom: Remove unnecessary sanitize_format prototype
  cdrom: Remove unnecessary check_for_audio_disc prototype
  cdrom: Remove prototype for open_for_data
  ...
parents 42555320 e8edca6f
......@@ -1008,7 +1008,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
/*
* we need a proper transport to send commands, not a stacked device
*/
if (!q->request_fn)
if (!queue_is_rq_based(q))
return 0;
bcd = &q->bsg_dev;
......
......@@ -4080,7 +4080,7 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
goto default_int_mode;
if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4);
if (!err) {
h->intr[0] = cciss_msix_entries[0].vector;
h->intr[1] = cciss_msix_entries[1].vector;
......@@ -4088,10 +4088,6 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
h->intr[3] = cciss_msix_entries[3].vector;
h->msix_vector = 1;
return;
}
if (err > 0) {
dev_warn(&h->pdev->dev,
"only %d MSI-X vectors available\n", err);
} else {
dev_warn(&h->pdev->dev,
"MSI-X init failed %d\n", err);
......
......@@ -29,7 +29,6 @@
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"
#include "drbd_wrappers.h"
enum al_transaction_types {
......@@ -204,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
BUG_ON(!bdev->md_bdev);
drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
(void*)_RET_IP_ );
......@@ -276,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval
return _al_get(device, first, true);
}
static
bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
/* for bios crossing activity log extent boundaries,
......@@ -846,7 +844,7 @@ void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
int wake_up = 0;
unsigned long flags;
if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
(unsigned long long)sector, size);
return;
......@@ -920,7 +918,7 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
if (size == 0)
return 0;
if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
drbd_err(device, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return 0;
......@@ -1023,8 +1021,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
unsigned int enr = BM_SECT_TO_EXT(sector);
struct bm_extent *bm_ext;
int i, sig;
int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
200 times -> 20 seconds. */
bool sa;
retry:
sig = wait_event_interruptible(device->al_wait,
......@@ -1035,12 +1032,15 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
if (test_bit(BME_LOCKED, &bm_ext->flags))
return 0;
/* step aside only while we are above c-min-rate; unless disabled. */
sa = drbd_rs_c_min_rate_throttle(device);
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(device->al_wait,
!_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
test_bit(BME_PRIORITY, &bm_ext->flags));
(sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
spin_lock_irq(&device->al_lock);
if (lc_put(device->resync, &bm_ext->lce) == 0) {
bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
......@@ -1052,9 +1052,6 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
return -EINTR;
if (schedule_timeout_interruptible(HZ/10))
return -EINTR;
if (sa && --sa == 0)
drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec."
"Resync stalled?\n");
goto retry;
}
}
......@@ -1288,7 +1285,7 @@ void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
sector_t esector, nr_sectors;
int wake_up = 0;
if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
(unsigned long long)sector, size);
return;
......
......@@ -382,6 +382,12 @@ enum {
__EE_CALL_AL_COMPLETE_IO,
__EE_MAY_SET_IN_SYNC,
/* is this a TRIM aka REQ_DISCARD? */
__EE_IS_TRIM,
/* our lower level cannot handle trim,
* and we want to fall back to zeroout instead */
__EE_IS_TRIM_USE_ZEROOUT,
/* In case a barrier failed,
* we need to resubmit without the barrier flag. */
__EE_RESUBMITTED,
......@@ -405,7 +411,9 @@ enum {
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_IS_TRIM (1<<__EE_IS_TRIM)
#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS)
......@@ -579,6 +587,7 @@ struct drbd_resource {
struct list_head resources;
struct res_opts res_opts;
struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */
struct mutex adm_mutex; /* mutex to serialize administrative requests */
spinlock_t req_lock;
unsigned susp:1; /* IO suspended by user */
......@@ -609,6 +618,7 @@ struct drbd_connection {
struct drbd_socket data; /* data/barrier/cstate/parameter packets */
struct drbd_socket meta; /* ping/ack (metadata) packets */
int agreed_pro_version; /* actually used protocol version */
u32 agreed_features;
unsigned long last_received; /* in jiffies, either socket */
unsigned int ko_count;
......@@ -814,6 +824,28 @@ struct drbd_device {
struct submit_worker submit;
};
struct drbd_config_context {
/* assigned from drbd_genlmsghdr */
unsigned int minor;
/* assigned from request attributes, if present */
unsigned int volume;
#define VOLUME_UNSPECIFIED (-1U)
/* pointer into the request skb,
* limited lifetime! */
char *resource_name;
struct nlattr *my_addr;
struct nlattr *peer_addr;
/* reply buffer */
struct sk_buff *reply_skb;
/* pointer into reply buffer */
struct drbd_genlmsghdr *reply_dh;
/* resolved from attributes, if possible */
struct drbd_device *device;
struct drbd_resource *resource;
struct drbd_connection *connection;
};
static inline struct drbd_device *minor_to_device(unsigned int minor)
{
return (struct drbd_device *)idr_find(&drbd_devices, minor);
......@@ -821,7 +853,7 @@ static inline struct drbd_device *minor_to_device(unsigned int minor)
static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
{
return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
}
#define for_each_resource(resource, _resources) \
......@@ -1139,6 +1171,12 @@ struct bm_extent {
#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
#define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
/* For now, don't allow more than one activity log extent worth of data
* to be discarded in one go. We may need to rework drbd_al_begin_io()
* to allow for even larger discard ranges */
#define DRBD_MAX_DISCARD_SIZE AL_EXTENT_SIZE
#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
extern int drbd_bm_init(struct drbd_device *device);
extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
extern void drbd_bm_cleanup(struct drbd_device *device);
......@@ -1229,9 +1267,9 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
extern rwlock_t global_state_lock;
extern int conn_lowest_minor(struct drbd_connection *connection);
enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr);
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
extern void drbd_destroy_device(struct kref *kref);
extern void drbd_delete_device(struct drbd_device *mdev);
extern void drbd_delete_device(struct drbd_device *device);
extern struct drbd_resource *drbd_create_resource(const char *name);
extern void drbd_free_resource(struct drbd_resource *resource);
......@@ -1257,7 +1295,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
/* drbd_nl.c */
extern int drbd_msg_put_info(const char *info);
extern int drbd_msg_put_info(struct sk_buff *skb, const char *info);
extern void drbd_suspend_io(struct drbd_device *device);
extern void drbd_resume_io(struct drbd_device *device);
extern char *ppsize(char *buf, unsigned long long size);
......@@ -1283,6 +1321,10 @@ extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
extern int drbd_khelper(struct drbd_device *device, char *cmd);
/* drbd_worker.c */
/* bi_end_io handlers */
extern void drbd_md_io_complete(struct bio *bio, int error);
extern void drbd_peer_request_endio(struct bio *bio, int error);
extern void drbd_request_endio(struct bio *bio, int error);
extern int drbd_worker(struct drbd_thread *thi);
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
void drbd_resync_after_changed(struct drbd_device *device);
......@@ -1332,16 +1374,20 @@ extern int w_start_resync(struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
extern void start_resync_timer_fn(unsigned long data);
extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
/* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_asender(struct drbd_thread *thi);
extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
extern int drbd_submit_peer_request(struct drbd_device *,
struct drbd_peer_request *, const unsigned,
const int);
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
sector_t, unsigned int,
bool,
gfp_t) __must_hold(local);
extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
int);
......@@ -1401,6 +1447,37 @@ static inline void drbd_tcp_quickack(struct socket *sock)
(char*)&val, sizeof(val));
}
/* sets the number of 512 byte sectors of our virtual device */
static inline void drbd_set_my_capacity(struct drbd_device *device,
sector_t size)
{
/* set_capacity(device->this_bdev->bd_disk, size); */
set_capacity(device->vdisk, size);
device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
/*
* used to submit our private bio
*/
static inline void drbd_generic_make_request(struct drbd_device *device,
int fault_type, struct bio *bio)
{
__release(local);
if (!bio->bi_bdev) {
printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
"bio->bi_bdev == NULL\n",
device_to_minor(device));
dump_stack();
bio_endio(bio, -ENODEV);
return;
}
if (drbd_insert_fault(device, fault_type))
bio_endio(bio, -EIO);
else
generic_make_request(bio);
}
void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
/* drbd_proc.c */
......@@ -1410,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
extern const char *drbd_role_str(enum drbd_role s);
/* drbd_actlog.c */
extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
......@@ -2144,7 +2222,7 @@ static inline void drbd_md_flush(struct drbd_device *device)
static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
{
return list_first_entry(&resource->connections,
return list_first_entry_or_null(&resource->connections,
struct drbd_connection, connections);
}
......
......@@ -1607,8 +1607,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long b
return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}
/* Used to send write requests
* R_PRIMARY -> Peer (P_DATA)
/* Used to send write or TRIM aka REQ_DISCARD requests
* R_PRIMARY -> Peer (P_DATA, P_TRIM)
*/
int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
......@@ -1640,6 +1640,16 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
dp_flags |= DP_SEND_WRITE_ACK;
}
p->dp_flags = cpu_to_be32(dp_flags);
if (dp_flags & DP_DISCARD) {
struct p_trim *t = (struct p_trim*)p;
t->size = cpu_to_be32(req->i.size);
err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
goto out;
}
/* our digest is still only over the payload.
* TRIM does not carry any payload. */
if (dgs)
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
......@@ -1675,6 +1685,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
... Be noisy about digest too large ...
} */
}
out:
mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
return err;
......@@ -2570,6 +2581,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
INIT_LIST_HEAD(&resource->connections);
list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update);
mutex_init(&resource->adm_mutex);
spin_lock_init(&resource->req_lock);
return resource;
......@@ -2687,14 +2699,16 @@ static int init_submitter(struct drbd_device *device)
return 0;
}
enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
{
struct drbd_resource *resource = adm_ctx->resource;
struct drbd_connection *connection;
struct drbd_device *device;
struct drbd_peer_device *peer_device, *tmp_peer_device;
struct gendisk *disk;
struct request_queue *q;
int id;
int vnr = adm_ctx->volume;
enum drbd_ret_code err = ERR_NOMEM;
device = minor_to_device(minor);
......@@ -2763,7 +2777,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (id < 0) {
if (id == -ENOSPC) {
err = ERR_MINOR_EXISTS;
drbd_msg_put_info("requested minor exists already");
drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
}
goto out_no_minor_idr;
}
......@@ -2773,7 +2787,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (id < 0) {
if (id == -ENOSPC) {
err = ERR_MINOR_EXISTS;
drbd_msg_put_info("requested minor exists already");
drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
}
goto out_idr_remove_minor;
}
......@@ -2794,7 +2808,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (id < 0) {
if (id == -ENOSPC) {
err = ERR_INVALID_REQUEST;
drbd_msg_put_info("requested volume exists already");
drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already");
}
goto out_idr_remove_from_resource;
}
......@@ -2803,7 +2817,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (init_submitter(device)) {
err = ERR_NOMEM;
drbd_msg_put_info("unable to create submit workqueue");
drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue");
goto out_idr_remove_vol;
}
......
This diff is collapsed.
#include "drbd_wrappers.h"
#include <linux/kernel.h>
#include <net/netlink.h>
#include <linux/drbd_genl_api.h>
......
......@@ -116,7 +116,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
/* ------------------------ ~18s average ------------------------ */
i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
if (dt > 180)
stalled = 1;
if (!dt)
......
......@@ -54,6 +54,11 @@ enum drbd_packet {
P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */
P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */
P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */
/* 0x2e to 0x30 reserved, used in drbd 9 */
/* REQ_DISCARD. We used "discard" in different contexts before,
* which is why I chose TRIM here, to disambiguate. */
P_TRIM = 0x31,
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101,
......@@ -119,6 +124,11 @@ struct p_data {
u32 dp_flags;
} __packed;
struct p_trim {
struct p_data p_data;
u32 size; /* == bio->bi_size */
} __packed;
/*
* commands which share a struct:
* p_block_ack:
......@@ -150,6 +160,8 @@ struct p_block_req {
* ReportParams
*/
#define FF_TRIM 1
struct p_connection_features {
u32 protocol_min;
u32 feature_flags;
......
This diff is collapsed.
......@@ -522,6 +522,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
break;
case DISCARD_COMPLETED_NOTSUPP:
case DISCARD_COMPLETED_WITH_ERROR:
/* I'd rather not detach from local disk just because it
* failed a REQ_DISCARD. */
mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
break;
case QUEUE_FOR_NET_READ:
/* READ or READA, and
* no local disk,
......@@ -1235,6 +1242,7 @@ void do_submit(struct work_struct *ws)
if (list_empty(&incoming))
break;
skip_fast_path:
wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
/* Maybe more was queued, while we prepared the transaction?
* Try to stuff them into this transaction as well.
......@@ -1273,6 +1281,25 @@ void do_submit(struct work_struct *ws)
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
/* If all currently hot activity log extents are kept busy by
* incoming requests, we still must not totally starve new
* requests to cold extents. In that case, prepare one request
* in blocking mode. */
list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
list_del_init(&req->tl_requests);
req->rq_state |= RQ_IN_ACT_LOG;
if (!drbd_al_begin_io_prepare(device, &req->i)) {
/* Corresponding extent was hot after all? */
drbd_send_and_submit(device, req);
} else {
/* Found a request to a cold extent.
* Put on "pending" list,
* and try to cumulate with more. */
list_add(&req->tl_requests, &pending);
goto skip_fast_path;
}
}
}
}
......@@ -1326,23 +1353,35 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
return limit;
}
static struct drbd_request *find_oldest_request(struct drbd_connection *connection)
static void find_oldest_requests(
struct drbd_connection *connection,
struct drbd_device *device,
struct drbd_request **oldest_req_waiting_for_peer,
struct drbd_request **oldest_req_waiting_for_disk)
{
/* Walk the transfer log,
* and find the oldest not yet completed request */
struct drbd_request *r;
*oldest_req_waiting_for_peer = NULL;
*oldest_req_waiting_for_disk = NULL;
list_for_each_entry(r, &connection->transfer_log, tl_requests) {
if (atomic_read(&r->completion_ref))
return r;
const unsigned s = r->rq_state;
if (!*oldest_req_waiting_for_peer
&& ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
*oldest_req_waiting_for_peer = r;
if (!*oldest_req_waiting_for_disk
&& (s & RQ_LOCAL_PENDING) && r->device == device)
*oldest_req_waiting_for_disk = r;
if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
break;
}
return NULL;
}
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
struct drbd_connection *connection = first_peer_device(device)->connection;
struct drbd_request *req; /* oldest request */
struct drbd_request *req_disk, *req_peer; /* oldest request */
struct net_conf *nc;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;
......@@ -1366,8 +1405,8 @@ void request_timer_fn(unsigned long data)
now = jiffies;
spin_lock_irq(&device->resource->req_lock);
req = find_oldest_request(connection);
if (!req) {
find_oldest_requests(connection, device, &req_peer, &req_disk);
if (req_peer == NULL && req_disk == NULL) {
spin_unlock_irq(&device->resource->req_lock);
mod_timer(&device->request_timer, now + et);
return;
......@@ -1389,19 +1428,26 @@ void request_timer_fn(unsigned long data)
* ~198 days with 250 HZ, we have a window where the timeout would need
* to expire twice (worst case) to become effective. Good enough.
*/
if (ent && req->rq_state & RQ_NET_PENDING &&
time_after(now, req->start_time + ent) &&
if (ent && req_peer &&
time_after(now, req_peer->start_time + ent) &&
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}
if (dt && req->rq_state & RQ_LOCAL_PENDING && req->device == device &&
time_after(now, req->start_time + dt) &&
if (dt && req_disk &&
time_after(now, req_disk->start_time + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
}
nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
/* Reschedule timer for the nearest not already expired timeout.
* Fallback to now + min(effective network timeout, disk timeout). */
ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
? req_peer->start_time + ent : now + et;
dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
? req_disk->start_time + dt : now + et;
nt = time_before(ent, dt) ? ent : dt;
spin_unlock_irq(&connection->resource->req_lock);
mod_timer(&device->request_timer, nt);
}
......@@ -30,7 +30,6 @@
#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_wrappers.h"
/* The request callbacks will be called in irq context by the IDE drivers,
and in Softirqs/Tasklets/BH context by the SCSI drivers,
......@@ -111,11 +110,14 @@ enum drbd_req_event {
BARRIER_ACKED, /* in protocol A and B */
DATA_RECEIVED, /* (remote read) */
COMPLETED_OK,
READ_COMPLETED_WITH_ERROR,
READ_AHEAD_COMPLETED_WITH_ERROR,
WRITE_COMPLETED_WITH_ERROR,
DISCARD_COMPLETED_NOTSUPP,
DISCARD_COMPLETED_WITH_ERROR,
ABORT_DISK_IO,
COMPLETED_OK,
RESEND,
FAIL_FROZEN_DISK_IO,
RESTART_FROZEN_DISK_IO,
......
......@@ -54,8 +54,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
enum sanitize_state_warnings *warn);
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
union drbd_state ns, enum sanitize_state_warnings *warn);
static inline bool is_susp(union drbd_state s)
{
......@@ -287,7 +287,7 @@ _req_st_cond(struct drbd_device *device, union drbd_state mask,
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
rv = is_valid_transition(os, ns);
if (rv >= SS_SUCCESS)
rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
......@@ -333,7 +333,7 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
rv = is_valid_transition(os, ns);
if (rv < SS_SUCCESS) {
spin_unlock_irqrestore(&device->resource->req_lock, flags);
......@@ -740,8 +740,8 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st
* When we loose connection, we have to set the state of the peers disk (pdsk)
* to D_UNKNOWN. This rule and many more along those lines are in this function.
*/
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
enum sanitize_state_warnings *warn)
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
union drbd_state ns, enum sanitize_state_warnings *warn)
{
enum drbd_fencing_p fp;
enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
......@@ -882,11 +882,13 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
}
if (fp == FP_STONITH &&
(ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
(ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
!(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
......@@ -958,7 +960,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
os = drbd_read_state(device);
ns = sanitize_state(device, ns, &ssw);
ns = sanitize_state(device, os, ns, &ssw);
if (ns.i == os.i)
return SS_NOTHING_TO_DO;
......@@ -1656,7 +1658,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
os = drbd_read_state(device);
ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
......@@ -1718,7 +1720,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
number_of_volumes++;
os = drbd_read_state(device);
ns = apply_mask_val(os, mask, val);
ns = sanitize_state(device, ns, NULL);
ns = sanitize_state(device, os, ns, NULL);
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
......@@ -1763,19 +1765,19 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
static enum drbd_state_rv
_conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
{
enum drbd_state_rv rv;
enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */;
if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags))
return SS_CW_SUCCESS;
rv = SS_CW_SUCCESS;
if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags))
return SS_CW_FAILED_BY_PEER;
rv = SS_CW_FAILED_BY_PEER;
rv = conn_is_valid_transition(connection, mask, val, 0);
if (rv == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
rv = SS_UNKNOWN_ERROR; /* continue waiting */
err = conn_is_valid_transition(connection, mask, val, 0);
if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
return rv;
return rv;
return err;
}
enum drbd_state_rv
......
......@@ -118,7 +118,7 @@ static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __rele
/* writes on behalf of the partner, or resync writes,
* "submitted" by the receiver, final stage. */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
unsigned long flags = 0;
struct drbd_peer_device *peer_device = peer_req->peer_device;
......@@ -150,7 +150,9 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel
do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
/* FIXME do we want to detach for failed REQ_DISCARD?
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
spin_unlock_irqrestore(&device->resource->req_lock, flags);
......@@ -176,10 +178,12 @@ void drbd_peer_request_endio(struct bio *bio, int error)
struct drbd_device *device = peer_req->peer_device->device;
int uptodate = bio_flagged(bio, BIO_UPTODATE);
int is_write = bio_data_dir(bio) == WRITE;
int is_discard = !!(bio->bi_rw & REQ_DISCARD);
if (error && __ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "%s: error=%d s=%llus\n",
is_write ? "write" : "read", error,
is_write ? (is_discard ? "discard" : "write")
: "read", error,
(unsigned long long)peer_req->i.sector);
if (!error && !uptodate) {
if (__ratelimit(&drbd_ratelimit_state))
......@@ -263,7 +267,12 @@ void drbd_request_endio(struct bio *bio, int error)
/* to avoid recursion in __req_mod */
if (unlikely(error)) {
what = (bio_data_dir(bio) == WRITE)
if (bio->bi_rw & REQ_DISCARD)
what = (error == -EOPNOTSUPP)
? DISCARD_COMPLETED_NOTSUPP
: DISCARD_COMPLETED_WITH_ERROR;
else
what = (bio_data_dir(bio) == WRITE)
? WRITE_COMPLETED_WITH_ERROR
: (bio_rw(bio) == READ)
? READ_COMPLETED_WITH_ERROR
......@@ -395,7 +404,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
/* GFP_TRY, because if there is no memory available right now, this may
* be rescheduled for later. It is "only" background resync, after all. */
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
size, GFP_TRY);
size, true /* has real payload */, GFP_TRY);
if (!peer_req)
goto defer;
......@@ -492,10 +501,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
return fb;
}
static int drbd_rs_controller(struct drbd_device *device)
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
struct disk_conf *dc;
unsigned int sect_in; /* Number of sectors that came in since the last turn */
unsigned int want; /* The number of sectors we want in the proxy */
int req_sect; /* Number of sectors to request in this turn */
int correction; /* Number of sectors more we need in the proxy*/
......@@ -505,9 +513,6 @@ static int drbd_rs_controller(struct drbd_device *device)
int max_sect;
struct fifo_buffer *plan;
sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
device->rs_in_flight -= sect_in;
dc = rcu_dereference(device->ldev->disk_conf);
plan = rcu_dereference(device->rs_plan_s);
......@@ -550,11 +555,16 @@ static int drbd_rs_controller(struct drbd_device *device)
static int drbd_rs_number_requests(struct drbd_device *device)
{
int number;
unsigned int sect_in; /* Number of sectors that came in since the last turn */
int number, mxb;
sect_in = atomic_xchg(&device->rs_sect_in, 0);
device->rs_in_flight -= sect_in;
rcu_read_lock();
mxb = drbd_get_max_buffers(device) / 2;
if (rcu_dereference(device->rs_plan_s)->size) {
number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
} else {
device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
......@@ -562,8 +572,14 @@ static int drbd_rs_number_requests(struct drbd_device *device)
}
rcu_read_unlock();
/* ignore the amount of pending requests, the resync controller should
* throttle down to incoming reply rate soon enough anyways. */
/* Don't have more than "max-buffers"/2 in-flight.
* Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
* potentially causing a distributed deadlock on congestion during
* online-verify or (checksum-based) resync, if max-buffers,
* socket buffer sizes and resync rate settings are mis-configured. */
if (mxb - device->rs_in_flight < number)
number = mxb - device->rs_in_flight;
return number;
}
......@@ -597,7 +613,7 @@ static int make_resync_request(struct drbd_device *device, int cancel)
max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
number = drbd_rs_number_requests(device);
if (number == 0)
if (number <= 0)
goto requeue;
for (i = 0; i < number; i++) {
......@@ -647,7 +663,7 @@ static int make_resync_request(struct drbd_device *device, int cancel)
*/
align = 1;
rollback_i = i;
for (;;) {
while (i < number) {
if (size + BM_BLOCK_SIZE > max_bio_size)
break;
......@@ -1670,11 +1686,15 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
}
clear_bit(B_RS_H_DONE, &device->flags);
write_lock_irq(&global_state_lock);
/* req_lock: serialize with drbd_send_and_submit() and others
* global_state_lock: for stable sync-after dependencies */
spin_lock_irq(&device->resource->req_lock);
write_lock(&global_state_lock);
/* Did some connection breakage or IO error race with us? */
if (device->state.conn < C_CONNECTED
|| !get_ldev_if_state(device, D_NEGOTIATING)) {
write_unlock_irq(&global_state_lock);
write_unlock(&global_state_lock);
spin_unlock_irq(&device->resource->req_lock);
mutex_unlock(device->state_mutex);
return;
}
......@@ -1714,7 +1734,8 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
}
_drbd_pause_after(device);
}
write_unlock_irq(&global_state_lock);
write_unlock(&global_state_lock);
spin_unlock_irq(&device->resource->req_lock);
if (r == SS_SUCCESS) {
/* reset rs_last_bcast when a resync or verify is started,
......@@ -1778,34 +1799,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
mutex_unlock(device->state_mutex);
}
/* If the resource already closed the current epoch, but we did not
* (because we have not yet seen new requests), we should send the
* corresponding barrier now. Must be checked within the same spinlock
* that is used to check for new requests. */
static bool need_to_send_barrier(struct drbd_connection *connection)
{
if (!connection->send.seen_any_write_yet)
return false;
/* Skip barriers that do not contain any writes.
* This may happen during AHEAD mode. */
if (!connection->send.current_epoch_writes)
return false;
/* ->req_lock is held when requests are queued on
* connection->sender_work, and put into ->transfer_log.
* It is also held when ->current_tle_nr is increased.
* So either there are already new requests queued,
* and corresponding barriers will be send there.
* Or nothing new is queued yet, so the difference will be 1.
*/
if (atomic_read(&connection->current_tle_nr) !=
connection->send.current_epoch_nr + 1)
return false;
return true;
}
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
spin_lock_irq(&queue->q_lock);
......@@ -1864,12 +1857,22 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
spin_unlock_irq(&connection->resource->req_lock);
break;
}
send_barrier = need_to_send_barrier(connection);
/* We found nothing new to do, no to-be-communicated request,
* no other work item. We may still need to close the last
* epoch. Next incoming request epoch will be connection ->
* current transfer log epoch number. If that is different
* from the epoch of the last request we communicated, it is
* safe to send the epoch separating barrier now.
*/
send_barrier =
atomic_read(&connection->current_tle_nr) !=
connection->send.current_epoch_nr;
spin_unlock_irq(&connection->resource->req_lock);
if (send_barrier) {
drbd_send_barrier(connection);
connection->send.current_epoch_nr++;
}
if (send_barrier)
maybe_send_barrier(connection,
connection->send.current_epoch_nr + 1);
schedule();
/* may be woken up for other things but new work, too,
* e.g. if the current epoch got closed.
......
#ifndef _DRBD_WRAPPERS_H
#define _DRBD_WRAPPERS_H
#include <linux/ctype.h>
#include <linux/mm.h>
#include "drbd_int.h"
/* see get_sb_bdev and bd_claim */
extern char *drbd_sec_holder;
/* sets the number of 512 byte sectors of our virtual device */
static inline void drbd_set_my_capacity(struct drbd_device *device,
sector_t size)
{
/* set_capacity(device->this_bdev->bd_disk, size); */
set_capacity(device->vdisk, size);
device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
/* bi_end_io handlers */
extern void drbd_md_io_complete(struct bio *bio, int error);
extern void drbd_peer_request_endio(struct bio *bio, int error);
extern void drbd_request_endio(struct bio *bio, int error);
/*
* used to submit our private bio
*/
static inline void drbd_generic_make_request(struct drbd_device *device,
int fault_type, struct bio *bio)
{
__release(local);
if (!bio->bi_bdev) {
printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
"bio->bi_bdev == NULL\n",
device_to_minor(device));
dump_stack();
bio_endio(bio, -ENODEV);
return;
}
if (drbd_insert_fault(device, fault_type))
bio_endio(bio, -EIO);
else
generic_make_request(bio);
}
#ifndef __CHECKER__
# undef __cond_lock
# define __cond_lock(x,c) (c)
#endif
#endif
......@@ -3812,7 +3812,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
bio.bi_iter.bi_size = size;
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = 0;
bio.bi_flags = (1 << BIO_QUIET);
bio.bi_flags |= (1 << BIO_QUIET);
bio.bi_private = &cbdata;
bio.bi_end_io = floppy_rb0_cb;
......
This diff is collapsed.
......@@ -40,9 +40,11 @@
#define MTIP_MAX_RETRIES 2
/* Various timeout values in ms */
#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
#define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000
#define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000
#define MTIP_NCQ_CMD_TIMEOUT_MS 15000
#define MTIP_IOCTL_CMD_TIMEOUT_MS 5000
#define MTIP_INT_CMD_TIMEOUT_MS 5000
#define MTIP_QUIESCE_IO_TIMEOUT_MS (MTIP_NCQ_CMD_TIMEOUT_MS * \
(MTIP_MAX_RETRIES + 1))
/* check for timeouts every 500ms */
#define MTIP_TIMEOUT_CHECK_PERIOD 500
......@@ -331,12 +333,8 @@ struct mtip_cmd {
*/
void (*comp_func)(struct mtip_port *port,
int tag,
void *data,
struct mtip_cmd *cmd,
int status);
/* Additional callback function that may be called by comp_func() */
void (*async_callback)(void *data, int status);
void *async_data; /* Addl. data passed to async_callback() */
int scatter_ents; /* Number of scatter list entries used */
......@@ -347,10 +345,6 @@ struct mtip_cmd {
int retries; /* The number of retries left for this command. */
int direction; /* Data transfer direction */
unsigned long comp_time; /* command completion time, in jiffies */
atomic_t active; /* declares if this command sent to the drive. */
};
/* Structure used to describe a port. */
......@@ -436,12 +430,6 @@ struct mtip_port {
* or error handling is active
*/
unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
/*
* Array of command slots. Structure includes pointers to the
* command header and command table, and completion function and data
* pointers.
*/
struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
/* Used by mtip_service_thread to wait for an event */
wait_queue_head_t svc_wait;
/*
......@@ -452,13 +440,7 @@ struct mtip_port {
/*
* Timer used to complete commands that have been active for too long.
*/
struct timer_list cmd_timer;
unsigned long ic_pause_timer;
/*
* Semaphore used to block threads if there are no
* command slots available.
*/
struct semaphore cmd_slot;
/* Semaphore to control queue depth of unaligned IOs */
struct semaphore cmd_slot_unal;
......@@ -485,6 +467,8 @@ struct driver_data {
struct request_queue *queue; /* Our request queue. */
struct blk_mq_tag_set tags; /* blk_mq tags */
struct mtip_port *port; /* Pointer to the port data structure. */
unsigned product_type; /* magic value declaring the product type */
......
......@@ -203,8 +203,8 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
entry = llist_reverse_order(entry);
do {
cmd = container_of(entry, struct nullb_cmd, ll_list);
end_cmd(cmd);
entry = entry->next;
end_cmd(cmd);
} while (entry);
}
......
......@@ -3944,15 +3944,14 @@ static int skd_acquire_msix(struct skd_device *skdev)
for (i = 0; i < SKD_MAX_MSIX_COUNT; i++)
entries[i].entry = i;
rc = pci_enable_msix_range(pdev, entries,
SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT);
if (rc < 0) {
rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT);
if (rc) {
pr_err("(%s): failed to enable MSI-X %d\n",
skd_name(skdev), rc);
goto msix_out;
}
skdev->msix_count = rc;
skdev->msix_count = SKD_MAX_MSIX_COUNT;
skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) *
skdev->msix_count, GFP_KERNEL);
if (!skdev->msix_entries) {
......
......@@ -162,6 +162,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
unsigned int num;
const bool last = (req->cmd_flags & REQ_END) != 0;
int err;
bool notify = false;
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
......@@ -214,10 +215,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
return BLK_MQ_RQ_QUEUE_ERROR;
}
if (last)
virtqueue_kick(vblk->vq);
if (last && virtqueue_kick_prepare(vblk->vq))
notify = true;
spin_unlock_irqrestore(&vblk->vq_lock, flags);
if (notify)
virtqueue_notify(vblk->vq);
return BLK_MQ_RQ_QUEUE_OK;
}
......
......@@ -314,7 +314,7 @@ struct xen_blkif {
unsigned long long st_rd_sect;
unsigned long long st_wr_sect;
wait_queue_head_t waiting_to_free;
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
};
......@@ -361,7 +361,7 @@ struct pending_req {
#define xen_blkif_put(_b) \
do { \
if (atomic_dec_and_test(&(_b)->refcnt)) \
wake_up(&(_b)->waiting_to_free);\
schedule_work(&(_b)->free_work);\
} while (0)
struct phys_req {
......
......@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
return be->dev;
}
/*
* The last request could free the device from softirq context and
* xen_blkif_free() can sleep.
*/
static void xen_blkif_deferred_free(struct work_struct *work)
{
struct xen_blkif *blkif;
blkif = container_of(work, struct xen_blkif, free_work);
xen_blkif_free(blkif);
}
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
char *devpath, *devname;
......@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
init_completion(&blkif->drain_complete);
atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
init_waitqueue_head(&blkif->waiting_to_free);
blkif->persistent_gnts.rb_node = NULL;
spin_lock_init(&blkif->free_pages_lock);
INIT_LIST_HEAD(&blkif->free_pages);
......@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
for (i = 0; i < XEN_BLKIF_REQS; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
......@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
return 0;
}
static void xen_blkif_disconnect(struct xen_blkif *blkif)
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
if (blkif->xenblkd) {
kthread_stop(blkif->xenblkd);
......@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
blkif->xenblkd = NULL;
}
atomic_dec(&blkif->refcnt);
wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
atomic_inc(&blkif->refcnt);
/* The above kthread_stop() guarantees that at this point we
* don't have any discard_io or other_io requests. So, checking
* for inflight IO is enough.
*/
if (atomic_read(&blkif->inflight) > 0)
return -EBUSY;
if (blkif->irq) {
unbind_from_irqhandler(blkif->irq, blkif);
......@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
blkif->blk_rings.common.sring = NULL;
}
return 0;
}
static void xen_blkif_free(struct xen_blkif *blkif)
......@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
struct pending_req *req, *n;
int i = 0, j;
if (!atomic_dec_and_test(&blkif->refcnt))
BUG();
xen_blkif_disconnect(blkif);
xen_vbd_free(&blkif->vbd);
/* Remove all persistent grants and the cache of ballooned pages. */
xen_blkbk_free_caches(blkif);
......@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
be->backend_watch.node = NULL;
}
dev_set_drvdata(&dev->dev, NULL);
if (be->blkif) {
xen_blkif_disconnect(be->blkif);
xen_vbd_free(&be->blkif->vbd);
xen_blkif_free(be->blkif);
be->blkif = NULL;
xen_blkif_put(be->blkif);
}
kfree(be->mode);
kfree(be);
dev_set_drvdata(&dev->dev, NULL);
return 0;
}
......@@ -481,10 +499,15 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
struct xenbus_device *dev = be->dev;
struct xen_blkif *blkif = be->blkif;
int err;
int state = 0;
int state = 0, discard_enable;
struct block_device *bdev = be->blkif->vbd.bdev;
struct request_queue *q = bdev_get_queue(bdev);
err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
&discard_enable);
if (err == 1 && !discard_enable)
return;
if (blk_queue_discard(q)) {
err = xenbus_printf(xbt, dev->nodename,
"discard-granularity", "%u",
......@@ -700,7 +723,11 @@ static void frontend_changed(struct xenbus_device *dev,
* Enforce precondition before potential leak point.
* xen_blkif_disconnect() is idempotent.
*/
xen_blkif_disconnect(be->blkif);
err = xen_blkif_disconnect(be->blkif);
if (err) {
xenbus_dev_fatal(dev, err, "pending I/O");
break;
}
err = connect_ring(be);
if (err)
......
......@@ -1635,36 +1635,24 @@ blkfront_closing(struct blkfront_info *info)
static void blkfront_setup_discard(struct blkfront_info *info)
{
int err;
char *type;
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int discard_secure;
type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
if (IS_ERR(type))
return;
info->feature_secdiscard = 0;
if (strncmp(type, "phy", 3) == 0) {
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-granularity", "%u", &discard_granularity,
"discard-alignment", "%u", &discard_alignment,
NULL);
if (!err) {
info->feature_discard = 1;
info->discard_granularity = discard_granularity;
info->discard_alignment = discard_alignment;
}
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-secure", "%d", &discard_secure,
NULL);
if (!err)
info->feature_secdiscard = discard_secure;
} else if (strncmp(type, "file", 4) == 0)
info->feature_discard = 1;
kfree(type);
info->feature_discard = 1;
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-granularity", "%u", &discard_granularity,
"discard-alignment", "%u", &discard_alignment,
NULL);
if (!err) {
info->discard_granularity = discard_granularity;
info->discard_alignment = discard_alignment;
}
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-secure", "%d", &discard_secure,
NULL);
if (!err)
info->feature_secdiscard = !!discard_secure;
}
static int blkfront_setup_indirect(struct blkfront_info *info)
......
This diff is collapsed.
......@@ -620,6 +620,15 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0)
/*
* Driver can handle struct request, if it either has an old style
* request_fn defined, or is blk-mq based.
*/
static inline bool queue_is_rq_based(struct request_queue *q)
{
return q->request_fn || q->mq_ops;
}
static inline unsigned int blk_queue_cluster(struct request_queue *q)
{
return q->limits.cluster;
......
......@@ -86,7 +86,7 @@ typedef uint64_t blkif_sector_t;
* Interface%20manuals/100293068c.pdf
* The backend can optionally provide three extra XenBus attributes to
* further optimize the discard functionality:
* 'discard-aligment' - Devices that support discard functionality may
* 'discard-alignment' - Devices that support discard functionality may
* internally allocate space in units that are bigger than the exported
* logical block size. The discard-alignment parameter indicates how many bytes
* the beginning of the partition is offset from the internal allocation unit's
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment