Commit 64120354 authored by Linus Torvalds

Merge branch 'for-4.5/drivers' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "This is the block driver pull request for 4.5, with the exception of
  NVMe, which is in a separate branch and will be posted after this one.

  This pull request contains:

   - A set of bcache stability fixes, which have been acked by Kent.
     These have been used and tested for more than a year by the
     community, so it's about time that they got in.

   - A set of drbd updates from the drbd team (Andreas, Lars, Philipp),
     and from Markus Elfring and Oleg Drokin.

   - A set of fixes for xen blkback/front from the usual suspects (Bob,
     Konrad), as well as community-based fixes from Kiri, Julien, and
     Peng.

   - A 2038 time fix for sx8 from Shraddha, with a follow-up fix from me.

   - A small mtip32xx cleanup from Zhu Yanjun.

   - A null_blk division fix from Arnd"

* 'for-4.5/drivers' of git://git.kernel.dk/linux-block: (71 commits)
  null_blk: use sector_div instead of do_div
  mtip32xx: restrict variables visible in current code module
  xen/blkfront: Fix crash if backend doesn't follow the right states.
  xen/blkback: Fix two memory leaks.
  xen/blkback: make st_ statistics per ring
  xen/blkfront: Handle non-indirect grant with 64KB pages
  xen-blkfront: Introduce blkif_ring_get_request
  xen-blkback: clear PF_NOFREEZE for xen_blkif_schedule()
  xen/blkback: Free resources if connect_ring failed.
  xen/blocks: Return -EXX instead of -1
  xen/blkback: make pool of persistent grants and free pages per-queue
  xen/blkback: get the number of hardware queues/rings from blkfront
  xen/blkback: pseudo support for multi hardware queues/rings
  xen/blkback: separate ring information out of struct xen_blkif
  xen/blkfront: correct setting for xen_blkif_max_ring_order
  xen/blkfront: make persistent grants pool per-queue
  xen/blkfront: Remove duplicate setting of ->xbdev.
  xen/blkfront: Cleanup of comments, fix unaligned variables, and syntax errors.
  xen/blkfront: negotiate number of queues/rings to be used with backend
  xen/blkfront: split per device io_lock
  ...
parents 404a4741 e93d12ae
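The first entry in the shortlog above, "null_blk: use sector_div instead of do_div", is the division fix Jens mentions. As a rough, hypothetical illustration of the difference (this helper is not from the patch): do_div() requires a 64-bit dividend, while sector_div() follows whatever width sector_t actually has, so it stays correct on 32-bit builds without CONFIG_LBDAF.

#include <linux/types.h>	/* sector_t */
#include <linux/genhd.h>	/* sector_div() */

/* Hypothetical example, not from the patch: pick a stripe index for a
 * sector.  sector_t is 32 or 64 bits depending on CONFIG_LBDAF, so
 * sector_div() is safe on both; do_div() assumes a u64 dividend. */
static unsigned int pick_stripe(sector_t sector, unsigned int nr_stripes)
{
	/* sector_div() divides 'sector' in place and returns the remainder */
	return sector_div(sector, nr_stripes);
}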
@@ -3665,13 +3665,12 @@ F:	drivers/scsi/dpt*
 F:	drivers/scsi/dpt/
 
 DRBD DRIVER
-P:	Philipp Reisner
-P:	Lars Ellenberg
-M:	drbd-dev@lists.linbit.com
-L:	drbd-user@lists.linbit.com
+M:	Philipp Reisner <philipp.reisner@linbit.com>
+M:	Lars Ellenberg <lars.ellenberg@linbit.com>
+L:	drbd-dev@lists.linbit.com
 W:	http://www.drbd.org
-T:	git git://git.drbd.org/linux-2.6-drbd.git drbd
-T:	git git://git.drbd.org/drbd-8.3.git
+T:	git git://git.linbit.com/linux-drbd.git
+T:	git git://git.linbit.com/drbd-8.4.git
 S:	Supported
 F:	drivers/block/drbd/
 F:	lib/lru_cache.c
......
This diff is collapsed.
@@ -24,7 +24,7 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 #include <linux/drbd.h>
@@ -479,8 +479,14 @@ void drbd_bm_cleanup(struct drbd_device *device)
  * this masks out the remaining bits.
  * Returns the number of bits cleared.
  */
+#ifndef BITS_PER_PAGE
 #define BITS_PER_PAGE		(1UL << (PAGE_SHIFT + 3))
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)
+#else
+# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
+#  error "ambiguous BITS_PER_PAGE"
+# endif
+#endif
 #define BITS_PER_LONG_MASK	(BITS_PER_LONG - 1)
 static int bm_clear_surplus(struct drbd_bitmap *b)
 {
@@ -559,21 +565,19 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
 	unsigned long *p_addr;
 	unsigned long bits = 0;
 	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
-	int idx, i, last_word;
+	int idx, last_word;
 
 	/* all but last page */
 	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
 		p_addr = __bm_map_pidx(b, idx);
-		for (i = 0; i < LWPP; i++)
-			bits += hweight_long(p_addr[i]);
+		bits += bitmap_weight(p_addr, BITS_PER_PAGE);
 		__bm_unmap(p_addr);
 		cond_resched();
 	}
 	/* last (or only) page */
 	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
 	p_addr = __bm_map_pidx(b, idx);
-	for (i = 0; i < last_word; i++)
-		bits += hweight_long(p_addr[i]);
+	bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
 	p_addr[last_word] &= cpu_to_lel(mask);
 	bits += hweight_long(p_addr[last_word]);
 	/* 32bit arch, may have an unused padding long */
@@ -1419,6 +1423,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
 	int bits;
 	int changed = 0;
 	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
+
+	/* I think it is more cache line friendly to hweight_long then set to ~0UL,
+	 * than to first bitmap_weight() all words, then bitmap_fill() all words */
 	for (i = first_word; i < last_word; i++) {
 		bits = hweight_long(paddr[i]);
 		paddr[i] = ~0UL;
@@ -1628,8 +1635,7 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
 		int n = e-s;
 		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
 		bm = p_addr + MLPP(s);
-		while (n--)
-			count += hweight_long(*bm++);
+		count += bitmap_weight(bm, n * BITS_PER_LONG);
 		bm_unmap(p_addr);
 	} else {
 		drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
......
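The drbd_bitmap.c hunks above replace open-coded hweight_long() loops with bitmap_weight(). A minimal sketch of the equivalence, with illustrative helper names that are not part of the patch:

#include <linux/bitmap.h>	/* bitmap_weight() */
#include <linux/bitops.h>	/* hweight_long() */

/* Illustration only: the open-coded loop the patch removes ... */
static unsigned long count_bits_old(const unsigned long *p_addr, unsigned int nlongs)
{
	unsigned long bits = 0;
	unsigned int i;

	for (i = 0; i < nlongs; i++)
		bits += hweight_long(p_addr[i]);
	return bits;
}

/* ... and its single-call replacement; both count the set bits in the range. */
static unsigned long count_bits_new(const unsigned long *p_addr, unsigned int nlongs)
{
	return bitmap_weight(p_addr, nlongs * BITS_PER_LONG);
}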
@@ -771,6 +771,13 @@ static int device_data_gen_id_show(struct seq_file *m, void *ignored)
 	return 0;
 }
 
+static int device_ed_gen_id_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_device *device = m->private;
+	seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
+	return 0;
+}
+
 #define drbd_debugfs_device_attr(name)						\
 static int device_ ## name ## _open(struct inode *inode, struct file *file)	\
 {										\
@@ -796,6 +803,7 @@ drbd_debugfs_device_attr(oldest_requests)
 drbd_debugfs_device_attr(act_log_extents)
 drbd_debugfs_device_attr(resync_extents)
 drbd_debugfs_device_attr(data_gen_id)
+drbd_debugfs_device_attr(ed_gen_id)
 
 void drbd_debugfs_device_add(struct drbd_device *device)
 {
@@ -839,6 +847,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
 	DCF(act_log_extents);
 	DCF(resync_extents);
 	DCF(data_gen_id);
+	DCF(ed_gen_id);
 #undef DCF
 	return;
@@ -854,6 +863,7 @@ void drbd_debugfs_device_cleanup(struct drbd_device *device)
 	drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
 	drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
 	drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+	drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id);
 	drbd_debugfs_remove(&device->debugfs_vol);
 }
......
...@@ -77,13 +77,6 @@ extern int fault_devs; ...@@ -77,13 +77,6 @@ extern int fault_devs;
extern char usermode_helper[]; extern char usermode_helper[];
/* I don't remember why XCPU ...
* This is used to wake the asender,
* and to interrupt sending the sending task
* on disconnect.
*/
#define DRBD_SIG SIGXCPU
/* This is used to stop/restart our threads. /* This is used to stop/restart our threads.
* Cannot use SIGTERM nor SIGKILL, since these * Cannot use SIGTERM nor SIGKILL, since these
* are sent out by init on runlevel changes * are sent out by init on runlevel changes
...@@ -292,6 +285,9 @@ struct drbd_device_work { ...@@ -292,6 +285,9 @@ struct drbd_device_work {
extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *); extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
extern void lock_all_resources(void);
extern void unlock_all_resources(void);
struct drbd_request { struct drbd_request {
struct drbd_work w; struct drbd_work w;
struct drbd_device *device; struct drbd_device *device;
...@@ -504,7 +500,6 @@ enum { ...@@ -504,7 +500,6 @@ enum {
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
SUSPEND_IO, /* suspend application io */
BITMAP_IO, /* suspend application io; BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */ once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */ BITMAP_IO_QUEUED, /* Started bitmap IO */
...@@ -632,12 +627,6 @@ struct bm_io_work { ...@@ -632,12 +627,6 @@ struct bm_io_work {
void (*done)(struct drbd_device *device, int rv); void (*done)(struct drbd_device *device, int rv);
}; };
enum write_ordering_e {
WO_none,
WO_drain_io,
WO_bdev_flush,
};
struct fifo_buffer { struct fifo_buffer {
unsigned int head_index; unsigned int head_index;
unsigned int size; unsigned int size;
...@@ -650,8 +639,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size); ...@@ -650,8 +639,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size);
enum { enum {
NET_CONGESTED, /* The data socket is congested */ NET_CONGESTED, /* The data socket is congested */
RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */ RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
SEND_PING, /* whether asender should send a ping asap */ SEND_PING,
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */ CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_OKAY,
...@@ -670,6 +658,8 @@ enum { ...@@ -670,6 +658,8 @@ enum {
DEVICE_WORK_PENDING, /* tell worker that some device has pending work */ DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
}; };
enum which_state { NOW, OLD = NOW, NEW };
struct drbd_resource { struct drbd_resource {
char *name; char *name;
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
...@@ -755,7 +745,8 @@ struct drbd_connection { ...@@ -755,7 +745,8 @@ struct drbd_connection {
unsigned long last_reconnect_jif; unsigned long last_reconnect_jif;
struct drbd_thread receiver; struct drbd_thread receiver;
struct drbd_thread worker; struct drbd_thread worker;
struct drbd_thread asender; struct drbd_thread ack_receiver;
struct workqueue_struct *ack_sender;
/* cached pointers, /* cached pointers,
* so we can look up the oldest pending requests more quickly. * so we can look up the oldest pending requests more quickly.
...@@ -774,6 +765,8 @@ struct drbd_connection { ...@@ -774,6 +765,8 @@ struct drbd_connection {
struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST]; struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
struct { struct {
unsigned long last_sent_barrier_jif;
/* whether this sender thread /* whether this sender thread
* has processed a single write yet. */ * has processed a single write yet. */
bool seen_any_write_yet; bool seen_any_write_yet;
...@@ -788,6 +781,17 @@ struct drbd_connection { ...@@ -788,6 +781,17 @@ struct drbd_connection {
} send; } send;
}; };
static inline bool has_net_conf(struct drbd_connection *connection)
{
bool has_net_conf;
rcu_read_lock();
has_net_conf = rcu_dereference(connection->net_conf);
rcu_read_unlock();
return has_net_conf;
}
void __update_timing_details( void __update_timing_details(
struct drbd_thread_timing_details *tdp, struct drbd_thread_timing_details *tdp,
unsigned int *cb_nr, unsigned int *cb_nr,
...@@ -811,6 +815,7 @@ struct drbd_peer_device { ...@@ -811,6 +815,7 @@ struct drbd_peer_device {
struct list_head peer_devices; struct list_head peer_devices;
struct drbd_device *device; struct drbd_device *device;
struct drbd_connection *connection; struct drbd_connection *connection;
struct work_struct send_acks_work;
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_peer_dev; struct dentry *debugfs_peer_dev;
#endif #endif
...@@ -829,6 +834,7 @@ struct drbd_device { ...@@ -829,6 +834,7 @@ struct drbd_device {
struct dentry *debugfs_vol_act_log_extents; struct dentry *debugfs_vol_act_log_extents;
struct dentry *debugfs_vol_resync_extents; struct dentry *debugfs_vol_resync_extents;
struct dentry *debugfs_vol_data_gen_id; struct dentry *debugfs_vol_data_gen_id;
struct dentry *debugfs_vol_ed_gen_id;
#endif #endif
unsigned int vnr; /* volume number within the connection */ unsigned int vnr; /* volume number within the connection */
...@@ -873,6 +879,7 @@ struct drbd_device { ...@@ -873,6 +879,7 @@ struct drbd_device {
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
atomic_t unacked_cnt; /* Need to send replies for */ atomic_t unacked_cnt; /* Need to send replies for */
atomic_t local_cnt; /* Waiting for local completion */ atomic_t local_cnt; /* Waiting for local completion */
atomic_t suspend_cnt;
/* Interval tree of pending local requests */ /* Interval tree of pending local requests */
struct rb_root read_requests; struct rb_root read_requests;
...@@ -1020,6 +1027,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev ...@@ -1020,6 +1027,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev
return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices); return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
} }
static inline struct drbd_peer_device *
conn_peer_device(struct drbd_connection *connection, int volume_number)
{
return idr_find(&connection->peer_devices, volume_number);
}
#define for_each_resource(resource, _resources) \ #define for_each_resource(resource, _resources) \
list_for_each_entry(resource, _resources, resources) list_for_each_entry(resource, _resources, resources)
...@@ -1113,7 +1126,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int ...@@ -1113,7 +1126,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
extern int drbd_send_bitmap(struct drbd_device *device); extern int drbd_send_bitmap(struct drbd_device *device);
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
extern void drbd_free_ldev(struct drbd_backing_dev *ldev); extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device); extern void drbd_device_cleanup(struct drbd_device *device);
void drbd_print_uuids(struct drbd_device *device, const char *text); void drbd_print_uuids(struct drbd_device *device, const char *text);
...@@ -1424,7 +1437,7 @@ extern struct bio_set *drbd_md_io_bio_set; ...@@ -1424,7 +1437,7 @@ extern struct bio_set *drbd_md_io_bio_set;
/* to allocate from that set */ /* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
extern rwlock_t global_state_lock; extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection); extern int conn_lowest_minor(struct drbd_connection *connection);
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor); extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
...@@ -1454,6 +1467,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); ...@@ -1454,6 +1467,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
/* drbd_nl.c */ /* drbd_nl.c */
extern struct mutex notification_mutex;
extern void drbd_suspend_io(struct drbd_device *device); extern void drbd_suspend_io(struct drbd_device *device);
extern void drbd_resume_io(struct drbd_device *device); extern void drbd_resume_io(struct drbd_device *device);
extern char *ppsize(char *buf, unsigned long long size); extern char *ppsize(char *buf, unsigned long long size);
...@@ -1536,7 +1552,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); ...@@ -1536,7 +1552,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
/* drbd_receiver.c */ /* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_asender(struct drbd_thread *thi); extern int drbd_ack_receiver(struct drbd_thread *thi);
extern void drbd_send_ping_wf(struct work_struct *ws);
extern void drbd_send_acks_wf(struct work_struct *ws);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool throttle_if_app_is_waiting); bool throttle_if_app_is_waiting);
...@@ -1649,7 +1667,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s ...@@ -1649,7 +1667,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s
#define drbd_rs_failed_io(device, sector, size) \ #define drbd_rs_failed_io(device, sector, size) \
__drbd_change_sync(device, sector, size, RECORD_RS_FAILED) __drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
extern void drbd_al_shrink(struct drbd_device *device); extern void drbd_al_shrink(struct drbd_device *device);
extern int drbd_initialize_al(struct drbd_device *, void *); extern int drbd_al_initialize(struct drbd_device *, void *);
/* drbd_nl.c */ /* drbd_nl.c */
/* state info broadcast */ /* state info broadcast */
...@@ -1668,6 +1686,29 @@ struct sib_info { ...@@ -1668,6 +1686,29 @@ struct sib_info {
}; };
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
extern void notify_resource_state(struct sk_buff *,
unsigned int,
struct drbd_resource *,
struct resource_info *,
enum drbd_notification_type);
extern void notify_device_state(struct sk_buff *,
unsigned int,
struct drbd_device *,
struct device_info *,
enum drbd_notification_type);
extern void notify_connection_state(struct sk_buff *,
unsigned int,
struct drbd_connection *,
struct connection_info *,
enum drbd_notification_type);
extern void notify_peer_device_state(struct sk_buff *,
unsigned int,
struct drbd_peer_device *,
struct peer_device_info *,
enum drbd_notification_type);
extern void notify_helper(enum drbd_notification_type, struct drbd_device *,
struct drbd_connection *, const char *, int);
/* /*
* inline helper functions * inline helper functions
*************************/ *************************/
...@@ -1694,19 +1735,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r ...@@ -1694,19 +1735,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r
return 0; return 0;
} }
static inline enum drbd_state_rv
_drbd_set_state(struct drbd_device *device, union drbd_state ns,
enum chg_state_flags flags, struct completion *done)
{
enum drbd_state_rv rv;
read_lock(&global_state_lock);
rv = __drbd_set_state(device, ns, flags, done);
read_unlock(&global_state_lock);
return rv;
}
static inline union drbd_state drbd_read_state(struct drbd_device *device) static inline union drbd_state drbd_read_state(struct drbd_device *device)
{ {
struct drbd_resource *resource = device->resource; struct drbd_resource *resource = device->resource;
...@@ -1937,16 +1965,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit) ...@@ -1937,16 +1965,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit)
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
static inline void wake_asender(struct drbd_connection *connection) /* To get the ack_receiver out of the blocking network stack,
* so it can change its sk_rcvtimeo from idle- to ping-timeout,
* and send a ping, we need to send a signal.
* Which signal we send is irrelevant. */
static inline void wake_ack_receiver(struct drbd_connection *connection)
{ {
if (test_bit(SIGNAL_ASENDER, &connection->flags)) struct task_struct *task = connection->ack_receiver.task;
force_sig(DRBD_SIG, connection->asender.task); if (task && get_t_state(&connection->ack_receiver) == RUNNING)
force_sig(SIGXCPU, task);
} }
static inline void request_ping(struct drbd_connection *connection) static inline void request_ping(struct drbd_connection *connection)
{ {
set_bit(SEND_PING, &connection->flags); set_bit(SEND_PING, &connection->flags);
wake_asender(connection); wake_ack_receiver(connection);
} }
extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *); extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
...@@ -2230,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device) ...@@ -2230,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
if (drbd_suspended(device)) if (drbd_suspended(device))
return false; return false;
if (test_bit(SUSPEND_IO, &device->flags)) if (atomic_read(&device->suspend_cnt))
return false; return false;
/* to avoid potential deadlock or bitmap corruption, /* to avoid potential deadlock or bitmap corruption,
......
...@@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 ...@@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
*/ */
struct idr drbd_devices; struct idr drbd_devices;
struct list_head drbd_resources; struct list_head drbd_resources;
struct mutex resources_mutex;
struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache; /* peer requests */ struct kmem_cache *drbd_ee_cache; /* peer requests */
...@@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str ...@@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str
/* long elapsed = (long)(jiffies - device->last_received); */ /* long elapsed = (long)(jiffies - device->last_received); */
drop_it = connection->meta.socket == sock drop_it = connection->meta.socket == sock
|| !connection->asender.task || !connection->ack_receiver.task
|| get_t_state(&connection->asender) != RUNNING || get_t_state(&connection->ack_receiver) != RUNNING
|| connection->cstate < C_WF_REPORT_PARAMS; || connection->cstate < C_WF_REPORT_PARAMS;
if (drop_it) if (drop_it)
...@@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, ...@@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
drbd_update_congested(connection); drbd_update_congested(connection);
} }
do { do {
/* STRANGE
* tcp_sendmsg does _not_ use its size parameter at all ?
*
* -EAGAIN on timeout, -EINTR on signal.
*/
/* THINK
* do we need to block DRBD_SIG if sock == &meta.socket ??
* otherwise wake_asender() might interrupt some send_*Ack !
*/
rv = kernel_sendmsg(sock, &msg, &iov, 1, size); rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
if (rv == -EAGAIN) { if (rv == -EAGAIN) {
if (we_should_drop_the_connection(connection, sock)) if (we_should_drop_the_connection(connection, sock))
...@@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device) ...@@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device)
drbd_bm_cleanup(device); drbd_bm_cleanup(device);
} }
drbd_free_ldev(device->ldev); drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL; device->ldev = NULL;
clear_bit(AL_SUSPENDED, &device->flags); clear_bit(AL_SUSPENDED, &device->flags);
...@@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref) ...@@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref)
if (device->this_bdev) if (device->this_bdev)
bdput(device->this_bdev); bdput(device->this_bdev);
drbd_free_ldev(device->ldev); drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL; device->ldev = NULL;
drbd_release_all_peer_reqs(device); drbd_release_all_peer_reqs(device);
...@@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op ...@@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
cpumask_copy(resource->cpu_mask, new_cpu_mask); cpumask_copy(resource->cpu_mask, new_cpu_mask);
for_each_connection_rcu(connection, resource) { for_each_connection_rcu(connection, resource) {
connection->receiver.reset_cpu_mask = 1; connection->receiver.reset_cpu_mask = 1;
connection->asender.reset_cpu_mask = 1; connection->ack_receiver.reset_cpu_mask = 1;
connection->worker.reset_cpu_mask = 1; connection->worker.reset_cpu_mask = 1;
} }
} }
...@@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name) ...@@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
kref_init(&resource->kref); kref_init(&resource->kref);
idr_init(&resource->devices); idr_init(&resource->devices);
INIT_LIST_HEAD(&resource->connections); INIT_LIST_HEAD(&resource->connections);
resource->write_ordering = WO_bdev_flush; resource->write_ordering = WO_BDEV_FLUSH;
list_add_tail_rcu(&resource->resources, &drbd_resources); list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update); mutex_init(&resource->conf_update);
mutex_init(&resource->adm_mutex); mutex_init(&resource->adm_mutex);
...@@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) ...@@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
connection->receiver.connection = connection; connection->receiver.connection = connection;
drbd_thread_init(resource, &connection->worker, drbd_worker, "worker"); drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
connection->worker.connection = connection; connection->worker.connection = connection;
drbd_thread_init(resource, &connection->asender, drbd_asender, "asender"); drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
connection->asender.connection = connection; connection->ack_receiver.connection = connection;
kref_init(&connection->kref); kref_init(&connection->kref);
...@@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device) ...@@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device)
{ {
/* opencoded create_singlethread_workqueue(), /* opencoded create_singlethread_workqueue(),
* to be able to say "drbd%d", ..., minor */ * to be able to say "drbd%d", ..., minor */
device->submit.wq = alloc_workqueue("drbd%u_submit", device->submit.wq =
WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor); alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
if (!device->submit.wq) if (!device->submit.wq)
return -ENOMEM; return -ENOMEM;
...@@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig ...@@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
goto out_idr_remove_from_resource; goto out_idr_remove_from_resource;
} }
kref_get(&connection->kref); kref_get(&connection->kref);
INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
} }
if (init_submitter(device)) { if (init_submitter(device)) {
...@@ -2923,7 +2916,7 @@ static int __init drbd_init(void) ...@@ -2923,7 +2916,7 @@ static int __init drbd_init(void)
drbd_proc = NULL; /* play safe for drbd_cleanup */ drbd_proc = NULL; /* play safe for drbd_cleanup */
idr_init(&drbd_devices); idr_init(&drbd_devices);
rwlock_init(&global_state_lock); mutex_init(&resources_mutex);
INIT_LIST_HEAD(&drbd_resources); INIT_LIST_HEAD(&drbd_resources);
err = drbd_genl_register(); err = drbd_genl_register();
...@@ -2971,18 +2964,6 @@ static int __init drbd_init(void) ...@@ -2971,18 +2964,6 @@ static int __init drbd_init(void)
return err; return err;
} }
void drbd_free_ldev(struct drbd_backing_dev *ldev)
{
if (ldev == NULL)
return;
blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
kfree(ldev->disk_conf);
kfree(ldev);
}
static void drbd_free_one_sock(struct drbd_socket *ds) static void drbd_free_one_sock(struct drbd_socket *ds)
{ {
struct socket *s; struct socket *s;
...@@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) ...@@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
* and read it. */ * and read it. */
bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
bdev->md.md_offset = drbd_md_ss(bdev); bdev->md.md_offset = drbd_md_ss(bdev);
/* Even for (flexible or indexed) external meta data,
* initially restrict us to the 4k superblock for now.
* Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
bdev->md.md_size_sect = 8;
if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) { if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
/* NOTE: can't do normal error processing here as this is /* NOTE: can't do normal error processing here as this is
...@@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device, ...@@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
spin_lock_irq(&device->resource->req_lock); spin_lock_irq(&device->resource->req_lock);
set_bit(BITMAP_IO, &device->flags); set_bit(BITMAP_IO, &device->flags);
if (atomic_read(&device->ap_bio_cnt) == 0) { /* don't wait for pending application IO if the caller indicates that
* application IO does not conflict anyways. */
if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
drbd_queue_work(&first_peer_device(device)->connection->sender_work, drbd_queue_work(&first_peer_device(device)->connection->sender_work,
&device->bm_io_work.w); &device->bm_io_work.w);
...@@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i) ...@@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i)
return 0; return 0;
} }
void lock_all_resources(void)
{
struct drbd_resource *resource;
int __maybe_unused i = 0;
mutex_lock(&resources_mutex);
local_irq_disable();
for_each_resource(resource, &drbd_resources)
spin_lock_nested(&resource->req_lock, i++);
}
void unlock_all_resources(void)
{
struct drbd_resource *resource;
for_each_resource(resource, &drbd_resources)
spin_unlock(&resource->req_lock);
local_irq_enable();
mutex_unlock(&resources_mutex);
}
#ifdef CONFIG_DRBD_FAULT_INJECTION #ifdef CONFIG_DRBD_FAULT_INJECTION
/* Fault insertion support including random number generator shamelessly /* Fault insertion support including random number generator shamelessly
* stolen from kernel/rcutorture.c */ * stolen from kernel/rcutorture.c */
......
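The drbd_main.c changes above add lock_all_resources()/unlock_all_resources() as the replacement for the old global_state_lock write lock. A hedged sketch of the intended calling pattern follows; the caller shown is illustrative, the real call sites live in drbd_nl.c and drbd_worker.c.

/* Illustrative caller only: re-evaluate resync-after dependencies while
 * every resource's req_lock is held, as the "caller must lock_all_resources()"
 * comments in drbd_worker.c require. */
static void example_resync_after_reassign(struct drbd_device *device, int new_minor)
{
	lock_all_resources();		/* resources_mutex + all req_locks, irqs off */
	if (drbd_resync_after_valid(device, new_minor) == NO_ERROR)
		drbd_resync_after_changed(device);
	unlock_all_resources();
}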
This diff is collapsed.
@@ -245,9 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 	char wp;
 
 	static char write_ordering_chars[] = {
-		[WO_none] = 'n',
-		[WO_drain_io] = 'd',
-		[WO_bdev_flush] = 'f',
+		[WO_NONE] = 'n',
+		[WO_DRAIN_IO] = 'd',
+		[WO_BDEV_FLUSH] = 'f',
 	};
 
 	seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
......
@@ -23,7 +23,7 @@ enum drbd_packet {
 	P_AUTH_RESPONSE	      = 0x11,
 	P_STATE_CHG_REQ	      = 0x12,
 
-	/* asender (meta socket */
+	/* (meta socket) */
 	P_PING		      = 0x13,
 	P_PING_ACK	      = 0x14,
 	P_RECV_ACK	      = 0x15,	/* Used in protocol B */
......
This diff is collapsed.
...@@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, ...@@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
kref_get(&req->kref); /* wait for the DONE */ kref_get(&req->kref); /* wait for the DONE */
if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
/* potentially already completed in the asender thread */ /* potentially already completed in the ack_receiver thread */
if (!(s & RQ_NET_DONE)) { if (!(s & RQ_NET_DONE)) {
atomic_add(req->i.size >> 9, &device->ap_in_flight); atomic_add(req->i.size >> 9, &device->ap_in_flight);
set_if_null_req_not_net_done(peer_device, req); set_if_null_req_not_net_done(peer_device, req);
} }
if (s & RQ_NET_PENDING) if (req->rq_state & RQ_NET_PENDING)
set_if_null_req_ack_pending(peer_device, req); set_if_null_req_ack_pending(peer_device, req);
} }
...@@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req) ...@@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req)
return false; return false;
} }
bool drbd_should_do_remote(union drbd_dev_state s)
{
return s.pdsk == D_UP_TO_DATE ||
(s.pdsk >= D_INCONSISTENT &&
s.conn >= C_WF_BITMAP_T &&
s.conn < C_AHEAD);
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
states. */
}
static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
{
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
since we enter state C_AHEAD only if proto >= 96 */
}
/* returns number of connections (== 1, for drbd 8.4) /* returns number of connections (== 1, for drbd 8.4)
* expected to actually write this data, * expected to actually write this data,
* which does NOT include those that we are L_AHEAD for. */ * which does NOT include those that we are L_AHEAD for. */
...@@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req) ...@@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
* stable storage, and this is a WRITE, we may not even submit * stable storage, and this is a WRITE, we may not even submit
* this bio. */ * this bio. */
if (get_ldev(device)) { if (get_ldev(device)) {
req->pre_submit_jif = jiffies;
if (drbd_insert_fault(device, if (drbd_insert_fault(device,
rw == WRITE ? DRBD_FAULT_DT_WR rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD : rw == READ ? DRBD_FAULT_DT_RD
...@@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request ...@@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
&device->pending_master_completion[rw == WRITE]); &device->pending_master_completion[rw == WRITE]);
if (req->private_bio) { if (req->private_bio) {
/* needs to be marked within the same spinlock */ /* needs to be marked within the same spinlock */
req->pre_submit_jif = jiffies;
list_add_tail(&req->req_pending_local, list_add_tail(&req->req_pending_local,
&device->pending_completion[rw == WRITE]); &device->pending_completion[rw == WRITE]);
_req_mod(req, TO_BE_SUBMITTED); _req_mod(req, TO_BE_SUBMITTED);
...@@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) ...@@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE; return BLK_QC_T_NONE;
} }
static bool net_timeout_reached(struct drbd_request *net_req,
struct drbd_connection *connection,
unsigned long now, unsigned long ent,
unsigned int ko_count, unsigned int timeout)
{
struct drbd_device *device = net_req->device;
if (!time_after(now, net_req->pre_send_jif + ent))
return false;
if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
return false;
if (net_req->rq_state & RQ_NET_PENDING) {
drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
return true;
}
/* We received an ACK already (or are using protocol A),
* but are waiting for the epoch closing barrier ack.
* Check if we sent the barrier already. We should not blame the peer
* for being unresponsive, if we did not even ask it yet. */
if (net_req->epoch == connection->send.current_epoch_nr) {
drbd_warn(device,
"We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
return false;
}
/* Worst case: we may have been blocked for whatever reason, then
* suddenly are able to send a lot of requests (and epoch separating
* barriers) in quick succession.
* The timestamp of the net_req may be much too old and not correspond
* to the sending time of the relevant unack'ed barrier packet, so
* would trigger a spurious timeout. The latest barrier packet may
* have a too recent timestamp to trigger the timeout, potentially miss
* a timeout. Right now we don't have a place to conveniently store
* these timestamps.
* But in this particular situation, the application requests are still
* completed to upper layers, DRBD should still "feel" responsive.
* No need yet to kill this connection, it may still recover.
* If not, eventually we will have queued enough into the network for
* us to block. From that point of view, the timestamp of the last sent
* barrier packet is relevant enough.
*/
if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
connection->send.last_sent_barrier_jif, now,
jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
return true;
}
return false;
}
/* A request is considered timed out, if
* - we have some effective timeout from the configuration,
* with some state restrictions applied,
* - the oldest request is waiting for a response from the network
* resp. the local disk,
* - the oldest request is in fact older than the effective timeout,
* - the connection was established (resp. disk was attached)
* for longer than the timeout already.
* Note that for 32bit jiffies and very stable connections/disks,
* we may have a wrap around, which is catched by
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
*
* Side effect: once per 32bit wrap-around interval, which means every
* ~198 days with 250 HZ, we have a window where the timeout would need
* to expire twice (worst case) to become effective. Good enough.
*/
void request_timer_fn(unsigned long data) void request_timer_fn(unsigned long data)
{ {
struct drbd_device *device = (struct drbd_device *) data; struct drbd_device *device = (struct drbd_device *) data;
...@@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data) ...@@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data)
unsigned long oldest_submit_jif; unsigned long oldest_submit_jif;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now; unsigned long now;
unsigned int ko_count = 0, timeout = 0;
rcu_read_lock(); rcu_read_lock();
nc = rcu_dereference(connection->net_conf); nc = rcu_dereference(connection->net_conf);
if (nc && device->state.conn >= C_WF_REPORT_PARAMS) if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
ent = nc->timeout * HZ/10 * nc->ko_count; ko_count = nc->ko_count;
timeout = nc->timeout;
}
if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */ if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10; dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
...@@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data) ...@@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data)
} }
rcu_read_unlock(); rcu_read_unlock();
ent = timeout * HZ/10 * ko_count;
et = min_not_zero(dt, ent); et = min_not_zero(dt, ent);
if (!et) if (!et)
...@@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data) ...@@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data)
spin_lock_irq(&device->resource->req_lock); spin_lock_irq(&device->resource->req_lock);
req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
req_peer = connection->req_not_net_done;
/* maybe the oldest request waiting for the peer is in fact still /* maybe the oldest request waiting for the peer is in fact still
* blocking in tcp sendmsg */ * blocking in tcp sendmsg. That's ok, though, that's handled via the
if (!req_peer && connection->req_next && connection->req_next->pre_send_jif) * socket send timeout, requesting a ping, and bumping ko-count in
req_peer = connection->req_next; * we_should_drop_the_connection().
*/
/* check the oldest request we did successfully sent,
* but which is still waiting for an ACK. */
req_peer = connection->req_ack_pending;
/* if we don't have such request (e.g. protocoll A)
* check the oldest requests which is still waiting on its epoch
* closing barrier ack. */
if (!req_peer)
req_peer = connection->req_not_net_done;
/* evaluate the oldest peer request only in one timer! */ /* evaluate the oldest peer request only in one timer! */
if (req_peer && req_peer->device != device) if (req_peer && req_peer->device != device)
...@@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data) ...@@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data)
: req_write ? req_write->pre_submit_jif : req_write ? req_write->pre_submit_jif
: req_read ? req_read->pre_submit_jif : now; : req_read ? req_read->pre_submit_jif : now;
/* The request is considered timed out, if if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
* - we have some effective timeout from the configuration,
* with above state restrictions applied,
* - the oldest request is waiting for a response from the network
* resp. the local disk,
* - the oldest request is in fact older than the effective timeout,
* - the connection was established (resp. disk was attached)
* for longer than the timeout already.
* Note that for 32bit jiffies and very stable connections/disks,
* we may have a wrap around, which is catched by
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
*
* Side effect: once per 32bit wrap-around interval, which means every
* ~198 days with 250 HZ, we have a window where the timeout would need
* to expire twice (worst case) to become effective. Good enough.
*/
if (ent && req_peer &&
time_after(now, req_peer->pre_send_jif + ent) &&
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD); _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
}
if (dt && oldest_submit_jif != now && if (dt && oldest_submit_jif != now &&
time_after(now, oldest_submit_jif + dt) && time_after(now, oldest_submit_jif + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
......
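request_timer_fn() above now derives the effective network timeout from ko-count and timeout, where timeout is configured in 0.1-second units (as the warning messages spell out). A small sketch of that arithmetic; the values are illustrative, not the configured defaults:

/* ent is in jiffies; timeout_dsec is in deciseconds (0.1 s units). */
static unsigned long effective_net_timeout(unsigned int timeout_dsec,
					   unsigned int ko_count)
{
	return (unsigned long)timeout_dsec * HZ / 10 * ko_count;
}

/* e.g. timeout = 60 (6 s) and ko-count = 7 give a ~42 s window before
 * the connection is forced into C_TIMEOUT. */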
@@ -331,21 +331,6 @@ static inline int req_mod(struct drbd_request *req,
 	return rv;
 }
 
-static inline bool drbd_should_do_remote(union drbd_dev_state s)
-{
-	return s.pdsk == D_UP_TO_DATE ||
-		(s.pdsk >= D_INCONSISTENT &&
-		 s.conn >= C_WF_BITMAP_T &&
-		 s.conn < C_AHEAD);
-	/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
-	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
-	   states. */
-}
-
-static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
-{
-	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
-	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
-	   since we enter state C_AHEAD only if proto >= 96 */
-}
+extern bool drbd_should_do_remote(union drbd_dev_state);
 
 #endif
This diff is collapsed.
@@ -122,9 +122,9 @@ extern enum drbd_state_rv
 _drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state,
 					union drbd_state, enum chg_state_flags);
 
-extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
-					   enum chg_state_flags,
-					   struct completion *done);
+extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state,
+					  enum chg_state_flags,
+					  struct completion *done);
 
 extern void print_st_err(struct drbd_device *, union drbd_state,
 			 union drbd_state, int);
......
#ifndef DRBD_STATE_CHANGE_H
#define DRBD_STATE_CHANGE_H
struct drbd_resource_state_change {
struct drbd_resource *resource;
enum drbd_role role[2];
bool susp[2];
bool susp_nod[2];
bool susp_fen[2];
};
struct drbd_device_state_change {
struct drbd_device *device;
enum drbd_disk_state disk_state[2];
};
struct drbd_connection_state_change {
struct drbd_connection *connection;
enum drbd_conns cstate[2]; /* drbd9: enum drbd_conn_state */
enum drbd_role peer_role[2];
};
struct drbd_peer_device_state_change {
struct drbd_peer_device *peer_device;
enum drbd_disk_state disk_state[2];
enum drbd_conns repl_state[2]; /* drbd9: enum drbd_repl_state */
bool resync_susp_user[2];
bool resync_susp_peer[2];
bool resync_susp_dependency[2];
};
struct drbd_state_change {
struct list_head list;
unsigned int n_devices;
unsigned int n_connections;
struct drbd_resource_state_change resource[1];
struct drbd_device_state_change *devices;
struct drbd_connection_state_change *connections;
struct drbd_peer_device_state_change *peer_devices;
};
extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t);
extern void copy_old_to_new_state_change(struct drbd_state_change *);
extern void forget_state_change(struct drbd_state_change *);
extern void notify_resource_state_change(struct sk_buff *,
unsigned int,
struct drbd_resource_state_change *,
enum drbd_notification_type type);
extern void notify_connection_state_change(struct sk_buff *,
unsigned int,
struct drbd_connection_state_change *,
enum drbd_notification_type type);
extern void notify_device_state_change(struct sk_buff *,
unsigned int,
struct drbd_device_state_change *,
enum drbd_notification_type type);
extern void notify_peer_device_state_change(struct sk_buff *,
unsigned int,
struct drbd_peer_device_state_change *,
enum drbd_notification_type type);
#endif /* DRBD_STATE_CHANGE_H */
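The new drbd_state_change.h above records each object's state before and after a transition in two-element arrays, meant to be indexed with the enum which_state { NOW, OLD = NOW, NEW } added in drbd_int.h. A minimal, assumed usage sketch (not taken from the patch):

/* Sketch only: report whether the resource role flipped in this transition.
 * Indexing with OLD/NEW relies on the which_state enum from drbd_int.h. */
static bool resource_role_changed(const struct drbd_resource_state_change *rsc)
{
	return rsc->role[OLD] != rsc->role[NEW];
}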
...@@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int); ...@@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int);
* *
*/ */
/* About the global_state_lock
Each state transition on an device holds a read lock. In case we have
to evaluate the resync after dependencies, we grab a write lock, because
we need stable states on all devices for that. */
rwlock_t global_state_lock;
/* used for synchronous meta data and bitmap IO /* used for synchronous meta data and bitmap IO
* submitted by drbd_md_sync_page_io() * submitted by drbd_md_sync_page_io()
*/ */
...@@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
unsigned long flags = 0; unsigned long flags = 0;
struct drbd_peer_device *peer_device = peer_req->peer_device; struct drbd_peer_device *peer_device = peer_req->peer_device;
struct drbd_device *device = peer_device->device; struct drbd_device *device = peer_device->device;
struct drbd_connection *connection = peer_device->connection;
struct drbd_interval i; struct drbd_interval i;
int do_wake; int do_wake;
u64 block_id; u64 block_id;
...@@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR) if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR); __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
kref_put(&device->kref, drbd_destroy_device);
}
spin_unlock_irqrestore(&device->resource->req_lock, flags); spin_unlock_irqrestore(&device->resource->req_lock, flags);
if (block_id == ID_SYNCER) if (block_id == ID_SYNCER)
...@@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
if (do_al_complete_io) if (do_al_complete_io)
drbd_al_complete_io(device, &i); drbd_al_complete_io(device, &i);
wake_asender(peer_device->connection);
put_ldev(device); put_ldev(device);
} }
...@@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio) ...@@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio)
} }
} }
void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
device->minor, device->resource->name, device->vnr);
}
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
*/ */
void drbd_request_endio(struct bio *bio) void drbd_request_endio(struct bio *bio)
...@@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio) ...@@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio)
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
if (!bio->bi_error) if (!bio->bi_error)
panic("possible random memory corruption caused by delayed completion of aborted local request\n"); drbd_panic_after_delayed_completion_of_aborted_request(device);
} }
/* to avoid recursion in __req_mod */ /* to avoid recursion in __req_mod */
...@@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection) ...@@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection)
p->barrier = connection->send.current_epoch_nr; p->barrier = connection->send.current_epoch_nr;
p->pad = 0; p->pad = 0;
connection->send.current_epoch_writes = 0; connection->send.current_epoch_writes = 0;
connection->send.last_sent_barrier_jif = jiffies;
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
} }
...@@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned ...@@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned
connection->send.seen_any_write_yet = true; connection->send.seen_any_write_yet = true;
connection->send.current_epoch_nr = epoch; connection->send.current_epoch_nr = epoch;
connection->send.current_epoch_writes = 0; connection->send.current_epoch_writes = 0;
connection->send.last_sent_barrier_jif = jiffies;
} }
} }
...@@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device) ...@@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device)
} }
/** /**
* _drbd_pause_after() - Pause resync on all devices that may not resync now * drbd_pause_after() - Pause resync on all devices that may not resync now
* @device: DRBD device. * @device: DRBD device.
* *
* Called from process context only (admin command and after_state_ch). * Called from process context only (admin command and after_state_ch).
*/ */
static int _drbd_pause_after(struct drbd_device *device) static bool drbd_pause_after(struct drbd_device *device)
{ {
bool changed = false;
struct drbd_device *odev; struct drbd_device *odev;
int i, rv = 0; int i;
rcu_read_lock(); rcu_read_lock();
idr_for_each_entry(&drbd_devices, odev, i) { idr_for_each_entry(&drbd_devices, odev, i) {
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
continue; continue;
if (!_drbd_may_sync_now(odev)) if (!_drbd_may_sync_now(odev) &&
rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) _drbd_set_state(_NS(odev, aftr_isp, 1),
!= SS_NOTHING_TO_DO); CS_HARD, NULL) != SS_NOTHING_TO_DO)
changed = true;
} }
rcu_read_unlock(); rcu_read_unlock();
return rv; return changed;
} }
/** /**
* _drbd_resume_next() - Resume resync on all devices that may resync now * drbd_resume_next() - Resume resync on all devices that may resync now
* @device: DRBD device. * @device: DRBD device.
* *
* Called from process context only (admin command and worker). * Called from process context only (admin command and worker).
*/ */
static int _drbd_resume_next(struct drbd_device *device) static bool drbd_resume_next(struct drbd_device *device)
{ {
bool changed = false;
struct drbd_device *odev; struct drbd_device *odev;
int i, rv = 0; int i;
rcu_read_lock(); rcu_read_lock();
idr_for_each_entry(&drbd_devices, odev, i) { idr_for_each_entry(&drbd_devices, odev, i) {
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
continue; continue;
if (odev->state.aftr_isp) { if (odev->state.aftr_isp) {
if (_drbd_may_sync_now(odev)) if (_drbd_may_sync_now(odev) &&
rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), _drbd_set_state(_NS(odev, aftr_isp, 0),
CS_HARD, NULL) CS_HARD, NULL) != SS_NOTHING_TO_DO)
!= SS_NOTHING_TO_DO) ; changed = true;
} }
} }
rcu_read_unlock(); rcu_read_unlock();
return rv; return changed;
} }
void resume_next_sg(struct drbd_device *device) void resume_next_sg(struct drbd_device *device)
{ {
write_lock_irq(&global_state_lock); lock_all_resources();
_drbd_resume_next(device); drbd_resume_next(device);
write_unlock_irq(&global_state_lock); unlock_all_resources();
} }
void suspend_other_sg(struct drbd_device *device) void suspend_other_sg(struct drbd_device *device)
{ {
write_lock_irq(&global_state_lock); lock_all_resources();
_drbd_pause_after(device); drbd_pause_after(device);
write_unlock_irq(&global_state_lock); unlock_all_resources();
} }
/* caller must hold global_state_lock */ /* caller must lock_all_resources() */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{ {
struct drbd_device *odev; struct drbd_device *odev;
...@@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min ...@@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min
} }
} }
/* caller must hold global_state_lock */ /* caller must lock_all_resources() */
void drbd_resync_after_changed(struct drbd_device *device) void drbd_resync_after_changed(struct drbd_device *device)
{ {
int changes; int changed;
do { do {
changes = _drbd_pause_after(device); changed = drbd_pause_after(device);
changes |= _drbd_resume_next(device); changed |= drbd_resume_next(device);
} while (changes); } while (changed);
} }
void drbd_rs_controller_reset(struct drbd_device *device) void drbd_rs_controller_reset(struct drbd_device *device)
...@@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) ...@@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
} else { } else {
mutex_lock(device->state_mutex); mutex_lock(device->state_mutex);
} }
clear_bit(B_RS_H_DONE, &device->flags);
/* req_lock: serialize with drbd_send_and_submit() and others lock_all_resources();
* global_state_lock: for stable sync-after dependencies */ clear_bit(B_RS_H_DONE, &device->flags);
spin_lock_irq(&device->resource->req_lock);
write_lock(&global_state_lock);
/* Did some connection breakage or IO error race with us? */ /* Did some connection breakage or IO error race with us? */
if (device->state.conn < C_CONNECTED if (device->state.conn < C_CONNECTED
|| !get_ldev_if_state(device, D_NEGOTIATING)) { || !get_ldev_if_state(device, D_NEGOTIATING)) {
write_unlock(&global_state_lock); unlock_all_resources();
spin_unlock_irq(&device->resource->req_lock); goto out;
mutex_unlock(device->state_mutex);
return;
} }
ns = drbd_read_state(device); ns = drbd_read_state(device);
...@@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) ...@@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
else /* side == C_SYNC_SOURCE */ else /* side == C_SYNC_SOURCE */
ns.pdsk = D_INCONSISTENT; ns.pdsk = D_INCONSISTENT;
r = __drbd_set_state(device, ns, CS_VERBOSE, NULL); r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
ns = drbd_read_state(device); ns = drbd_read_state(device);
if (ns.conn < C_CONNECTED) if (ns.conn < C_CONNECTED)
...@@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) ...@@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->rs_mark_left[i] = tw; device->rs_mark_left[i] = tw;
device->rs_mark_time[i] = now; device->rs_mark_time[i] = now;
} }
_drbd_pause_after(device); drbd_pause_after(device);
/* Forget potentially stale cached per resync extent bit-counts. /* Forget potentially stale cached per resync extent bit-counts.
* Open coded drbd_rs_cancel_all(device), we already have IRQs * Open coded drbd_rs_cancel_all(device), we already have IRQs
* disabled, and know the disk state is ok. */ * disabled, and know the disk state is ok. */
...@@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) ...@@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->resync_wenr = LC_FREE; device->resync_wenr = LC_FREE;
spin_unlock(&device->al_lock); spin_unlock(&device->al_lock);
} }
write_unlock(&global_state_lock); unlock_all_resources();
spin_unlock_irq(&device->resource->req_lock);
if (r == SS_SUCCESS) { if (r == SS_SUCCESS) {
wake_up(&device->al_wait); /* for lc_reset() above */ wake_up(&device->al_wait); /* for lc_reset() above */
...@@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) ...@@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
drbd_md_sync(device); drbd_md_sync(device);
} }
put_ldev(device); put_ldev(device);
out:
mutex_unlock(device->state_mutex); mutex_unlock(device->state_mutex);
} }
...@@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device) ...@@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
device->act_log = NULL; device->act_log = NULL;
__acquire(local); __acquire(local);
drbd_free_ldev(device->ldev); drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL; device->ldev = NULL;
__release(local); __release(local);
......
...@@ -104,9 +104,9 @@ ...@@ -104,9 +104,9 @@
/* Device instance number, incremented each time a device is probed. */ /* Device instance number, incremented each time a device is probed. */
static int instance; static int instance;
struct list_head online_list; static struct list_head online_list;
struct list_head removing_list; static struct list_head removing_list;
spinlock_t dev_lock; static spinlock_t dev_lock;
/* /*
* Global variable used to hold the major block device number * Global variable used to hold the major block device number
......
...@@ -495,17 +495,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) ...@@ -495,17 +495,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
id->ppaf.ch_offset = 56; id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8; id->ppaf.ch_len = 8;
do_div(size, bs); /* convert size to pages */ sector_div(size, bs); /* convert size to pages */
do_div(size, 256); /* concert size to pgs pr blk */ size >>= 8; /* convert size to pages per block */
grp = &id->groups[0]; grp = &id->groups[0];
grp->mtype = 0; grp->mtype = 0;
grp->fmtype = 0; grp->fmtype = 0;
grp->num_ch = 1; grp->num_ch = 1;
grp->num_pg = 256; grp->num_pg = 256;
blksize = size; blksize = size;
do_div(size, (1 << 16)); size >>= 16;
grp->num_lun = size + 1; grp->num_lun = size + 1;
do_div(blksize, grp->num_lun); sector_div(blksize, grp->num_lun);
grp->num_blk = blksize; grp->num_blk = blksize;
grp->num_pln = 1; grp->num_pln = 1;
......
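The null_blk change above swaps do_div(), which requires a u64 operand, for sector_div(), which divides a sector_t in place; on 32-bit builds without CONFIG_LBDAF a sector_t is only 32 bits wide, which is what the old code tripped over. A minimal sketch of the helper's semantics (kernel-context fragment, values purely illustrative):

	sector_t n = 1000;
	u32 rem;

	rem = sector_div(n, 8);	/* divides n in place: n == 125, rem == 0 */
	n >>= 8;		/* powers of two need no helper, as done above */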
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/time.h> #include <linux/ktime.h>
#include <linux/hdreg.h> #include <linux/hdreg.h>
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/completion.h> #include <linux/completion.h>
...@@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) ...@@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
static unsigned int carm_fill_sync_time(struct carm_host *host, static unsigned int carm_fill_sync_time(struct carm_host *host,
unsigned int idx, void *mem) unsigned int idx, void *mem)
{ {
struct timeval tv;
struct carm_msg_sync_time *st = mem; struct carm_msg_sync_time *st = mem;
do_gettimeofday(&tv); time64_t tv = ktime_get_real_seconds();
memset(st, 0, sizeof(*st)); memset(st, 0, sizeof(*st));
st->type = CARM_MSG_MISC; st->type = CARM_MSG_MISC;
st->subtype = MISC_SET_TIME; st->subtype = MISC_SET_TIME;
st->handle = cpu_to_le32(TAG_ENCODE(idx)); st->handle = cpu_to_le32(TAG_ENCODE(idx));
st->timestamp = cpu_to_le32(tv.tv_sec); st->timestamp = cpu_to_le32(tv);
return sizeof(struct carm_msg_sync_time); return sizeof(struct carm_msg_sync_time);
} }
......
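The sx8 change above is the 2038 fix mentioned in the pull request: do_gettimeofday() fills a struct timeval whose tv_sec is a 32-bit long on 32-bit architectures, while ktime_get_real_seconds() returns a 64-bit time64_t. A hedged sketch of the pattern (fragment, kernel context assumed):

	time64_t now = ktime_get_real_seconds();	/* 64-bit seconds, y2038-safe */
	/* truncation to the hardware's 32-bit field happens in one visible place */
	st->timestamp = cpu_to_le32(now);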
This diff is collapsed.
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include <xen/interface/io/protocols.h> #include <xen/interface/io/protocols.h>
extern unsigned int xen_blkif_max_ring_order; extern unsigned int xen_blkif_max_ring_order;
extern unsigned int xenblk_max_queues;
/* /*
* This is the maximum number of segments that would be allowed in indirect * This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend. * requests. This value will also be passed to the frontend.
...@@ -269,68 +270,79 @@ struct persistent_gnt { ...@@ -269,68 +270,79 @@ struct persistent_gnt {
struct list_head remove_node; struct list_head remove_node;
}; };
struct xen_blkif { /* Per-ring information. */
/* Unique identifier for this interface. */ struct xen_blkif_ring {
domid_t domid;
unsigned int handle;
/* Physical parameters of the comms window. */ /* Physical parameters of the comms window. */
unsigned int irq; unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
union blkif_back_rings blk_rings; union blkif_back_rings blk_rings;
void *blk_ring; void *blk_ring;
/* The VBD attached to this interface. */
struct xen_vbd vbd;
/* Back pointer to the backend_info. */
struct backend_info *be;
/* Private fields. */ /* Private fields. */
spinlock_t blk_ring_lock; spinlock_t blk_ring_lock;
atomic_t refcnt;
wait_queue_head_t wq; wait_queue_head_t wq;
/* for barrier (drain) requests */
struct completion drain_complete;
atomic_t drain;
atomic_t inflight; atomic_t inflight;
/* One thread per one blkif. */ /* One thread per blkif ring. */
struct task_struct *xenblkd; struct task_struct *xenblkd;
unsigned int waiting_reqs; unsigned int waiting_reqs;
/* tree to store persistent grants */ /* List of all 'pending_req' available */
struct list_head pending_free;
/* And its spinlock. */
spinlock_t pending_free_lock;
wait_queue_head_t pending_free_wq;
/* Tree to store persistent grants. */
spinlock_t pers_gnts_lock;
struct rb_root persistent_gnts; struct rb_root persistent_gnts;
unsigned int persistent_gnt_c; unsigned int persistent_gnt_c;
atomic_t persistent_gnt_in_use; atomic_t persistent_gnt_in_use;
unsigned long next_lru; unsigned long next_lru;
/* used by the kworker that offload work from the persistent purge */ /* Statistics. */
unsigned long st_print;
unsigned long long st_rd_req;
unsigned long long st_wr_req;
unsigned long long st_oo_req;
unsigned long long st_f_req;
unsigned long long st_ds_req;
unsigned long long st_rd_sect;
unsigned long long st_wr_sect;
/* Used by the kworker that offloads work from the persistent purge. */
struct list_head persistent_purge_list; struct list_head persistent_purge_list;
struct work_struct persistent_purge_work; struct work_struct persistent_purge_work;
/* buffer of free pages to map grant refs */ /* Buffer of free pages to map grant refs. */
spinlock_t free_pages_lock; spinlock_t free_pages_lock;
int free_pages_num; int free_pages_num;
struct list_head free_pages; struct list_head free_pages;
/* List of all 'pending_req' available */
struct list_head pending_free;
/* And its spinlock. */
spinlock_t pending_free_lock;
wait_queue_head_t pending_free_wq;
/* statistics */
unsigned long st_print;
unsigned long long st_rd_req;
unsigned long long st_wr_req;
unsigned long long st_oo_req;
unsigned long long st_f_req;
unsigned long long st_ds_req;
unsigned long long st_rd_sect;
unsigned long long st_wr_sect;
struct work_struct free_work; struct work_struct free_work;
/* Thread shutdown wait queue. */ /* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq; wait_queue_head_t shutdown_wq;
unsigned int nr_ring_pages; struct xen_blkif *blkif;
};
struct xen_blkif {
/* Unique identifier for this interface. */
domid_t domid;
unsigned int handle;
/* Comms information. */
enum blkif_protocol blk_protocol;
/* The VBD attached to this interface. */
struct xen_vbd vbd;
/* Back pointer to the backend_info. */
struct backend_info *be;
atomic_t refcnt;
/* for barrier (drain) requests */
struct completion drain_complete;
atomic_t drain;
struct work_struct free_work;
unsigned int nr_ring_pages;
/* All rings for this device. */
struct xen_blkif_ring *rings;
unsigned int nr_rings;
}; };
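With the statistics moved into struct xen_blkif_ring, a per-device figure is now the sum over all rings; a hypothetical sketch, assuming blkif points at a connected struct xen_blkif:

	unsigned long long rd_req = 0;
	unsigned int i;

	for (i = 0; i < blkif->nr_rings; i++)
		rd_req += blkif->rings[i].st_rd_req;	/* per-ring counters, summed */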
struct seg_buf { struct seg_buf {
...@@ -352,7 +364,7 @@ struct grant_page { ...@@ -352,7 +364,7 @@ struct grant_page {
* response queued for it, with the saved 'id' passed back. * response queued for it, with the saved 'id' passed back.
*/ */
struct pending_req { struct pending_req {
struct xen_blkif *blkif; struct xen_blkif_ring *ring;
u64 id; u64 id;
int nr_segs; int nr_segs;
atomic_t pendcnt; atomic_t pendcnt;
...@@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void); ...@@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void);
irqreturn_t xen_blkif_be_int(int irq, void *dev_id); irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg); int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg); int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif *blkif); void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state); struct backend_info *be, int state);
......
This diff is collapsed.
This diff is collapsed.
...@@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c) ...@@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c)
do { do {
ret = btree_root(gc_root, c, &op, &writes, &stats); ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes); closure_sync(&writes);
cond_resched();
if (ret && ret != -EAGAIN) if (ret && ret != -EAGAIN)
pr_warn("gc failed!"); pr_warn("gc failed!");
...@@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, ...@@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
rw_lock(true, b, b->level); rw_lock(true, b, b->level);
if (b->key.ptr[0] != btree_ptr || if (b->key.ptr[0] != btree_ptr ||
b->seq != seq + 1) b->seq != seq + 1) {
op->lock = b->level;
goto out; goto out;
}
} }
SET_KEY_PTRS(check_key, 1); SET_KEY_PTRS(check_key, 1);
......
...@@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, ...@@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
sysfs_create_link(&c->kobj, &d->kobj, d->name), sysfs_create_link(&c->kobj, &d->kobj, d->name),
"Couldn't create device <-> cache set symlinks"); "Couldn't create device <-> cache set symlinks");
clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
} }
static void bcache_device_detach(struct bcache_device *d) static void bcache_device_detach(struct bcache_device *d)
...@@ -847,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc) ...@@ -847,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc)
buf[SB_LABEL_SIZE] = '\0'; buf[SB_LABEL_SIZE] = '\0';
env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
if (atomic_xchg(&dc->running, 1)) if (atomic_xchg(&dc->running, 1)) {
kfree(env[1]);
kfree(env[2]);
return; return;
}
if (!d->c && if (!d->c &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
...@@ -1933,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ...@@ -1933,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
else else
err = "device busy"; err = "device busy";
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
if (attr == &ksysfs_register_quiet)
goto out;
} }
goto err; goto err;
} }
...@@ -1971,8 +1978,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ...@@ -1971,8 +1978,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
err_close: err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err: err:
if (attr != &ksysfs_register_quiet) pr_info("error opening %s: %s", path, err);
pr_info("error opening %s: %s", path, err);
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
...@@ -2066,8 +2072,10 @@ static int __init bcache_init(void) ...@@ -2066,8 +2072,10 @@ static int __init bcache_init(void)
closure_debug_init(); closure_debug_init();
bcache_major = register_blkdev(0, "bcache"); bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) if (bcache_major < 0) {
unregister_reboot_notifier(&reboot);
return bcache_major; return bcache_major;
}
if (!(bcache_wq = create_workqueue("bcache")) || if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
......
...@@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, ...@@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
static bool dirty_pred(struct keybuf *buf, struct bkey *k) static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{ {
struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
BUG_ON(KEY_INODE(k) != dc->disk.id);
return KEY_DIRTY(k); return KEY_DIRTY(k);
} }
...@@ -372,11 +376,24 @@ static void refill_full_stripes(struct cached_dev *dc) ...@@ -372,11 +376,24 @@ static void refill_full_stripes(struct cached_dev *dc)
} }
} }
/*
* Returns true if we scanned the entire disk
*/
static bool refill_dirty(struct cached_dev *dc) static bool refill_dirty(struct cached_dev *dc)
{ {
struct keybuf *buf = &dc->writeback_keys; struct keybuf *buf = &dc->writeback_keys;
struct bkey start = KEY(dc->disk.id, 0, 0);
struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
bool searched_from_start = false; struct bkey start_pos;
/*
* make sure keybuf pos is inside the range for this disk - at bringup
* we might not be attached yet, so this disk's inode nr won't have
* been initialized yet
*/
if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
bkey_cmp(&buf->last_scanned, &end) > 0)
buf->last_scanned = start;
if (dc->partial_stripes_expensive) { if (dc->partial_stripes_expensive) {
refill_full_stripes(dc); refill_full_stripes(dc);
...@@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc) ...@@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc)
return false; return false;
} }
if (bkey_cmp(&buf->last_scanned, &end) >= 0) { start_pos = buf->last_scanned;
buf->last_scanned = KEY(dc->disk.id, 0, 0);
searched_from_start = true;
}
bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; if (bkey_cmp(&buf->last_scanned, &end) < 0)
return false;
/*
* If we get to the end, start scanning again from the beginning, and
* only scan up to where we initially started scanning from:
*/
buf->last_scanned = start;
bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);
return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
} }
static int bch_writeback_thread(void *arg) static int bch_writeback_thread(void *arg)
......
...@@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, ...@@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
static inline void bch_writeback_queue(struct cached_dev *dc) static inline void bch_writeback_queue(struct cached_dev *dc)
{ {
wake_up_process(dc->writeback_thread); if (!IS_ERR_OR_NULL(dc->writeback_thread))
wake_up_process(dc->writeback_thread);
} }
static inline void bch_writeback_add(struct cached_dev *dc) static inline void bch_writeback_add(struct cached_dev *dc)
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
*/ */
#ifndef DRBD_H #ifndef DRBD_H
#define DRBD_H #define DRBD_H
#include <linux/connector.h>
#include <asm/types.h> #include <asm/types.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
...@@ -52,7 +51,7 @@ ...@@ -52,7 +51,7 @@
#endif #endif
extern const char *drbd_buildtag(void); extern const char *drbd_buildtag(void);
#define REL_VERSION "8.4.5" #define REL_VERSION "8.4.6"
#define API_VERSION 1 #define API_VERSION 1
#define PRO_VERSION_MIN 86 #define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101 #define PRO_VERSION_MAX 101
...@@ -339,6 +338,8 @@ enum drbd_state_rv { ...@@ -339,6 +338,8 @@ enum drbd_state_rv {
#define MDF_AL_CLEAN (1 << 7) #define MDF_AL_CLEAN (1 << 7)
#define MDF_AL_DISABLED (1 << 8) #define MDF_AL_DISABLED (1 << 8)
#define MAX_PEERS 32
enum drbd_uuid_index { enum drbd_uuid_index {
UI_CURRENT, UI_CURRENT,
UI_BITMAP, UI_BITMAP,
...@@ -349,14 +350,35 @@ enum drbd_uuid_index { ...@@ -349,14 +350,35 @@ enum drbd_uuid_index {
UI_EXTENDED_SIZE /* Everything. */ UI_EXTENDED_SIZE /* Everything. */
}; };
#define HISTORY_UUIDS MAX_PEERS
enum drbd_timeout_flag { enum drbd_timeout_flag {
UT_DEFAULT = 0, UT_DEFAULT = 0,
UT_DEGRADED = 1, UT_DEGRADED = 1,
UT_PEER_OUTDATED = 2, UT_PEER_OUTDATED = 2,
}; };
enum drbd_notification_type {
NOTIFY_EXISTS,
NOTIFY_CREATE,
NOTIFY_CHANGE,
NOTIFY_DESTROY,
NOTIFY_CALL,
NOTIFY_RESPONSE,
NOTIFY_CONTINUES = 0x8000,
NOTIFY_FLAGS = NOTIFY_CONTINUES,
};
#define UUID_JUST_CREATED ((__u64)4) #define UUID_JUST_CREATED ((__u64)4)
enum write_ordering_e {
WO_NONE,
WO_DRAIN_IO,
WO_BDEV_FLUSH,
WO_BIO_BARRIER
};
/* magic numbers used in meta data and network packets */ /* magic numbers used in meta data and network packets */
#define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC 0x83740267
#define DRBD_MAGIC_BIG 0x835a #define DRBD_MAGIC_BIG 0x835a
......
...@@ -250,6 +250,76 @@ GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, ...@@ -250,6 +250,76 @@ GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
) )
GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info,
__u32_field(1, 0, res_role)
__flg_field(2, 0, res_susp)
__flg_field(3, 0, res_susp_nod)
__flg_field(4, 0, res_susp_fen)
/* __flg_field(5, 0, res_weak) */
)
GENL_struct(DRBD_NLA_DEVICE_INFO, 16, device_info,
__u32_field(1, 0, dev_disk_state)
)
GENL_struct(DRBD_NLA_CONNECTION_INFO, 17, connection_info,
__u32_field(1, 0, conn_connection_state)
__u32_field(2, 0, conn_role)
)
GENL_struct(DRBD_NLA_PEER_DEVICE_INFO, 18, peer_device_info,
__u32_field(1, 0, peer_repl_state)
__u32_field(2, 0, peer_disk_state)
__u32_field(3, 0, peer_resync_susp_user)
__u32_field(4, 0, peer_resync_susp_peer)
__u32_field(5, 0, peer_resync_susp_dependency)
)
GENL_struct(DRBD_NLA_RESOURCE_STATISTICS, 19, resource_statistics,
__u32_field(1, 0, res_stat_write_ordering)
)
GENL_struct(DRBD_NLA_DEVICE_STATISTICS, 20, device_statistics,
__u64_field(1, 0, dev_size) /* (sectors) */
__u64_field(2, 0, dev_read) /* (sectors) */
__u64_field(3, 0, dev_write) /* (sectors) */
__u64_field(4, 0, dev_al_writes) /* activity log writes (count) */
__u64_field(5, 0, dev_bm_writes) /* bitmap writes (count) */
__u32_field(6, 0, dev_upper_pending) /* application requests in progress */
__u32_field(7, 0, dev_lower_pending) /* backing device requests in progress */
__flg_field(8, 0, dev_upper_blocked)
__flg_field(9, 0, dev_lower_blocked)
__flg_field(10, 0, dev_al_suspended) /* activity log suspended */
__u64_field(11, 0, dev_exposed_data_uuid)
__u64_field(12, 0, dev_current_uuid)
__u32_field(13, 0, dev_disk_flags)
__bin_field(14, 0, history_uuids, HISTORY_UUIDS * sizeof(__u64))
)
GENL_struct(DRBD_NLA_CONNECTION_STATISTICS, 21, connection_statistics,
__flg_field(1, 0, conn_congested)
)
GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics,
__u64_field(1, 0, peer_dev_received) /* sectors */
__u64_field(2, 0, peer_dev_sent) /* sectors */
__u32_field(3, 0, peer_dev_pending) /* number of requests */
__u32_field(4, 0, peer_dev_unacked) /* number of requests */
__u64_field(5, 0, peer_dev_out_of_sync) /* sectors */
__u64_field(6, 0, peer_dev_resync_failed) /* sectors */
__u64_field(7, 0, peer_dev_bitmap_uuid)
__u32_field(9, 0, peer_dev_flags)
)
GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header,
__u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type)
)
GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info,
__str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32)
__u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status)
)
/* /*
* Notifications and commands (genlmsghdr->cmd) * Notifications and commands (genlmsghdr->cmd)
*/ */
...@@ -382,3 +452,82 @@ GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), ...@@ -382,3 +452,82 @@ GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_GET_RESOURCES, 30,
GENL_op_init(
.dumpit = drbd_adm_dump_resources,
),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY))
GENL_op(DRBD_ADM_GET_DEVICES, 31,
GENL_op_init(
.dumpit = drbd_adm_dump_devices,
.done = drbd_adm_dump_devices_done,
),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
GENL_op(DRBD_ADM_GET_CONNECTIONS, 32,
GENL_op_init(
.dumpit = drbd_adm_dump_connections,
.done = drbd_adm_dump_connections_done,
),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY))
GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33,
GENL_op_init(
.dumpit = drbd_adm_dump_peer_devices,
.done = drbd_adm_dump_peer_devices_done,
),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
GENL_notification(
DRBD_RESOURCE_STATE, 34, events,
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_F_REQUIRED))
GENL_notification(
DRBD_DEVICE_STATE, 35, events,
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_F_REQUIRED))
GENL_notification(
DRBD_CONNECTION_STATE, 36, events,
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_F_REQUIRED))
GENL_notification(
DRBD_PEER_DEVICE_STATE, 37, events,
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_F_REQUIRED))
GENL_op(
DRBD_ADM_GET_INITIAL_STATE, 38,
GENL_op_init(
.dumpit = drbd_adm_get_initial_state,
),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY))
GENL_notification(
DRBD_HELPER, 40, events,
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_HELPER, DRBD_F_REQUIRED))
GENL_notification(
DRBD_INITIAL_STATE_DONE, 41, events,
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED))
...@@ -135,6 +135,20 @@ static inline void *idr_find(struct idr *idr, int id) ...@@ -135,6 +135,20 @@ static inline void *idr_find(struct idr *idr, int id)
#define idr_for_each_entry(idp, entry, id) \ #define idr_for_each_entry(idp, entry, id) \
for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
/**
* idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
* @idp: idr handle
* @entry: the type * to use as cursor
* @id: id entry's key
*
* Continue to iterate over a list of the given type, continuing after
* the current position.
*/
#define idr_for_each_entry_continue(idp, entry, id) \
for ((entry) = idr_get_next((idp), &(id)); \
entry; \
++id, (entry) = idr_get_next((idp), &(id)))
/* /*
* IDA - IDR based id allocator, use when translation from id to * IDA - IDR based id allocator, use when translation from id to
* pointer isn't necessary. * pointer isn't necessary.
......
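The new idr_for_each_entry_continue() helper resumes an interrupted walk at the first entry whose id is >= the saved id. A minimal usage sketch; my_idr, budget_exhausted() and handle_one() are hypothetical names:

	struct my_obj *obj;
	int id = 0;

	idr_for_each_entry(&my_idr, obj, id) {
		if (budget_exhausted())
			break;			/* id remembers where we stopped */
		handle_one(obj);
	}

	/* ... later, pick the walk up again from the saved id ... */
	idr_for_each_entry_continue(&my_idr, obj, id)
		handle_one(obj);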
...@@ -264,7 +264,7 @@ extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); ...@@ -264,7 +264,7 @@ extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
extern void lc_committed(struct lru_cache *lc); extern void lc_committed(struct lru_cache *lc);
struct seq_file; struct seq_file;
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); extern void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *)); void (*detail) (struct seq_file *, struct lc_element *));
......
...@@ -27,6 +27,54 @@ ...@@ -27,6 +27,54 @@
typedef uint16_t blkif_vdev_t; typedef uint16_t blkif_vdev_t;
typedef uint64_t blkif_sector_t; typedef uint64_t blkif_sector_t;
/*
* Multiple hardware queues/rings:
* If supported, the backend will write the key "multi-queue-max-queues" to
* the directory for that vbd, and set its value to the maximum supported
* number of queues.
* Frontends that are aware of this feature and wish to use it can write the
* key "multi-queue-num-queues" with the number they wish to use, which must be
* greater than zero, and no more than the value reported by the backend in
* "multi-queue-max-queues".
*
* For frontends requesting just one queue, the usual event-channel and
* ring-ref keys are written as before, simplifying the backend processing
* to avoid distinguishing between a frontend that doesn't understand the
* multi-queue feature, and one that does, but requested only one queue.
*
* Frontends requesting two or more queues must not write the toplevel
* event-channel and ring-ref keys, instead writing those keys under sub-keys
* having the name "queue-N", where N is the integer ID of the queue/ring to
* which those keys belong. Queues are indexed from zero.
* For example, a frontend with two queues must write the following set of
* queue-related keys:
*
* /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
* /local/domain/1/device/vbd/0/queue-0 = ""
* /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
* /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
* /local/domain/1/device/vbd/0/queue-1 = ""
* /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
* /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
*
* It is also possible to use multiple queues/rings together with the
* multi-page ring buffer feature.
* For example, a frontend that requests two queues/rings, each with a
* two-page ring buffer, must write the following set of related keys:
*
* /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
* /local/domain/1/device/vbd/0/ring-page-order = "1"
* /local/domain/1/device/vbd/0/queue-0 = ""
* /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
* /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
* /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
* /local/domain/1/device/vbd/0/queue-1 = ""
* /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
* /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
* /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
*
*/
/* /*
* REQUEST CODES. * REQUEST CODES.
*/ */
......
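A hedged frontend-side sketch of the negotiation described in the comment above; info, dev, xbt and xen_blkif_max_queues are illustrative names, and error handling plus the per-queue ring-ref/event-channel writes are omitted:

	unsigned int backend_max, nr_queues;

	if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
			 "multi-queue-max-queues", "%u", &backend_max) != 1)
		backend_max = 1;		/* backend predates the feature */

	nr_queues = min(xen_blkif_max_queues, backend_max);

	if (nr_queues > 1)
		xenbus_printf(xbt, dev->nodename,
			      "multi-queue-num-queues", "%u", nr_queues);
	/* With nr_queues == 1 the legacy toplevel ring-ref and event-channel
	 * keys are written exactly as before. */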
...@@ -238,7 +238,7 @@ void lc_reset(struct lru_cache *lc) ...@@ -238,7 +238,7 @@ void lc_reset(struct lru_cache *lc)
* @seq: the seq_file to print into * @seq: the seq_file to print into
* @lc: the lru cache to print statistics of * @lc: the lru cache to print statistics of
*/ */
size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{ {
/* NOTE: /* NOTE:
* total calls to lc_get are * total calls to lc_get are
...@@ -250,8 +250,6 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) ...@@ -250,8 +250,6 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
lc->name, lc->used, lc->nr_elements, lc->name, lc->used, lc->nr_elements,
lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
return 0;
} }
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
......