Commit 4597fcff authored by Linus Torvalds

Merge tag 'for-4.18/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Adjust various DM structure members to improve alignment relative to
   4.18 block's mempool_t and bioset changes (the sketch after this list
   illustrates the pattern).

 - Add DM writecache target that offers writeback caching to persistent
   memory or SSD.

 - Small DM core error message change to give context for why a DM table
   type transition wasn't allowed.
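
The alignment adjustments follow one mechanical pattern: with the 4.18 block
changes, mempool_t and struct bio_set are embedded in client structures by
value rather than referenced through pointers, so these now-large members
move toward the end of each structure and the small, frequently accessed
fields stay packed on the leading cache lines. A minimal sketch of the idea
(stand-in types, not the kernel definitions):

    /* Stand-ins sized roughly like the kernel types (illustrative only). */
    typedef struct { unsigned int raw; } spinlock_t;  /* small, hot */
    typedef struct { void *elements[16]; } mempool_t; /* large when embedded */
    struct rb_root { void *rb_node; };                /* small, hot */

    /* Before: the large embedded pool splits the hot fields apart. */
    struct bio_prison_before {
        spinlock_t lock;
        mempool_t cell_pool;
        struct rb_root cells;
    };

    /* After: hot fields adjacent, large embedded member last. */
    struct bio_prison_after {
        spinlock_t lock;
        struct rb_root cells;
        mempool_t cell_pool;
    };

The dm-bio-prison hunks below show exactly this transformation on the real
structures.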

* tag 'for-4.18/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: add writecache target
  dm: adjust structure members to improve alignment
  dm: report which conflicting type caused error during table_load()
Parents: a205f0c9 48debafe

Documentation/device-mapper/writecache.txt (new file):

The writecache target caches writes on persistent memory or on SSD. It
doesn't cache reads, because reads are expected to be served from the page
cache in normal RAM.

When the device is constructed, the first sector must either be zeroed or
contain a valid superblock from a previous invocation.
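
For a fresh cache device, the first sector can be cleared with dd (a
sketch; /dev/pmem0 stands in for your cache device):

    # wipe any stale superblock so the target initializes a new one
    dd if=/dev/zero of=/dev/pmem0 bs=512 count=1 oflag=direct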
Constructor parameters:
1. type of the cache device - "p" or "s"
	p - persistent memory
	s - SSD
2. the underlying device that will be cached
3. the cache device
4. block size (4096 is recommended; the maximum block size is the page
   size)
5. the number of optional parameters (a parameter that takes an argument
   counts as two)
	high_watermark n	(default: 50)
		start writeback when the number of used blocks reaches
		this watermark
	low_watermark x		(default: 45)
		stop writeback when the number of used blocks drops
		below this watermark
	writeback_jobs n	(default: unlimited)
		limit the number of blocks that are in flight during
		writeback; setting this value reduces writeback
		throughput, but it may improve the latency of read
		requests
	autocommit_blocks n	(default: 64 for pmem, 65536 for ssd)
		when an application writes this many blocks without
		issuing a FLUSH request, the blocks are committed
		automatically
	autocommit_time ms	(default: 1000)
		autocommit time in milliseconds; the data is committed
		automatically if this much time passes without a FLUSH
		request being received
	fua			(default: on)
		applicable only to persistent memory - use the FUA flag
		when writing data from persistent memory back to the
		underlying device
	nofua
		applicable only to persistent memory - don't use the FUA
		flag when writing back data; send a FLUSH request
		afterwards instead
		- some underlying devices perform better with fua, some
		  with nofua; the user should test both
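
For example, an SSD-backed writecache device could be constructed like this
(a sketch with hypothetical device names; the optional-parameter count is 4
because each of the two options takes an argument):

    # origin device: /dev/sdb, cache device: /dev/nvme0n1, 4096-byte blocks
    dmsetup create wc --table "0 $(blockdev --getsz /dev/sdb) writecache s \
        /dev/sdb /dev/nvme0n1 4096 4 high_watermark 60 writeback_jobs 1024"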
Status:
1. error indicator - 0 if there was no error, otherwise the error number
2. the number of blocks
3. the number of free blocks
4. the number of blocks under writeback
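
For instance, a healthy device might report the following (a sketch with
illustrative numbers; dmsetup prefixes the four target-specific fields with
the device's start sector, length and target name):

    $ dmsetup status wc
    0 976773168 writecache 0 262144 261073 0

Here the error indicator is 0, the cache has 262144 blocks of which 261073
are free, and no blocks are under writeback.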
Messages:
	flush
		flush the cache device. The message returns successfully
		if the cache device was flushed without an error
	flush_on_suspend
		flush the cache device on the next suspend. Use this
		message when you are going to remove the cache device.
		The proper sequence for removing the cache device is:
		1. send the "flush_on_suspend" message
		2. load an inactive table with a linear target that maps
		   to the underlying device
		3. suspend the device
		4. ask for status and verify that there are no errors
		5. resume the device, so that it will use the linear
		   target
		6. the cache device is now inactive and it can be deleted
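
Expressed as dmsetup commands, the removal sequence looks like this (a
sketch reusing the hypothetical device names from the construction example
above):

    dmsetup message wc 0 flush_on_suspend
    dmsetup reload wc --table "0 $(blockdev --getsz /dev/sdb) linear /dev/sdb 0"
    dmsetup suspend wc    # writeback completes during the suspend
    dmsetup status wc     # the first status field should be 0 (no error)
    dmsetup resume wc     # the device now runs the linear target
    # the cache device is inactive and can be detached or reused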
drivers/md/Kconfig:

@@ -334,6 +334,17 @@ config DM_CACHE_SMQ
 	 of less memory utilization, improved performance and increased
 	 adaptability in the face of changing workloads.
 
+config DM_WRITECACHE
+	tristate "Writecache target"
+	depends on BLK_DEV_DM
+	---help---
+	   The writecache target caches writes on persistent memory or SSD.
+	   It is intended for databases or other programs that need extremely
+	   low commit latency.
+
+	   The writecache target doesn't cache reads because reads are supposed
+	   to be cached in standard RAM.
+
 config DM_ERA
 	tristate "Era target (EXPERIMENTAL)"
 	depends on BLK_DEV_DM
drivers/md/Makefile:

@@ -67,6 +67,7 @@ obj-$(CONFIG_DM_ERA)		+= dm-era.o
 obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
+obj-$(CONFIG_DM_WRITECACHE)	+= dm-writecache.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
drivers/md/dm-bio-prison-v1.c:

@@ -19,8 +19,8 @@
 struct dm_bio_prison {
 	spinlock_t lock;
-	mempool_t cell_pool;
 	struct rb_root cells;
+	mempool_t cell_pool;
 };
 
 static struct kmem_cache *_cell_cache;
drivers/md/dm-bio-prison-v2.c:

@@ -21,8 +21,8 @@ struct dm_bio_prison_v2 {
 	struct workqueue_struct *wq;
 	spinlock_t lock;
-	mempool_t cell_pool;
 	struct rb_root cells;
+	mempool_t cell_pool;
 };
 
 static struct kmem_cache *_cell_cache;
drivers/md/dm-cache-target.c:

@@ -371,7 +371,13 @@ struct cache_stats {
 struct cache {
 	struct dm_target *ti;
-	struct dm_target_callbacks callbacks;
+	spinlock_t lock;
+
+	/*
+	 * Fields for converting from sectors to blocks.
+	 */
+	int sectors_per_block_shift;
+	sector_t sectors_per_block;
 
 	struct dm_cache_metadata *cmd;
 
@@ -402,13 +408,11 @@ struct cache {
 	dm_cblock_t cache_size;
 
 	/*
-	 * Fields for converting from sectors to blocks.
+	 * Invalidation fields.
 	 */
-	sector_t sectors_per_block;
-	int sectors_per_block_shift;
+	spinlock_t invalidation_lock;
+	struct list_head invalidation_requests;
 
-	spinlock_t lock;
-	struct bio_list deferred_bios;
 	sector_t migration_threshold;
 	wait_queue_head_t migration_wait;
 	atomic_t nr_allocated_migrations;
 
@@ -419,13 +423,11 @@ struct cache {
 	 */
 	atomic_t nr_io_migrations;
 
+	struct bio_list deferred_bios;
+
 	struct rw_semaphore quiesce_lock;
 
-	/*
-	 * cache_size entries, dirty if set
-	 */
-	atomic_t nr_dirty;
-	unsigned long *dirty_bitset;
+	struct dm_target_callbacks callbacks;
 
 	/*
 	 * origin_blocks entries, discarded if set.
 
@@ -442,17 +444,27 @@ struct cache {
 	const char **ctr_args;
 
 	struct dm_kcopyd_client *copier;
-	struct workqueue_struct *wq;
 	struct work_struct deferred_bio_worker;
 	struct work_struct migration_worker;
+	struct workqueue_struct *wq;
 	struct delayed_work waker;
 	struct dm_bio_prison_v2 *prison;
-	struct bio_set bs;
 
-	mempool_t migration_pool;
+	/*
+	 * cache_size entries, dirty if set
+	 */
+	unsigned long *dirty_bitset;
+	atomic_t nr_dirty;
 
-	struct dm_cache_policy *policy;
 	unsigned policy_nr_args;
+	struct dm_cache_policy *policy;
+
+	/*
+	 * Cache features such as write-through.
+	 */
+	struct cache_features features;
+
+	struct cache_stats stats;
 
 	bool need_tick_bio:1;
 	bool sized:1;
 
@@ -461,25 +473,16 @@ struct cache {
 	bool loaded_mappings:1;
 	bool loaded_discards:1;
 
-	/*
-	 * Cache features such as write-through.
-	 */
-	struct cache_features features;
-
-	struct cache_stats stats;
+	struct rw_semaphore background_work_lock;
 
-	/*
-	 * Invalidation fields.
-	 */
-	spinlock_t invalidation_lock;
-	struct list_head invalidation_requests;
+	struct batcher committer;
+	struct work_struct commit_ws;
 
 	struct io_tracker tracker;
 
-	struct work_struct commit_ws;
-	struct batcher committer;
+	mempool_t migration_pool;
 
-	struct rw_semaphore background_work_lock;
+	struct bio_set bs;
 };
 
 struct per_bio_data {
drivers/md/dm-core.h:

@@ -31,6 +31,9 @@ struct dm_kobject_holder {
 struct mapped_device {
 	struct mutex suspend_lock;
 
+	struct mutex table_devices_lock;
+	struct list_head table_devices;
+
 	/*
 	 * The current mapping (struct dm_table *).
 	 * Use dm_get_live_table{_fast} or take suspend_lock for
 
@@ -38,17 +41,14 @@ struct mapped_device {
 	 */
 	void __rcu *map;
 
-	struct list_head table_devices;
-	struct mutex table_devices_lock;
-
 	unsigned long flags;
 
-	struct request_queue *queue;
-	int numa_node_id;
-	enum dm_queue_mode type;
-
 	/* Protect queue and type against concurrent access. */
 	struct mutex type_lock;
 
+	enum dm_queue_mode type;
+	int numa_node_id;
+	struct request_queue *queue;
+
 	atomic_t holders;
 	atomic_t open_count;
 
@@ -56,21 +56,21 @@ struct mapped_device {
 	struct dm_target *immutable_target;
 	struct target_type *immutable_target_type;
 
+	char name[16];
 	struct gendisk *disk;
 	struct dax_device *dax_dev;
-	char name[16];
-
-	void *interface_ptr;
 
 	/*
 	 * A list of ios that arrived while we were suspended.
 	 */
-	atomic_t pending[2];
-	wait_queue_head_t wait;
 	struct work_struct work;
+	wait_queue_head_t wait;
+	atomic_t pending[2];
 	spinlock_t deferred_lock;
 	struct bio_list deferred;
 
+	void *interface_ptr;
+
 	/*
 	 * Event handling.
 	 */
 
@@ -83,17 +83,17 @@ struct mapped_device {
 	/* the number of internal suspends */
 	unsigned internal_suspend_count;
 
-	/*
-	 * Processing queue (flush)
-	 */
-	struct workqueue_struct *wq;
-
 	/*
 	 * io objects are allocated from here.
 	 */
 	struct bio_set io_bs;
 	struct bio_set bs;
 
+	/*
+	 * Processing queue (flush)
+	 */
+	struct workqueue_struct *wq;
+
 	/*
 	 * freeze/thaw support require holding onto a super block
 	 */
 
@@ -102,11 +102,11 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
-	struct block_device *bdev;
-
 	/* kobject and completion */
 	struct dm_kobject_holder kobj_holder;
 
+	struct block_device *bdev;
+
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
drivers/md/dm-crypt.c:

@@ -139,25 +139,13 @@ struct crypt_config {
 	struct dm_dev *dev;
 	sector_t start;
 
-	/*
-	 * pool for per bio private data, crypto requests,
-	 * encryption requeusts/buffer pages and integrity tags
-	 */
-	mempool_t req_pool;
-	mempool_t page_pool;
-	mempool_t tag_pool;
-	unsigned tag_pool_max_sectors;
-
 	struct percpu_counter n_allocated_pages;
 
-	struct bio_set bs;
-	struct mutex bio_alloc_lock;
-
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
 
-	struct task_struct *write_thread;
 	wait_queue_head_t write_thread_wait;
+	struct task_struct *write_thread;
 	struct rb_root write_tree;
 
 	char *cipher;
 
@@ -213,6 +201,18 @@ struct crypt_config {
 	unsigned int integrity_iv_size;
 	unsigned int on_disk_tag_size;
 
+	/*
+	 * pool for per bio private data, crypto requests,
+	 * encryption requeusts/buffer pages and integrity tags
+	 */
+	unsigned tag_pool_max_sectors;
+	mempool_t tag_pool;
+	mempool_t req_pool;
+	mempool_t page_pool;
+
+	struct bio_set bs;
+	struct mutex bio_alloc_lock;
+
 	u8 *authenc_key; /* space for keys in authenc() format (if used) */
 	u8 key[0];
 };
drivers/md/dm-ioctl.c:

@@ -1344,7 +1344,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_size)
 			goto err_unlock_md_type;
 		}
 	} else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) {
-		DMWARN("can't change device type after initial table load.");
+		DMWARN("can't change device type (old=%u vs new=%u) after initial table load.",
+		       dm_get_md_type(md), dm_table_get_type(t));
 		r = -EINVAL;
 		goto err_unlock_md_type;
 	}
drivers/md/dm-kcopyd.c:

@@ -45,7 +45,6 @@ struct dm_kcopyd_client {
 	struct dm_io_client *io_client;
 
 	wait_queue_head_t destroyq;
-	atomic_t nr_jobs;
 
 	mempool_t job_pool;
 
@@ -54,6 +53,8 @@ struct dm_kcopyd_client {
 
 	struct dm_kcopyd_throttle *throttle;
 
+	atomic_t nr_jobs;
+
 	/*
	 * We maintain three lists of jobs:
	 *
drivers/md/dm-region-hash.c:

@@ -63,27 +63,28 @@ struct dm_region_hash {
 	/* hash table */
 	rwlock_t hash_lock;
-	mempool_t region_pool;
 	unsigned mask;
 	unsigned nr_buckets;
 	unsigned prime;
 	unsigned shift;
 	struct list_head *buckets;
 
+	/*
+	 * If there was a flush failure no regions can be marked clean.
+	 */
+	int flush_failure;
+
 	unsigned max_recovery; /* Max # of regions to recover in parallel */
 
 	spinlock_t region_lock;
 	atomic_t recovery_in_flight;
-	struct semaphore recovery_count;
 	struct list_head clean_regions;
 	struct list_head quiesced_regions;
 	struct list_head recovered_regions;
 	struct list_head failed_recovered_regions;
+	struct semaphore recovery_count;
 
-	/*
-	 * If there was a flush failure no regions can be marked clean.
-	 */
-	int flush_failure;
+	mempool_t region_pool;
 
 	void *context;
 	sector_t target_begin;
drivers/md/dm-thin.c:

@@ -240,9 +240,9 @@ struct pool {
 	struct dm_bio_prison *prison;
 	struct dm_kcopyd_client *copier;
 
-	struct work_struct worker;
 	struct workqueue_struct *wq;
 	struct throttle throttle;
+	struct work_struct worker;
 	struct delayed_work waker;
 	struct delayed_work no_space_timeout;
 
@@ -260,7 +260,6 @@ struct pool {
 	struct dm_deferred_set *all_io_ds;
 
 	struct dm_thin_new_mapping *next_mapping;
-	mempool_t mapping_pool;
 
 	process_bio_fn process_bio;
 	process_bio_fn process_discard;
 
@@ -273,6 +272,8 @@ struct pool {
 	process_mapping_fn process_prepared_discard_pt2;
 
 	struct dm_bio_prison_cell **cell_sort_array;
+
+	mempool_t mapping_pool;
 };
 
 static enum pool_mode get_pool_mode(struct pool *pool);
drivers/md/dm-writecache.c (new file): diff collapsed in the web view.
drivers/md/dm-zoned-target.c:

@@ -52,9 +52,9 @@ struct dmz_target {
 	struct dmz_reclaim *reclaim;
 
 	/* For chunk work */
-	struct mutex chunk_lock;
 	struct radix_tree_root chunk_rxtree;
 	struct workqueue_struct *chunk_wq;
+	struct mutex chunk_lock;
 
 	/* For cloned BIOs to zones */
 	struct bio_set bio_set;