Commit 05aa69b0 authored by Linus Torvalds

Merge tag 'for-6.7/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Various fixes for the DM delay target to address regressions
   introduced during the 6.7 merge window

 - Fixes to both DM bufio and the verity target for no-sleep mode,
   to address sleeping-while-atomic issues

 - Update DM crypt target in response to the treewide change that
   made MAX_ORDER inclusive

* tag 'for-6.7/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-crypt: start allocating with MAX_ORDER
  dm-verity: don't use blocking calls from tasklets
  dm-bufio: fix no-sleep mode
  dm-delay: avoid duplicate logic
  dm-delay: fix bugs introduced by kthread mode
  dm-delay: fix a race between delay_presuspend and delay_bio
parents 23dfa043 13648e04
@@ -254,7 +254,7 @@ enum evict_result {
typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
unsigned long tested = 0;
struct list_head *h = lru->cursor;
@@ -295,6 +295,7 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
h = h->next;
if (!no_sleep)
cond_resched();
}
@@ -382,7 +383,10 @@ struct dm_buffer {
*/
struct buffer_tree {
union {
struct rw_semaphore lock;
rwlock_t spinlock;
} u;
struct rb_root root;
} ____cacheline_aligned_in_smp;
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
* on the locks.
*/
unsigned int num_locks;
bool no_sleep;
struct buffer_tree trees[];
};
static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
else
down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
else
up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
else
down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
else
up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
/*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
static void __lh_lock(struct lock_history *lh, unsigned int index)
{
if (lh->write)
down_write(&lh->cache->trees[index].lock);
if (lh->write) {
if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
write_lock_bh(&lh->cache->trees[index].u.spinlock);
else
down_write(&lh->cache->trees[index].u.lock);
} else {
if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
read_lock_bh(&lh->cache->trees[index].u.spinlock);
else
down_read(&lh->cache->trees[index].lock);
down_read(&lh->cache->trees[index].u.lock);
}
}
static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
if (lh->write)
up_write(&lh->cache->trees[index].lock);
if (lh->write) {
if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
write_unlock_bh(&lh->cache->trees[index].u.spinlock);
else
up_write(&lh->cache->trees[index].u.lock);
} else {
if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
read_unlock_bh(&lh->cache->trees[index].u.spinlock);
else
up_read(&lh->cache->trees[index].lock);
up_read(&lh->cache->trees[index].u.lock);
}
}
/*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
return le_to_buffer(le);
}
static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
unsigned int i;
bc->num_locks = num_locks;
bc->no_sleep = no_sleep;
for (i = 0; i < bc->num_locks; i++) {
init_rwsem(&bc->trees[i].lock);
if (no_sleep)
rwlock_init(&bc->trees[i].u.spinlock);
else
init_rwsem(&bc->trees[i].u.lock);
bc->trees[i].root = RB_ROOT;
}
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
struct lru_entry *le;
struct dm_buffer *b;
le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
return NULL;
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
while (true) {
le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
break;
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
{
unsigned int i;
BUG_ON(bc->no_sleep);
for (i = 0; i < bc->num_locks; i++) {
down_write(&bc->trees[i].lock);
down_write(&bc->trees[i].u.lock);
__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
up_write(&bc->trees[i].lock);
up_write(&bc->trees[i].u.lock);
}
}
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
struct dm_buffer_cache cache; /* must be last member */
};
static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
/*----------------------------------------------------------------*/
#define dm_bufio_in_request() (!!current->bio_list)
@@ -1871,6 +1907,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
if (need_submit)
submit_io(b, REQ_OP_READ, read_endio);
if (nf != NF_GET) /* we already tested this condition above */
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
r = -ENOMEM;
goto bad_client;
}
cache_init(&c->cache, num_locks);
cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
c->bdev = bdev;
c->block_size = block_size;
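
A minimal sketch of the locking pattern the dm-bufio hunks above introduce: each buffer tree keeps either a rw_semaphore or a rwlock_t in a union, and a static key plus the client's no_sleep flag selects the non-sleeping variant for clients created with DM_BUFIO_CLIENT_NO_SLEEP. The demo_* names below are illustrative, not the actual dm-bufio symbols.

#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/jump_label.h>

struct demo_tree {
	union {
		struct rw_semaphore lock;	/* default path, may sleep */
		rwlock_t spinlock;		/* no-sleep clients */
	} u;
};

static DEFINE_STATIC_KEY_FALSE(demo_no_sleep_enabled);

static void demo_tree_init(struct demo_tree *t, bool no_sleep)
{
	/* Only the union member that will actually be used is initialized. */
	if (no_sleep)
		rwlock_init(&t->u.spinlock);
	else
		init_rwsem(&t->u.lock);
}

static void demo_read_lock(struct demo_tree *t, bool no_sleep)
{
	if (static_branch_unlikely(&demo_no_sleep_enabled) && no_sleep)
		read_lock_bh(&t->u.spinlock);	/* usable from bottom-half context */
	else
		down_read(&t->u.lock);		/* may sleep */
}

static void demo_read_unlock(struct demo_tree *t, bool no_sleep)
{
	if (static_branch_unlikely(&demo_no_sleep_enabled) && no_sleep)
		read_unlock_bh(&t->u.spinlock);
	else
		up_read(&t->u.lock);
}

The static key keeps the common sleeping path free of the extra test, and the _bh rwlock variants can be taken from the softirq context that no-sleep clients (such as dm-verity's tasklet path) run in.
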
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
unsigned int remaining_size;
unsigned int order = MAX_ORDER - 1;
unsigned int order = MAX_ORDER;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
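
The single dm-crypt hunk follows the treewide change that made MAX_ORDER an inclusive bound, i.e. MAX_ORDER itself is now the largest valid allocation order, so crypt_alloc_buffer() can start its fallback at MAX_ORDER instead of MAX_ORDER - 1. A sketch of the common "try a big order, then shrink" pattern, using a hypothetical demo_ helper rather than the dm-crypt code:

#include <linux/gfp.h>

static struct page *demo_alloc_high_order(gfp_t gfp)
{
	/* Largest valid order; before the change the loop had to start at MAX_ORDER - 1. */
	unsigned int order = MAX_ORDER;
	struct page *page;

	for (;;) {
		page = alloc_pages(gfp | __GFP_NORETRY | __GFP_NOWARN, order);
		if (page || !order)
			return page;	/* success, or even order-0 failed */
		order--;		/* fall back to a smaller allocation */
	}
}
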
@@ -33,7 +33,7 @@ struct delay_c {
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
struct task_struct *worker;
atomic_t may_delay;
bool may_delay;
struct delay_class read;
struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
return !!dc->worker;
}
static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
sizeof(struct dm_delay_info));
list_del(&delayed->list);
dm_submit_bio_remap(bio, NULL);
delayed->class->ops--;
}
}
mutex_unlock(&delayed_bios_lock);
}
static int flush_worker_fn(void *data)
{
struct delay_c *dc = data;
while (1) {
flush_delayed_bios_fast(dc, false);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
} else
cond_resched();
}
return 0;
}
static void flush_bios(struct bio *bio)
{
struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
}
}
static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
struct bio_list flush_bio_list;
unsigned long next_expires = 0;
unsigned long start_timer = 0;
struct bio_list flush_bios = { };
bool start_timer = false;
bio_list_init(&flush_bio_list);
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
sizeof(struct dm_delay_info));
list_del(&delayed->list);
bio_list_add(&flush_bios, bio);
bio_list_add(&flush_bio_list, bio);
delayed->class->ops--;
continue;
}
if (!delay_is_fast(dc)) {
if (!start_timer) {
start_timer = 1;
start_timer = true;
next_expires = delayed->expires;
} else
} else {
next_expires = min(next_expires, delayed->expires);
}
}
}
mutex_unlock(&delayed_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
return bio_list_get(&flush_bios);
flush_bios(bio_list_get(&flush_bio_list));
}
static int flush_worker_fn(void *data)
{
struct delay_c *dc = data;
while (!kthread_should_stop()) {
flush_delayed_bios(dc, false);
mutex_lock(&delayed_bios_lock);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
mutex_unlock(&delayed_bios_lock);
schedule();
} else {
mutex_unlock(&delayed_bios_lock);
cond_resched();
}
}
return 0;
}
static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
struct delay_c *dc;
dc = container_of(work, struct delay_c, flush_expired_bios);
if (delay_is_fast(dc))
flush_delayed_bios_fast(dc, false);
else
flush_bios(flush_delayed_bios(dc, false));
flush_delayed_bios(dc, false);
}
static void delay_dtr(struct dm_target *ti)
@@ -177,7 +166,6 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
if (!delay_is_fast(dc))
mutex_destroy(&dc->timer_lock);
kfree(dc);
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
atomic_set(&dc->may_delay, 1);
mutex_init(&dc->timer_lock);
dc->may_delay = true;
dc->argc = argc;
ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
"dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
dc->worker = NULL;
goto bad;
}
} else {
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
mutex_init(&dc->timer_lock);
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
if (!dc->kdelayd_wq) {
ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
struct dm_delay_info *delayed;
unsigned long expires = 0;
if (!c->delay || !atomic_read(&dc->may_delay))
if (!c->delay)
return DM_MAPIO_REMAPPED;
delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
mutex_lock(&delayed_bios_lock);
if (unlikely(!dc->may_delay)) {
mutex_unlock(&delayed_bios_lock);
return DM_MAPIO_REMAPPED;
}
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
atomic_set(&dc->may_delay, 0);
mutex_lock(&delayed_bios_lock);
dc->may_delay = false;
mutex_unlock(&delayed_bios_lock);
if (delay_is_fast(dc))
flush_delayed_bios_fast(dc, true);
else {
if (!delay_is_fast(dc))
del_timer_sync(&dc->delay_timer);
flush_bios(flush_delayed_bios(dc, true));
}
flush_delayed_bios(dc, true);
}
static void delay_resume(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
atomic_set(&dc->may_delay, 1);
dc->may_delay = true;
}
static int delay_map(struct dm_target *ti, struct bio *bio)
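
The dm-delay changes above fold the old flush_delayed_bios_fast() into flush_delayed_bios(), make the kthread worker honour kthread_should_stop(), and fix the delay_presuspend()/delay_bio() race by turning may_delay into a plain bool that is only tested and cleared under delayed_bios_lock. A condensed sketch of that last point, with illustrative demo_* names:

#include <linux/mutex.h>
#include <linux/list.h>

static DEFINE_MUTEX(demo_delayed_lock);
static LIST_HEAD(demo_delayed_bios);
static bool demo_may_delay = true;

static bool demo_queue_delayed(struct list_head *entry)
{
	mutex_lock(&demo_delayed_lock);
	if (!demo_may_delay) {
		/* Presuspend already ran: do not queue, let the caller remap now. */
		mutex_unlock(&demo_delayed_lock);
		return false;
	}
	list_add_tail(entry, &demo_delayed_bios);
	mutex_unlock(&demo_delayed_lock);
	return true;
}

static void demo_presuspend(void)
{
	/* Clearing the flag under the same mutex means no new entry can slip in afterwards. */
	mutex_lock(&demo_delayed_lock);
	demo_may_delay = false;
	mutex_unlock(&demo_delayed_lock);

	/* ...flush whatever is already on demo_delayed_bios... */
}
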
@@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
{
if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->data_dev_block_bits,
verity_io_real_digest(v, io))))
verity_io_real_digest(v, io), true)))
return 0;
return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
1 << v->data_dev_block_bits,
verity_io_real_digest(v, io));
verity_io_real_digest(v, io), true);
if (unlikely(r < 0))
return r;
@@ -135,19 +135,20 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
* Wrapper for crypto_ahash_init, which handles verity salting.
*/
static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
struct crypto_wait *wait)
struct crypto_wait *wait, bool may_sleep)
{
int r;
ahash_request_set_tfm(req, v->tfm);
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
CRYPTO_TFM_REQ_MAY_BACKLOG,
ahash_request_set_callback(req,
may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
crypto_req_done, (void *)wait);
crypto_init_wait(wait);
r = crypto_wait_req(crypto_ahash_init(req), wait);
if (unlikely(r < 0)) {
if (r != -ENOMEM)
DMERR("crypto_ahash_init failed: %d", r);
return r;
}
@@ -179,12 +180,12 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
}
int verity_hash(struct dm_verity *v, struct ahash_request *req,
const u8 *data, size_t len, u8 *digest)
const u8 *data, size_t len, u8 *digest, bool may_sleep)
{
int r;
struct crypto_wait wait;
r = verity_hash_init(v, req, &wait);
r = verity_hash_init(v, req, &wait, may_sleep);
if (unlikely(r < 0))
goto out;
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
r = verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->hash_dev_block_bits,
verity_io_real_digest(v, io));
verity_io_real_digest(v, io), !io->in_tasklet);
if (unlikely(r < 0))
goto release_ret_r;
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
continue;
}
r = verity_hash_init(v, req, &wait);
r = verity_hash_init(v, req, &wait, !io->in_tasklet);
if (unlikely(r < 0))
return r;
@@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data)
io->in_tasklet = true;
err = verity_verify_io(io);
if (err == -EAGAIN) {
if (err == -EAGAIN || err == -ENOMEM) {
/* fallback to retrying with work-queue */
INIT_WORK(&io->work, verity_work);
queue_work(io->v->verify_wq, &io->work);
@@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
goto out;
r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
v->zero_digest);
v->zero_digest, true);
out:
kfree(req);
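
The dm-verity hunks thread a may_sleep flag down to verity_hash() and verity_hash_init() so the tasklet path never passes CRYPTO_TFM_REQ_MAY_SLEEP or CRYPTO_TFM_REQ_MAY_BACKLOG to the crypto layer, and verity_tasklet() now also falls back to the workqueue on -ENOMEM, since atomic allocations can fail where a process-context retry will not. A small sketch of the flag selection, using a hypothetical demo_ helper rather than the verity code:

#include <crypto/hash.h>
#include <linux/crypto.h>

static void demo_prepare_hash_req(struct ahash_request *req,
				  struct crypto_wait *wait, bool may_sleep)
{
	/* Atomic callers (e.g. a tasklet) must not allow sleeping or backlogging. */
	ahash_request_set_callback(req,
		may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
		crypto_req_done, wait);
	crypto_init_wait(wait);
}

The -EAGAIN/-ENOMEM requeue itself is visible in the verity_tasklet() hunk above: both errors now push the I/O onto the verify workqueue instead of failing the read.
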
@@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
u8 *data, size_t len));
extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
const u8 *data, size_t len, u8 *digest);
const u8 *data, size_t len, u8 *digest, bool may_sleep);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);