Commit 3fd47bfe authored by Coly Li's avatar Coly Li Committed by Jens Axboe

bcache: stop dc->writeback_rate_update properly

struct delayed_work writeback_rate_update in struct cache_dev is a delayed
worker to call function update_writeback_rate() in period (the interval is
defined by dc->writeback_rate_update_seconds).

When a metadate I/O error happens on cache device, bcache error handling
routine bch_cache_set_error() will call bch_cache_set_unregister() to
retire whole cache set. On the unregister code path, this delayed work is
stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).

dc->writeback_rate_update is a special delayed work from others in bcache.
In its routine update_writeback_rate(), this delayed work is re-armed
itself. That means when cancel_delayed_work_sync() returns, this delayed
work can still be executed after several seconds defined by
dc->writeback_rate_update_seconds.

The problem is, after cancel_delayed_work_sync() returns, the cache set
unregister code path will continue and release memory of struct cache set.
Then the delayed work is scheduled to run, __update_writeback_rate()
will reference the already released cache_set memory, and trigger a NULL
pointer deference fault.

This patch introduces two more bcache device flags,
- BCACHE_DEV_WB_RUNNING
  bit set:  bcache device is in writeback mode and running, it is OK for
            dc->writeback_rate_update to re-arm itself.
  bit clear:bcache device is trying to stop dc->writeback_rate_update,
            this delayed work should not re-arm itself and quit.
- BCACHE_DEV_RATE_DW_RUNNING
  bit set:  routine update_writeback_rate() is executing.
  bit clear: routine update_writeback_rate() quits.

This patch also adds a function cancel_writeback_rate_update_dwork() to
wait for dc->writeback_rate_update quits before cancel it by calling
cancel_delayed_work_sync(). In order to avoid a deadlock by unexpected
quit dc->writeback_rate_update, after time_out seconds this function will
give up and continue to call cancel_delayed_work_sync().

And here I explain how this patch stops self re-armed delayed work properly
with the above stuffs.

update_writeback_rate() sets BCACHE_DEV_RATE_DW_RUNNING at its beginning
and clears BCACHE_DEV_RATE_DW_RUNNING at its end. Before calling
cancel_writeback_rate_update_dwork() clear flag BCACHE_DEV_WB_RUNNING.

Before calling cancel_delayed_work_sync() wait utill flag
BCACHE_DEV_RATE_DW_RUNNING is clear. So when calling
cancel_delayed_work_sync(), dc->writeback_rate_update must be already re-
armed, or quite by seeing BCACHE_DEV_WB_RUNNING cleared. In both cases
delayed work routine update_writeback_rate() won't be executed after
cancel_delayed_work_sync() returns.

Inside update_writeback_rate() before calling schedule_delayed_work(), flag
BCACHE_DEV_WB_RUNNING is checked before. If this flag is cleared, it means
someone is about to stop the delayed work. Because flag
BCACHE_DEV_RATE_DW_RUNNING is set already and cancel_delayed_work_sync()
has to wait for this flag to be cleared, we don't need to worry about race
condition here.

If update_writeback_rate() is scheduled to run after checking
BCACHE_DEV_RATE_DW_RUNNING and before calling cancel_delayed_work_sync()
in cancel_writeback_rate_update_dwork(), it is also safe. Because at this
moment BCACHE_DEV_WB_RUNNING is cleared with memory barrier. As I mentioned
previously, update_writeback_rate() will see BCACHE_DEV_WB_RUNNING is clear
and quit immediately.

Because there are more dependences inside update_writeback_rate() to struct
cache_set memory, dc->writeback_rate_update is not a simple self re-arm
delayed work. After trying many different methods (e.g. hold dc->count, or
use locks), this is the only way I can find which works to properly stop
dc->writeback_rate_update delayed work.

Changelog:
v3: change values of BCACHE_DEV_WB_RUNNING and BCACHE_DEV_RATE_DW_RUNNING
    to bit index, for test_bit().
v2: Try to fix the race issue which is pointed out by Junhui.
v1: The initial version for review
Signed-off-by: default avatarColy Li <colyli@suse.de>
Reviewed-by: default avatarJunhui Tang <tang.junhui@zte.com.cn>
Reviewed-by: default avatarMichael Lyle <mlyle@lyle.org>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent fadd94e0
...@@ -261,7 +261,8 @@ struct bcache_device { ...@@ -261,7 +261,8 @@ struct bcache_device {
#define BCACHE_DEV_CLOSING 0 #define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1 #define BCACHE_DEV_DETACHING 1
#define BCACHE_DEV_UNLINK_DONE 2 #define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned nr_stripes; unsigned nr_stripes;
unsigned stripe_size; unsigned stripe_size;
atomic_t *stripe_sectors_dirty; atomic_t *stripe_sectors_dirty;
......
...@@ -899,6 +899,31 @@ void bch_cached_dev_run(struct cached_dev *dc) ...@@ -899,6 +899,31 @@ void bch_cached_dev_run(struct cached_dev *dc)
pr_debug("error creating sysfs link"); pr_debug("error creating sysfs link");
} }
/*
* If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
* work dc->writeback_rate_update is running. Wait until the routine
* quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
* cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
* seconds, give up waiting here and continue to cancel it too.
*/
static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
{
int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
do {
if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
&dc->disk.flags))
break;
time_out--;
schedule_timeout_interruptible(1);
} while (time_out > 0);
if (time_out == 0)
pr_warn("give up waiting for dc->writeback_write_update to quit");
cancel_delayed_work_sync(&dc->writeback_rate_update);
}
static void cached_dev_detach_finish(struct work_struct *w) static void cached_dev_detach_finish(struct work_struct *w)
{ {
struct cached_dev *dc = container_of(w, struct cached_dev, detach); struct cached_dev *dc = container_of(w, struct cached_dev, detach);
...@@ -911,7 +936,9 @@ static void cached_dev_detach_finish(struct work_struct *w) ...@@ -911,7 +936,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_lock(&bch_register_lock); mutex_lock(&bch_register_lock);
cancel_delayed_work_sync(&dc->writeback_rate_update); if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
if (!IS_ERR_OR_NULL(dc->writeback_thread)) { if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
kthread_stop(dc->writeback_thread); kthread_stop(dc->writeback_thread);
dc->writeback_thread = NULL; dc->writeback_thread = NULL;
...@@ -954,6 +981,7 @@ void bch_cached_dev_detach(struct cached_dev *dc) ...@@ -954,6 +981,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
closure_get(&dc->disk.cl); closure_get(&dc->disk.cl);
bch_writeback_queue(dc); bch_writeback_queue(dc);
cached_dev_put(dc); cached_dev_put(dc);
} }
...@@ -1081,14 +1109,16 @@ static void cached_dev_free(struct closure *cl) ...@@ -1081,14 +1109,16 @@ static void cached_dev_free(struct closure *cl)
{ {
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
cancel_delayed_work_sync(&dc->writeback_rate_update); mutex_lock(&bch_register_lock);
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
if (!IS_ERR_OR_NULL(dc->writeback_thread)) if (!IS_ERR_OR_NULL(dc->writeback_thread))
kthread_stop(dc->writeback_thread); kthread_stop(dc->writeback_thread);
if (dc->writeback_write_wq) if (dc->writeback_write_wq)
destroy_workqueue(dc->writeback_write_wq); destroy_workqueue(dc->writeback_write_wq);
mutex_lock(&bch_register_lock);
if (atomic_read(&dc->running)) if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk); bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk); bcache_device_free(&dc->disk);
......
...@@ -309,6 +309,7 @@ STORE(bch_cached_dev) ...@@ -309,6 +309,7 @@ STORE(bch_cached_dev)
bch_writeback_queue(dc); bch_writeback_queue(dc);
if (attr == &sysfs_writeback_percent) if (attr == &sysfs_writeback_percent)
if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
schedule_delayed_work(&dc->writeback_rate_update, schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ); dc->writeback_rate_update_seconds * HZ);
......
...@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work) ...@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work)
struct cached_dev, struct cached_dev,
writeback_rate_update); writeback_rate_update);
/*
* should check BCACHE_DEV_RATE_DW_RUNNING before calling
* cancel_delayed_work_sync().
*/
set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
smp_mb();
if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
smp_mb();
return;
}
down_read(&dc->writeback_lock); down_read(&dc->writeback_lock);
if (atomic_read(&dc->has_dirty) && if (atomic_read(&dc->has_dirty) &&
...@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work) ...@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work)
up_read(&dc->writeback_lock); up_read(&dc->writeback_lock);
if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
schedule_delayed_work(&dc->writeback_rate_update, schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ); dc->writeback_rate_update_seconds * HZ);
}
/*
* should check BCACHE_DEV_RATE_DW_RUNNING before calling
* cancel_delayed_work_sync().
*/
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
smp_mb();
} }
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
...@@ -675,6 +700,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) ...@@ -675,6 +700,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_p_term_inverse = 40; dc->writeback_rate_p_term_inverse = 40;
dc->writeback_rate_i_term_inverse = 10000; dc->writeback_rate_i_term_inverse = 10000;
WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
} }
...@@ -693,6 +719,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc) ...@@ -693,6 +719,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
return PTR_ERR(dc->writeback_thread); return PTR_ERR(dc->writeback_thread);
} }
WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
schedule_delayed_work(&dc->writeback_rate_update, schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ); dc->writeback_rate_update_seconds * HZ);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment