Commit efb0b63a authored by Linus Torvalds

Merge tag 'zonefs-6.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs

Pull zonefs fix from Damien Le Moal:

 - Fix direct write error handling to avoid a race between failed IO
   completion and the submission path itself, which can result in an
   invalid file size being exposed to the user after the failed IO.

* tag 'zonefs-6.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs:
  zonefs: Improve error handling
parents 0f1dd5e9 14db5f64
...@@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, ...@@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
struct zonefs_inode_info *zi = ZONEFS_I(inode); struct zonefs_inode_info *zi = ZONEFS_I(inode);
if (error) { if (error) {
zonefs_io_error(inode, true); /*
* For Sync IOs, error recovery is called from
* zonefs_file_dio_write().
*/
if (!is_sync_kiocb(iocb))
zonefs_io_error(inode, true);
return error; return error;
} }
...@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
ret = -EINVAL; ret = -EINVAL;
goto inode_unlock; goto inode_unlock;
} }
/*
* Advance the zone write pointer offset. This assumes that the
* IO will succeed, which is OK to do because we do not allow
* partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
* fails, the error path will correct the write pointer offset.
*/
z->z_wpoffset += count;
zonefs_inode_account_active(inode);
mutex_unlock(&zi->i_truncate_mutex); mutex_unlock(&zi->i_truncate_mutex);
} }
...@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
if (ret == -ENOTBLK) if (ret == -ENOTBLK)
ret = -EBUSY; ret = -EBUSY;
if (zonefs_zone_is_seq(z) && /*
(ret > 0 || ret == -EIOCBQUEUED)) { * For a failed IO or partial completion, trigger error recovery
if (ret > 0) * to update the zone write pointer offset to a correct value.
count = ret; * For asynchronous IOs, zonefs_file_write_dio_end_io() may already
* have executed error recovery if the IO already completed when we
/* * reach here. However, we cannot know that and execute error recovery
* Update the zone write pointer offset assuming the write * again (that will not change anything).
* operation succeeded. If it did not, the error recovery path */
* will correct it. Also do active seq file accounting. if (zonefs_zone_is_seq(z)) {
*/ if (ret > 0 && ret != count)
mutex_lock(&zi->i_truncate_mutex); ret = -EIO;
z->z_wpoffset += count; if (ret < 0 && ret != -EIOCBQUEUED)
zonefs_inode_account_active(inode); zonefs_io_error(inode, true);
mutex_unlock(&zi->i_truncate_mutex);
} }
inode_unlock: inode_unlock:
......
...@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode) ...@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
z->z_mode = inode->i_mode; z->z_mode = inode->i_mode;
} }
struct zonefs_ioerr_data {
struct inode *inode;
bool write;
};
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
void *data) void *data)
{ {
struct zonefs_ioerr_data *err = data; struct blk_zone *z = data;
struct inode *inode = err->inode;
*z = *zone;
return 0;
}
static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
bool write)
{
struct zonefs_zone *z = zonefs_inode_zone(inode); struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
...@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, ...@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
data_size = zonefs_check_zone_condition(sb, z, zone); data_size = zonefs_check_zone_condition(sb, z, zone);
isize = i_size_read(inode); isize = i_size_read(inode);
if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
!err->write && isize == data_size) !write && isize == data_size)
return 0; return;
/* /*
* At this point, we detected either a bad zone or an inconsistency * At this point, we detected either a bad zone or an inconsistency
...@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, ...@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* In all cases, warn about inode size inconsistency and handle the * In all cases, warn about inode size inconsistency and handle the
* IO error according to the zone condition and to the mount options. * IO error according to the zone condition and to the mount options.
*/ */
if (zonefs_zone_is_seq(z) && isize != data_size) if (isize != data_size)
zonefs_warn(sb, zonefs_warn(sb,
"inode %lu: invalid size %lld (should be %lld)\n", "inode %lu: invalid size %lld (should be %lld)\n",
inode->i_ino, isize, data_size); inode->i_ino, isize, data_size);
...@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, ...@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
zonefs_i_size_write(inode, data_size); zonefs_i_size_write(inode, data_size);
z->z_wpoffset = data_size; z->z_wpoffset = data_size;
zonefs_inode_account_active(inode); zonefs_inode_account_active(inode);
return 0;
} }
/* /*
...@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write) ...@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
{ {
struct zonefs_zone *z = zonefs_inode_zone(inode); struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag; unsigned int noio_flag;
unsigned int nr_zones = 1; struct blk_zone zone;
struct zonefs_ioerr_data err = {
.inode = inode,
.write = write,
};
int ret; int ret;
/* /*
* The only files that have more than one zone are conventional zone * Conventional zone have no write pointer and cannot become read-only
* files with aggregated conventional zones, for which the inode zone * or offline. So simply fake a report for a single or aggregated zone
* size is always larger than the device zone size. * and let zonefs_handle_io_error() correct the zone inode information
* according to the mount options.
*/ */
if (z->z_size > bdev_zone_sectors(sb->s_bdev)) if (!zonefs_zone_is_seq(z)) {
nr_zones = z->z_size >> zone.start = z->z_sector;
(sbi->s_zone_sectors_shift + SECTOR_SHIFT); zone.len = z->z_size >> SECTOR_SHIFT;
zone.wp = zone.start + zone.len;
zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
zone.cond = BLK_ZONE_COND_NOT_WP;
zone.capacity = zone.len;
goto handle_io_error;
}
/* /*
* Memory allocations in blkdev_report_zones() can trigger a memory * Memory allocations in blkdev_report_zones() can trigger a memory
...@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write) ...@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
* the GFP_NOIO context avoids both problems. * the GFP_NOIO context avoids both problems.
*/ */
noio_flag = memalloc_noio_save(); noio_flag = memalloc_noio_save();
ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
zonefs_io_error_cb, &err); zonefs_io_error_cb, &zone);
if (ret != nr_zones) memalloc_noio_restore(noio_flag);
if (ret != 1) {
zonefs_err(sb, "Get inode %lu zone information failed %d\n", zonefs_err(sb, "Get inode %lu zone information failed %d\n",
inode->i_ino, ret); inode->i_ino, ret);
memalloc_noio_restore(noio_flag); zonefs_warn(sb, "remounting filesystem read-only\n");
sb->s_flags |= SB_RDONLY;
return;
}
handle_io_error:
zonefs_handle_io_error(inode, &zone, write);
} }
static struct kmem_cache *zonefs_inode_cachep; static struct kmem_cache *zonefs_inode_cachep;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment