Commit 54126faf authored by Linus Torvalds

Merge tag 'vfs-6.9.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull iomap updates from Christian Brauner:

 - Restore read-write hints in struct bio through the bi_write_hint
   member for the sake of UFS devices in mobile applications. This can
   result in up to 40% lower write amplification in UFS devices. The
   patch series that builds on this will be coming in via the SCSI
   maintainers (Bart)

 - Overhaul the iomap writeback code. Afterwards ->map_blocks() is able
   to map multiple blocks at once as long as they're in the same folio.
   This reduces CPU usage for buffered write workloads on e.g., xfs on
   systems with lots of cores (Christoph)

 - Record processed bytes in iomap_iter() trace event (Kassey)

 - Extend iomap_writepage_map() trace event after Christoph's
   ->map_blocks() changes to map multiple blocks at once (Zhang)

* tag 'vfs-6.9.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (22 commits)
  iomap: Add processed for iomap_iter
  iomap: add pos and dirty_len into trace_iomap_writepage_map
  block, fs: Restore the per-bio/request data lifetime fields
  fs: Propagate write hints to the struct block_device inode
  fs: Move enum rw_hint into a new header file
  fs: Split fcntl_rw_hint()
  fs: Verify write lifetime constants at compile time
  fs: Fix rw_hint validation
  iomap: pass the length of the dirty region to ->map_blocks
  iomap: map multiple blocks at a time
  iomap: submit ioends immediately
  iomap: factor out a iomap_writepage_map_block helper
  iomap: only call mapping_set_error once for each failed bio
  iomap: don't chain bios
  iomap: move the iomap_sector sector calculation out of iomap_add_to_ioend
  iomap: clean up the iomap_alloc_ioend calling convention
  iomap: move all remaining per-folio logic into iomap_writepage_map
  iomap: factor out a iomap_writepage_handle_eof helper
  iomap: move the PF_MEMALLOC check to iomap_writepages
  iomap: move the io_folios field out of struct iomap_ioend
  ...
parents 77417942 86835c39
...@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, ...@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
bio->bi_opf = opf; bio->bi_opf = opf;
bio->bi_flags = 0; bio->bi_flags = 0;
bio->bi_ioprio = 0; bio->bi_ioprio = 0;
bio->bi_write_hint = 0;
bio->bi_status = 0; bio->bi_status = 0;
bio->bi_iter.bi_sector = 0; bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0; bio->bi_iter.bi_size = 0;
...@@ -813,6 +814,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) ...@@ -813,6 +814,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
{ {
bio_set_flag(bio, BIO_CLONED); bio_set_flag(bio, BIO_CLONED);
bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter; bio->bi_iter = bio_src->bi_iter;
if (bio->bi_bdev) { if (bio->bi_bdev) {
......
...@@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) ...@@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
if (bio_flagged(bio_src, BIO_REMAPPED)) if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED); bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
......
...@@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q, ...@@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q,
if (rq_data_dir(req) != rq_data_dir(next)) if (rq_data_dir(req) != rq_data_dir(next))
return NULL; return NULL;
/* Don't merge requests with different write hints. */
if (req->write_hint != next->write_hint)
return NULL;
if (req->ioprio != next->ioprio) if (req->ioprio != next->ioprio)
return NULL; return NULL;
...@@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) ...@@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (!bio_crypt_rq_ctx_compatible(rq, bio)) if (!bio_crypt_rq_ctx_compatible(rq, bio))
return false; return false;
/* Don't merge requests with different write hints. */
if (rq->write_hint != bio->bi_write_hint)
return false;
if (rq->ioprio != bio_prio(bio)) if (rq->ioprio != bio_prio(bio))
return false; return false;
......
...@@ -2584,6 +2584,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, ...@@ -2584,6 +2584,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
rq->cmd_flags |= REQ_FAILFAST_MASK; rq->cmd_flags |= REQ_FAILFAST_MASK;
rq->__sector = bio->bi_iter.bi_sector; rq->__sector = bio->bi_iter.bi_sector;
rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs); blk_rq_bio_prep(rq, bio, nr_segs);
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */ /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
...@@ -3175,6 +3176,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, ...@@ -3175,6 +3176,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
} }
rq->nr_phys_segments = rq_src->nr_phys_segments; rq->nr_phys_segments = rq_src->nr_phys_segments;
rq->ioprio = rq_src->ioprio; rq->ioprio = rq_src->ioprio;
rq->write_hint = rq_src->write_hint;
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
goto free_and_out; goto free_and_out;
......
...@@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) ...@@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
if (bio_flagged(bio_src, BIO_REMAPPED)) if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED); bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
......
...@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, ...@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
} }
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio.bi_ioprio = iocb->ki_ioprio; bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter); ret = bio_iov_iter_get_pages(&bio, iter);
...@@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ...@@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
for (;;) { for (;;) {
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio->bi_private = dio; bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io; bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio; bio->bi_ioprio = iocb->ki_ioprio;
...@@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, ...@@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
dio->flags = 0; dio->flags = 0;
dio->iocb = iocb; dio->iocb = iocb;
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio->bi_end_io = blkdev_bio_end_io_async; bio->bi_end_io = blkdev_bio_end_io_async;
bio->bi_ioprio = iocb->ki_ioprio; bio->bi_ioprio = iocb->ki_ioprio;
...@@ -482,7 +485,7 @@ static void blkdev_readahead(struct readahead_control *rac) ...@@ -482,7 +485,7 @@ static void blkdev_readahead(struct readahead_control *rac)
} }
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc, static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
struct inode *inode, loff_t offset) struct inode *inode, loff_t offset, unsigned int len)
{ {
loff_t isize = i_size_read(inode); loff_t isize = i_size_read(inode);
......
...@@ -55,7 +55,7 @@ ...@@ -55,7 +55,7 @@
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
struct writeback_control *wbc); enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
...@@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, ...@@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
do { do {
struct buffer_head *next = bh->b_this_page; struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) { if (buffer_async_write(bh)) {
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
inode->i_write_hint, wbc);
nr_underway++; nr_underway++;
} }
bh = next; bh = next;
...@@ -1944,7 +1945,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, ...@@ -1944,7 +1945,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
struct buffer_head *next = bh->b_this_page; struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) { if (buffer_async_write(bh)) {
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
inode->i_write_hint, wbc);
nr_underway++; nr_underway++;
} }
bh = next; bh = next;
...@@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio) ...@@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
} }
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
enum rw_hint write_hint,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
const enum req_op op = opf & REQ_OP_MASK; const enum req_op op = opf & REQ_OP_MASK;
...@@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, ...@@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_write_hint = write_hint;
__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
...@@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, ...@@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
void submit_bh(blk_opf_t opf, struct buffer_head *bh) void submit_bh(blk_opf_t opf, struct buffer_head *bh)
{ {
submit_bh_wbc(opf, bh, NULL); submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL);
} }
EXPORT_SYMBOL(submit_bh); EXPORT_SYMBOL(submit_bh);
......
...@@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, ...@@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_end_io = dio_bio_end_io; bio->bi_end_io = dio_bio_end_io;
if (dio->is_pinned) if (dio->is_pinned)
bio_set_flag(bio, BIO_PAGE_PINNED); bio_set_flag(bio, BIO_PAGE_PINNED);
bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint;
sdio->bio = bio; sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
} }
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <linux/part_stat.h> #include <linux/part_stat.h>
#include <linux/rw_hint.h>
#include <crypto/hash.h> #include <crypto/hash.h>
#include <linux/fscrypt.h> #include <linux/fscrypt.h>
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/memfd.h> #include <linux/memfd.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/rw_hint.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <asm/siginfo.h> #include <asm/siginfo.h>
...@@ -268,8 +269,15 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) ...@@ -268,8 +269,15 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
} }
#endif #endif
static bool rw_hint_valid(enum rw_hint hint) static bool rw_hint_valid(u64 hint)
{ {
BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
switch (hint) { switch (hint) {
case RWH_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE: case RWH_WRITE_LIFE_NONE:
...@@ -283,34 +291,40 @@ static bool rw_hint_valid(enum rw_hint hint) ...@@ -283,34 +291,40 @@ static bool rw_hint_valid(enum rw_hint hint)
} }
} }
static long fcntl_rw_hint(struct file *file, unsigned int cmd, static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
u64 __user *argp = (u64 __user *)arg; u64 __user *argp = (u64 __user *)arg;
enum rw_hint hint; u64 hint = READ_ONCE(inode->i_write_hint);
u64 h;
switch (cmd) { if (copy_to_user(argp, &hint, sizeof(*argp)))
case F_GET_RW_HINT: return -EFAULT;
h = inode->i_write_hint; return 0;
if (copy_to_user(argp, &h, sizeof(*argp))) }
return -EFAULT;
return 0;
case F_SET_RW_HINT:
if (copy_from_user(&h, argp, sizeof(h)))
return -EFAULT;
hint = (enum rw_hint) h;
if (!rw_hint_valid(hint))
return -EINVAL;
inode_lock(inode); static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
inode->i_write_hint = hint; unsigned long arg)
inode_unlock(inode); {
return 0; struct inode *inode = file_inode(file);
default: u64 __user *argp = (u64 __user *)arg;
u64 hint;
if (copy_from_user(&hint, argp, sizeof(hint)))
return -EFAULT;
if (!rw_hint_valid(hint))
return -EINVAL; return -EINVAL;
}
WRITE_ONCE(inode->i_write_hint, hint);
/*
* file->f_mapping->host may differ from inode. As an example,
* blkdev_open() modifies file->f_mapping.
*/
if (file->f_mapping->host != inode)
WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
return 0;
} }
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
...@@ -416,8 +430,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, ...@@ -416,8 +430,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
err = memfd_fcntl(filp, cmd, argi); err = memfd_fcntl(filp, cmd, argi);
break; break;
case F_GET_RW_HINT: case F_GET_RW_HINT:
err = fcntl_get_rw_hint(filp, cmd, arg);
break;
case F_SET_RW_HINT: case F_SET_RW_HINT:
err = fcntl_rw_hint(filp, cmd, arg); err = fcntl_set_rw_hint(filp, cmd, arg);
break; break;
default: default:
break; break;
......
...@@ -2465,7 +2465,7 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) ...@@ -2465,7 +2465,7 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
} }
static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode, static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
loff_t offset) loff_t offset, unsigned int len)
{ {
int ret; int ret;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/list_lru.h> #include <linux/list_lru.h>
#include <linux/iversion.h> #include <linux/iversion.h>
#include <linux/rw_hint.h>
#include <trace/events/writeback.h> #include <trace/events/writeback.h>
#include "internal.h" #include "internal.h"
......
This diff is collapsed.
...@@ -380,6 +380,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, ...@@ -380,6 +380,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL); GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(iomap, pos); bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
bio->bi_write_hint = inode->i_write_hint;
bio->bi_ioprio = dio->iocb->ki_ioprio; bio->bi_ioprio = dio->iocb->ki_ioprio;
bio->bi_private = dio; bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io; bio->bi_end_io = iomap_dio_bio_end_io;
......
...@@ -154,7 +154,48 @@ DEFINE_EVENT(iomap_class, name, \ ...@@ -154,7 +154,48 @@ DEFINE_EVENT(iomap_class, name, \
TP_ARGS(inode, iomap)) TP_ARGS(inode, iomap))
DEFINE_IOMAP_EVENT(iomap_iter_dstmap); DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
DEFINE_IOMAP_EVENT(iomap_iter_srcmap); DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
DEFINE_IOMAP_EVENT(iomap_writepage_map);
/*
 * iomap_writepage_map trace event.
 *
 * Takes the inode, a position and dirty length, and an iomap. Each record
 * stores the inode's superblock device and inode number, the given pos and
 * dirty_len, and the iomap's addr, offset, length, type, flags and block
 * device number.
 */
TRACE_EVENT(iomap_writepage_map,
TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
struct iomap *iomap),
TP_ARGS(inode, pos, dirty_len, iomap),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(u64, pos)
__field(u64, dirty_len)
__field(u64, addr)
__field(loff_t, offset)
__field(u64, length)
__field(u16, type)
__field(u16, flags)
__field(dev_t, bdev)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->dirty_len = dirty_len;
__entry->addr = iomap->addr;
__entry->offset = iomap->offset;
__entry->length = iomap->length;
__entry->type = iomap->type;
__entry->flags = iomap->flags;
/* The iomap may have no block device attached; record 0 in that case. */
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
/* Output order: dev, ino, bdev, pos, dirty len, then the iomap fields. */
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
"addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
__entry->pos,
__entry->dirty_len,
__entry->addr,
__entry->offset,
__entry->length,
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
);
TRACE_EVENT(iomap_iter, TRACE_EVENT(iomap_iter,
TP_PROTO(struct iomap_iter *iter, const void *ops, TP_PROTO(struct iomap_iter *iter, const void *ops,
...@@ -165,6 +206,7 @@ TRACE_EVENT(iomap_iter, ...@@ -165,6 +206,7 @@ TRACE_EVENT(iomap_iter,
__field(u64, ino) __field(u64, ino)
__field(loff_t, pos) __field(loff_t, pos)
__field(u64, length) __field(u64, length)
__field(s64, processed)
__field(unsigned int, flags) __field(unsigned int, flags)
__field(const void *, ops) __field(const void *, ops)
__field(unsigned long, caller) __field(unsigned long, caller)
...@@ -174,15 +216,17 @@ TRACE_EVENT(iomap_iter, ...@@ -174,15 +216,17 @@ TRACE_EVENT(iomap_iter,
__entry->ino = iter->inode->i_ino; __entry->ino = iter->inode->i_ino;
__entry->pos = iter->pos; __entry->pos = iter->pos;
__entry->length = iomap_length(iter); __entry->length = iomap_length(iter);
__entry->processed = iter->processed;
__entry->flags = iter->flags; __entry->flags = iter->flags;
__entry->ops = ops; __entry->ops = ops;
__entry->caller = caller; __entry->caller = caller;
), ),
TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx flags %s (0x%x) ops %ps caller %pS", TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx processed %lld flags %s (0x%x) ops %ps caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->ino,
__entry->pos, __entry->pos,
__entry->length, __entry->length,
__entry->processed,
__print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS), __print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS),
__entry->flags, __entry->flags,
__entry->ops, __entry->ops,
......
...@@ -605,6 +605,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, ...@@ -605,6 +605,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
GFP_NOFS); GFP_NOFS);
bio->bi_iter.bi_sector = first_block << (blkbits - 9); bio->bi_iter.bi_sector = first_block << (blkbits - 9);
wbc_init_bio(wbc, bio); wbc_init_bio(wbc, bio);
bio->bi_write_hint = inode->i_write_hint;
} }
/* /*
......
...@@ -112,7 +112,7 @@ xfs_end_ioend( ...@@ -112,7 +112,7 @@ xfs_end_ioend(
* longer dirty. If we don't remove delalloc blocks here, they become * longer dirty. If we don't remove delalloc blocks here, they become
* stale and can corrupt free space accounting on unmount. * stale and can corrupt free space accounting on unmount.
*/ */
error = blk_status_to_errno(ioend->io_bio->bi_status); error = blk_status_to_errno(ioend->io_bio.bi_status);
if (unlikely(error)) { if (unlikely(error)) {
if (ioend->io_flags & IOMAP_F_SHARED) { if (ioend->io_flags & IOMAP_F_SHARED) {
xfs_reflink_cancel_cow_range(ip, offset, size, true); xfs_reflink_cancel_cow_range(ip, offset, size, true);
...@@ -179,7 +179,7 @@ STATIC void ...@@ -179,7 +179,7 @@ STATIC void
xfs_end_bio( xfs_end_bio(
struct bio *bio) struct bio *bio)
{ {
struct iomap_ioend *ioend = bio->bi_private; struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
struct xfs_inode *ip = XFS_I(ioend->io_inode); struct xfs_inode *ip = XFS_I(ioend->io_inode);
unsigned long flags; unsigned long flags;
...@@ -276,7 +276,8 @@ static int ...@@ -276,7 +276,8 @@ static int
xfs_map_blocks( xfs_map_blocks(
struct iomap_writepage_ctx *wpc, struct iomap_writepage_ctx *wpc,
struct inode *inode, struct inode *inode,
loff_t offset) loff_t offset,
unsigned int len)
{ {
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
...@@ -444,7 +445,7 @@ xfs_prepare_ioend( ...@@ -444,7 +445,7 @@ xfs_prepare_ioend(
/* send ioends that might require a transaction to the completion wq */ /* send ioends that might require a transaction to the completion wq */
if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN || if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
(ioend->io_flags & IOMAP_F_SHARED)) (ioend->io_flags & IOMAP_F_SHARED))
ioend->io_bio->bi_end_io = xfs_end_bio; ioend->io_bio.bi_end_io = xfs_end_bio;
return status; return status;
} }
......
...@@ -125,7 +125,8 @@ static void zonefs_readahead(struct readahead_control *rac) ...@@ -125,7 +125,8 @@ static void zonefs_readahead(struct readahead_control *rac)
* which implies that the page range can only be within the fixed inode size. * which implies that the page range can only be within the fixed inode size.
*/ */
static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
struct inode *inode, loff_t offset) struct inode *inode, loff_t offset,
unsigned int len)
{ {
struct zonefs_zone *z = zonefs_inode_zone(inode); struct zonefs_zone *z = zonefs_inode_zone(inode);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/srcu.h> #include <linux/srcu.h>
#include <linux/rw_hint.h>
struct blk_mq_tags; struct blk_mq_tags;
struct blk_flush_queue; struct blk_flush_queue;
...@@ -135,6 +136,7 @@ struct request { ...@@ -135,6 +136,7 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot; struct blk_crypto_keyslot *crypt_keyslot;
#endif #endif
enum rw_hint write_hint;
unsigned short ioprio; unsigned short ioprio;
enum mq_rq_state state; enum mq_rq_state state;
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/bvec.h> #include <linux/bvec.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/rw_hint.h>
struct bio_set; struct bio_set;
struct bio; struct bio;
...@@ -269,6 +270,7 @@ struct bio { ...@@ -269,6 +270,7 @@ struct bio {
*/ */
unsigned short bi_flags; /* BIO_* below */ unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio; unsigned short bi_ioprio;
enum rw_hint bi_write_hint;
blk_status_t bi_status; blk_status_t bi_status;
atomic_t __bi_remaining; atomic_t __bi_remaining;
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <linux/mnt_idmapping.h> #include <linux/mnt_idmapping.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/maple_tree.h> #include <linux/maple_tree.h>
#include <linux/rw_hint.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
#include <uapi/linux/fs.h> #include <uapi/linux/fs.h>
...@@ -310,19 +311,6 @@ struct address_space; ...@@ -310,19 +311,6 @@ struct address_space;
struct writeback_control; struct writeback_control;
struct readahead_control; struct readahead_control;
/*
* Write life time hint values.
* Stored in struct inode as u8.
*/
enum rw_hint {
WRITE_LIFE_NOT_SET = 0,
WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
};
/* Match RWF_* bits to IOCB bits */ /* Match RWF_* bits to IOCB bits */
#define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_HIPRI (__force int) RWF_HIPRI
#define IOCB_DSYNC (__force int) RWF_DSYNC #define IOCB_DSYNC (__force int) RWF_DSYNC
...@@ -680,7 +668,7 @@ struct inode { ...@@ -680,7 +668,7 @@ struct inode {
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes; unsigned short i_bytes;
u8 i_blkbits; u8 i_blkbits;
u8 i_write_hint; enum rw_hint i_write_hint;
blkcnt_t i_blocks; blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED #ifdef __NEED_I_SIZE_ORDERED
......
...@@ -293,22 +293,32 @@ struct iomap_ioend { ...@@ -293,22 +293,32 @@ struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */ struct list_head io_list; /* next ioend in chain */
u16 io_type; u16 io_type;
u16 io_flags; /* IOMAP_F_* */ u16 io_flags; /* IOMAP_F_* */
u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */ struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */ size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */ loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */ sector_t io_sector; /* start sector of ioend */
struct bio *io_bio; /* bio being built */ struct bio io_bio; /* MUST BE LAST! */
struct bio io_inline_bio; /* MUST BE LAST! */
}; };
/*
 * Return the struct iomap_ioend whose io_bio member is @bio.
 *
 * The result is computed with container_of(), so @bio must point at the
 * io_bio member of an iomap_ioend; passing any other bio yields a
 * meaningless pointer.
 */
static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio)
{
return container_of(bio, struct iomap_ioend, io_bio);
}
struct iomap_writeback_ops { struct iomap_writeback_ops {
/* /*
* Required, maps the blocks so that writeback can be performed on * Required, maps the blocks so that writeback can be performed on
* the range starting at offset. * the range starting at offset.
*
* Can return arbitrarily large regions, but we need to call into it at
* least once per folio to allow the file systems to synchronize with
* the write path that could be invalidating mappings.
*
* An existing mapping from a previous call to this method can be reused
* by the file system if it is still valid.
*/ */
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
loff_t offset); loff_t offset, unsigned len);
/* /*
* Optional, allows the file systems to perform actions just before * Optional, allows the file systems to perform actions just before
...@@ -329,6 +339,7 @@ struct iomap_writepage_ctx { ...@@ -329,6 +339,7 @@ struct iomap_writepage_ctx {
struct iomap iomap; struct iomap iomap;
struct iomap_ioend *ioend; struct iomap_ioend *ioend;
const struct iomap_writeback_ops *ops; const struct iomap_writeback_ops *ops;
u32 nr_folios; /* folios added to the ioend */
}; };
void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RW_HINT_H
#define _LINUX_RW_HINT_H
#include <linux/build_bug.h>
#include <linux/compiler_attributes.h>
#include <uapi/linux/fcntl.h>
/*
 * Block storage write lifetime hint values.
 *
 * Each kernel-internal WRITE_LIFE_* constant is defined as the
 * RWH_WRITE_LIFE_* constant of the same name, so the two sets of values
 * are numerically identical.
 *
 * The enum is declared __packed; the static_assert() that follows checks
 * that it occupies exactly one byte.
 */
enum rw_hint {
WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET,
WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
} __packed;
/*
 * Sparse ignores __packed annotations on enums, hence the #ifndef below:
 * the one-byte size check is skipped when __CHECKER__ is defined.
 */
#ifndef __CHECKER__
static_assert(sizeof(enum rw_hint) == 1);
#endif
#endif /* _LINUX_RW_HINT_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment