Commit 09cb6464 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "This patch series contains several performance tuning patches
  regarding to the IO submission flow, in addition to supporting new
  features such as a ZBC-base drive and multiple devices.

  It also includes some major bug fixes such as:
   - checkpoint version control
   - fdatasync-related roll-forward recovery routine
   - memory boundary or null-pointer access in corner cases
   - missing error cases

  It has various minor clean-up patches as well"

* tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (66 commits)
  f2fs: fix a missing size change in f2fs_setattr
  f2fs: fix to access nullified flush_cmd_control pointer
  f2fs: free meta pages if sanity check for ckpt is failed
  f2fs: detect wrong layout
  f2fs: call sync_fs when f2fs is idle
  Revert "f2fs: use percpu_counter for # of dirty pages in inode"
  f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage
  f2fs: do not activate auto_recovery for fallocated i_size
  f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack
  f2fs: fix 32-bit build
  f2fs: set ->owner for debugfs status file's file_operations
  f2fs: fix incorrect free inode count in ->statfs
  f2fs: drop duplicate header timer.h
  f2fs: fix wrong AUTO_RECOVER condition
  f2fs: do not recover i_size if it's valid
  f2fs: fix fdatasync
  f2fs: fix to account total free nid correctly
  f2fs: fix an infinite loop when flush nodes in cp
  f2fs: don't wait writeback for datas during checkpoint
  f2fs: fix wrong written_valid_blocks counting
  ...
parents 19d37ce2 c0ed4405
......@@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
if (error)
return error;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (default_acl) {
error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
......
......@@ -228,7 +228,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
f2fs_put_page(page, 0);
if (readahead)
ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int f2fs_write_meta_page(struct page *page,
......@@ -770,7 +770,12 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
/* Sanity checking of checkpoint */
if (sanity_check_ckpt(sbi))
goto fail_no_cp;
goto free_fail_no_cp;
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
if (cp_blks <= 1)
goto done;
......@@ -793,6 +798,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
f2fs_put_page(cp2, 1);
return 0;
free_fail_no_cp:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
fail_no_cp:
kfree(sbi->ckpt);
return -EINVAL;
......@@ -921,7 +929,11 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
if (inode) {
update_inode_page(inode);
sync_inode_metadata(inode, 0);
/* it's on eviction */
if (is_inode_flag_set(inode, FI_DIRTY_INODE))
update_inode_page(inode);
iput(inode);
}
};
......@@ -987,7 +999,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
build_free_nids(sbi);
build_free_nids(sbi, false);
f2fs_unlock_all(sbi);
}
......@@ -998,7 +1010,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
if (!atomic_read(&sbi->nr_wb_bios))
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
io_schedule_timeout(5*HZ);
......@@ -1123,7 +1135,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
start_blk = __start_cp_addr(sbi);
start_blk = __start_cp_next_addr(sbi);
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
......@@ -1184,9 +1196,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
__set_cp_next_pack(sbi);
/*
* redirty superblock if metadata like node page or inode cache is
......@@ -1261,8 +1273,12 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
f2fs_wait_all_discard_bio(sbi);
if (err) {
release_discard_addrs(sbi);
} else {
clear_prefree_segments(sbi, cpc);
f2fs_wait_all_discard_bio(sbi);
}
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
......
......@@ -29,6 +29,26 @@
#include "trace.h"
#include <trace/events/f2fs.h>
static bool __is_cp_guaranteed(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode;
struct f2fs_sb_info *sbi;
if (!mapping)
return false;
inode = mapping->host;
sbi = F2FS_I_SB(inode);
if (inode->i_ino == F2FS_META_INO(sbi) ||
inode->i_ino == F2FS_NODE_INO(sbi) ||
S_ISDIR(inode->i_mode) ||
is_cold_data(page))
return true;
return false;
}
static void f2fs_read_end_io(struct bio *bio)
{
struct bio_vec *bvec;
......@@ -71,6 +91,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
fscrypt_pullback_bio_page(&page, true);
......@@ -78,15 +99,57 @@ static void f2fs_write_end_io(struct bio *bio)
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
dec_page_count(sbi, type);
clear_cold_data(page);
end_page_writeback(page);
}
if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
wq_has_sleeper(&sbi->cp_wait))
wake_up(&sbi->cp_wait);
bio_put(bio);
}
/*
* Return true, if pre_bio's bdev is same as its target device.
*/
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio)
{
struct block_device *bdev = sbi->sb->s_bdev;
int i;
for (i = 0; i < sbi->s_ndevs; i++) {
if (FDEV(i).start_blk <= blk_addr &&
FDEV(i).end_blk >= blk_addr) {
blk_addr -= FDEV(i).start_blk;
bdev = FDEV(i).bdev;
break;
}
}
if (bio) {
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
}
return bdev;
}
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
int i;
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
return i;
return 0;
}
static bool __same_bdev(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio)
{
return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
}
/*
* Low-level block read/write IO operations.
*/
......@@ -97,8 +160,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio = f2fs_bio_alloc(npages);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
f2fs_target_device(sbi, blk_addr, bio);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
bio->bi_private = is_read ? NULL : sbi;
......@@ -109,8 +171,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type)
{
if (!is_read_io(bio_op(bio))) {
atomic_inc(&sbi->nr_wb_bios);
if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
current->plug && (type == DATA || type == NODE))
blk_finish_plug(current->plug);
}
......@@ -268,22 +329,24 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
verify_block_addr(sbi, fio->old_blkaddr);
verify_block_addr(sbi, fio->new_blkaddr);
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
if (!is_read)
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
down_write(&io->io_rwsem);
if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
(io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
(io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
int bio_blocks = MAX_BIO_BLOCKS(sbi);
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
bio_blocks, is_read);
BIO_MAX_PAGES, is_read);
io->fio = *fio;
}
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
PAGE_SIZE) {
__submit_merged_bio(io);
......@@ -588,7 +651,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
int seg = CURSEG_WARM_DATA;
pgoff_t fofs;
blkcnt_t count = 1;
......@@ -606,11 +668,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
seg = CURSEG_DIRECT_IO;
allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
&sum, seg);
&sum, CURSEG_WARM_DATA);
set_data_blkaddr(dn);
/* update i_size */
......@@ -622,11 +681,18 @@ static int __allocate_data_block(struct dnode_of_data *dn)
return 0;
}
ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
static inline bool __force_buffered_io(struct inode *inode, int rw)
{
return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
F2FS_I_SB(inode)->s_ndevs);
}
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct f2fs_map_blocks map;
ssize_t ret = 0;
int err = 0;
map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
......@@ -638,19 +704,22 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
if (iocb->ki_flags & IOCB_DIRECT) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
return f2fs_map_blocks(inode, &map, 1,
__force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
}
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
if (!f2fs_has_inline_data(inode))
return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
return ret;
return err;
}
/*
......@@ -674,7 +743,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
struct extent_info ei;
bool allocated = false;
block_t blkaddr;
if (!maxblocks)
......@@ -714,7 +782,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
prealloc = 0;
ofs_in_node = dn.ofs_in_node;
last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
next_block:
......@@ -733,10 +801,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
} else {
err = __allocate_data_block(&dn);
if (!err) {
if (!err)
set_inode_flag(inode, FI_APPEND_WRITE);
allocated = true;
}
}
if (err)
goto sync_out;
......@@ -791,7 +857,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
err = reserve_new_blocks(&dn, prealloc);
if (err)
goto sync_out;
allocated = dn.node_changed;
map->m_len += dn.ofs_in_node - ofs_in_node;
if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
......@@ -810,9 +875,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (create) {
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, allocated);
f2fs_balance_fs(sbi, dn.node_changed);
}
allocated = false;
goto next_dnode;
sync_out:
......@@ -820,7 +884,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unlock_out:
if (create) {
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, allocated);
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
trace_f2fs_map_blocks(inode, map, err);
......@@ -832,19 +896,19 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
pgoff_t *next_pgofs)
{
struct f2fs_map_blocks map;
int ret;
int err;
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
map.m_next_pgofs = next_pgofs;
ret = f2fs_map_blocks(inode, &map, create, flag);
if (!ret) {
err = f2fs_map_blocks(inode, &map, create, flag);
if (!err) {
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
bh->b_size = map.m_len << inode->i_blkbits;
}
return ret;
return err;
}
static int get_data_block(struct inode *inode, sector_t iblock,
......@@ -889,7 +953,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct buffer_head map_bh;
sector_t start_blk, last_blk;
pgoff_t next_pgofs;
loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
......@@ -906,13 +969,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
inode_lock(inode);
isize = i_size_read(inode);
if (start >= isize)
goto out;
if (start + len > isize)
len = isize - start;
if (logical_to_blk(inode, len) == 0)
len = blk_to_logical(inode, 1);
......@@ -931,13 +987,11 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* HOLE */
if (!buffer_mapped(&map_bh)) {
start_blk = next_pgofs;
/* Go through holes util pass the EOF */
if (blk_to_logical(inode, start_blk) < isize)
if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
F2FS_I_SB(inode)->max_file_blocks))
goto prep_next;
/* Found a hole beyond isize means no more extents.
* Note that the premise is that filesystems don't
* punch holes beyond isize and keep size unchanged.
*/
flags |= FIEMAP_EXTENT_LAST;
}
......@@ -980,7 +1034,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct fscrypt_ctx *ctx = NULL;
struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
......@@ -998,8 +1051,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
fscrypt_release_ctx(ctx);
return ERR_PTR(-ENOMEM);
}
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = ctx;
......@@ -1094,7 +1146,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
* This page will go to BIO. Do we need to send this
* BIO off first?
*/
if (bio && (last_block_in_bio != block_nr - 1)) {
if (bio && (last_block_in_bio != block_nr - 1 ||
!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
__submit_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
......@@ -1309,7 +1362,6 @@ static int f2fs_write_data_page(struct page *page,
if (err && err != -ENOENT)
goto redirty_out;
clear_cold_data(page);
out:
inode_dec_dirty_pages(inode);
if (err)
......@@ -1330,6 +1382,8 @@ static int f2fs_write_data_page(struct page *page,
redirty_out:
redirty_page_for_writepage(wbc, page);
if (!err)
return AOP_WRITEPAGE_ACTIVATE;
unlock_page(page);
return err;
}
......@@ -1425,6 +1479,15 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
ret = mapping->a_ops->writepage(page, wbc);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
* get # of written pages.
*/
if (ret == AOP_WRITEPAGE_ACTIVATE) {
unlock_page(page);
ret = 0;
continue;
}
done_index = page->index + 1;
done = 1;
break;
......@@ -1712,7 +1775,6 @@ static int f2fs_write_end(struct file *file,
goto unlock_out;
set_page_dirty(page);
clear_cold_data(page);
if (pos + copied > i_size_read(inode))
f2fs_i_size_write(inode, pos + copied);
......@@ -1749,9 +1811,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (err)
return err;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
if (test_opt(F2FS_I_SB(inode), LFS))
if (__force_buffered_io(inode, rw))
return 0;
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
......@@ -1783,12 +1843,14 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
return;
if (PageDirty(page)) {
if (inode->i_ino == F2FS_META_INO(sbi))
if (inode->i_ino == F2FS_META_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_META);
else if (inode->i_ino == F2FS_NODE_INO(sbi))
} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_NODES);
else
} else {
inode_dec_dirty_pages(inode);
remove_dirty_inode(inode);
}
}
/* This is atomic written page, keep Private */
......
......@@ -50,7 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_bios = atomic_read(&sbi->nr_wb_bios);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
......@@ -74,7 +75,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->fnids = NM_I(sbi)->fcnt;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
......@@ -194,7 +196,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem += sizeof(struct flush_cmd_control);
/* free nids */
si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
......@@ -310,22 +314,22 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n",
si->inmem_pages, si->wb_bios);
seq_printf(s, " - nodes: %4lld in %4d\n",
seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n",
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
seq_printf(s, " - datas: %4lld in files:%4d\n",
seq_printf(s, " - datas: %4d in files:%4d\n",
si->ndirty_data, si->ndirty_files);
seq_printf(s, " - meta: %4lld in %4d\n",
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4lld\n",
seq_printf(s, " - imeta: %4d\n",
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d\n",
si->fnids);
seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n",
si->free_nids, si->alloc_nids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
......@@ -373,6 +377,7 @@ static int stat_open(struct inode *inode, struct file *file)
}
static const struct file_operations stat_fops = {
.owner = THIS_MODULE,
.open = stat_open,
.read = seq_read,
.llseek = seq_lseek,
......
......@@ -136,7 +136,7 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
/* show encrypted name */
if (fname->hash) {
if (de->hash_code == fname->hash)
if (de->hash_code == cpu_to_le32(fname->hash))
goto found;
} else if (de_name.len == name->len &&
de->hash_code == namehash &&
......@@ -313,7 +313,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
f2fs_put_page(page, 1);
}
......@@ -466,7 +466,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(inode, FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (F2FS_I(dir)->i_current_depth != current_depth)
f2fs_i_depth_write(dir, current_depth);
......@@ -731,7 +731,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
f2fs_drop_nlink(dir, inode);
......@@ -742,6 +742,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
ClearPagePrivate(page);
ClearPageUptodate(page);
inode_dec_dirty_pages(dir);
remove_dirty_inode(dir);
}
f2fs_put_page(page, 1);
}
......@@ -784,7 +785,7 @@ bool f2fs_empty_dir(struct inode *dir)
return true;
}
bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr)
{
unsigned char d_type = DT_UNKNOWN;
......@@ -819,7 +820,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
(u32)de->hash_code, 0,
&de_name, fstr);
if (err)
return true;
return err;
de_name = *fstr;
fstr->len = save_len;
......@@ -827,12 +828,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (!dir_emit(ctx, de_name.name, de_name.len,
le32_to_cpu(de->ino), d_type))
return true;
return 1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
return false;
return 0;
}
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
......@@ -871,17 +872,21 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT)
if (err == -ENOENT) {
err = 0;
continue;
else
} else {
goto out;
}
}
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
if (err) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
break;
......@@ -891,10 +896,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
err = 0;
out:
fscrypt_fname_free_buffer(&fstr);
return err;
return err < 0 ? err : 0;
}
static int f2fs_dir_open(struct inode *inode, struct file *filp)
......
......@@ -172,7 +172,7 @@ static void __drop_largest_extent(struct inode *inode,
if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
largest->len = 0;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
}
......
......@@ -103,7 +103,7 @@ struct f2fs_mount_info {
};
#define F2FS_FEATURE_ENCRYPT 0x0001
#define F2FS_FEATURE_HMSMR 0x0002
#define F2FS_FEATURE_BLKZONED 0x0002
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
......@@ -401,6 +401,7 @@ struct f2fs_map_blocks {
#define FADVISE_LOST_PINO_BIT 0x02
#define FADVISE_ENCRYPT_BIT 0x04
#define FADVISE_ENC_NAME_BIT 0x08
#define FADVISE_KEEP_SIZE_BIT 0x10
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
......@@ -413,6 +414,8 @@ struct f2fs_map_blocks {
#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT)
#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
#define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT)
#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
#define DEF_DIR_LEVEL 0
......@@ -428,7 +431,7 @@ struct f2fs_inode_info {
/* Use below internally in f2fs*/
unsigned long flags; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
struct percpu_counter dirty_pages; /* # of dirty pages */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
......@@ -493,20 +496,26 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
return __is_extent_mergeable(cur, front);
}
extern void f2fs_mark_inode_dirty_sync(struct inode *);
extern void f2fs_mark_inode_dirty_sync(struct inode *, bool);
static inline void __try_update_largest_extent(struct inode *inode,
struct extent_tree *et, struct extent_node *en)
{
if (en->ei.len > et->largest.len) {
et->largest = en->ei;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
}
enum nid_list {
FREE_NID_LIST,
ALLOC_NID_LIST,
MAX_NID_LIST,
};
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
nid_t available_nids; /* maximum available node ids */
nid_t available_nids; /* # of available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */
......@@ -522,9 +531,9 @@ struct f2fs_nm_info {
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
struct list_head free_nid_list; /* a list for free nids */
spinlock_t free_nid_list_lock; /* protect free nid list */
unsigned int fcnt; /* the number of free node id */
struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
/* for checkpoint */
......@@ -585,7 +594,6 @@ enum {
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
NO_CHECK_TYPE,
CURSEG_DIRECT_IO, /* to use for the direct IO path */
};
struct flush_cmd {
......@@ -649,6 +657,7 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
......@@ -656,6 +665,8 @@ enum count_type {
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
F2FS_DIRTY_IMETA,
F2FS_WB_CP_DATA,
F2FS_WB_DATA,
NR_COUNT_TYPE,
};
......@@ -704,6 +715,20 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
#define FDEV(i) (sbi->devs[i])
#define RDEV(i) (raw_super->devs[i])
struct f2fs_dev_info {
struct block_device *bdev;
char path[MAX_PATH_LEN];
unsigned int total_segments;
block_t start_blk;
block_t end_blk;
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_blkz; /* Total number of zones */
u8 *blkz_type; /* Array of zones type */
#endif
};
enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
......@@ -750,6 +775,12 @@ struct f2fs_sb_info {
u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
u8 key_prefix_size;
#endif
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */
#endif
/* for node-related operations */
struct f2fs_nm_info *nm_info; /* node manager */
struct inode *node_inode; /* cache node blocks */
......@@ -764,6 +795,7 @@ struct f2fs_sb_info {
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
int cur_cp_pack; /* remain current cp pack */
spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
......@@ -815,10 +847,9 @@ struct f2fs_sb_info {
block_t discard_blks; /* discard command candidats */
block_t last_valid_block_count; /* for recovery */
u32 s_next_generation; /* for NFS support */
atomic_t nr_wb_bios; /* # of writeback bios */
/* # of pages, see count_type */
struct percpu_counter nr_pages[NR_COUNT_TYPE];
atomic_t nr_pages[NR_COUNT_TYPE];
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;
......@@ -863,6 +894,8 @@ struct f2fs_sb_info {
/* For shrinker support */
struct list_head s_list;
int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */
struct mutex umount_mutex;
unsigned int shrinker_run_no;
......@@ -1105,13 +1138,6 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock(&sbi->cp_lock);
}
static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
{
struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
return blk_queue_discard(q);
}
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
......@@ -1232,9 +1258,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
{
percpu_counter_inc(&sbi->nr_pages[count_type]);
atomic_inc(&sbi->nr_pages[count_type]);
if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES)
if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
return;
set_sbi_flag(sbi, SBI_IS_DIRTY);
......@@ -1242,14 +1269,14 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_pages(struct inode *inode)
{
percpu_counter_inc(&F2FS_I(inode)->dirty_pages);
atomic_inc(&F2FS_I(inode)->dirty_pages);
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
{
percpu_counter_dec(&sbi->nr_pages[count_type]);
atomic_dec(&sbi->nr_pages[count_type]);
}
static inline void inode_dec_dirty_pages(struct inode *inode)
......@@ -1258,19 +1285,19 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
!S_ISLNK(inode->i_mode))
return;
percpu_counter_dec(&F2FS_I(inode)->dirty_pages);
atomic_dec(&F2FS_I(inode)->dirty_pages);
dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
{
return percpu_counter_sum_positive(&sbi->nr_pages[count_type]);
return atomic_read(&sbi->nr_pages[count_type]);
}
static inline s64 get_dirty_pages(struct inode *inode)
static inline int get_dirty_pages(struct inode *inode)
{
return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages);
return atomic_read(&F2FS_I(inode)->dirty_pages);
}
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
......@@ -1329,22 +1356,27 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
{
block_t start_addr;
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_version = cur_cp_version(ckpt);
start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
/*
* odd numbered checkpoint should at cp segment 0
* and even segment must be at cp segment 1
*/
if (!(ckpt_version & 1))
if (sbi->cur_cp_pack == 2)
start_addr += sbi->blocks_per_seg;
return start_addr;
}
static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
{
block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
if (sbi->cur_cp_pack == 1)
start_addr += sbi->blocks_per_seg;
return start_addr;
}
static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi)
{
sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1;
}
static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
{
return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
......@@ -1621,7 +1653,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
return;
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
}
......@@ -1648,7 +1680,7 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode)
{
F2FS_I(inode)->i_acl_mode = mode;
set_inode_flag(inode, FI_ACL_MODE);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
}
static inline void f2fs_i_links_write(struct inode *inode, bool inc)
......@@ -1657,7 +1689,7 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
inc_nlink(inode);
else
drop_nlink(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void f2fs_i_blocks_write(struct inode *inode,
......@@ -1668,7 +1700,7 @@ static inline void f2fs_i_blocks_write(struct inode *inode,
inode->i_blocks = add ? inode->i_blocks + diff :
inode->i_blocks - diff;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
}
......@@ -1682,34 +1714,27 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
return;
i_size_write(inode, i_size);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
}
static inline bool f2fs_skip_inode_update(struct inode *inode)
{
if (!is_inode_flag_set(inode, FI_AUTO_RECOVER))
return false;
return F2FS_I(inode)->last_disk_size == i_size_read(inode);
}
static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
{
F2FS_I(inode)->i_current_depth = depth;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid)
{
F2FS_I(inode)->i_xattr_nid = xnid;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino)
{
F2FS_I(inode)->i_pino = pino;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
......@@ -1837,13 +1862,31 @@ static inline int is_file(struct inode *inode, int type)
static inline void set_file(struct inode *inode, int type)
{
F2FS_I(inode)->i_advise |= type;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void clear_file(struct inode *inode, int type)
{
F2FS_I(inode)->i_advise &= ~type;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
if (dsync) {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
bool ret;
spin_lock(&sbi->inode_lock[DIRTY_META]);
ret = list_empty(&F2FS_I(inode)->gdirty_list);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return ret;
}
if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) ||
file_keep_isize(inode) ||
i_size_read(inode) & PAGE_MASK)
return false;
return F2FS_I(inode)->last_disk_size == i_size_read(inode);
}
static inline int f2fs_readonly(struct super_block *sb)
......@@ -1955,7 +1998,7 @@ void set_de_type(struct f2fs_dir_entry *, umode_t);
unsigned char get_de_type(struct f2fs_dir_entry *);
struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
unsigned int, struct fscrypt_str *);
void do_make_empty_dir(struct inode *, struct inode *,
struct f2fs_dentry_ptr *);
......@@ -1995,7 +2038,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
/*
* super.c
*/
int f2fs_inode_dirtied(struct inode *);
int f2fs_inode_dirtied(struct inode *, bool);
void f2fs_inode_synced(struct inode *);
int f2fs_commit_super(struct f2fs_sb_info *, bool);
int f2fs_sync_fs(struct super_block *, int);
......@@ -2034,7 +2077,7 @@ void move_node_page(struct page *, int);
int fsync_node_pages(struct f2fs_sb_info *, struct inode *,
struct writeback_control *, bool);
int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
void build_free_nids(struct f2fs_sb_info *);
void build_free_nids(struct f2fs_sb_info *, bool);
bool alloc_nid(struct f2fs_sb_info *, nid_t *);
void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
......@@ -2060,7 +2103,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *, bool);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
int create_flush_cmd_control(struct f2fs_sb_info *);
void destroy_flush_cmd_control(struct f2fs_sb_info *);
void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
......@@ -2132,12 +2175,15 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
void f2fs_flush_merged_bios(struct f2fs_sb_info *);
int f2fs_submit_page_bio(struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_io_info *);
struct block_device *f2fs_target_device(struct f2fs_sb_info *,
block_t, struct bio *);
int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
void set_data_blkaddr(struct dnode_of_data *);
void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t);
ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
struct page *find_data_page(struct inode *, pgoff_t);
......@@ -2160,7 +2206,7 @@ int f2fs_migrate_page(struct address_space *, struct page *, struct page *,
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
block_t start_bidx_of_node(unsigned int, struct inode *);
int f2fs_gc(struct f2fs_sb_info *, bool);
int f2fs_gc(struct f2fs_sb_info *, bool, bool);
void build_gc_manager(struct f2fs_sb_info *);
/*
......@@ -2181,12 +2227,12 @@ struct f2fs_stat_info {
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, zombie_tree, ext_node;
s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
s64 inmem_pages;
int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
int inmem_pages;
unsigned int ndirty_dirs, ndirty_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits, fnids;
int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
int total_count, utilization;
int bg_gc, wb_bios;
int bg_gc, nr_wb_cp_data, nr_wb_data;
int inline_xattr, inline_inode, inline_dir, orphans;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
......@@ -2412,9 +2458,30 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb)
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
}
static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb)
static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb)
{
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED);
}
#ifdef CONFIG_BLK_DEV_ZONED
static inline int get_blkz_type(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkaddr)
{
unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
int i;
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).bdev == bdev)
return FDEV(i).blkz_type[zno];
return -EINVAL;
}
#endif
static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
{
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR);
struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb);
}
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
......
......@@ -94,8 +94,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
/* if gced page is attached, don't write to cold segment */
clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
......@@ -210,7 +208,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
}
/* if the inode is dirty, let's recover all the time */
if (!datasync && !f2fs_skip_inode_update(inode)) {
if (!f2fs_skip_inode_update(inode, datasync)) {
f2fs_write_inode(inode, NULL);
goto go_write;
}
......@@ -264,7 +262,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
}
if (need_inode_block_update(sbi, ino)) {
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_write_inode(inode, NULL);
goto sync_nodes;
}
......@@ -632,7 +630,7 @@ int f2fs_truncate(struct inode *inode)
return err;
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return 0;
}
......@@ -679,6 +677,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
bool size_changed = false;
err = setattr_prepare(dentry, attr);
if (err)
......@@ -694,7 +693,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
err = f2fs_truncate(inode);
if (err)
return err;
f2fs_balance_fs(F2FS_I_SB(inode), true);
} else {
/*
* do not trim all blocks after i_size if target size is
......@@ -710,6 +708,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
inode->i_mtime = inode->i_ctime = current_time(inode);
}
size_changed = true;
}
__setattr_copy(inode, attr);
......@@ -722,7 +722,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
}
f2fs_mark_inode_dirty_sync(inode);
/* file size may changed here */
f2fs_mark_inode_dirty_sync(inode, size_changed);
/* inode change will produce dirty node pages flushed by checkpoint */
f2fs_balance_fs(F2FS_I_SB(inode), true);
return err;
}
......@@ -967,7 +972,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
new_size = (dst + i) << PAGE_SHIFT;
if (dst_inode->i_size < new_size)
f2fs_i_size_write(dst_inode, new_size);
} while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen);
} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
f2fs_put_dnode(&dn);
} else {
......@@ -1218,6 +1223,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, dn.node_changed);
if (ret)
goto out;
......@@ -1313,15 +1321,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
int ret;
int err;
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
err = inode_newsize_ok(inode, (len + offset));
if (err)
return err;
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
f2fs_balance_fs(sbi, true);
......@@ -1333,12 +1341,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
if (ret) {
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
if (err) {
pgoff_t last_off;
if (!map.m_len)
return ret;
return err;
last_off = map.m_lblk + map.m_len - 1;
......@@ -1352,7 +1360,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
f2fs_i_size_write(inode, new_size);
return ret;
return err;
}
static long f2fs_fallocate(struct file *file, int mode,
......@@ -1393,7 +1401,9 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
if (mode & FALLOC_FL_KEEP_SIZE)
file_set_keep_isize(inode);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
......@@ -1526,7 +1536,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
goto out;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%lld",
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
......@@ -1842,7 +1852,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
mutex_lock(&sbi->gc_mutex);
}
ret = f2fs_gc(sbi, sync);
ret = f2fs_gc(sbi, sync, true);
out:
mnt_drop_write_file(filp);
return ret;
......@@ -2256,12 +2266,15 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
ret = f2fs_preallocate_blocks(iocb, from);
if (!ret) {
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
int err = f2fs_preallocate_blocks(iocb, from);
if (err) {
inode_unlock(inode);
return err;
}
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
}
inode_unlock(inode);
......
......@@ -82,7 +82,7 @@ static int gc_thread_func(void *data)
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
......@@ -544,7 +544,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
static void move_encrypted_block(struct inode *inode, block_t bidx)
static void move_encrypted_block(struct inode *inode, block_t bidx,
unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
......@@ -565,6 +566,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
if (!page)
return;
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
......@@ -645,7 +649,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
f2fs_put_page(page, 1);
}
static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
unsigned int segno, int off)
{
struct page *page;
......@@ -653,6 +658,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
if (IS_ERR(page))
return;
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
......@@ -673,8 +681,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
retry:
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page))
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
remove_dirty_inode(inode);
}
set_cold_data(page);
......@@ -683,8 +693,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
clear_cold_data(page);
}
out:
f2fs_put_page(page, 1);
......@@ -794,9 +802,9 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
move_encrypted_block(inode, start_bidx);
move_encrypted_block(inode, start_bidx, segno, off);
else
move_data_page(inode, start_bidx, gc_type);
move_data_page(inode, start_bidx, gc_type, segno, off);
if (locked) {
up_write(&fi->dio_rwsem[WRITE]);
......@@ -899,7 +907,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
return sec_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
{
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
......@@ -940,6 +948,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
if (ret)
goto stop;
}
} else if (gc_type == BG_GC && !background) {
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
goto stop;
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
......
......@@ -137,8 +137,10 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
fio.old_blkaddr = dn->data_blkaddr;
write_data_page(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty)
if (dirty) {
inode_dec_dirty_pages(dn->inode);
remove_dirty_inode(dn->inode);
}
/* this converted inline_data should be recovered. */
set_inode_flag(dn->inode, FI_APPEND_WRITE);
......@@ -419,7 +421,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
}
new_name.name = d.filename[bit_pos];
new_name.len = de->name_len;
new_name.len = le16_to_cpu(de->name_len);
ino = le32_to_cpu(de->ino);
fake_mode = get_de_type(de) << S_SHIFT;
......@@ -573,7 +575,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_put_page(page, 1);
dir->i_ctime = dir->i_mtime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
f2fs_drop_nlink(dir, inode);
......@@ -610,6 +612,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct f2fs_inline_dentry *inline_dentry = NULL;
struct page *ipage = NULL;
struct f2fs_dentry_ptr d;
int err;
if (ctx->pos == NR_INLINE_DENTRY)
return 0;
......@@ -622,11 +625,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
if (!f2fs_fill_dentries(ctx, &d, 0, fstr))
err = f2fs_fill_dentries(ctx, &d, 0, fstr);
if (!err)
ctx->pos = NR_INLINE_DENTRY;
f2fs_put_page(ipage, 1);
return 0;
return err < 0 ? err : 0;
}
int f2fs_inline_data_fiemap(struct inode *inode,
......
......@@ -19,10 +19,11 @@
#include <trace/events/f2fs.h>
void f2fs_mark_inode_dirty_sync(struct inode *inode)
void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
{
if (f2fs_inode_dirtied(inode))
if (f2fs_inode_dirtied(inode, sync))
return;
mark_inode_dirty_sync(inode);
}
......@@ -43,7 +44,7 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
......@@ -252,6 +253,7 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino)
int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
f2fs_inode_synced(inode);
......@@ -267,11 +269,13 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
if (F2FS_I(inode)->extent_tree)
set_raw_extent(&F2FS_I(inode)->extent_tree->largest,
&ri->i_ext);
else
if (et) {
read_lock(&et->lock);
set_raw_extent(&et->largest, &ri->i_ext);
read_unlock(&et->lock);
} else {
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
}
set_raw_inline(inode, ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
......@@ -335,7 +339,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
if (update_inode_page(inode))
if (update_inode_page(inode) && wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
}
......@@ -373,6 +377,9 @@ void f2fs_evict_inode(struct inode *inode)
goto no_delete;
#endif
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
......@@ -384,6 +391,8 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_lock_op(sbi);
err = remove_inode_page(inode);
f2fs_unlock_op(sbi);
if (err == -ENOENT)
err = 0;
}
/* give more chances, if ENOMEM case */
......@@ -403,10 +412,12 @@ void f2fs_evict_inode(struct inode *inode)
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
if (inode->i_nlink) {
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
}
if (is_inode_flag_set(inode, FI_FREE_NID)) {
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
......@@ -424,6 +435,18 @@ void handle_failed_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
/*
* clear nlink of inode in order to release resource of inode
* immediately.
*/
clear_nlink(inode);
/*
* we must call this to avoid inode being remained as dirty, resulting
* in a panic when flushing dirty inodes in gdirty_list.
*/
update_inode_page(inode);
/* don't make bad inode, since it becomes a regular file. */
unlock_new_inode(inode);
......
......@@ -778,7 +778,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
f2fs_mark_inode_dirty_sync(old_inode);
f2fs_mark_inode_dirty_sync(old_inode, false);
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
......@@ -938,7 +938,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(old_dir, old_nlink > 0);
up_write(&F2FS_I(old_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(old_dir);
f2fs_mark_inode_dirty_sync(old_dir, false);
/* update directory entry info of new dir inode */
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
......@@ -953,7 +953,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(new_dir, new_nlink > 0);
up_write(&F2FS_I(new_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(new_dir);
f2fs_mark_inode_dirty_sync(new_dir, false);
f2fs_unlock_op(sbi);
......
......@@ -45,8 +45,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
* give 25%, 25%, 50%, 50%, 50% memory for each components respectively
*/
if (type == FREE_NIDS) {
mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
PAGE_SHIFT;
mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
......@@ -270,8 +270,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
} else {
f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino ||
nat_get_blkaddr(e) != ne->block_addr ||
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
le32_to_cpu(ne->block_addr) ||
nat_get_version(e) != ne->version);
}
}
......@@ -1204,6 +1205,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
ret = f2fs_write_inline_data(inode, page);
inode_dec_dirty_pages(inode);
remove_dirty_inode(inode);
if (ret)
set_page_dirty(page);
page_out:
......@@ -1338,7 +1340,8 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
pagevec_release(&pvec);
return -EIO;
ret = -EIO;
goto out;
}
if (!IS_DNODE(page) || !is_cold_node(page))
......@@ -1407,11 +1410,12 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
"Retry to write fsync mark: ino=%u, idx=%lx",
ino, last_page->index);
lock_page(last_page);
f2fs_wait_on_page_writeback(last_page, NODE, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
}
out:
if (nwritten)
f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
return ret ? -EIO: 0;
......@@ -1692,11 +1696,35 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
return radix_tree_lookup(&nm_i->free_nid_root, n);
}
static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
struct free_nid *i)
static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
struct free_nid *i, enum nid_list list, bool new)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
if (new) {
int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
if (err)
return err;
}
f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
i->state != NID_ALLOC);
nm_i->nid_cnt[list]++;
list_add_tail(&i->list, &nm_i->nid_list[list]);
return 0;
}
static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
struct free_nid *i, enum nid_list list, bool reuse)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
i->state != NID_ALLOC);
nm_i->nid_cnt[list]--;
list_del(&i->list);
radix_tree_delete(&nm_i->free_nid_root, i->nid);
if (!reuse)
radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
......@@ -1704,9 +1732,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
if (!available_free_memory(sbi, FREE_NIDS))
return -1;
int err;
/* 0 nid should not be used */
if (unlikely(nid == 0))
......@@ -1729,33 +1755,30 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
return 0;
}
spin_lock(&nm_i->free_nid_list_lock);
if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
radix_tree_preload_end();
spin_lock(&nm_i->nid_list_lock);
err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
spin_unlock(&nm_i->nid_list_lock);
radix_tree_preload_end();
if (err) {
kmem_cache_free(free_nid_slab, i);
return 0;
}
list_add_tail(&i->list, &nm_i->free_nid_list);
nm_i->fcnt++;
spin_unlock(&nm_i->free_nid_list_lock);
radix_tree_preload_end();
return 1;
}
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
bool need_free = false;
spin_lock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
__del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
__remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
need_free = true;
}
spin_unlock(&nm_i->free_nid_list_lock);
spin_unlock(&nm_i->nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
......@@ -1778,14 +1801,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR) {
if (add_free_nid(sbi, start_nid, true) < 0)
break;
}
if (blk_addr == NULL_ADDR)
add_free_nid(sbi, start_nid, true);
}
}
void build_free_nids(struct f2fs_sb_info *sbi)
static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
......@@ -1794,7 +1815,10 @@ void build_free_nids(struct f2fs_sb_info *sbi)
nid_t nid = nm_i->next_scan_nid;
/* Enough entries */
if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
return;
if (!sync && !available_free_memory(sbi, FREE_NIDS))
return;
/* readahead nat pages to be scanned */
......@@ -1830,7 +1854,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
if (addr == NULL_ADDR)
add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
remove_free_nid(sbi, nid);
}
up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
......@@ -1839,6 +1863,13 @@ void build_free_nids(struct f2fs_sb_info *sbi)
nm_i->ra_nid_pages, META_NAT, false);
}
void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
{
mutex_lock(&NM_I(sbi)->build_lock);
__build_free_nids(sbi, sync);
mutex_unlock(&NM_I(sbi)->build_lock);
}
/*
* If this function returns success, caller can obtain a new nid
* from second parameter of this function.
......@@ -1853,31 +1884,31 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
if (time_to_inject(sbi, FAULT_ALLOC_NID))
return false;
#endif
if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
return false;
spin_lock(&nm_i->nid_list_lock);
spin_lock(&nm_i->free_nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
spin_unlock(&nm_i->nid_list_lock);
return false;
}
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
break;
f2fs_bug_on(sbi, i->state != NID_NEW);
if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
struct free_nid, list);
*nid = i->nid;
__remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
i->state = NID_ALLOC;
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
nm_i->available_nids--;
spin_unlock(&nm_i->nid_list_lock);
return true;
}
spin_unlock(&nm_i->free_nid_list_lock);
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
mutex_lock(&nm_i->build_lock);
build_free_nids(sbi);
mutex_unlock(&nm_i->build_lock);
build_free_nids(sbi, true);
goto retry;
}
......@@ -1889,11 +1920,11 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
spin_lock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
__del_from_free_nid_list(nm_i, i);
spin_unlock(&nm_i->free_nid_list_lock);
f2fs_bug_on(sbi, !i);
__remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
}
......@@ -1910,17 +1941,22 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
if (!nid)
return;
spin_lock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
f2fs_bug_on(sbi, !i);
if (!available_free_memory(sbi, FREE_NIDS)) {
__del_from_free_nid_list(nm_i, i);
__remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
need_free = true;
} else {
__remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
i->state = NID_NEW;
nm_i->fcnt++;
__insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
}
spin_unlock(&nm_i->free_nid_list_lock);
nm_i->available_nids++;
spin_unlock(&nm_i->nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
......@@ -1932,24 +1968,24 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
struct free_nid *i, *next;
int nr = nr_shrink;
if (nm_i->fcnt <= MAX_FREE_NIDS)
if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
return 0;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS)
spin_lock(&nm_i->nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
list) {
if (nr_shrink <= 0 ||
nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
break;
if (i->state == NID_ALLOC)
continue;
__del_from_free_nid_list(nm_i, i);
__remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
kmem_cache_free(free_nid_slab, i);
nm_i->fcnt--;
nr_shrink--;
}
spin_unlock(&nm_i->free_nid_list_lock);
spin_unlock(&nm_i->nid_list_lock);
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
......@@ -2005,7 +2041,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
if (unlikely(!inc_valid_node_count(sbi, inode)))
f2fs_bug_on(sbi, 1);
remove_free_nid(NM_I(sbi), new_xnid);
remove_free_nid(sbi, new_xnid);
get_node_info(sbi, new_xnid, &ni);
ni.ino = inode->i_ino;
set_node_addr(sbi, &ni, NEW_ADDR, false);
......@@ -2035,7 +2071,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
}
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
remove_free_nid(sbi, ino);
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
......@@ -2069,7 +2105,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
block_t addr;
int bio_blocks = MAX_BIO_BLOCKS(sbi);
int i, idx, last_offset, nrpages;
/* scan the node segment */
......@@ -2078,7 +2113,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
sum_entry = &sum->entries[0];
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
nrpages = min(last_offset - i, bio_blocks);
nrpages = min(last_offset - i, BIO_MAX_PAGES);
/* readahead node pages */
ra_meta_pages(sbi, addr, nrpages, META_POR, true);
......@@ -2120,6 +2155,19 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
/*
* if a free nat in journal has not been used after last
* checkpoint, we should remove it from available nids,
* since later we will add it again.
*/
if (!get_nat_flag(ne, IS_DIRTY) &&
le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
spin_lock(&nm_i->nid_list_lock);
nm_i->available_nids--;
spin_unlock(&nm_i->nid_list_lock);
}
__set_nat_cache_dirty(nm_i, ne);
}
update_nats_in_cursum(journal, -i);
......@@ -2192,8 +2240,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_nat_from_node_info(raw_ne, &ne->ni);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
if (nat_get_blkaddr(ne) == NULL_ADDR)
if (nat_get_blkaddr(ne) == NULL_ADDR) {
add_free_nid(sbi, nid, false);
spin_lock(&NM_I(sbi)->nid_list_lock);
NM_I(sbi)->available_nids++;
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
if (to_journal)
......@@ -2268,21 +2320,24 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
nm_i->fcnt = 0;
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
F2FS_RESERVED_NODE_NUM;
nm_i->nid_cnt[FREE_NID_LIST] = 0;
nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
spin_lock_init(&nm_i->nid_list_lock);
init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
......@@ -2310,7 +2365,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
build_free_nids(sbi);
build_free_nids(sbi, true);
return 0;
}
......@@ -2327,17 +2382,18 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
return;
/* destroy free nid list */
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
f2fs_bug_on(sbi, i->state == NID_ALLOC);
__del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
list) {
__remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
spin_lock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
}
f2fs_bug_on(sbi, nm_i->fcnt);
spin_unlock(&nm_i->free_nid_list_lock);
f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
spin_unlock(&nm_i->nid_list_lock);
/* destroy nat cache */
down_write(&nm_i->nat_tree_lock);
......
......@@ -169,14 +169,15 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *fnid;
spin_lock(&nm_i->free_nid_list_lock);
if (nm_i->fcnt <= 0) {
spin_unlock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) {
spin_unlock(&nm_i->nid_list_lock);
return;
}
fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->free_nid_list_lock);
spin_unlock(&nm_i->nid_list_lock);
}
/*
......@@ -313,7 +314,7 @@ static inline bool is_recoverable_dnode(struct page *page)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
return cpu_to_le64(cp_ver) == cpver_of_node(page);
return cp_ver == cpver_of_node(page);
}
/*
......
......@@ -180,13 +180,15 @@ static void recover_inode(struct inode *inode, struct page *page)
inode->i_mode = le16_to_cpu(raw->i_mode);
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
F2FS_I(inode)->i_advise = raw->i_advise;
if (file_enc_name(inode))
name = "<encrypted>";
else
......@@ -196,32 +198,6 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
struct f2fs_inode *ri = F2FS_INODE(ipage);
struct timespec disk;
if (!IS_INODE(ipage))
return true;
disk.tv_sec = le64_to_cpu(ri->i_ctime);
disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
if (timespec_compare(&inode->i_ctime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_atime);
disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
if (timespec_compare(&inode->i_atime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_mtime);
disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
if (timespec_compare(&inode->i_mtime, &disk) > 0)
return false;
return true;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
struct curseg_info *curseg;
......@@ -248,10 +224,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
if (entry) {
if (!is_same_inode(entry->inode, page))
goto next;
} else {
if (!entry) {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
......@@ -454,7 +427,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
continue;
}
if ((start + 1) << PAGE_SHIFT > i_size_read(inode))
if (!file_keep_isize(inode) &&
(i_size_read(inode) <= (start << PAGE_SHIFT)))
f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
/*
......@@ -507,8 +481,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_put_dnode(&dn);
out:
f2fs_msg(sbi->sb, KERN_NOTICE,
"recover_data: ino = %lx, recovered = %d blocks, err = %d",
inode->i_ino, recovered, err);
"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
inode->i_ino,
file_keep_isize(inode) ? "keep" : "recover",
recovered, err);
return err;
}
......
......@@ -274,8 +274,10 @@ static int __commit_inmem_pages(struct inode *inode,
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page))
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
remove_dirty_inode(inode);
}
fio.page = page;
err = do_write_data_page(&fio);
......@@ -287,7 +289,6 @@ static int __commit_inmem_pages(struct inode *inode,
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
clear_cold_data(page);
submit_bio = true;
}
unlock_page(page);
......@@ -363,7 +364,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
mutex_lock(&sbi->gc_mutex);
f2fs_gc(sbi, false);
f2fs_gc(sbi, false, false);
}
}
......@@ -380,14 +381,17 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, MAX_FREE_NIDS);
else
build_free_nids(sbi);
build_free_nids(sbi, false);
if (!is_idle(sbi))
return;
/* checkpoint is the only way to shrink partial cached entries */
if (!available_free_memory(sbi, NAT_ENTRIES) ||
!available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
......@@ -400,6 +404,33 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
}
}
static int __submit_flush_wait(struct block_device *bdev)
{
struct bio *bio = f2fs_bio_alloc(0);
int ret;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
bio->bi_bdev = bdev;
ret = submit_bio_wait(bio);
bio_put(bio);
return ret;
}
static int submit_flush_wait(struct f2fs_sb_info *sbi)
{
int ret = __submit_flush_wait(sbi->sb->s_bdev);
int i;
if (sbi->s_ndevs && !ret) {
for (i = 1; i < sbi->s_ndevs; i++) {
ret = __submit_flush_wait(FDEV(i).bdev);
if (ret)
break;
}
}
return ret;
}
static int issue_flush_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
......@@ -410,25 +441,18 @@ static int issue_flush_thread(void *data)
return 0;
if (!llist_empty(&fcc->issue_list)) {
struct bio *bio;
struct flush_cmd *cmd, *next;
int ret;
bio = f2fs_bio_alloc(0);
fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
ret = submit_flush_wait(sbi);
llist_for_each_entry_safe(cmd, next,
fcc->dispatch_list, llnode) {
cmd->ret = ret;
complete(&cmd->wait);
}
bio_put(bio);
fcc->dispatch_list = NULL;
}
......@@ -449,15 +473,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
return 0;
if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
struct bio *bio = f2fs_bio_alloc(0);
int ret;
atomic_inc(&fcc->submit_flush);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
ret = submit_flush_wait(sbi);
atomic_dec(&fcc->submit_flush);
bio_put(bio);
return ret;
}
......@@ -469,8 +489,13 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
if (!fcc->dispatch_list)
wake_up(&fcc->flush_wait_queue);
wait_for_completion(&cmd.wait);
atomic_dec(&fcc->submit_flush);
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
atomic_dec(&fcc->submit_flush);
} else {
llist_del_all(&fcc->issue_list);
atomic_set(&fcc->submit_flush, 0);
}
return cmd.ret;
}
......@@ -481,6 +506,11 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc;
int err = 0;
if (SM_I(sbi)->cmd_control_info) {
fcc = SM_I(sbi)->cmd_control_info;
goto init_thread;
}
fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
if (!fcc)
return -ENOMEM;
......@@ -488,6 +518,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->cmd_control_info = fcc;
init_thread:
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
......@@ -500,14 +531,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
return err;
}
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
if (fcc && fcc->f2fs_issue_flush)
kthread_stop(fcc->f2fs_issue_flush);
kfree(fcc);
SM_I(sbi)->cmd_control_info = NULL;
if (fcc && fcc->f2fs_issue_flush) {
struct task_struct *flush_thread = fcc->f2fs_issue_flush;
fcc->f2fs_issue_flush = NULL;
kthread_stop(flush_thread);
}
if (free) {
kfree(fcc);
SM_I(sbi)->cmd_control_info = NULL;
}
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
......@@ -633,15 +670,23 @@ static void f2fs_submit_bio_wait_endio(struct bio *bio)
}
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio = NULL;
int err;
err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
&bio);
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
if (sbi->s_ndevs) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(blkstart),
SECTOR_FROM_BLOCK(blklen),
GFP_NOFS, 0, &bio);
if (!err && bio) {
struct bio_entry *be = __add_bio_entry(sbi, bio);
......@@ -654,24 +699,101 @@ int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
return err;
}
#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
sector_t sector;
int devi = 0;
if (sbi->s_ndevs) {
devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
sector = SECTOR_FROM_BLOCK(blkstart);
if (sector & (bdev_zone_size(bdev) - 1) ||
nr_sects != bdev_zone_size(bdev)) {
f2fs_msg(sbi->sb, KERN_INFO,
"(%d) %s: Unaligned discard attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path: "",
blkstart, blklen);
return -EIO;
}
/*
* We need to know the type of the zone: for conventional zones,
* use regular discard if the drive supports it. For sequential
* zones, reset the zone write pointer.
*/
switch (get_blkz_type(sbi, bdev, blkstart)) {
case BLK_ZONE_TYPE_CONVENTIONAL:
if (!blk_queue_discard(bdev_get_queue(bdev)))
return 0;
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
case BLK_ZONE_TYPE_SEQWRITE_REQ:
case BLK_ZONE_TYPE_SEQWRITE_PREF:
trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
return blkdev_reset_zones(bdev, sector,
nr_sects, GFP_NOFS);
default:
/* Unknown zone type: broken device ? */
return -EIO;
}
}
#endif
static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
bdev_zoned_model(bdev) != BLK_ZONED_NONE)
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
}
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
block_t blkstart, block_t blklen)
{
sector_t start = SECTOR_FROM_BLOCK(blkstart);
sector_t len = SECTOR_FROM_BLOCK(blklen);
sector_t start = blkstart, len = 0;
struct block_device *bdev;
struct seg_entry *se;
unsigned int offset;
block_t i;
int err = 0;
bdev = f2fs_target_device(sbi, blkstart, NULL);
for (i = blkstart; i < blkstart + blklen; i++, len++) {
if (i != start) {
struct block_device *bdev2 =
f2fs_target_device(sbi, i, NULL);
if (bdev2 != bdev) {
err = __issue_discard_async(sbi, bdev,
start, len);
if (err)
return err;
bdev = bdev2;
start = i;
len = 0;
}
}
for (i = blkstart; i < blkstart + blklen; i++) {
se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
offset = GET_BLKOFF_FROM_SEG0(sbi, i);
if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
}
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0);
if (len)
err = __issue_discard_async(sbi, bdev, start, len);
return err;
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
......@@ -1296,25 +1418,21 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
unsigned int old_segno;
int i;
if (test_opt(sbi, LFS))
return;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
__allocate_new_segments(sbi, i);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
locate_dirty_segment(sbi, old_segno);
}
}
static const struct segment_allocation default_salloc_ops = {
......@@ -1448,21 +1566,11 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_summary *sum, int type)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
bool direct_io = (type == CURSEG_DIRECT_IO);
type = direct_io ? CURSEG_WARM_DATA : type;
curseg = CURSEG_I(sbi, type);
struct curseg_info *curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
/* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff &&
!has_not_enough_free_secs(sbi, 0, 0))
__allocate_new_segments(sbi, type);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/*
......@@ -2166,7 +2274,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
static int build_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
char *src_bitmap, *dst_bitmap;
......@@ -2233,7 +2340,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
sit_i->written_valid_blocks = 0;
sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
......@@ -2315,10 +2422,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
do {
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
META_SIT, true);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
......@@ -2387,6 +2494,9 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
struct seg_entry *sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
else
SIT_I(sbi)->written_valid_blocks +=
sentry->valid_blocks;
}
/* set use the current segments */
......@@ -2645,7 +2755,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
if (!sm_info)
return;
destroy_flush_cmd_control(sbi);
destroy_flush_cmd_control(sbi, true);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
......
......@@ -18,6 +18,8 @@
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
......@@ -102,8 +104,6 @@
(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
#define SECTOR_TO_BLOCK(sectors) \
(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
#define MAX_BIO_BLOCKS(sbi) \
((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
/*
* indicate a block allocation direction: RIGHT and LEFT.
......@@ -471,11 +471,12 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (test_opt(sbi, LFS))
return false;
return free_sections(sbi) <= (node_secs + 2 * dent_secs +
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + 1);
}
......@@ -484,14 +485,14 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
return (free_sections(sbi) + freed) <=
(node_secs + 2 * dent_secs + reserved_sections(sbi) + needed);
(node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + needed);
}
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
......@@ -695,13 +696,6 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
return false;
}
static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
return SECTOR_TO_BLOCK(queue_max_sectors(q));
}
/*
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
......@@ -719,7 +713,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
else if (type == NODE)
return 8 * sbi->blocks_per_seg;
else if (type == META)
return 8 * MAX_BIO_BLOCKS(sbi);
return 8 * BIO_MAX_PAGES;
else
return 0;
}
......@@ -736,11 +730,9 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
return 0;
nr_to_write = wbc->nr_to_write;
desired = BIO_MAX_PAGES;
if (type == NODE)
desired = 2 * max_hw_blocks(sbi);
else
desired = MAX_BIO_BLOCKS(sbi);
desired <<= 1;
wbc->nr_to_write = desired;
return desired - nr_to_write;
......
......@@ -21,14 +21,16 @@ static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
return count > 0 ? count : 0;
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
if (NM_I(sbi)->fcnt > MAX_FREE_NIDS)
return NM_I(sbi)->fcnt - MAX_FREE_NIDS;
return 0;
long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS;
return count > 0 ? count : 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
......
......@@ -412,14 +412,20 @@ static int parse_options(struct super_block *sb, char *options)
q = bdev_get_queue(sb->s_bdev);
if (blk_queue_discard(q)) {
set_opt(sbi, DISCARD);
} else {
} else if (!f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"mounting with \"discard\" option, but "
"the device does not support discard");
}
break;
case Opt_nodiscard:
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"discard is required for zoned block devices");
return -EINVAL;
}
clear_opt(sbi, DISCARD);
break;
case Opt_noheap:
set_opt(sbi, NOHEAP);
break;
......@@ -512,6 +518,13 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"adaptive mode is not allowed with "
"zoned block device feature");
kfree(name);
return -EINVAL;
}
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
} else if (strlen(name) == 3 &&
!strncmp(name, "lfs", 3)) {
......@@ -558,13 +571,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
init_once((void *) fi);
if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) {
kmem_cache_free(f2fs_inode_cachep, fi);
return NULL;
}
/* Initialize f2fs-specific inode info */
fi->vfs_inode.i_version = 1;
atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
......@@ -620,24 +629,25 @@ static int f2fs_drop_inode(struct inode *inode)
return generic_drop_inode(inode);
}
int f2fs_inode_dirtied(struct inode *inode)
int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret = 0;
spin_lock(&sbi->inode_lock[DIRTY_META]);
if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 1;
ret = 1;
} else {
set_inode_flag(inode, FI_DIRTY_INODE);
stat_inc_dirty_inode(sbi, DIRTY_META);
}
set_inode_flag(inode, FI_DIRTY_INODE);
list_add_tail(&F2FS_I(inode)->gdirty_list,
if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
list_add_tail(&F2FS_I(inode)->gdirty_list,
&sbi->inode_list[DIRTY_META]);
inc_page_count(sbi, F2FS_DIRTY_IMETA);
stat_inc_dirty_inode(sbi, DIRTY_META);
inc_page_count(sbi, F2FS_DIRTY_IMETA);
}
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
return ret;
}
void f2fs_inode_synced(struct inode *inode)
......@@ -649,10 +659,12 @@ void f2fs_inode_synced(struct inode *inode)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return;
}
list_del_init(&F2FS_I(inode)->gdirty_list);
if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
list_del_init(&F2FS_I(inode)->gdirty_list);
dec_page_count(sbi, F2FS_DIRTY_IMETA);
}
clear_inode_flag(inode, FI_DIRTY_INODE);
clear_inode_flag(inode, FI_AUTO_RECOVER);
dec_page_count(sbi, F2FS_DIRTY_IMETA);
stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
}
......@@ -676,7 +688,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
clear_inode_flag(inode, FI_AUTO_RECOVER);
f2fs_inode_dirtied(inode);
f2fs_inode_dirtied(inode, false);
}
static void f2fs_i_callback(struct rcu_head *head)
......@@ -687,20 +699,28 @@ static void f2fs_i_callback(struct rcu_head *head)
static void f2fs_destroy_inode(struct inode *inode)
{
percpu_counter_destroy(&F2FS_I(inode)->dirty_pages);
call_rcu(&inode->i_rcu, f2fs_i_callback);
}
static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
int i;
for (i = 0; i < NR_COUNT_TYPE; i++)
percpu_counter_destroy(&sbi->nr_pages[i]);
percpu_counter_destroy(&sbi->alloc_valid_block_count);
percpu_counter_destroy(&sbi->total_valid_inode_count);
}
static void destroy_device_list(struct f2fs_sb_info *sbi)
{
int i;
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kfree(FDEV(i).blkz_type);
#endif
}
kfree(sbi->devs);
}
static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -738,7 +758,6 @@ static void f2fs_put_super(struct super_block *sb)
* In addition, EIO will skip do checkpoint, we need this as well.
*/
release_ino_entry(sbi, true);
release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
......@@ -762,6 +781,8 @@ static void f2fs_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
destroy_device_list(sbi);
destroy_percpu_info(sbi);
kfree(sbi);
}
......@@ -789,13 +810,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
static int f2fs_freeze(struct super_block *sb)
{
int err;
if (f2fs_readonly(sb))
return 0;
err = f2fs_sync_fs(sb, 1);
return err;
/* IO error happened before */
if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
return -EIO;
/* must be clean, since sync_filesystem() was already called */
if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
return -EINVAL;
return 0;
}
static int f2fs_unfreeze(struct super_block *sb)
......@@ -822,7 +847,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = user_block_count - valid_user_blocks(sbi);
buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
buf->f_ffree = buf->f_files - valid_inode_count(sbi);
buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
buf->f_bavail);
buf->f_namelen = F2FS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
......@@ -974,7 +1000,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, EXTENT_CACHE);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
if (f2fs_sb_mounted_hmsmr(sbi->sb)) {
if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
set_opt(sbi, DISCARD);
} else {
......@@ -1076,8 +1102,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* or if flush_merge is not passed in mount option.
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
destroy_flush_cmd_control(sbi);
} else if (!SM_I(sbi)->cmd_control_info) {
clear_opt(sbi, FLUSH_MERGE);
destroy_flush_cmd_control(sbi, false);
} else {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
......@@ -1426,6 +1453,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
......@@ -1437,6 +1465,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong layout: check mkfs.f2fs version");
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
......@@ -1447,6 +1485,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
int i;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
......@@ -1471,6 +1510,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
......@@ -1486,13 +1528,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
static int init_percpu_info(struct f2fs_sb_info *sbi)
{
int i, err;
for (i = 0; i < NR_COUNT_TYPE; i++) {
err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
if (err)
return err;
}
int err;
err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
if (err)
......@@ -1502,6 +1538,71 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
GFP_KERNEL);
}
#ifdef CONFIG_BLK_DEV_ZONED
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
sector_t sector = 0;
struct blk_zone *zones;
unsigned int i, nr_zones;
unsigned int n = 0;
int err = -EIO;
if (!f2fs_sb_mounted_blkzoned(sbi->sb))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
return -EINVAL;
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
__ilog2_u32(sbi->blocks_per_blkz))
return -EINVAL;
sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
sbi->log_blocks_per_blkz;
if (nr_sectors & (bdev_zone_size(bdev) - 1))
FDEV(devi).nr_blkz++;
FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
if (!FDEV(devi).blkz_type)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
GFP_KERNEL);
if (!zones)
return -ENOMEM;
/* Get block zones type */
while (zones && sector < nr_sectors) {
nr_zones = F2FS_REPORT_NR_ZONES;
err = blkdev_report_zones(bdev, sector,
zones, &nr_zones,
GFP_KERNEL);
if (err)
break;
if (!nr_zones) {
err = -EIO;
break;
}
for (i = 0; i < nr_zones; i++) {
FDEV(devi).blkz_type[n] = zones[i].type;
sector += zones[i].len;
n++;
}
}
kfree(zones);
return err;
}
#endif
/*
* Read f2fs raw super block.
* Because we have two copies of super block, so read both of them
......@@ -1594,6 +1695,77 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
}
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
int i;
for (i = 0; i < MAX_DEVICES; i++) {
if (!RDEV(i).path[0])
return 0;
if (i == 0) {
sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
MAX_DEVICES, GFP_KERNEL);
if (!sbi->devs)
return -ENOMEM;
}
memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
if (i == 0) {
FDEV(i).start_blk = 0;
FDEV(i).end_blk = FDEV(i).start_blk +
(FDEV(i).total_segments <<
sbi->log_blocks_per_seg) - 1 +
le32_to_cpu(raw_super->segment0_blkaddr);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
(FDEV(i).total_segments <<
sbi->log_blocks_per_seg) - 1;
}
FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
sbi->sb->s_mode, sbi->sb->s_type);
if (IS_ERR(FDEV(i).bdev))
return PTR_ERR(FDEV(i).bdev);
/* to release errored devices */
sbi->s_ndevs = i + 1;
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_mounted_blkzoned(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Zoned block device feature not enabled\n");
return -EINVAL;
}
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
if (init_blkz_info(sbi, i)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Failed to initialize F2FS blkzone information");
return -EINVAL;
}
f2fs_msg(sbi->sb, KERN_INFO,
"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
i, FDEV(i).path,
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk,
bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
"Host-aware" : "Host-managed");
continue;
}
#endif
f2fs_msg(sbi->sb, KERN_INFO,
"Mount Device [%2d]: %20s, %8u, %8x - %8x",
i, FDEV(i).path,
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk);
}
return 0;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
......@@ -1641,6 +1813,18 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_ERR,
"Zoned block device support is not enabled\n");
goto free_sb_buf;
}
#endif
default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
......@@ -1710,6 +1894,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_meta_inode;
}
/* Initialize device list */
err = f2fs_scan_devices(sbi);
if (err) {
f2fs_msg(sb, KERN_ERR, "Failed to find devices");
goto free_devices;
}
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
percpu_counter_set(&sbi->total_valid_inode_count,
......@@ -1893,12 +2084,21 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_lock(&sbi->umount_mutex);
release_ino_entry(sbi, true);
f2fs_leave_shrinker(sbi);
/*
* Some dirty meta pages can be produced by recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
* followed by write_checkpoint() through f2fs_write_node_pages(), which
* falls into an infinite loop in sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
free_nm:
destroy_node_manager(sbi);
free_sm:
destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
kfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
......@@ -2044,3 +2244,4 @@ module_exit(exit_f2fs_fs)
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
MODULE_DESCRIPTION("Flash Friendly File System");
MODULE_LICENSE("GPL");
......@@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
......@@ -554,7 +554,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
f2fs_set_encrypted_inode(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
......
......@@ -52,10 +52,17 @@
#define VERSION_LEN 256
#define MAX_VOLUME_NAME 512
#define MAX_PATH_LEN 64
#define MAX_DEVICES 8
/*
* For superblock
*/
struct f2fs_device {
__u8 path[MAX_PATH_LEN];
__le32 total_segments;
} __packed;
struct f2fs_super_block {
__le32 magic; /* Magic Number */
__le16 major_ver; /* Major Version */
......@@ -94,7 +101,8 @@ struct f2fs_super_block {
__le32 feature; /* defined features */
__u8 encryption_level; /* versioning level for encryption */
__u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
__u8 reserved[871]; /* valid reserved region */
struct f2fs_device devs[MAX_DEVICES]; /* device list */
__u8 reserved[327]; /* valid reserved region */
} __packed;
/*
......
......@@ -1111,6 +1111,27 @@ TRACE_EVENT(f2fs_issue_discard,
(unsigned long long)__entry->blklen)
);
TRACE_EVENT(f2fs_issue_reset_zone,
TP_PROTO(struct super_block *sb, block_t blkstart),
TP_ARGS(sb, blkstart),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(block_t, blkstart)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->blkstart = blkstart;
),
TP_printk("dev = (%d,%d), reset zone at block = 0x%llx",
show_dev(__entry),
(unsigned long long)__entry->blkstart)
);
TRACE_EVENT(f2fs_issue_flush,
TP_PROTO(struct super_block *sb, unsigned int nobarrier,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment