Commit fe6f0ed0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'f2fs-for-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've tuned f2fs to improve general performance by
  serializing block allocation and enhancing discard flows like fstrim
  which avoids user IO contention. And we've added fsync_mode=nobarrier
  which gives an option to user where it skips issuing cache_flush
  commands to underlying flash storage. And there are many bug fixes
  related to fuzzed images, revoked atomic writes, quota ops, and minor
  direct IO.

  Enhancements:
   - add fsync_mode=nobarrier which bypasses cache_flush command
   - enhance the discarding flow which avoids user IOs and issues in
     LBA order
   - readahead some encrypted blocks during GC
   - enable in-memory inode checksum to verify the blocks if
     F2FS_CHECK_FS is set
   - enhance nat_bits behavior
   - set -o discard by default
   - set REQ_RAHEAD to bio in ->readpages

  Bug fixes:
   - fix a corner case to corrupt atomic_writes revoking flow
   - revisit i_gc_rwsem to fix race conditions
   - fix some dio behaviors captured by xfstests
   - correct handling errors given by quota-related failures
   - add many sanity check flows to avoid fuzz test failures
   - add more error number propagation to their callers
   - fix several corner cases to continue fault injection w/ shutdown
     loop"

* tag 'f2fs-for-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (89 commits)
  f2fs: readahead encrypted block during GC
  f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc
  f2fs: fix performance issue observed with multi-thread sequential read
  f2fs: fix to skip verifying block address for non-regular inode
  f2fs: rework fault injection handling to avoid a warning
  f2fs: support fault_type mount option
  f2fs: fix to return success when trimming meta area
  f2fs: fix use-after-free of dicard command entry
  f2fs: support discard submission error injection
  f2fs: split discard command in prior to block layer
  f2fs: wake up gc thread immediately when gc_urgent is set
  f2fs: fix incorrect range->len in f2fs_trim_fs()
  f2fs: refresh recent accessed nat entry in lru list
  f2fs: fix avoid race between truncate and background GC
  f2fs: avoid race between zero_range and background GC
  f2fs: fix to do sanity check with block address in main area v2
  f2fs: fix to do sanity check with inline flags
  f2fs: fix to reset i_gc_failures correctly
  f2fs: fix invalid memory access
  f2fs: fix to avoid broken of dnode block list
  ...
parents 6faf05c2 6aa58d8a
......@@ -51,6 +51,14 @@ Description:
Controls the dirty page count condition for the in-place-update
policies.
What: /sys/fs/f2fs/<disk>/min_seq_blocks
Date: August 2018
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the dirty page count condition for batched sequential
writes in ->writepages.
What: /sys/fs/f2fs/<disk>/min_hot_blocks
Date: March 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
......
......@@ -157,6 +157,24 @@ data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
Type_Name Type_Value
FAULT_KMALLOC 0x000000001
FAULT_KVMALLOC 0x000000002
FAULT_PAGE_ALLOC 0x000000004
FAULT_PAGE_GET 0x000000008
FAULT_ALLOC_BIO 0x000000010
FAULT_ALLOC_NID 0x000000020
FAULT_ORPHAN 0x000000040
FAULT_BLOCK 0x000000080
FAULT_DIR_DEPTH 0x000000100
FAULT_EVICT_INODE 0x000000200
FAULT_TRUNCATE 0x000000400
FAULT_IO 0x000000800
FAULT_CHECKPOINT 0x000001000
FAULT_DISCARD 0x000002000
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
......
......@@ -28,6 +28,7 @@ struct kmem_cache *f2fs_inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
f2fs_flush_merged_writes(sbi);
......@@ -70,6 +71,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.encrypted_page = NULL,
.is_meta = is_meta,
};
int err;
if (unlikely(!is_meta))
fio.op_flags &= ~REQ_META;
......@@ -84,9 +86,10 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
fio.page = page;
if (f2fs_submit_page_bio(&fio)) {
err = f2fs_submit_page_bio(&fio);
if (err) {
f2fs_put_page(page, 1);
goto repeat;
return ERR_PTR(err);
}
lock_page(page);
......@@ -95,14 +98,9 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
goto repeat;
}
/*
* if there is any IO error when accessing device, make our filesystem
* readonly and make sure do not write checkpoint with non-uptodate
* meta page.
*/
if (unlikely(!PageUptodate(page))) {
memset(page_address(page), 0, PAGE_SIZE);
f2fs_stop_checkpoint(sbi, false);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
out:
return page;
......@@ -113,13 +111,32 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
int count = 0;
retry:
page = __get_meta_page(sbi, index, true);
if (IS_ERR(page)) {
if (PTR_ERR(page) == -EIO &&
++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
f2fs_stop_checkpoint(sbi, false);
f2fs_bug_on(sbi, 1);
}
return page;
}
/* for POR only */
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, false);
}
bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
switch (type) {
......@@ -140,8 +157,20 @@ bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
return false;
break;
case META_POR:
case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi)))
blkaddr < MAIN_BLKADDR(sbi))) {
if (type == DATA_GENERIC) {
f2fs_msg(sbi->sb, KERN_WARNING,
"access invalid blkaddr:%u", blkaddr);
WARN_ON(1);
}
return false;
}
break;
case META_GENERIC:
if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
......@@ -176,7 +205,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
if (!f2fs_is_valid_meta_blkaddr(sbi, blkno, type))
if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
......@@ -242,11 +271,8 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
if (unlikely(f2fs_cp_error(sbi))) {
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
}
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
......@@ -529,13 +555,12 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
spin_lock(&im->ino_lock);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
#endif
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
......@@ -572,12 +597,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct node_info ni;
int err = f2fs_acquire_orphan_inode(sbi);
if (err)
goto err_out;
__add_ino_entry(sbi, ino, 0, ORPHAN_INO);
int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
......@@ -600,14 +620,15 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
f2fs_get_node_info(sbi, ino, &ni);
err = f2fs_get_node_info(sbi, ino, &ni);
if (err)
goto err_out;
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
err = -EIO;
goto err_out;
}
__remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
err_out:
......@@ -639,7 +660,10 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
/* Turn on quotas so that they are updated correctly */
/*
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.
*/
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
......@@ -649,9 +673,15 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
struct page *page = f2fs_get_meta_page(sbi, start_blk + i);
struct page *page;
struct f2fs_orphan_block *orphan_blk;
page = f2fs_get_meta_page(sbi, start_blk + i);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
......@@ -742,10 +772,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
__u32 crc = 0;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
if (IS_ERR(*cp_page))
return PTR_ERR(*cp_page);
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset > (blk_size - sizeof(__le32))) {
f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
......@@ -753,6 +787,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
}
......@@ -772,14 +807,22 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
goto invalid_cp1;
return NULL;
if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
sbi->blocks_per_seg) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid cp_pack_total_block_count:%u",
le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
goto invalid_cp2;
goto invalid_cp;
cur_version = *version;
if (cur_version == pre_version) {
......@@ -787,9 +830,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
invalid_cp2:
f2fs_put_page(cp_page_2, 1);
invalid_cp1:
invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
......@@ -838,15 +880,15 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
/* Sanity checking of checkpoint */
if (f2fs_sanity_check_ckpt(sbi))
goto free_fail_no_cp;
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
/* Sanity checking of checkpoint */
if (f2fs_sanity_check_ckpt(sbi))
goto free_fail_no_cp;
if (cp_blks <= 1)
goto done;
......@@ -859,6 +901,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
if (IS_ERR(cur_page))
goto free_fail_no_cp;
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
......@@ -980,12 +1024,10 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
iput(inode);
/* We need to give cpu to another writers. */
if (ino == cur_ino) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
if (ino == cur_ino)
cond_resched();
} else {
else
ino = cur_ino;
}
} else {
/*
* We should submit bio, since it exists several
......@@ -1119,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
}
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
DEFINE_WAIT(wait);
......@@ -1129,6 +1171,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
if (unlikely(f2fs_cp_error(sbi)))
break;
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
......@@ -1202,8 +1247,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
/* writeout cp pack 2 page */
err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
f2fs_bug_on(sbi, err);
if (unlikely(err && f2fs_cp_error(sbi))) {
f2fs_put_page(page, 1);
return;
}
f2fs_bug_on(sbi, err);
f2fs_put_page(page, 0);
/* submit checkpoint (with barrier if NOBARRIER is not set) */
......@@ -1229,7 +1278,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
while (get_pages(sbi, F2FS_DIRTY_META)) {
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
break;
}
/*
......@@ -1309,7 +1358,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX,
FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
break;
}
}
......@@ -1348,10 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
f2fs_wait_on_all_pages_writeback(sbi);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
......@@ -1360,12 +1406,19 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
wait_on_all_pages_writeback(sbi);
f2fs_wait_on_all_pages_writeback(sbi);
/*
* invalidate intermediate page cache borrowed from meta inode
* which are used for migration of encrypted inode's blocks.
*/
if (f2fs_sb_has_encrypt(sbi->sb))
invalidate_mapping_pages(META_MAPPING(sbi),
MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
f2fs_release_ino_entry(sbi, false);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
f2fs_reset_fsync_node_info(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
......@@ -1381,7 +1434,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
return 0;
return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
/*
......
......@@ -126,12 +126,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
bio->bi_status = BLK_STS_IOERR;
}
#endif
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
......@@ -177,6 +175,8 @@ static void f2fs_write_end_io(struct bio *bio)
page->index != nid_of_node(page));
dec_page_count(sbi, type);
if (f2fs_in_warm_node_list(sbi, page))
f2fs_del_fsync_node_entry(sbi, page);
clear_cold_data(page);
end_page_writeback(page);
}
......@@ -264,7 +264,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (type != DATA && type != NODE)
goto submit_io;
if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
......@@ -441,7 +441,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
verify_block_addr(fio, fio->new_blkaddr);
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
return -EFAULT;
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
......@@ -485,7 +488,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
spin_unlock(&io->io_lock);
}
if (is_valid_blkaddr(fio->old_blkaddr))
if (__is_valid_data_blkaddr(fio->old_blkaddr))
verify_block_addr(fio, fio->old_blkaddr);
verify_block_addr(fio, fio->new_blkaddr);
......@@ -534,19 +537,22 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages)
unsigned nr_pages, unsigned op_flag)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
return ERR_PTR(-EFAULT);
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
......@@ -571,7 +577,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
......@@ -869,6 +875,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
block_t old_blkaddr;
pgoff_t fofs;
blkcnt_t count = 1;
int err;
......@@ -876,6 +883,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
err = f2fs_get_node_info(sbi, dn->nid, &ni);
if (err)
return err;
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
if (dn->data_blkaddr == NEW_ADDR)
......@@ -885,11 +896,13 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
return err;
alloc:
f2fs_get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
old_blkaddr = dn->data_blkaddr;
f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
&sum, seg_type, NULL, false);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
f2fs_set_data_blkaddr(dn);
/* update i_size */
......@@ -1045,7 +1058,13 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
if (!is_valid_blkaddr(blkaddr)) {
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
err = -EFAULT;
goto sync_out;
}
if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
......@@ -1282,7 +1301,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
f2fs_get_node_info(sbi, inode->i_ino, &ni);
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
if (err) {
f2fs_put_page(page, 1);
return err;
}
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
......@@ -1309,7 +1332,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
f2fs_get_node_info(sbi, xnid, &ni);
err = f2fs_get_node_info(sbi, xnid, &ni);
if (err) {
f2fs_put_page(page, 1);
return err;
}
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
len = inode->i_sb->s_blocksize;
......@@ -1425,11 +1452,11 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* Note that the aops->readpages() function is ONLY used for read-ahead. If
* this function ever deviates from doing just read-ahead, it should either
* use ->readpage() or do the necessary surgery to decouple ->readpages()
* readom read-ahead.
* from read-ahead.
*/
static int f2fs_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages)
unsigned nr_pages, bool is_readahead)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
......@@ -1500,6 +1527,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
SetPageUptodate(page);
goto confused;
}
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC))
goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
......@@ -1519,7 +1550,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
bio = NULL;
}
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
is_readahead ? REQ_RAHEAD : 0);
if (IS_ERR(bio)) {
bio = NULL;
goto set_error_page;
......@@ -1563,7 +1595,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
return ret;
}
......@@ -1580,12 +1612,13 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
struct page *mpage;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
......@@ -1597,17 +1630,25 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
if (!IS_ERR(fio->encrypted_page))
return 0;
if (IS_ERR(fio->encrypted_page)) {
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_writes(fio->sbi);
congestion_wait(BLK_RW_ASYNC, HZ/50);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
return PTR_ERR(fio->encrypted_page);
}
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_writes(fio->sbi);
congestion_wait(BLK_RW_ASYNC, HZ/50);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
if (mpage) {
if (PageUptodate(mpage))
memcpy(page_address(mpage),
page_address(fio->encrypted_page), PAGE_SIZE);
f2fs_put_page(mpage, 1);
}
return PTR_ERR(fio->encrypted_page);
return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
......@@ -1691,6 +1732,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
struct node_info ni;
bool ipu_force = false;
int err = 0;
......@@ -1699,11 +1741,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (is_valid_blkaddr(fio->old_blkaddr)) {
ipu_force = true;
fio->need_lock = LOCK_DONE;
goto got_it;
}
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC))
return -EFAULT;
ipu_force = true;
fio->need_lock = LOCK_DONE;
goto got_it;
}
/* Deadlock due to between page->lock and f2fs_lock_op */
......@@ -1722,11 +1766,17 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
goto out_writepage;
}
got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC)) {
err = -EFAULT;
goto out_writepage;
}
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) &&
if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
......@@ -1751,6 +1801,12 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
fio->need_lock = LOCK_REQ;
}
err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
if (err)
goto out_writepage;
fio->version = ni.version;
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
......@@ -2079,6 +2135,18 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
return ret;
}
static inline bool __should_serialize_io(struct inode *inode,
struct writeback_control *wbc)
{
if (!S_ISREG(inode->i_mode))
return false;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
return true;
return false;
}
static int __f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc,
enum iostat_type io_type)
......@@ -2087,6 +2155,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct blk_plug plug;
int ret;
bool locked = false;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
......@@ -2117,10 +2186,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
else if (atomic_read(&sbi->wb_sync_req[DATA]))
goto skip_write;
if (__should_serialize_io(inode, wbc)) {
mutex_lock(&sbi->writepages);
locked = true;
}
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc, io_type);
blk_finish_plug(&plug);
if (locked)
mutex_unlock(&sbi->writepages);
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_dec(&sbi->wb_sync_req[DATA]);
/*
......@@ -2153,10 +2230,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);
if (to > i_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
......@@ -2251,8 +2332,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
trace_f2fs_write_begin(inode, pos, len, flags);
if (f2fs_is_atomic_file(inode) &&
!f2fs_available_free_memory(sbi, INMEM_PAGES)) {
if ((f2fs_is_atomic_file(inode) &&
!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
......@@ -2376,14 +2458,20 @@ static int f2fs_write_end(struct file *file,
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
loff_t offset)
{
unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
if (offset & blocksize_mask)
return -EINVAL;
if (iov_iter_alignment(iter) & blocksize_mask)
return -EINVAL;
unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
unsigned blkbits = i_blkbits;
unsigned blocksize_mask = (1 << blkbits) - 1;
unsigned long align = offset | iov_iter_alignment(iter);
struct block_device *bdev = inode->i_sb->s_bdev;
if (align & blocksize_mask) {
if (bdev)
blkbits = blksize_bits(bdev_logical_block_size(bdev));
blocksize_mask = (1 << blkbits) - 1;
if (align & blocksize_mask)
return -EINVAL;
return 1;
}
return 0;
}
......@@ -2401,7 +2489,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
err = check_direct_IO(inode, iter, offset);
if (err)
return err;
return err < 0 ? err : 0;
if (f2fs_force_buffered_io(inode, rw))
return 0;
......@@ -2495,6 +2583,10 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
/* don't remain PG_checked flag which was set during GC */
if (is_cold_data(page))
clear_cold_data(page);
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
f2fs_register_inmem_page(inode, page);
......
......@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks *
f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
......
......@@ -517,12 +517,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
}
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
......
......@@ -41,7 +41,6 @@
} while (0)
#endif
#ifdef CONFIG_F2FS_FAULT_INJECTION
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
......@@ -56,16 +55,20 @@ enum {
FAULT_TRUNCATE,
FAULT_IO,
FAULT_CHECKPOINT,
FAULT_DISCARD,
FAULT_MAX,
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
extern char *fault_name[FAULT_MAX];
extern char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
......@@ -178,7 +181,6 @@ enum {
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
......@@ -194,7 +196,7 @@ struct cp_control {
};
/*
* For CP/NAT/SIT/SSA readahead
* indicate meta/data type
*/
enum {
META_CP,
......@@ -202,6 +204,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
DATA_GENERIC,
META_GENERIC,
};
/* for the list of ino */
......@@ -226,6 +230,12 @@ struct inode_entry {
struct inode *inode; /* vfs inode pointer */
};
struct fsync_node_entry {
struct list_head list; /* list head */
struct page *page; /* warm node page pointer */
unsigned int seq_id; /* sequence id */
};
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
......@@ -242,9 +252,10 @@ struct discard_entry {
(MAX_PLIST_NUM - 1) : (blk_num - 1))
enum {
D_PREP,
D_SUBMIT,
D_DONE,
D_PREP, /* initial */
D_PARTIAL, /* partially submitted */
D_SUBMIT, /* all submitted */
D_DONE, /* finished */
};
struct discard_info {
......@@ -269,7 +280,10 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
unsigned char issuing; /* issuing discard */
int error; /* bio error */
spinlock_t lock; /* for state/bio_ref updating */
unsigned short bio_ref; /* bio reference count */
};
enum {
......@@ -289,6 +303,7 @@ struct discard_policy {
unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
};
......@@ -305,10 +320,12 @@ struct discard_cmd_control {
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
bool rbtree_check; /* config for consistence check */
};
/* for the list of fsync inodes, used only during recovery */
......@@ -508,13 +525,12 @@ enum {
*/
};
#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
/* vector size for gang look-up from extent cache that consists of radix tree */
#define EXT_TREE_VEC_SIZE 64
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
......@@ -600,6 +616,8 @@ enum {
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
......@@ -669,8 +687,8 @@ struct f2fs_inode_info {
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
int i_inline_xattr_size; /* inline xattr size */
struct timespec i_crtime; /* inode creation time */
struct timespec i_disk_time[4]; /* inode disk times */
struct timespec64 i_crtime; /* inode creation time */
struct timespec64 i_disk_time[4];/* inode disk times */
};
static inline void get_extent_info(struct extent_info *ext,
......@@ -698,22 +716,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
}
static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front)
struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
(back->len + front->len < DEF_MAX_DISCARD_LEN);
(back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
struct discard_info *back)
struct discard_info *back, unsigned int max_len)
{
return __is_discard_mergeable(back, cur);
return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
struct discard_info *front)
struct discard_info *front, unsigned int max_len)
{
return __is_discard_mergeable(cur, front);
return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
......@@ -768,6 +786,7 @@ struct f2fs_nm_info {
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
unsigned int nat_blocks; /* # of nat blocks */
......@@ -894,6 +913,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
unsigned int min_seq_blocks; /* threshold for sequential blocks */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
......@@ -1015,6 +1035,7 @@ struct f2fs_io_info {
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
unsigned char version; /* version of the node */
};
#define is_read_io(rw) ((rw) == READ)
......@@ -1066,6 +1087,7 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
SBI_IS_SHUTDOWN, /* shutdown by ioctl */
};
enum {
......@@ -1112,6 +1134,7 @@ struct f2fs_sb_info {
struct rw_semaphore sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
......@@ -1148,6 +1171,11 @@ struct f2fs_sb_info {
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
spinlock_t fsync_node_lock; /* for node entry lock */
struct list_head fsync_node_list; /* node list head */
unsigned int fsync_seg_id; /* sequence id */
unsigned int fsync_node_num; /* number of node entries */
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
......@@ -1215,6 +1243,7 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
......@@ -1279,7 +1308,7 @@ struct f2fs_sb_info {
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk("%sF2FS-fs : inject %s in %s of %pF\n", \
KERN_INFO, fault_name[type], \
KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
......@@ -1298,6 +1327,12 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
return false;
}
#else
#define f2fs_show_injection_info(type) do { } while (0)
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
return false;
}
#endif
/* For write statistics. Suppose sector size is 512 bytes,
......@@ -1326,7 +1361,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
struct request_list *rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
return 0;
return false;
return f2fs_time_over(sbi, REQ_TIME);
}
......@@ -1650,13 +1685,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (ret)
return ret;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
}
#endif
/*
* let's increase this in prior to actual block count change in order
* for f2fs_sync_file to avoid data races when deciding checkpoint.
......@@ -1680,18 +1714,20 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
if (unlikely(release))
if (unlikely(release)) {
percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
}
f2fs_i_blocks_write(inode, *count, true, true);
return 0;
enospc:
percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
......@@ -1863,12 +1899,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
goto enospc;
}
#endif
spin_lock(&sbi->stat_lock);
......@@ -1953,17 +1987,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi)
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
pgoff_t index, bool for_write)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
struct page *page = find_lock_page(mapping, index);
struct page *page;
if (page)
return page;
if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) {
if (!for_write)
page = find_get_page_flags(mapping, index,
FGP_LOCK | FGP_ACCESSED);
else
page = find_lock_page(mapping, index);
if (page)
return page;
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
f2fs_show_injection_info(FAULT_PAGE_ALLOC);
return NULL;
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
f2fs_show_injection_info(FAULT_PAGE_ALLOC);
return NULL;
}
}
#endif
if (!for_write)
return grab_cache_page(mapping, index);
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
......@@ -1973,12 +2013,11 @@ static inline struct page *f2fs_pagecache_get_page(
struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp_mask)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
return NULL;
}
#endif
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
......@@ -2043,12 +2082,11 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
return NULL;
}
#endif
return bio_alloc(GFP_KERNEL, npages);
}
......@@ -2518,7 +2556,6 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
struct timespec ts;
bool ret;
if (dsync) {
......@@ -2534,16 +2571,13 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
i_size_read(inode) & ~PAGE_MASK)
return false;
ts = timespec64_to_timespec(inode->i_atime);
if (!timespec_equal(F2FS_I(inode)->i_disk_time, &ts))
if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
return false;
ts = timespec64_to_timespec(inode->i_ctime);
if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &ts))
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
return false;
ts = timespec64_to_timespec(inode->i_mtime);
if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &ts))
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3,
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3,
&F2FS_I(inode)->i_crtime))
return false;
......@@ -2587,12 +2621,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
#endif
return kmalloc(size, flags);
}
......@@ -2605,12 +2638,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
return NULL;
}
#endif
return kvmalloc(size, flags);
}
......@@ -2669,13 +2701,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
static inline bool is_valid_blkaddr(block_t blkaddr)
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
(!is_read_io(fio->op) || fio->is_meta))
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
f2fs_msg(sbi->sb, KERN_ERR,
"invalid blkaddr: %u, type: %d, run fsck to fix.",
blkaddr, type);
f2fs_bug_on(sbi, 1);
}
}
static inline bool __is_valid_data_blkaddr(block_t blkaddr)
{
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return false;
return true;
}
static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr)
{
if (!__is_valid_data_blkaddr(blkaddr))
return false;
verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
return true;
}
/*
* file.c
*/
......@@ -2790,16 +2848,21 @@ struct node_info;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_truncate_xattr_node(struct inode *inode);
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
unsigned int seq_id);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
......@@ -2808,11 +2871,12 @@ struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
void f2fs_move_node_page(struct page *node_page, int gc_type);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic);
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id);
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type);
void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
......@@ -2820,7 +2884,7 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
......@@ -2898,9 +2962,10 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
......@@ -2924,6 +2989,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
void f2fs_update_dirty_page(struct inode *inode, struct page *page);
void f2fs_remove_dirty_inode(struct inode *inode);
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init f2fs_create_checkpoint_caches(void);
......@@ -3362,7 +3428,7 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
return 0;
return false;
#endif
}
......@@ -3373,4 +3439,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
F2FS_I_SB(inode)->s_ndevs);
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
unsigned int type);
#else
#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
#endif
#endif
......@@ -213,6 +213,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
unsigned int seq_id = 0;
if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
......@@ -275,7 +276,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
}
sync_nodes:
atomic_inc(&sbi->wb_sync_req[NODE]);
ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic);
ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (ret)
goto out;
......@@ -301,7 +302,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
* given fsync mark.
*/
if (!atomic) {
ret = f2fs_wait_on_node_pages_writeback(sbi, ino);
ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
if (ret)
goto out;
}
......@@ -350,13 +351,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
return pgofs;
}
static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
int whence)
static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
is_valid_blkaddr(blkaddr))
is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
......@@ -420,7 +421,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
if (__found_offset(blkaddr, dirty, pgofs, whence)) {
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
blkaddr, DATA_GENERIC)) {
f2fs_put_dnode(&dn);
goto fail;
}
if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
......@@ -513,6 +522,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
f2fs_set_data_blkaddr(dn);
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
continue;
f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
......@@ -654,12 +668,11 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
#endif
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
......@@ -782,22 +795,26 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size <= i_size_read(inode)) {
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
bool to_smaller = (attr->ia_size <= i_size_read(inode));
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
if (to_smaller)
err = f2fs_truncate(inode);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (err)
return err;
} else {
/*
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
up_write(&F2FS_I(inode)->i_mmap_sem);
/*
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
......@@ -944,14 +961,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
......@@ -1054,7 +1076,12 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (ret)
return ret;
f2fs_get_node_info(sbi, dn.nid, &ni);
ret = f2fs_get_node_info(sbi, dn.nid, &ni);
if (ret) {
f2fs_put_dnode(&dn);
return ret;
}
ilen = min((pgoff_t)
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
......@@ -1161,25 +1188,33 @@ static int __exchange_data_block(struct inode *src_inode,
return ret;
}
static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;
......@@ -1194,25 +1229,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
goto out_unlock;
truncate_pagecache(inode, offset);
return ret;
ret = f2fs_do_collapse(inode, pg_start, pg_end);
ret = f2fs_do_collapse(inode, offset, len);
if (ret)
goto out_unlock;
return ret;
/* write out all moved pages, if possible */
down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
......@@ -1220,11 +1247,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
truncate_pagecache(inode, new_size);
ret = f2fs_truncate_blocks(inode, new_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
out_unlock:
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
......@@ -1290,12 +1315,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
goto out_sem;
truncate_pagecache_range(inode, offset, offset + len - 1);
return ret;
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
......@@ -1307,7 +1329,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
goto out_sem;
return ret;
new_size = max_t(loff_t, new_size, offset + len);
} else {
......@@ -1315,7 +1337,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
goto out_sem;
return ret;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
......@@ -1326,12 +1348,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache_range(inode,
(loff_t)index << PAGE_SHIFT,
((loff_t)pg_end << PAGE_SHIFT) - 1);
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
......@@ -1340,7 +1371,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
......@@ -1368,9 +1402,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
else
f2fs_i_size_write(inode, new_size);
}
out_sem:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
......@@ -1399,26 +1430,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
goto out;
return ret;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
goto out;
truncate_pagecache(inode, offset);
return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
nr = idx - pg_start;
if (nr > delta)
......@@ -1432,16 +1464,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
......@@ -1597,7 +1630,7 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int flags = fi->i_flags;
if (file_is_encrypt(inode))
if (f2fs_encrypted_inode(inode))
flags |= F2FS_ENCRYPT_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
flags |= F2FS_INLINE_DATA_FL;
......@@ -1688,15 +1721,18 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode_lock(inode);
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (f2fs_is_atomic_file(inode))
if (f2fs_is_atomic_file(inode)) {
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
ret = -EINVAL;
goto out;
}
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (!get_dirty_pages(inode))
goto skip_flush;
......@@ -1704,18 +1740,20 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
if (ret) {
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
skip_flush:
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
......@@ -1733,9 +1771,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (ret)
return ret;
inode_lock(inode);
f2fs_balance_fs(F2FS_I_SB(inode), true);
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_lock(inode);
if (f2fs_is_volatile_file(inode)) {
ret = -EINVAL;
......@@ -1761,7 +1799,6 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
ret = -EINVAL;
}
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
......@@ -1853,6 +1890,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
inode_unlock(inode);
mnt_drop_write_file(filp);
......@@ -1866,7 +1905,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct super_block *sb = sbi->sb;
__u32 in;
int ret;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
......@@ -1889,6 +1928,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
}
if (sb) {
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev, sb);
}
break;
......@@ -1898,13 +1938,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
default:
ret = -EINVAL;
......@@ -2107,7 +2150,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
return ret;
}
static int f2fs_ioc_f2fs_write_checkpoint(struct file *filp, unsigned long arg)
static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
......@@ -2351,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
inode_unlock(dst);
goto out;
}
}
ret = -EINVAL;
......@@ -2404,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out_unlock;
f2fs_balance_fs(sbi, true);
down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
goto out_src;
}
f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
......@@ -2416,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
out_unlock:
if (src != dst) {
if (src != dst)
up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
if (src != dst)
inode_unlock(dst);
}
out:
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
......@@ -2782,7 +2830,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = 1;
f2fs_i_gc_failures_write(inode, 0);
goto done;
}
......@@ -2888,7 +2936,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
return f2fs_ioc_gc_range(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
return f2fs_ioc_f2fs_write_checkpoint(filp, arg);
return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
......
......@@ -53,12 +53,10 @@ static int gc_thread_func(void *data)
continue;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
......@@ -517,7 +515,11 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
continue;
}
f2fs_get_node_info(sbi, nid, &ni);
if (f2fs_get_node_info(sbi, nid, &ni)) {
f2fs_put_page(node_page, 1);
continue;
}
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
......@@ -576,7 +578,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(node_page))
return false;
f2fs_get_node_info(sbi, nid, dni);
if (f2fs_get_node_info(sbi, nid, dni)) {
f2fs_put_page(node_page, 1);
return false;
}
if (sum->version != dni->version) {
f2fs_msg(sbi->sb, KERN_WARNING,
......@@ -594,6 +599,72 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
static int ra_data_block(struct inode *inode, pgoff_t index)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
struct extent_info ei = {0, 0, 0};
struct f2fs_io_info fio = {
.sbi = sbi,
.ino = inode->i_ino,
.type = DATA,
.temp = COLD,
.op = REQ_OP_READ,
.op_flags = 0,
.encrypted_page = NULL,
.in_list = false,
.retry = false,
};
int err;
page = f2fs_grab_cache_page(mapping, index, true);
if (!page)
return -ENOMEM;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
goto got_it;
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err)
goto put_page;
f2fs_put_dnode(&dn);
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC))) {
err = -EFAULT;
goto put_page;
}
got_it:
/* read page */
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
dn.data_blkaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
goto put_page;
}
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_encrypted_page;
f2fs_put_page(fio.encrypted_page, 0);
f2fs_put_page(page, 1);
return 0;
put_encrypted_page:
f2fs_put_page(fio.encrypted_page, 1);
put_page:
f2fs_put_page(page, 1);
return err;
}
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
......@@ -615,7 +686,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
struct page *page;
struct page *page, *mpage;
block_t newaddr;
int err;
bool lfs_mode = test_opt(fio.sbi, LFS);
......@@ -655,7 +726,10 @@ static void move_data_block(struct inode *inode, block_t bidx,
*/
f2fs_wait_on_page_writeback(page, DATA, true);
f2fs_get_node_info(fio.sbi, dn.nid, &ni);
err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
if (err)
goto put_out;
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
......@@ -675,6 +749,23 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto recover_block;
}
mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
if (mpage) {
bool updated = false;
if (PageUptodate(mpage)) {
memcpy(page_address(fio.encrypted_page),
page_address(mpage), PAGE_SIZE);
updated = true;
}
f2fs_put_page(mpage, 1);
invalidate_mapping_pages(META_MAPPING(fio.sbi),
fio.old_blkaddr, fio.old_blkaddr);
if (updated)
goto write_page;
}
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_page_out;
......@@ -691,6 +782,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
write_page:
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
......@@ -865,22 +957,30 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
/* if inode uses special I/O path, let's go phase 3 */
if (f2fs_post_read_required(inode)) {
add_gc_inode(gc_list, inode);
continue;
}
if (!down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
iput(inode);
sbi->skipped_gc_rwsem++;
continue;
}
start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
ofs_in_node;
if (f2fs_post_read_required(inode)) {
int err = ra_data_block(inode, start_bidx);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err) {
iput(inode);
continue;
}
add_gc_inode(gc_list, inode);
continue;
}
start_bidx = f2fs_start_bidx_of_node(nofs, inode);
data_page = f2fs_get_read_data_page(inode,
start_bidx + ofs_in_node, REQ_RAHEAD,
true);
start_bidx, REQ_RAHEAD, true);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
......@@ -903,6 +1003,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
continue;
if (!down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
sbi->skipped_gc_rwsem++;
up_write(&fi->i_gc_rwsem[READ]);
continue;
}
......@@ -986,7 +1087,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
goto next;
sum = page_address(sum_page);
f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
if (type != GET_SUM_TYPE((&sum->footer))) {
f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
"type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
goto next;
}
/*
* this is to avoid deadlock:
......@@ -1034,6 +1141,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
......@@ -1046,6 +1154,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
sbi->skipped_gc_rwsem = 0;
first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
......@@ -1087,7 +1197,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
total_freed += seg_freed;
if (gc_type == FG_GC) {
if (sbi->skipped_atomic_files[FG_GC] > last_skipped)
if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
sbi->skipped_gc_rwsem)
skipped_round++;
last_skipped = sbi->skipped_atomic_files[FG_GC];
round++;
......@@ -1096,15 +1207,23 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
skipped_round * 2 >= round)
f2fs_drop_inmem_pages_all(sbi, true);
if (sync)
goto stop;
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
if (skipped_round <= MAX_SKIP_GC_COUNT ||
skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
if (first_skipped < last_skipped &&
(last_skipped - first_skipped) >
sbi->skipped_gc_rwsem) {
f2fs_drop_inmem_pages_all(sbi, true);
segno = NULL_SEGNO;
goto gc_more;
}
if (gc_type == FG_GC)
ret = f2fs_write_checkpoint(sbi, &cpc);
}
......
......@@ -121,6 +121,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
......@@ -130,6 +131,24 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
if (err) {
f2fs_put_dnode(dn);
return err;
}
fio.version = ni.version;
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
f2fs_msg(fio.sbi->sb, KERN_WARNING,
"%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
"run fsck to fix.",
__func__, dn->inode->i_ino, dn->data_blkaddr);
return -EINVAL;
}
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
f2fs_do_read_inline_data(page, dn->inode_page);
......@@ -363,6 +382,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (err)
goto out;
if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(&dn);
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
"%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
"run fsck to fix.",
__func__, dir->i_ino, dn.data_blkaddr);
err = -EINVAL;
goto out;
}
f2fs_wait_on_page_writeback(page, DATA, true);
dentry_blk = page_address(page);
......@@ -477,6 +507,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
return 0;
recover:
lock_page(ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
......@@ -668,7 +699,10 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
if (err)
goto out;
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
......
......@@ -68,13 +68,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
static bool __written_first_block(struct f2fs_inode *ri)
static int __written_first_block(struct f2fs_sb_info *sbi,
struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
if (is_valid_blkaddr(addr))
return true;
return false;
if (!__is_valid_data_blkaddr(addr))
return 1;
if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
return -EFAULT;
return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
......@@ -121,7 +124,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
......@@ -159,8 +162,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_inode *ri;
__u32 provided, calculated;
if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
return true;
#ifdef CONFIG_F2FS_CHECK_FS
if (!f2fs_enable_inode_chksum(sbi, page))
#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
#endif
return true;
ri = &F2FS_NODE(page)->i;
......@@ -185,9 +195,31 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
static bool sanity_check_inode(struct inode *inode)
static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned long long iblocks;
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
if (!iblocks) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
"run fsck to fix.",
__func__, inode->i_ino, iblocks);
return false;
}
if (ino_of_node(node_page) != nid_of_node(node_page)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted inode footer i_ino=%lx, ino,nid: "
"[%u, %u] run fsck to fix.",
__func__, inode->i_ino,
ino_of_node(node_page), nid_of_node(node_page));
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
&& !f2fs_has_extra_attr(inode)) {
......@@ -197,6 +229,64 @@ static bool sanity_check_inode(struct inode *inode)
__func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) &&
!f2fs_sb_has_extra_attr(sbi->sb)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) is with extra_attr, "
"but extra_attr feature is off",
__func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
"max: %zu",
__func__, inode->i_ino, fi->i_extra_isize,
F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
if (F2FS_I(inode)->extent_tree) {
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (ei->len &&
(!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
DATA_GENERIC))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) extent info [%u, %u, %u] "
"is incorrect, run fsck to fix",
__func__, inode->i_ino,
ei->blk, ei->fofs, ei->len);
return false;
}
}
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx, mode=%u) should not have "
"inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
}
if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx, mode=%u) should not have "
"inline_dentry, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
}
return true;
}
......@@ -207,6 +297,7 @@ static int do_read_inode(struct inode *inode)
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
int err;
/* Check if ino is within scope */
if (f2fs_check_nid_range(sbi, inode->i_ino))
......@@ -268,6 +359,11 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
return -EINVAL;
}
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
......@@ -275,8 +371,15 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
if (__written_first_block(ri))
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
if (S_ISREG(inode->i_mode)) {
err = __written_first_block(sbi, ri);
if (err < 0) {
f2fs_put_page(node_page, 1);
return err;
}
if (!err)
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
}
if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
......@@ -297,9 +400,9 @@ static int do_read_inode(struct inode *inode)
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
}
F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
f2fs_put_page(node_page, 1);
......@@ -330,10 +433,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
ret = do_read_inode(inode);
if (ret)
goto bad_inode;
if (!sanity_check_inode(inode)) {
ret = -EINVAL;
goto bad_inode;
}
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
......@@ -470,10 +569,14 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (inode->i_nlink == 0)
clear_inline_node(node_page);
F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
#endif
}
void f2fs_update_inode_page(struct inode *inode)
......@@ -558,12 +661,11 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
#endif
if (!err) {
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
......@@ -626,6 +728,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
int err;
/*
* clear nlink of inode in order to release resource of inode
......@@ -648,10 +751,16 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
f2fs_get_node_info(sbi, inode->i_ino, &ni);
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"May loss orphan inode, run fsck to fix.");
goto out;
}
if (ni.blk_addr != NULL_ADDR) {
int err = f2fs_acquire_orphan_inode(sbi);
err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
......@@ -664,6 +773,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
set_inode_flag(inode, FI_FREE_NID);
}
out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
......
......@@ -51,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
inode->i_ino = ino;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = timespec64_to_timespec(inode->i_mtime);
F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = sbi->s_next_generation++;
if (S_ISDIR(inode->i_mode))
......@@ -246,7 +246,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return -EINVAL;
if (hot) {
strncpy(extlist[count], name, strlen(name));
memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
......@@ -254,7 +254,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
strncpy(extlist[cold_count], name, strlen(name));
memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
......
......@@ -28,6 +28,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
static struct kmem_cache *fsync_node_entry_slab;
/*
* Check whether the given nid is within node id range.
......@@ -112,25 +113,22 @@ static void clear_node_page_dirty(struct page *page)
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
pgoff_t index = current_nat_addr(sbi, nid);
return f2fs_get_meta_page(sbi, index);
return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *src_page;
struct page *dst_page;
pgoff_t src_off;
pgoff_t dst_off;
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
src_off = current_nat_addr(sbi, nid);
dst_off = next_nat_addr(sbi, src_off);
dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
/* get current nat block page with lock */
src_page = f2fs_get_meta_page(sbi, src_off);
src_page = get_current_nat_page(sbi, nid);
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
......@@ -176,14 +174,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
spin_unlock(&nm_i->nat_list_lock);
nm_i->nat_cnt++;
return ne;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
struct nat_entry *ne;
ne = radix_tree_lookup(&nm_i->nat_root, n);
/* for recent accessed nat entry, move it to tail of lru list */
if (ne && !get_nat_flag(ne, IS_DIRTY)) {
spin_lock(&nm_i->nat_list_lock);
if (!list_empty(&ne->list))
list_move_tail(&ne->list, &nm_i->nat_entries);
spin_unlock(&nm_i->nat_list_lock);
}
return ne;
}
static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
......@@ -194,7 +208,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
__free_nat_entry(e);
......@@ -245,16 +258,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
nm_i->dirty_nat_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
spin_lock(&nm_i->nat_list_lock);
if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
spin_unlock(&nm_i->nat_list_lock);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry_set *set, struct nat_entry *ne)
{
spin_lock(&nm_i->nat_list_lock);
list_move_tail(&ne->list, &nm_i->nat_entries);
spin_unlock(&nm_i->nat_list_lock);
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
......@@ -267,6 +285,72 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
{
return NODE_MAPPING(sbi) == page->mapping &&
IS_DNODE(page) && is_cold_node(page);
}
void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
{
spin_lock_init(&sbi->fsync_node_lock);
INIT_LIST_HEAD(&sbi->fsync_node_list);
sbi->fsync_seg_id = 0;
sbi->fsync_node_num = 0;
}
static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
struct page *page)
{
struct fsync_node_entry *fn;
unsigned long flags;
unsigned int seq_id;
fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
get_page(page);
fn->page = page;
INIT_LIST_HEAD(&fn->list);
spin_lock_irqsave(&sbi->fsync_node_lock, flags);
list_add_tail(&fn->list, &sbi->fsync_node_list);
fn->seq_id = sbi->fsync_seg_id++;
seq_id = fn->seq_id;
sbi->fsync_node_num++;
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
return seq_id;
}
void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
{
struct fsync_node_entry *fn;
unsigned long flags;
spin_lock_irqsave(&sbi->fsync_node_lock, flags);
list_for_each_entry(fn, &sbi->fsync_node_list, list) {
if (fn->page == page) {
list_del(&fn->list);
sbi->fsync_node_num--;
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
kmem_cache_free(fsync_node_entry_slab, fn);
put_page(page);
return;
}
}
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
f2fs_bug_on(sbi, 1);
}
void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
{
unsigned long flags;
spin_lock_irqsave(&sbi->fsync_node_lock, flags);
sbi->fsync_seg_id = 0;
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
}
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
......@@ -371,7 +455,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) &&
f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
......@@ -382,7 +466,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
if (!is_valid_blkaddr(new_blkaddr))
if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
......@@ -405,13 +489,25 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
spin_lock(&nm_i->nat_list_lock);
while (nr_shrink) {
struct nat_entry *ne;
if (list_empty(&nm_i->nat_entries))
break;
ne = list_first_entry(&nm_i->nat_entries,
struct nat_entry, list);
list_del(&ne->list);
spin_unlock(&nm_i->nat_list_lock);
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
spin_lock(&nm_i->nat_list_lock);
}
spin_unlock(&nm_i->nat_list_lock);
up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
......@@ -419,7 +515,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
/*
* This function always returns success
*/
void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
......@@ -443,7 +539,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
return;
return 0;
}
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
......@@ -466,6 +562,9 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
up_read(&nm_i->nat_tree_lock);
page = f2fs_get_meta_page(sbi, index);
if (IS_ERR(page))
return PTR_ERR(page);
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
......@@ -473,6 +572,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
cache:
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
return 0;
}
/*
......@@ -722,12 +822,15 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
return err;
}
static void truncate_node(struct dnode_of_data *dn)
static int truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
int err;
f2fs_get_node_info(sbi, dn->nid, &ni);
err = f2fs_get_node_info(sbi, dn->nid, &ni);
if (err)
return err;
/* Deallocate node address */
f2fs_invalidate_blocks(sbi, ni.blk_addr);
......@@ -750,11 +853,14 @@ static void truncate_node(struct dnode_of_data *dn)
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
return 0;
}
static int truncate_dnode(struct dnode_of_data *dn)
{
struct page *page;
int err;
if (dn->nid == 0)
return 1;
......@@ -770,7 +876,10 @@ static int truncate_dnode(struct dnode_of_data *dn)
dn->node_page = page;
dn->ofs_in_node = 0;
f2fs_truncate_data_blocks(dn);
truncate_node(dn);
err = truncate_node(dn);
if (err)
return err;
return 1;
}
......@@ -835,7 +944,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
if (!ofs) {
/* remove current indirect node */
dn->node_page = page;
truncate_node(dn);
ret = truncate_node(dn);
if (ret)
goto out_err;
freed++;
} else {
f2fs_put_page(page, 1);
......@@ -893,7 +1004,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
truncate_node(dn);
err = truncate_node(dn);
if (err)
goto fail;
} else {
f2fs_put_page(pages[idx], 1);
}
......@@ -1014,6 +1127,7 @@ int f2fs_truncate_xattr_node(struct inode *inode)
nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
struct page *npage;
int err;
if (!nid)
return 0;
......@@ -1022,10 +1136,15 @@ int f2fs_truncate_xattr_node(struct inode *inode)
if (IS_ERR(npage))
return PTR_ERR(npage);
set_new_dnode(&dn, inode, NULL, npage, nid);
err = truncate_node(&dn);
if (err) {
f2fs_put_page(npage, 1);
return err;
}
f2fs_i_xnid_write(inode, 0);
set_new_dnode(&dn, inode, NULL, npage, nid);
truncate_node(&dn);
return 0;
}
......@@ -1055,11 +1174,19 @@ int f2fs_remove_inode_page(struct inode *inode)
f2fs_truncate_data_blocks_range(&dn, 1);
/* 0 is possible, after f2fs_new_inode() has failed */
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
f2fs_put_dnode(&dn);
return -EIO;
}
f2fs_bug_on(F2FS_I_SB(inode),
inode->i_blocks != 0 && inode->i_blocks != 8);
/* will put inode & node pages */
truncate_node(&dn);
err = truncate_node(&dn);
if (err) {
f2fs_put_dnode(&dn);
return err;
}
return 0;
}
......@@ -1092,7 +1219,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_get_node_info(sbi, dn->nid, &new_ni);
err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
if (err) {
dec_valid_node_count(sbi, dn->inode, !ofs);
goto fail;
}
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
new_ni.nid = dn->nid;
......@@ -1140,13 +1271,21 @@ static int read_node_page(struct page *page, int op_flags)
.page = page,
.encrypted_page = NULL,
};
int err;
if (PageUptodate(page))
if (PageUptodate(page)) {
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
#endif
return LOCKED_PAGE;
}
f2fs_get_node_info(sbi, page->index, &ni);
err = f2fs_get_node_info(sbi, page->index, &ni);
if (err)
return err;
if (unlikely(ni.blk_addr == NULL_ADDR)) {
if (unlikely(ni.blk_addr == NULL_ADDR) ||
is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
}
......@@ -1348,7 +1487,7 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc, bool do_balance,
enum iostat_type io_type)
enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
......@@ -1365,6 +1504,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
.io_type = io_type,
.io_wbc = wbc,
};
unsigned int seq;
trace_f2fs_writepage(page, NODE);
......@@ -1374,10 +1514,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->sync_mode == WB_SYNC_NONE &&
IS_DNODE(page) && is_cold_node(page))
goto redirty_out;
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
if (f2fs_get_node_info(sbi, nid, &ni))
goto redirty_out;
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
......@@ -1385,8 +1532,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
down_read(&sbi->node_write);
}
f2fs_get_node_info(sbi, nid, &ni);
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
......@@ -1396,11 +1541,22 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
}
if (__is_valid_data_blkaddr(ni.blk_addr) &&
!f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
goto redirty_out;
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
set_page_writeback(page);
ClearPageError(page);
if (f2fs_in_warm_node_list(sbi, page)) {
seq = f2fs_add_fsync_node_entry(sbi, page);
if (seq_id)
*seq_id = seq;
}
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
......@@ -1448,7 +1604,7 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
goto out_page;
if (__write_node_page(node_page, false, NULL,
&wbc, false, FS_GC_NODE_IO))
&wbc, false, FS_GC_NODE_IO, NULL))
unlock_page(node_page);
goto release_page;
} else {
......@@ -1465,11 +1621,13 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
return __write_node_page(page, false, NULL, wbc, false,
FS_NODE_IO, NULL);
}
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic)
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id)
{
pgoff_t index;
pgoff_t last_idx = ULONG_MAX;
......@@ -1550,7 +1708,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc, true,
FS_NODE_IO);
FS_NODE_IO, seq_id);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
......@@ -1633,7 +1791,9 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
!is_cold_node(page)))
continue;
lock_node:
if (!trylock_page(page))
if (wbc->sync_mode == WB_SYNC_ALL)
lock_page(page);
else if (!trylock_page(page))
continue;
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
......@@ -1665,7 +1825,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted,
wbc, do_balance, io_type);
wbc, do_balance, io_type, NULL);
if (ret)
unlock_page(page);
else if (submitted)
......@@ -1684,10 +1844,12 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
}
if (step < 2) {
if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
goto out;
step++;
goto next_step;
}
out:
if (nwritten)
f2fs_submit_merged_write(sbi, NODE);
......@@ -1696,35 +1858,46 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
return ret;
}
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
unsigned int seq_id)
{
pgoff_t index = 0;
struct pagevec pvec;
struct fsync_node_entry *fn;
struct page *page;
struct list_head *head = &sbi->fsync_node_list;
unsigned long flags;
unsigned int cur_seq_id = 0;
int ret2, ret = 0;
int nr_pages;
pagevec_init(&pvec);
while (seq_id && cur_seq_id < seq_id) {
spin_lock_irqsave(&sbi->fsync_node_lock, flags);
if (list_empty(head)) {
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
break;
}
fn = list_first_entry(head, struct fsync_node_entry, list);
if (fn->seq_id > seq_id) {
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
break;
}
cur_seq_id = fn->seq_id;
page = fn->page;
get_page(page);
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
PAGECACHE_TAG_WRITEBACK))) {
int i;
f2fs_wait_on_page_writeback(page, NODE, true);
if (TestClearPageError(page))
ret = -EIO;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
put_page(page);
if (ino && ino_of_node(page) == ino) {
f2fs_wait_on_page_writeback(page, NODE, true);
if (TestClearPageError(page))
ret = -EIO;
}
}
pagevec_release(&pvec);
cond_resched();
if (ret)
break;
}
ret2 = filemap_check_errors(NODE_MAPPING(sbi));
if (!ret)
ret = ret2;
return ret;
}
......@@ -1774,6 +1947,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
#ifdef CONFIG_F2FS_CHECK_FS
if (IS_INODE(page))
f2fs_inode_chksum_set(F2FS_P_SB(page), page);
#endif
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
......@@ -1968,7 +2145,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
static void scan_nat_page(struct f2fs_sb_info *sbi,
static int scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
......@@ -1986,7 +2163,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
if (blk_addr == NEW_ADDR)
return -EINVAL;
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
} else {
......@@ -1995,6 +2175,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
return 0;
}
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
......@@ -2050,11 +2232,11 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
up_read(&nm_i->nat_tree_lock);
}
static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
int i = 0;
int i = 0, ret;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
......@@ -2062,17 +2244,17 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return;
return 0;
if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
return;
return 0;
if (!mount) {
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return;
return 0;
}
/* readahead nat pages to be scanned */
......@@ -2086,8 +2268,16 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
scan_nat_page(sbi, page, nid);
ret = scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
if (ret) {
up_read(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, !mount);
f2fs_msg(sbi->sb, KERN_ERR,
"NAT is corrupt, run fsck to fix it");
return -EINVAL;
}
}
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
......@@ -2108,13 +2298,19 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
return 0;
}
void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
int ret;
mutex_lock(&NM_I(sbi)->build_lock);
__f2fs_build_free_nids(sbi, sync, mount);
ret = __f2fs_build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
return ret;
}
/*
......@@ -2127,12 +2323,11 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
}
#endif
spin_lock(&nm_i->nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
......@@ -2277,12 +2472,16 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
int err;
if (!prev_xnid)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
f2fs_get_node_info(sbi, prev_xnid, &ni);
err = f2fs_get_node_info(sbi, prev_xnid, &ni);
if (err)
return err;
f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
......@@ -2317,8 +2516,11 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
int err;
f2fs_get_node_info(sbi, ino, &old_ni);
err = f2fs_get_node_info(sbi, ino, &old_ni);
if (err)
return err;
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
......@@ -2372,7 +2574,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
return 0;
}
void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
......@@ -2394,6 +2596,9 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
for (idx = addr; idx < addr + nrpages; idx++) {
struct page *page = f2fs_get_tmp_page(sbi, idx);
if (IS_ERR(page))
return PTR_ERR(page);
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
......@@ -2405,6 +2610,7 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
......@@ -2582,6 +2788,13 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
nid_t set_idx = 0;
LIST_HEAD(sets);
/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
if (enabled_nat_bits(sbi, cpc)) {
down_write(&nm_i->nat_tree_lock);
remove_nats_in_journal(sbi);
up_write(&nm_i->nat_tree_lock);
}
if (!nm_i->dirty_nat_cnt)
return;
......@@ -2634,7 +2847,13 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
struct page *page = f2fs_get_meta_page(sbi, nat_bits_addr++);
struct page *page;
page = f2fs_get_meta_page(sbi, nat_bits_addr++);
if (IS_ERR(page)) {
disable_nat_bits(sbi, true);
return PTR_ERR(page);
}
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
......@@ -2718,6 +2937,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
spin_lock_init(&nm_i->nat_list_lock);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
......@@ -2762,8 +2982,8 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++) {
nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
if (!nm_i->free_nid_bitmap[i])
return -ENOMEM;
}
......@@ -2801,8 +3021,7 @@ int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
/* load free nid status from nat_bits table */
load_free_nid_bitmap(sbi);
f2fs_build_free_nids(sbi, true, true);
return 0;
return f2fs_build_free_nids(sbi, true, true);
}
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
......@@ -2837,8 +3056,13 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
for (idx = 0; idx < found; idx++)
for (idx = 0; idx < found; idx++) {
spin_lock(&nm_i->nat_list_lock);
list_del(&natvec[idx]->list);
spin_unlock(&nm_i->nat_list_lock);
__del_from_nat_cache(nm_i, natvec[idx]);
}
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
......@@ -2893,8 +3117,15 @@ int __init f2fs_create_node_manager_caches(void)
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
sizeof(struct fsync_node_entry));
if (!fsync_node_entry_slab)
goto destroy_nat_entry_set;
return 0;
destroy_nat_entry_set:
kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
......@@ -2905,6 +3136,7 @@ int __init f2fs_create_node_manager_caches(void)
void f2fs_destroy_node_manager_caches(void)
{
kmem_cache_destroy(fsync_node_entry_slab);
kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
......
......@@ -135,6 +135,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
{
return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
}
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
......@@ -444,6 +449,10 @@ static inline void set_mark(struct page *page, int mark, int type)
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_P_SB(page), page);
#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
......@@ -241,8 +241,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
unsigned int free_blocks = sbi->user_block_count -
valid_user_blocks(sbi);
unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
......@@ -252,10 +252,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = f2fs_get_tmp_page(sbi, blkaddr);
if (IS_ERR(page)) {
err = PTR_ERR(page);
break;
}
if (!is_recoverable_dnode(page))
break;
......@@ -471,7 +475,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
f2fs_get_node_info(sbi, dn.nid, &ni);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
if (err)
goto err;
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
......@@ -507,14 +514,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* dest is valid block, try to recover from src to dest */
if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) {
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = f2fs_reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
while (err)
while (err &&
IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
err = f2fs_reserve_new_block(&dn);
#endif
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
......@@ -568,12 +574,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
f2fs_ra_meta_pages_cond(sbi, blkaddr);
page = f2fs_get_tmp_page(sbi, blkaddr);
if (IS_ERR(page)) {
err = PTR_ERR(page);
break;
}
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
......@@ -628,7 +638,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & SB_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
f2fs_msg(sbi->sb, KERN_INFO,
"recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
......
......@@ -250,7 +250,13 @@ static int __revoke_inmem_pages(struct inode *inode,
err = -EAGAIN;
goto next;
}
f2fs_get_node_info(sbi, dn.nid, &ni);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
if (err) {
f2fs_put_dnode(&dn);
return err;
}
if (cur->old_addr == NEW_ADDR) {
f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
......@@ -439,8 +445,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
int err;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
down_write(&fi->i_gc_rwsem[WRITE]);
f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
mutex_lock(&fi->inmem_lock);
......@@ -455,6 +463,8 @@ int f2fs_commit_inmem_pages(struct inode *inode)
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
up_write(&fi->i_gc_rwsem[WRITE]);
return err;
}
......@@ -464,12 +474,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
#endif
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
......@@ -503,7 +511,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
if (!is_idle(sbi) && !excess_dirty_nats(sbi))
if (!is_idle(sbi) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
/* checkpoint is the only way to shrink partial cached entries */
......@@ -511,6 +520,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
......@@ -831,9 +841,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
dc->issuing = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
spin_lock_init(&dc->lock);
dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
......@@ -860,7 +873,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
atomic_dec(&dcc->issing_discard);
atomic_sub(dc->issuing, &dcc->issing_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
......@@ -875,9 +888,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
spin_lock_irqsave(&dc->lock, flags);
if (dc->bio_ref) {
spin_unlock_irqrestore(&dc->lock, flags);
return;
}
spin_unlock_irqrestore(&dc->lock, flags);
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
......@@ -893,10 +914,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
unsigned long flags;
dc->error = blk_status_to_errno(bio->bi_status);
dc->state = D_DONE;
complete_all(&dc->wait);
spin_lock_irqsave(&dc->lock, flags);
dc->bio_ref--;
if (!dc->bio_ref && dc->state == D_SUBMIT) {
dc->state = D_DONE;
complete_all(&dc->wait);
}
spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio);
}
......@@ -934,6 +962,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
dpolicy->ordered = false;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
......@@ -945,6 +974,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
......@@ -962,48 +992,115 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
}
}
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
struct discard_cmd *dc)
struct discard_cmd *dc,
unsigned int *issued)
{
struct block_device *bdev = dc->bdev;
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0;
block_t lstart, start, len, total_len;
int err = 0;
if (dc->state != D_PREP)
return;
return 0;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
return;
return 0;
trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
trace_f2fs_issue_discard(bdev, dc->start, dc->len);
dc->error = __blkdev_issue_discard(dc->bdev,
SECTOR_FROM_BLOCK(dc->start),
SECTOR_FROM_BLOCK(dc->len),
GFP_NOFS, 0, &bio);
if (!dc->error) {
/* should keep before submission to avoid D_DONE right away */
dc->state = D_SUBMIT;
atomic_inc(&dcc->issued_discard);
atomic_inc(&dcc->issing_discard);
if (bio) {
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= flag;
submit_bio(bio);
list_move_tail(&dc->list, wait_list);
__check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
f2fs_update_iostat(sbi, FS_DISCARD, 1);
lstart = dc->lstart;
start = dc->start;
len = dc->len;
total_len = len;
dc->len = 0;
while (total_len && *issued < dpolicy->max_requests && !err) {
struct bio *bio = NULL;
unsigned long flags;
bool last = true;
if (len > max_discard_blocks) {
len = max_discard_blocks;
last = false;
}
} else {
__remove_discard_cmd(sbi, dc);
(*issued)++;
if (*issued == dpolicy->max_requests)
last = true;
dc->len += len;
if (time_to_inject(sbi, FAULT_DISCARD)) {
f2fs_show_injection_info(FAULT_DISCARD);
err = -EIO;
goto submit;
}
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(start),
SECTOR_FROM_BLOCK(len),
GFP_NOFS, 0, &bio);
submit:
if (err) {
spin_lock_irqsave(&dc->lock, flags);
if (dc->state == D_PARTIAL)
dc->state = D_SUBMIT;
spin_unlock_irqrestore(&dc->lock, flags);
break;
}
f2fs_bug_on(sbi, !bio);
/*
* should keep before submission to avoid D_DONE
* right away
*/
spin_lock_irqsave(&dc->lock, flags);
if (last)
dc->state = D_SUBMIT;
else
dc->state = D_PARTIAL;
dc->bio_ref++;
spin_unlock_irqrestore(&dc->lock, flags);
atomic_inc(&dcc->issing_discard);
dc->issuing++;
list_move_tail(&dc->list, wait_list);
/* sanity check on discard range */
__check_sit_bitmap(sbi, start, start + len);
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= flag;
submit_bio(bio);
atomic_inc(&dcc->issued_discard);
f2fs_update_iostat(sbi, FS_DISCARD, 1);
lstart += len;
start += len;
total_len -= len;
len = total_len;
}
if (!err && len)
__update_discard_tree_range(sbi, bdev, lstart, start, len);
return err;
}
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
......@@ -1084,10 +1181,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len;
mutex_lock(&dcc->cmd_lock);
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
......@@ -1127,7 +1225,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
__is_discard_back_mergeable(&di, &prev_dc->di)) {
__is_discard_back_mergeable(&di, &prev_dc->di,
max_discard_blocks)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
......@@ -1138,7 +1237,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
__is_discard_front_mergeable(&di, &next_dc->di)) {
__is_discard_front_mergeable(&di, &next_dc->di,
max_discard_blocks)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
......@@ -1161,8 +1261,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
mutex_unlock(&dcc->cmd_lock);
}
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
......@@ -1177,10 +1275,72 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
blkstart -= FDEV(devi).start_blk;
}
mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0;
}
static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
struct discard_cmd *dc;
struct blk_plug plug;
unsigned int pos = dcc->next_pos;
unsigned int issued = 0;
bool io_interrupted = false;
mutex_lock(&dcc->cmd_lock);
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, pos,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
&insert_p, &insert_parent, true);
if (!dc)
dc = next_dc;
blk_start_plug(&plug);
while (dc) {
struct rb_node *node;
int err = 0;
if (dc->state != D_PREP)
goto next;
if (dpolicy->io_aware && !is_idle(sbi)) {
io_interrupted = true;
break;
}
dcc->next_pos = dc->lstart + dc->len;
err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (issued >= dpolicy->max_requests)
break;
next:
node = rb_next(&dc->rb_node);
if (err)
__remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
blk_finish_plug(&plug);
if (!dc)
dcc->next_pos = 0;
mutex_unlock(&dcc->cmd_lock);
if (!issued && io_interrupted)
issued = -1;
return issued;
}
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
......@@ -1188,19 +1348,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
int i, iter = 0, issued = 0;
int i, issued = 0;
bool io_interrupted = false;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
return __issue_discard_cmd_orderly(sbi, dpolicy);
pend_list = &dcc->pend_list[i];
mutex_lock(&dcc->cmd_lock);
if (list_empty(pend_list))
goto next;
f2fs_bug_on(sbi,
!f2fs_check_rb_tree_consistence(sbi, &dcc->root));
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
&dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
......@@ -1208,20 +1373,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi)) {
io_interrupted = true;
goto skip;
break;
}
__submit_discard_cmd(sbi, dpolicy, dc);
issued++;
skip:
if (++iter >= dpolicy->max_requests)
__submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (issued >= dpolicy->max_requests)
break;
}
blk_finish_plug(&plug);
next:
mutex_unlock(&dcc->cmd_lock);
if (iter >= dpolicy->max_requests)
if (issued >= dpolicy->max_requests || io_interrupted)
break;
}
......@@ -1319,21 +1483,22 @@ static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
return trimmed;
}
static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_policy dp;
unsigned int discard_blks;
if (dpolicy) {
__wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
return;
}
if (dpolicy)
return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
__wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
__wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
return discard_blks;
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
......@@ -1386,6 +1551,8 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
return dropped;
}
......@@ -1643,21 +1810,30 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
if (need_align && end != -1)
end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
for (i = start; i < end; i++)
clear_bit(i, prefree_map);
if (need_align) {
start = rounddown(start, sbi->segs_per_sec);
end = roundup(end, sbi->segs_per_sec);
}
dirty_i->nr_dirty[PRE] -= end - start;
for (i = start; i < end; i++) {
if (test_and_clear_bit(i, prefree_map))
dirty_i->nr_dirty[PRE]--;
}
if (!test_opt(sbi, DISCARD))
continue;
......@@ -1751,7 +1927,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
dcc->next_pos = 0;
dcc->root = RB_ROOT;
dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
......@@ -1901,6 +2079,8 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
if (addr == NEW_ADDR)
return;
invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
......@@ -1919,7 +2099,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
if (!is_valid_blkaddr(blkaddr))
if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
down_read(&sit_i->sentry_lock);
......@@ -1983,7 +2163,7 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
return f2fs_get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
......@@ -2366,7 +2546,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
return has_candidate;
}
static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
......@@ -2376,12 +2556,15 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
unsigned int trimmed = 0;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
&dcc->root));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
......@@ -2395,6 +2578,7 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
while (dc && dc->lstart <= end) {
struct rb_node *node;
int err = 0;
if (dc->len < dpolicy->granularity)
goto skip;
......@@ -2404,19 +2588,24 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
goto skip;
}
__submit_discard_cmd(sbi, dpolicy, dc);
err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (++issued >= dpolicy->max_requests) {
if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
if (err)
__remove_discard_cmd(sbi, dc);
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
__wait_all_discard_cmd(sbi, NULL);
trimmed += __wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
if (err)
__remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
......@@ -2425,6 +2614,8 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
return trimmed;
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
......@@ -2437,12 +2628,13 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
if (end <= MAIN_BLKADDR(sbi))
return -EINVAL;
if (end < MAIN_BLKADDR(sbi))
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
f2fs_msg(sbi->sb, KERN_WARNING,
......@@ -2454,6 +2646,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
if (need_align) {
start_segno = rounddown(start_segno, sbi->segs_per_sec);
end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
}
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
......@@ -2469,24 +2665,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
if (err)
goto out;
start_block = START_BLOCK(sbi, start_segno);
end_block = START_BLOCK(sbi, end_segno + 1);
__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
__issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
/*
* We filed discard candidates, but actually we don't need to wait for
* all of them, since they'll be issued in idle time along with runtime
* discard option. User configuration looks like using runtime discard
* or periodic fstrim instead of it.
*/
if (!test_opt(sbi, DISCARD)) {
trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
if (test_opt(sbi, DISCARD))
goto out;
start_block = START_BLOCK(sbi, start_segno);
end_block = START_BLOCK(sbi, end_segno + 1);
__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
range->len = F2FS_BLK_TO_BYTES(trimmed);
}
out:
if (!err)
range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
......@@ -2639,8 +2838,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
is_inode_flag_set(inode, FI_ATOMIC_FILE) ||
is_inode_flag_set(inode, FI_VOLATILE_FILE))
f2fs_is_atomic_file(inode) ||
f2fs_is_volatile_file(inode))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
} else {
......@@ -2781,6 +2980,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio, true);
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(fio->sbi),
fio->old_blkaddr, fio->old_blkaddr);
/* writeout dirty page into bdev */
f2fs_submit_page_write(fio);
......@@ -2836,11 +3038,9 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
{
struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
f2fs_get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
......@@ -2937,8 +3137,11 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
update_sit_entry(sbi, old_blkaddr, -1);
}
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
......@@ -2992,7 +3195,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct page *cpage;
if (!is_valid_blkaddr(blkaddr))
if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
......@@ -3002,7 +3205,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
}
}
static void read_compacted_summaries(struct f2fs_sb_info *sbi)
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *seg_i;
......@@ -3014,6 +3217,8 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
start = start_sum_block(sbi);
page = f2fs_get_meta_page(sbi, start++);
if (IS_ERR(page))
return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
/* Step 1: restore nat cache */
......@@ -3054,11 +3259,14 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
page = NULL;
page = f2fs_get_meta_page(sbi, start++);
if (IS_ERR(page))
return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
offset = 0;
}
}
f2fs_put_page(page, 1);
return 0;
}
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
......@@ -3070,6 +3278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
unsigned short blk_off;
unsigned int segno = 0;
block_t blk_addr = 0;
int err = 0;
/* get segment number and block addr */
if (IS_DATASEG(type)) {
......@@ -3093,6 +3302,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
}
new = f2fs_get_meta_page(sbi, blk_addr);
if (IS_ERR(new))
return PTR_ERR(new);
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
......@@ -3104,7 +3315,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
f2fs_restore_node_summary(sbi, segno, sum);
err = f2fs_restore_node_summary(sbi, segno, sum);
if (err)
goto out;
}
}
......@@ -3124,8 +3337,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = ckpt->alloc_type[type];
curseg->next_blkoff = blk_off;
mutex_unlock(&curseg->curseg_mutex);
out:
f2fs_put_page(new, 1);
return 0;
return err;
}
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
......@@ -3143,7 +3357,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
META_CP, true);
/* restore for compacted data summary */
read_compacted_summaries(sbi);
err = read_compacted_summaries(sbi);
if (err)
return err;
type = CURSEG_HOT_NODE;
}
......@@ -3274,7 +3490,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
......@@ -3923,6 +4139,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
......
......@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
((!is_valid_blkaddr(blk_addr)) ? \
((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
......@@ -215,7 +215,7 @@ struct segment_allocation {
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
#define MAX_SKIP_ATOMIC_COUNT 16
#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct list_head list;
......@@ -448,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
if (IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
......@@ -455,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
skip_free:
spin_unlock(&free_i->segmap_lock);
}
......@@ -645,13 +648,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
struct f2fs_sb_info *sbi = fio->sbi;
if (PAGE_TYPE_OF_BIO(fio->type) == META &&
(!is_read_io(fio->op) || fio->is_meta))
BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
blk_addr >= MAIN_BLKADDR(sbi));
if (__is_meta_io(fio))
verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
blk_addr >= MAX_BLKADDR(sbi));
verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
......
......@@ -41,7 +41,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
char *fault_name[FAULT_MAX] = {
char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
......@@ -55,20 +55,24 @@ char *fault_name[FAULT_MAX] = {
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
[FAULT_DISCARD] = "discard error",
};
static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
unsigned int rate)
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
unsigned int type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
ffi->inject_rate = rate;
ffi->inject_type = (1 << FAULT_MAX) - 1;
} else {
memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
if (type)
ffi->inject_type = type;
if (!rate && !type)
memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
#endif
......@@ -113,6 +117,7 @@ enum {
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
Opt_fault_type,
Opt_lazytime,
Opt_nolazytime,
Opt_quota,
......@@ -170,6 +175,7 @@ static match_table_t f2fs_tokens = {
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
......@@ -347,12 +353,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"Filesystem with quota feature cannot be mounted RDWR "
"without CONFIG_QUOTA");
return -1;
}
return 0;
}
#endif
......@@ -606,7 +606,18 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, arg);
f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
"FAULT_INJECTION was not selected");
#endif
break;
case Opt_fault_type:
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, 0, arg);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
......@@ -775,6 +786,19 @@ static int parse_options(struct super_block *sb, char *options)
#ifdef CONFIG_QUOTA
if (f2fs_check_quota_options(sbi))
return -EINVAL;
#else
if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"Filesystem with quota feature cannot be mounted RDWR "
"without CONFIG_QUOTA");
return -EINVAL;
}
if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
f2fs_msg(sb, KERN_ERR,
"Filesystem with project quota feature cannot be "
"mounted RDWR without CONFIG_QUOTA");
return -EINVAL;
}
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
......@@ -1030,6 +1054,10 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
f2fs_wait_on_all_pages_writeback(sbi);
f2fs_bug_on(sbi, sbi->fsync_node_num);
iput(sbi->node_inode);
iput(sbi->meta_inode);
......@@ -1118,7 +1146,7 @@ static int f2fs_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
spin_lock(&dq_data_lock);
spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
......@@ -1141,7 +1169,7 @@ static int f2fs_statfs_project(struct super_block *sb,
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
}
spin_unlock(&dq_data_lock);
spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot);
return 0;
}
......@@ -1310,9 +1338,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION))
if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
F2FS_OPTION(sbi).fault_info.inject_rate);
seq_printf(seq, ",fault_type=%u",
F2FS_OPTION(sbi).fault_info.inject_type);
}
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
......@@ -1343,6 +1374,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
seq_printf(seq, ",fsync_mode=%s", "strict");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
seq_printf(seq, ",fsync_mode=%s", "nobarrier");
return 0;
}
......@@ -1355,7 +1388,8 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).test_dummy_encryption = false;
sbi->readdir_ra = 1;
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
......@@ -1365,12 +1399,12 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
if (f2fs_sb_has_blkzoned(sbi->sb)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
set_opt(sbi, DISCARD);
} else {
if (f2fs_sb_has_blkzoned(sbi->sb))
set_opt_mode(sbi, F2FS_MOUNT_LFS);
else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
}
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
......@@ -1379,9 +1413,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, POSIX_ACL);
#endif
#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, 0);
#endif
f2fs_build_fault_attr(sbi, 0, 0);
}
#ifdef CONFIG_QUOTA
......@@ -2229,9 +2261,9 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
if (secs_per_zone > total_sections) {
if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_msg(sb, KERN_INFO,
"Wrong secs_per_zone (%u > %u)",
"Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
return 1;
}
......@@ -2282,12 +2314,20 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
unsigned int sit_segs, nat_segs;
unsigned int sit_bitmap_size, nat_bitmap_size;
unsigned int log_blocks_per_seg;
unsigned int segment_count_main;
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
fsmeta += le32_to_cpu(raw_super->segment_count_sit);
fsmeta += le32_to_cpu(raw_super->segment_count_nat);
sit_segs = le32_to_cpu(raw_super->segment_count_sit);
fsmeta += sit_segs;
nat_segs = le32_to_cpu(raw_super->segment_count_nat);
fsmeta += nat_segs;
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
......@@ -2304,6 +2344,16 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
user_block_count = le64_to_cpu(ckpt->user_block_count);
segment_count_main = le32_to_cpu(raw_super->segment_count_main);
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong user_block_count: %u", user_block_count);
return 1;
}
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
......@@ -2318,6 +2368,28 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong bitmap size: sit: %u, nat:%u",
sit_bitmap_size, nat_bitmap_size);
return 1;
}
cp_pack_start_sum = __start_sum_addr(sbi);
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
NR_CURSEG_TYPE) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong cp_pack_start_sum: %u",
cp_pack_start_sum);
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
......@@ -2651,6 +2723,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
sbi->readdir_ra = 1;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
......@@ -2700,9 +2774,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
......@@ -2771,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
......@@ -2865,6 +2937,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_init_ino_entry_info(sbi);
f2fs_init_fsync_node_info(sbi);
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
......@@ -2912,10 +2986,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = PTR_ERR(root);
goto free_stats;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
!root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
goto free_node_inode;
goto free_stats;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
......@@ -2929,10 +3004,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
#ifdef CONFIG_QUOTA
/*
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.
*/
/* Enable quota usage during mount */
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err) {
......@@ -3090,9 +3162,19 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root) {
set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
f2fs_stop_gc_thread(F2FS_SB(sb));
f2fs_stop_discard_thread(F2FS_SB(sb));
struct f2fs_sb_info *sbi = F2FS_SB(sb);
set_sbi_flag(sbi, SBI_IS_CLOSE);
f2fs_stop_gc_thread(sbi);
f2fs_stop_discard_thread(sbi);
if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
f2fs_write_checkpoint(sbi, &cpc);
}
}
kill_block_super(sb);
}
......
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
......@@ -252,6 +253,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
if (t >= 1) {
sbi->gc_mode = GC_URGENT;
if (sbi->gc_thread) {
sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(
&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
......@@ -286,8 +288,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
a->struct_type == GC_THREAD);
if (gc_entry)
down_read(&sbi->sb->s_umount);
if (gc_entry) {
if (!down_read_trylock(&sbi->sb->s_umount))
return -EAGAIN;
}
ret = __sbi_store(a, sbi, buf, count);
if (gc_entry)
up_read(&sbi->sb->s_umount);
......@@ -393,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
......@@ -445,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
......@@ -516,7 +522,8 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
static int segment_info_seq_show(struct seq_file *seq, void *offset)
static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -543,7 +550,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
static int segment_bits_seq_show(struct seq_file *seq, void *offset)
static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -567,7 +575,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
return 0;
}
static int iostat_info_seq_show(struct seq_file *seq, void *offset)
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -609,6 +618,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
int i;
seq_puts(seq, "format: victim_secmap bitmaps\n");
for (i = 0; i < MAIN_SECS(sbi); i++) {
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
seq_putc(seq, '\n');
else
seq_putc(seq, ' ');
}
return 0;
}
int __init f2fs_init_sysfs(void)
{
int ret;
......@@ -658,6 +689,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
segment_bits_seq_show, sb);
proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
iostat_info_seq_show, sb);
proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
victim_bits_seq_show, sb);
}
return 0;
}
......@@ -668,6 +701,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
......
......@@ -37,9 +37,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
......@@ -62,9 +59,6 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
......@@ -100,12 +94,22 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
const char *name, const void *value,
size_t size, int flags)
{
unsigned char old_advise = F2FS_I(inode)->i_advise;
unsigned char new_advise;
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
new_advise = *(char *)value;
if (new_advise & ~FADVISE_MODIFIABLE_BITS)
return -EINVAL;
new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
......
......@@ -304,11 +304,6 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \
((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \
BITS_PER_LONG * BITS_PER_LONG)
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment