Commit 70ef8f0d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've focused on enhancing performance with regards to
  block allocation, GC, and discard/in-place-update IO controls. There
  are a bunch of clean-ups as well as minor bug fixes.

  Enhancements:
   - disable heap-based allocation by default
   - issue small-sized discard commands by default
   - change the policy of data hotness for logging
   - distinguish IOs in terms of size and wbc type
   - start SSR earlier to avoid foreground GC
   - enhance data structures managing discard commands
   - enhance in-place update flow
   - add some more fault injection routines
   - secure one more xattr entry

  Bug fixes:
   - calculate victim cost for GC correctly
   - remain correct victim segment number for GC
   - race condition in nid allocator and initializer
   - stale pointer produced by atomic_writes
   - fix missing REQ_SYNC for flush commands
   - handle missing errors in more corner cases"

* tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (111 commits)
  f2fs: fix a mount fail for wrong next_scan_nid
  f2fs: enhance scalability of trace macro
  f2fs: relocate inode_{,un}lock in F2FS_IOC_SETFLAGS
  f2fs: Make flush bios explicitely sync
  f2fs: show available_nids in f2fs/status
  f2fs: flush dirty nats periodically
  f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard
  f2fs: allow cpc->reason to indicate more than one reason
  f2fs: release cp and dnode lock before IPU
  f2fs: shrink size of struct discard_cmd
  f2fs: don't hold cmd_lock during waiting discard command
  f2fs: nullify fio->encrypted_page for each writes
  f2fs: sanity check segment count
  f2fs: introduce valid_ipu_blkaddr to clean up
  f2fs: lookup extent cache first under IPU scenario
  f2fs: reconstruct code to write a data page
  f2fs: introduce __wait_discard_cmd
  f2fs: introduce __issue_discard_cmd
  f2fs: enable small discard by default
  f2fs: delay awaking discard thread
  ...
parents 677375ce e9cdd307
......@@ -275,10 +275,11 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
/* if locked failed, cp will flush dirty pages instead */
if (!mutex_trylock(&sbi->cp_mutex))
goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
......@@ -567,7 +568,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (ni.blk_addr != NULL_ADDR) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: orphan failed (ino=%x), run fsck to fix.",
"%s: orphan failed (ino=%x) by kernel, retry mount.",
__func__, ino);
return -EIO;
}
......@@ -677,7 +678,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset >= blk_size) {
if (crc_offset > (blk_size - sizeof(__le32))) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
......@@ -816,7 +817,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type)
return;
set_inode_flag(inode, flag);
list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
if (!f2fs_is_volatile_file(inode))
list_add_tail(&F2FS_I(inode)->dirty_list,
&sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
......@@ -941,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
return 0;
}
static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
nid_t last_nid = nm_i->next_scan_nid;
next_free_nid(sbi, &last_nid);
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
}
/*
* Freeze all the FS-operations for checkpoint.
*/
......@@ -964,21 +980,26 @@ static int block_operations(struct f2fs_sb_info *sbi)
err = sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
cond_resched();
goto retry_flush_dents;
}
/*
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
down_write(&sbi->node_change);
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
cond_resched();
goto retry_flush_dents;
}
/*
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush.
*/
retry_flush_nodes:
down_write(&sbi->node_write);
......@@ -986,11 +1007,20 @@ static int block_operations(struct f2fs_sb_info *sbi)
up_write(&sbi->node_write);
err = sync_node_pages(sbi, &wbc);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
goto out;
}
cond_resched();
goto retry_flush_nodes;
}
/*
* sbi->node_change is used only for AIO write_begin path which produces
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
out:
blk_finish_plug(&plug);
return err;
......@@ -1024,16 +1054,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
if ((cpc->reason & CP_UMOUNT) &&
le32_to_cpu(ckpt->cp_pack_total_block_count) >
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
if (cpc->reason == CP_UMOUNT)
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (cpc->reason == CP_FASTBOOT)
if (cpc->reason & CP_FASTBOOT)
__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
......@@ -1057,7 +1091,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
block_t start_blk;
unsigned int data_sum_blocks, orphan_blocks;
__u32 crc32 = 0;
......@@ -1074,14 +1107,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return -EIO;
}
next_free_nid(sbi, &last_nid);
/*
* modify checkpoint
* version number is already updated
*/
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
......@@ -1100,10 +1130,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
}
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
spin_lock(&sbi->cp_lock);
......@@ -1143,7 +1169,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* write nat bits */
if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
unsigned int i;
block_t blk;
cp_ver |= ((__u64)crc32 << 32);
......@@ -1250,8 +1275,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
......@@ -1273,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
if (cpc->reason == CP_DISCARD) {
if (cpc->reason & CP_DISCARD) {
if (!exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
......@@ -1311,7 +1336,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason == CP_RECOVERY)
if (cpc->reason & CP_RECOVERY)
f2fs_msg(sbi->sb, KERN_NOTICE,
"checkpoint: version = %llx", ckpt_ver);
......
......@@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
io->fio.op = REQ_OP_WRITE;
io->fio.op_flags = REQ_META | REQ_PRIO;
io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
if (!test_opt(sbi, NOBARRIER))
io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
......@@ -341,7 +341,7 @@ void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
/*
* Fill the locked page with data located in the block address.
* Return unlocked page.
* A caller needs to unlock the page on failure.
*/
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
......@@ -362,6 +362,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
bio_set_op_attrs(bio, fio->op, fio->op_flags);
__submit_bio(fio->sbi, bio, fio->type);
if (!is_read_io(fio->op))
inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
return 0;
}
......@@ -787,6 +790,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err;
}
static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
down_read(&sbi->node_change);
else
up_read(&sbi->node_change);
} else {
if (lock)
f2fs_lock_op(sbi);
else
f2fs_unlock_op(sbi);
}
}
/*
* f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
* f2fs_map_blocks structure.
......@@ -829,7 +847,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
next_dnode:
if (create)
f2fs_lock_op(sbi);
__do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
......@@ -939,7 +957,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
f2fs_put_dnode(&dn);
if (create) {
f2fs_unlock_op(sbi);
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
goto next_dnode;
......@@ -948,7 +966,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
f2fs_put_dnode(&dn);
unlock_out:
if (create) {
f2fs_unlock_op(sbi);
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
......@@ -1151,9 +1169,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
prefetchw(&page->flags);
if (pages) {
page = list_last_entry(pages, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
page->index,
......@@ -1283,17 +1302,83 @@ static int f2fs_read_data_pages(struct file *file,
return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
return 0;
/* wait for GCed encrypted page writeback */
f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr);
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
if (!IS_ERR(fio->encrypted_page))
return 0;
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_bios(fio->sbi);
congestion_wait(BLK_RW_ASYNC, HZ/50);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
return PTR_ERR(fio->encrypted_page);
}
static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
return false;
if (is_cold_data(fio->page))
return false;
if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
return false;
return need_inplace_update_policy(inode, fio);
}
static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
{
if (fio->old_blkaddr == NEW_ADDR)
return false;
if (fio->old_blkaddr == NULL_ADDR)
return false;
return true;
}
int do_write_data_page(struct f2fs_io_info *fio)
{
struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
bool ipu_force = false;
int err = 0;
set_new_dnode(&dn, inode, NULL, NULL, 0);
if (need_inplace_update(fio) &&
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (valid_ipu_blkaddr(fio)) {
ipu_force = true;
fio->need_lock = false;
goto got_it;
}
}
if (fio->need_lock)
f2fs_lock_op(fio->sbi);
err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
if (err)
return err;
goto out;
fio->old_blkaddr = dn.data_blkaddr;
......@@ -1302,31 +1387,10 @@ int do_write_data_page(struct f2fs_io_info *fio)
ClearPageUptodate(page);
goto out_writepage;
}
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
gfp_t gfp_flags = GFP_NOFS;
/* wait for GCed encrypted page writeback */
f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
fio->old_blkaddr);
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0,
fio->page->index,
gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
err = PTR_ERR(fio->encrypted_page);
if (err == -ENOMEM) {
/* flush pending ios and wait for a while */
f2fs_flush_merged_bios(F2FS_I_SB(inode));
congestion_wait(BLK_RW_ASYNC, HZ/50);
gfp_flags |= __GFP_NOFAIL;
err = 0;
goto retry_encrypt;
}
goto out_writepage;
}
}
got_it:
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
set_page_writeback(page);
......@@ -1334,22 +1398,27 @@ int do_write_data_page(struct f2fs_io_info *fio)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
if (unlikely(fio->old_blkaddr != NEW_ADDR &&
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
rewrite_data_page(fio);
if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
f2fs_put_dnode(&dn);
if (fio->need_lock)
f2fs_unlock_op(fio->sbi);
err = rewrite_data_page(fio);
trace_f2fs_do_write_data_page(fio->page, IPU);
set_inode_flag(inode, FI_UPDATE_WRITE);
trace_f2fs_do_write_data_page(page, IPU);
} else {
write_data_page(&dn, fio);
trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (page->index == 0)
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
return err;
}
/* LFS mode write path */
write_data_page(&dn, fio);
trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (page->index == 0)
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
f2fs_put_dnode(&dn);
out:
if (fio->need_lock)
f2fs_unlock_op(fio->sbi);
return err;
}
......@@ -1370,9 +1439,11 @@ static int __write_data_page(struct page *page, bool *submitted,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
.old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
.submitted = false,
.need_lock = true,
};
trace_f2fs_writepage(page, DATA);
......@@ -1408,6 +1479,7 @@ static int __write_data_page(struct page *page, bool *submitted,
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
fio.need_lock = false;
err = do_write_data_page(&fio);
goto done;
}
......@@ -1416,6 +1488,8 @@ static int __write_data_page(struct page *page, bool *submitted,
need_balance_fs = true;
else if (has_not_enough_free_secs(sbi, 0, 0))
goto redirty_out;
else
set_inode_flag(inode, FI_HOT_DATA);
err = -EAGAIN;
if (f2fs_has_inline_data(inode)) {
......@@ -1423,12 +1497,12 @@ static int __write_data_page(struct page *page, bool *submitted,
if (!err)
goto out;
}
f2fs_lock_op(sbi);
if (err == -EAGAIN)
err = do_write_data_page(&fio);
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
f2fs_unlock_op(sbi);
done:
if (err && err != -ENOENT)
goto redirty_out;
......@@ -1441,12 +1515,14 @@ static int __write_data_page(struct page *page, bool *submitted,
if (wbc->for_reclaim) {
f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
DATA, WRITE);
clear_inode_flag(inode, FI_HOT_DATA);
remove_dirty_inode(inode);
submitted = NULL;
}
unlock_page(page);
f2fs_balance_fs(sbi, need_balance_fs);
if (!S_ISDIR(inode->i_mode))
f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
......@@ -1495,6 +1571,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pagevec_init(&pvec, 0);
if (get_dirty_pages(mapping->host) <=
SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
set_inode_flag(mapping->host, FI_HOT_DATA);
else
clear_inode_flag(mapping->host, FI_HOT_DATA);
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
......@@ -1580,8 +1662,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
last_idx = page->index;
}
if (--wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
/* give a priority to WB_SYNC threads */
if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
--wbc->nr_to_write <= 0) &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
......@@ -1637,9 +1721,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, DATA);
/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_inc(&sbi->wb_sync_req);
else if (atomic_read(&sbi->wb_sync_req))
goto skip_write;
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug(&plug);
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_dec(&sbi->wb_sync_req);
/*
* if some pages were truncated, we cannot guarantee its mapping->host
* to detect pending bios.
......@@ -1687,7 +1780,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
f2fs_lock_op(sbi);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
locked = true;
}
restart:
......@@ -1723,7 +1816,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
f2fs_lock_op(sbi);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
locked = true;
goto restart;
}
......@@ -1737,7 +1831,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
f2fs_unlock_op(sbi);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
return err;
}
......@@ -1951,7 +2045,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
return;
return drop_inmem_page(inode, page);
set_page_private(page, 0);
ClearPagePrivate(page);
......
......@@ -51,15 +51,26 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
if (SM_I(sbi) && SM_I(sbi)->fcc_info)
si->nr_flush =
atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
if (SM_I(sbi) && SM_I(sbi)->dcc_info)
si->nr_discard =
atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
}
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
......@@ -86,6 +97,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
......@@ -99,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = curseg->segno / sbi->segs_per_sec;
si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
}
for (i = 0; i < 2; i++) {
......@@ -124,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
blks_per_sec = BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
vblocks = get_valid_blocks(sbi, segno, true);
dist = abs(vblocks - hblks_per_sec);
bimodal += dist * dist;
......@@ -156,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
if (si->base_mem)
goto get_cache;
si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
/* build stat */
si->base_mem = sizeof(struct f2fs_stat_info);
/* build superblock */
si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
si->base_mem += 2 * sizeof(struct f2fs_inode_info);
si->base_mem += sizeof(*sbi->ckpt);
si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
......@@ -208,8 +224,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build merge flush thread */
if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
if (SM_I(sbi)->dcc_info)
if (SM_I(sbi)->dcc_info) {
si->cache_mem += sizeof(struct discard_cmd_control);
si->cache_mem += sizeof(struct discard_cmd) *
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
}
/* free nids */
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
......@@ -330,11 +349,16 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flush, si->nr_discard);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
si->nr_flushing, si->nr_flushed,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
"volatile IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
si->vw_cnt, si->max_vw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
......@@ -347,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n",
si->free_nids, si->alloc_nids);
seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n",
si->free_nids, si->avail_nids, si->alloc_nids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
......@@ -434,7 +458,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inplace_count, 0);
atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
......
......@@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
de = find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
......@@ -192,13 +192,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
/* This is to increase the speed of f2fs_create */
if (!de && room) {
F2FS_I(dir)->task = current;
if (F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
}
if (!de && room && F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
}
return de;
......@@ -239,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
break;
}
out:
/* This is to increase the speed of f2fs_create */
if (!de)
F2FS_I(dir)->task = current;
return de;
}
......@@ -322,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
int update_dent_inode(struct inode *inode, struct inode *to,
const struct qstr *name)
{
struct page *page;
if (file_enc_name(to))
return 0;
page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(page))
return PTR_ERR(page);
init_dent_inode(name, page);
f2fs_put_page(page, 1);
return 0;
}
void do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
......@@ -369,7 +350,7 @@ static int make_empty_dir(struct inode *inode,
dentry_blk = kmap_atomic(dentry_page);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
kunmap_atomic(dentry_blk);
......@@ -423,8 +404,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
set_cold_node(inode, page);
}
if (new_name)
if (new_name) {
init_dent_inode(new_name, page);
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
/*
* This file should be checkpointed during fsync.
......@@ -584,11 +568,9 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
......@@ -896,7 +878,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(inode, &d, dentry_blk);
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
......
......@@ -18,6 +18,179 @@
#include "node.h"
#include <trace/events/f2fs.h>
static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
unsigned int ofs)
{
if (cached_re) {
if (cached_re->ofs <= ofs &&
cached_re->ofs + cached_re->len > ofs) {
return cached_re;
}
}
return NULL;
}
static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
unsigned int ofs)
{
struct rb_node *node = root->rb_node;
struct rb_entry *re;
while (node) {
re = rb_entry(node, struct rb_entry, rb_node);
if (ofs < re->ofs)
node = node->rb_left;
else if (ofs >= re->ofs + re->len)
node = node->rb_right;
else
return re;
}
return NULL;
}
struct rb_entry *__lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs)
{
struct rb_entry *re;
re = __lookup_rb_tree_fast(cached_re, ofs);
if (!re)
return __lookup_rb_tree_slow(root, ofs);
return re;
}
struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs)
{
struct rb_node **p = &root->rb_node;
struct rb_entry *re;
while (*p) {
*parent = *p;
re = rb_entry(*parent, struct rb_entry, rb_node);
if (ofs < re->ofs)
p = &(*p)->rb_left;
else if (ofs >= re->ofs + re->len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
return p;
}
/*
* lookup rb entry in position of @ofs in rb-tree,
* if hit, return the entry, otherwise, return NULL
* @prev_ex: extent before ofs
* @next_ex: extent after ofs
* @insert_p: insert point for new extent at ofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re,
unsigned int ofs,
struct rb_entry **prev_entry,
struct rb_entry **next_entry,
struct rb_node ***insert_p,
struct rb_node **insert_parent,
bool force)
{
struct rb_node **pnode = &root->rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct rb_entry *re = cached_re;
*insert_p = NULL;
*insert_parent = NULL;
*prev_entry = NULL;
*next_entry = NULL;
if (RB_EMPTY_ROOT(root))
return NULL;
if (re) {
if (re->ofs <= ofs && re->ofs + re->len > ofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
re = rb_entry(*pnode, struct rb_entry, rb_node);
if (ofs < re->ofs)
pnode = &(*pnode)->rb_left;
else if (ofs >= re->ofs + re->len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
re = rb_entry(parent, struct rb_entry, rb_node);
tmp_node = parent;
if (parent && ofs > re->ofs)
tmp_node = rb_next(parent);
*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
tmp_node = parent;
if (parent && ofs < re->ofs)
tmp_node = rb_prev(parent);
*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
return NULL;
lookup_neighbors:
if (ofs == re->ofs || force) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&re->rb_node);
*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
}
if (ofs == re->ofs + re->len - 1 || force) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&re->rb_node);
*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
}
return re;
}
bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first(root), *next;
struct rb_entry *cur_re, *next_re;
if (!cur)
return true;
while (cur) {
next = rb_next(cur);
if (!next)
return true;
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
"cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
cur = next;
}
#endif
return true;
}
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
......@@ -102,36 +275,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
return et;
}
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en = et->cached_en;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
stat_inc_cached_node_hit(sbi);
return en;
}
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
stat_inc_rbtree_node_hit(sbi);
return en;
}
}
return NULL;
}
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
......@@ -237,17 +380,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
en = __lookup_extent_tree(sbi, et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
en = (struct extent_node *)__lookup_rb_tree(&et->root,
(struct rb_entry *)et->cached_en, pgofs);
if (!en)
goto out;
if (en == et->cached_en)
stat_inc_cached_node_hit(sbi);
else
stat_inc_rbtree_node_hit(sbi);
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
out:
stat_inc_total_hit(sbi);
read_unlock(&et->lock);
......@@ -256,83 +406,6 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
return ret;
}
/*
* lookup extent at @fofs, if hit, return the extent
* if not, return NULL and
* @prev_ex: extent before fofs
* @next_ex: extent after fofs
* @insert_p: insert point for new extent at fofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
unsigned int fofs,
struct extent_node **prev_ex,
struct extent_node **next_ex,
struct rb_node ***insert_p,
struct rb_node **insert_parent)
{
struct rb_node **pnode = &et->root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct extent_node *en = et->cached_en;
*insert_p = NULL;
*insert_parent = NULL;
*prev_ex = NULL;
*next_ex = NULL;
if (RB_EMPTY_ROOT(&et->root))
return NULL;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
en = rb_entry(*pnode, struct extent_node, rb_node);
if (fofs < en->ei.fofs)
pnode = &(*pnode)->rb_left;
else if (fofs >= en->ei.fofs + en->ei.len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
return en;
}
static struct extent_node *__try_merge_extent_node(struct inode *inode,
struct extent_tree *et, struct extent_info *ei,
struct extent_node *prev_ex,
......@@ -387,17 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
goto do_insert;
}
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs)
p = &(*p)->rb_left;
else if (ei->fofs >= en->ei.fofs + en->ei.len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
......@@ -447,8 +510,11 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
__drop_largest_extent(inode, fofs, len);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
(struct rb_entry *)et->cached_en, fofs,
(struct rb_entry **)&prev_en,
(struct rb_entry **)&next_en,
&insert_p, &insert_parent, false);
if (!en)
en = next_en;
......
......@@ -50,6 +50,7 @@ enum {
FAULT_BLOCK,
FAULT_DIR_DEPTH,
FAULT_EVICT_INODE,
FAULT_TRUNCATE,
FAULT_IO,
FAULT_CHECKPOINT,
FAULT_MAX,
......@@ -62,7 +63,7 @@ struct f2fs_fault_info {
};
extern char *fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) (fi->inject_type & (1 << (type)))
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
/*
......@@ -88,9 +89,9 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_ADAPTIVE 0x00020000
#define F2FS_MOUNT_LFS 0x00040000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option)
#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option)
#define ver_after(a, b) (typecheck(unsigned long long, a) && \
typecheck(unsigned long long, b) && \
......@@ -124,22 +125,20 @@ enum {
SIT_BITMAP
};
enum {
CP_UMOUNT,
CP_FASTBOOT,
CP_SYNC,
CP_RECOVERY,
CP_DISCARD,
};
#define CP_UMOUNT 0x00000001
#define CP_FASTBOOT 0x00000002
#define CP_SYNC 0x00000004
#define CP_RECOVERY 0x00000008
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
#define DEF_BATCHED_TRIM_SECTIONS 2048
#define BATCHED_TRIM_SEGMENTS(sbi) \
(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
(GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections))
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) \
((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec)
#define DISCARD_ISSUE_RATE 8
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DISCARD_ISSUE_RATE 8
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
......@@ -181,37 +180,63 @@ struct inode_entry {
struct inode *inode; /* vfs inode pointer */
};
/* for the list of blockaddresses to be discarded */
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
block_t blkaddr; /* block address to be discarded */
int len; /* # of consecutive blocks of the discard */
block_t start_blkaddr; /* start blockaddr of current segment */
unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */
};
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : (blk_num - 1))
enum {
D_PREP,
D_SUBMIT,
D_DONE,
};
struct discard_info {
block_t lstart; /* logical start address */
block_t len; /* length */
block_t start; /* actual start address in dev */
};
struct discard_cmd {
struct rb_node rb_node; /* rb node located in rb-tree */
union {
struct {
block_t lstart; /* logical start address */
block_t len; /* length */
block_t start; /* actual start address in dev */
};
struct discard_info di; /* discard info */
};
struct list_head list; /* command list */
struct completion wait; /* compleation */
block_t lstart; /* logical start address */
block_t len; /* length */
struct bio *bio; /* bio */
int state; /* state */
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
int error; /* bio error */
};
struct discard_cmd_control {
struct task_struct *f2fs_issue_discard; /* discard thread */
struct list_head discard_entry_list; /* 4KB discard entry list */
int nr_discards; /* # of discards in the list */
struct list_head discard_cmd_list; /* discard cmd list */
struct list_head entry_list; /* 4KB discard entry list */
struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
struct list_head wait_list; /* store on-flushing entries */
wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
struct mutex cmd_lock;
int max_discards; /* max. discards to be issued */
atomic_t submit_discard; /* # of issued discard */
unsigned int nr_discards; /* # of discards in the list */
unsigned int max_discards; /* max. discards to be issued */
unsigned int undiscard_blks; /* # of undiscard blocks */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
};
/* for the list of fsync inodes, used only during recovery */
......@@ -222,13 +247,13 @@ struct fsync_inode_entry {
block_t last_dentry; /* block address locating the last dentry */
};
#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats))
#define sits_in_cursum(jnl) (le16_to_cpu(jnl->n_sits))
#define nats_in_cursum(jnl) (le16_to_cpu((jnl)->n_nats))
#define sits_in_cursum(jnl) (le16_to_cpu((jnl)->n_sits))
#define nat_in_journal(jnl, i) (jnl->nat_j.entries[i].ne)
#define nid_in_journal(jnl, i) (jnl->nat_j.entries[i].nid)
#define sit_in_journal(jnl, i) (jnl->sit_j.entries[i].se)
#define segno_in_journal(jnl, i) (jnl->sit_j.entries[i].segno)
#define nat_in_journal(jnl, i) ((jnl)->nat_j.entries[i].ne)
#define nid_in_journal(jnl, i) ((jnl)->nat_j.entries[i].nid)
#define sit_in_journal(jnl, i) ((jnl)->sit_j.entries[i].se)
#define segno_in_journal(jnl, i) ((jnl)->sit_j.entries[i].segno)
#define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl))
#define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl))
......@@ -270,11 +295,14 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \
struct f2fs_defragment)
#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
struct f2fs_move_range)
#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \
struct f2fs_flush_device)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
......@@ -311,6 +339,11 @@ struct f2fs_move_range {
u64 len; /* size to move */
};
struct f2fs_flush_device {
u32 dev_num; /* device number to flush */
u32 segments; /* # of segments to flush */
};
/*
* For INODE and NODE manager
*/
......@@ -323,26 +356,24 @@ struct f2fs_dentry_ptr {
int max;
};
static inline void make_dentry_ptr(struct inode *inode,
struct f2fs_dentry_ptr *d, void *src, int type)
static inline void make_dentry_ptr_block(struct inode *inode,
struct f2fs_dentry_ptr *d, struct f2fs_dentry_block *t)
{
d->inode = inode;
d->max = NR_DENTRY_IN_BLOCK;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
}
if (type == 1) {
struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
d->max = NR_DENTRY_IN_BLOCK;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
} else {
struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
d->max = NR_INLINE_DENTRY;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
}
static inline void make_dentry_ptr_inline(struct inode *inode,
struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t)
{
d->inode = inode;
d->max = NR_INLINE_DENTRY;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
}
/*
......@@ -374,16 +405,30 @@ enum {
/* number of extent info in extent cache we try to shrink */
#define EXTENT_CACHE_SHRINK_NUMBER 128
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
unsigned int ofs; /* start offset of the entry */
unsigned int len; /* length of the entry */
};
struct extent_info {
unsigned int fofs; /* start offset in a file */
u32 blk; /* start block address of the extent */
unsigned int len; /* length of the extent */
u32 blk; /* start block address of the extent */
};
struct extent_node {
struct rb_node rb_node; /* rb node located in rb-tree */
struct rb_node rb_node;
union {
struct {
unsigned int fofs;
unsigned int len;
u32 blk;
};
struct extent_info ei; /* extent info */
};
struct list_head list; /* node in global extent list of sbi */
struct extent_info ei; /* extent info */
struct extent_tree *et; /* extent tree pointer */
};
......@@ -500,6 +545,24 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
ei->len = len;
}
static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front)
{
return back->lstart + back->len == front->lstart;
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
struct discard_info *back)
{
return __is_discard_mergeable(back, cur);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
struct discard_info *front)
{
return __is_discard_mergeable(cur, front);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
struct extent_info *front)
{
......@@ -562,7 +625,6 @@ struct f2fs_nm_info {
unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
unsigned char *nat_block_bitmap;
unsigned short *free_nid_count; /* free nid count of NAT block */
spinlock_t free_nid_lock; /* protect updating of nid count */
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
......@@ -641,7 +703,8 @@ struct flush_cmd {
struct flush_cmd_control {
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
atomic_t submit_flush; /* # of issued flushes */
atomic_t issued_flush; /* # of issued flushes */
atomic_t issing_flush; /* # of issing flushes */
struct llist_head issue_list; /* list for command issue */
struct llist_node *dispatch_list; /* list for command dispatch */
};
......@@ -672,6 +735,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
/* for flush command control */
struct flush_cmd_control *fcc_info;
......@@ -722,6 +786,7 @@ enum page_type {
META_FLUSH,
INMEM, /* the below types are used by tracepoints only. */
INMEM_DROP,
INMEM_INVALIDATE,
INMEM_REVOKE,
IPU,
OPU,
......@@ -737,9 +802,10 @@ struct f2fs_io_info {
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
bool submitted; /* indicate IO submission */
bool need_lock; /* indicate we need to lock cp_rwsem */
};
#define is_read_io(rw) (rw == READ)
#define is_read_io(rw) ((rw) == READ)
struct f2fs_bio_info {
struct f2fs_sb_info *sbi; /* f2fs superblock */
struct bio *bio; /* bios to merge */
......@@ -827,6 +893,7 @@ struct f2fs_sb_info {
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
struct rw_semaphore node_change; /* locking node change */
wait_queue_head_t cp_wait;
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
......@@ -879,6 +946,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;
/* writeback control */
atomic_t wb_sync_req; /* count # of WB_SYNC threads */
/* valid inode count */
struct percpu_counter total_valid_inode_count;
......@@ -912,11 +982,12 @@ struct f2fs_sb_info {
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
atomic_t aw_cnt; /* # of atomic writes */
atomic_t vw_cnt; /* # of volatile writes */
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
int bg_gc; /* background gc calls */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
unsigned int last_victim[2]; /* last victim segment # */
spinlock_t stat_lock; /* lock for stat operations */
/* For sysfs suppport */
......@@ -971,8 +1042,8 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
#define BD_PART_WRITTEN(s) \
(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) - \
s->sectors_written_start) >> 1)
(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) - \
(s)->sectors_written_start) >> 1)
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
......@@ -1193,7 +1264,7 @@ static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
{
bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set;
}
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
......@@ -1229,7 +1300,7 @@ static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
static inline bool __remain_node_summaries(int reason)
{
return (reason == CP_UMOUNT || reason == CP_FASTBOOT);
return (reason & (CP_UMOUNT | CP_FASTBOOT));
}
static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
......@@ -1707,6 +1778,7 @@ enum {
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
FI_HOT_DATA, /* indicate file is hot */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
......@@ -1869,12 +1941,6 @@ static inline int f2fs_has_inline_data(struct inode *inode)
return is_inode_flag_set(inode, FI_INLINE_DATA);
}
static inline void f2fs_clear_inline_inode(struct inode *inode)
{
clear_inode_flag(inode, FI_INLINE_DATA);
clear_inode_flag(inode, FI_DATA_EXIST);
}
static inline int f2fs_exist_data(struct inode *inode)
{
return is_inode_flag_set(inode, FI_DATA_EXIST);
......@@ -2029,12 +2095,6 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
/* get offset of first page in next direct node */
#define PGOFS_OF_NEXT_DNODE(pgofs, inode) \
((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) : \
(pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \
ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
/*
* file.c
*/
......@@ -2096,8 +2156,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
struct page **page);
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode);
int update_dent_inode(struct inode *inode, struct inode *to,
const struct qstr *name);
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
const struct qstr *name, f2fs_hash_t name_hash,
unsigned int bit_pos);
......@@ -2185,6 +2243,7 @@ void destroy_node_manager_caches(void);
*/
void register_inmem_page(struct inode *inode, struct page *page);
void drop_inmem_pages(struct inode *inode);
void drop_inmem_page(struct inode *inode, struct page *page);
int commit_inmem_pages(struct inode *inode);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
......@@ -2194,7 +2253,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void release_discard_addrs(struct f2fs_sb_info *sbi);
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
......@@ -2206,7 +2265,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
void rewrite_data_page(struct f2fs_io_info *fio);
int rewrite_data_page(struct f2fs_io_info *fio);
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
......@@ -2311,7 +2370,8 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
int start_gc_thread(struct f2fs_sb_info *sbi);
void stop_gc_thread(struct f2fs_sb_info *sbi);
block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void build_gc_manager(struct f2fs_sb_info *sbi);
/*
......@@ -2335,11 +2395,15 @@ struct f2fs_stat_info {
int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
int inmem_pages;
unsigned int ndirty_dirs, ndirty_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
int nats, dirty_nats, sits, dirty_sits;
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
int bg_gc, nr_wb_cp_data, nr_wb_data;
int nr_flushing, nr_flushed, nr_discarding, nr_discarded;
int nr_discard_cmd;
unsigned int undiscard_blks;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
int aw_cnt, max_aw_cnt;
int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
......@@ -2422,11 +2486,22 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
if (cur > max) \
atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
} while (0)
#define stat_inc_volatile_write(inode) \
(atomic_inc(&F2FS_I_SB(inode)->vw_cnt))
#define stat_dec_volatile_write(inode) \
(atomic_dec(&F2FS_I_SB(inode)->vw_cnt))
#define stat_update_max_volatile_write(inode) \
do { \
int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt); \
int max = atomic_read(&F2FS_I_SB(inode)->max_vw_cnt); \
if (cur > max) \
atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur); \
} while (0)
#define stat_inc_seg_count(sbi, type, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
(si)->tot_segs++; \
if (type == SUM_TYPE_DATA) { \
si->tot_segs++; \
if ((type) == SUM_TYPE_DATA) { \
si->data_segs++; \
si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \
} else { \
......@@ -2436,14 +2511,14 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
} while (0)
#define stat_inc_tot_blk_count(si, blks) \
(si->tot_blks += (blks))
((si)->tot_blks += (blks))
#define stat_inc_data_blk_count(sbi, blks, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->data_blks += (blks); \
si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \
si->bg_data_blks += ((gc_type) == BG_GC) ? (blks) : 0; \
} while (0)
#define stat_inc_node_blk_count(sbi, blks, gc_type) \
......@@ -2451,7 +2526,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->node_blks += (blks); \
si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0; \
} while (0)
int f2fs_build_stats(struct f2fs_sb_info *sbi);
......@@ -2459,32 +2534,35 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
int __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si)
#define stat_inc_bg_cp_count(si)
#define stat_inc_call_count(si)
#define stat_inc_bggc_count(si)
#define stat_inc_dirty_inode(sbi, type)
#define stat_dec_dirty_inode(sbi, type)
#define stat_inc_total_hit(sb)
#define stat_inc_rbtree_node_hit(sb)
#define stat_inc_largest_node_hit(sbi)
#define stat_inc_cached_node_hit(sbi)
#define stat_inc_inline_xattr(inode)
#define stat_dec_inline_xattr(inode)
#define stat_inc_inline_inode(inode)
#define stat_dec_inline_inode(inode)
#define stat_inc_inline_dir(inode)
#define stat_dec_inline_dir(inode)
#define stat_inc_atomic_write(inode)
#define stat_dec_atomic_write(inode)
#define stat_update_max_atomic_write(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_inplace_blocks(sbi)
#define stat_inc_seg_count(sbi, type, gc_type)
#define stat_inc_tot_blk_count(si, blks)
#define stat_inc_data_blk_count(sbi, blks, gc_type)
#define stat_inc_node_blk_count(sbi, blks, gc_type)
#define stat_inc_cp_count(si) do { } while (0)
#define stat_inc_bg_cp_count(si) do { } while (0)
#define stat_inc_call_count(si) do { } while (0)
#define stat_inc_bggc_count(si) do { } while (0)
#define stat_inc_dirty_inode(sbi, type) do { } while (0)
#define stat_dec_dirty_inode(sbi, type) do { } while (0)
#define stat_inc_total_hit(sb) do { } while (0)
#define stat_inc_rbtree_node_hit(sb) do { } while (0)
#define stat_inc_largest_node_hit(sbi) do { } while (0)
#define stat_inc_cached_node_hit(sbi) do { } while (0)
#define stat_inc_inline_xattr(inode) do { } while (0)
#define stat_dec_inline_xattr(inode) do { } while (0)
#define stat_inc_inline_inode(inode) do { } while (0)
#define stat_dec_inline_inode(inode) do { } while (0)
#define stat_inc_inline_dir(inode) do { } while (0)
#define stat_dec_inline_dir(inode) do { } while (0)
#define stat_inc_atomic_write(inode) do { } while (0)
#define stat_dec_atomic_write(inode) do { } while (0)
#define stat_update_max_atomic_write(inode) do { } while (0)
#define stat_inc_volatile_write(inode) do { } while (0)
#define stat_dec_volatile_write(inode) do { } while (0)
#define stat_update_max_volatile_write(inode) do { } while (0)
#define stat_inc_seg_type(sbi, curseg) do { } while (0)
#define stat_inc_block_count(sbi, curseg) do { } while (0)
#define stat_inc_inplace_blocks(sbi) do { } while (0)
#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0)
#define stat_inc_tot_blk_count(si, blks) do { } while (0)
#define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0)
#define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0)
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
......@@ -2510,7 +2588,7 @@ extern struct kmem_cache *inode_entry_slab;
bool f2fs_may_inline_data(struct inode *inode);
bool f2fs_may_inline_dentry(struct inode *inode);
void read_inline_data(struct page *page, struct page *ipage);
bool truncate_inline_inode(struct page *ipage, u64 from);
void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from);
int f2fs_read_inline_data(struct inode *inode, struct page *page);
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
int f2fs_convert_inline_inode(struct inode *inode);
......@@ -2545,6 +2623,18 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
struct rb_entry *__lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs);
struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs);
struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs,
struct rb_entry **prev_entry, struct rb_entry **next_entry,
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force);
bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
void f2fs_drop_extent_tree(struct inode *inode);
......
......@@ -116,11 +116,6 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
if (update_dent_inode(inode, inode, &dentry->d_name)) {
dput(dentry);
return 0;
}
*pino = parent_ino(dentry);
dput(dentry);
return 1;
......@@ -528,7 +523,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
page = get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return 0;
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user(page, offset, PAGE_SIZE - offset);
......@@ -566,9 +561,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
}
if (f2fs_has_inline_data(inode)) {
truncate_inline_inode(ipage, from);
if (from == 0)
clear_inode_flag(inode, FI_DATA_EXIST);
truncate_inline_inode(inode, ipage, from);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
......@@ -617,6 +610,12 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
#endif
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
......@@ -1188,8 +1187,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
if (offset + len > new_size)
new_size = offset + len;
new_size = max_t(loff_t, new_size, offset + len);
} else {
if (off_start) {
......@@ -1257,8 +1254,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
int ret = 0;
new_size = i_size_read(inode) + len;
if (new_size > inode->i_sb->s_maxbytes)
return -EFBIG;
ret = inode_newsize_ok(inode, new_size);
if (ret)
return ret;
if (offset >= i_size_read(inode))
return -EINVAL;
......@@ -1428,6 +1426,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
set_inode_flag(inode, FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
clear_inode_flag(inode, FI_DROP_CACHE);
......@@ -1474,10 +1473,10 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
if (ret)
return ret;
flags = f2fs_mask_flags(inode->i_mode, flags);
inode_lock(inode);
flags = f2fs_mask_flags(inode->i_mode, flags);
oldflags = fi->i_flags;
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
......@@ -1491,10 +1490,11 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
flags = flags & FS_FL_USER_MODIFIABLE;
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
fi->i_flags = flags;
inode_unlock(inode);
inode->i_ctime = current_time(inode);
f2fs_set_inode_flags(inode);
inode_unlock(inode);
out:
mnt_drop_write_file(filp);
return ret;
......@@ -1515,6 +1515,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
......@@ -1529,20 +1532,25 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
goto out;
set_inode_flag(inode, FI_ATOMIC_FILE);
set_inode_flag(inode, FI_HOT_DATA);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
if (!get_dirty_pages(inode))
goto out;
goto inc_stat;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
if (ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
out:
goto out;
}
inc_stat:
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
......@@ -1592,6 +1600,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
......@@ -1605,6 +1616,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (ret)
goto out;
stat_inc_volatile_write(inode);
stat_update_max_volatile_write(inode);
set_inode_flag(inode, FI_VOLATILE_FILE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
out:
......@@ -1660,6 +1674,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
......@@ -1841,7 +1856,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
mutex_lock(&sbi->gc_mutex);
}
ret = f2fs_gc(sbi, sync, true);
ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
out:
mnt_drop_write_file(filp);
return ret;
......@@ -1879,13 +1894,12 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
block_t blk_end = 0;
bool fragmented = false;
int err;
/* if in-place-update policy is enabled, don't waste time here */
if (need_inplace_update(inode))
if (need_inplace_update_policy(inode, NULL))
return -EINVAL;
pg_start = range->start >> PAGE_SHIFT;
......@@ -1943,7 +1957,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
map.m_lblk = pg_start;
map.m_len = pg_end - pg_start;
sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
/*
* make sure there are enough free section for LFS allocation, this can
......@@ -2020,42 +2034,40 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!S_ISREG(inode->i_mode))
if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
return -EINVAL;
err = mnt_want_write_file(filp);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
err = -EROFS;
goto out;
}
if (f2fs_readonly(sbi->sb))
return -EROFS;
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
sizeof(range))) {
err = -EFAULT;
goto out;
}
sizeof(range)))
return -EFAULT;
/* verify alignment of offset & size */
if (range.start & (F2FS_BLKSIZE - 1) ||
range.len & (F2FS_BLKSIZE - 1)) {
err = -EINVAL;
goto out;
}
if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
return -EINVAL;
if (unlikely((range.start + range.len) >> PAGE_SHIFT >
sbi->max_file_blocks))
return -EINVAL;
err = mnt_want_write_file(filp);
if (err)
return err;
err = f2fs_defragment_range(sbi, filp, &range);
mnt_drop_write_file(filp);
f2fs_update_time(sbi, REQ_TIME);
if (err < 0)
goto out;
return err;
if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
sizeof(range)))
err = -EFAULT;
out:
mnt_drop_write_file(filp);
return err;
return -EFAULT;
return 0;
}
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
......@@ -2189,6 +2201,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
range.pos_out, range.len);
mnt_drop_write_file(filp);
if (err)
goto err_out;
if (copy_to_user((struct f2fs_move_range __user *)arg,
&range, sizeof(range)))
......@@ -2198,6 +2212,69 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
return err;
}
static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct sit_info *sm = SIT_I(sbi);
unsigned int start_segno = 0, end_segno = 0;
unsigned int dev_start_segno = 0, dev_end_segno = 0;
struct f2fs_flush_device range;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (f2fs_readonly(sbi->sb))
return -EROFS;
if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
sizeof(range)))
return -EFAULT;
if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
sbi->segs_per_sec != 1) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Can't flush %u in %d for segs_per_sec %u != 1\n",
range.dev_num, sbi->s_ndevs,
sbi->segs_per_sec);
return -EINVAL;
}
ret = mnt_want_write_file(filp);
if (ret)
return ret;
if (range.dev_num != 0)
dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
start_segno = sm->last_victim[FLUSH_DEVICE];
if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
start_segno = dev_start_segno;
end_segno = min(start_segno + range.segments, dev_end_segno);
while (start_segno < end_segno) {
if (!mutex_trylock(&sbi->gc_mutex)) {
ret = -EBUSY;
goto out;
}
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
ret = f2fs_gc(sbi, true, true, start_segno);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
break;
start_segno++;
}
out:
mnt_drop_write_file(filp);
return ret;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
......@@ -2235,6 +2312,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
return f2fs_ioc_move_range(filp, arg);
case F2FS_IOC_FLUSH_DEVICE:
return f2fs_ioc_flush_device(filp, arg);
default:
return -ENOTTY;
}
......@@ -2302,8 +2381,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GARBAGE_COLLECT:
case F2FS_IOC_WRITE_CHECKPOINT:
case F2FS_IOC_DEFRAGMENT:
break;
case F2FS_IOC_MOVE_RANGE:
case F2FS_IOC_FLUSH_DEVICE:
break;
default:
return -ENOIOCTLCMD;
......
......@@ -84,7 +84,7 @@ static int gc_thread_func(void *data)
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true))
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
......@@ -172,7 +172,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
/* let's select beginning hot/small space first */
if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}
static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
......@@ -182,7 +186,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
return sbi->blocks_per_seg * p->ofs_unit;
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
......@@ -207,7 +211,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
return GET_SEG_FROM_SEC(sbi, secno);
}
return NULL_SEGNO;
}
......@@ -215,8 +219,8 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned int secno = GET_SECNO(sbi, segno);
unsigned int start = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
unsigned long long mtime = 0;
unsigned int vblocks;
unsigned char age = 0;
......@@ -225,7 +229,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
for (i = 0; i < sbi->segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
vblocks = get_valid_blocks(sbi, segno, true);
mtime = div_u64(mtime, sbi->segs_per_sec);
vblocks = div_u64(vblocks, sbi->segs_per_sec);
......@@ -248,7 +252,7 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
unsigned int segno)
{
unsigned int valid_blocks =
get_valid_blocks(sbi, segno, sbi->segs_per_sec);
get_valid_blocks(sbi, segno, true);
return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
valid_blocks * 2 : valid_blocks;
......@@ -291,6 +295,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
unsigned int *result, int gc_type, int type, char alloc_mode)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
......@@ -304,10 +309,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_segno = NULL_SEGNO;
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
}
if (p.max_search == 0)
goto out;
last_victim = sbi->last_victim[p.gc_mode];
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
......@@ -320,9 +333,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
if (segno >= last_segment) {
if (sbi->last_victim[p.gc_mode]) {
last_segment = sbi->last_victim[p.gc_mode];
sbi->last_victim[p.gc_mode] = 0;
if (sm->last_victim[p.gc_mode]) {
last_segment =
sm->last_victim[p.gc_mode];
sm->last_victim[p.gc_mode] = 0;
p.offset = 0;
continue;
}
......@@ -339,7 +353,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
secno = GET_SECNO(sbi, segno);
secno = GET_SEC_FROM_SEG(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
......@@ -357,17 +371,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
next:
if (nsearched >= p.max_search) {
if (!sbi->last_victim[p.gc_mode] && segno <= last_victim)
sbi->last_victim[p.gc_mode] = last_victim + 1;
if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sbi->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
break;
}
}
if (p.min_segno != NULL_SEGNO) {
got_it:
if (p.alloc_mode == LFS) {
secno = GET_SECNO(sbi, p.min_segno);
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
sbi->cur_victim_sec = secno;
else
......@@ -550,8 +565,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
get_node_info(sbi, nid, dni);
if (sum->version != dni->version) {
f2fs_put_page(node_page, 1);
return false;
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: valid data with mismatched node version.",
__func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
*nofs = ofs_of_node(node_page);
......@@ -697,8 +714,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC,
.old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
.need_lock = true,
};
bool is_dirty = PageDirty(page);
int err;
......@@ -890,7 +909,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
if (get_valid_blocks(sbi, segno, 1) == 0 ||
if (get_valid_blocks(sbi, segno, false) == 0 ||
!PageUptodate(sum_page) ||
unlikely(f2fs_cp_error(sbi)))
goto next;
......@@ -905,7 +924,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
* - mutex_lock(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
gc_node_segment(sbi, sum->entries, segno, gc_type);
else
......@@ -924,7 +942,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
blk_finish_plug(&plug);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0)
get_valid_blocks(sbi, start_segno, true) == 0)
sec_freed = 1;
stat_inc_call_count(sbi->stat_info);
......@@ -932,13 +950,14 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
return sec_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
bool background, unsigned int segno)
{
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
int ret = -EINVAL;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
......@@ -959,9 +978,11 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
if (prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
if (has_not_enough_free_secs(sbi, 0, 0))
gc_type = FG_GC;
}
......@@ -981,13 +1002,17 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
if (has_not_enough_free_secs(sbi, sec_freed, 0))
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
segno = NULL_SEGNO;
goto gc_more;
}
if (gc_type == FG_GC)
ret = write_checkpoint(sbi, &cpc);
}
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
mutex_unlock(&sbi->gc_mutex);
put_gc_inode(&gc_list);
......@@ -999,7 +1024,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
void build_gc_manager(struct f2fs_sb_info *sbi)
{
u64 main_count, resv_count, ovp_count, blocks_per_sec;
u64 main_count, resv_count, ovp_count;
DIRTY_I(sbi)->v_ops = &default_v_ops;
......@@ -1007,8 +1032,12 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec,
(main_count - resv_count));
sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
BLKS_PER_SEC(sbi), (main_count - resv_count));
/* give warm/cold data area from slower device */
if (sbi->s_ndevs && sbi->segs_per_sec == 1)
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
......@@ -63,19 +63,21 @@ void read_inline_data(struct page *page, struct page *ipage)
SetPageUptodate(page);
}
bool truncate_inline_inode(struct page *ipage, u64 from)
void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
{
void *addr;
if (from >= MAX_INLINE_DATA)
return false;
return;
addr = inline_data_addr(ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memset(addr + from, 0, MAX_INLINE_DATA - from);
set_page_dirty(ipage);
return true;
if (from == 0)
clear_inode_flag(inode, FI_DATA_EXIST);
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
......@@ -135,6 +137,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
/* write data page to try to make data consistent */
set_page_writeback(page);
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
write_data_page(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty) {
......@@ -146,11 +149,11 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
set_inode_flag(dn->inode, FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
truncate_inline_inode(dn->inode_page, 0);
truncate_inline_inode(dn->inode, dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
f2fs_put_dnode(dn);
return 0;
}
......@@ -267,9 +270,8 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
if (!truncate_inline_inode(ipage, 0))
return false;
f2fs_clear_inline_inode(inode);
truncate_inline_inode(inode, ipage, 0);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
if (truncate_blocks(inode, 0, false))
......@@ -300,7 +302,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(ipage);
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(NULL, &d, inline_dentry);
de = find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
......@@ -319,7 +321,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
dentry_blk = inline_data_addr(ipage);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
make_dentry_ptr_inline(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
......@@ -380,7 +382,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
truncate_inline_inode(ipage, 0);
truncate_inline_inode(dir, ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
......@@ -400,7 +402,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
unsigned long bit_pos = 0;
int err = 0;
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(NULL, &d, inline_dentry);
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
......@@ -455,7 +457,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
}
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
truncate_inline_inode(ipage, 0);
truncate_inline_inode(dir, ipage, 0);
unlock_page(ipage);
......@@ -527,14 +529,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
make_dentry_ptr_inline(NULL, &d, dentry_blk);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
......@@ -623,7 +623,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
inline_dentry = inline_data_addr(ipage);
make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(inode, &d, inline_dentry);
err = f2fs_fill_dentries(ctx, &d, 0, fstr);
if (!err)
......
......@@ -316,7 +316,6 @@ int update_inode_page(struct inode *inode)
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi, false);
}
f2fs_inode_synced(inode);
return 0;
}
ret = update_inode(inode, node_page);
......@@ -339,7 +338,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
if (update_inode_page(inode) && wbc && wbc->nr_to_write)
update_inode_page(inode);
if (wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
}
......@@ -372,13 +372,6 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
goto no_delete;
}
#endif
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
......@@ -389,6 +382,12 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
#endif
if (!err) {
f2fs_lock_op(sbi);
err = remove_inode_page(inode);
......@@ -411,7 +410,10 @@ void f2fs_evict_inode(struct inode *inode)
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
/* ino == 0, if f2fs_new_inode() was failed t*/
if (inode->i_ino)
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino,
inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (inode->i_nlink) {
......@@ -448,6 +450,7 @@ void handle_failed_inode(struct inode *inode)
* in a panic when flushing dirty inodes in gdirty_list.
*/
update_inode_page(inode);
f2fs_inode_synced(inode);
/* don't make bad inode, since it becomes a regular file. */
unlock_new_inode(inode);
......
......@@ -148,8 +148,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
......@@ -163,6 +161,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out:
handle_failed_inode(inode);
......@@ -424,8 +424,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
......@@ -488,6 +486,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
}
kfree(sd);
f2fs_balance_fs(sbi, true);
return err;
out:
handle_failed_inode(inode);
......@@ -509,8 +509,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
f2fs_balance_fs(sbi, true);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
......@@ -525,6 +523,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out_fail:
......@@ -555,8 +555,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
......@@ -570,6 +568,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out:
handle_failed_inode(inode);
......@@ -596,8 +596,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
......@@ -623,6 +621,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op(sbi);
unlock_new_inode(inode);
f2fs_balance_fs(sbi, true);
return 0;
release_out:
......@@ -721,13 +721,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto put_out_dir;
err = update_dent_inode(old_inode, new_inode,
&new_dentry->d_name);
if (err) {
release_orphan_inode(sbi);
goto put_out_dir;
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_inode->i_ctime = current_time(new_inode);
......@@ -780,8 +773,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
down_write(&F2FS_I(old_inode)->i_sem);
file_lost_pino(old_inode);
if (new_inode && file_enc_name(new_inode))
file_set_enc_name(old_inode);
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
......@@ -909,8 +900,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old_nlink = old_dir_entry ? -1 : 1;
new_nlink = -old_nlink;
err = -EMLINK;
if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
(new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) ||
(new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX))
goto out_new_dir;
}
......@@ -918,18 +909,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
if (err)
goto out_unlock;
if (file_enc_name(new_inode))
file_set_enc_name(old_inode);
err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name);
if (err)
goto out_undo;
if (file_enc_name(old_inode))
file_set_enc_name(new_inode);
/* update ".." directory entry info of old dentry */
if (old_dir_entry)
f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
......@@ -973,14 +952,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
return 0;
out_undo:
/*
* Still we may fail to recover name info of f2fs_inode here
* Drop it, once its name is set as encrypted
*/
update_dent_inode(old_inode, old_inode, &old_dentry->d_name);
out_unlock:
f2fs_unlock_op(sbi);
out_new_dir:
if (new_dir_entry) {
f2fs_dentry_kunmap(new_inode, new_dir_page);
......
......@@ -22,7 +22,7 @@
#include "trace.h"
#include <trace/events/f2fs.h>
#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
......@@ -63,8 +63,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
int i;
for (i = 0; i <= UPDATE_INO; i++)
mem_size += (sbi->im[i].ino_num *
sizeof(struct ino_entry)) >> PAGE_SHIFT;
mem_size += sbi->im[i].ino_num *
sizeof(struct ino_entry);
mem_size >>= PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
mem_size = (atomic_read(&sbi->total_ext_tree) *
......@@ -177,18 +178,12 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry *ne)
struct nat_entry_set *set, struct nat_entry *ne)
{
nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
struct nat_entry_set *head;
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (head) {
list_move_tail(&ne->list, &nm_i->nat_entries);
set_nat_flag(ne, IS_DIRTY, false);
head->entry_cnt--;
nm_i->dirty_nat_cnt--;
}
list_move_tail(&ne->list, &nm_i->nat_entries);
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
}
static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
......@@ -381,6 +376,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
struct page *page = NULL;
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
int i;
ni->nid = nid;
......@@ -406,17 +402,21 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
node_info_from_raw_nat(ni, &ne);
}
up_read(&curseg->journal_rwsem);
if (i >= 0)
if (i >= 0) {
up_read(&nm_i->nat_tree_lock);
goto cache;
}
/* Fill node_info from nat page */
page = get_current_nat_page(sbi, start_nid);
index = current_nat_addr(sbi, nid);
up_read(&nm_i->nat_tree_lock);
page = get_meta_page(sbi, index);
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
up_read(&nm_i->nat_tree_lock);
/* cache nat entry */
down_write(&nm_i->nat_tree_lock);
cache_nat_entry(sbi, nid, &ne);
......@@ -1463,6 +1463,9 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_wait_on_page_writeback(page, NODE, true);
BUG_ON(PageWriteback(page));
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
if (!atomic || page == last_page) {
set_fsync_mark(page, 1);
if (IS_INODE(page)) {
......@@ -1766,40 +1769,67 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct free_nid *i, *e;
struct nat_entry *ne;
int err;
int err = -EINVAL;
bool ret = false;
/* 0 nid should not be used */
if (unlikely(nid == 0))
return false;
if (build) {
/* do not add allocated nids */
ne = __lookup_nat_cache(nm_i, nid);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
return false;
}
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = NID_NEW;
if (radix_tree_preload(GFP_NOFS)) {
kmem_cache_free(free_nid_slab, i);
return true;
}
if (radix_tree_preload(GFP_NOFS))
goto err;
spin_lock(&nm_i->nid_list_lock);
if (build) {
/*
* Thread A Thread B
* - f2fs_create
* - f2fs_new_inode
* - alloc_nid
* - __insert_nid_to_list(ALLOC_NID_LIST)
* - f2fs_balance_fs_bg
* - build_free_nids
* - __build_free_nids
* - scan_nat_page
* - add_free_nid
* - __lookup_nat_cache
* - f2fs_add_link
* - init_inode_metadata
* - new_inode_page
* - new_node_page
* - set_node_addr
* - alloc_nid_done
* - __remove_nid_from_list(ALLOC_NID_LIST)
* - __insert_nid_to_list(FREE_NID_LIST)
*/
ne = __lookup_nat_cache(nm_i, nid);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
goto err_out;
e = __lookup_free_nid_list(nm_i, nid);
if (e) {
if (e->state == NID_NEW)
ret = true;
goto err_out;
}
}
ret = true;
err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
err_out:
spin_unlock(&nm_i->nid_list_lock);
radix_tree_preload_end();
if (err) {
err:
if (err)
kmem_cache_free(free_nid_slab, i);
return true;
}
return true;
return ret;
}
static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
......@@ -1821,7 +1851,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
}
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
bool set, bool build, bool locked)
bool set, bool build)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
......@@ -1835,14 +1865,10 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
else
__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
if (!locked)
spin_lock(&nm_i->free_nid_lock);
if (set)
nm_i->free_nid_count[nat_ofs]++;
else if (!build)
nm_i->free_nid_count[nat_ofs]--;
if (!locked)
spin_unlock(&nm_i->free_nid_lock);
}
static void scan_nat_page(struct f2fs_sb_info *sbi,
......@@ -1871,7 +1897,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR)
freed = add_free_nid(sbi, start_nid, true);
update_free_nid_bitmap(sbi, start_nid, freed, true, false);
spin_lock(&NM_I(sbi)->nid_list_lock);
update_free_nid_bitmap(sbi, start_nid, freed, true);
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
......@@ -1927,6 +1955,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
int i = 0;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
return;
......@@ -2026,7 +2057,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
nm_i->available_nids--;
update_free_nid_bitmap(sbi, *nid, false, false, false);
update_free_nid_bitmap(sbi, *nid, false, false);
spin_unlock(&nm_i->nid_list_lock);
return true;
......@@ -2082,7 +2113,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
nm_i->available_nids++;
update_free_nid_bitmap(sbi, nid, true, false, false);
update_free_nid_bitmap(sbi, nid, true, false);
spin_unlock(&nm_i->nid_list_lock);
......@@ -2407,16 +2438,16 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
raw_nat_from_node_info(raw_ne, &ne->ni);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
__clear_nat_cache_dirty(NM_I(sbi), set, ne);
if (nat_get_blkaddr(ne) == NULL_ADDR) {
add_free_nid(sbi, nid, false);
spin_lock(&NM_I(sbi)->nid_list_lock);
NM_I(sbi)->available_nids++;
update_free_nid_bitmap(sbi, nid, true, false, false);
update_free_nid_bitmap(sbi, nid, true, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
} else {
spin_lock(&NM_I(sbi)->nid_list_lock);
update_free_nid_bitmap(sbi, nid, false, false, false);
update_free_nid_bitmap(sbi, nid, false, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
......@@ -2428,10 +2459,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
f2fs_bug_on(sbi, set->entry_cnt);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
kmem_cache_free(nat_entry_set_slab, set);
/* Allow dirty nats by node block allocation in write_begin */
if (!set->entry_cnt) {
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
kmem_cache_free(nat_entry_set_slab, set);
}
}
/*
......@@ -2476,8 +2508,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__flush_nat_entry_set(sbi, set, cpc);
up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
/* Allow dirty nats by node block allocation in write_begin */
}
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
......@@ -2541,10 +2572,10 @@ inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
nid = i * NAT_ENTRY_PER_BLOCK;
last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
spin_lock(&nm_i->free_nid_lock);
spin_lock(&NM_I(sbi)->nid_list_lock);
for (; nid < last_nid; nid++)
update_free_nid_bitmap(sbi, nid, true, true, true);
spin_unlock(&nm_i->free_nid_lock);
update_free_nid_bitmap(sbi, nid, true, true);
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
for (i = 0; i < nm_i->nat_blocks; i++) {
......@@ -2635,9 +2666,6 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
sizeof(unsigned short), GFP_KERNEL);
if (!nm_i->free_nid_count)
return -ENOMEM;
spin_lock_init(&nm_i->free_nid_lock);
return 0;
}
......
......@@ -9,10 +9,10 @@
* published by the Free Software Foundation.
*/
/* start node id of a node block dedicated to the given node id */
#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
/* node block offset on the NAT area dedicated to the given start node id */
#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK)
/* # of pages to perform synchronous readahead before building free nids */
#define FREE_NID_PAGES 8
......@@ -62,16 +62,16 @@ struct nat_entry {
struct node_info ni; /* in-memory node information */
};
#define nat_get_nid(nat) (nat->ni.nid)
#define nat_set_nid(nat, n) (nat->ni.nid = n)
#define nat_get_blkaddr(nat) (nat->ni.blk_addr)
#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b)
#define nat_get_ino(nat) (nat->ni.ino)
#define nat_set_ino(nat, i) (nat->ni.ino = i)
#define nat_get_version(nat) (nat->ni.version)
#define nat_set_version(nat, v) (nat->ni.version = v)
#define nat_get_nid(nat) ((nat)->ni.nid)
#define nat_set_nid(nat, n) ((nat)->ni.nid = (n))
#define nat_get_blkaddr(nat) ((nat)->ni.blk_addr)
#define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b))
#define nat_get_ino(nat) ((nat)->ni.ino)
#define nat_set_ino(nat, i) ((nat)->ni.ino = (i))
#define nat_get_version(nat) ((nat)->ni.version)
#define nat_set_version(nat, v) ((nat)->ni.version = (v))
#define inc_node_version(version) (++version)
#define inc_node_version(version) (++(version))
static inline void copy_node_info(struct node_info *dst,
struct node_info *src)
......@@ -200,13 +200,16 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
struct f2fs_nm_info *nm_i = NM_I(sbi);
pgoff_t block_off;
pgoff_t block_addr;
int seg_off;
/*
* block_off = segment_off * 512 + off_in_segment
* OLD = (segment_off * 512) * 2 + off_in_segment
* NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment
*/
block_off = NAT_BLOCK_OFFSET(start);
seg_off = block_off >> sbi->log_blocks_per_seg;
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
(block_off << 1) -
(block_off & (sbi->blocks_per_seg - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
......
......@@ -198,7 +198,8 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
struct curseg_info *curseg;
struct page *page = NULL;
......@@ -225,7 +226,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
entry = get_fsync_inode(head, ino_of_node(page));
if (!entry) {
if (IS_INODE(page) && is_dent_dnode(page)) {
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
break;
......@@ -569,7 +571,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
mutex_lock(&sbi->cp_mutex);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
err = find_fsync_dnodes(sbi, &inode_list, check_only);
if (err || list_empty(&inode_list))
goto out;
......
......@@ -250,6 +250,36 @@ void drop_inmem_pages(struct inode *inode)
stat_dec_atomic_write(inode);
}
void drop_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct list_head *head = &fi->inmem_pages;
struct inmem_pages *cur = NULL;
f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
mutex_lock(&fi->inmem_lock);
list_for_each_entry(cur, head, list) {
if (cur->page == page)
break;
}
f2fs_bug_on(sbi, !cur || cur->page != page);
list_del(&cur->list);
mutex_unlock(&fi->inmem_lock);
dec_page_count(sbi, F2FS_INMEM_PAGES);
kmem_cache_free(inmem_entry_slab, cur);
ClearPageUptodate(page);
set_page_private(page, 0);
ClearPagePrivate(page);
f2fs_put_page(page, 0);
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}
static int __commit_inmem_pages(struct inode *inode,
struct list_head *revoke_list)
{
......@@ -261,7 +291,6 @@ static int __commit_inmem_pages(struct inode *inode,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
pgoff_t last_idx = ULONG_MAX;
int err = 0;
......@@ -281,6 +310,9 @@ static int __commit_inmem_pages(struct inode *inode,
}
fio.page = page;
fio.old_blkaddr = NULL_ADDR;
fio.encrypted_page = NULL;
fio.need_lock = false,
err = do_write_data_page(&fio);
if (err) {
unlock_page(page);
......@@ -358,11 +390,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
}
#endif
if (!need)
return;
/* balance_fs_bg is able to be pending */
if (excess_cached_nats(sbi))
if (need && excess_cached_nats(sbi))
f2fs_balance_fs_bg(sbi);
/*
......@@ -371,7 +400,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
mutex_lock(&sbi->gc_mutex);
f2fs_gc(sbi, false, false);
f2fs_gc(sbi, false, false, NULL_SEGNO);
}
}
......@@ -390,7 +419,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
build_free_nids(sbi, false, false);
if (!is_idle(sbi))
if (!is_idle(sbi) && !excess_dirty_nats(sbi))
return;
/* checkpoint is the only way to shrink partial cached entries */
......@@ -411,32 +440,34 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
}
}
static int __submit_flush_wait(struct block_device *bdev)
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
struct bio *bio = f2fs_bio_alloc(0);
int ret;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
bio->bi_bdev = bdev;
ret = submit_bio_wait(bio);
bio_put(bio);
trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
test_opt(sbi, FLUSH_MERGE), ret);
return ret;
}
static int submit_flush_wait(struct f2fs_sb_info *sbi)
{
int ret = __submit_flush_wait(sbi->sb->s_bdev);
int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev);
int i;
if (sbi->s_ndevs && !ret) {
for (i = 1; i < sbi->s_ndevs; i++) {
trace_f2fs_issue_flush(FDEV(i).bdev,
test_opt(sbi, NOBARRIER),
test_opt(sbi, FLUSH_MERGE));
ret = __submit_flush_wait(FDEV(i).bdev);
if (ret)
break;
}
if (!sbi->s_ndevs || ret)
return ret;
for (i = 1; i < sbi->s_ndevs; i++) {
ret = __submit_flush_wait(sbi, FDEV(i).bdev);
if (ret)
break;
}
return ret;
}
......@@ -458,6 +489,8 @@ static int issue_flush_thread(void *data)
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
ret = submit_flush_wait(sbi);
atomic_inc(&fcc->issued_flush);
llist_for_each_entry_safe(cmd, next,
fcc->dispatch_list, llnode) {
cmd->ret = ret;
......@@ -475,25 +508,29 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
struct flush_cmd cmd;
int ret;
if (test_opt(sbi, NOBARRIER))
return 0;
if (!test_opt(sbi, FLUSH_MERGE))
return submit_flush_wait(sbi);
if (!atomic_read(&fcc->submit_flush)) {
int ret;
if (!test_opt(sbi, FLUSH_MERGE)) {
ret = submit_flush_wait(sbi);
atomic_inc(&fcc->issued_flush);
return ret;
}
atomic_inc(&fcc->submit_flush);
if (!atomic_read(&fcc->issing_flush)) {
atomic_inc(&fcc->issing_flush);
ret = submit_flush_wait(sbi);
atomic_dec(&fcc->submit_flush);
atomic_dec(&fcc->issing_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
init_completion(&cmd.wait);
atomic_inc(&fcc->submit_flush);
atomic_inc(&fcc->issing_flush);
llist_add(&cmd.llnode, &fcc->issue_list);
if (!fcc->dispatch_list)
......@@ -501,10 +538,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
atomic_dec(&fcc->submit_flush);
atomic_dec(&fcc->issing_flush);
} else {
llist_del_all(&fcc->issue_list);
atomic_set(&fcc->submit_flush, 0);
atomic_set(&fcc->issing_flush, 0);
}
return cmd.ret;
......@@ -524,7 +561,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
if (!fcc)
return -ENOMEM;
atomic_set(&fcc->submit_flush, 0);
atomic_set(&fcc->issued_flush, 0);
atomic_set(&fcc->issing_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
......@@ -597,8 +635,8 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
clear_bit(GET_SECNO(sbi, segno),
if (get_valid_blocks(sbi, segno, true) == 0)
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
}
}
......@@ -618,7 +656,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, 0);
valid_blocks = get_valid_blocks(sbi, segno, false);
if (valid_blocks == 0) {
__locate_dirty_segment(sbi, segno, PRE);
......@@ -633,162 +671,407 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
static void __add_discard_cmd(struct f2fs_sb_info *sbi,
struct bio *bio, block_t lstart, block_t len)
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *cmd_list = &(dcc->discard_cmd_list);
struct list_head *pend_list;
struct discard_cmd *dc;
f2fs_bug_on(sbi, !len);
pend_list = &dcc->pend_list[plist_idx(len)];
dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
INIT_LIST_HEAD(&dc->list);
dc->bio = bio;
bio->bi_private = dc;
dc->bdev = bdev;
dc->lstart = lstart;
dc->start = start;
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
mutex_lock(&dcc->cmd_lock);
list_add_tail(&dc->list, cmd_list);
mutex_unlock(&dcc->cmd_lock);
return dc;
}
static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
struct rb_node *parent, struct rb_node **p)
{
int err = dc->bio->bi_error;
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *dc;
if (dc->state == D_DONE)
atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
if (err == -EOPNOTSUPP)
err = 0;
rb_link_node(&dc->rb_node, parent, p);
rb_insert_color(&dc->rb_node, &dcc->root);
return dc;
}
static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
atomic_dec(&dcc->issing_discard);
if (err)
f2fs_msg(sbi->sb, KERN_INFO,
"Issue discard failed, ret: %d", err);
bio_put(dc->bio);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
dcc->undiscard_blks -= dc->len;
kmem_cache_free(discard_cmd_slab, dc);
atomic_dec(&dcc->discard_cmd_cnt);
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = &(dcc->discard_cmd_list);
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
mutex_lock(&dcc->cmd_lock);
if (dc->error == -EOPNOTSUPP)
dc->error = 0;
blk_start_plug(&plug);
if (dc->error)
f2fs_msg(sbi->sb, KERN_INFO,
"Issue discard failed, ret: %d", dc->error);
__detach_discard_cmd(dcc, dc);
}
list_for_each_entry_safe(dc, tmp, wait_list, list) {
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
if (blkaddr == NULL_ADDR) {
if (dc->state == D_PREP) {
dc->state = D_SUBMIT;
submit_bio(dc->bio);
atomic_inc(&dcc->submit_discard);
}
continue;
dc->error = bio->bi_error;
dc->state = D_DONE;
complete(&dc->wait);
bio_put(bio);
}
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct bio *bio = NULL;
if (dc->state != D_PREP)
return;
trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
dc->error = __blkdev_issue_discard(dc->bdev,
SECTOR_FROM_BLOCK(dc->start),
SECTOR_FROM_BLOCK(dc->len),
GFP_NOFS, 0, &bio);
if (!dc->error) {
/* should keep before submission to avoid D_DONE right away */
dc->state = D_SUBMIT;
atomic_inc(&dcc->issued_discard);
atomic_inc(&dcc->issing_discard);
if (bio) {
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= REQ_SYNC;
submit_bio(bio);
list_move_tail(&dc->list, &dcc->wait_list);
}
} else {
__remove_discard_cmd(sbi, dc);
}
}
if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
if (dc->state == D_SUBMIT)
wait_for_completion_io(&dc->wait);
else
__remove_discard_cmd(sbi, dc);
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
struct rb_node **insert_p,
struct rb_node *insert_parent)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct rb_node **p = &dcc->root.rb_node;
struct rb_node *parent = NULL;
struct discard_cmd *dc = NULL;
if (insert_p && insert_parent) {
parent = insert_parent;
p = insert_p;
goto do_insert;
}
p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
do_insert:
dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
if (!dc)
return NULL;
return dc;
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc, block_t blkaddr)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_info di = dc->di;
bool modified = false;
if (dc->state == D_DONE || dc->len == 1) {
__remove_discard_cmd(sbi, dc);
return;
}
dcc->undiscard_blks -= di.len;
if (blkaddr > di.lstart) {
dc->len = blkaddr - dc->lstart;
dcc->undiscard_blks += dc->len;
__relocate_discard_cmd(dcc, dc);
f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
modified = true;
}
if (blkaddr < di.lstart + di.len - 1) {
if (modified) {
__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
di.start + blkaddr + 1 - di.lstart,
di.lstart + di.len - 1 - blkaddr,
NULL, NULL);
f2fs_bug_on(sbi,
!__check_rb_tree_consistence(sbi, &dcc->root));
} else {
dc->lstart++;
dc->len--;
dc->start++;
dcc->undiscard_blks += dc->len;
__relocate_discard_cmd(dcc, dc);
f2fs_bug_on(sbi,
!__check_rb_tree_consistence(sbi, &dcc->root));
}
}
blk_finish_plug(&plug);
}
/* this comes from f2fs_put_super */
if (blkaddr == NULL_ADDR) {
list_for_each_entry_safe(dc, tmp, wait_list, list) {
wait_for_completion_io(&dc->wait);
__remove_discard_cmd(sbi, dc);
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
block_t end = lstart + len;
mutex_lock(&dcc->cmd_lock);
dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
&insert_p, &insert_parent, true);
if (dc)
prev_dc = dc;
if (!prev_dc) {
di.lstart = lstart;
di.len = next_dc ? next_dc->lstart - lstart : len;
di.len = min(di.len, len);
di.start = start;
}
while (1) {
struct rb_node *node;
bool merged = false;
struct discard_cmd *tdc = NULL;
if (prev_dc) {
di.lstart = prev_dc->lstart + prev_dc->len;
if (di.lstart < lstart)
di.lstart = lstart;
if (di.lstart >= end)
break;
if (!next_dc || next_dc->lstart > end)
di.len = end - di.lstart;
else
di.len = next_dc->lstart - di.lstart;
di.start = start + di.lstart - lstart;
}
if (!di.len)
goto next;
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
__is_discard_back_mergeable(&di, &prev_dc->di)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
f2fs_bug_on(sbi,
!__check_rb_tree_consistence(sbi, &dcc->root));
di = prev_dc->di;
tdc = prev_dc;
merged = true;
}
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
__is_discard_front_mergeable(&di, &next_dc->di)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, next_dc);
if (tdc)
__remove_discard_cmd(sbi, tdc);
f2fs_bug_on(sbi,
!__check_rb_tree_consistence(sbi, &dcc->root));
merged = true;
}
if (!merged) {
__insert_discard_tree(sbi, bdev, di.lstart, di.start,
di.len, NULL, NULL);
f2fs_bug_on(sbi,
!__check_rb_tree_consistence(sbi, &dcc->root));
}
next:
prev_dc = next_dc;
if (!prev_dc)
break;
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
mutex_unlock(&dcc->cmd_lock);
}
static void f2fs_submit_discard_endio(struct bio *bio)
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
block_t lblkstart = blkstart;
complete(&dc->wait);
dc->state = D_DONE;
trace_f2fs_queue_discard(bdev, blkstart, blklen);
if (sbi->s_ndevs) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
return 0;
}
static int issue_discard_thread(void *data)
static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
{
struct f2fs_sb_info *sbi = data;
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
struct list_head *cmd_list = &dcc->discard_cmd_list;
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
int iter = 0;
repeat:
if (kthread_should_stop())
return 0;
int i, iter = 0;
mutex_lock(&dcc->cmd_lock);
blk_start_plug(&plug);
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
pend_list = &dcc->pend_list[i];
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
if (!issue_cond || is_idle(sbi))
__submit_discard_cmd(sbi, dc);
if (issue_cond && iter++ > DISCARD_ISSUE_RATE)
goto out;
}
}
out:
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
}
static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = &(dcc->wait_list);
struct discard_cmd *dc, *tmp;
mutex_lock(&dcc->cmd_lock);
list_for_each_entry_safe(dc, tmp, cmd_list, list) {
if (dc->state == D_PREP) {
dc->state = D_SUBMIT;
submit_bio(dc->bio);
atomic_inc(&dcc->submit_discard);
if (iter++ > DISCARD_ISSUE_RATE)
break;
} else if (dc->state == D_DONE) {
list_for_each_entry_safe(dc, tmp, wait_list, list) {
if (!wait_cond || dc->state == D_DONE) {
if (dc->ref)
continue;
wait_for_completion_io(&dc->wait);
__remove_discard_cmd(sbi, dc);
}
}
mutex_unlock(&dcc->cmd_lock);
}
blk_finish_plug(&plug);
/* This should be covered by global mutex, &sit_i->sentry_lock */
void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *dc;
bool need_wait = false;
iter = 0;
congestion_wait(BLK_RW_SYNC, HZ/50);
mutex_lock(&dcc->cmd_lock);
dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
if (dc) {
if (dc->state == D_PREP) {
__punch_discard_cmd(sbi, dc, blkaddr);
} else {
dc->ref++;
need_wait = true;
}
}
mutex_unlock(&dcc->cmd_lock);
wait_event_interruptible(*q,
kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
goto repeat;
if (need_wait) {
wait_for_completion_io(&dc->wait);
mutex_lock(&dcc->cmd_lock);
f2fs_bug_on(sbi, dc->state != D_DONE);
dc->ref--;
if (!dc->ref)
__remove_discard_cmd(sbi, dc);
mutex_unlock(&dcc->cmd_lock);
}
}
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
/* This comes from f2fs_put_super */
void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
{
struct bio *bio = NULL;
block_t lblkstart = blkstart;
int err;
__issue_discard_cmd(sbi, false);
__wait_discard_cmd(sbi, false);
}
trace_f2fs_issue_discard(bdev, blkstart, blklen);
static int issue_discard_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
repeat:
if (kthread_should_stop())
return 0;
if (sbi->s_ndevs) {
int devi = f2fs_target_device_index(sbi, blkstart);
__issue_discard_cmd(sbi, true);
__wait_discard_cmd(sbi, true);
blkstart -= FDEV(devi).start_blk;
}
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(blkstart),
SECTOR_FROM_BLOCK(blklen),
GFP_NOFS, 0, &bio);
if (!err && bio) {
bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= REQ_SYNC;
congestion_wait(BLK_RW_SYNC, HZ/50);
__add_discard_cmd(sbi, bio, lblkstart, blklen);
wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
}
return err;
wait_event_interruptible(*q, kthread_should_stop() ||
atomic_read(&dcc->discard_cmd_cnt));
goto repeat;
}
#ifdef CONFIG_BLK_DEV_ZONED
......@@ -796,6 +1079,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
sector_t sector, nr_sects;
block_t lblkstart = blkstart;
int devi = 0;
if (sbi->s_ndevs) {
......@@ -813,7 +1097,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
case BLK_ZONE_TYPE_CONVENTIONAL:
if (!blk_queue_discard(bdev_get_queue(bdev)))
return 0;
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
case BLK_ZONE_TYPE_SEQWRITE_REQ:
case BLK_ZONE_TYPE_SEQWRITE_PREF:
sector = SECTOR_FROM_BLOCK(blkstart);
......@@ -845,7 +1129,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
bdev_zoned_model(bdev) != BLK_ZONED_NONE)
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
}
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
......@@ -888,32 +1172,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return err;
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
struct cp_control *cpc, struct seg_entry *se,
unsigned int start, unsigned int end)
{
struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
struct discard_entry *new, *last;
if (!list_empty(head)) {
last = list_last_entry(head, struct discard_entry, list);
if (START_BLOCK(sbi, cpc->trim_start) + start ==
last->blkaddr + last->len &&
last->len < MAX_DISCARD_BLOCKS(sbi)) {
last->len += end - start;
goto done;
}
}
new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
INIT_LIST_HEAD(&new->list);
new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
new->len = end - start;
list_add_tail(&new->list, head);
done:
SM_I(sbi)->dcc_info->nr_discards += end - start;
}
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
bool check_only)
{
......@@ -925,7 +1183,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
unsigned long *discard_map = (unsigned long *)se->discard_map;
unsigned long *dmap = SIT_I(sbi)->tmp_map;
unsigned int start = 0, end = -1;
bool force = (cpc->reason == CP_DISCARD);
bool force = (cpc->reason & CP_DISCARD);
struct discard_entry *de = NULL;
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
......@@ -957,14 +1217,24 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
if (check_only)
return true;
__add_discard_entry(sbi, cpc, se, start, end);
if (!de) {
de = f2fs_kmem_cache_alloc(discard_entry_slab,
GFP_F2FS_ZERO);
de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
list_add_tail(&de->list, head);
}
for (i = start; i < end; i++)
__set_bit_le(i, (void *)de->discard_map);
SM_I(sbi)->dcc_info->nr_discards += end - start;
}
return false;
}
void release_discard_addrs(struct f2fs_sb_info *sbi)
{
struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
struct discard_entry *entry, *this;
/* drop caches */
......@@ -990,13 +1260,13 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason == CP_DISCARD);
bool force = (cpc->reason & CP_DISCARD);
mutex_lock(&dirty_i->seglist_lock);
......@@ -1026,10 +1296,10 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
continue;
}
next:
secno = GET_SECNO(sbi, start);
start_segno = secno * sbi->segs_per_sec;
secno = GET_SEC_FROM_SEG(sbi, start);
start_segno = GET_SEG_FROM_SEC(sbi, secno);
if (!IS_CURSEC(sbi, secno) &&
!get_valid_blocks(sbi, start, sbi->segs_per_sec))
!get_valid_blocks(sbi, start, true))
f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
sbi->segs_per_sec << sbi->log_blocks_per_seg);
......@@ -1043,22 +1313,46 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* send small discards */
list_for_each_entry_safe(entry, this, head, list) {
if (force && entry->len < cpc->trim_minlen)
goto skip;
f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
cpc->trimmed += entry->len;
unsigned int cur_pos = 0, next_pos, len, total_len = 0;
bool is_valid = test_bit_le(0, entry->discard_map);
find_next:
if (is_valid) {
next_pos = find_next_zero_bit_le(entry->discard_map,
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
if (force && len < cpc->trim_minlen)
goto skip;
f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
len);
cpc->trimmed += len;
total_len += len;
} else {
next_pos = find_next_bit_le(entry->discard_map,
sbi->blocks_per_seg, cur_pos);
}
skip:
cur_pos = next_pos;
is_valid = !is_valid;
if (cur_pos < sbi->blocks_per_seg)
goto find_next;
list_del(&entry->list);
SM_I(sbi)->dcc_info->nr_discards -= entry->len;
SM_I(sbi)->dcc_info->nr_discards -= total_len;
kmem_cache_free(discard_entry_slab, entry);
}
wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
}
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
struct discard_cmd_control *dcc;
int err = 0;
int err = 0, i;
if (SM_I(sbi)->dcc_info) {
dcc = SM_I(sbi)->dcc_info;
......@@ -1069,12 +1363,18 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
if (!dcc)
return -ENOMEM;
INIT_LIST_HEAD(&dcc->discard_entry_list);
INIT_LIST_HEAD(&dcc->discard_cmd_list);
INIT_LIST_HEAD(&dcc->entry_list);
for (i = 0; i < MAX_PLIST_NUM; i++)
INIT_LIST_HEAD(&dcc->pend_list[i]);
INIT_LIST_HEAD(&dcc->wait_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->submit_discard, 0);
atomic_set(&dcc->issued_discard, 0);
atomic_set(&dcc->issing_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
dcc->root = RB_ROOT;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
......@@ -1091,20 +1391,22 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
return err;
}
static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
if (dcc && dcc->f2fs_issue_discard) {
if (!dcc)
return;
if (dcc->f2fs_issue_discard) {
struct task_struct *discard_thread = dcc->f2fs_issue_discard;
dcc->f2fs_issue_discard = NULL;
kthread_stop(discard_thread);
}
if (free) {
kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
......@@ -1345,6 +1647,17 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
return !test_bit(segno, free_i->free_segmap);
return 0;
}
/*
* Find a new segment from the free segments bitmap to right order
* This function should be returned with success, otherwise BUG
......@@ -1355,8 +1668,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno, secno, zoneno;
unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
unsigned int hint = *newseg / sbi->segs_per_sec;
unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
unsigned int left_start = hint;
bool init = true;
int go_left = 0;
......@@ -1366,8 +1679,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
segno = find_next_zero_bit(free_i->free_segmap,
(hint + 1) * sbi->segs_per_sec, *newseg + 1);
if (segno < (hint + 1) * sbi->segs_per_sec)
GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
goto got_it;
}
find_other_zone:
......@@ -1398,8 +1711,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
secno = left_start;
skip_left:
hint = secno;
segno = secno * sbi->segs_per_sec;
zoneno = secno / sbi->secs_per_zone;
segno = GET_SEG_FROM_SEC(sbi, secno);
zoneno = GET_ZONE_FROM_SEC(sbi, secno);
/* give up on finding another zone */
if (!init)
......@@ -1443,7 +1756,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
struct summary_footer *sum_footer;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
curseg->next_blkoff = 0;
curseg->next_segno = NULL_SEGNO;
......@@ -1456,6 +1769,20 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
__set_sit_entry_type(sbi, type, curseg->segno, modified);
}
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (sbi->segs_per_sec != 1)
return CURSEG_I(sbi, type)->segno;
if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
return SIT_I(sbi)->last_victim[ALLOC_NEXT];
return CURSEG_I(sbi, type)->segno;
}
/*
* Allocate a current working segment.
* This function always allocates a free segment in LFS manner.
......@@ -1474,6 +1801,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
if (test_opt(sbi, NOHEAP))
dir = ALLOC_RIGHT;
segno = __get_next_segno(sbi, type);
get_new_segment(sbi, &segno, new_sec, dir);
curseg->next_segno = segno;
reset_curseg(sbi, type, 1);
......@@ -1549,12 +1877,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
int i, cnt;
bool reversed = false;
/* need_SSR() already forces to do this */
if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR))
if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
curseg->next_segno = segno;
return 1;
}
/* For node segments, let's do SSR more intensively */
if (IS_NODESEG(type)) {
......@@ -1578,9 +1909,10 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
for (; cnt-- > 0; reversed ? i-- : i++) {
if (i == type)
continue;
if (v_ops->get_victim(sbi, &(curseg)->next_segno,
BG_GC, i, SSR))
if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
curseg->next_segno = segno;
return 1;
}
}
return 0;
}
......@@ -1592,17 +1924,21 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
int type, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
if (force)
new_curseg(sbi, type, true);
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
new_curseg(sbi, type, false);
else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
stat_inc_seg_type(sbi, CURSEG_I(sbi, type));
stat_inc_seg_type(sbi, curseg);
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
......@@ -1734,18 +2070,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
if (p_type == DATA) {
struct inode *inode = page->mapping->host;
if (S_ISDIR(inode->i_mode))
return CURSEG_HOT_DATA;
else if (is_cold_data(page) || file_is_cold(inode))
if (is_cold_data(page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
else
return CURSEG_WARM_DATA;
if (is_inode_flag_set(inode, FI_HOT_DATA))
return CURSEG_HOT_DATA;
return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(page))
return is_cold_node(page) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
else
return CURSEG_COLD_NODE;
return CURSEG_COLD_NODE;
}
}
......@@ -1788,15 +2122,14 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
if (!__has_curseg_space(sbi, type))
sit_i->s_ops->allocate_segment(sbi, type, false);
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
* SIT information should be updated after segment allocation,
* since we need to keep dirty segments precisely under SSR.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
if (!__has_curseg_space(sbi, type))
sit_i->s_ops->allocate_segment(sbi, type, false);
mutex_unlock(&sit_i->sentry_lock);
if (page && IS_NODESEG(type))
......@@ -1868,11 +2201,11 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
}
void rewrite_data_page(struct f2fs_io_info *fio)
int rewrite_data_page(struct f2fs_io_info *fio)
{
fio->new_blkaddr = fio->old_blkaddr;
stat_inc_inplace_blocks(fio->sbi);
f2fs_submit_page_mbio(fio);
return f2fs_submit_page_bio(fio);
}
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
......@@ -2437,7 +2770,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
se = get_seg_entry(sbi, segno);
/* add discard candidates */
if (cpc->reason != CP_DISCARD) {
if (!(cpc->reason & CP_DISCARD)) {
cpc->trim_start = segno;
add_discard_addrs(sbi, cpc, false);
}
......@@ -2473,7 +2806,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, !list_empty(head));
f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
if (cpc->reason == CP_DISCARD) {
if (cpc->reason & CP_DISCARD) {
__u64 trim_start = cpc->trim_start;
for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
......@@ -2672,10 +3005,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
/* build discard map only one time */
if (f2fs_discard_en(sbi)) {
memcpy(se->discard_map, se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks += sbi->blocks_per_seg -
se->valid_blocks;
if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
memset(se->discard_map, 0xff,
SIT_VBLOCK_MAP_SIZE);
} else {
memcpy(se->discard_map,
se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks +=
sbi->blocks_per_seg -
se->valid_blocks;
}
}
if (sbi->segs_per_sec > 1)
......@@ -2699,10 +3039,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
seg_info_from_raw_sit(se, &sit);
if (f2fs_discard_en(sbi)) {
memcpy(se->discard_map, se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks += old_valid_blocks -
se->valid_blocks;
if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
memset(se->discard_map, 0xff,
SIT_VBLOCK_MAP_SIZE);
} else {
memcpy(se->discard_map, se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks += old_valid_blocks -
se->valid_blocks;
}
}
if (sbi->segs_per_sec > 1)
......@@ -2746,7 +3091,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
if (segno >= MAIN_SEGS(sbi))
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, 0);
valid_blocks = get_valid_blocks(sbi, segno, false);
if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
continue;
if (valid_blocks > sbi->blocks_per_seg) {
......@@ -2852,6 +3197,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
......@@ -2988,7 +3334,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
if (!sm_info)
return;
destroy_flush_cmd_control(sbi, true);
destroy_discard_cmd_control(sbi, true);
destroy_discard_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
......
......@@ -21,78 +21,84 @@
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
#define IS_CURSEG(sbi, seg) \
((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
(((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
#define IS_CURSEC(sbi, secno) \
((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
sbi->segs_per_sec)) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
(sbi)->segs_per_sec)) \
#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) (sbi->total_sections)
#define MAIN_SECS(sbi) ((sbi)->total_sections)
#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \
sbi->log_blocks_per_seg))
#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \
(sbi)->log_blocks_per_seg))
#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
(GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
(GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg))
#define NEXT_FREE_BLKADDR(sbi, curseg) \
(START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
(START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)
#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg)
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define GET_SECNO(sbi, segno) \
((segno) / sbi->segs_per_sec)
#define GET_ZONENO_FROM_SEGNO(sbi, segno) \
((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
#define BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
#define GET_SEC_FROM_SEG(sbi, segno) \
((segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
((secno) * (sbi)->segs_per_sec)
#define GET_ZONE_FROM_SEC(sbi, secno) \
((secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno) \
GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno))
#define GET_SUM_BLOCK(sbi, segno) \
((sbi->sm_info->ssa_blkaddr) + segno)
((sbi)->sm_info->ssa_blkaddr + (segno))
#define GET_SUM_TYPE(footer) ((footer)->entry_type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type))
#define SIT_ENTRY_OFFSET(sit_i, segno) \
(segno % sit_i->sents_per_block)
((segno) % (sit_i)->sents_per_block)
#define SIT_BLOCK_OFFSET(segno) \
(segno / SIT_ENTRY_PER_BLOCK)
((segno) / SIT_ENTRY_PER_BLOCK)
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
......@@ -103,7 +109,7 @@
#define SECTOR_FROM_BLOCK(blk_addr) \
(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
#define SECTOR_TO_BLOCK(sectors) \
(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
/*
* indicate a block allocation direction: RIGHT and LEFT.
......@@ -132,7 +138,10 @@ enum {
*/
enum {
GC_CB = 0,
GC_GREEDY
GC_GREEDY,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
};
/*
......@@ -227,6 +236,8 @@ struct sit_info {
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
struct free_segmap_info {
......@@ -303,17 +314,17 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct sit_info *sit_i = SIT_I(sbi);
return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)];
}
static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
unsigned int segno, int section)
unsigned int segno, bool use_section)
{
/*
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
if (section > 1)
if (use_section && sbi->segs_per_sec > 1)
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
......@@ -358,8 +369,8 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
spin_lock(&free_i->segmap_lock);
......@@ -379,7 +390,8 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
set_bit(segno, free_i->free_segmap);
free_i->free_segments--;
if (!test_and_set_bit(secno, free_i->free_secmap))
......@@ -390,8 +402,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
spin_lock(&free_i->segmap_lock);
......@@ -412,7 +424,8 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (!test_and_set_bit(segno, free_i->free_segmap)) {
free_i->free_segments--;
......@@ -477,12 +490,12 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
static inline int overprovision_sections(struct f2fs_sb_info *sbi)
{
return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
return GET_SEC_FROM_SEG(sbi, (unsigned int)overprovision_segments(sbi));
}
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
}
static inline bool need_SSR(struct f2fs_sb_info *sbi)
......@@ -495,7 +508,7 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
return false;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + 1);
2 * reserved_sections(sbi));
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
......@@ -540,6 +553,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
#define DEF_MIN_HOT_BLOCKS 16
enum {
F2FS_IPU_FORCE,
......@@ -547,17 +561,15 @@ enum {
F2FS_IPU_UTIL,
F2FS_IPU_SSR_UTIL,
F2FS_IPU_FSYNC,
F2FS_IPU_ASYNC,
};
static inline bool need_inplace_update(struct inode *inode)
static inline bool need_inplace_update_policy(struct inode *inode,
struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int policy = SM_I(sbi)->ipu_policy;
/* IPU can be done only for the user data */
if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
return false;
if (test_opt(sbi, LFS))
return false;
......@@ -572,6 +584,15 @@ static inline bool need_inplace_update(struct inode *inode)
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
/*
* IPU for rewrite async pages
*/
if (policy & (0x1 << F2FS_IPU_ASYNC) &&
fio && fio->op == REQ_OP_WRITE &&
!(fio->op_flags & REQ_SYNC) &&
!f2fs_encrypted_inode(inode))
return true;
/* this is only set during fdatasync */
if (policy & (0x1 << F2FS_IPU_FSYNC) &&
is_inode_flag_set(inode, FI_NEED_IPU))
......@@ -719,7 +740,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
unsigned int secno)
{
if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >=
sbi->fggc_threshold)
return true;
return false;
......
......@@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = {
[FAULT_BLOCK] = "no more block",
[FAULT_DIR_DEPTH] = "too big dir depth",
[FAULT_EVICT_INODE] = "evict_inode fail",
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
};
......@@ -82,6 +83,7 @@ enum {
Opt_discard,
Opt_nodiscard,
Opt_noheap,
Opt_heap,
Opt_user_xattr,
Opt_nouser_xattr,
Opt_acl,
......@@ -116,6 +118,7 @@ static match_table_t f2fs_tokens = {
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_noheap, "no_heap"},
{Opt_heap, "heap"},
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
......@@ -293,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
......@@ -318,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
......@@ -436,6 +441,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_noheap:
set_opt(sbi, NOHEAP);
break;
case Opt_heap:
clear_opt(sbi, NOHEAP);
break;
#ifdef CONFIG_F2FS_FS_XATTR
case Opt_user_xattr:
set_opt(sbi, XATTR_USER);
......@@ -787,7 +795,14 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
f2fs_wait_discard_bio(sbi, NULL_ADDR);
f2fs_wait_discard_bios(sbi);
if (!sbi->discard_blks) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
write_checkpoint(sbi, &cpc);
}
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
......@@ -913,7 +928,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
if (test_opt(sbi, NOHEAP))
seq_puts(seq, ",no_heap_alloc");
seq_puts(seq, ",no_heap");
else
seq_puts(seq, ",heap");
#ifdef CONFIG_F2FS_FS_XATTR
if (test_opt(sbi, XATTR_USER))
seq_puts(seq, ",user_xattr");
......@@ -986,7 +1003,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u", se->type,
get_valid_blocks(sbi, i, 1));
get_valid_blocks(sbi, i, false));
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
......@@ -1012,7 +1029,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u|", se->type,
get_valid_blocks(sbi, i, 1));
get_valid_blocks(sbi, i, false));
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
......@@ -1046,6 +1063,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
......@@ -1307,7 +1325,7 @@ static int __f2fs_commit_super(struct buffer_head *bh,
unlock_buffer(bh);
/* it's rare case, we can do fua all the time */
return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA);
return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
}
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
......@@ -1483,6 +1501,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
f2fs_msg(sb, KERN_INFO,
"Invalid segment count (%u)",
le32_to_cpu(raw_super->segment_count));
return 1;
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sbi, bh))
return 1;
......@@ -1555,6 +1580,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
atomic_set(&sbi->wb_sync_req, 0);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
......@@ -1917,6 +1944,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
/* disallow all the data/node/meta page writes */
set_sbi_flag(sbi, SBI_POR_DOING);
......@@ -2022,6 +2050,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_join_shrinker(sbi);
err = f2fs_build_stats(sbi);
if (err)
goto free_nm;
/* if there are nt orphan nodes free them */
err = recover_orphan_inodes(sbi);
if (err)
......@@ -2046,10 +2078,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
}
err = f2fs_build_stats(sbi);
if (err)
goto free_root_inode;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
......@@ -2143,7 +2171,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
remove_proc_entry("segment_bits", sbi->s_proc);
remove_proc_entry(sb->s_id, f2fs_proc_root);
}
f2fs_destroy_stats(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
......@@ -2161,6 +2188,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
truncate_inode_pages_final(META_MAPPING(sbi));
iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
f2fs_destroy_stats(sbi);
free_nm:
destroy_node_manager(sbi);
free_sm:
......
......@@ -59,7 +59,7 @@ void f2fs_trace_pid(struct page *page)
pid_t pid = task_pid_nr(current);
void *p;
page->private = pid;
set_page_private(page, (unsigned long)pid);
if (radix_tree_preload(GFP_NOFS))
return;
......@@ -138,7 +138,7 @@ static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
results[ret] = iter.index;
if (++ret == PIDVEC_SIZE)
if (++ret == max_items)
break;
}
return ret;
......
......@@ -250,15 +250,13 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
inline_size = inline_xattr_size(inode);
if (!size && !inline_size)
return -ENODATA;
txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
......@@ -328,13 +326,14 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = VALID_XATTR_BLOCK_SIZE;
unsigned int inline_size = inline_xattr_size(inode);
void *txattr_addr;
int err;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
......@@ -358,19 +357,19 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
}
/* read from xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
if (xnid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
xpage = get_node_page(sbi, xnid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto fail;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
memcpy(txattr_addr + inline_size, xattr_addr, size);
f2fs_put_page(xpage, 1);
}
......@@ -392,14 +391,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
void *txattr_addr, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t inline_size = 0;
size_t inline_size = inline_xattr_size(inode);
void *xattr_addr;
struct page *xpage;
nid_t new_nid = 0;
int err;
inline_size = inline_xattr_size(inode);
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
return -ENOSPC;
......@@ -454,7 +451,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
xattr_addr = page_address(xpage);
memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
......@@ -546,7 +543,9 @@ static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
const void *value, size_t size)
{
void *pval = entry->e_name + entry->e_name_len;
return (entry->e_value_size == size) && !memcmp(pval, value, size);
return (le16_to_cpu(entry->e_value_size) == size) &&
!memcmp(pval, value, size);
}
static int __f2fs_setxattr(struct inode *inode, int index,
......
......@@ -58,10 +58,10 @@ struct f2fs_xattr_entry {
#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1))
#define XATTR_ROUND (3)
#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND)
#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
entry->e_name_len + le16_to_cpu(entry->e_value_size)))
(entry)->e_name_len + le16_to_cpu((entry)->e_value_size)))
#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\
ENTRY_SIZE(entry)))
......@@ -72,8 +72,8 @@ struct f2fs_xattr_entry {
for (entry = XATTR_FIRST_ENTRY(addr);\
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
......
......@@ -32,9 +32,9 @@
/* 0, 1(node nid), 2(meta nid) are reserved node id */
#define F2FS_RESERVED_NODE_NUM 3
#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num)
#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
#define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num)
#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num)
#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num)
#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */
......@@ -114,6 +114,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
#define CP_TRIMMED_FLAG 0x00000100
#define CP_NAT_BITS_FLAG 0x00000080
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
......@@ -161,7 +162,7 @@ struct f2fs_checkpoint {
*/
#define F2FS_ORPHANS_PER_BLOCK 1020
#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \
#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \
F2FS_ORPHANS_PER_BLOCK)
struct f2fs_orphan_block {
......@@ -301,6 +302,12 @@ struct f2fs_nat_block {
#define SIT_VBLOCK_MAP_SIZE 64
#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry))
/*
* F2FS uses 4 bytes to represent block address. As a result, supported size of
* disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments.
*/
#define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2)
/*
* Note that f2fs_sit_entry->vblocks has the following bit-field information.
* [15:10] : allocation type such as CURSEG_XXXX_TYPE
......@@ -449,7 +456,7 @@ typedef __le32 f2fs_hash_t;
#define F2FS_SLOT_LEN 8
#define F2FS_SLOT_LEN_BITS 3
#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
#define GET_DENTRY_SLOTS(x) (((x) + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
/* MAX level for dir lookup */
#define MAX_DIR_HASH_DEPTH 63
......
......@@ -15,6 +15,8 @@ TRACE_DEFINE_ENUM(META);
TRACE_DEFINE_ENUM(META_FLUSH);
TRACE_DEFINE_ENUM(INMEM);
TRACE_DEFINE_ENUM(INMEM_DROP);
TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
TRACE_DEFINE_ENUM(INMEM_REVOKE);
TRACE_DEFINE_ENUM(IPU);
TRACE_DEFINE_ENUM(OPU);
TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
......@@ -42,6 +44,7 @@ TRACE_DEFINE_ENUM(CP_FASTBOOT);
TRACE_DEFINE_ENUM(CP_SYNC);
TRACE_DEFINE_ENUM(CP_RECOVERY);
TRACE_DEFINE_ENUM(CP_DISCARD);
TRACE_DEFINE_ENUM(CP_TRIMMED);
#define show_block_type(type) \
__print_symbolic(type, \
......@@ -51,12 +54,13 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ META_FLUSH, "META_FLUSH" }, \
{ INMEM, "INMEM" }, \
{ INMEM_DROP, "INMEM_DROP" }, \
{ INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \
{ INMEM_REVOKE, "INMEM_REVOKE" }, \
{ IPU, "IN-PLACE" }, \
{ OPU, "OUT-OF-PLACE" })
#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
REQ_PRIO)
#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \
REQ_PREFLUSH | REQ_FUA)
#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS)
#define show_bio_type(op,op_flags) show_bio_op(op), \
......@@ -75,16 +79,13 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" })
#define show_bio_op_flags(flags) \
__print_symbolic(F2FS_BIO_FLAG_MASK(flags), \
{ REQ_RAHEAD, "(RA)" }, \
{ REQ_SYNC, "(S)" }, \
{ REQ_SYNC | REQ_PRIO, "(SP)" }, \
{ REQ_META, "(M)" }, \
{ REQ_META | REQ_PRIO, "(MP)" }, \
{ REQ_SYNC | REQ_PREFLUSH , "(SF)" }, \
{ REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" }, \
{ REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" }, \
{ 0, " \b" })
__print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \
{ REQ_RAHEAD, "R" }, \
{ REQ_SYNC, "S" }, \
{ REQ_META, "M" }, \
{ REQ_PRIO, "P" }, \
{ REQ_PREFLUSH, "PF" }, \
{ REQ_FUA, "FUA" })
#define show_data_type(type) \
__print_symbolic(type, \
......@@ -117,12 +118,14 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ GC_CB, "Cost-Benefit" })
#define show_cpreason(type) \
__print_symbolic(type, \
__print_flags(type, "|", \
{ CP_UMOUNT, "Umount" }, \
{ CP_FASTBOOT, "Fastboot" }, \
{ CP_SYNC, "Sync" }, \
{ CP_RECOVERY, "Recovery" }, \
{ CP_DISCARD, "Discard" })
{ CP_DISCARD, "Discard" }, \
{ CP_UMOUNT, "Umount" }, \
{ CP_TRIMMED, "Trimmed" })
struct victim_sel_policy;
struct f2fs_map_blocks;
......@@ -769,7 +772,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s%s, type = %s",
"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->old_blkaddr,
......@@ -822,7 +825,7 @@ DECLARE_EVENT_CLASS(f2fs__bio,
__entry->size = bio->bi_iter.bi_size;
),
TP_printk("dev = (%d,%d)/(%d,%d), rw = %s%s, %s, sector = %lld, size = %u",
TP_printk("dev = (%d,%d)/(%d,%d), rw = %s(%s), %s, sector = %lld, size = %u",
show_dev(__entry->target),
show_dev(__entry->dev),
show_bio_type(__entry->op, __entry->op_flags),
......@@ -1126,7 +1129,7 @@ TRACE_EVENT(f2fs_write_checkpoint,
__entry->msg)
);
TRACE_EVENT(f2fs_issue_discard,
DECLARE_EVENT_CLASS(f2fs_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
......@@ -1150,6 +1153,20 @@ TRACE_EVENT(f2fs_issue_discard,
(unsigned long long)__entry->blklen)
);
DEFINE_EVENT(f2fs_discard, f2fs_queue_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(dev, blkstart, blklen)
);
DEFINE_EVENT(f2fs_discard, f2fs_issue_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(dev, blkstart, blklen)
);
TRACE_EVENT(f2fs_issue_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
......@@ -1174,26 +1191,29 @@ TRACE_EVENT(f2fs_issue_reset_zone,
TRACE_EVENT(f2fs_issue_flush,
TP_PROTO(struct block_device *dev, unsigned int nobarrier,
unsigned int flush_merge),
unsigned int flush_merge, int ret),
TP_ARGS(dev, nobarrier, flush_merge),
TP_ARGS(dev, nobarrier, flush_merge, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned int, nobarrier)
__field(unsigned int, flush_merge)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = dev->bd_dev;
__entry->nobarrier = nobarrier;
__entry->flush_merge = flush_merge;
__entry->ret = ret;
),
TP_printk("dev = (%d,%d), %s %s",
TP_printk("dev = (%d,%d), %s %s, ret = %d",
show_dev(__entry->dev),
__entry->nobarrier ? "skip (nobarrier)" : "issue",
__entry->flush_merge ? " with flush_merge" : "")
__entry->flush_merge ? " with flush_merge" : "",
__entry->ret)
);
TRACE_EVENT(f2fs_lookup_extent_tree_start,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment