Commit 3512a79d authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling
  ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages
  ext4: Initialize preallocation list_head's properly
  ext4: Fix lockdep warning
  ext4: Fix to read empty directory blocks correctly in 64k
  jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()
  Revert "ext4: wait on all pending commits in ext4_sync_fs()"
  jbd2: Fix return value of jbd2_journal_start_commit()
parents 39a65762 09054264
......@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
if (len == EXT4_MAX_REC_LEN)
if (len == EXT4_MAX_REC_LEN || len == 0)
return 1 << 16;
return len;
}
......
......@@ -47,8 +47,10 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
new_size);
return jbd2_journal_begin_ordered_truncate(
EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode,
new_size);
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
......@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping,
int no_nrwrite_index_update;
int pages_written = 0;
long pages_skipped;
int range_cyclic, cycled = 1, io_done = 0;
int needed_blocks, ret = 0, nr_to_writebump = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
......@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping,
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
if (wbc->range_cyclic)
range_cyclic = wbc->range_cyclic;
if (wbc->range_cyclic) {
index = mapping->writeback_index;
else
if (index)
cycled = 0;
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
} else
index = wbc->range_start >> PAGE_CACHE_SHIFT;
mpd.wbc = wbc;
......@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->no_nrwrite_index_update = 1;
pages_skipped = wbc->pages_skipped;
retry:
while (!ret && wbc->nr_to_write > 0) {
/*
......@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping,
pages_written += mpd.pages_written;
wbc->pages_skipped = pages_skipped;
ret = 0;
io_done = 1;
} else if (wbc->nr_to_write)
/*
* There is no more writeout needed
......@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping,
*/
break;
}
if (!io_done && !cycled) {
cycled = 1;
index = 0;
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = mapping->writeback_index - 1;
goto retry;
}
if (pages_skipped != wbc->pages_skipped)
printk(KERN_EMERG "This should not happen leaving %s "
"with nr_to_write = %ld ret = %d\n",
......@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping,
/* Update index */
index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
......
......@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0;
pa->pa_linear = 0;
......@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
......@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
pa->pa_free -= ac->ac_b_ex.fe_len;
pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&pa->pa_lock);
/*
* We want to add the pa to the right bucket.
* Remove it from the list and while adding
* make sure the list to which we are adding
* doesn't grow big.
*/
if (likely(pa->pa_free)) {
spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
ext4_mb_add_n_trim(ac);
}
}
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->alloc_semp)
up_read(ac->alloc_semp);
if (pa) {
/*
* We want to add the pa to the right bucket.
* Remove it from the list and while adding
* make sure the list to which we are adding
* doesn't grow big. We need to release
* alloc_semp before calling ext4_mb_add_n_trim()
*/
if (pa->pa_linear && likely(pa->pa_free)) {
spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
ext4_mb_add_n_trim(ac);
}
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page);
if (ac->ac_buddy_page)
......
......@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
+ 1);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
goto err_out;
return retval;
}
tmp_inode = ext4_new_inode(handle,
inode->i_sb->s_root->d_inode,
......@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
tmp_inode = NULL;
goto err_out;
return retval;
}
i_size_write(tmp_inode, i_size_read(inode));
/*
......@@ -618,8 +617,7 @@ int ext4_ext_migrate(struct inode *inode)
ext4_journal_stop(handle);
if (tmp_inode)
iput(tmp_inode);
iput(tmp_inode);
return retval;
}
......@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
static int ext4_sync_fs(struct super_block *sb, int wait)
{
int ret = 0;
tid_t target;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
sb->s_dirt = 0;
if (EXT4_SB(sb)->s_journal) {
if (wait)
ret = ext4_force_commit(sb);
else
jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
&target)) {
if (wait)
jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
target);
}
} else {
ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
}
......
......@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
* Called under j_state_lock. Returns true if a transaction was started.
* Called under j_state_lock. Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
......@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
/*
* Start a commit of the current running transaction (if any). Returns true
* if a transaction was started, and fills its tid in at *ptid
* if a transaction is going to be committed (or is currently already
* committing), and fills its tid in at *ptid
*/
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{
......@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid;
ret = __jbd2_log_start_commit(journal, tid);
if (ret && ptid)
__jbd2_log_start_commit(journal, tid);
/* There's a running transaction and we've just made sure
* it's commit has been scheduled. */
if (ptid)
*ptid = tid;
} else if (journal->j_committing_transaction && ptid) {
ret = 1;
} else if (journal->j_committing_transaction) {
/*
* If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction
*/
*ptid = journal->j_committing_transaction->t_tid;
if (ptid)
*ptid = journal->j_committing_transaction->t_tid;
ret = 1;
}
spin_unlock(&journal->j_state_lock);
......
......@@ -2129,26 +2129,46 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
}
/*
* This function must be called when inode is journaled in ordered mode
* before truncation happens. It starts writeout of truncated part in
* case it is in the committing transaction so that we stand to ordered
* mode consistency guarantees.
* File truncate and transaction commit interact with each other in a
* non-trivial way. If a transaction writing data block A is
* committing, we cannot discard the data by truncate until we have
* written them. Otherwise if we crashed after the transaction with
* write has committed but before the transaction with truncate has
* committed, we could see stale data in block A. This function is a
* helper to solve this problem. It starts writeout of the truncated
* part in case it is in the committing transaction.
*
* Filesystem code must call this function when inode is journaled in
* ordered mode before truncation happens and after the inode has been
* placed on orphan list with the new inode size. The second condition
* avoids the race that someone writes new data and we start
* committing the transaction after this function has been called but
* before a transaction for truncate is started (and furthermore it
* allows us to optimize the case where the addition to orphan list
* happens in the same transaction as write --- we don't have to write
* any data in such case).
*/
int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *jinode,
loff_t new_size)
{
journal_t *journal;
transaction_t *commit_trans;
transaction_t *inode_trans, *commit_trans;
int ret = 0;
if (!inode->i_transaction && !inode->i_next_transaction)
/* This is a quick check to avoid locking if not necessary */
if (!jinode->i_transaction)
goto out;
journal = inode->i_transaction->t_journal;
/* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock);
if (inode->i_transaction == commit_trans) {
ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
spin_lock(&journal->j_list_lock);
inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock);
if (inode_trans == commit_trans) {
ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
new_size, LLONG_MAX);
if (ret)
jbd2_journal_abort(journal, ret);
......
......@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
new_size);
return jbd2_journal_begin_ordered_truncate(
OCFS2_SB(inode->i_sb)->journal->j_journal,
&OCFS2_I(inode)->ip_jinode,
new_size);
}
#endif /* OCFS2_JOURNAL_H */
......@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment