Commit 3e624fc7 authored by Theodore Ts'o's avatar Theodore Ts'o

ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback

The multiblock allocator needs to be able to release blocks (and issue
a blkdev discard request) when the transaction which freed those
blocks is committed.  Previously this was done via a polling mechanism
when blocks are allocated or freed.  A much better way of doing things
is to create a jbd2 callback function and attaching the list of blocks
to be freed directly to the transaction structure.
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 22359f57
...@@ -99,9 +99,6 @@ struct ext4_sb_info { ...@@ -99,9 +99,6 @@ struct ext4_sb_info {
struct inode *s_buddy_cache; struct inode *s_buddy_cache;
long s_blocks_reserved; long s_blocks_reserved;
spinlock_t s_reserve_lock; spinlock_t s_reserve_lock;
struct list_head s_active_transaction;
struct list_head s_closed_transaction;
struct list_head s_committed_transaction;
spinlock_t s_md_lock; spinlock_t s_md_lock;
tid_t s_last_transaction; tid_t s_last_transaction;
unsigned short *s_mb_offsets, *s_mb_maxs; unsigned short *s_mb_offsets, *s_mb_maxs;
......
...@@ -2523,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ...@@ -2523,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
} }
spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_md_lock);
INIT_LIST_HEAD(&sbi->s_active_transaction);
INIT_LIST_HEAD(&sbi->s_closed_transaction);
INIT_LIST_HEAD(&sbi->s_committed_transaction);
spin_lock_init(&sbi->s_bal_lock); spin_lock_init(&sbi->s_bal_lock);
sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
...@@ -2554,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ...@@ -2554,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
ext4_mb_init_per_dev_proc(sb); ext4_mb_init_per_dev_proc(sb);
ext4_mb_history_init(sb); ext4_mb_history_init(sb);
sbi->s_journal->j_commit_callback = release_blocks_on_commit;
printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
return 0; return 0;
} }
...@@ -2583,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb) ...@@ -2583,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
struct ext4_group_info *grinfo; struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
/* release freed, non-committed blocks */
spin_lock(&sbi->s_md_lock);
list_splice_init(&sbi->s_closed_transaction,
&sbi->s_committed_transaction);
list_splice_init(&sbi->s_active_transaction,
&sbi->s_committed_transaction);
spin_unlock(&sbi->s_md_lock);
ext4_mb_free_committed_blocks(sb);
if (sbi->s_group_info) { if (sbi->s_group_info) {
for (i = 0; i < sbi->s_groups_count; i++) { for (i = 0; i < sbi->s_groups_count; i++) {
grinfo = ext4_get_group_info(sb, i); grinfo = ext4_get_group_info(sb, i);
...@@ -2645,36 +2635,25 @@ int ext4_mb_release(struct super_block *sb) ...@@ -2645,36 +2635,25 @@ int ext4_mb_release(struct super_block *sb)
return 0; return 0;
} }
static noinline_for_stack void /*
ext4_mb_free_committed_blocks(struct super_block *sb) * This function is called by the jbd2 layer once the commit has finished,
* so we know we can free the blocks that were released with that commit.
*/
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
{ {
struct super_block *sb = journal->j_private;
struct ext4_buddy e4b; struct ext4_buddy e4b;
struct ext4_group_info *db; struct ext4_group_info *db;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err, count = 0, count2 = 0; int err, count = 0, count2 = 0;
struct ext4_free_data *entry; struct ext4_free_data *entry;
ext4_fsblk_t discard_block; ext4_fsblk_t discard_block;
struct list_head *l, *ltmp;
if (list_empty(&sbi->s_committed_transaction)) list_for_each_safe(l, ltmp, &txn->t_private_list) {
return; entry = list_entry(l, struct ext4_free_data, list);
/* there is committed blocks to be freed yet */
do {
/* get next array of blocks */
entry = NULL;
spin_lock(&sbi->s_md_lock);
if (!list_empty(&sbi->s_committed_transaction)) {
entry = list_entry(sbi->s_committed_transaction.next,
struct ext4_free_data, list);
list_del(&entry->list);
}
spin_unlock(&sbi->s_md_lock);
if (entry == NULL)
break;
mb_debug("gonna free %u blocks in group %lu (0x%p):", mb_debug("gonna free %u blocks in group %lu (0x%p):",
entry->count, entry->group, entry); entry->count, entry->group, entry);
err = ext4_mb_load_buddy(sb, entry->group, &e4b); err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */ /* we expect to find existing buddy because it's pinned */
...@@ -2706,7 +2685,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) ...@@ -2706,7 +2685,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
kmem_cache_free(ext4_free_ext_cachep, entry); kmem_cache_free(ext4_free_ext_cachep, entry);
ext4_mb_release_desc(&e4b); ext4_mb_release_desc(&e4b);
} while (1); }
mb_debug("freed %u blocks in %u structures\n", count, count2); mb_debug("freed %u blocks in %u structures\n", count, count2);
} }
...@@ -4348,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ...@@ -4348,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
goto out1; goto out1;
} }
ext4_mb_poll_new_transaction(sb, handle);
*errp = ext4_mb_initialize_context(ac, ar); *errp = ext4_mb_initialize_context(ac, ar);
if (*errp) { if (*errp) {
ar->len = 0; ar->len = 0;
...@@ -4408,36 +4385,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ...@@ -4408,36 +4385,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
return block; return block;
} }
static void ext4_mb_poll_new_transaction(struct super_block *sb,
handle_t *handle)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (sbi->s_last_transaction == handle->h_transaction->t_tid)
return;
/* new transaction! time to close last one and free blocks for
* committed transaction. we know that only transaction can be
* active, so previos transaction can be being logged and we
* know that transaction before previous is known to be already
* logged. this means that now we may free blocks freed in all
* transactions before previous one. hope I'm clear enough ... */
spin_lock(&sbi->s_md_lock);
if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
mb_debug("new transaction %lu, old %lu\n",
(unsigned long) handle->h_transaction->t_tid,
(unsigned long) sbi->s_last_transaction);
list_splice_init(&sbi->s_closed_transaction,
&sbi->s_committed_transaction);
list_splice_init(&sbi->s_active_transaction,
&sbi->s_closed_transaction);
sbi->s_last_transaction = handle->h_transaction->t_tid;
}
spin_unlock(&sbi->s_md_lock);
ext4_mb_free_committed_blocks(sb);
}
/* /*
* We can merge two free data extents only if the physical blocks * We can merge two free data extents only if the physical blocks
...@@ -4531,9 +4478,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ...@@ -4531,9 +4478,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
kmem_cache_free(ext4_free_ext_cachep, entry); kmem_cache_free(ext4_free_ext_cachep, entry);
} }
} }
/* Add the extent to active_transaction list */ /* Add the extent to transaction's private list */
spin_lock(&sbi->s_md_lock); spin_lock(&sbi->s_md_lock);
list_add(&new_entry->list, &sbi->s_active_transaction); list_add(&new_entry->list, &handle->h_transaction->t_private_list);
spin_unlock(&sbi->s_md_lock); spin_unlock(&sbi->s_md_lock);
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
return 0; return 0;
...@@ -4562,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, ...@@ -4562,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
*freed = 0; *freed = 0;
ext4_mb_poll_new_transaction(sb, handle);
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es; es = EXT4_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) || if (block < le32_to_cpu(es->s_first_data_block) ||
......
...@@ -269,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); ...@@ -269,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group); ext4_group_t group);
static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
static void ext4_mb_free_committed_blocks(struct super_block *);
static void ext4_mb_return_to_preallocation(struct inode *inode, static void ext4_mb_return_to_preallocation(struct inode *inode,
struct ext4_buddy *e4b, sector_t block, struct ext4_buddy *e4b, sector_t block,
int count); int count);
...@@ -278,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *, ...@@ -278,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
struct super_block *, struct ext4_prealloc_space *pa); struct super_block *, struct ext4_prealloc_space *pa);
static int ext4_mb_init_per_dev_proc(struct super_block *sb); static int ext4_mb_init_per_dev_proc(struct super_block *sb);
static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
......
...@@ -995,6 +995,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -995,6 +995,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
journal->j_devname, commit_transaction->t_tid, journal->j_devname, commit_transaction->t_tid,
journal->j_tail_sequence); journal->j_tail_sequence);
......
...@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) ...@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
transaction->t_expires = jiffies + journal->j_commit_interval; transaction->t_expires = jiffies + journal->j_commit_interval;
spin_lock_init(&transaction->t_handle_lock); spin_lock_init(&transaction->t_handle_lock);
INIT_LIST_HEAD(&transaction->t_inode_list); INIT_LIST_HEAD(&transaction->t_inode_list);
INIT_LIST_HEAD(&transaction->t_private_list);
/* Set up the commit timer for the new transaction. */ /* Set up the commit timer for the new transaction. */
journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
......
...@@ -641,6 +641,11 @@ struct transaction_s ...@@ -641,6 +641,11 @@ struct transaction_s
*/ */
int t_handle_count; int t_handle_count;
/*
* For use by the filesystem to store fs-specific data
* structures associated with the transaction
*/
struct list_head t_private_list;
}; };
struct transaction_run_stats_s { struct transaction_run_stats_s {
...@@ -935,6 +940,10 @@ struct journal_s ...@@ -935,6 +940,10 @@ struct journal_s
pid_t j_last_sync_writer; pid_t j_last_sync_writer;
/* This function is called when a transaction is closed */
void (*j_commit_callback)(journal_t *,
transaction_t *);
/* /*
* Journal statistics * Journal statistics
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment