Commit ae9a8c4b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:

 - Add support for online resizing of file systems with bigalloc

 - Fix a two data corruption bugs involving DAX, as well as a corruption
   bug after a crash during a racing fallocate and delayed allocation.

 - Finally, a number of cleanups and optimizations.

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: improve smp scalability for inode generation
  ext4: add support for online resizing with bigalloc
  ext4: mention noload when recovering on read-only device
  Documentation: fix little inconsistencies
  ext4: convert timers to use timer_setup()
  jbd2: convert timers to use timer_setup()
  ext4: remove duplicate extended attributes defs
  ext4: add ext4_should_use_dax()
  ext4: add sanity check for encryption + DAX
  ext4: prevent data corruption with journaling + DAX
  ext4: prevent data corruption with inline data + DAX
  ext4: fix interaction between i_size, fallocate, and delalloc after a crash
  ext4: retry allocations conservatively
  ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
  ext4: Add iomap support for inline data
  iomap: Add IOMAP_F_DATA_INLINE flag
  iomap: Switch from blkno to disk offset
parents 32190f0a 23253068
......@@ -1979,8 +1979,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
case IOMAP_MAPPED:
if (offset >= i_size_read(inode))
set_buffer_new(bh);
bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
((offset - iomap->offset) >> inode->i_blkbits);
bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
inode->i_blkbits;
set_buffer_mapped(bh);
break;
}
......
......@@ -938,7 +938,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}
static loff_t
......
......@@ -821,11 +821,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret == 0) {
iomap->type = IOMAP_HOLE;
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->addr = IOMAP_NULL_ADDR;
iomap->length = 1 << blkbits;
} else {
iomap->type = IOMAP_MAPPED;
iomap->blkno = (sector_t)bno << (blkbits - 9);
iomap->addr = (u64)bno << blkbits;
iomap->length = (u64)ret << blkbits;
iomap->flags |= IOMAP_F_MERGED;
}
......
......@@ -37,6 +37,7 @@ config EXT4_FS
select CRC16
select CRYPTO
select CRYPTO_CRC32C
select FS_IOMAP
help
This is the next generation of the ext3 filesystem.
......
......@@ -601,22 +601,21 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
* for the current or committing transaction to complete, and then
* return TRUE.
*
* if the total number of retries exceed three times, return FALSE.
* return TRUE. We will only retry once.
*/
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
(*retries)++ > 3 ||
(*retries)++ > 1 ||
!EXT4_SB(sb)->s_journal)
return 0;
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
smp_mb();
if (EXT4_SB(sb)->s_mb_free_pending)
jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
if (EXT4_SB(sb)->s_mb_free_pending == 0)
return 0;
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
return 1;
}
......
......@@ -544,8 +544,8 @@ struct ext4_new_group_data {
__u64 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
__u16 unused;
__u32 free_blocks_count;
__u16 mdata_blocks;
__u32 free_clusters_count;
};
/* Indexes used to index group tables in ext4_new_group_data */
......@@ -643,43 +643,6 @@ enum {
#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
#ifndef FS_IOC_FSGETXATTR
/* Until the uapi changes get merged for project quota... */
#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
/*
* Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
*/
struct fsxattr {
__u32 fsx_xflags; /* xflags field value (get/set) */
__u32 fsx_extsize; /* extsize field value (get/set)*/
__u32 fsx_nextents; /* nextents field value (get) */
__u32 fsx_projid; /* project identifier (get/set) */
unsigned char fsx_pad[12];
};
/*
* Flags for the fsx_xflags field
*/
#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
#endif /* !defined(FS_IOC_FSGETXATTR) */
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
......@@ -1391,8 +1354,6 @@ struct ext4_sb_info {
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
......@@ -2514,9 +2475,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
unsigned int map_len,
struct extent_status *result);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
......@@ -3047,6 +3005,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
extern int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline, __u64 start, __u64 len);
struct iomap;
extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
......
......@@ -4794,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
(offset + len > i_size_read(inode) ||
offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
......@@ -4965,7 +4966,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
(offset + len > i_size_read(inode) ||
offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
......
......@@ -21,6 +21,7 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
......@@ -423,248 +424,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return dquot_file_open(inode, filp);
}
/*
* Here we use ext4_map_blocks() to get a block mapping for a extent-based
* file rather than ext4_ext_walk_space() because we can introduce
* SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
* function. When extent status tree has been fully implemented, it will
* track all extent status for a file and we can directly use it to
* retrieve the offset for SEEK_DATA/SEEK_HOLE.
*/
/*
* When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
* lookup page cache to check whether or not there has some data between
* [startoff, endoff] because, if this range contains an unwritten extent,
* we determine this extent as a data or a hole according to whether the
* page cache has data or not.
*/
static int ext4_find_unwritten_pgoff(struct inode *inode,
int whence,
ext4_lblk_t end_blk,
loff_t *offset)
{
struct pagevec pvec;
unsigned int blkbits;
pgoff_t index;
pgoff_t end;
loff_t endoff;
loff_t startoff;
loff_t lastoff;
int found = 0;
blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
endoff = (loff_t)end_blk << blkbits;
index = startoff >> PAGE_SHIFT;
end = (endoff - 1) >> PAGE_SHIFT;
pagevec_init(&pvec, 0);
do {
int i;
unsigned long nr_pages;
nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
&index, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
struct buffer_head *bh, *head;
/*
* If current offset is smaller than the page offset,
* there is a hole at this offset.
*/
if (whence == SEEK_HOLE && lastoff < endoff &&
lastoff < page_offset(pvec.pages[i])) {
found = 1;
*offset = lastoff;
goto out;
}
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping)) {
unlock_page(page);
continue;
}
if (!page_has_buffers(page)) {
unlock_page(page);
continue;
}
if (page_has_buffers(page)) {
lastoff = page_offset(page);
bh = head = page_buffers(page);
do {
if (lastoff + bh->b_size <= startoff)
goto next;
if (buffer_uptodate(bh) ||
buffer_unwritten(bh)) {
if (whence == SEEK_DATA)
found = 1;
} else {
if (whence == SEEK_HOLE)
found = 1;
}
if (found) {
*offset = max_t(loff_t,
startoff, lastoff);
unlock_page(page);
goto out;
}
next:
lastoff += bh->b_size;
bh = bh->b_this_page;
} while (bh != head);
}
lastoff = page_offset(page) + PAGE_SIZE;
unlock_page(page);
}
pagevec_release(&pvec);
} while (index <= end);
/* There are no pages upto endoff - that would be a hole in there. */
if (whence == SEEK_HOLE && lastoff < endoff) {
found = 1;
*offset = lastoff;
}
out:
pagevec_release(&pvec);
return found;
}
/*
* ext4_seek_data() retrieves the offset for SEEK_DATA.
*/
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t dataoff, isize;
int blkbits;
int ret;
inode_lock(inode);
isize = i_size_read(inode);
if (offset < 0 || offset >= isize) {
inode_unlock(inode);
return -ENXIO;
}
blkbits = inode->i_sb->s_blocksize_bits;
start = offset >> blkbits;
last = start;
end = isize >> blkbits;
dataoff = offset;
do {
ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
if (ret <= 0) {
/* No extent found -> no data */
if (ret == 0)
ret = -ENXIO;
inode_unlock(inode);
return ret;
}
last = es.es_lblk;
if (last != start)
dataoff = (loff_t)last << blkbits;
if (!ext4_es_is_unwritten(&es))
break;
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
es.es_lblk + es.es_len, &dataoff))
break;
last += es.es_len;
dataoff = (loff_t)last << blkbits;
cond_resched();
} while (last <= end);
inode_unlock(inode);
if (dataoff > isize)
return -ENXIO;
return vfs_setpos(file, dataoff, maxsize);
}
/*
* ext4_seek_hole() retrieves the offset for SEEK_HOLE.
*/
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t holeoff, isize;
int blkbits;
int ret;
inode_lock(inode);
isize = i_size_read(inode);
if (offset < 0 || offset >= isize) {
inode_unlock(inode);
return -ENXIO;
}
blkbits = inode->i_sb->s_blocksize_bits;
start = offset >> blkbits;
last = start;
end = isize >> blkbits;
holeoff = offset;
do {
ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
if (ret < 0) {
inode_unlock(inode);
return ret;
}
/* Found a hole? */
if (ret == 0 || es.es_lblk > last) {
if (last != start)
holeoff = (loff_t)last << blkbits;
break;
}
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
if (ext4_es_is_unwritten(&es) &&
ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
last + es.es_len, &holeoff))
break;
last += es.es_len;
holeoff = (loff_t)last << blkbits;
cond_resched();
} while (last <= end);
inode_unlock(inode);
if (holeoff > isize)
holeoff = isize;
return vfs_setpos(file, holeoff, maxsize);
}
/*
* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
* by calling generic_file_llseek_size() with the appropriate maxbytes
......@@ -681,18 +440,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
maxbytes = inode->i_sb->s_maxbytes;
switch (whence) {
case SEEK_SET:
case SEEK_CUR:
case SEEK_END:
default:
return generic_file_llseek_size(file, offset, whence,
maxbytes, i_size_read(inode));
case SEEK_DATA:
return ext4_seek_data(file, offset, maxbytes);
case SEEK_HOLE:
return ext4_seek_hole(file, offset, maxbytes);
inode_lock_shared(inode);
offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
inode_unlock_shared(inode);
break;
case SEEK_DATA:
inode_lock_shared(inode);
offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
inode_unlock_shared(inode);
break;
}
return -EINVAL;
if (offset < 0)
return offset;
return vfs_setpos(file, offset, maxbytes);
}
const struct file_operations ext4_file_operations = {
......
......@@ -1139,9 +1139,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_ino);
goto out;
}
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
inode->i_generation = prandom_u32();
/* Precompute checksum seed for inode metadata */
if (ext4_has_metadata_csum(sb)) {
......
......@@ -12,6 +12,7 @@
* GNU General Public License for more details.
*/
#include <linux/iomap.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
......@@ -302,11 +303,6 @@ static int ext4_create_inline_data(handle_t *handle,
EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
/*
* Propagate changes to inode->i_flags as well - e.g. S_DAX may
* get cleared
*/
ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
......@@ -451,11 +447,6 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
}
}
ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
/*
* Propagate changes to inode->i_flags as well - e.g. S_DAX may
* get set.
*/
ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
......@@ -1827,6 +1818,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
return ret;
}
int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap)
{
__u64 addr;
int error = -EAGAIN;
struct ext4_iloc iloc;
down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode))
goto out;
error = ext4_get_inode_loc(inode, &iloc);
if (error)
goto out;
addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
addr += offsetof(struct ext4_inode, i_block);
brelse(iloc.bh);
iomap->addr = addr;
iomap->offset = 0;
iomap->length = min_t(loff_t, ext4_get_inline_size(inode),
i_size_read(inode));
iomap->type = 0;
iomap->flags = IOMAP_F_DATA_INLINE;
out:
up_read(&EXT4_I(inode)->xattr_sem);
return error;
}
int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline, __u64 start, __u64 len)
......
......@@ -3394,7 +3394,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
#ifdef CONFIG_FS_DAX
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap)
{
......@@ -3403,17 +3402,54 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned long first_block = offset >> blkbits;
unsigned long last_block = (offset + length - 1) >> blkbits;
struct ext4_map_blocks map;
bool delalloc = false;
int ret;
if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
return -ERANGE;
if (flags & IOMAP_REPORT) {
if (ext4_has_inline_data(inode)) {
ret = ext4_inline_data_iomap(inode, iomap);
if (ret != -EAGAIN) {
if (ret == 0 && offset >= iomap->length)
ret = -ENOENT;
return ret;
}
}
} else {
if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
return -ERANGE;
}
map.m_lblk = first_block;
map.m_len = last_block - first_block + 1;
if (!(flags & IOMAP_WRITE)) {
if (flags & IOMAP_REPORT) {
ret = ext4_map_blocks(NULL, inode, &map, 0);
} else {
if (ret < 0)
return ret;
if (ret == 0) {
ext4_lblk_t end = map.m_lblk + map.m_len - 1;
struct extent_status es;
ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
if (!es.es_len || es.es_lblk > end) {
/* entire range is a hole */
} else if (es.es_lblk > map.m_lblk) {
/* range starts with a hole */
map.m_len = es.es_lblk - map.m_lblk;
} else {
ext4_lblk_t offs = 0;
if (es.es_lblk < map.m_lblk)
offs = map.m_lblk - es.es_lblk;
map.m_lblk = es.es_lblk + offs;
map.m_len = es.es_len - offs;
delalloc = true;
}
}
} else if (flags & IOMAP_WRITE) {
int dio_credits;
handle_t *handle;
int retries = 0;
......@@ -3464,17 +3500,21 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
}
}
ext4_journal_stop(handle);
} else {
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret < 0)
return ret;
}
iomap->flags = 0;
iomap->bdev = inode->i_sb->s_bdev;
iomap->dax_dev = sbi->s_daxdev;
iomap->offset = first_block << blkbits;
iomap->length = (u64)map.m_len << blkbits;
if (ret == 0) {
iomap->type = IOMAP_HOLE;
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->length = (u64)map.m_len << blkbits;
iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
} else {
if (map.m_flags & EXT4_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
......@@ -3484,12 +3524,12 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
WARN_ON_ONCE(1);
return -EIO;
}
iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
iomap->length = (u64)map.m_len << blkbits;
iomap->addr = (u64)map.m_pblk << blkbits;
}
if (map.m_flags & EXT4_MAP_NEW)
iomap->flags |= IOMAP_F_NEW;
return 0;
}
......@@ -3550,8 +3590,6 @@ const struct iomap_ops ext4_iomap_ops = {
.iomap_end = ext4_iomap_end,
};
#endif
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
......@@ -4573,6 +4611,21 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}
static bool ext4_should_use_dax(struct inode *inode)
{
if (!test_opt(inode->i_sb, DAX))
return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
return false;
if (ext4_has_inline_data(inode))
return false;
if (ext4_encrypted_inode(inode))
return false;
return true;
}
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
......@@ -4588,9 +4641,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
!ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
!(flags & EXT4_ENCRYPT_FL))
if (ext4_should_use_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
......@@ -5966,11 +6017,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
/*
* Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
* E.g. S_DAX may get cleared / set.
*/
ext4_set_inode_flags(inode);
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
......@@ -6106,70 +6152,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
return err;
}
/*
* Find the first extent at or after @lblk in an inode that is not a hole.
* Search for @map_len blocks at most. The extent is returned in @result.
*
* The function returns 1 if we found an extent. The function returns 0 in
* case there is no extent at or after @lblk and in that case also sets
* @result->es_len to 0. In case of error, the error code is returned.
*/
int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
unsigned int map_len, struct extent_status *result)
{
struct ext4_map_blocks map;
struct extent_status es = {};
int ret;
map.m_lblk = lblk;
map.m_len = map_len;
/*
* For non-extent based files this loop may iterate several times since
* we do not determine full hole size.
*/
while (map.m_len > 0) {
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret < 0)
return ret;
/* There's extent covering m_lblk? Just return it. */
if (ret > 0) {
int status;
ext4_es_store_pblock(result, map.m_pblk);
result->es_lblk = map.m_lblk;
result->es_len = map.m_len;
if (map.m_flags & EXT4_MAP_UNWRITTEN)
status = EXTENT_STATUS_UNWRITTEN;
else
status = EXTENT_STATUS_WRITTEN;
ext4_es_store_status(result, status);
return 1;
}
ext4_es_find_delayed_extent_range(inode, map.m_lblk,
map.m_lblk + map.m_len - 1,
&es);
/* Is delalloc data before next block in extent tree? */
if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
ext4_lblk_t offset = 0;
if (es.es_lblk < lblk)
offset = lblk - es.es_lblk;
result->es_lblk = es.es_lblk + offset;
ext4_es_store_pblock(result,
ext4_es_pblock(&es) + offset);
result->es_len = es.es_len - offset;
ext4_es_store_status(result, ext4_es_status(&es));
return 1;
}
/* There's a hole at m_lblk, advance us after it */
map.m_lblk += map.m_len;
map_len -= map.m_len;
map.m_len = map_len;
cond_resched();
}
result->es_len = 0;
return 0;
}
......@@ -15,6 +15,7 @@
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/random.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
......@@ -99,7 +100,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
int err;
struct inode *inode_bl;
struct ext4_inode_info *ei_bl;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
return -EINVAL;
......@@ -158,10 +158,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
inode->i_ctime = inode_bl->i_ctime = current_time(inode);
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
inode_bl->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
inode->i_generation = prandom_u32();
inode_bl->i_generation = prandom_u32();
ext4_discard_preallocations(inode);
......@@ -291,10 +289,20 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_out;
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
* Changes to the journaling mode can cause unsafe changes to
* S_DAX if we are using the DAX mount option.
*/
if (test_opt(inode->i_sb, DAX)) {
err = -EBUSY;
goto flags_out;
}
err = ext4_change_inode_journal_flag(inode, jflag);
if (err)
goto flags_out;
if (err)
goto flags_out;
}
if (migrate) {
if (flags & EXT4_EXTENTS_FL)
err = ext4_ext_migrate(inode);
......@@ -862,12 +870,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
int err = 0, err2 = 0;
ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
if (ext4_has_feature_bigalloc(sb)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not (yet) supported with bigalloc");
return -EOPNOTSUPP;
}
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) {
return -EFAULT;
......
......@@ -4994,8 +4994,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_buddy e4b;
int err = 0, ret, blk_free_count;
ext4_grpblk_t blocks_freed;
int err = 0, ret, free_clusters_count;
ext4_grpblk_t clusters_freed;
ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
unsigned long cluster_count = last_cluster - first_cluster + 1;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
......@@ -5007,8 +5010,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* Check to see if we are freeing blocks across a group
* boundary.
*/
if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
ext4_warning(sb, "too much blocks added to group %u",
if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
ext4_warning(sb, "too many blocks added to group %u",
block_group);
err = -EINVAL;
goto error_return;
......@@ -5054,14 +5057,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
if (err)
goto error_return;
for (i = 0, blocks_freed = 0; i < count; i++) {
for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
BUFFER_TRACE(bitmap_bh, "clear bit");
if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
ext4_error(sb, "bit already cleared for block %llu",
(ext4_fsblk_t)(block + i));
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
blocks_freed++;
clusters_freed++;
}
}
......@@ -5075,19 +5078,20 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* them with group lock_held
*/
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(NULL, &e4b, bit, count);
blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
ext4_free_group_clusters_set(sb, desc, blk_free_count);
mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
mb_free_blocks(NULL, &e4b, bit, cluster_count);
free_clusters_count = clusters_freed +
ext4_free_group_clusters(sb, desc);
ext4_free_group_clusters_set(sb, desc, free_clusters_count);
ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_NUM_B2C(sbi, blocks_freed));
clusters_freed);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
atomic64_add(clusters_freed,
&sbi->s_flex_groups[flex_group].free_clusters);
}
......
This diff is collapsed.
......@@ -1159,6 +1159,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (inode->i_ino == EXT4_ROOT_INO)
return -EPERM;
if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
return -EINVAL;
res = ext4_convert_inline_data(inode);
if (res)
return res;
......@@ -2790,14 +2793,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
* This function is called once a day if we have errors logged
* on the file system
*/
static void print_daily_error_info(unsigned long arg)
static void print_daily_error_info(struct timer_list *t)
{
struct super_block *sb = (struct super_block *) arg;
struct ext4_sb_info *sbi;
struct ext4_super_block *es;
sbi = EXT4_SB(sb);
es = sbi->s_es;
struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
struct super_block *sb = sbi->s_sb;
struct ext4_super_block *es = sbi->s_es;
if (es->s_error_count)
/* fsck newer than v1.41.13 is needed to clean this condition. */
......@@ -3707,6 +3707,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
goto failed_mount;
}
err = bdev_dax_supported(sb, blocksize);
if (err)
goto failed_mount;
......@@ -3976,11 +3981,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
setup_timer(&sbi->s_err_report, print_daily_error_info,
(unsigned long) sb);
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
/* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi))
......@@ -4613,7 +4615,8 @@ static int ext4_load_journal(struct super_block *sb,
"required on readonly filesystem");
if (really_read_only) {
ext4_msg(sb, KERN_ERR, "write access "
"unavailable, cannot proceed");
"unavailable, cannot proceed "
"(try mounting with noload)");
return -EROFS;
}
ext4_msg(sb, KERN_INFO, "write access will "
......
......@@ -350,8 +350,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
struct iomap *iomap)
{
sector_t sector = iomap->blkno +
(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
sector_t sector = (iomap->addr +
(pos & PAGE_MASK) - iomap->offset) >> 9;
return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
offset, bytes);
......@@ -510,11 +510,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
flags |= FIEMAP_EXTENT_MERGED;
if (iomap->flags & IOMAP_F_SHARED)
flags |= FIEMAP_EXTENT_SHARED;
if (iomap->flags & IOMAP_F_DATA_INLINE)
flags |= FIEMAP_EXTENT_DATA_INLINE;
return fiemap_fill_next_extent(fi, iomap->offset,
iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
iomap->length, flags);
}
static loff_t
......@@ -830,7 +831,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
bio = bio_alloc(GFP_KERNEL, 1);
bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
(iomap->addr + pos - iomap->offset) >> 9;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
......@@ -909,7 +910,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
bio = bio_alloc(GFP_KERNEL, nr_pages);
bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
(iomap->addr + pos - iomap->offset) >> 9;
bio->bi_write_hint = dio->iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
......
......@@ -165,11 +165,11 @@ static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
* Helper function used to manage commit timeouts
*/
static void commit_timeout(unsigned long __data)
static void commit_timeout(struct timer_list *t)
{
struct task_struct * p = (struct task_struct *) __data;
journal_t *journal = from_timer(journal, t, j_commit_timer);
wake_up_process(p);
wake_up_process(journal->j_task);
}
/*
......@@ -197,8 +197,7 @@ static int kjournald2(void *arg)
* Set up an interval timer which can be used to trigger a commit wakeup
* after the commit interval expires
*/
setup_timer(&journal->j_commit_timer, commit_timeout,
(unsigned long)current);
timer_setup(&journal->j_commit_timer, commit_timeout, 0);
set_freezable();
......
......@@ -66,7 +66,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
bex->es = PNFS_BLOCK_READ_DATA;
else
bex->es = PNFS_BLOCK_READWRITE_DATA;
bex->soff = (iomap.blkno << 9);
bex->soff = iomap.addr;
break;
case IOMAP_UNWRITTEN:
if (seg->iomode & IOMODE_RW) {
......@@ -79,7 +79,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
}
bex->es = PNFS_BLOCK_INVALID_DATA;
bex->soff = (iomap.blkno << 9);
bex->soff = iomap.addr;
break;
}
/*FALLTHRU*/
......
......@@ -54,13 +54,13 @@ xfs_bmbt_to_iomap(
struct xfs_mount *mp = ip->i_mount;
if (imap->br_startblock == HOLESTARTBLOCK) {
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_HOLE;
} else if (imap->br_startblock == DELAYSTARTBLOCK) {
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_DELALLOC;
} else {
iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
if (imap->br_state == XFS_EXT_UNWRITTEN)
iomap->type = IOMAP_UNWRITTEN;
else
......
......@@ -16,8 +16,8 @@ struct vm_fault;
*/
#define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */
#define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */
#define IOMAP_MAPPED 0x03 /* blocks allocated @blkno */
#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */
#define IOMAP_MAPPED 0x03 /* blocks allocated at @addr */
#define IOMAP_UNWRITTEN 0x04 /* blocks allocated at @addr in unwritten state */
/*
* Flags for all iomap mappings:
......@@ -27,16 +27,17 @@ struct vm_fault;
/*
* Flags that only need to be reported for IOMAP_REPORT requests:
*/
#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */
#define IOMAP_F_SHARED 0x20 /* block shared with another file */
#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */
#define IOMAP_F_SHARED 0x20 /* block shared with another file */
#define IOMAP_F_DATA_INLINE 0x40 /* data inline in the inode */
/*
* Magic value for blkno:
* Magic value for addr:
*/
#define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */
#define IOMAP_NULL_ADDR -1ULL /* addr is not valid */
struct iomap {
sector_t blkno; /* 1st sector of mapping, 512b units */
u64 addr; /* disk offset of mapping, bytes */
loff_t offset; /* file offset of mapping, bytes */
u64 length; /* length of mapping, bytes */
u16 type; /* type of mapping */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment