Commit 0c9ec4be authored by Darrick J. Wong's avatar Darrick J. Wong Committed by Theodore Ts'o

ext4: support GETFSMAP ioctls

Support the GETFSMAP ioctls so that we can use the xfs free space
management tools to probe ext4 as well.  Note that this is a partial
implementation -- we only report fixed-location metadata and free space;
everything else is reported as "unknown".
Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent d0649f04
...@@ -4,11 +4,11 @@ ...@@ -4,11 +4,11 @@
obj-$(CONFIG_EXT4_FS) += ext4.o obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
xattr_trusted.o inline.o readpage.o sysfs.o super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
This diff is collapsed.
/*
* Copyright (C) 2017 Oracle. All Rights Reserved.
*
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __EXT4_FSMAP_H__
#define __EXT4_FSMAP_H__
struct fsmap;
/* internal fsmap representation */
struct ext4_fsmap {
struct list_head fmr_list;
dev_t fmr_device; /* device id */
uint32_t fmr_flags; /* mapping flags */
uint64_t fmr_physical; /* device offset of segment */
uint64_t fmr_owner; /* owner id */
uint64_t fmr_length; /* length of segment, blocks */
};
struct ext4_fsmap_head {
uint32_t fmh_iflags; /* control flags */
uint32_t fmh_oflags; /* output flags */
unsigned int fmh_count; /* # of entries in array incl. input */
unsigned int fmh_entries; /* # of entries filled in (output). */
struct ext4_fsmap fmh_keys[2]; /* low and high keys */
};
void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest,
struct ext4_fsmap *src);
void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest,
struct fsmap *src);
/* fsmap to userspace formatter - copy to user & advance pointer */
typedef int (*ext4_fsmap_format_t)(struct ext4_fsmap *, void *);
int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head,
ext4_fsmap_format_t formatter, void *arg);
#define EXT4_QUERY_RANGE_ABORT 1
#define EXT4_QUERY_RANGE_CONTINUE 0
/* fmr_owner special values for FS_IOC_GETFSMAP; some share w/ XFS */
#define EXT4_FMR_OWN_FREE FMR_OWN_FREE /* free space */
#define EXT4_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */
#define EXT4_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */
#define EXT4_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */
#define EXT4_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */
#define EXT4_FMR_OWN_GDT FMR_OWNER('f', 1) /* group descriptors */
#define EXT4_FMR_OWN_RESV_GDT FMR_OWNER('f', 2) /* reserved gdt blocks */
#define EXT4_FMR_OWN_BLKBM FMR_OWNER('f', 3) /* inode bitmap */
#define EXT4_FMR_OWN_INOBM FMR_OWNER('f', 4) /* block bitmap */
#endif /* __EXT4_FSMAP_H__ */
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
#include <linux/delay.h> #include <linux/delay.h>
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "ext4.h" #include "ext4.h"
#include <linux/fsmap.h>
#include "fsmap.h"
#include <trace/events/ext4.h>
/** /**
* Swap memory between @a and @b for @len bytes. * Swap memory between @a and @b for @len bytes.
...@@ -489,6 +492,90 @@ int ext4_shutdown(struct super_block *sb, unsigned long arg) ...@@ -489,6 +492,90 @@ int ext4_shutdown(struct super_block *sb, unsigned long arg)
return 0; return 0;
} }
struct getfsmap_info {
struct super_block *gi_sb;
struct fsmap_head __user *gi_data;
unsigned int gi_idx;
__u32 gi_last_flags;
};
static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv)
{
struct getfsmap_info *info = priv;
struct fsmap fm;
trace_ext4_getfsmap_mapping(info->gi_sb, xfm);
info->gi_last_flags = xfm->fmr_flags;
ext4_fsmap_from_internal(info->gi_sb, &fm, xfm);
if (copy_to_user(&info->gi_data->fmh_recs[info->gi_idx++], &fm,
sizeof(struct fsmap)))
return -EFAULT;
return 0;
}
static int ext4_ioc_getfsmap(struct super_block *sb,
struct fsmap_head __user *arg)
{
struct getfsmap_info info = {0};
struct ext4_fsmap_head xhead = {0};
struct fsmap_head head;
bool aborted = false;
int error;
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
return -EFAULT;
if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
sizeof(head.fmh_keys[0].fmr_reserved)) ||
memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
sizeof(head.fmh_keys[1].fmr_reserved)))
return -EINVAL;
/*
* ext4 doesn't report file extents at all, so the only valid
* file offsets are the magic ones (all zeroes or all ones).
*/
if (head.fmh_keys[0].fmr_offset ||
(head.fmh_keys[1].fmr_offset != 0 &&
head.fmh_keys[1].fmr_offset != -1ULL))
return -EINVAL;
xhead.fmh_iflags = head.fmh_iflags;
xhead.fmh_count = head.fmh_count;
ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]);
ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]);
trace_ext4_getfsmap_low_key(sb, &xhead.fmh_keys[0]);
trace_ext4_getfsmap_high_key(sb, &xhead.fmh_keys[1]);
info.gi_sb = sb;
info.gi_data = arg;
error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info);
if (error == EXT4_QUERY_RANGE_ABORT) {
error = 0;
aborted = true;
} else if (error)
return error;
/* If we didn't abort, set the "last" flag in the last fmx */
if (!aborted && info.gi_idx) {
info.gi_last_flags |= FMR_OF_LAST;
if (copy_to_user(&info.gi_data->fmh_recs[info.gi_idx - 1].fmr_flags,
&info.gi_last_flags,
sizeof(info.gi_last_flags)))
return -EFAULT;
}
/* copy back header */
head.fmh_entries = xhead.fmh_entries;
head.fmh_oflags = xhead.fmh_oflags;
if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
return -EFAULT;
return 0;
}
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ {
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(filp);
...@@ -499,6 +586,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -499,6 +586,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
switch (cmd) { switch (cmd) {
case FS_IOC_GETFSMAP:
return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS: case EXT4_IOC_GETFLAGS:
ext4_get_inode_flags(ei); ext4_get_inode_flags(ei);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE; flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
...@@ -1009,6 +1098,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -1009,6 +1098,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GET_ENCRYPTION_PWSALT: case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY: case EXT4_IOC_GET_ENCRYPTION_POLICY:
case EXT4_IOC_SHUTDOWN: case EXT4_IOC_SHUTDOWN:
case FS_IOC_GETFSMAP:
break; break;
default: default:
return -ENOIOCTLCMD; return -ENOIOCTLCMD;
......
...@@ -5277,3 +5277,52 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) ...@@ -5277,3 +5277,52 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
return ret; return ret;
} }
/* Iterate all the free extents in the group. */
int
ext4_mballoc_query_range(
struct super_block *sb,
ext4_group_t group,
ext4_grpblk_t start,
ext4_grpblk_t end,
ext4_mballoc_query_range_fn formatter,
void *priv)
{
void *bitmap;
ext4_grpblk_t next;
struct ext4_buddy e4b;
int error;
error = ext4_mb_load_buddy(sb, group, &e4b);
if (error)
return error;
bitmap = e4b.bd_bitmap;
ext4_lock_group(sb, group);
start = (e4b.bd_info->bb_first_free > start) ?
e4b.bd_info->bb_first_free : start;
if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
while (start <= end) {
start = mb_find_next_zero_bit(bitmap, end + 1, start);
if (start > end)
break;
next = mb_find_next_bit(bitmap, end + 1, start);
ext4_unlock_group(sb, group);
error = formatter(sb, group, start, next - start, priv);
if (error)
goto out_unload;
ext4_lock_group(sb, group);
start = next + 1;
}
ext4_unlock_group(sb, group);
out_unload:
ext4_mb_unload_buddy(&e4b);
return error;
}
...@@ -199,4 +199,21 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, ...@@ -199,4 +199,21 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
return ext4_group_first_block_no(sb, fex->fe_group) + return ext4_group_first_block_no(sb, fex->fe_group) +
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits); (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
} }
typedef int (*ext4_mballoc_query_range_fn)(
struct super_block *sb,
ext4_group_t agno,
ext4_grpblk_t start,
ext4_grpblk_t len,
void *priv);
int
ext4_mballoc_query_range(
struct super_block *sb,
ext4_group_t agno,
ext4_grpblk_t start,
ext4_grpblk_t end,
ext4_mballoc_query_range_fn formatter,
void *priv);
#endif #endif
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
#include "mballoc.h" #include "mballoc.h"
#include "fsmap.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
......
...@@ -15,6 +15,7 @@ struct ext4_inode_info; ...@@ -15,6 +15,7 @@ struct ext4_inode_info;
struct mpage_da_data; struct mpage_da_data;
struct ext4_map_blocks; struct ext4_map_blocks;
struct extent_status; struct extent_status;
struct ext4_fsmap;
#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
...@@ -2529,6 +2530,79 @@ TRACE_EVENT(ext4_es_shrink, ...@@ -2529,6 +2530,79 @@ TRACE_EVENT(ext4_es_shrink,
__entry->scan_time, __entry->nr_skipped, __entry->retried) __entry->scan_time, __entry->nr_skipped, __entry->retried)
); );
/* fsmap traces */
DECLARE_EVENT_CLASS(ext4_fsmap_class,
TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len,
u64 owner),
TP_ARGS(sb, keydev, agno, bno, len, owner),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, keydev)
__field(u32, agno)
__field(u64, bno)
__field(u64, len)
__field(u64, owner)
),
TP_fast_assign(
__entry->dev = sb->s_bdev->bd_dev;
__entry->keydev = new_decode_dev(keydev);
__entry->agno = agno;
__entry->bno = bno;
__entry->len = len;
__entry->owner = owner;
),
TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->agno,
__entry->bno,
__entry->len,
__entry->owner)
)
#define DEFINE_FSMAP_EVENT(name) \
DEFINE_EVENT(ext4_fsmap_class, name, \
TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \
u64 owner), \
TP_ARGS(sb, keydev, agno, bno, len, owner))
DEFINE_FSMAP_EVENT(ext4_fsmap_low_key);
DEFINE_FSMAP_EVENT(ext4_fsmap_high_key);
DEFINE_FSMAP_EVENT(ext4_fsmap_mapping);
DECLARE_EVENT_CLASS(ext4_getfsmap_class,
TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap),
TP_ARGS(sb, fsmap),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, keydev)
__field(u64, block)
__field(u64, len)
__field(u64, owner)
__field(u64, flags)
),
TP_fast_assign(
__entry->dev = sb->s_bdev->bd_dev;
__entry->keydev = new_decode_dev(fsmap->fmr_device);
__entry->block = fsmap->fmr_physical;
__entry->len = fsmap->fmr_length;
__entry->owner = fsmap->fmr_owner;
__entry->flags = fsmap->fmr_flags;
),
TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld flags 0x%llx\n",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->block,
__entry->len,
__entry->owner,
__entry->flags)
)
#define DEFINE_GETFSMAP_EVENT(name) \
DEFINE_EVENT(ext4_getfsmap_class, name, \
TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), \
TP_ARGS(sb, fsmap))
DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key);
DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key);
DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping);
#endif /* _TRACE_EXT4_H */ #endif /* _TRACE_EXT4_H */
/* This part must be outside protection */ /* This part must be outside protection */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment