Commit a74b81b0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2

* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (28 commits)
  Ocfs2: Teach local-mounted ocfs2 to handle unwritten_extents correctly.
  ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
  ocfs2/dlm: Add new dlm message DLM_BEGIN_EXIT_DOMAIN_MSG
  Ocfs2/move_extents: Set several trivial constraints for threshold.
  Ocfs2/move_extents: Let defrag handle partial extent moving.
  Ocfs2/move_extents: move/defrag extents within a certain range.
  Ocfs2/move_extents: helper to calculate the defraging length in one run.
  Ocfs2/move_extents: move entire/partial extent.
  Ocfs2/move_extents: helpers to update the group descriptor and global bitmap inode.
  Ocfs2/move_extents: helper to probe a proper region to move in an alloc group.
  Ocfs2/move_extents: helper to validate and adjust moving goal.
  Ocfs2/move_extents: find the victim alloc group, where the given #blk fits.
  Ocfs2/move_extents: defrag a range of extent.
  Ocfs2/move_extents: move a range of extent.
  Ocfs2/move_extents: lock allocators and reserve metadata blocks and data clusters for extents moving.
  Ocfs2/move_extents: Add basic framework and source files for extent moving.
  Ocfs2/move_extents: Adding new ioctl code 'OCFS2_IOC_MOVE_EXT' to ocfs2.
  Ocfs2/refcounttree: Publicize couple of funcs from refcounttree.c
  Ocfs2: Add a new code 'OCFS2_INFO_FREEFRAG' for o2info ioctl.
  Ocfs2: Add a new code 'OCFS2_INFO_FREEINODE' for o2info ioctl.
  ...
parents f8d613e2 ece928df
What: /sys/o2cb symlink What: /sys/o2cb symlink
Date: Dec 2005 Date: May 2011
KernelVersion: 2.6.16 KernelVersion: 2.6.40
Contact: ocfs2-devel@oss.oracle.com Contact: ocfs2-devel@oss.oracle.com
Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is
be removed when new versions of ocfs2-tools which know to look removed when new versions of ocfs2-tools which know to look
in /sys/fs/o2cb are sufficiently prevalent. Don't code new in /sys/fs/o2cb are sufficiently prevalent. Don't code new
software to look here, it should try /sys/fs/o2cb instead. software to look here, it should try /sys/fs/o2cb instead.
See Documentation/ABI/stable/o2cb for more information on usage.
Users: ocfs2-tools. It's sufficient to mail proposed changes to Users: ocfs2-tools. It's sufficient to mail proposed changes to
ocfs2-devel@oss.oracle.com. ocfs2-devel@oss.oracle.com.
...@@ -262,16 +262,6 @@ Who: Michael Buesch <mb@bu3sch.de> ...@@ -262,16 +262,6 @@ Who: Michael Buesch <mb@bu3sch.de>
--------------------------- ---------------------------
What: /sys/o2cb symlink
When: January 2010
Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb
exists as a symlink for backwards compatibility for old versions of
ocfs2-tools. 2 years should be sufficient time to phase in new versions
which know to look in /sys/fs/o2cb.
Who: ocfs2-devel@oss.oracle.com
---------------------------
What: Ability for non root users to shm_get hugetlb pages based on mlock What: Ability for non root users to shm_get hugetlb pages based on mlock
resource limits resource limits
When: 2.6.31 When: 2.6.31
......
...@@ -46,9 +46,15 @@ errors=panic Panic and halt the machine if an error occurs. ...@@ -46,9 +46,15 @@ errors=panic Panic and halt the machine if an error occurs.
intr (*) Allow signals to interrupt cluster operations. intr (*) Allow signals to interrupt cluster operations.
nointr Do not allow signals to interrupt cluster nointr Do not allow signals to interrupt cluster
operations. operations.
noatime Do not update access time.
relatime(*) Update atime if the previous atime is older than
mtime or ctime
strictatime Always update atime, but the minimum update interval
is specified by atime_quantum.
atime_quantum=60(*) OCFS2 will not update atime unless this number atime_quantum=60(*) OCFS2 will not update atime unless this number
of seconds has passed since the last update. of seconds has passed since the last update.
Set to zero to always update atime. Set to zero to always update atime. This option must
be used together with strictatime.
data=ordered (*) All data are forced directly out to the main file data=ordered (*) All data are forced directly out to the main file
system prior to its metadata being committed to the system prior to its metadata being committed to the
journal. journal.
......
...@@ -30,6 +30,7 @@ ocfs2-objs := \ ...@@ -30,6 +30,7 @@ ocfs2-objs := \
namei.o \ namei.o \
refcounttree.o \ refcounttree.o \
reservations.o \ reservations.o \
move_extents.o \
resize.o \ resize.o \
slot_map.o \ slot_map.o \
suballoc.o \ suballoc.o \
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <linux/blkdev.h>
#include <cluster/masklog.h> #include <cluster/masklog.h>
...@@ -7184,3 +7185,168 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, ...@@ -7184,3 +7185,168 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
out: out:
return ret; return ret;
} }
/*
 * Issue a discard for one run of free clusters inside a cluster group.
 *
 * @sb:    super block of the filesystem being trimmed
 * @gd:    descriptor of the cluster group that owns the run
 * @start: first free bit of the run, as an offset into the group bitmap
 * @count: number of bits (clusters) in the run
 *
 * Returns the result of sb_issue_discard().
 */
static int ocfs2_trim_extent(struct super_block *sb,
			     struct ocfs2_group_desc *gd,
			     u32 start, u32 count)
{
	u64 discard, bcount;
	u64 gd_blkno = le64_to_cpu(gd->bg_blkno);

	bcount = ocfs2_clusters_to_blocks(sb, count);
	discard = ocfs2_clusters_to_blocks(sb, start);

	/*
	 * ocfs2_trim_fs() computes bit offsets for the first cluster group
	 * from cluster 0, while for every other group they are relative to
	 * the group's own starting block.  The first group's descriptor does
	 * not live at block 0, so adding its block number would start the
	 * discard too late and could run past the end of the group.  Only
	 * add the descriptor block for the non-first groups.
	 * NOTE(review): relies on gd->bg_blkno of the first group being equal
	 * to osb->first_cluster_group_blkno - confirm.
	 */
	if (gd_blkno != OCFS2_SB(sb)->first_cluster_group_blkno)
		discard += gd_blkno;

	trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);

	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
}
/*
 * Walk the bitmap of one cluster group between bits [start, max) and
 * discard every run of clear (free) bits that is at least minbits long.
 *
 * Returns the number of bits handed to ocfs2_trim_extent(), the negative
 * error returned by ocfs2_trim_extent(), or -ERESTARTSYS when a fatal
 * signal is pending for the current task.
 */
static int ocfs2_trim_group(struct super_block *sb,
struct ocfs2_group_desc *gd,
u32 start, u32 max, u32 minbits)
{
int ret = 0, count = 0, next;
void *bitmap = gd->bg_bitmap;
/* Not enough free bits in the whole group to form a qualifying run. */
if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
return 0;
trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
start, max, minbits);
while (start < max) {
/* start <- first clear bit at or after the current position */
start = ocfs2_find_next_zero_bit(bitmap, max, start);
if (start >= max)
break;
/* next <- first set bit after start, i.e. the end of the free run */
next = ocfs2_find_next_bit(bitmap, max, start);
if ((next - start) >= minbits) {
ret = ocfs2_trim_extent(sb, gd,
start, next - start);
if (ret < 0) {
mlog_errno(ret);
break;
}
count += next - start;
}
/* Bit 'next' is set (or is max), so resume the search just past it. */
start = next + 1;
if (fatal_signal_pending(current)) {
count = -ERESTARTSYS;
break;
}
/* Stop early once the free bits not yet trimmed cannot reach minbits. */
if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
break;
}
/* A discard failure takes precedence over the running count. */
if (ret < 0)
count = ret;
return count;
}
/*
 * Trim (discard) free space in the range described by @range.
 *
 * range->start, range->len and range->minlen are shifted down by
 * osb->s_clustersize_bits before use.  The global bitmap system inode is
 * locked (i_mutex, then the cluster inode lock) while each cluster group
 * overlapping the range is read and passed to ocfs2_trim_group().
 *
 * On the path that runs the group loop, range->len is overwritten with a
 * value derived from the amount trimmed.  Returns 0 or a negative errno.
 */
int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
struct ocfs2_super *osb = OCFS2_SB(sb);
u64 start, len, trimmed, first_group, last_group, group;
int ret, cnt;
u32 first_bit, last_bit, minlen;
struct buffer_head *main_bm_bh = NULL;
struct inode *main_bm_inode = NULL;
struct buffer_head *gd_bh = NULL;
struct ocfs2_dinode *main_bm;
struct ocfs2_group_desc *gd = NULL;
/* Convert the caller's range into cluster units. */
start = range->start >> osb->s_clustersize_bits;
len = range->len >> osb->s_clustersize_bits;
minlen = range->minlen >> osb->s_clustersize_bits;
trimmed = 0;
/* A range shorter than one cluster: nothing to trim, report 0. */
if (!len) {
range->len = 0;
return 0;
}
/* presumably bitmap_cpg is clusters per group; a run can never be that long */
if (minlen >= osb->bitmap_cpg)
return -EINVAL;
main_bm_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
if (!main_bm_inode) {
ret = -EIO;
mlog_errno(ret);
goto out;
}
mutex_lock(&main_bm_inode->i_mutex);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
if (ret < 0) {
mlog_errno(ret);
goto out_mutex;
}
main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
/* Reject a start beyond the bitmap; range->len is left untouched here. */
if (start >= le32_to_cpu(main_bm->i_clusters)) {
ret = -EINVAL;
goto out_unlock;
}
/* Clamp the length so the range ends at the last cluster in the bitmap. */
if (start + len > le32_to_cpu(main_bm->i_clusters))
len = le32_to_cpu(main_bm->i_clusters) - start;
trace_ocfs2_trim_fs(start, len, minlen);
/* Determine first and last group to examine based on start and len */
first_group = ocfs2_which_cluster_group(main_bm_inode, start);
/*
 * In the first cluster group the bit offset is the cluster number itself;
 * in any other group it is relative to the group's starting block.
 */
if (first_group == osb->first_cluster_group_blkno)
first_bit = start;
else
first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
last_bit = osb->bitmap_cpg;
for (group = first_group; group <= last_group;) {
/* Trim to the end of this group unless the range ends inside it. */
if (first_bit + len >= osb->bitmap_cpg)
last_bit = osb->bitmap_cpg;
else
last_bit = first_bit + len;
ret = ocfs2_read_group_descriptor(main_bm_inode,
main_bm, group,
&gd_bh);
if (ret < 0) {
mlog_errno(ret);
break;
}
gd = (struct ocfs2_group_desc *)gd_bh->b_data;
cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
brelse(gd_bh);
gd_bh = NULL;
if (cnt < 0) {
ret = cnt;
mlog_errno(ret);
break;
}
trimmed += cnt;
/* Account for the part of the range covered by this group. */
len -= osb->bitmap_cpg - first_bit;
first_bit = 0;
/*
 * Advance to the next group descriptor.  The first group is special:
 * the following group starts at exactly bitmap_cpg clusters, not at
 * first_cluster_group_blkno plus that amount.
 */
if (group == osb->first_cluster_group_blkno)
group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
else
group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
}
/*
 * NOTE(review): 'trimmed' accumulates the bit counts returned by
 * ocfs2_trim_group(), and the inputs above were converted with
 * s_clustersize_bits, yet the result is scaled by s_blocksize - confirm
 * whether this should be the cluster size instead.
 */
range->len = trimmed * sb->s_blocksize;
out_unlock:
ocfs2_inode_unlock(main_bm_inode, 0);
brelse(main_bm_bh);
out_mutex:
mutex_unlock(&main_bm_inode->i_mutex);
iput(main_bm_inode);
out:
return ret;
}
...@@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci, ...@@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
struct buffer_head **leaf_bh); struct buffer_head **leaf_bh);
int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range);
/* /*
* Helper function to look at the # of clusters in an extent record. * Helper function to look at the # of clusters in an extent record.
*/ */
......
...@@ -57,7 +57,6 @@ static struct kset *o2cb_kset; ...@@ -57,7 +57,6 @@ static struct kset *o2cb_kset;
void o2cb_sys_shutdown(void) void o2cb_sys_shutdown(void)
{ {
mlog_sys_shutdown(); mlog_sys_shutdown();
sysfs_remove_link(NULL, "o2cb");
kset_unregister(o2cb_kset); kset_unregister(o2cb_kset);
} }
...@@ -69,14 +68,6 @@ int o2cb_sys_init(void) ...@@ -69,14 +68,6 @@ int o2cb_sys_init(void)
if (!o2cb_kset) if (!o2cb_kset)
return -ENOMEM; return -ENOMEM;
/*
* Create this symlink for backwards compatibility with old
* versions of ocfs2-tools which look for things in /sys/o2cb.
*/
ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb");
if (ret)
goto error;
ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
if (ret) if (ret)
goto error; goto error;
......
...@@ -144,6 +144,7 @@ struct dlm_ctxt ...@@ -144,6 +144,7 @@ struct dlm_ctxt
wait_queue_head_t dlm_join_events; wait_queue_head_t dlm_join_events;
unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct dlm_recovery_ctxt reco; struct dlm_recovery_ctxt reco;
spinlock_t master_lock; spinlock_t master_lock;
...@@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb) ...@@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb)
return 1; return 1;
} }
/*
 * Translate a lock resource list index into a short printable name.
 * Any index other than the granted, converting or blocked list yields
 * "unknown".
 */
static inline char *dlm_list_in_text(enum dlm_lockres_list idx)
{
	switch (idx) {
	case DLM_GRANTED_LIST:
		return "granted";
	case DLM_CONVERTING_LIST:
		return "converting";
	case DLM_BLOCKED_LIST:
		return "blocked";
	default:
		return "unknown";
	}
}
static inline struct list_head * static inline struct list_head *
dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
{ {
...@@ -448,6 +461,7 @@ enum { ...@@ -448,6 +461,7 @@ enum {
DLM_FINALIZE_RECO_MSG = 518, DLM_FINALIZE_RECO_MSG = 518,
DLM_QUERY_REGION = 519, DLM_QUERY_REGION = 519,
DLM_QUERY_NODEINFO = 520, DLM_QUERY_NODEINFO = 520,
DLM_BEGIN_EXIT_DOMAIN_MSG = 521,
}; };
struct dlm_reco_node_data struct dlm_reco_node_data
......
...@@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) ...@@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
buf + out, len - out); buf + out, len - out);
out += snprintf(buf + out, len - out, "\n"); out += snprintf(buf + out, len - out, "\n");
/* Exit Domain Map: xx xx xx */
out += snprintf(buf + out, len - out, "Exit Domain Map: ");
out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES,
buf + out, len - out);
out += snprintf(buf + out, len - out, "\n");
/* Live Map: xx xx xx */ /* Live Map: xx xx xx */
out += snprintf(buf + out, len - out, "Live Map: "); out += snprintf(buf + out, len - out, "Live Map: ");
out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
......
...@@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); ...@@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
* New in version 1.1: * New in version 1.1:
* - Message DLM_QUERY_REGION added to support global heartbeat * - Message DLM_QUERY_REGION added to support global heartbeat
* - Message DLM_QUERY_NODEINFO added to allow online node removes * - Message DLM_QUERY_NODEINFO added to allow online node removes
* New in version 1.2:
* - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain
*/ */
static const struct dlm_protocol_version dlm_protocol = { static const struct dlm_protocol_version dlm_protocol = {
.pv_major = 1, .pv_major = 1,
.pv_minor = 1, .pv_minor = 2,
}; };
#define DLM_DOMAIN_BACKOFF_MS 200 #define DLM_DOMAIN_BACKOFF_MS 200
...@@ -449,14 +451,18 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) ...@@ -449,14 +451,18 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
dropped = dlm_empty_lockres(dlm, res); dropped = dlm_empty_lockres(dlm, res);
spin_lock(&res->spinlock); spin_lock(&res->spinlock);
__dlm_lockres_calc_usage(dlm, res); if (dropped)
iter = res->hash_node.next; __dlm_lockres_calc_usage(dlm, res);
else
iter = res->hash_node.next;
spin_unlock(&res->spinlock); spin_unlock(&res->spinlock);
dlm_lockres_put(res); dlm_lockres_put(res);
if (dropped) if (dropped) {
cond_resched_lock(&dlm->spinlock);
goto redo_bucket; goto redo_bucket;
}
} }
cond_resched_lock(&dlm->spinlock); cond_resched_lock(&dlm->spinlock);
num += n; num += n;
...@@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm) ...@@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm)
return ret; return ret;
} }
/*
 * o2net handler for DLM_BEGIN_EXIT_DOMAIN_MSG.
 *
 * Records the sending node in dlm->exit_domain_map under dlm->spinlock.
 * Always returns 0, including when a reference on the dlm context could
 * not be taken (in which case the message is ignored).
 */
static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len,
					 void *data, void **ret_data)
{
	struct dlm_ctxt *dlm = data;
	struct dlm_exit_domain *exit_msg;
	unsigned int sender;

	exit_msg = (struct dlm_exit_domain *) msg->buf;

	if (dlm_grab(dlm)) {
		sender = exit_msg->node_idx;

		mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, sender);

		spin_lock(&dlm->spinlock);
		set_bit(sender, dlm->exit_domain_map);
		spin_unlock(&dlm->spinlock);

		dlm_put(dlm);
	}

	return 0;
}
static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm)
{ {
/* Yikes, a double spinlock! I need domain_lock for the dlm /* Yikes, a double spinlock! I need domain_lock for the dlm
...@@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, ...@@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
spin_lock(&dlm->spinlock); spin_lock(&dlm->spinlock);
clear_bit(node, dlm->domain_map); clear_bit(node, dlm->domain_map);
clear_bit(node, dlm->exit_domain_map);
__dlm_print_nodes(dlm); __dlm_print_nodes(dlm);
/* notify anything attached to the heartbeat events */ /* notify anything attached to the heartbeat events */
...@@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, ...@@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
return 0; return 0;
} }
static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type,
unsigned int node) unsigned int node)
{ {
int status; int status;
struct dlm_exit_domain leave_msg; struct dlm_exit_domain leave_msg;
mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name,
node, dlm->name, dlm->node_num); msg_type, node);
memset(&leave_msg, 0, sizeof(leave_msg)); memset(&leave_msg, 0, sizeof(leave_msg));
leave_msg.node_idx = dlm->node_num; leave_msg.node_idx = dlm->node_num;
status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, status = o2net_send_message(msg_type, dlm->key, &leave_msg,
&leave_msg, sizeof(leave_msg), node, sizeof(leave_msg), node, NULL);
NULL);
if (status < 0) if (status < 0)
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " mlog(ML_ERROR, "Error %d sending domain exit message %u "
"node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node); "to node %u on domain %s\n", status, msg_type, node,
mlog(0, "status return %d from o2net_send_message\n", status); dlm->name);
return status; return status;
} }
/*
 * Send DLM_BEGIN_EXIT_DOMAIN_MSG to every other node set in
 * dlm->domain_map.  Does nothing when the negotiated locking protocol is
 * 1.x with x < 2.  Send failures are not acted upon here.
 */
static void dlm_begin_exit_domain(struct dlm_ctxt *dlm)
{
	int peer;

	/* Support for begin exit domain was added in 1.2 */
	if (dlm->dlm_locking_proto.pv_major == 1 &&
	    dlm->dlm_locking_proto.pv_minor < 2)
		return;

	/*
	 * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely
	 * informational. Meaning if a node does not receive the message,
	 * so be it.
	 */
	spin_lock(&dlm->spinlock);
	for (peer = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0);
	     peer < O2NM_MAX_NODES;
	     peer = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, peer + 1)) {
		if (peer == dlm->node_num)
			continue;

		/* The spinlock is released around the network send. */
		spin_unlock(&dlm->spinlock);
		dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, peer);
		spin_lock(&dlm->spinlock);
	}
	spin_unlock(&dlm->spinlock);
}
static void dlm_leave_domain(struct dlm_ctxt *dlm) static void dlm_leave_domain(struct dlm_ctxt *dlm)
{ {
...@@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) ...@@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
clear_node = 1; clear_node = 1;
status = dlm_send_one_domain_exit(dlm, node); status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG,
node);
if (status < 0 && if (status < 0 &&
status != -ENOPROTOOPT && status != -ENOPROTOOPT &&
status != -ENOTCONN) { status != -ENOTCONN) {
...@@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) ...@@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
if (leave) { if (leave) {
mlog(0, "shutting down domain %s\n", dlm->name); mlog(0, "shutting down domain %s\n", dlm->name);
dlm_begin_exit_domain(dlm);
/* We changed dlm state, notify the thread */ /* We changed dlm state, notify the thread */
dlm_kick_thread(dlm, NULL); dlm_kick_thread(dlm, NULL);
...@@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, ...@@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
* leftover join state. */ * leftover join state. */
BUG_ON(dlm->joining_node != assert->node_idx); BUG_ON(dlm->joining_node != assert->node_idx);
set_bit(assert->node_idx, dlm->domain_map); set_bit(assert->node_idx, dlm->domain_map);
clear_bit(assert->node_idx, dlm->exit_domain_map);
__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
...@@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) ...@@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
if (status) if (status)
goto bail; goto bail;
status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key,
sizeof(struct dlm_exit_domain),
dlm_begin_exit_domain_handler,
dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
bail: bail:
if (status) if (status)
dlm_unregister_domain_handlers(dlm); dlm_unregister_domain_handlers(dlm);
......
This diff is collapsed.
...@@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) ...@@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
mlog(0, "node %u being removed from domain map!\n", idx); mlog(0, "node %u being removed from domain map!\n", idx);
clear_bit(idx, dlm->domain_map); clear_bit(idx, dlm->domain_map);
clear_bit(idx, dlm->exit_domain_map);
/* wake up migration waiters if a node goes down. /* wake up migration waiters if a node goes down.
* perhaps later we can genericize this for other waiters. */ * perhaps later we can genericize this for other waiters. */
wake_up(&dlm->migration_wq); wake_up(&dlm->migration_wq);
......
...@@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker; ...@@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker;
* signifies a bast fired on the lock. * signifies a bast fired on the lock.
*/ */
#define DLMFS_CAPABILITIES "bast stackglue" #define DLMFS_CAPABILITIES "bast stackglue"
extern int param_set_dlmfs_capabilities(const char *val, static int param_set_dlmfs_capabilities(const char *val,
struct kernel_param *kp) struct kernel_param *kp)
{ {
printk(KERN_ERR "%s: readonly parameter\n", kp->name); printk(KERN_ERR "%s: readonly parameter\n", kp->name);
......
...@@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = { ...@@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.flock = ocfs2_flock, .flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read, .splice_read = ocfs2_file_splice_read,
.splice_write = ocfs2_file_splice_write, .splice_write = ocfs2_file_splice_write,
.fallocate = ocfs2_fallocate,
}; };
const struct file_operations ocfs2_dops_no_plocks = { const struct file_operations ocfs2_dops_no_plocks = {
......
This diff is collapsed.
This diff is collapsed.
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* move_extents.h
*
* Copyright (C) 2011 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_MOVE_EXTENTS_H
#define OCFS2_MOVE_EXTENTS_H
int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp);
#endif /* OCFS2_MOVE_EXTENTS_H */
...@@ -142,6 +142,38 @@ struct ocfs2_info_journal_size { ...@@ -142,6 +142,38 @@ struct ocfs2_info_journal_size {
__u64 ij_journal_size; __u64 ij_journal_size;
}; };
/*
 * Info request carrying per-slot inode counters; presumably the payload
 * used with the OCFS2_INFO_FREEINODE request code - confirm against the
 * ioctl handler.
 */
struct ocfs2_info_freeinode {
struct ocfs2_info_request ifi_req;
/* One entry per slot, up to OCFS2_MAX_SLOTS. */
struct ocfs2_info_local_freeinode {
__u64 lfi_total; /* presumably total inodes for the slot - confirm */
__u64 lfi_free; /* presumably free inodes for the slot - confirm */
} ifi_stat[OCFS2_MAX_SLOTS];
__u32 ifi_slotnum; /* out */
__u32 ifi_pad; /* padding */
};
/* Number of buckets in each histogram array of ocfs2_info_free_chunk_list. */
#define OCFS2_INFO_MAX_HIST (32)
/*
 * Info request carrying free-space fragmentation statistics; presumably
 * the payload used with the OCFS2_INFO_FREEFRAG request code - confirm
 * against the ioctl handler.
 */
struct ocfs2_info_freefrag {
struct ocfs2_info_request iff_req;
struct ocfs2_info_freefrag_stats { /* (out) */
/* Two parallel histograms, OCFS2_INFO_MAX_HIST buckets each. */
struct ocfs2_info_free_chunk_list {
__u32 fc_chunks[OCFS2_INFO_MAX_HIST];
__u32 fc_clusters[OCFS2_INFO_MAX_HIST];
} ffs_fc_hist;
__u32 ffs_clusters;
__u32 ffs_free_clusters;
__u32 ffs_free_chunks;
__u32 ffs_free_chunks_real;
__u32 ffs_min; /* Minimum free chunksize in clusters */
__u32 ffs_max; /* presumably maximum free chunksize in clusters - confirm */
__u32 ffs_avg; /* presumably average free chunksize in clusters - confirm */
__u32 ffs_pad; /* padding */
} iff_ffs;
__u32 iff_chunksize; /* chunksize in clusters(in) */
__u32 iff_pad; /* padding */
};
/* Codes for ocfs2_info_request */ /* Codes for ocfs2_info_request */
enum ocfs2_info_type { enum ocfs2_info_type {
OCFS2_INFO_CLUSTERSIZE = 1, OCFS2_INFO_CLUSTERSIZE = 1,
...@@ -151,6 +183,8 @@ enum ocfs2_info_type { ...@@ -151,6 +183,8 @@ enum ocfs2_info_type {
OCFS2_INFO_UUID, OCFS2_INFO_UUID,
OCFS2_INFO_FS_FEATURES, OCFS2_INFO_FS_FEATURES,
OCFS2_INFO_JOURNAL_SIZE, OCFS2_INFO_JOURNAL_SIZE,
OCFS2_INFO_FREEINODE,
OCFS2_INFO_FREEFRAG,
OCFS2_INFO_NUM_TYPES OCFS2_INFO_NUM_TYPES
}; };
...@@ -171,4 +205,38 @@ enum ocfs2_info_type { ...@@ -171,4 +205,38 @@ enum ocfs2_info_type {
#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
/* Argument structure for the OCFS2_IOC_MOVE_EXT ioctl. */
struct ocfs2_move_extents {
/* All values are in bytes */
/* NOTE(review): the me_goal comment below says block units, which
 * conflicts with the line above - confirm which is correct. */
/* in */
__u64 me_start; /* Virtual start in the file to move */
__u64 me_len; /* Length of the extents to be moved */
__u64 me_goal; /* Physical offset of the goal,
it's in block unit */
__u64 me_threshold; /* Maximum distance from goal or threshold
for auto defragmentation */
__u64 me_flags; /* Flags for the operation:
* - auto defragmentation.
* - refcount,xattr cases.
*/
/* out */
__u64 me_moved_len; /* Moved/defragmented length */
__u64 me_new_offset; /* Resulting physical location */
__u32 me_reserved[2]; /* Reserved for future use */
};
#define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel claims new
clusters itself to
serve as the goal for
the extents moved */
#define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent
moving; makes the
move less likely to
fail, but may leave fs
even more fragmented */
#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation
was completed in full.
*/
#define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents)
#endif /* OCFS2_IOCTL_H */ #endif /* OCFS2_IOCTL_H */
...@@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc, ...@@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc,
__entry->blkno, __entry->bit) __entry->blkno, __entry->bit)
); );
/*
 * Trace event for ocfs2_trim_extent: records the device major/minor taken
 * from sb->s_dev together with the blk and count values passed in.
 */
TRACE_EVENT(ocfs2_trim_extent,
TP_PROTO(struct super_block *sb, unsigned long long blk,
unsigned long long count),
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
__field(int, dev_major)
__field(int, dev_minor)
__field(unsigned long long, blk)
__field(__u64, count)
),
TP_fast_assign(
__entry->dev_major = MAJOR(sb->s_dev);
__entry->dev_minor = MINOR(sb->s_dev);
__entry->blk = blk;
__entry->count = count;
),
/* Output order: major, minor, blk, count. */
TP_printk("%d %d %llu %llu",
__entry->dev_major, __entry->dev_minor,
__entry->blk, __entry->count)
);
DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs);
/* End of trace events for fs/ocfs2/alloc.c. */ /* End of trace events for fs/ocfs2/alloc.c. */
/* Trace events for fs/ocfs2/localalloc.c. */ /* Trace events for fs/ocfs2/localalloc.c. */
......
...@@ -66,7 +66,7 @@ struct ocfs2_cow_context { ...@@ -66,7 +66,7 @@ struct ocfs2_cow_context {
u32 *num_clusters, u32 *num_clusters,
unsigned int *extent_flags); unsigned int *extent_flags);
int (*cow_duplicate_clusters)(handle_t *handle, int (*cow_duplicate_clusters)(handle_t *handle,
struct ocfs2_cow_context *context, struct file *file,
u32 cpos, u32 old_cluster, u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len); u32 new_cluster, u32 new_len);
}; };
...@@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) ...@@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
return 0; return 0;
} }
static int ocfs2_duplicate_clusters_by_page(handle_t *handle, int ocfs2_duplicate_clusters_by_page(handle_t *handle,
struct ocfs2_cow_context *context, struct file *file,
u32 cpos, u32 old_cluster, u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len) u32 new_cluster, u32 new_len)
{ {
int ret = 0, partial; int ret = 0, partial;
struct ocfs2_caching_info *ci = context->data_et.et_ci; struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_caching_info *ci = INODE_CACHE(inode);
struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
struct page *page; struct page *page;
pgoff_t page_index; pgoff_t page_index;
unsigned int from, to, readahead_pages; unsigned int from, to, readahead_pages;
loff_t offset, end, map_end; loff_t offset, end, map_end;
struct address_space *mapping = context->inode->i_mapping; struct address_space *mapping = inode->i_mapping;
trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
new_cluster, new_len); new_cluster, new_len);
...@@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
* We only duplicate pages until we reach the page contains i_size - 1. * We only duplicate pages until we reach the page contains i_size - 1.
* So trim 'end' to i_size. * So trim 'end' to i_size.
*/ */
if (end > i_size_read(context->inode)) if (end > i_size_read(inode))
end = i_size_read(context->inode); end = i_size_read(inode);
while (offset < end) { while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT; page_index = offset >> PAGE_CACHE_SHIFT;
...@@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
BUG_ON(PageDirty(page)); BUG_ON(PageDirty(page));
if (PageReadahead(page) && context->file) { if (PageReadahead(page)) {
page_cache_async_readahead(mapping, page_cache_async_readahead(mapping,
&context->file->f_ra, &file->f_ra, file,
context->file,
page, page_index, page, page_index,
readahead_pages); readahead_pages);
} }
...@@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
} }
} }
ocfs2_map_and_dirty_page(context->inode, ocfs2_map_and_dirty_page(inode, handle, from, to,
handle, from, to,
page, 0, &new_block); page, 0, &new_block);
mark_page_accessed(page); mark_page_accessed(page);
unlock: unlock:
...@@ -3015,14 +3014,15 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -3015,14 +3014,15 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
return ret; return ret;
} }
static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
struct ocfs2_cow_context *context, struct file *file,
u32 cpos, u32 old_cluster, u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len) u32 new_cluster, u32 new_len)
{ {
int ret = 0; int ret = 0;
struct super_block *sb = context->inode->i_sb; struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_caching_info *ci = context->data_et.et_ci; struct super_block *sb = inode->i_sb;
struct ocfs2_caching_info *ci = INODE_CACHE(inode);
int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
...@@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle, ...@@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle,
/*If the old clusters is unwritten, no need to duplicate. */ /*If the old clusters is unwritten, no need to duplicate. */
if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
ret = context->cow_duplicate_clusters(handle, context, cpos, ret = context->cow_duplicate_clusters(handle, context->file,
old, new, len); cpos, old, new, len);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
...@@ -3162,22 +3162,22 @@ static int ocfs2_replace_clusters(handle_t *handle, ...@@ -3162,22 +3162,22 @@ static int ocfs2_replace_clusters(handle_t *handle,
return ret; return ret;
} }
static int ocfs2_cow_sync_writeback(struct super_block *sb, int ocfs2_cow_sync_writeback(struct super_block *sb,
struct ocfs2_cow_context *context, struct inode *inode,
u32 cpos, u32 num_clusters) u32 cpos, u32 num_clusters)
{ {
int ret = 0; int ret = 0;
loff_t offset, end, map_end; loff_t offset, end, map_end;
pgoff_t page_index; pgoff_t page_index;
struct page *page; struct page *page;
if (ocfs2_should_order_data(context->inode)) if (ocfs2_should_order_data(inode))
return 0; return 0;
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
ret = filemap_fdatawrite_range(context->inode->i_mapping, ret = filemap_fdatawrite_range(inode->i_mapping,
offset, end - 1); offset, end - 1);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
...@@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, ...@@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
if (map_end > end) if (map_end > end)
map_end = end; map_end = end;
page = find_or_create_page(context->inode->i_mapping, page = find_or_create_page(inode->i_mapping,
page_index, GFP_NOFS); page_index, GFP_NOFS);
BUG_ON(!page); BUG_ON(!page);
...@@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, ...@@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
* in write-back mode. * in write-back mode.
*/ */
if (context->get_clusters == ocfs2_di_get_clusters) { if (context->get_clusters == ocfs2_di_get_clusters) {
ret = ocfs2_cow_sync_writeback(sb, context, cpos, ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos,
orig_num_clusters); orig_num_clusters);
if (ret) if (ret)
mlog_errno(ret); mlog_errno(ret);
......
...@@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, ...@@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
struct buffer_head *ref_root_bh, struct buffer_head *ref_root_bh,
u32 cpos, u32 write_len, u32 cpos, u32 write_len,
struct ocfs2_post_refcount *post); struct ocfs2_post_refcount *post);
int ocfs2_duplicate_clusters_by_page(handle_t *handle,
struct file *file,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len);
int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
struct file *file,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len);
int ocfs2_cow_sync_writeback(struct super_block *sb,
struct inode *inode,
u32 cpos, u32 num_clusters);
int ocfs2_add_refcount_flag(struct inode *inode, int ocfs2_add_refcount_flag(struct inode *inode,
struct ocfs2_extent_tree *data_et, struct ocfs2_extent_tree *data_et,
struct ocfs2_caching_info *ref_ci, struct ocfs2_caching_info *ref_ci,
......
...@@ -1567,7 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) ...@@ -1567,7 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (osb->preferred_slot != OCFS2_INVALID_SLOT) if (osb->preferred_slot != OCFS2_INVALID_SLOT)
seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME))
seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
if (osb->osb_commit_interval) if (osb->osb_commit_interval)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment