Commit 78609a81 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits)
  ocfs2: recover orphans in offline slots during recovery and mount
  ocfs2: Pagecache usage optimization on ocfs2
  ocfs2: fix rare stale inode errors when exporting via nfs
  ocfs2/dlm: Tweak mle_state output
  ocfs2/dlm: Do not purge lockres that is being migrated dlm_purge_lockres()
  ocfs2/dlm: Remove struct dlm_lock_name in struct dlm_master_list_entry
  ocfs2/dlm: Show the number of lockres/mles in dlm_state
  ocfs2/dlm: dlm_set_lockres_owner() and dlm_change_lockres_owner() inlined
  ocfs2/dlm: Improve lockres counts
  ocfs2/dlm: Track number of mles
  ocfs2/dlm: Indent dlm_cleanup_master_list()
  ocfs2/dlm: Activate dlm->master_hash for master list entries
  ocfs2/dlm: Create and destroy the dlm->master_hash
  ocfs2/dlm: Refactor dlm_clean_master_list()
  ocfs2/dlm: Clean up struct dlm_lock_name
  ocfs2/dlm: Encapsulate adding and removing of mle from dlm->master_list
  ocfs2: Optimize inode group allocation by recording last used group.
  ocfs2: Allocate inode groups from global_bitmap.
  ocfs2: Optimize inode allocation by remembering last group
  ocfs2: fix leaf start calculation in ocfs2_dx_dir_rebalance()
  ...
parents 133e2a31 9140db04
......@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
};
static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 blkno)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
}
static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
return le64_to_cpu(dx_root->dr_last_eb_blk);
}
static void ocfs2_dx_root_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
le32_add_cpu(&dx_root->dr_clusters, clusters);
}
static int ocfs2_dx_root_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
return 0;
}
static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
et->et_root_el = &dx_root->dr_list;
}
static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
.eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
.eo_update_clusters = ocfs2_dx_root_update_clusters,
.eo_sanity_check = ocfs2_dx_root_sanity_check,
.eo_fill_root_el = ocfs2_dx_root_fill_root_el,
};
static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
......@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
&ocfs2_xattr_value_et_ops);
}
void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh)
{
__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
NULL, &ocfs2_dx_root_et_ops);
}
static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 new_last_eb_blk)
{
......
......@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct ocfs2_xattr_value_buf *vb);
void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
/*
* Read an extent block into *bh. If *bh is NULL, a bh will be
......
......@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
}
const struct address_space_operations ocfs2_aops = {
.readpage = ocfs2_readpage,
.readpages = ocfs2_readpages,
.writepage = ocfs2_writepage,
.write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.sync_page = block_sync_page,
.direct_IO = ocfs2_direct_IO,
.invalidatepage = ocfs2_invalidatepage,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.readpage = ocfs2_readpage,
.readpages = ocfs2_readpages,
.writepage = ocfs2_writepage,
.write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.sync_page = block_sync_page,
.direct_IO = ocfs2_direct_IO,
.invalidatepage = ocfs2_invalidatepage,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
};
......@@ -33,6 +33,7 @@
#include <linux/random.h>
#include <linux/crc32.h>
#include <linux/time.h>
#include <linux/debugfs.h>
#include "heartbeat.h"
#include "tcp.h"
......@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
static LIST_HEAD(o2hb_node_events);
static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
#define O2HB_DEBUG_DIR "o2hb"
#define O2HB_DEBUG_LIVENODES "livenodes"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
static LIST_HEAD(o2hb_all_regions);
static struct o2hb_callback {
......@@ -905,7 +911,77 @@ static int o2hb_thread(void *data)
return 0;
}
void o2hb_init(void)
#ifdef CONFIG_DEBUG_FS
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
char *buf = NULL;
int i = -1;
int out = 0;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto bail;
o2hb_fill_node_map(map, sizeof(map));
while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
out += snprintf(buf + out, PAGE_SIZE - out, "\n");
i_size_write(inode, out);
file->private_data = buf;
return 0;
bail:
return -ENOMEM;
}
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
i_size_read(file->f_mapping->host));
}
#else
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
return 0;
}
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
return 0;
}
static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */
static struct file_operations o2hb_debug_fops = {
.open = o2hb_debug_open,
.release = o2hb_debug_release,
.read = o2hb_debug_read,
.llseek = generic_file_llseek,
};
void o2hb_exit(void)
{
if (o2hb_debug_livenodes)
debugfs_remove(o2hb_debug_livenodes);
if (o2hb_debug_dir)
debugfs_remove(o2hb_debug_dir);
}
int o2hb_init(void)
{
int i;
......@@ -918,6 +994,24 @@ void o2hb_init(void)
INIT_LIST_HEAD(&o2hb_node_events);
memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
if (!o2hb_debug_dir) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
S_IFREG|S_IRUSR,
o2hb_debug_dir, NULL,
&o2hb_debug_fops);
if (!o2hb_debug_livenodes) {
mlog_errno(-ENOMEM);
debugfs_remove(o2hb_debug_dir);
return -ENOMEM;
}
return 0;
}
/* if we're already in a callback then we're already serialized by the sem */
......
......@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map,
unsigned bytes);
void o2hb_init(void);
void o2hb_exit(void);
int o2hb_init(void);
int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void);
......
......@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void)
o2cb_sys_shutdown();
o2net_exit();
o2hb_exit();
}
static int __init init_o2nm(void)
......@@ -889,11 +890,13 @@ static int __init init_o2nm(void)
cluster_print_version();
o2hb_init();
ret = o2hb_init();
if (ret)
goto out;
ret = o2net_init();
if (ret)
goto out;
goto out_o2hb;
ret = o2net_register_hb_callbacks();
if (ret)
......@@ -916,6 +919,8 @@ static int __init init_o2nm(void)
o2net_unregister_hb_callbacks();
out_o2net:
o2net_exit();
out_o2hb:
o2hb_exit();
out:
return ret;
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -26,44 +26,70 @@
#ifndef OCFS2_DIR_H
#define OCFS2_DIR_H
struct buffer_head *ocfs2_find_entry(const char *name,
int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir);
struct ocfs2_dx_hinfo {
u32 major_hash;
u32 minor_hash;
};
struct ocfs2_dir_lookup_result {
struct buffer_head *dl_leaf_bh; /* Unindexed leaf
* block */
struct ocfs2_dir_entry *dl_entry; /* Target dirent in
* unindexed leaf */
struct buffer_head *dl_dx_root_bh; /* Root of indexed
* tree */
struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */
struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in
* indexed leaf */
struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */
struct buffer_head *dl_prev_leaf_bh;/* Previous entry in
* dir free space
* list. NULL if
* previous entry is
* dx root block. */
};
void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res);
int ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_lookup_result *lookup);
int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
struct ocfs2_dir_lookup_result *res);
int __ocfs2_add_entry(handle_t *handle,
struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh);
struct ocfs2_dir_lookup_result *lookup);
static inline int ocfs2_add_entry(handle_t *handle,
struct dentry *dentry,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh)
struct ocfs2_dir_lookup_result *lookup)
{
return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
dentry->d_name.name, dentry->d_name.len,
inode, blkno, parent_fe_bh, insert_bh);
inode, blkno, parent_fe_bh, lookup);
}
int ocfs2_update_entry(struct inode *dir, handle_t *handle,
struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
struct ocfs2_dir_lookup_result *res,
struct inode *new_entry_inode);
int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen);
int ocfs2_empty_dir(struct inode *inode);
int ocfs2_find_files_on_disk(const char *name,
int namelen,
u64 *blkno,
struct inode *inode,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent);
struct ocfs2_dir_lookup_result *res);
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
int namelen, u64 *blkno);
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
......@@ -74,14 +100,17 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
struct buffer_head *parent_fe_bh,
const char *name,
int namelen,
struct buffer_head **ret_de_bh);
struct ocfs2_dir_lookup_result *lookup);
struct ocfs2_alloc_context;
int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac);
int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh);
struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
void *data);
......
......@@ -52,16 +52,12 @@
enum dlm_mle_type {
DLM_MLE_BLOCK,
DLM_MLE_MASTER,
DLM_MLE_MIGRATION
};
struct dlm_lock_name {
u8 len;
u8 name[DLM_LOCKID_NAME_MAX];
DLM_MLE_MIGRATION,
DLM_MLE_NUM_TYPES
};
struct dlm_master_list_entry {
struct list_head list;
struct hlist_node master_hash_node;
struct list_head hb_events;
struct dlm_ctxt *dlm;
spinlock_t spinlock;
......@@ -78,10 +74,10 @@ struct dlm_master_list_entry {
enum dlm_mle_type type;
struct o2hb_callback_func mle_hb_up;
struct o2hb_callback_func mle_hb_down;
union {
struct dlm_lock_resource *res;
struct dlm_lock_name name;
} u;
struct dlm_lock_resource *mleres;
unsigned char mname[DLM_LOCKID_NAME_MAX];
unsigned int mnamelen;
unsigned int mnamehash;
};
enum dlm_ast_type {
......@@ -151,13 +147,14 @@ struct dlm_ctxt
unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct dlm_recovery_ctxt reco;
spinlock_t master_lock;
struct list_head master_list;
struct hlist_head **master_hash;
struct list_head mle_hb_events;
/* these give a really vague idea of the system load */
atomic_t local_resources;
atomic_t remote_resources;
atomic_t unknown_resources;
atomic_t mle_tot_count[DLM_MLE_NUM_TYPES];
atomic_t mle_cur_count[DLM_MLE_NUM_TYPES];
atomic_t res_tot_count;
atomic_t res_cur_count;
struct dlm_debug_ctxt *dlm_debug_ctxt;
struct dentry *dlm_debugfs_subroot;
......@@ -195,6 +192,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned
return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
}
static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm,
unsigned i)
{
return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] +
(i % DLM_BUCKETS_PER_PAGE);
}
/* these keventd work queue items are for less-frequently
* called functions that cannot be directly called from the
* net message handlers for some reason, usually because
......@@ -848,9 +852,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
unsigned int len);
int dlm_is_host_down(int errno);
void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner);
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
const char *lockid,
int namelen,
......@@ -1008,6 +1010,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
DLM_LOCK_RES_MIGRATING));
}
void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
/* create/destroy slab caches */
int dlm_init_master_caches(void);
void dlm_destroy_master_caches(void);
......@@ -1110,6 +1115,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter)
return bit;
}
static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner)
{
assert_spin_locked(&res->spinlock);
res->owner = owner;
}
static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner)
{
assert_spin_locked(&res->spinlock);
if (owner != res->owner)
dlm_set_lockres_owner(dlm, res, owner);
}
#endif /* DLMCOMMON_H */
......@@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
{
int out = 0;
unsigned int namelen;
const char *name;
char *mle_type;
if (mle->type != DLM_MLE_MASTER) {
namelen = mle->u.name.len;
name = mle->u.name.name;
} else {
namelen = mle->u.res->lockname.len;
name = mle->u.res->lockname.name;
}
if (mle->type == DLM_MLE_BLOCK)
mle_type = "BLK";
else if (mle->type == DLM_MLE_MASTER)
......@@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
else
mle_type = "MIG";
out += stringify_lockname(name, namelen, buf + out, len - out);
out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out);
out += snprintf(buf + out, len - out,
"\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
mle_type, mle->master, mle->new_master,
......@@ -501,23 +491,33 @@ static struct file_operations debug_purgelist_fops = {
static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
{
struct dlm_master_list_entry *mle;
int out = 0;
unsigned long total = 0;
struct hlist_head *bucket;
struct hlist_node *list;
int i, out = 0;
unsigned long total = 0, longest = 0, bktcnt;
out += snprintf(db->buf + out, db->len - out,
"Dumping MLEs for Domain: %s\n", dlm->name);
spin_lock(&dlm->master_lock);
list_for_each_entry(mle, &dlm->master_list, list) {
++total;
if (db->len - out < 200)
continue;
out += dump_mle(mle, db->buf + out, db->len - out);
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_master_hash(dlm, i);
hlist_for_each(list, bucket) {
mle = hlist_entry(list, struct dlm_master_list_entry,
master_hash_node);
++total;
++bktcnt;
if (db->len - out < 200)
continue;
out += dump_mle(mle, db->buf + out, db->len - out);
}
longest = max(longest, bktcnt);
bktcnt = 0;
}
spin_unlock(&dlm->master_lock);
out += snprintf(db->buf + out, db->len - out,
"Total on list: %ld\n", total);
"Total: %ld, Longest: %ld\n", total, longest);
return out;
}
......@@ -756,12 +756,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
int out = 0;
struct dlm_reco_node_data *node;
char *state;
int lres, rres, ures, tres;
lres = atomic_read(&dlm->local_resources);
rres = atomic_read(&dlm->remote_resources);
ures = atomic_read(&dlm->unknown_resources);
tres = lres + rres + ures;
int cur_mles = 0, tot_mles = 0;
int i;
spin_lock(&dlm->spinlock);
......@@ -804,21 +800,48 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
db->buf + out, db->len - out);
out += snprintf(db->buf + out, db->len - out, "\n");
/* Mastered Resources Total: xxx Locally: xxx Remotely: ... */
/* Lock Resources: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"Lock Resources: %d (%d)\n",
atomic_read(&dlm->res_cur_count),
atomic_read(&dlm->res_tot_count));
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
tot_mles += atomic_read(&dlm->mle_tot_count[i]);
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
cur_mles += atomic_read(&dlm->mle_cur_count[i]);
/* MLEs: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"MLEs: %d (%d)\n", cur_mles, tot_mles);
/* Blocking: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
" Blocking: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
/* Mastery: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
" Mastery: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
/* Migration: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"Mastered Resources Total: %d Locally: %d "
"Remotely: %d Unknown: %d\n",
tres, lres, rres, ures);
" Migration: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
out += snprintf(db->buf + out, db->len - out,
"Lists: Dirty=%s Purge=%s PendingASTs=%s "
"PendingBASTs=%s Master=%s\n",
"PendingBASTs=%s\n",
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
(list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
(list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"),
(list_empty(&dlm->master_list) ? "Empty" : "InUse"));
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
/* Purge Count: xxx Refs: xxx */
out += snprintf(db->buf + out, db->len - out,
......
......@@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
if (dlm->lockres_hash)
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
if (dlm->master_hash)
dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
if (dlm->name)
kfree(dlm->name);
......@@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
dlm->master_hash = (struct hlist_head **)
dlm_alloc_pagevec(DLM_HASH_PAGES);
if (!dlm->master_hash) {
mlog_errno(-ENOMEM);
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
kfree(dlm->name);
kfree(dlm);
dlm = NULL;
goto leave;
}
for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
strcpy(dlm->name, domain);
dlm->key = key;
dlm->node_num = o2nm_this_node();
ret = dlm_create_debugfs_subroot(dlm);
if (ret < 0) {
dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
kfree(dlm->name);
kfree(dlm);
......@@ -1579,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
init_waitqueue_head(&dlm->reco.event);
init_waitqueue_head(&dlm->ast_wq);
init_waitqueue_head(&dlm->migration_wq);
INIT_LIST_HEAD(&dlm->master_list);
INIT_LIST_HEAD(&dlm->mle_hb_events);
dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
......@@ -1587,9 +1604,13 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
atomic_set(&dlm->local_resources, 0);
atomic_set(&dlm->remote_resources, 0);
atomic_set(&dlm->unknown_resources, 0);
atomic_set(&dlm->res_tot_count, 0);
atomic_set(&dlm->res_cur_count, 0);
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) {
atomic_set(&dlm->mle_tot_count[i], 0);
atomic_set(&dlm->mle_cur_count[i], 0);
}
spin_lock_init(&dlm->work_lock);
INIT_LIST_HEAD(&dlm->work_list);
......
This diff is collapsed.
......@@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
if (!__dlm_lockres_unused(res)) {
spin_unlock(&res->spinlock);
mlog(0, "%s:%.*s: tried to purge but not unused\n",
dlm->name, res->lockname.len, res->lockname.name);
return -ENOTEMPTY;
__dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock);
BUG();
}
if (res->state & DLM_LOCK_RES_MIGRATING) {
mlog(0, "%s:%.*s: Delay dropref as this lockres is "
"being remastered\n", dlm->name, res->lockname.len,
res->lockname.name);
/* Re-add the lockres to the end of the purge list */
if (!list_empty(&res->purge)) {
list_del_init(&res->purge);
list_add_tail(&res->purge, &dlm->purge_list);
}
spin_unlock(&res->spinlock);
return 0;
}
master = (res->owner == dlm->node_num);
if (!master)
res->state |= DLM_LOCK_RES_DROPPING_REF;
spin_unlock(&res->spinlock);
......
......@@ -244,6 +244,10 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
......@@ -622,6 +626,17 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_rename_lops, osb);
}
static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
struct ocfs2_super *osb)
{
/* nfs_sync lockres doesn't come from a slab so we call init
* once on it manually. */
ocfs2_lock_res_init_once(res);
ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
&ocfs2_nfs_sync_lops, osb);
}
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp)
{
......@@ -2417,6 +2432,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
}
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
{
int status;
struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
if (ocfs2_mount_local(osb))
return 0;
status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
0, 0);
if (status < 0)
mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
return status;
}
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
{
struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
}
int ocfs2_dentry_lock(struct dentry *dentry, int ex)
{
int ret;
......@@ -2798,6 +2841,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
osb->cconn = conn;
......@@ -2833,6 +2877,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_super_lockres);
ocfs2_lock_res_free(&osb->osb_rename_lockres);
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
osb->cconn = NULL;
......@@ -3015,6 +3060,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
{
ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
}
int ocfs2_drop_inode_locks(struct inode *inode)
......
......@@ -115,6 +115,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
int ex);
int ocfs2_rename_lock(struct ocfs2_super *osb);
void ocfs2_rename_unlock(struct ocfs2_super *osb);
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex);
int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock);
......
......@@ -31,6 +31,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "dir.h"
#include "dlmglue.h"
#include "dcache.h"
......@@ -38,6 +39,7 @@
#include "inode.h"
#include "buffer_head_io.h"
#include "suballoc.h"
struct ocfs2_inode_handle
{
......@@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
struct ocfs2_inode_handle *handle)
{
struct inode *inode;
struct ocfs2_super *osb = OCFS2_SB(sb);
u64 blkno = handle->ih_blkno;
int status, set;
struct dentry *result;
mlog_entry("(0x%p, 0x%p)\n", sb, handle);
if (handle->ih_blkno == 0) {
mlog_errno(-ESTALE);
return ERR_PTR(-ESTALE);
if (blkno == 0) {
mlog(0, "nfs wants inode with blkno: 0\n");
result = ERR_PTR(-ESTALE);
goto bail;
}
inode = ocfs2_ilookup(sb, blkno);
/*
* If the inode exists in memory, we only need to check it's
* generation number
*/
if (inode)
goto check_gen;
/*
* This will synchronize us against ocfs2_delete_inode() on
* all nodes
*/
status = ocfs2_nfs_sync_lock(osb, 1);
if (status < 0) {
mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
goto check_err;
}
status = ocfs2_test_inode_bit(osb, blkno, &set);
if (status < 0) {
if (status == -EINVAL) {
/*
* The blkno NFS gave us doesn't even show up
* as an inode, we return -ESTALE to be
* nice
*/
mlog(0, "test inode bit failed %d\n", status);
status = -ESTALE;
} else {
mlog(ML_ERROR, "test inode bit failed %d\n", status);
}
goto unlock_nfs_sync;
}
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
mlog(0, "inode %llu suballoc bit is clear\n", blkno);
status = -ESTALE;
goto unlock_nfs_sync;
}
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0);
inode = ocfs2_iget(osb, blkno, 0, 0);
if (IS_ERR(inode))
return (void *)inode;
unlock_nfs_sync:
ocfs2_nfs_sync_unlock(osb, 1);
check_err:
if (status < 0) {
if (status == -ESTALE) {
mlog(0, "stale inode ino: %llu generation: %u\n",
blkno, handle->ih_generation);
}
result = ERR_PTR(status);
goto bail;
}
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
result = (void *)inode;
goto bail;
}
check_gen:
if (handle->ih_generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
mlog(0, "stale inode ino: %llu generation: %u\n", blkno,
handle->ih_generation);
result = ERR_PTR(-ESTALE);
goto bail;
}
result = d_obtain_alias(inode);
if (!IS_ERR(result))
result->d_op = &ocfs2_dentry_ops;
else
mlog_errno(PTR_ERR(result));
bail:
mlog_exit_ptr(result);
return result;
}
......
......@@ -38,6 +38,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "dir.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "extent_map.h"
......@@ -112,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
oi->ip_attr |= OCFS2_DIRSYNC_FL;
}
struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
{
struct ocfs2_find_inode_args args;
args.fi_blkno = blkno;
args.fi_flags = 0;
args.fi_ino = ino_from_blkno(sb, blkno);
args.fi_sysfile_type = 0;
return ilookup5(sb, blkno, ocfs2_find_actor, &args);
}
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
int sysfile_type)
{
......@@ -275,7 +287,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)le64_to_cpu(fe->i_blkno));
inode->i_nlink = le16_to_cpu(fe->i_links_count);
inode->i_nlink = ocfs2_read_links_count(fe);
if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
......@@ -351,6 +363,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_set_inode_flags(inode);
OCFS2_I(inode)->ip_last_used_slot = 0;
OCFS2_I(inode)->ip_last_used_group = 0;
mlog_exit_void();
}
......@@ -606,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode,
}
handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
ocfs2_quota_trans_credits(inode->i_sb));
ocfs2_quota_trans_credits(inode->i_sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
......@@ -740,6 +754,15 @@ static int ocfs2_wipe_inode(struct inode *inode,
goto bail_unlock_dir;
}
/* Remove any dir index tree */
if (S_ISDIR(inode->i_mode)) {
status = ocfs2_dx_dir_truncate(inode, di_bh);
if (status) {
mlog_errno(status);
goto bail_unlock_dir;
}
}
/*Free extended attribute resources associated with this inode.*/
status = ocfs2_xattr_remove(inode, di_bh);
if (status < 0) {
......@@ -949,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode)
goto bail;
}
/*
* Synchronize us against ocfs2_get_dentry. We take this in
* shared mode so that all nodes can still concurrently
* process deletes.
*/
status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0);
if (status < 0) {
mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status);
ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock;
}
/* Lock down the inode. This gives us an up to date view of
* it's metadata (for verification), and allows us to
* serialize delete_inode on multiple nodes.
......@@ -962,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode)
if (status != -ENOENT)
mlog_errno(status);
ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock;
goto bail_unlock_nfs_sync;
}
/* Query the cluster. This will be the final decision made
......@@ -1005,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode)
bail_unlock_inode:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
bail_unlock_nfs_sync:
ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
bail_unblock:
status = sigprocmask(SIG_SETMASK, &oldset, NULL);
if (status < 0)
......@@ -1205,7 +1243,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_size = cpu_to_le64(i_size_read(inode));
fe->i_links_count = cpu_to_le16(inode->i_nlink);
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_uid = cpu_to_le32(inode->i_uid);
fe->i_gid = cpu_to_le32(inode->i_gid);
fe->i_mode = cpu_to_le16(inode->i_mode);
......@@ -1242,7 +1280,7 @@ void ocfs2_refresh_inode(struct inode *inode,
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
ocfs2_set_inode_flags(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
inode->i_nlink = le16_to_cpu(fe->i_links_count);
inode->i_nlink = ocfs2_read_links_count(fe);
inode->i_uid = le32_to_cpu(fe->i_uid);
inode->i_gid = le32_to_cpu(fe->i_gid);
inode->i_mode = le16_to_cpu(fe->i_mode);
......
......@@ -72,6 +72,10 @@ struct ocfs2_inode_info
struct inode vfs_inode;
struct jbd2_inode ip_jinode;
/* Only valid if the inode is the dir. */
u32 ip_last_used_slot;
u64 ip_last_used_group;
};
/*
......@@ -124,6 +128,7 @@ void ocfs2_drop_inode(struct inode *inode);
/* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_SYSFILE 0x1
#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2
struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode);
......
......@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
int slot);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
struct ocfs2_quota_recovery *qrec);
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
......@@ -76,18 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
return __ocfs2_wait_on_mount(osb, 1);
}
/*
* The recovery_list is a simple linked list of node numbers to recover.
* It is protected by the recovery_lock.
* This replay_map is to track online/offline slots, so we could recover
* offline slots during recovery and mount
*/
struct ocfs2_recovery_map {
unsigned int rm_used;
unsigned int *rm_entries;
enum ocfs2_replay_state {
REPLAY_UNNEEDED = 0, /* Replay is not needed, so ignore this map */
REPLAY_NEEDED, /* Replay slots marked in rm_replay_slots */
REPLAY_DONE /* Replay was already queued */
};
struct ocfs2_replay_map {
unsigned int rm_slots;
enum ocfs2_replay_state rm_state;
unsigned char rm_replay_slots[0];
};
void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
{
if (!osb->replay_map)
return;
/* If we've already queued the replay, we don't have any more to do */
if (osb->replay_map->rm_state == REPLAY_DONE)
return;
osb->replay_map->rm_state = state;
}
int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map;
int i, node_num;
/* If replay map is already set, we don't do it again */
if (osb->replay_map)
return 0;
replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
(osb->max_slots * sizeof(char)), GFP_KERNEL);
if (!replay_map) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
spin_lock(&osb->osb_lock);
replay_map->rm_slots = osb->max_slots;
replay_map->rm_state = REPLAY_UNNEEDED;
/* set rm_replay_slots for offline slot(s) */
for (i = 0; i < replay_map->rm_slots; i++) {
if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
replay_map->rm_replay_slots[i] = 1;
}
osb->replay_map = replay_map;
spin_unlock(&osb->osb_lock);
return 0;
}
void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
int i;
if (!replay_map)
return;
if (replay_map->rm_state != REPLAY_NEEDED)
return;
for (i = 0; i < replay_map->rm_slots; i++)
if (replay_map->rm_replay_slots[i])
ocfs2_queue_recovery_completion(osb->journal, i, NULL,
NULL, NULL);
replay_map->rm_state = REPLAY_DONE;
}
void ocfs2_free_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
if (!osb->replay_map)
return;
kfree(replay_map);
osb->replay_map = NULL;
}
int ocfs2_recovery_init(struct ocfs2_super *osb)
{
struct ocfs2_recovery_map *rm;
......@@ -496,6 +580,22 @@ static struct ocfs2_triggers dq_triggers = {
},
};
static struct ocfs2_triggers dr_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
};
static struct ocfs2_triggers dl_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
};
static int __ocfs2_journal_access(handle_t *handle,
struct inode *inode,
struct buffer_head *bh,
......@@ -600,6 +700,20 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
type);
}
int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
type);
}
int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
type);
}
int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
......@@ -1176,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
}
/* Called by the mount code to queue recovery the last part of
* recovery for it's own slot. */
* recovery for it's own and offline slot(s). */
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{
struct ocfs2_journal *journal = osb->journal;
if (osb->dirty) {
/* No need to queue up our truncate_log as regular
* cleanup will catch that. */
ocfs2_queue_recovery_completion(journal,
osb->slot_num,
osb->local_alloc_copy,
NULL,
NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
/* No need to queue up our truncate_log as regular cleanup will catch
* that */
ocfs2_queue_recovery_completion(journal, osb->slot_num,
osb->local_alloc_copy, NULL, NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy = NULL;
osb->dirty = 0;
}
osb->local_alloc_copy = NULL;
osb->dirty = 0;
/* queue to recover orphan slots for all offline slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
ocfs2_queue_replay_slots(osb);
ocfs2_free_replay_slots(osb);
}
void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
......@@ -1236,6 +1350,14 @@ static int __ocfs2_recovery_thread(void *arg)
goto bail;
}
status = ocfs2_compute_replay_slots(osb);
if (status < 0)
mlog_errno(status);
/* queue recovery for our own slot */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL, NULL);
spin_lock(&osb->osb_lock);
while (rm->rm_used) {
/* It's always safe to remove entry zero, as we won't
......@@ -1301,11 +1423,8 @@ static int __ocfs2_recovery_thread(void *arg)
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
* node(s) may have disallowd a previos inode delete. Re-processing
* is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL, NULL);
/* queue recovery for offline slots */
ocfs2_queue_replay_slots(osb);
bail:
mutex_lock(&osb->recovery_lock);
......@@ -1314,6 +1433,7 @@ static int __ocfs2_recovery_thread(void *arg)
goto restart;
}
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
mb(); /* sync with ocfs2_recovery_thread_running */
wake_up(&osb->recovery_event);
......@@ -1465,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
goto done;
}
/* we need to run complete recovery for offline orphan slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
node_num, slot_num,
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
......
......@@ -38,6 +38,17 @@ enum ocfs2_journal_state {
struct ocfs2_super;
struct ocfs2_dinode;
/*
* The recovery_list is a simple linked list of node numbers to recover.
* It is protected by the recovery_lock.
*/
struct ocfs2_recovery_map {
unsigned int rm_used;
unsigned int *rm_entries;
};
struct ocfs2_journal {
enum ocfs2_journal_state j_state; /* Journals current state */
......@@ -139,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
/*
* Journal Control:
* Initialize, Load, Shutdown, Wipe a journal.
......@@ -266,6 +278,12 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
/* dirblock */
int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_dx_root_block */
int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_dx_leaf */
int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* Anything that has no ecc */
int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
......@@ -368,14 +386,29 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
}
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
* bitmap block for the new bit) dx_root update for free list */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
/* 1 block for index, 2 allocs (data, metadata), 1 clusters
* worth of blocks for initial extent. */
return 1 + 2 * OCFS2_SUBALLOC_ALLOC +
ocfs2_clusters_to_blocks(sb, 1);
}
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
* group descriptor + mkdir/symlink blocks + quota update */
static inline int ocfs2_mknod_credits(struct super_block *sb)
/* parent fe, parent block, new file entry, index leaf, inode alloc fe, inode
* alloc group descriptor + mkdir/symlink blocks + dir blocks + xattr
* blocks + quota update */
static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
int xattr_credits)
{
return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
int dir_credits = OCFS2_DIR_LINK_ADDITIONAL_CREDITS;
if (is_dir)
dir_credits += ocfs2_add_dir_index_credits(sb);
return 4 + OCFS2_SUBALLOC_ALLOC + dir_credits + xattr_credits +
ocfs2_quota_trans_credits(sb);
}
......@@ -388,31 +421,31 @@ static inline int ocfs2_mknod_credits(struct super_block *sb)
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
* update on dir */
* update on dir + index leaf + dx root update for free list */
static inline int ocfs2_link_credits(struct super_block *sb)
{
return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
return 2*OCFS2_INODE_UPDATE_CREDITS + 3 +
ocfs2_quota_trans_credits(sb);
}
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
* dir inode link + dir inode index leaf + dir index root */
static inline int ocfs2_unlink_credits(struct super_block *sb)
{
/* The quota update from ocfs2_link_credits is unused here... */
return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb);
}
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
* inode alloc group descriptor */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 1 + 1)
* inode alloc group descriptor + orphan dir index leaf */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3)
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink */
* directory + target unlink + 3 x dir index leaves */
static inline int ocfs2_rename_credits(struct super_block *sb)
{
return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
return 3 * OCFS2_INODE_UPDATE_CREDITS + 6 + ocfs2_unlink_credits(sb);
}
/* global bitmap dinode, group desc., relinked group,
......@@ -422,6 +455,20 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
+ OCFS2_INODE_UPDATE_CREDITS \
+ OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
/* inode update, removal of dx root block from allocator */
#define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
OCFS2_SUBALLOC_FREE)
static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
{
int credits = 1 + OCFS2_SUBALLOC_ALLOC;
credits += ocfs2_clusters_to_blocks(sb, 1);
credits += ocfs2_quota_trans_credits(sb);
return credits;
}
/*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
......@@ -457,7 +504,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
{
int blocks = ocfs2_mknod_credits(sb);
int blocks = ocfs2_mknod_credits(sb, 0, 0);
/* links can be longer than one block so we may update many
* within our single allocated extent. */
......
......@@ -28,7 +28,6 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
......@@ -75,84 +74,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
struct inode *local_alloc_inode);
#ifdef CONFIG_OCFS2_FS_STATS
static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
#define LA_DEBUG_VER 1
static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
static DEFINE_MUTEX(la_debug_mutex);
struct ocfs2_super *osb = file->private_data;
int written, ret;
char *buf = osb->local_alloc_debug_buf;
mutex_lock(&la_debug_mutex);
memset(buf, 0, LA_DEBUG_BUF_SZ);
written = snprintf(buf, LA_DEBUG_BUF_SZ,
"0x%x\t0x%llx\t%u\t%u\t0x%x\n",
LA_DEBUG_VER,
(unsigned long long)osb->la_last_gd,
osb->local_alloc_default_bits,
osb->local_alloc_bits, osb->local_alloc_state);
ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
mutex_unlock(&la_debug_mutex);
return ret;
}
static const struct file_operations ocfs2_la_debug_fops = {
.open = ocfs2_la_debug_open,
.read = ocfs2_la_debug_read,
};
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
if (!osb->local_alloc_debug_buf)
return;
osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_la_debug_fops);
if (!osb->local_alloc_debug) {
kfree(osb->local_alloc_debug_buf);
osb->local_alloc_debug_buf = NULL;
}
}
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
if (osb->local_alloc_debug)
debugfs_remove(osb->local_alloc_debug);
if (osb->local_alloc_debug_buf)
kfree(osb->local_alloc_debug_buf);
osb->local_alloc_debug_buf = NULL;
osb->local_alloc_debug = NULL;
}
#else /* CONFIG_OCFS2_FS_STATS */
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
return;
}
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
return;
}
#endif
static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
{
return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
......@@ -226,8 +147,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
mlog_entry_void();
ocfs2_init_la_debug(osb);
if (osb->local_alloc_bits == 0)
goto bail;
......@@ -299,9 +218,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
if (inode)
iput(inode);
if (status < 0)
ocfs2_shutdown_la_debug(osb);
mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
mlog_exit(status);
......@@ -331,8 +247,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
cancel_delayed_work(&osb->la_enable_wq);
flush_workqueue(ocfs2_wq);
ocfs2_shutdown_la_debug(osb);
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out;
......
This diff is collapsed.
......@@ -209,6 +209,7 @@ enum ocfs2_mount_options
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_replay_map;
struct ocfs2_quota_recovery;
struct ocfs2_dentry_lock;
struct ocfs2_super
......@@ -264,6 +265,7 @@ struct ocfs2_super
atomic_t vol_state;
struct mutex recovery_lock;
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
int disable_recovery;
wait_queue_head_t checkpoint_event;
......@@ -287,11 +289,6 @@ struct ocfs2_super
u64 la_last_gd;
#ifdef CONFIG_OCFS2_FS_STATS
struct dentry *local_alloc_debug;
char *local_alloc_debug_buf;
#endif
/* Next three fields are for local node slot recovery during
* mount. */
int dirty;
......@@ -305,9 +302,11 @@ struct ocfs2_super
struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
struct ocfs2_dlm_debug *osb_dlm_debug;
struct dentry *osb_debug_root;
struct dentry *osb_ctxt;
wait_queue_head_t recovery_event;
......@@ -344,6 +343,12 @@ struct ocfs2_super
/* used to protect metaecc calculation check of xattr. */
spinlock_t osb_xattr_lock;
unsigned int osb_dx_mask;
u32 osb_dx_seed[4];
/* the group we used to allocate inodes. */
u64 osb_inode_alloc_group;
};
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
......@@ -402,6 +407,51 @@ static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
return 0;
}
static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
return 1;
return 0;
}
static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
{
if (ocfs2_supports_indexed_dirs(osb))
return OCFS2_DX_LINK_MAX;
return OCFS2_LINK_MAX;
}
static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
{
u32 nlink = le16_to_cpu(di->i_links_count);
u32 hi = le16_to_cpu(di->i_links_count_hi);
if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
return nlink;
}
static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink)
{
u16 lo, hi;
lo = nlink;
hi = nlink >> OCFS2_LINKS_HI_SHIFT;
di->i_links_count = cpu_to_le16(lo);
di->i_links_count_hi = cpu_to_le16(hi);
}
static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
{
u32 links = ocfs2_read_links_count(di);
links += n;
ocfs2_set_links_count(di, links);
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock
......@@ -482,6 +532,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
#define OCFS2_IS_VALID_DIR_TRAILER(ptr) \
(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
#define OCFS2_IS_VALID_DX_ROOT(ptr) \
(!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE))
#define OCFS2_IS_VALID_DX_LEAF(ptr) \
(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
static inline unsigned long ino_from_blkno(struct super_block *sb,
u64 blkno)
{
......@@ -532,6 +588,16 @@ static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
}
static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb,
u64 blocks)
{
int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits;
unsigned int clusters;
clusters = ocfs2_blocks_to_clusters(sb, blocks);
return (u64)clusters << bits;
}
static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
u64 bytes)
{
......
......@@ -66,6 +66,8 @@
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01"
#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1"
/* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
......@@ -95,7 +97,8 @@
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR \
| OCFS2_FEATURE_INCOMPAT_META_ECC)
| OCFS2_FEATURE_INCOMPAT_META_ECC \
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
......@@ -151,6 +154,9 @@
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
/* Support for indexed directores */
#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400
/* Metadata checksum and error correction */
#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
......@@ -411,8 +417,12 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \
OCFS2_DIR_ROUND) & \
~OCFS2_DIR_ROUND)
#define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1)
#define OCFS2_LINK_MAX 32000
#define OCFS2_DX_LINK_MAX ((1U << 31) - 1U)
#define OCFS2_LINKS_HI_SHIFT 16
#define OCFS2_DX_ENTRIES_MAX (0xffffffffU)
#define S_SHIFT 12
static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
......@@ -628,8 +638,9 @@ struct ocfs2_super_block {
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/
__le16 s_reserved0;
__le32 s_reserved1;
/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */
__le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash.
* s_uuid_hash serves as seed[3]. */
/*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */
/*140*/
/*
......@@ -679,7 +690,7 @@ struct ocfs2_dinode {
belongs to */
__le16 i_suballoc_bit; /* Bit offset in suballocator
block group */
/*10*/ __le16 i_reserved0;
/*10*/ __le16 i_links_count_hi; /* High 16 bits of links count */
__le16 i_xattr_inline_size;
__le32 i_clusters; /* Cluster count */
__le32 i_uid; /* Owner UID */
......@@ -705,7 +716,8 @@ struct ocfs2_dinode {
__le16 i_dyn_features;
__le64 i_xattr_loc;
/*80*/ struct ocfs2_block_check i_check; /* Error checking */
/*88*/ __le64 i_reserved2[6];
/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
__le64 i_reserved2[5];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
......@@ -781,6 +793,90 @@ struct ocfs2_dir_block_trailer {
/*40*/
};
/*
* A directory entry in the indexed tree. We don't store the full name here,
* but instead provide a pointer to the full dirent in the unindexed tree.
*
* We also store name_len here so as to reduce the number of leaf blocks we
* need to search in case of collisions.
*/
struct ocfs2_dx_entry {
__le32 dx_major_hash; /* Used to find logical
* cluster in index */
__le32 dx_minor_hash; /* Lower bits used to find
* block in cluster */
__le64 dx_dirent_blk; /* Physical block in unindexed
* tree holding this dirent. */
};
struct ocfs2_dx_entry_list {
__le32 de_reserved;
__le16 de_count; /* Maximum number of entries
* possible in de_entries */
__le16 de_num_used; /* Current number of
* de_entries entries */
struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries
* in a packed array of
* length de_num_used */
};
#define OCFS2_DX_FLAG_INLINE 0x01
/*
* A directory indexing block. Each indexed directory has one of these,
* pointed to by ocfs2_dinode.
*
* This block stores an indexed btree root, and a set of free space
* start-of-list pointers.
*/
struct ocfs2_dx_root_block {
__u8 dr_signature[8]; /* Signature for verification */
struct ocfs2_block_check dr_check; /* Error checking */
__le16 dr_suballoc_slot; /* Slot suballocator this
* block belongs to. */
__le16 dr_suballoc_bit; /* Bit offset in suballocator
* block group */
__le32 dr_fs_generation; /* Must match super block */
__le64 dr_blkno; /* Offset on disk, in blocks */
__le64 dr_last_eb_blk; /* Pointer to last
* extent block */
__le32 dr_clusters; /* Clusters allocated
* to the indexed tree. */
__u8 dr_flags; /* OCFS2_DX_FLAG_* flags */
__u8 dr_reserved0;
__le16 dr_reserved1;
__le64 dr_dir_blkno; /* Pointer to parent inode */
__le32 dr_num_entries; /* Total number of
* names stored in
* this directory.*/
__le32 dr_reserved2;
__le64 dr_free_blk; /* Pointer to head of free
* unindexed block list. */
__le64 dr_reserved3[15];
union {
struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
* bits for maximum space
* efficiency. */
struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
* entries. We grow out
* to extents if this
* gets too big. */
};
};
/*
* The header of a leaf block in the indexed tree.
*/
struct ocfs2_dx_leaf {
__u8 dl_signature[8];/* Signature for verification */
struct ocfs2_block_check dl_check; /* Error checking */
__le64 dl_blkno; /* Offset on disk, in blocks */
__le32 dl_fs_generation;/* Must match super block */
__le32 dl_reserved0;
__le64 dl_reserved1;
struct ocfs2_dx_entry_list dl_list;
};
/*
* On disk allocator group structure for OCFS2
*/
......@@ -1112,6 +1208,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr(
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_root_block, dr_list.l_recs);
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
{
int size;
......@@ -1132,6 +1238,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_leaf, dl_list.de_entries);
return size / sizeof(struct ocfs2_dx_entry);
}
static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
return size / sizeof(struct ocfs2_dx_entry);
}
static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
{
u16 size;
......
......@@ -47,6 +47,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK,
OCFS2_LOCK_TYPE_QINFO,
OCFS2_LOCK_TYPE_NFS_SYNC,
OCFS2_NUM_LOCK_TYPES
};
......@@ -81,6 +82,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_QINFO:
c = 'Q';
break;
case OCFS2_LOCK_TYPE_NFS_SYNC:
c = 'Y';
break;
default:
c = '\0';
}
......
This diff is collapsed.
......@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
u64 *blkno_start);
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *ac,
u16 *suballoc_bit,
u64 *fe_blkno);
......@@ -186,4 +188,6 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
#endif /* _CHAINALLOC_H_ */
......@@ -201,6 +201,170 @@ static const match_table_t tokens = {
{Opt_err, NULL}
};
#ifdef CONFIG_DEBUG_FS
static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
{
int out = 0;
int i;
struct ocfs2_cluster_connection *cconn = osb->cconn;
struct ocfs2_recovery_map *rm = osb->recovery_map;
out += snprintf(buf + out, len - out,
"%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
"Device", osb->dev_str, osb->uuid_str,
osb->fs_generation, osb->vol_label);
out += snprintf(buf + out, len - out,
"%10s => State: %d Flags: 0x%lX\n", "Volume",
atomic_read(&osb->vol_state), osb->osb_flags);
out += snprintf(buf + out, len - out,
"%10s => Block: %lu Cluster: %d\n", "Sizes",
osb->sb->s_blocksize, osb->s_clustersize);
out += snprintf(buf + out, len - out,
"%10s => Compat: 0x%X Incompat: 0x%X "
"ROcompat: 0x%X\n",
"Features", osb->s_feature_compat,
osb->s_feature_incompat, osb->s_feature_ro_compat);
out += snprintf(buf + out, len - out,
"%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
osb->s_mount_opt, osb->s_atime_quantum);
out += snprintf(buf + out, len - out,
"%10s => Stack: %s Name: %*s Version: %d.%d\n",
"Cluster",
(*osb->osb_cluster_stack == '\0' ?
"o2cb" : osb->osb_cluster_stack),
cconn->cc_namelen, cconn->cc_name,
cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
spin_lock(&osb->dc_task_lock);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Count: %lu WakeSeq: %lu "
"WorkSeq: %lu\n", "DownCnvt",
task_pid_nr(osb->dc_task), osb->blocked_lock_count,
osb->dc_wake_sequence, osb->dc_work_sequence);
spin_unlock(&osb->dc_task_lock);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:",
"Recovery",
(osb->recovery_thread_task ?
task_pid_nr(osb->recovery_thread_task) : -1));
if (rm->rm_used == 0)
out += snprintf(buf + out, len - out, " None\n");
else {
for (i = 0; i < rm->rm_used; i++)
out += snprintf(buf + out, len - out, " %d",
rm->rm_entries[i]);
out += snprintf(buf + out, len - out, "\n");
}
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
task_pid_nr(osb->commit_task), osb->osb_commit_interval,
atomic_read(&osb->needs_checkpoint));
out += snprintf(buf + out, len - out,
"%10s => State: %d NumTxns: %d TxnId: %lu\n",
"Journal", osb->journal->j_state,
atomic_read(&osb->journal->j_num_trans),
osb->journal->j_trans_id);
out += snprintf(buf + out, len - out,
"%10s => GlobalAllocs: %d LocalAllocs: %d "
"SubAllocs: %d LAWinMoves: %d SAExtends: %d\n",
"Stats",
atomic_read(&osb->alloc_stats.bitmap_data),
atomic_read(&osb->alloc_stats.local_data),
atomic_read(&osb->alloc_stats.bg_allocs),
atomic_read(&osb->alloc_stats.moves),
atomic_read(&osb->alloc_stats.bg_extends));
out += snprintf(buf + out, len - out,
"%10s => State: %u Descriptor: %llu Size: %u bits "
"Default: %u bits\n",
"LocalAlloc", osb->local_alloc_state,
(unsigned long long)osb->la_last_gd,
osb->local_alloc_bits, osb->local_alloc_default_bits);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
"%10s => Slot: %d NumStolen: %d\n", "Steal",
osb->s_inode_steal_slot,
atomic_read(&osb->s_num_inodes_stolen));
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
"Slots", "Num", "RecoGen");
for (i = 0; i < osb->max_slots; ++i) {
out += snprintf(buf + out, len - out,
"%10s %c %3d %10d\n",
" ",
(i == osb->slot_num ? '*' : ' '),
i, osb->slot_recovery_generations[i]);
}
return out;
}
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
struct ocfs2_super *osb = inode->i_private;
char *buf = NULL;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto bail;
i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE));
file->private_data = buf;
return 0;
bail:
return -ENOMEM;
}
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
i_size_read(file->f_mapping->host));
}
#else
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
return 0;
}
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
return 0;
}
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */
static struct file_operations ocfs2_osb_debug_fops = {
.open = ocfs2_osb_debug_open,
.release = ocfs2_debug_release,
.read = ocfs2_debug_read,
.llseek = generic_file_llseek,
};
/*
* write_super and sync_fs ripped right out of ext3.
*/
......@@ -926,6 +1090,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
goto read_super_error;
}
osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_osb_debug_fops);
if (!osb->osb_ctxt) {
status = -EINVAL;
mlog_errno(status);
goto read_super_error;
}
status = ocfs2_mount_volume(sb);
if (osb->root_inode)
inode = igrab(osb->root_inode);
......@@ -1620,6 +1794,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
debugfs_remove(osb->osb_ctxt);
ocfs2_disable_quotas(osb);
ocfs2_shutdown_local_alloc(osb);
......@@ -1742,6 +1918,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
for (i = 0; i < 3; i++)
osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]);
osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash);
osb->sb = sb;
/* Save off for ocfs2_rw_direct */
osb->s_sectsize_bits = blksize_bits(sector_size);
......@@ -2130,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
* lock, and it's marked as dirty, set the bit in the recover
* map and launch a recovery thread for it. */
status = ocfs2_mark_dead_nodes(osb);
if (status < 0) {
mlog_errno(status);
goto finally;
}
status = ocfs2_compute_replay_slots(osb);
if (status < 0)
mlog_errno(status);
......
......@@ -512,7 +512,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
struct ocfs2_security_xattr_info *si,
int *want_clusters,
int *xattr_credits,
struct ocfs2_alloc_context **xattr_ac)
int *want_meta)
{
int ret = 0;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
......@@ -554,11 +554,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
(S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
(s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
if (ret) {
mlog_errno(ret);
return ret;
}
*want_meta = *want_meta + 1;
*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
}
......
......@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
int *, int *, struct ocfs2_alloc_context **);
int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
int, struct ocfs2_security_xattr_info *,
int *, int *, struct ocfs2_alloc_context **);
int *, int *, int *);
/*
* xattrs can live inside an inode, as part of an external xattr block,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment