Commit 78609a81, authored by Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits)
  ocfs2: recover orphans in offline slots during recovery and mount
  ocfs2: Pagecache usage optimization on ocfs2
  ocfs2: fix rare stale inode errors when exporting via nfs
  ocfs2/dlm: Tweak mle_state output
  ocfs2/dlm: Do not purge lockres that is being migrated dlm_purge_lockres()
  ocfs2/dlm: Remove struct dlm_lock_name in struct dlm_master_list_entry
  ocfs2/dlm: Show the number of lockres/mles in dlm_state
  ocfs2/dlm: dlm_set_lockres_owner() and dlm_change_lockres_owner() inlined
  ocfs2/dlm: Improve lockres counts
  ocfs2/dlm: Track number of mles
  ocfs2/dlm: Indent dlm_cleanup_master_list()
  ocfs2/dlm: Activate dlm->master_hash for master list entries
  ocfs2/dlm: Create and destroy the dlm->master_hash
  ocfs2/dlm: Refactor dlm_clean_master_list()
  ocfs2/dlm: Clean up struct dlm_lock_name
  ocfs2/dlm: Encapsulate adding and removing of mle from dlm->master_list
  ocfs2: Optimize inode group allocation by recording last used group.
  ocfs2: Allocate inode groups from global_bitmap.
  ocfs2: Optimize inode allocation by remembering last group
  ocfs2: fix leaf start calculation in ocfs2_dx_dir_rebalance()
  ...
parents 133e2a31 9140db04
...@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { ...@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters, .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
}; };
/*
 * Extent tree operation for an indexed-directory (dx) root block:
 * store @blkno, converted to little-endian, in the root's
 * dr_last_eb_blk field.  et->et_object is taken to be the dx root block.
 */
static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
					  u64 blkno)
{
	struct ocfs2_dx_root_block *root = et->et_object;

	root->dr_last_eb_blk = cpu_to_le64(blkno);
}
/*
 * Extent tree operation for a dx root block: return the dr_last_eb_blk
 * field of the root (et->et_object), converted to CPU byte order.
 */
static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
	struct ocfs2_dx_root_block *root = et->et_object;

	return le64_to_cpu(root->dr_last_eb_blk);
}
/*
 * Extent tree operation for a dx root block: add @clusters to the
 * little-endian dr_clusters counter stored in the root (et->et_object).
 * @inode is part of the callback signature but is not used here.
 */
static void ocfs2_dx_root_update_clusters(struct inode *inode,
					  struct ocfs2_extent_tree *et,
					  u32 clusters)
{
	struct ocfs2_dx_root_block *root = et->et_object;

	le32_add_cpu(&root->dr_clusters, clusters);
}
/*
 * Extent tree operation for a dx root block: BUG() if the object behind
 * the tree does not pass OCFS2_IS_VALID_DX_ROOT().  Always returns 0
 * otherwise.  @inode is not used.
 */
static int ocfs2_dx_root_sanity_check(struct inode *inode,
				      struct ocfs2_extent_tree *et)
{
	struct ocfs2_dx_root_block *root = et->et_object;

	BUG_ON(!OCFS2_IS_VALID_DX_ROOT(root));

	return 0;
}
/*
 * Extent tree operation for a dx root block: point et->et_root_el at the
 * extent list (dr_list) embedded in the root block.
 */
static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
{
	struct ocfs2_dx_root_block *root = et->et_object;

	et->et_root_el = &root->dr_list;
}
/*
 * Extent tree callbacks for trees rooted in a dx root block.  Handed to
 * __ocfs2_init_extent_tree() by ocfs2_init_dx_root_extent_tree().
 */
static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
	.eo_fill_root_el	= ocfs2_dx_root_fill_root_el,
	.eo_sanity_check	= ocfs2_dx_root_sanity_check,
	.eo_update_clusters	= ocfs2_dx_root_update_clusters,
	.eo_get_last_eb_blk	= ocfs2_dx_root_get_last_eb_blk,
	.eo_set_last_eb_blk	= ocfs2_dx_root_set_last_eb_blk,
};
static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode, struct inode *inode,
struct buffer_head *bh, struct buffer_head *bh,
...@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, ...@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
&ocfs2_xattr_value_et_ops); &ocfs2_xattr_value_et_ops);
} }
/*
 * Initialise @et as an extent tree rooted in the dx root block held in
 * @bh.  The tree uses ocfs2_journal_access_dr for journal access and the
 * dx root operations table; no explicit object pointer is supplied (NULL).
 */
void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
				    struct inode *inode,
				    struct buffer_head *bh)
{
	__ocfs2_init_extent_tree(et, inode, bh,
				 ocfs2_journal_access_dr,
				 NULL,
				 &ocfs2_dx_root_et_ops);
}
static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 new_last_eb_blk) u64 new_last_eb_blk)
{ {
......
...@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf; ...@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode, struct inode *inode,
struct ocfs2_xattr_value_buf *vb); struct ocfs2_xattr_value_buf *vb);
void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
/* /*
* Read an extent block into *bh. If *bh is NULL, a bh will be * Read an extent block into *bh. If *bh is NULL, a bh will be
......
...@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, ...@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
} }
const struct address_space_operations ocfs2_aops = { const struct address_space_operations ocfs2_aops = {
.readpage = ocfs2_readpage, .readpage = ocfs2_readpage,
.readpages = ocfs2_readpages, .readpages = ocfs2_readpages,
.writepage = ocfs2_writepage, .writepage = ocfs2_writepage,
.write_begin = ocfs2_write_begin, .write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end, .write_end = ocfs2_write_end,
.bmap = ocfs2_bmap, .bmap = ocfs2_bmap,
.sync_page = block_sync_page, .sync_page = block_sync_page,
.direct_IO = ocfs2_direct_IO, .direct_IO = ocfs2_direct_IO,
.invalidatepage = ocfs2_invalidatepage, .invalidatepage = ocfs2_invalidatepage,
.releasepage = ocfs2_releasepage, .releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
}; };
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/random.h> #include <linux/random.h>
#include <linux/crc32.h> #include <linux/crc32.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/debugfs.h>
#include "heartbeat.h" #include "heartbeat.h"
#include "tcp.h" #include "tcp.h"
...@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; ...@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
static LIST_HEAD(o2hb_node_events); static LIST_HEAD(o2hb_node_events);
static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
#define O2HB_DEBUG_DIR "o2hb"
#define O2HB_DEBUG_LIVENODES "livenodes"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
static LIST_HEAD(o2hb_all_regions); static LIST_HEAD(o2hb_all_regions);
static struct o2hb_callback { static struct o2hb_callback {
...@@ -905,7 +911,77 @@ static int o2hb_thread(void *data) ...@@ -905,7 +911,77 @@ static int o2hb_thread(void *data)
return 0; return 0;
} }
void o2hb_init(void) #ifdef CONFIG_DEBUG_FS
/*
 * open() handler for the "livenodes" debugfs file.
 *
 * Takes a snapshot of the heartbeat node map with o2hb_fill_node_map(),
 * formats every set bit as a decimal node number followed by a space,
 * terminates the line with '\n', publishes the text length as the inode
 * size and stores the buffer in file->private_data for later reads.
 *
 * Returns 0 on success or -ENOMEM if the page-sized buffer cannot be
 * allocated.
 */
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
	unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	char *buf;
	int i = -1;
	int out = 0;

	/*
	 * The buffer must be zeroed: o2hb_debug_read() takes the readable
	 * length from the inode size, which is shared by every opener.  A
	 * concurrent open with a longer snapshot would otherwise let a
	 * reader copy uninitialised heap bytes past its own text.
	 */
	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	o2hb_fill_node_map(map, sizeof(map));

	/*
	 * scnprintf() returns the number of bytes actually stored, so 'out'
	 * can never advance past the end of the buffer.
	 */
	while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
		out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
	out += scnprintf(buf + out, PAGE_SIZE - out, "\n");

	i_size_write(inode, out);

	file->private_data = buf;

	return 0;
}
/*
 * release() handler for the "livenodes" debugfs file: free the buffer
 * kept in file->private_data.  Always returns 0.
 */
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
	void *snapshot = file->private_data;

	kfree(snapshot);

	return 0;
}
/*
 * read() handler for the "livenodes" debugfs file: copy from the buffer
 * in file->private_data to user space via simple_read_from_buffer(),
 * using the current inode size as the amount of data available.
 */
static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
			       size_t nbytes, loff_t *ppos)
{
	struct inode *host = file->f_mapping->host;

	return simple_read_from_buffer(buf, nbytes, ppos,
				       file->private_data,
				       i_size_read(host));
}
#else
/* Stub used when CONFIG_DEBUG_FS is not set: nothing to prepare. */
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
	return 0;
}
/* Stub used when CONFIG_DEBUG_FS is not set: nothing to free. */
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
	return 0;
}
/* Stub used when CONFIG_DEBUG_FS is not set: reports zero bytes read. */
static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
			       size_t nbytes, loff_t *ppos)
{
	return 0;
}
#endif /* CONFIG_DEBUG_FS */
/*
 * File operations for the "livenodes" debugfs file.  open/release/read
 * are the o2hb_debug_* handlers (real or stub, depending on
 * CONFIG_DEBUG_FS); seeking uses generic_file_llseek.
 */
static struct file_operations o2hb_debug_fops = {
	.llseek		= generic_file_llseek,
	.read		= o2hb_debug_read,
	.open		= o2hb_debug_open,
	.release	= o2hb_debug_release,
};
/*
 * Tear down the heartbeat debugfs entries: remove the "livenodes" file
 * first, then its parent directory.  Entries that were never created
 * (NULL) are skipped.
 */
void o2hb_exit(void)
{
	struct dentry *livenodes = o2hb_debug_livenodes;
	struct dentry *dir = o2hb_debug_dir;

	if (livenodes)
		debugfs_remove(livenodes);

	if (dir)
		debugfs_remove(dir);
}
int o2hb_init(void)
{ {
int i; int i;
...@@ -918,6 +994,24 @@ void o2hb_init(void) ...@@ -918,6 +994,24 @@ void o2hb_init(void)
INIT_LIST_HEAD(&o2hb_node_events); INIT_LIST_HEAD(&o2hb_node_events);
memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
if (!o2hb_debug_dir) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
S_IFREG|S_IRUSR,
o2hb_debug_dir, NULL,
&o2hb_debug_fops);
if (!o2hb_debug_livenodes) {
mlog_errno(-ENOMEM);
debugfs_remove(o2hb_debug_dir);
return -ENOMEM;
}
return 0;
} }
/* if we're already in a callback then we're already serialized by the sem */ /* if we're already in a callback then we're already serialized by the sem */
......
...@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid, ...@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc); struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map, void o2hb_fill_node_map(unsigned long *map,
unsigned bytes); unsigned bytes);
void o2hb_init(void); void o2hb_exit(void);
int o2hb_init(void);
int o2hb_check_node_heartbeating(u8 node_num); int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void); int o2hb_check_local_node_heartbeating(void);
......
...@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void) ...@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void)
o2cb_sys_shutdown(); o2cb_sys_shutdown();
o2net_exit(); o2net_exit();
o2hb_exit();
} }
static int __init init_o2nm(void) static int __init init_o2nm(void)
...@@ -889,11 +890,13 @@ static int __init init_o2nm(void) ...@@ -889,11 +890,13 @@ static int __init init_o2nm(void)
cluster_print_version(); cluster_print_version();
o2hb_init(); ret = o2hb_init();
if (ret)
goto out;
ret = o2net_init(); ret = o2net_init();
if (ret) if (ret)
goto out; goto out_o2hb;
ret = o2net_register_hb_callbacks(); ret = o2net_register_hb_callbacks();
if (ret) if (ret)
...@@ -916,6 +919,8 @@ static int __init init_o2nm(void) ...@@ -916,6 +919,8 @@ static int __init init_o2nm(void)
o2net_unregister_hb_callbacks(); o2net_unregister_hb_callbacks();
out_o2net: out_o2net:
o2net_exit(); o2net_exit();
out_o2hb:
o2hb_exit();
out: out:
return ret; return ret;
} }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -26,44 +26,70 @@ ...@@ -26,44 +26,70 @@
#ifndef OCFS2_DIR_H #ifndef OCFS2_DIR_H
#define OCFS2_DIR_H #define OCFS2_DIR_H
struct buffer_head *ocfs2_find_entry(const char *name, struct ocfs2_dx_hinfo {
int namelen, u32 major_hash;
struct inode *dir, u32 minor_hash;
struct ocfs2_dir_entry **res_dir); };
/*
 * Result of a directory lookup.  Filled in through the
 * 'struct ocfs2_dir_lookup_result *' arguments of the directory API and
 * released with ocfs2_free_dir_lookup_result().
 */
struct ocfs2_dir_lookup_result {
	/* Unindexed leaf block */
	struct buffer_head		*dl_leaf_bh;

	/* Target dirent in unindexed leaf */
	struct ocfs2_dir_entry		*dl_entry;

	/* Root of indexed tree */
	struct buffer_head		*dl_dx_root_bh;

	/* Indexed leaf block */
	struct buffer_head		*dl_dx_leaf_bh;

	/* Target dx_entry in indexed leaf */
	struct ocfs2_dx_entry		*dl_dx_entry;

	/* Name hash results */
	struct ocfs2_dx_hinfo		dl_hinfo;

	/*
	 * Previous entry in dir free space list.  NULL if previous entry
	 * is dx root block.
	 */
	struct buffer_head		*dl_prev_leaf_bh;
};
void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res);
int ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_lookup_result *lookup);
int ocfs2_delete_entry(handle_t *handle, int ocfs2_delete_entry(handle_t *handle,
struct inode *dir, struct inode *dir,
struct ocfs2_dir_entry *de_del, struct ocfs2_dir_lookup_result *res);
struct buffer_head *bh);
int __ocfs2_add_entry(handle_t *handle, int __ocfs2_add_entry(handle_t *handle,
struct inode *dir, struct inode *dir,
const char *name, int namelen, const char *name, int namelen,
struct inode *inode, u64 blkno, struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh); struct ocfs2_dir_lookup_result *lookup);
static inline int ocfs2_add_entry(handle_t *handle, static inline int ocfs2_add_entry(handle_t *handle,
struct dentry *dentry, struct dentry *dentry,
struct inode *inode, u64 blkno, struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh) struct ocfs2_dir_lookup_result *lookup)
{ {
return __ocfs2_add_entry(handle, dentry->d_parent->d_inode, return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
dentry->d_name.name, dentry->d_name.len, dentry->d_name.name, dentry->d_name.len,
inode, blkno, parent_fe_bh, insert_bh); inode, blkno, parent_fe_bh, lookup);
} }
int ocfs2_update_entry(struct inode *dir, handle_t *handle, int ocfs2_update_entry(struct inode *dir, handle_t *handle,
struct buffer_head *de_bh, struct ocfs2_dir_entry *de, struct ocfs2_dir_lookup_result *res,
struct inode *new_entry_inode); struct inode *new_entry_inode);
int ocfs2_check_dir_for_entry(struct inode *dir, int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name, const char *name,
int namelen); int namelen);
int ocfs2_empty_dir(struct inode *inode); int ocfs2_empty_dir(struct inode *inode);
int ocfs2_find_files_on_disk(const char *name, int ocfs2_find_files_on_disk(const char *name,
int namelen, int namelen,
u64 *blkno, u64 *blkno,
struct inode *inode, struct inode *inode,
struct buffer_head **dirent_bh, struct ocfs2_dir_lookup_result *res);
struct ocfs2_dir_entry **dirent);
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
int namelen, u64 *blkno); int namelen, u64 *blkno);
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
...@@ -74,14 +100,17 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, ...@@ -74,14 +100,17 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
struct buffer_head *parent_fe_bh, struct buffer_head *parent_fe_bh,
const char *name, const char *name,
int namelen, int namelen,
struct buffer_head **ret_de_bh); struct ocfs2_dir_lookup_result *lookup);
struct ocfs2_alloc_context; struct ocfs2_alloc_context;
int ocfs2_fill_new_dir(struct ocfs2_super *osb, int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle, handle_t *handle,
struct inode *parent, struct inode *parent,
struct inode *inode, struct inode *inode,
struct buffer_head *fe_bh, struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac); struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac);
int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh);
struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize, struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
void *data); void *data);
......
...@@ -52,16 +52,12 @@ ...@@ -52,16 +52,12 @@
enum dlm_mle_type { enum dlm_mle_type {
DLM_MLE_BLOCK, DLM_MLE_BLOCK,
DLM_MLE_MASTER, DLM_MLE_MASTER,
DLM_MLE_MIGRATION DLM_MLE_MIGRATION,
}; DLM_MLE_NUM_TYPES
struct dlm_lock_name {
u8 len;
u8 name[DLM_LOCKID_NAME_MAX];
}; };
struct dlm_master_list_entry { struct dlm_master_list_entry {
struct list_head list; struct hlist_node master_hash_node;
struct list_head hb_events; struct list_head hb_events;
struct dlm_ctxt *dlm; struct dlm_ctxt *dlm;
spinlock_t spinlock; spinlock_t spinlock;
...@@ -78,10 +74,10 @@ struct dlm_master_list_entry { ...@@ -78,10 +74,10 @@ struct dlm_master_list_entry {
enum dlm_mle_type type; enum dlm_mle_type type;
struct o2hb_callback_func mle_hb_up; struct o2hb_callback_func mle_hb_up;
struct o2hb_callback_func mle_hb_down; struct o2hb_callback_func mle_hb_down;
union { struct dlm_lock_resource *mleres;
struct dlm_lock_resource *res; unsigned char mname[DLM_LOCKID_NAME_MAX];
struct dlm_lock_name name; unsigned int mnamelen;
} u; unsigned int mnamehash;
}; };
enum dlm_ast_type { enum dlm_ast_type {
...@@ -151,13 +147,14 @@ struct dlm_ctxt ...@@ -151,13 +147,14 @@ struct dlm_ctxt
unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct dlm_recovery_ctxt reco; struct dlm_recovery_ctxt reco;
spinlock_t master_lock; spinlock_t master_lock;
struct list_head master_list; struct hlist_head **master_hash;
struct list_head mle_hb_events; struct list_head mle_hb_events;
/* these give a really vague idea of the system load */ /* these give a really vague idea of the system load */
atomic_t local_resources; atomic_t mle_tot_count[DLM_MLE_NUM_TYPES];
atomic_t remote_resources; atomic_t mle_cur_count[DLM_MLE_NUM_TYPES];
atomic_t unknown_resources; atomic_t res_tot_count;
atomic_t res_cur_count;
struct dlm_debug_ctxt *dlm_debug_ctxt; struct dlm_debug_ctxt *dlm_debug_ctxt;
struct dentry *dlm_debugfs_subroot; struct dentry *dlm_debugfs_subroot;
...@@ -195,6 +192,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned ...@@ -195,6 +192,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned
return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
} }
/*
 * Return the master-hash bucket head for bucket index @i.  The table is
 * an array of pages: the page is (i / DLM_BUCKETS_PER_PAGE) wrapped to
 * DLM_HASH_PAGES, and the bucket inside it is i % DLM_BUCKETS_PER_PAGE.
 */
static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm,
						 unsigned i)
{
	struct hlist_head *page;

	page = dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES];

	return page + (i % DLM_BUCKETS_PER_PAGE);
}
/* these keventd work queue items are for less-frequently /* these keventd work queue items are for less-frequently
* called functions that cannot be directly called from the * called functions that cannot be directly called from the
* net message handlers for some reason, usually because * net message handlers for some reason, usually because
...@@ -848,9 +852,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, ...@@ -848,9 +852,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
unsigned int len); unsigned int len);
int dlm_is_host_down(int errno); int dlm_is_host_down(int errno);
void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner);
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
const char *lockid, const char *lockid,
int namelen, int namelen,
...@@ -1008,6 +1010,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) ...@@ -1008,6 +1010,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
DLM_LOCK_RES_MIGRATING)); DLM_LOCK_RES_MIGRATING));
} }
void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
/* create/destroy slab caches */ /* create/destroy slab caches */
int dlm_init_master_caches(void); int dlm_init_master_caches(void);
void dlm_destroy_master_caches(void); void dlm_destroy_master_caches(void);
...@@ -1110,6 +1115,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter) ...@@ -1110,6 +1115,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter)
return bit; return bit;
} }
/*
 * Unconditionally record @owner as the owner of @res.
 * The caller must hold res->spinlock (asserted).  @dlm is not used.
 */
static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
					 struct dlm_lock_resource *res,
					 u8 owner)
{
	assert_spin_locked(&res->spinlock);

	res->owner = owner;
}
/*
 * Set the owner of @res to @owner, but only when it differs from the
 * current owner.  The caller must hold res->spinlock (asserted).
 */
static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
					    struct dlm_lock_resource *res,
					    u8 owner)
{
	assert_spin_locked(&res->spinlock);

	/* Nothing to do when the owner is already correct. */
	if (res->owner == owner)
		return;

	dlm_set_lockres_owner(dlm, res, owner);
}
#endif /* DLMCOMMON_H */ #endif /* DLMCOMMON_H */
...@@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes, ...@@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
{ {
int out = 0; int out = 0;
unsigned int namelen;
const char *name;
char *mle_type; char *mle_type;
if (mle->type != DLM_MLE_MASTER) {
namelen = mle->u.name.len;
name = mle->u.name.name;
} else {
namelen = mle->u.res->lockname.len;
name = mle->u.res->lockname.name;
}
if (mle->type == DLM_MLE_BLOCK) if (mle->type == DLM_MLE_BLOCK)
mle_type = "BLK"; mle_type = "BLK";
else if (mle->type == DLM_MLE_MASTER) else if (mle->type == DLM_MLE_MASTER)
...@@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) ...@@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
else else
mle_type = "MIG"; mle_type = "MIG";
out += stringify_lockname(name, namelen, buf + out, len - out); out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out);
out += snprintf(buf + out, len - out, out += snprintf(buf + out, len - out,
"\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
mle_type, mle->master, mle->new_master, mle_type, mle->master, mle->new_master,
...@@ -501,23 +491,33 @@ static struct file_operations debug_purgelist_fops = { ...@@ -501,23 +491,33 @@ static struct file_operations debug_purgelist_fops = {
static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
{ {
struct dlm_master_list_entry *mle; struct dlm_master_list_entry *mle;
int out = 0; struct hlist_head *bucket;
unsigned long total = 0; struct hlist_node *list;
int i, out = 0;
unsigned long total = 0, longest = 0, bktcnt;
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
"Dumping MLEs for Domain: %s\n", dlm->name); "Dumping MLEs for Domain: %s\n", dlm->name);
spin_lock(&dlm->master_lock); spin_lock(&dlm->master_lock);
list_for_each_entry(mle, &dlm->master_list, list) { for (i = 0; i < DLM_HASH_BUCKETS; i++) {
++total; bucket = dlm_master_hash(dlm, i);
if (db->len - out < 200) hlist_for_each(list, bucket) {
continue; mle = hlist_entry(list, struct dlm_master_list_entry,
out += dump_mle(mle, db->buf + out, db->len - out); master_hash_node);
++total;
++bktcnt;
if (db->len - out < 200)
continue;
out += dump_mle(mle, db->buf + out, db->len - out);
}
longest = max(longest, bktcnt);
bktcnt = 0;
} }
spin_unlock(&dlm->master_lock); spin_unlock(&dlm->master_lock);
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
"Total on list: %ld\n", total); "Total: %ld, Longest: %ld\n", total, longest);
return out; return out;
} }
...@@ -756,12 +756,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) ...@@ -756,12 +756,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
int out = 0; int out = 0;
struct dlm_reco_node_data *node; struct dlm_reco_node_data *node;
char *state; char *state;
int lres, rres, ures, tres; int cur_mles = 0, tot_mles = 0;
int i;
lres = atomic_read(&dlm->local_resources);
rres = atomic_read(&dlm->remote_resources);
ures = atomic_read(&dlm->unknown_resources);
tres = lres + rres + ures;
spin_lock(&dlm->spinlock); spin_lock(&dlm->spinlock);
...@@ -804,21 +800,48 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) ...@@ -804,21 +800,48 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
db->buf + out, db->len - out); db->buf + out, db->len - out);
out += snprintf(db->buf + out, db->len - out, "\n"); out += snprintf(db->buf + out, db->len - out, "\n");
/* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ /* Lock Resources: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"Lock Resources: %d (%d)\n",
atomic_read(&dlm->res_cur_count),
atomic_read(&dlm->res_tot_count));
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
tot_mles += atomic_read(&dlm->mle_tot_count[i]);
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
cur_mles += atomic_read(&dlm->mle_cur_count[i]);
/* MLEs: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"MLEs: %d (%d)\n", cur_mles, tot_mles);
/* Blocking: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
" Blocking: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
/* Mastery: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
" Mastery: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
/* Migration: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
"Mastered Resources Total: %d Locally: %d " " Migration: %d (%d)\n",
"Remotely: %d Unknown: %d\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
tres, lres, rres, ures); atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
"Lists: Dirty=%s Purge=%s PendingASTs=%s " "Lists: Dirty=%s Purge=%s PendingASTs=%s "
"PendingBASTs=%s Master=%s\n", "PendingBASTs=%s\n",
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
(list_empty(&dlm->purge_list) ? "Empty" : "InUse"), (list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
(list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
(list_empty(&dlm->master_list) ? "Empty" : "InUse"));
/* Purge Count: xxx Refs: xxx */ /* Purge Count: xxx Refs: xxx */
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
......
...@@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) ...@@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
if (dlm->lockres_hash) if (dlm->lockres_hash)
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
if (dlm->master_hash)
dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
if (dlm->name) if (dlm->name)
kfree(dlm->name); kfree(dlm->name);
...@@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, ...@@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
for (i = 0; i < DLM_HASH_BUCKETS; i++) for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
dlm->master_hash = (struct hlist_head **)
dlm_alloc_pagevec(DLM_HASH_PAGES);
if (!dlm->master_hash) {
mlog_errno(-ENOMEM);
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
kfree(dlm->name);
kfree(dlm);
dlm = NULL;
goto leave;
}
for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
strcpy(dlm->name, domain); strcpy(dlm->name, domain);
dlm->key = key; dlm->key = key;
dlm->node_num = o2nm_this_node(); dlm->node_num = o2nm_this_node();
ret = dlm_create_debugfs_subroot(dlm); ret = dlm_create_debugfs_subroot(dlm);
if (ret < 0) { if (ret < 0) {
dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
kfree(dlm->name); kfree(dlm->name);
kfree(dlm); kfree(dlm);
...@@ -1579,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, ...@@ -1579,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
init_waitqueue_head(&dlm->reco.event); init_waitqueue_head(&dlm->reco.event);
init_waitqueue_head(&dlm->ast_wq); init_waitqueue_head(&dlm->ast_wq);
init_waitqueue_head(&dlm->migration_wq); init_waitqueue_head(&dlm->migration_wq);
INIT_LIST_HEAD(&dlm->master_list);
INIT_LIST_HEAD(&dlm->mle_hb_events); INIT_LIST_HEAD(&dlm->mle_hb_events);
dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
...@@ -1587,9 +1604,13 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, ...@@ -1587,9 +1604,13 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->reco.new_master = O2NM_INVALID_NODE_NUM; dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
atomic_set(&dlm->local_resources, 0);
atomic_set(&dlm->remote_resources, 0); atomic_set(&dlm->res_tot_count, 0);
atomic_set(&dlm->unknown_resources, 0); atomic_set(&dlm->res_cur_count, 0);
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) {
atomic_set(&dlm->mle_tot_count[i], 0);
atomic_set(&dlm->mle_cur_count[i], 0);
}
spin_lock_init(&dlm->work_lock); spin_lock_init(&dlm->work_lock);
INIT_LIST_HEAD(&dlm->work_list); INIT_LIST_HEAD(&dlm->work_list);
......
This diff is collapsed.
...@@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, ...@@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock); spin_lock(&res->spinlock);
if (!__dlm_lockres_unused(res)) { if (!__dlm_lockres_unused(res)) {
spin_unlock(&res->spinlock);
mlog(0, "%s:%.*s: tried to purge but not unused\n", mlog(0, "%s:%.*s: tried to purge but not unused\n",
dlm->name, res->lockname.len, res->lockname.name); dlm->name, res->lockname.len, res->lockname.name);
return -ENOTEMPTY; __dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock);
BUG();
} }
if (res->state & DLM_LOCK_RES_MIGRATING) {
mlog(0, "%s:%.*s: Delay dropref as this lockres is "
"being remastered\n", dlm->name, res->lockname.len,
res->lockname.name);
/* Re-add the lockres to the end of the purge list */
if (!list_empty(&res->purge)) {
list_del_init(&res->purge);
list_add_tail(&res->purge, &dlm->purge_list);
}
spin_unlock(&res->spinlock);
return 0;
}
master = (res->owner == dlm->node_num); master = (res->owner == dlm->node_num);
if (!master) if (!master)
res->state |= DLM_LOCK_RES_DROPPING_REF; res->state |= DLM_LOCK_RES_DROPPING_REF;
spin_unlock(&res->spinlock); spin_unlock(&res->spinlock);
......
...@@ -244,6 +244,10 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = { ...@@ -244,6 +244,10 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0, .flags = 0,
}; };
/* Lock resource operations for the nfs_sync lock: no callbacks, no flags. */
static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
	.flags		= 0,
};
static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb, .get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock, .post_unlock = ocfs2_dentry_post_unlock,
...@@ -622,6 +626,17 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, ...@@ -622,6 +626,17 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_rename_lops, osb); &ocfs2_rename_lops, osb);
} }
/*
 * Prepare the nfs_sync lock resource @res for use with @osb: run the
 * one-time initialisation, build its lock name (type
 * OCFS2_LOCK_TYPE_NFS_SYNC, block number 0, generation 0) and perform the
 * common setup with the nfs_sync operations table.
 */
static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
					 struct ocfs2_super *osb)
{
	/*
	 * This lockres is not allocated from a slab, so the init-once
	 * step has to be invoked by hand.
	 */
	ocfs2_lock_res_init_once(res);

	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);

	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
				   &ocfs2_nfs_sync_lops, osb);
}
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp) struct ocfs2_file_private *fp)
{ {
...@@ -2417,6 +2432,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) ...@@ -2417,6 +2432,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
} }
/*
 * Take the nfs_sync cluster lock.
 *
 * @osb: the mounted volume
 * @ex:  non-zero requests LKM_EXMODE, zero requests LKM_PRMODE
 *
 * Returns -EROFS on a hard read-only volume and 0 without touching
 * the cluster on a local mount.  Otherwise returns the result of
 * ocfs2_cluster_lock(), logging an error when it is negative.
 */
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	int ret;

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;
	if (ocfs2_mount_local(osb))
		return 0;

	ret = ocfs2_cluster_lock(osb, &osb->osb_nfs_sync_lockres, level,
				 0, 0);
	if (ret >= 0)
		return ret;

	mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", ret);
	return ret;
}
/*
 * Drop the nfs_sync cluster lock.
 *
 * @ex selects the level to release the same way ocfs2_nfs_sync_lock()
 * selects the level to take: non-zero is LKM_EXMODE, zero is
 * LKM_PRMODE.  Nothing is done on a local mount.
 */
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;

	if (ocfs2_mount_local(osb))
		return;

	ocfs2_cluster_unlock(osb, &osb->osb_nfs_sync_lockres, level);
}
int ocfs2_dentry_lock(struct dentry *dentry, int ex) int ocfs2_dentry_lock(struct dentry *dentry, int ex)
{ {
int ret; int ret;
...@@ -2798,6 +2841,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) ...@@ -2798,6 +2841,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
local: local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
osb->cconn = conn; osb->cconn = conn;
...@@ -2833,6 +2877,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, ...@@ -2833,6 +2877,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_super_lockres); ocfs2_lock_res_free(&osb->osb_super_lockres);
ocfs2_lock_res_free(&osb->osb_rename_lockres); ocfs2_lock_res_free(&osb->osb_rename_lockres);
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
ocfs2_cluster_disconnect(osb->cconn, hangup_pending); ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
osb->cconn = NULL; osb->cconn = NULL;
...@@ -3015,6 +3060,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) ...@@ -3015,6 +3060,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
{ {
ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
} }
int ocfs2_drop_inode_locks(struct inode *inode) int ocfs2_drop_inode_locks(struct inode *inode)
......
...@@ -115,6 +115,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, ...@@ -115,6 +115,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
int ex); int ex);
int ocfs2_rename_lock(struct ocfs2_super *osb); int ocfs2_rename_lock(struct ocfs2_super *osb);
void ocfs2_rename_unlock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb);
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex);
int ocfs2_dentry_lock(struct dentry *dentry, int ex); int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex); void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock); int ocfs2_file_lock(struct file *file, int ex, int trylock);
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "ocfs2.h" #include "ocfs2.h"
#include "alloc.h"
#include "dir.h" #include "dir.h"
#include "dlmglue.h" #include "dlmglue.h"
#include "dcache.h" #include "dcache.h"
...@@ -38,6 +39,7 @@ ...@@ -38,6 +39,7 @@
#include "inode.h" #include "inode.h"
#include "buffer_head_io.h" #include "buffer_head_io.h"
#include "suballoc.h"
struct ocfs2_inode_handle struct ocfs2_inode_handle
{ {
...@@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, ...@@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
struct ocfs2_inode_handle *handle) struct ocfs2_inode_handle *handle)
{ {
struct inode *inode; struct inode *inode;
struct ocfs2_super *osb = OCFS2_SB(sb);
u64 blkno = handle->ih_blkno;
int status, set;
struct dentry *result; struct dentry *result;
mlog_entry("(0x%p, 0x%p)\n", sb, handle); mlog_entry("(0x%p, 0x%p)\n", sb, handle);
if (handle->ih_blkno == 0) { if (blkno == 0) {
mlog_errno(-ESTALE); mlog(0, "nfs wants inode with blkno: 0\n");
return ERR_PTR(-ESTALE); result = ERR_PTR(-ESTALE);
goto bail;
}
inode = ocfs2_ilookup(sb, blkno);
/*
* If the inode exists in memory, we only need to check its
* generation number
*/
if (inode)
goto check_gen;
/*
* This will synchronize us against ocfs2_delete_inode() on
* all nodes
*/
status = ocfs2_nfs_sync_lock(osb, 1);
if (status < 0) {
mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
goto check_err;
}
status = ocfs2_test_inode_bit(osb, blkno, &set);
if (status < 0) {
if (status == -EINVAL) {
/*
* The blkno NFS gave us doesn't even show up
* as an inode, we return -ESTALE to be
* nice
*/
mlog(0, "test inode bit failed %d\n", status);
status = -ESTALE;
} else {
mlog(ML_ERROR, "test inode bit failed %d\n", status);
}
goto unlock_nfs_sync;
}
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
mlog(0, "inode %llu suballoc bit is clear\n", blkno);
status = -ESTALE;
goto unlock_nfs_sync;
} }
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); inode = ocfs2_iget(osb, blkno, 0, 0);
if (IS_ERR(inode)) unlock_nfs_sync:
return (void *)inode; ocfs2_nfs_sync_unlock(osb, 1);
check_err:
if (status < 0) {
if (status == -ESTALE) {
mlog(0, "stale inode ino: %llu generation: %u\n",
blkno, handle->ih_generation);
}
result = ERR_PTR(status);
goto bail;
}
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
result = (void *)inode;
goto bail;
}
check_gen:
if (handle->ih_generation != inode->i_generation) { if (handle->ih_generation != inode->i_generation) {
iput(inode); iput(inode);
return ERR_PTR(-ESTALE); mlog(0, "stale inode ino: %llu generation: %u\n", blkno,
handle->ih_generation);
result = ERR_PTR(-ESTALE);
goto bail;
} }
result = d_obtain_alias(inode); result = d_obtain_alias(inode);
if (!IS_ERR(result)) if (!IS_ERR(result))
result->d_op = &ocfs2_dentry_ops; result->d_op = &ocfs2_dentry_ops;
else
mlog_errno(PTR_ERR(result));
bail:
mlog_exit_ptr(result); mlog_exit_ptr(result);
return result; return result;
} }
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "ocfs2.h" #include "ocfs2.h"
#include "alloc.h" #include "alloc.h"
#include "dir.h"
#include "blockcheck.h" #include "blockcheck.h"
#include "dlmglue.h" #include "dlmglue.h"
#include "extent_map.h" #include "extent_map.h"
...@@ -112,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) ...@@ -112,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
oi->ip_attr |= OCFS2_DIRSYNC_FL; oi->ip_attr |= OCFS2_DIRSYNC_FL;
} }
/*
 * Look up the inode for @blkno on @sb via ilookup5(), matching with
 * ocfs2_find_actor.  The find arguments carry the block number, the
 * inode number derived from it by ino_from_blkno(), and zero for both
 * the flags and the system file type.
 *
 * Returns whatever ilookup5() returns.
 */
struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
{
	struct ocfs2_find_inode_args args = {
		.fi_blkno	 = blkno,
		.fi_flags	 = 0,
		.fi_ino		 = ino_from_blkno(sb, blkno),
		.fi_sysfile_type = 0,
	};

	return ilookup5(sb, blkno, ocfs2_find_actor, &args);
}
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
int sysfile_type) int sysfile_type)
{ {
...@@ -275,7 +287,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ...@@ -275,7 +287,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
(unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)le64_to_cpu(fe->i_blkno)); (unsigned long long)le64_to_cpu(fe->i_blkno));
inode->i_nlink = le16_to_cpu(fe->i_links_count); inode->i_nlink = ocfs2_read_links_count(fe);
if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
...@@ -351,6 +363,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ...@@ -351,6 +363,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_set_inode_flags(inode); ocfs2_set_inode_flags(inode);
OCFS2_I(inode)->ip_last_used_slot = 0;
OCFS2_I(inode)->ip_last_used_group = 0;
mlog_exit_void(); mlog_exit_void();
} }
...@@ -606,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode, ...@@ -606,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode,
} }
handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
ocfs2_quota_trans_credits(inode->i_sb)); ocfs2_quota_trans_credits(inode->i_sb));
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
status = PTR_ERR(handle); status = PTR_ERR(handle);
mlog_errno(status); mlog_errno(status);
...@@ -740,6 +754,15 @@ static int ocfs2_wipe_inode(struct inode *inode, ...@@ -740,6 +754,15 @@ static int ocfs2_wipe_inode(struct inode *inode,
goto bail_unlock_dir; goto bail_unlock_dir;
} }
/* Remove any dir index tree */
if (S_ISDIR(inode->i_mode)) {
status = ocfs2_dx_dir_truncate(inode, di_bh);
if (status) {
mlog_errno(status);
goto bail_unlock_dir;
}
}
/*Free extended attribute resources associated with this inode.*/ /*Free extended attribute resources associated with this inode.*/
status = ocfs2_xattr_remove(inode, di_bh); status = ocfs2_xattr_remove(inode, di_bh);
if (status < 0) { if (status < 0) {
...@@ -949,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode) ...@@ -949,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode)
goto bail; goto bail;
} }
/*
* Synchronize us against ocfs2_get_dentry. We take this in
* shared mode so that all nodes can still concurrently
* process deletes.
*/
status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0);
if (status < 0) {
mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status);
ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock;
}
/* Lock down the inode. This gives us an up to date view of /* Lock down the inode. This gives us an up to date view of
* it's metadata (for verification), and allows us to * it's metadata (for verification), and allows us to
* serialize delete_inode on multiple nodes. * serialize delete_inode on multiple nodes.
...@@ -962,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode) ...@@ -962,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode)
if (status != -ENOENT) if (status != -ENOENT)
mlog_errno(status); mlog_errno(status);
ocfs2_cleanup_delete_inode(inode, 0); ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock; goto bail_unlock_nfs_sync;
} }
/* Query the cluster. This will be the final decision made /* Query the cluster. This will be the final decision made
...@@ -1005,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode) ...@@ -1005,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode)
bail_unlock_inode: bail_unlock_inode:
ocfs2_inode_unlock(inode, 1); ocfs2_inode_unlock(inode, 1);
brelse(di_bh); brelse(di_bh);
bail_unlock_nfs_sync:
ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
bail_unblock: bail_unblock:
status = sigprocmask(SIG_SETMASK, &oldset, NULL); status = sigprocmask(SIG_SETMASK, &oldset, NULL);
if (status < 0) if (status < 0)
...@@ -1205,7 +1243,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle, ...@@ -1205,7 +1243,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
spin_unlock(&OCFS2_I(inode)->ip_lock); spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_size = cpu_to_le64(i_size_read(inode)); fe->i_size = cpu_to_le64(i_size_read(inode));
fe->i_links_count = cpu_to_le16(inode->i_nlink); ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_uid = cpu_to_le32(inode->i_uid); fe->i_uid = cpu_to_le32(inode->i_uid);
fe->i_gid = cpu_to_le32(inode->i_gid); fe->i_gid = cpu_to_le32(inode->i_gid);
fe->i_mode = cpu_to_le16(inode->i_mode); fe->i_mode = cpu_to_le16(inode->i_mode);
...@@ -1242,7 +1280,7 @@ void ocfs2_refresh_inode(struct inode *inode, ...@@ -1242,7 +1280,7 @@ void ocfs2_refresh_inode(struct inode *inode,
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
ocfs2_set_inode_flags(inode); ocfs2_set_inode_flags(inode);
i_size_write(inode, le64_to_cpu(fe->i_size)); i_size_write(inode, le64_to_cpu(fe->i_size));
inode->i_nlink = le16_to_cpu(fe->i_links_count); inode->i_nlink = ocfs2_read_links_count(fe);
inode->i_uid = le32_to_cpu(fe->i_uid); inode->i_uid = le32_to_cpu(fe->i_uid);
inode->i_gid = le32_to_cpu(fe->i_gid); inode->i_gid = le32_to_cpu(fe->i_gid);
inode->i_mode = le16_to_cpu(fe->i_mode); inode->i_mode = le16_to_cpu(fe->i_mode);
......
...@@ -72,6 +72,10 @@ struct ocfs2_inode_info ...@@ -72,6 +72,10 @@ struct ocfs2_inode_info
struct inode vfs_inode; struct inode vfs_inode;
struct jbd2_inode ip_jinode; struct jbd2_inode ip_jinode;
/* Only valid if the inode is the dir. */
u32 ip_last_used_slot;
u64 ip_last_used_group;
}; };
/* /*
...@@ -124,6 +128,7 @@ void ocfs2_drop_inode(struct inode *inode); ...@@ -124,6 +128,7 @@ void ocfs2_drop_inode(struct inode *inode);
/* Flags for ocfs2_iget() */ /* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_SYSFILE 0x1 #define OCFS2_FI_FLAG_SYSFILE 0x1
#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2
struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type); int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode); int ocfs2_inode_init_private(struct inode *inode);
......
...@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb, ...@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
static int ocfs2_recover_orphans(struct ocfs2_super *osb, static int ocfs2_recover_orphans(struct ocfs2_super *osb,
int slot); int slot);
static int ocfs2_commit_thread(void *arg); static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
struct ocfs2_quota_recovery *qrec);
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{ {
...@@ -76,18 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb) ...@@ -76,18 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
return __ocfs2_wait_on_mount(osb, 1); return __ocfs2_wait_on_mount(osb, 1);
} }
/* /*
* The recovery_list is a simple linked list of node numbers to recover. * This replay_map is to track online/offline slots, so we could recover
* It is protected by the recovery_lock. * offline slots during recovery and mount
*/ */
struct ocfs2_recovery_map { enum ocfs2_replay_state {
unsigned int rm_used; REPLAY_UNNEEDED = 0, /* Replay is not needed, so ignore this map */
unsigned int *rm_entries; REPLAY_NEEDED, /* Replay slots marked in rm_replay_slots */
REPLAY_DONE /* Replay was already queued */
}; };
/*
 * Replay bookkeeping hung off osb->replay_map.
 *
 * The structure is allocated together with its trailing per-slot
 * bytes (see ocfs2_compute_replay_slots()), so rm_replay_slots is
 * declared as a C99 flexible array member rather than a zero-length
 * array.  sizeof(struct ocfs2_replay_map) still covers only the
 * fixed header.
 */
struct ocfs2_replay_map {
	/* Number of entries in rm_replay_slots */
	unsigned int rm_slots;
	/* Whether the marked slots still need to be queued */
	enum ocfs2_replay_state rm_state;
	/* One byte per slot; non-zero marks a slot to be replayed */
	unsigned char rm_replay_slots[];
};
void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
{
if (!osb->replay_map)
return;
/* If we've already queued the replay, we don't have any more to do */
if (osb->replay_map->rm_state == REPLAY_DONE)
return;
osb->replay_map->rm_state = state;
}
/*
 * Build osb->replay_map, flagging every slot that is currently
 * offline.
 *
 * An already existing map is left untouched.  Otherwise a zeroed map
 * with one byte per slot (osb->max_slots of them) is allocated and,
 * under osb->osb_lock, each slot for which
 * ocfs2_slot_to_node_num_locked() reports -ENOENT is set to 1.  The
 * new map starts out in state REPLAY_UNNEEDED.
 *
 * Returns 0 on success (including the "already set" case) and
 * -ENOMEM if the allocation fails.
 */
int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
{
	struct ocfs2_replay_map *map;
	int slot, node_num;

	/* A map computed earlier is kept as is */
	if (osb->replay_map)
		return 0;

	map = kzalloc(sizeof(*map) +
		      osb->max_slots * sizeof(map->rm_replay_slots[0]),
		      GFP_KERNEL);
	if (!map) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	spin_lock(&osb->osb_lock);

	map->rm_state = REPLAY_UNNEEDED;
	map->rm_slots = osb->max_slots;

	/* Only slots without a node (-ENOENT) get marked for replay */
	for (slot = 0; slot < map->rm_slots; slot++) {
		if (ocfs2_slot_to_node_num_locked(osb, slot, &node_num) != -ENOENT)
			continue;
		map->rm_replay_slots[slot] = 1;
	}

	osb->replay_map = map;
	spin_unlock(&osb->osb_lock);

	return 0;
}
/*
 * Queue recovery completion for every slot marked in the replay map
 * of @osb, then move the map to REPLAY_DONE.
 *
 * Nothing happens unless a map exists and its state is REPLAY_NEEDED.
 * Each marked slot is queued with NULL for all three trailing
 * arguments of ocfs2_queue_recovery_completion().
 */
void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
{
	struct ocfs2_replay_map *map = osb->replay_map;
	int slot;

	if (!map || map->rm_state != REPLAY_NEEDED)
		return;

	for (slot = 0; slot < map->rm_slots; slot++) {
		if (!map->rm_replay_slots[slot])
			continue;
		ocfs2_queue_recovery_completion(osb->journal, slot,
						NULL, NULL, NULL);
	}

	map->rm_state = REPLAY_DONE;
}
/*
 * Release the replay map of @osb, if there is one, and clear the
 * pointer so that a later ocfs2_compute_replay_slots() builds a
 * fresh map.
 */
void ocfs2_free_replay_slots(struct ocfs2_super *osb)
{
	if (osb->replay_map) {
		kfree(osb->replay_map);
		osb->replay_map = NULL;
	}
}
int ocfs2_recovery_init(struct ocfs2_super *osb) int ocfs2_recovery_init(struct ocfs2_super *osb)
{ {
struct ocfs2_recovery_map *rm; struct ocfs2_recovery_map *rm;
...@@ -496,6 +580,22 @@ static struct ocfs2_triggers dq_triggers = { ...@@ -496,6 +580,22 @@ static struct ocfs2_triggers dq_triggers = {
}, },
}; };
/*
 * Journal triggers for struct ocfs2_dx_root_block buffers, used by
 * ocfs2_journal_access_dr().  Commit and abort go through the shared
 * ocfs2_commit_trigger/ocfs2_abort_trigger handlers; ot_offset is the
 * byte offset of dr_check inside the block (presumably the field the
 * triggers operate on -- confirm against ocfs2_commit_trigger).
 */
static struct ocfs2_triggers dr_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
};
/*
 * Journal triggers for struct ocfs2_dx_leaf buffers, used by
 * ocfs2_journal_access_dl().  Commit and abort go through the shared
 * ocfs2_commit_trigger/ocfs2_abort_trigger handlers; ot_offset is the
 * byte offset of dl_check inside the block (presumably the field the
 * triggers operate on -- confirm against ocfs2_commit_trigger).
 */
static struct ocfs2_triggers dl_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
};
static int __ocfs2_journal_access(handle_t *handle, static int __ocfs2_journal_access(handle_t *handle,
struct inode *inode, struct inode *inode,
struct buffer_head *bh, struct buffer_head *bh,
...@@ -600,6 +700,20 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, ...@@ -600,6 +700,20 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
type); type);
} }
/*
 * Journal access for a buffer that should use dr_triggers (the
 * triggers set up for struct ocfs2_dx_root_block).  All arguments are
 * forwarded unchanged to __ocfs2_journal_access(), whose return value
 * is passed back.
 */
int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
			    struct buffer_head *bh, int type)
{
	return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
				      type);
}
/*
 * Journal access for a buffer that should use dl_triggers (the
 * triggers set up for struct ocfs2_dx_leaf).  All arguments are
 * forwarded unchanged to __ocfs2_journal_access(), whose return value
 * is passed back.
 */
int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
			    struct buffer_head *bh, int type)
{
	return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
				      type);
}
int ocfs2_journal_access(handle_t *handle, struct inode *inode, int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type) struct buffer_head *bh, int type)
{ {
...@@ -1176,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, ...@@ -1176,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
} }
/* Called by the mount code to queue recovery the last part of /* Called by the mount code to queue recovery the last part of
* recovery for it's own slot. */ * recovery for it's own and offline slot(s). */
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{ {
struct ocfs2_journal *journal = osb->journal; struct ocfs2_journal *journal = osb->journal;
if (osb->dirty) { /* No need to queue up our truncate_log as regular cleanup will catch
/* No need to queue up our truncate_log as regular * that */
* cleanup will catch that. */ ocfs2_queue_recovery_completion(journal, osb->slot_num,
ocfs2_queue_recovery_completion(journal, osb->local_alloc_copy, NULL, NULL);
osb->slot_num, ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy,
NULL,
NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy = NULL; osb->local_alloc_copy = NULL;
osb->dirty = 0; osb->dirty = 0;
}
/* queue to recover orphan slots for all offline slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
ocfs2_queue_replay_slots(osb);
ocfs2_free_replay_slots(osb);
} }
void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
...@@ -1236,6 +1350,14 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1236,6 +1350,14 @@ static int __ocfs2_recovery_thread(void *arg)
goto bail; goto bail;
} }
status = ocfs2_compute_replay_slots(osb);
if (status < 0)
mlog_errno(status);
/* queue recovery for our own slot */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL, NULL);
spin_lock(&osb->osb_lock); spin_lock(&osb->osb_lock);
while (rm->rm_used) { while (rm->rm_used) {
/* It's always safe to remove entry zero, as we won't /* It's always safe to remove entry zero, as we won't
...@@ -1301,11 +1423,8 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1301,11 +1423,8 @@ static int __ocfs2_recovery_thread(void *arg)
ocfs2_super_unlock(osb, 1); ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead /* queue recovery for offline slots */
* node(s) may have disallowd a previos inode delete. Re-processing ocfs2_queue_replay_slots(osb);
* is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL, NULL);
bail: bail:
mutex_lock(&osb->recovery_lock); mutex_lock(&osb->recovery_lock);
...@@ -1314,6 +1433,7 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1314,6 +1433,7 @@ static int __ocfs2_recovery_thread(void *arg)
goto restart; goto restart;
} }
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL; osb->recovery_thread_task = NULL;
mb(); /* sync with ocfs2_recovery_thread_running */ mb(); /* sync with ocfs2_recovery_thread_running */
wake_up(&osb->recovery_event); wake_up(&osb->recovery_event);
...@@ -1465,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, ...@@ -1465,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
goto done; goto done;
} }
/* we need to run complete recovery for offline orphan slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
node_num, slot_num, node_num, slot_num,
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
......
...@@ -38,6 +38,17 @@ enum ocfs2_journal_state { ...@@ -38,6 +38,17 @@ enum ocfs2_journal_state {
struct ocfs2_super; struct ocfs2_super;
struct ocfs2_dinode; struct ocfs2_dinode;
/*
* The recovery_list is a simple linked list of node numbers to recover.
* It is protected by the recovery_lock.
*/
struct ocfs2_recovery_map {
	/* NOTE(review): presumably the count of valid rm_entries -- confirm */
	unsigned int rm_used;
	/* NOTE(review): presumably the node numbers to recover -- confirm */
	unsigned int *rm_entries;
};
struct ocfs2_journal { struct ocfs2_journal {
enum ocfs2_journal_state j_state; /* Journals current state */ enum ocfs2_journal_state j_state; /* Journals current state */
...@@ -139,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); ...@@ -139,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
/* /*
* Journal Control: * Journal Control:
* Initialize, Load, Shutdown, Wipe a journal. * Initialize, Load, Shutdown, Wipe a journal.
...@@ -266,6 +278,12 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, ...@@ -266,6 +278,12 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
/* dirblock */ /* dirblock */
int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type); struct buffer_head *bh, int type);
/* ocfs2_dx_root_block */
int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_dx_leaf */
int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* Anything that has no ecc */ /* Anything that has no ecc */
int ocfs2_journal_access(handle_t *handle, struct inode *inode, int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type); struct buffer_head *bh, int type);
...@@ -368,14 +386,29 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb) ...@@ -368,14 +386,29 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
} }
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */ * bitmap block for the new bit) dx_root update for free list */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
/*
 * Journal credits for adding a directory index: one block for the
 * index itself, two suballocator allocations (data and metadata) and
 * one cluster's worth of blocks for the initial extent.
 */
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
	int credits = 1 + 2 * OCFS2_SUBALLOC_ALLOC;

	credits += ocfs2_clusters_to_blocks(sb, 1);

	return credits;
}
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc /* parent fe, parent block, new file entry, index leaf, inode alloc fe, inode
* group descriptor + mkdir/symlink blocks + quota update */ * alloc group descriptor + mkdir/symlink blocks + dir blocks + xattr
static inline int ocfs2_mknod_credits(struct super_block *sb) * blocks + quota update */
static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
int xattr_credits)
{ {
return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS + int dir_credits = OCFS2_DIR_LINK_ADDITIONAL_CREDITS;
if (is_dir)
dir_credits += ocfs2_add_dir_index_credits(sb);
return 4 + OCFS2_SUBALLOC_ALLOC + dir_credits + xattr_credits +
ocfs2_quota_trans_credits(sb); ocfs2_quota_trans_credits(sb);
} }
...@@ -388,31 +421,31 @@ static inline int ocfs2_mknod_credits(struct super_block *sb) ...@@ -388,31 +421,31 @@ static inline int ocfs2_mknod_credits(struct super_block *sb)
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
* update on dir */ * update on dir + index leaf + dx root update for free list */
static inline int ocfs2_link_credits(struct super_block *sb) static inline int ocfs2_link_credits(struct super_block *sb)
{ {
return 2*OCFS2_INODE_UPDATE_CREDITS + 1 + return 2*OCFS2_INODE_UPDATE_CREDITS + 3 +
ocfs2_quota_trans_credits(sb); ocfs2_quota_trans_credits(sb);
} }
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan /* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */ * dir inode link + dir inode index leaf + dir index root */
static inline int ocfs2_unlink_credits(struct super_block *sb) static inline int ocfs2_unlink_credits(struct super_block *sb)
{ {
/* The quota update from ocfs2_link_credits is unused here... */ /* The quota update from ocfs2_link_credits is unused here... */
return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb); return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb);
} }
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
* inode alloc group descriptor */ * inode alloc group descriptor + orphan dir index leaf */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 1 + 1) #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3)
/* dinode update, old dir dinode update, new dir dinode update, old /* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming * dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink */ * directory + target unlink + 3 x dir index leaves */
static inline int ocfs2_rename_credits(struct super_block *sb) static inline int ocfs2_rename_credits(struct super_block *sb)
{ {
return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb); return 3 * OCFS2_INODE_UPDATE_CREDITS + 6 + ocfs2_unlink_credits(sb);
} }
/* global bitmap dinode, group desc., relinked group, /* global bitmap dinode, group desc., relinked group,
...@@ -422,6 +455,20 @@ static inline int ocfs2_rename_credits(struct super_block *sb) ...@@ -422,6 +455,20 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
+ OCFS2_INODE_UPDATE_CREDITS \ + OCFS2_INODE_UPDATE_CREDITS \
+ OCFS2_XATTR_BLOCK_UPDATE_CREDITS) + OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
/* inode update, removal of dx root block from allocator */
#define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
OCFS2_SUBALLOC_FREE)
/*
 * Credits computed as: 1 + OCFS2_SUBALLOC_ALLOC, plus the number of
 * blocks in one cluster, plus the quota transaction credits for @sb.
 */
static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
{
	int credits;

	credits = 1 + OCFS2_SUBALLOC_ALLOC + ocfs2_clusters_to_blocks(sb, 1);

	return credits + ocfs2_quota_trans_credits(sb);
}
/* /*
* Please note that the caller must make sure that root_el is the root * Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
...@@ -457,7 +504,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, ...@@ -457,7 +504,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
static inline int ocfs2_calc_symlink_credits(struct super_block *sb) static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
{ {
int blocks = ocfs2_mknod_credits(sb); int blocks = ocfs2_mknod_credits(sb, 0, 0);
/* links can be longer than one block so we may update many /* links can be longer than one block so we may update many
* within our single allocated extent. */ * within our single allocated extent. */
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/debugfs.h>
#define MLOG_MASK_PREFIX ML_DISK_ALLOC #define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h> #include <cluster/masklog.h>
...@@ -75,84 +74,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, ...@@ -75,84 +74,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
struct inode *local_alloc_inode); struct inode *local_alloc_inode);
#ifdef CONFIG_OCFS2_FS_STATS
/*
 * open() handler for the local alloc debug file: copies the inode's
 * i_private pointer into file->private_data so the read handler can
 * pick it up.  Always returns 0.
 */
static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
{
	file->private_data = inode->i_private;
	return 0;
}
#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
#define LA_DEBUG_VER 1
/*
 * read() handler for the local alloc debug file.
 *
 * Formats one tab-separated line (LA_DEBUG_VER, la_last_gd, default
 * window bits, current window bits, local alloc state) into the
 * per-superblock debug buffer and copies the requested part of it to
 * @userbuf.  A function-local mutex covers formatting and copying.
 *
 * Returns the value of simple_read_from_buffer().
 */
static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
				   size_t count, loff_t *ppos)
{
	static DEFINE_MUTEX(la_debug_mutex);
	struct ocfs2_super *osb = file->private_data;
	char *buf = osb->local_alloc_debug_buf;
	int len, copied;

	mutex_lock(&la_debug_mutex);

	memset(buf, 0, LA_DEBUG_BUF_SZ);
	len = snprintf(buf, LA_DEBUG_BUF_SZ,
		       "0x%x\t0x%llx\t%u\t%u\t0x%x\n",
		       LA_DEBUG_VER,
		       (unsigned long long)osb->la_last_gd,
		       osb->local_alloc_default_bits,
		       osb->local_alloc_bits, osb->local_alloc_state);
	copied = simple_read_from_buffer(userbuf, count, ppos, buf, len);

	mutex_unlock(&la_debug_mutex);

	return copied;
}
/* File operations of the local alloc debug file: open and read only. */
static const struct file_operations ocfs2_la_debug_fops = {
	.open =		ocfs2_la_debug_open,
	.read =		ocfs2_la_debug_read,
};
/*
 * Allocate the debug buffer and create the "local_alloc_stats" file
 * under osb->osb_debug_root.  Failures are silent: if the buffer
 * cannot be allocated nothing is created, and if the file cannot be
 * created the buffer is released again and its pointer cleared.
 */
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
	osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
	if (!osb->local_alloc_debug_buf)
		return;

	osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
						     S_IFREG|S_IRUSR,
						     osb->osb_debug_root,
						     osb,
						     &ocfs2_la_debug_fops);
	if (osb->local_alloc_debug)
		return;

	/* No file, so the buffer has no user */
	kfree(osb->local_alloc_debug_buf);
	osb->local_alloc_debug_buf = NULL;
}
/*
 * Undo ocfs2_init_la_debug(): remove the debug file if it exists,
 * free the debug buffer and leave both pointers NULL.
 */
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
	if (osb->local_alloc_debug) {
		debugfs_remove(osb->local_alloc_debug);
		osb->local_alloc_debug = NULL;
	}

	/* kfree() accepts NULL, so no separate check is needed */
	kfree(osb->local_alloc_debug_buf);
	osb->local_alloc_debug_buf = NULL;
}
#else /* CONFIG_OCFS2_FS_STATS */
/* Stats support compiled out (!CONFIG_OCFS2_FS_STATS): nothing to set up. */
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
}
/* Stats support compiled out (!CONFIG_OCFS2_FS_STATS): nothing to tear down. */
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
}
#endif
static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
{ {
return (osb->local_alloc_state == OCFS2_LA_THROTTLED || return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
...@@ -226,8 +147,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) ...@@ -226,8 +147,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
mlog_entry_void(); mlog_entry_void();
ocfs2_init_la_debug(osb);
if (osb->local_alloc_bits == 0) if (osb->local_alloc_bits == 0)
goto bail; goto bail;
...@@ -299,9 +218,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) ...@@ -299,9 +218,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
if (inode) if (inode)
iput(inode); iput(inode);
if (status < 0)
ocfs2_shutdown_la_debug(osb);
mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
mlog_exit(status); mlog_exit(status);
...@@ -331,8 +247,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) ...@@ -331,8 +247,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
cancel_delayed_work(&osb->la_enable_wq); cancel_delayed_work(&osb->la_enable_wq);
flush_workqueue(ocfs2_wq); flush_workqueue(ocfs2_wq);
ocfs2_shutdown_la_debug(osb);
if (osb->local_alloc_state == OCFS2_LA_UNUSED) if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out; goto out;
......
This diff is collapsed.
...@@ -209,6 +209,7 @@ enum ocfs2_mount_options ...@@ -209,6 +209,7 @@ enum ocfs2_mount_options
struct ocfs2_journal; struct ocfs2_journal;
struct ocfs2_slot_info; struct ocfs2_slot_info;
struct ocfs2_recovery_map; struct ocfs2_recovery_map;
struct ocfs2_replay_map;
struct ocfs2_quota_recovery; struct ocfs2_quota_recovery;
struct ocfs2_dentry_lock; struct ocfs2_dentry_lock;
struct ocfs2_super struct ocfs2_super
...@@ -264,6 +265,7 @@ struct ocfs2_super ...@@ -264,6 +265,7 @@ struct ocfs2_super
atomic_t vol_state; atomic_t vol_state;
struct mutex recovery_lock; struct mutex recovery_lock;
struct ocfs2_recovery_map *recovery_map; struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task; struct task_struct *recovery_thread_task;
int disable_recovery; int disable_recovery;
wait_queue_head_t checkpoint_event; wait_queue_head_t checkpoint_event;
...@@ -287,11 +289,6 @@ struct ocfs2_super ...@@ -287,11 +289,6 @@ struct ocfs2_super
u64 la_last_gd; u64 la_last_gd;
#ifdef CONFIG_OCFS2_FS_STATS
struct dentry *local_alloc_debug;
char *local_alloc_debug_buf;
#endif
/* Next three fields are for local node slot recovery during /* Next three fields are for local node slot recovery during
* mount. */ * mount. */
int dirty; int dirty;
...@@ -305,9 +302,11 @@ struct ocfs2_super ...@@ -305,9 +302,11 @@ struct ocfs2_super
struct ocfs2_cluster_connection *cconn; struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
struct ocfs2_dlm_debug *osb_dlm_debug; struct ocfs2_dlm_debug *osb_dlm_debug;
struct dentry *osb_debug_root; struct dentry *osb_debug_root;
struct dentry *osb_ctxt;
wait_queue_head_t recovery_event; wait_queue_head_t recovery_event;
...@@ -344,6 +343,12 @@ struct ocfs2_super ...@@ -344,6 +343,12 @@ struct ocfs2_super
/* used to protect metaecc calculation check of xattr. */ /* used to protect metaecc calculation check of xattr. */
spinlock_t osb_xattr_lock; spinlock_t osb_xattr_lock;
unsigned int osb_dx_mask;
u32 osb_dx_seed[4];
/* the group we used to allocate inodes. */
u64 osb_inode_alloc_group;
}; };
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
...@@ -402,6 +407,51 @@ static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) ...@@ -402,6 +407,51 @@ static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
return 0; return 0;
} }
/*
 * Returns 1 when the INDEXED_DIRS incompat feature bit is set in the
 * osb's feature mask, 0 otherwise.
 */
static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
{
	return (osb->s_feature_incompat &
		OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) ? 1 : 0;
}
/*
 * Maximum link count for this volume: OCFS2_DX_LINK_MAX when indexed
 * directories are supported, OCFS2_LINK_MAX otherwise.
 */
static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
{
	unsigned int limit = OCFS2_LINK_MAX;

	if (ocfs2_supports_indexed_dirs(osb))
		limit = OCFS2_DX_LINK_MAX;

	return limit;
}
/*
 * Read the link count from an on-disk inode.
 *
 * The low 16 bits always come from i_links_count.  The high 16 bits
 * (i_links_count_hi) are folded in only when OCFS2_INDEXED_DIR_FL is
 * set in i_dyn_features.
 */
static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
{
	u32 low = le16_to_cpu(di->i_links_count);
	u32 high = le16_to_cpu(di->i_links_count_hi);

	if (!(di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)))
		return low;

	return low | (high << OCFS2_LINKS_HI_SHIFT);
}
/*
 * Store @nlink into an on-disk inode, split into its low 16 bits
 * (i_links_count) and the bits above OCFS2_LINKS_HI_SHIFT
 * (i_links_count_hi).  Both halves are always written.
 */
static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink)
{
	u16 low = nlink;				/* truncates to 16 bits */
	u16 high = nlink >> OCFS2_LINKS_HI_SHIFT;

	di->i_links_count_hi = cpu_to_le16(high);
	di->i_links_count = cpu_to_le16(low);
}
/*
 * Adjust the on-disk link count of @di by @n (which may be negative),
 * using the read/set helpers so both halves stay in sync.
 */
static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
{
	u32 updated = ocfs2_read_links_count(di);

	updated += n;

	ocfs2_set_links_count(di, updated);
}
/* set / clear functions because cluster events can make these happen /* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also * in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock * means that any future flags osb_flags must be protected by spinlock
...@@ -482,6 +532,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) ...@@ -482,6 +532,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
#define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ #define OCFS2_IS_VALID_DIR_TRAILER(ptr) \
(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
/* True when the block's signature field equals the dx root signature. */
#define OCFS2_IS_VALID_DX_ROOT(ptr)					\
	(strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE) == 0)
/* True when the block's signature field equals the dx leaf signature. */
#define OCFS2_IS_VALID_DX_LEAF(ptr)					\
	(strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE) == 0)
static inline unsigned long ino_from_blkno(struct super_block *sb, static inline unsigned long ino_from_blkno(struct super_block *sb,
u64 blkno) u64 blkno)
{ {
...@@ -532,6 +588,16 @@ static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, ...@@ -532,6 +588,16 @@ static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
} }
/*
 * Round a block number down to the first block of the cluster that
 * contains it: convert to a cluster index, then shift that index back
 * up by the cluster-size/block-size bit difference.
 */
static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb,
					       u64 blocks)
{
	unsigned int cpos = ocfs2_blocks_to_clusters(sb, blocks);
	int shift = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits;

	return (u64)cpos << shift;
}
static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
u64 bytes) u64 bytes)
{ {
......
...@@ -66,6 +66,8 @@ ...@@ -66,6 +66,8 @@
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" #define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01"
#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1"
/* Compatibility flags */ /* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
...@@ -95,7 +97,8 @@ ...@@ -95,7 +97,8 @@
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR \ | OCFS2_FEATURE_INCOMPAT_XATTR \
| OCFS2_FEATURE_INCOMPAT_META_ECC) | OCFS2_FEATURE_INCOMPAT_META_ECC \
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
...@@ -151,6 +154,9 @@ ...@@ -151,6 +154,9 @@
/* Support for extended attributes */ /* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
/* Support for indexed directories */
#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400
/* Metadata checksum and error correction */ /* Metadata checksum and error correction */
#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 #define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
...@@ -411,8 +417,12 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { ...@@ -411,8 +417,12 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \ #define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \
OCFS2_DIR_ROUND) & \ OCFS2_DIR_ROUND) & \
~OCFS2_DIR_ROUND) ~OCFS2_DIR_ROUND)
#define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1)
#define OCFS2_LINK_MAX 32000 #define OCFS2_LINK_MAX 32000
#define OCFS2_DX_LINK_MAX ((1U << 31) - 1U)
#define OCFS2_LINKS_HI_SHIFT 16
#define OCFS2_DX_ENTRIES_MAX (0xffffffffU)
#define S_SHIFT 12 #define S_SHIFT 12
static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
...@@ -628,8 +638,9 @@ struct ocfs2_super_block { ...@@ -628,8 +638,9 @@ struct ocfs2_super_block {
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/ for this fs*/
__le16 s_reserved0; __le16 s_reserved0;
__le32 s_reserved1; __le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash.
/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ * s_uuid_hash serves as seed[3]. */
/*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */
/*140*/ /*140*/
/* /*
...@@ -679,7 +690,7 @@ struct ocfs2_dinode { ...@@ -679,7 +690,7 @@ struct ocfs2_dinode {
belongs to */ belongs to */
__le16 i_suballoc_bit; /* Bit offset in suballocator __le16 i_suballoc_bit; /* Bit offset in suballocator
block group */ block group */
/*10*/ __le16 i_reserved0; /*10*/ __le16 i_links_count_hi; /* High 16 bits of links count */
__le16 i_xattr_inline_size; __le16 i_xattr_inline_size;
__le32 i_clusters; /* Cluster count */ __le32 i_clusters; /* Cluster count */
__le32 i_uid; /* Owner UID */ __le32 i_uid; /* Owner UID */
...@@ -705,7 +716,8 @@ struct ocfs2_dinode { ...@@ -705,7 +716,8 @@ struct ocfs2_dinode {
__le16 i_dyn_features; __le16 i_dyn_features;
__le64 i_xattr_loc; __le64 i_xattr_loc;
/*80*/ struct ocfs2_block_check i_check; /* Error checking */ /*80*/ struct ocfs2_block_check i_check; /* Error checking */
/*88*/ __le64 i_reserved2[6]; /*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
__le64 i_reserved2[5];
/*B8*/ union { /*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this __le64 i_pad1; /* Generic way to refer to this
64bit union */ 64bit union */
...@@ -781,6 +793,90 @@ struct ocfs2_dir_block_trailer { ...@@ -781,6 +793,90 @@ struct ocfs2_dir_block_trailer {
/*40*/ /*40*/
}; };
/*
* A directory entry in the indexed tree. We don't store the full name here,
* but instead provide a pointer to the full dirent in the unindexed tree.
*
* NOTE(review): this comment used to say that name_len is also stored here
* to cut down on leaf block searches on collisions, but the structure as
* declared holds only the two hash values and the dirent block pointer.
* Confirm which is intended and keep comment and layout in sync.
*
* All fields are little-endian on-disk values.
*/
struct ocfs2_dx_entry {
__le32 dx_major_hash; /* Used to find logical
* cluster in index */
__le32 dx_minor_hash; /* Lower bits used to find
* block in cluster */
__le64 dx_dirent_blk; /* Physical block in unindexed
* tree holding this dirent. */
};
/*
* Header plus trailing array of indexed directory entries. This list is
* embedded both in the dx root block (dr_entries) and in dx leaf blocks
* (dl_list); de_entries extends to the end of the containing block.
*/
struct ocfs2_dx_entry_list {
__le32 de_reserved;
__le16 de_count; /* Maximum number of entries
* possible in de_entries */
__le16 de_num_used; /* Current number of
* de_entries entries */
struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries
* in a packed array of
* length de_num_used */
};
#define OCFS2_DX_FLAG_INLINE 0x01
/*
* A directory indexing block. Each indexed directory has one of these,
* pointed to by ocfs2_dinode.
*
* This block stores an indexed btree root, and a set of free space
* start-of-list pointers.
*
* The trailing union holds either an extent list (dr_list) or an in-block
* entry list (dr_entries). NOTE(review): presumably OCFS2_DX_FLAG_INLINE in
* dr_flags selects the in-block entry list - confirm against the users of
* dr_flags.
*/
struct ocfs2_dx_root_block {
__u8 dr_signature[8]; /* Signature for verification */
struct ocfs2_block_check dr_check; /* Error checking */
__le16 dr_suballoc_slot; /* Slot suballocator this
* block belongs to. */
__le16 dr_suballoc_bit; /* Bit offset in suballocator
* block group */
__le32 dr_fs_generation; /* Must match super block */
__le64 dr_blkno; /* Offset on disk, in blocks */
__le64 dr_last_eb_blk; /* Pointer to last
* extent block */
__le32 dr_clusters; /* Clusters allocated
* to the indexed tree. */
__u8 dr_flags; /* OCFS2_DX_FLAG_* flags */
__u8 dr_reserved0;
__le16 dr_reserved1;
__le64 dr_dir_blkno; /* Pointer to parent inode */
__le32 dr_num_entries; /* Total number of
* names stored in
* this directory.*/
__le32 dr_reserved2;
__le64 dr_free_blk; /* Pointer to head of free
* unindexed block list. */
__le64 dr_reserved3[15];
union {
struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
* bits for maximum space
* efficiency. */
struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
* entries. We grow out
* to extents if this
* gets too big. */
};
};
/*
* The header of a leaf block in the indexed tree.
*
* dl_list must stay the last member: its de_entries array runs to the end
* of the block (see ocfs2_dx_entries_per_leaf()).
*/
struct ocfs2_dx_leaf {
__u8 dl_signature[8];/* Signature for verification */
struct ocfs2_block_check dl_check; /* Error checking */
__le64 dl_blkno; /* Offset on disk, in blocks */
__le32 dl_fs_generation;/* Must match super block */
__le32 dl_reserved0;
__le64 dl_reserved1;
struct ocfs2_dx_entry_list dl_list;
};
/* /*
* On disk allocator group structure for OCFS2 * On disk allocator group structure for OCFS2
*/ */
...@@ -1112,6 +1208,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr( ...@@ -1112,6 +1208,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr(
return size / sizeof(struct ocfs2_extent_rec); return size / sizeof(struct ocfs2_extent_rec);
} }
/*
 * Number of extent records that fit in a dx root block: the space left
 * in one filesystem block after everything preceding dr_list.l_recs,
 * divided by the size of one extent record.
 */
static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb)
{
	int room = sb->s_blocksize -
		offsetof(struct ocfs2_dx_root_block, dr_list.l_recs);

	return room / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
{ {
int size; int size;
...@@ -1132,6 +1238,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb) ...@@ -1132,6 +1238,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
return size / sizeof(struct ocfs2_extent_rec); return size / sizeof(struct ocfs2_extent_rec);
} }
/*
 * Number of dx entries that fit in a dx leaf block: the space left in
 * one filesystem block after the leaf header and list header, divided
 * by the size of one entry.
 */
static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
{
	int room = sb->s_blocksize -
		offsetof(struct ocfs2_dx_leaf, dl_list.de_entries);

	return room / sizeof(struct ocfs2_dx_entry);
}
/*
 * Number of dx entries that fit inline in a dx root block: the space
 * left in one filesystem block after everything preceding
 * dr_entries.de_entries, divided by the size of one entry.
 */
static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
{
	int room = sb->s_blocksize -
		offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);

	return room / sizeof(struct ocfs2_dx_entry);
}
static inline u16 ocfs2_local_alloc_size(struct super_block *sb) static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
{ {
u16 size; u16 size;
......
...@@ -47,6 +47,7 @@ enum ocfs2_lock_type { ...@@ -47,6 +47,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_OPEN, OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK, OCFS2_LOCK_TYPE_FLOCK,
OCFS2_LOCK_TYPE_QINFO, OCFS2_LOCK_TYPE_QINFO,
OCFS2_LOCK_TYPE_NFS_SYNC,
OCFS2_NUM_LOCK_TYPES OCFS2_NUM_LOCK_TYPES
}; };
...@@ -81,6 +82,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) ...@@ -81,6 +82,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_QINFO: case OCFS2_LOCK_TYPE_QINFO:
c = 'Q'; c = 'Q';
break; break;
case OCFS2_LOCK_TYPE_NFS_SYNC:
c = 'Y';
break;
default: default:
c = '\0'; c = '\0';
} }
......
This diff is collapsed.
...@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb, ...@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
u64 *blkno_start); u64 *blkno_start);
int ocfs2_claim_new_inode(struct ocfs2_super *osb, int ocfs2_claim_new_inode(struct ocfs2_super *osb,
handle_t *handle, handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *ac, struct ocfs2_alloc_context *ac,
u16 *suballoc_bit, u16 *suballoc_bit,
u64 *fe_blkno); u64 *fe_blkno);
...@@ -186,4 +188,6 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, ...@@ -186,4 +188,6 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split, u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac); struct ocfs2_alloc_context **meta_ac);
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
#endif /* _CHAINALLOC_H_ */ #endif /* _CHAINALLOC_H_ */
...@@ -201,6 +201,170 @@ static const match_table_t tokens = { ...@@ -201,6 +201,170 @@ static const match_table_t tokens = {
{Opt_err, NULL} {Opt_err, NULL}
}; };
#ifdef CONFIG_DEBUG_FS
static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
{
int out = 0;
int i;
struct ocfs2_cluster_connection *cconn = osb->cconn;
struct ocfs2_recovery_map *rm = osb->recovery_map;
out += snprintf(buf + out, len - out,
"%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
"Device", osb->dev_str, osb->uuid_str,
osb->fs_generation, osb->vol_label);
out += snprintf(buf + out, len - out,
"%10s => State: %d Flags: 0x%lX\n", "Volume",
atomic_read(&osb->vol_state), osb->osb_flags);
out += snprintf(buf + out, len - out,
"%10s => Block: %lu Cluster: %d\n", "Sizes",
osb->sb->s_blocksize, osb->s_clustersize);
out += snprintf(buf + out, len - out,
"%10s => Compat: 0x%X Incompat: 0x%X "
"ROcompat: 0x%X\n",
"Features", osb->s_feature_compat,
osb->s_feature_incompat, osb->s_feature_ro_compat);
out += snprintf(buf + out, len - out,
"%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
osb->s_mount_opt, osb->s_atime_quantum);
out += snprintf(buf + out, len - out,
"%10s => Stack: %s Name: %*s Version: %d.%d\n",
"Cluster",
(*osb->osb_cluster_stack == '\0' ?
"o2cb" : osb->osb_cluster_stack),
cconn->cc_namelen, cconn->cc_name,
cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
spin_lock(&osb->dc_task_lock);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Count: %lu WakeSeq: %lu "
"WorkSeq: %lu\n", "DownCnvt",
task_pid_nr(osb->dc_task), osb->blocked_lock_count,
osb->dc_wake_sequence, osb->dc_work_sequence);
spin_unlock(&osb->dc_task_lock);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:",
"Recovery",
(osb->recovery_thread_task ?
task_pid_nr(osb->recovery_thread_task) : -1));
if (rm->rm_used == 0)
out += snprintf(buf + out, len - out, " None\n");
else {
for (i = 0; i < rm->rm_used; i++)
out += snprintf(buf + out, len - out, " %d",
rm->rm_entries[i]);
out += snprintf(buf + out, len - out, "\n");
}
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
task_pid_nr(osb->commit_task), osb->osb_commit_interval,
atomic_read(&osb->needs_checkpoint));
out += snprintf(buf + out, len - out,
"%10s => State: %d NumTxns: %d TxnId: %lu\n",
"Journal", osb->journal->j_state,
atomic_read(&osb->journal->j_num_trans),
osb->journal->j_trans_id);
out += snprintf(buf + out, len - out,
"%10s => GlobalAllocs: %d LocalAllocs: %d "
"SubAllocs: %d LAWinMoves: %d SAExtends: %d\n",
"Stats",
atomic_read(&osb->alloc_stats.bitmap_data),
atomic_read(&osb->alloc_stats.local_data),
atomic_read(&osb->alloc_stats.bg_allocs),
atomic_read(&osb->alloc_stats.moves),
atomic_read(&osb->alloc_stats.bg_extends));
out += snprintf(buf + out, len - out,
"%10s => State: %u Descriptor: %llu Size: %u bits "
"Default: %u bits\n",
"LocalAlloc", osb->local_alloc_state,
(unsigned long long)osb->la_last_gd,
osb->local_alloc_bits, osb->local_alloc_default_bits);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
"%10s => Slot: %d NumStolen: %d\n", "Steal",
osb->s_inode_steal_slot,
atomic_read(&osb->s_num_inodes_stolen));
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
"Slots", "Num", "RecoGen");
for (i = 0; i < osb->max_slots; ++i) {
out += snprintf(buf + out, len - out,
"%10s %c %3d %10d\n",
" ",
(i == osb->slot_num ? '*' : ' '),
i, osb->slot_recovery_generations[i]);
}
return out;
}
/*
 * open() handler for the "fs_state" debugfs file.
 *
 * Allocates a page-sized buffer, fills it with the dump of the osb
 * stored in inode->i_private, records the dump length as the inode size
 * and attaches the buffer to the file for later reads.
 *
 * Returns 0 on success or -ENOMEM if the buffer cannot be allocated.
 */
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
	struct ocfs2_super *osb = inode->i_private;
	char *page;

	page = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (page == NULL)
		return -ENOMEM;

	i_size_write(inode, ocfs2_osb_dump(osb, page, PAGE_SIZE));
	file->private_data = page;

	return 0;
}
/*
 * release() handler: free the buffer that the open handler attached to
 * file->private_data.  Always returns 0.
 */
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
	void *snapshot = file->private_data;

	kfree(snapshot);

	return 0;
}
/*
 * read() handler: copy out of the buffer attached to the file, bounded
 * by the size recorded on the backing inode.
 */
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	struct inode *host = file->f_mapping->host;
	const void *snapshot = file->private_data;

	return simple_read_from_buffer(buf, nbytes, ppos, snapshot,
				       i_size_read(host));
}
#else
/* !CONFIG_DEBUG_FS stub: nothing to prepare, report success. */
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
return 0;
}
/* !CONFIG_DEBUG_FS stub: nothing was allocated, report success. */
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
return 0;
}
/* !CONFIG_DEBUG_FS stub: always reports zero bytes read. */
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */
static struct file_operations ocfs2_osb_debug_fops = {
.open = ocfs2_osb_debug_open,
.release = ocfs2_debug_release,
.read = ocfs2_debug_read,
.llseek = generic_file_llseek,
};
/* /*
* write_super and sync_fs ripped right out of ext3. * write_super and sync_fs ripped right out of ext3.
*/ */
...@@ -926,6 +1090,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -926,6 +1090,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
goto read_super_error; goto read_super_error;
} }
osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_osb_debug_fops);
if (!osb->osb_ctxt) {
status = -EINVAL;
mlog_errno(status);
goto read_super_error;
}
status = ocfs2_mount_volume(sb); status = ocfs2_mount_volume(sb);
if (osb->root_inode) if (osb->root_inode)
inode = igrab(osb->root_inode); inode = igrab(osb->root_inode);
...@@ -1620,6 +1794,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ...@@ -1620,6 +1794,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb); osb = OCFS2_SB(sb);
BUG_ON(!osb); BUG_ON(!osb);
debugfs_remove(osb->osb_ctxt);
ocfs2_disable_quotas(osb); ocfs2_disable_quotas(osb);
ocfs2_shutdown_local_alloc(osb); ocfs2_shutdown_local_alloc(osb);
...@@ -1742,6 +1918,12 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -1742,6 +1918,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
for (i = 0; i < 3; i++)
osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]);
osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash);
osb->sb = sb; osb->sb = sb;
/* Save off for ocfs2_rw_direct */ /* Save off for ocfs2_rw_direct */
osb->s_sectsize_bits = blksize_bits(sector_size); osb->s_sectsize_bits = blksize_bits(sector_size);
...@@ -2130,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) ...@@ -2130,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
* lock, and it's marked as dirty, set the bit in the recover * lock, and it's marked as dirty, set the bit in the recover
* map and launch a recovery thread for it. */ * map and launch a recovery thread for it. */
status = ocfs2_mark_dead_nodes(osb); status = ocfs2_mark_dead_nodes(osb);
if (status < 0) {
mlog_errno(status);
goto finally;
}
status = ocfs2_compute_replay_slots(osb);
if (status < 0) if (status < 0)
mlog_errno(status); mlog_errno(status);
......
...@@ -512,7 +512,7 @@ int ocfs2_calc_xattr_init(struct inode *dir, ...@@ -512,7 +512,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
struct ocfs2_security_xattr_info *si, struct ocfs2_security_xattr_info *si,
int *want_clusters, int *want_clusters,
int *xattr_credits, int *xattr_credits,
struct ocfs2_alloc_context **xattr_ac) int *want_meta)
{ {
int ret = 0; int ret = 0;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
...@@ -554,11 +554,7 @@ int ocfs2_calc_xattr_init(struct inode *dir, ...@@ -554,11 +554,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
(S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
(s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); *want_meta = *want_meta + 1;
if (ret) {
mlog_errno(ret);
return ret;
}
*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
} }
......
...@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *, ...@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
int *, int *, struct ocfs2_alloc_context **); int *, int *, struct ocfs2_alloc_context **);
int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
int, struct ocfs2_security_xattr_info *, int, struct ocfs2_security_xattr_info *,
int *, int *, struct ocfs2_alloc_context **); int *, int *, int *);
/* /*
* xattrs can live inside an inode, as part of an external xattr block, * xattrs can live inside an inode, as part of an external xattr block,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment