Commit f7ab093f authored by Mike Marshall

Orangefs: kernel client part 1

OrangeFS (formerly PVFS) is an LGPL-licensed userspace networked parallel
file system. OrangeFS can be accessed through included system utilities,
user integration libraries, and MPI-IO, and can be used by the Hadoop
ecosystem as an alternative to the HDFS filesystem. OrangeFS is used
widely for parallel science, data analytics and engineering applications.

While applications often don't require Orangefs to be mounted into
the VFS, users do like to be able to access their files in the normal way.
The Orangefs kernel client allows Orangefs filesystems to be mounted as
a VFS. The kernel client communicates with a userspace daemon which in
turn communicates with the Orangefs server daemons that implement the
filesystem. The server daemons (there's almost always more than one)
need not be running on the same host as the kernel client.

Orangefs filesystems can also be mounted with FUSE, and we
ship code and instructions to facilitate that, but most of our users
report preferring to use our kernel module instead. Further, as an example
of a problem we can't solve with FUSE, we have in the works a
not-yet-ready-for-prime-time version of a file_operations lock function
that accounts for the server daemons being distributed across more
than one running kernel.

Many people and organizations, including Clemson University,
Argonne National Laboratories and Acxiom Corporation have
helped to create what has become Orangefs over more than twenty
years. Some of the more recent contributors to the kernel client
include:

  Mike Marshall
  Christoph Hellwig
  Randy Martin
  Becky Ligon
  Walt Ligon
  Michael Moore
  Rob Ross
  Phil Carnes
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
parent 9ffecb10
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
/*
* Definitions of downcalls used in Linux kernel module.
*/
#ifndef __DOWNCALL_H
#define __DOWNCALL_H
/*
* Sanitized the device-client core interaction
* for clean 32-64 bit usage
*/
/* Response payload stored in pvfs2_downcall_s.resp.io. */
struct pvfs2_io_response {
	/* NOTE(review): presumably the amount of data transferred - confirm */
	__s64 amt_complete;
};
/* Response payload stored in pvfs2_downcall_s.resp.iox. */
struct pvfs2_iox_response {
	/* NOTE(review): presumably the amount of data transferred - confirm */
	__s64 amt_complete;
};
/* Response payload stored in pvfs2_downcall_s.resp.lookup. */
struct pvfs2_lookup_response {
	/* object reference (struct pvfs2_object_kref is declared elsewhere) */
	struct pvfs2_object_kref refn;
};
/* Response payload stored in pvfs2_downcall_s.resp.create. */
struct pvfs2_create_response {
	/* object reference (struct pvfs2_object_kref is declared elsewhere) */
	struct pvfs2_object_kref refn;
};
/* Response payload stored in pvfs2_downcall_s.resp.sym. */
struct pvfs2_symlink_response {
	/* object reference (struct pvfs2_object_kref is declared elsewhere) */
	struct pvfs2_object_kref refn;
};
/* Response payload stored in pvfs2_downcall_s.resp.getattr. */
struct pvfs2_getattr_response {
	/* attribute block (struct PVFS_sys_attr_s is declared elsewhere) */
	struct PVFS_sys_attr_s attributes;
	/* fixed-size buffer of PVFS2_NAME_LEN bytes for a link target */
	char link_target[PVFS2_NAME_LEN];
};
/* Response payload stored in pvfs2_downcall_s.resp.mkdir. */
struct pvfs2_mkdir_response {
	/* object reference (struct pvfs2_object_kref is declared elsewhere) */
	struct pvfs2_object_kref refn;
};
/*
 * Duplicates some system interface structures so that no extra memory
 * has to be allocated.
 *
 * One directory entry; pvfs2_readdir_response_s.dirent_array points at
 * an array of these.
 */
struct pvfs2_dirent {
	/* entry name (pointer only; the storage lives elsewhere) */
	char *d_name;
	/* NOTE(review): presumably the length of d_name - confirm */
	int d_length;
	/* handle for the entry (struct pvfs2_khandle is declared elsewhere) */
	struct pvfs2_khandle khandle;
};
/*
 * Response payload stored in pvfs2_downcall_s.resp.statfs.
 * Every field is a signed 64-bit value.
 */
struct pvfs2_statfs_response {
	__s64 block_size;
	__s64 blocks_total;
	__s64 blocks_avail;
	__s64 files_total;
	__s64 files_avail;
};
/* Response payload stored in pvfs2_downcall_s.resp.fs_mount. */
struct pvfs2_fs_mount_response {
	__s32 fs_id;
	__s32 id;
	/* handle of the root (struct pvfs2_khandle is declared elsewhere) */
	struct pvfs2_khandle root_khandle;
};
/*
 * The getxattr response is the attribute value.
 * Stored in pvfs2_downcall_s.resp.getxattr.
 */
struct pvfs2_getxattr_response {
	/* NOTE(review): presumably the number of valid bytes in val - confirm */
	__s32 val_sz;
	/* explicit padding */
	__s32 __pad1;
	/* value buffer, PVFS_MAX_XATTR_VALUELEN bytes */
	char val[PVFS_MAX_XATTR_VALUELEN];
};
/*
 * The listxattr response is an array of attribute names.
 * Stored in pvfs2_downcall_s.resp.listxattr.
 */
struct pvfs2_listxattr_response {
	__s32 returned_count;
	/* explicit padding */
	__s32 __pad1;
	__u64 token;
	/* room for PVFS_MAX_XATTR_LISTLEN names of PVFS_MAX_XATTR_NAMELEN bytes */
	char key[PVFS_MAX_XATTR_LISTLEN * PVFS_MAX_XATTR_NAMELEN];
	__s32 keylen;
	/* explicit padding */
	__s32 __pad2;
	/* one entry per possible name; NOTE(review): presumably name lengths */
	__s32 lengths[PVFS_MAX_XATTR_LISTLEN];
};
/* Response payload stored in pvfs2_downcall_s.resp.param. */
struct pvfs2_param_response {
	__s64 value;
};
/* Size in bytes of the perf-count response buffer. */
#define PERF_COUNT_BUF_SIZE	4096

/* Response payload stored in pvfs2_downcall_s.resp.perf_count. */
struct pvfs2_perf_count_response {
	char buffer[PERF_COUNT_BUF_SIZE];
};
/* Size in bytes of the fs-key response buffer. */
#define FS_KEY_BUF_SIZE	4096

/* Response payload stored in pvfs2_downcall_s.resp.fs_key. */
struct pvfs2_fs_key_response {
	/* NOTE(review): presumably the number of valid bytes in fs_key - confirm */
	__s32 fs_keylen;
	/* explicit padding */
	__s32 __pad1;
	char fs_key[FS_KEY_BUF_SIZE];
};
/*
 * A downcall: a type/status header, an optional trailer, and a union
 * holding the response for whichever operation "type" names.
 *
 * NOTE(review): trailer_buf is a raw pointer, so its width differs
 * between 32- and 64-bit builds - confirm this is compatible with the
 * "clean 32-64 bit usage" goal stated for this header.
 */
struct pvfs2_downcall_s {
	__s32 type;
	__s32 status;
	/* currently trailer is used only by readdir */
	__s64 trailer_size;
	char *trailer_buf;

	/* per-operation response; only one member is meaningful at a time */
	union {
		struct pvfs2_io_response		io;
		struct pvfs2_iox_response		iox;
		struct pvfs2_lookup_response		lookup;
		struct pvfs2_create_response		create;
		struct pvfs2_symlink_response		sym;
		struct pvfs2_getattr_response		getattr;
		struct pvfs2_mkdir_response		mkdir;
		struct pvfs2_statfs_response		statfs;
		struct pvfs2_fs_mount_response		fs_mount;
		struct pvfs2_getxattr_response		getxattr;
		struct pvfs2_listxattr_response		listxattr;
		struct pvfs2_param_response		param;
		struct pvfs2_perf_count_response	perf_count;
		struct pvfs2_fs_key_response		fs_key;
	} resp;
};
/*
 * Readdir result. It is not a member of the pvfs2_downcall_s.resp union.
 * NOTE(review): presumably delivered through the downcall trailer,
 * which is documented as "used only by readdir" - confirm.
 */
struct pvfs2_readdir_response_s {
	__u64 token;
	__u64 directory_version;
	/* explicit padding */
	__u32 __pad2;
	/* NOTE(review): presumably the number of entries in dirent_array */
	__u32 pvfs_dirent_outcount;
	struct pvfs2_dirent *dirent_array;
};
#endif /* __DOWNCALL_H */
This diff is collapsed.
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#ifndef __PVFS2_BUFMAP_H
#define __PVFS2_BUFMAP_H
/* Describes one mapped buffer. */
struct pvfs_bufmap_desc {
	/* user space address pointer */
	void *uaddr;
	/* array of mapped pages */
	struct page **page_array;
	/* size of above arrays */
	int array_count;
	/* list linkage; NOTE(review): the list it joins is not visible here */
	struct list_head list_link;
};
/* Opaque to users of this header; only pointers to it are passed around. */
struct pvfs2_bufmap;

/* Reference counting on the buffer map (names suggest get/put pairing). */
struct pvfs2_bufmap *pvfs2_bufmap_ref(void);
void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap);

/*
 * Buffer sizes are not hardcoded, so the size and shift are obtained
 * through these query functions.
 */
int pvfs_bufmap_size_query(void);
int pvfs_bufmap_shift_query(void);

/* Set-up and tear-down of the buffer map. */
int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc);
int get_bufmap_init(void);
void pvfs_bufmap_finalize(void);

/*
 * Acquire/release a buffer slot. The *_get variants report the map and
 * the slot index through their pointer arguments.
 */
int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index);
void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index);
int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index);
void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index);

/*
 * Copy helpers between an iovec array (nr_segs entries) and the buffer
 * slot buffer_index. The names indicate direction and whether the iovec
 * refers to user or kernel memory.
 */
int pvfs_bufmap_copy_iovec_from_user(
	struct pvfs2_bufmap *bufmap,
	int buffer_index,
	const struct iovec *iov,
	unsigned long nr_segs,
	size_t size);

int pvfs_bufmap_copy_iovec_from_kernel(
	struct pvfs2_bufmap *bufmap,
	int buffer_index,
	const struct iovec *iov,
	unsigned long nr_segs,
	size_t size);

int pvfs_bufmap_copy_to_user_iovec(
	struct pvfs2_bufmap *bufmap,
	int buffer_index,
	const struct iovec *iov,
	unsigned long nr_segs,
	size_t size);

int pvfs_bufmap_copy_to_kernel_iovec(
	struct pvfs2_bufmap *bufmap,
	int buffer_index,
	const struct iovec *iov,
	unsigned long nr_segs,
	size_t size);

/*
 * Unlike the helpers above, this one takes a task, a non-const iovec,
 * and returns a size_t.
 */
size_t pvfs_bufmap_copy_to_user_task_iovec(
	struct task_struct *tsk,
	struct iovec *iovec,
	unsigned long nr_segs,
	struct pvfs2_bufmap *bufmap,
	int buffer_index,
	size_t bytes_to_be_copied);
#endif /* __PVFS2_BUFMAP_H */
This diff is collapsed.
/*
 * debugfs entry points.
 * NOTE(review): behaviour is inferred from the names only; confirm
 * against the implementation which init must run first.
 */
int pvfs2_debugfs_init(void);
int pvfs2_kernel_debug_init(void);
void pvfs2_debugfs_cleanup(void);
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#ifndef _PVFS2_DEV_PROTO_H
#define _PVFS2_DEV_PROTO_H
/*
* types and constants shared between user space and kernel space for
* device interaction using a common protocol
*/
/*
 * Valid pvfs2 kernel operation types.
 *
 * All values share the 0xFF00 prefix in the upper 16 bits. The first
 * group is numbered consecutively from 0x00; the second group uses a
 * separate 0xEExx range.
 */
#define PVFS2_VFS_OP_INVALID		0xFF000000
#define PVFS2_VFS_OP_FILE_IO		0xFF000001
#define PVFS2_VFS_OP_LOOKUP		0xFF000002
#define PVFS2_VFS_OP_CREATE		0xFF000003
#define PVFS2_VFS_OP_GETATTR		0xFF000004
#define PVFS2_VFS_OP_REMOVE		0xFF000005
#define PVFS2_VFS_OP_MKDIR		0xFF000006
#define PVFS2_VFS_OP_READDIR		0xFF000007
#define PVFS2_VFS_OP_SETATTR		0xFF000008
#define PVFS2_VFS_OP_SYMLINK		0xFF000009
#define PVFS2_VFS_OP_RENAME		0xFF00000A
#define PVFS2_VFS_OP_STATFS		0xFF00000B
#define PVFS2_VFS_OP_TRUNCATE		0xFF00000C
#define PVFS2_VFS_OP_MMAP_RA_FLUSH	0xFF00000D
#define PVFS2_VFS_OP_FS_MOUNT		0xFF00000E
#define PVFS2_VFS_OP_FS_UMOUNT		0xFF00000F
#define PVFS2_VFS_OP_GETXATTR		0xFF000010
#define PVFS2_VFS_OP_SETXATTR		0xFF000011
#define PVFS2_VFS_OP_LISTXATTR		0xFF000012
#define PVFS2_VFS_OP_REMOVEXATTR	0xFF000013
#define PVFS2_VFS_OP_PARAM		0xFF000014
#define PVFS2_VFS_OP_PERF_COUNT		0xFF000015

#define PVFS2_VFS_OP_CANCEL		0xFF00EE00
#define PVFS2_VFS_OP_FSYNC		0xFF00EE01
#define PVFS2_VFS_OP_FSKEY		0xFF00EE02
#define PVFS2_VFS_OP_READDIRPLUS	0xFF00EE03
#define PVFS2_VFS_OP_FILE_IOX		0xFF00EE04
/*
 * Misc constants. Keep every value a multiple of 8, otherwise
 * 32-64 bit interactions will be messed up.
 */
#define PVFS2_NAME_LEN			0x00000100	/* 256 */
#define PVFS2_MAX_DEBUG_STRING_LEN	0x00000400	/* 1024 */
#define PVFS2_MAX_DEBUG_ARRAY_LEN	0x00000800	/* 2048 */
/*
 * MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR.
 * The value of PVFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60
 * to accommodate an attribute object with mirrored handles.
 * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and
 * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr
 * but readdirplus might.
 */
#define MAX_DIRENT_COUNT_READDIR	0x00000060	/* 96 */
#define MAX_DIRENT_COUNT_READDIRPLUS	0x0000003C	/* 60 */
#include "upcall.h"
#include "downcall.h"
/*
 * Unlike the proto macros, the helpers below perform no byte swapping.
 * They exist so that kernel <-> client-core interactions do not cause
 * unaligned accesses etc. on 64 bit machines.
 *
 * roundup4(x) / roundup8(x): round x up to the next multiple of 4 / 8.
 * A value that is already a multiple is returned unchanged. Each is
 * only defined here if no earlier definition exists.
 */
#ifndef roundup4
#define roundup4(x)	(((x) + 3) & ~3)
#endif

#ifndef roundup8
#define roundup8(x)	(((x) + 7) & ~7)
#endif
/* strings; decoding just points into existing character data */

/*
 * enc_string(pptr, pbuf) - append a length-prefixed string to a buffer.
 *
 * pptr: address of a char-pointer cursor into the output buffer
 * pbuf: address of the pointer to the NUL-terminated source string
 *
 * Writes a __u32 length (excluding the NUL), then the string bytes
 * including the NUL, and advances *pptr by the record size rounded up
 * to a multiple of 8. The caller must provide enough room and a cursor
 * suitably aligned for the __u32 store.
 *
 * Every use of an argument is parenthesized so that expression arguments
 * such as "names + 1" bind as intended, and the temporary has a
 * macro-specific name so that it cannot capture an identifier used in
 * the caller's arguments.
 */
#define enc_string(pptr, pbuf) do { \
	__u32 enc_len_ = strlen(*(pbuf)); \
	*(__u32 *) *(pptr) = enc_len_; \
	memcpy(*(pptr) + 4, *(pbuf), enc_len_ + 1); \
	*(pptr) += roundup8(4 + enc_len_ + 1); \
} while (0)
/*
 * dec_string(pptr, pbuf, plen) - decode one length-prefixed string.
 *
 * pptr: address of a char-pointer cursor into the encoded buffer
 * pbuf: address of the pointer that receives the string start
 * plen: typed pointer that receives the length, or a null pointer
 *
 * No data is copied: *pbuf is pointed at the character data that follows
 * the __u32 length prefix. *pptr is advanced by the record size rounded
 * up to a multiple of 8.
 *
 * Every use of an argument is parenthesized, and the temporary has a
 * macro-specific name. A plain "len" temporary would shadow a caller
 * variable of the same name, so that passing "&len" as plen would leave
 * the caller's variable untouched.
 */
#define dec_string(pptr, pbuf, plen) do { \
	__u32 dec_len_ = *(__u32 *) *(pptr); \
	*(pbuf) = *(pptr) + 4; \
	*(pptr) += roundup8(4 + dec_len_ + 1); \
	if (plen) \
		*(plen) = dec_len_; \
} while (0)
/*
 * One offset/length pair, both signed 64-bit.
 * NOTE(review): presumably describes a segment of a readx/writex (iox)
 * request - confirm against the users of this struct.
 */
struct read_write_x {
	__s64 off;
	__s64 len;
};
#endif
This diff is collapsed.
/*
 * sysfs entry points. Function declarations have external linkage by
 * default, so no "extern" keyword is needed.
 */
int orangefs_sysfs_init(void);
void orangefs_sysfs_exit(void);
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#ifndef __UPCALL_H
#define __UPCALL_H
/*
* Sanitized this header file to fix
* 32-64 bit interaction issues between
* client-core and device
*/
/* Request payload stored in pvfs2_upcall_s.req.io. */
struct pvfs2_io_request_s {
	__s32 async_vfs_io;
	/* NOTE(review): presumably a bufmap slot index - confirm */
	__s32 buf_index;
	__s32 count;
	/* explicit padding ahead of the 64-bit offset */
	__s32 __pad1;
	__s64 offset;
	/* target object (struct pvfs2_object_kref is declared elsewhere) */
	struct pvfs2_object_kref refn;
	/* enum PVFS_io_type is declared elsewhere */
	enum PVFS_io_type io_type;
	__s32 readahead_size;
};
/* Request payload stored in pvfs2_upcall_s.req.iox. */
struct pvfs2_iox_request_s {
	/* NOTE(review): presumably a bufmap slot index - confirm */
	__s32 buf_index;
	__s32 count;
	/* target object (struct pvfs2_object_kref is declared elsewhere) */
	struct pvfs2_object_kref refn;
	/* enum PVFS_io_type is declared elsewhere */
	enum PVFS_io_type io_type;
	/* explicit padding */
	__s32 __pad1;
};
/* Request payload stored in pvfs2_upcall_s.req.lookup. */
struct pvfs2_lookup_request_s {
	__s32 sym_follow;
	/* explicit padding */
	__s32 __pad1;
	/* reference to the parent object */
	struct pvfs2_object_kref parent_refn;
	/* name buffer, PVFS2_NAME_LEN bytes */
	char d_name[PVFS2_NAME_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.create. */
struct pvfs2_create_request_s {
	/* reference to the parent object */
	struct pvfs2_object_kref parent_refn;
	/* attribute block (struct PVFS_sys_attr_s is declared elsewhere) */
	struct PVFS_sys_attr_s attributes;
	/* name buffer, PVFS2_NAME_LEN bytes */
	char d_name[PVFS2_NAME_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.sym. */
struct pvfs2_symlink_request_s {
	/* reference to the parent object */
	struct pvfs2_object_kref parent_refn;
	/* attribute block (struct PVFS_sys_attr_s is declared elsewhere) */
	struct PVFS_sys_attr_s attributes;
	/* two fixed-size buffers of PVFS2_NAME_LEN bytes each */
	char entry_name[PVFS2_NAME_LEN];
	char target[PVFS2_NAME_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.getattr. */
struct pvfs2_getattr_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	/* NOTE(review): presumably selects which attributes to fetch - confirm */
	__u32 mask;
	/* explicit padding */
	__u32 __pad1;
};
/* Request payload stored in pvfs2_upcall_s.req.setattr. */
struct pvfs2_setattr_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	/* attribute block (struct PVFS_sys_attr_s is declared elsewhere) */
	struct PVFS_sys_attr_s attributes;
};
/* Request payload stored in pvfs2_upcall_s.req.remove. */
struct pvfs2_remove_request_s {
	/* reference to the parent object */
	struct pvfs2_object_kref parent_refn;
	/* name buffer, PVFS2_NAME_LEN bytes */
	char d_name[PVFS2_NAME_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.mkdir. */
struct pvfs2_mkdir_request_s {
	/* reference to the parent object */
	struct pvfs2_object_kref parent_refn;
	/* attribute block (struct PVFS_sys_attr_s is declared elsewhere) */
	struct PVFS_sys_attr_s attributes;
	/* name buffer, PVFS2_NAME_LEN bytes */
	char d_name[PVFS2_NAME_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.readdir. */
struct pvfs2_readdir_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	__u64 token;
	__s32 max_dirent_count;
	/* NOTE(review): presumably a readdir buffer slot index - confirm */
	__s32 buf_index;
};
/* Request payload stored in pvfs2_upcall_s.req.readdirplus. */
struct pvfs2_readdirplus_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	__u64 token;
	__s32 max_dirent_count;
	__u32 mask;
	/* NOTE(review): presumably a readdir buffer slot index - confirm */
	__s32 buf_index;
	/* explicit padding */
	__s32 __pad1;
};
/* Request payload stored in pvfs2_upcall_s.req.rename. */
struct pvfs2_rename_request_s {
	/* references to the old and the new parent object */
	struct pvfs2_object_kref old_parent_refn;
	struct pvfs2_object_kref new_parent_refn;
	/* old and new name buffers, PVFS2_NAME_LEN bytes each */
	char d_old_name[PVFS2_NAME_LEN];
	char d_new_name[PVFS2_NAME_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.statfs. */
struct pvfs2_statfs_request_s {
	__s32 fs_id;
	/* explicit padding */
	__s32 __pad1;
};
/* Request payload stored in pvfs2_upcall_s.req.truncate. */
struct pvfs2_truncate_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	__s64 size;
};
/* Request payload stored in pvfs2_upcall_s.req.ra_cache_flush. */
struct pvfs2_mmap_ra_cache_flush_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
};
/* Request payload stored in pvfs2_upcall_s.req.fs_mount. */
struct pvfs2_fs_mount_request_s {
	/* fixed-size buffer of PVFS_MAX_SERVER_ADDR_LEN bytes */
	char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN];
};
/*
 * Request payload stored in pvfs2_upcall_s.req.fs_umount.
 * id and fs_id carry the same names as fields of
 * struct pvfs2_fs_mount_response.
 */
struct pvfs2_fs_umount_request_s {
	__s32 id;
	__s32 fs_id;
	/* fixed-size buffer of PVFS_MAX_SERVER_ADDR_LEN bytes */
	char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN];
};
/* Request payload stored in pvfs2_upcall_s.req.getxattr. */
struct pvfs2_getxattr_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	/* NOTE(review): presumably the number of valid bytes in key - confirm */
	__s32 key_sz;
	/* explicit padding */
	__s32 __pad1;
	/* attribute name buffer, PVFS_MAX_XATTR_NAMELEN bytes */
	char key[PVFS_MAX_XATTR_NAMELEN];
};
/* Request payload stored in pvfs2_upcall_s.req.setxattr. */
struct pvfs2_setxattr_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	/* key/value pair (struct PVFS_keyval_pair is declared elsewhere) */
	struct PVFS_keyval_pair keyval;
	__s32 flags;
	/* explicit padding */
	__s32 __pad1;
};
/* Request payload stored in pvfs2_upcall_s.req.listxattr. */
struct pvfs2_listxattr_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	__s32 requested_count;
	/* explicit padding ahead of the 64-bit token */
	__s32 __pad1;
	__u64 token;
};
/* Request payload stored in pvfs2_upcall_s.req.removexattr. */
struct pvfs2_removexattr_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
	/* NOTE(review): presumably the number of valid bytes in key - confirm */
	__s32 key_sz;
	/* explicit padding */
	__s32 __pad1;
	/* attribute name buffer, PVFS_MAX_XATTR_NAMELEN bytes */
	char key[PVFS_MAX_XATTR_NAMELEN];
};
/* Request payload stored in pvfs2_upcall_s.req.cancel. */
struct pvfs2_op_cancel_s {
	/* NOTE(review): presumably the tag of the operation to cancel - confirm */
	__u64 op_tag;
};
/* Request payload stored in pvfs2_upcall_s.req.fsync. */
struct pvfs2_fsync_request_s {
	/* target object */
	struct pvfs2_object_kref refn;
};
/* Direction of a param request; used by pvfs2_param_request_s.type. */
enum pvfs2_param_request_type {
	PVFS2_PARAM_REQUEST_SET	= 1,
	PVFS2_PARAM_REQUEST_GET	= 2,
};
/*
 * Identifies which parameter a param request refers to; used by
 * pvfs2_param_request_s.op. Values are explicit and run from 1 to 25.
 */
enum pvfs2_param_request_op {
	/* ACACHE */
	PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS			= 1,
	PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT			= 2,
	PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT			= 3,
	PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE		= 4,
	/* PERF */
	PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS			= 5,
	PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE			= 6,
	PVFS2_PARAM_REQUEST_OP_PERF_RESET				= 7,
	/* NCACHE */
	PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS			= 8,
	PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT			= 9,
	PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT			= 10,
	PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE		= 11,
	/* STATIC_ACACHE */
	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS		= 12,
	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT			= 13,
	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT			= 14,
	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE		= 15,
	/* client debug */
	PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG				= 16,
	/* CCACHE */
	PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS			= 17,
	PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT			= 18,
	PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT			= 19,
	PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE		= 20,
	/* CAPCACHE */
	PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS			= 21,
	PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT			= 22,
	PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT			= 23,
	PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE		= 24,
	/* mask values */
	PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES				= 25,
};
/*
 * Request payload stored in pvfs2_upcall_s.req.param.
 *
 * NOTE(review): type and op are enum-typed, so their storage size is
 * chosen by the compiler; confirm both sides of the protocol agree.
 */
struct pvfs2_param_request_s {
	enum pvfs2_param_request_type type;
	enum pvfs2_param_request_op op;
	/* numeric parameter value */
	__s64 value;
	/* string parameter value, PVFS2_MAX_DEBUG_STRING_LEN bytes */
	char s_value[PVFS2_MAX_DEBUG_STRING_LEN];
};
/* Selects a counter set; used by pvfs2_perf_count_request_s.type. */
enum pvfs2_perf_count_request_type {
	PVFS2_PERF_COUNT_REQUEST_ACACHE		= 1,
	PVFS2_PERF_COUNT_REQUEST_NCACHE		= 2,
	PVFS2_PERF_COUNT_REQUEST_CAPCACHE	= 3,
};
/* Request payload stored in pvfs2_upcall_s.req.perf_count. */
struct pvfs2_perf_count_request_s {
	enum pvfs2_perf_count_request_type type;
	/* explicit padding */
	__s32 __pad1;
};
/* Request payload stored in pvfs2_upcall_s.req.fs_key. */
struct pvfs2_fs_key_request_s {
	__s32 fsid;
	/* explicit padding */
	__s32 __pad1;
};
/*
 * An upcall: a header naming the operation and the requesting
 * credentials/process, an optional trailer, and a union holding the
 * request for whichever operation "type" names.
 *
 * NOTE(review): five 32-bit fields precede the 64-bit trailer_size with
 * no explicit __pad field, and trailer_buf is a raw pointer; both make
 * the layout depend on the ABI - confirm this matches client-core.
 */
struct pvfs2_upcall_s {
	__s32 type;
	__u32 uid;
	__u32 gid;
	int pid;
	int tgid;
	/* currently trailer is used only by readx/writex (iox) */
	__s64 trailer_size;
	char *trailer_buf;

	/* per-operation request; only one member is meaningful at a time */
	union {
		struct pvfs2_io_request_s			io;
		struct pvfs2_iox_request_s			iox;
		struct pvfs2_lookup_request_s			lookup;
		struct pvfs2_create_request_s			create;
		struct pvfs2_symlink_request_s			sym;
		struct pvfs2_getattr_request_s			getattr;
		struct pvfs2_setattr_request_s			setattr;
		struct pvfs2_remove_request_s			remove;
		struct pvfs2_mkdir_request_s			mkdir;
		struct pvfs2_readdir_request_s			readdir;
		struct pvfs2_readdirplus_request_s		readdirplus;
		struct pvfs2_rename_request_s			rename;
		struct pvfs2_statfs_request_s			statfs;
		struct pvfs2_truncate_request_s			truncate;
		struct pvfs2_mmap_ra_cache_flush_request_s	ra_cache_flush;
		struct pvfs2_fs_mount_request_s			fs_mount;
		struct pvfs2_fs_umount_request_s		fs_umount;
		struct pvfs2_getxattr_request_s			getxattr;
		struct pvfs2_setxattr_request_s			setxattr;
		struct pvfs2_listxattr_request_s		listxattr;
		struct pvfs2_removexattr_request_s		removexattr;
		struct pvfs2_op_cancel_s			cancel;
		struct pvfs2_fsync_request_s			fsync;
		struct pvfs2_param_request_s			param;
		struct pvfs2_perf_count_request_s		perf_count;
		struct pvfs2_fs_key_request_s			fs_key;
	} req;
};
#endif /* __UPCALL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment