Commit 5d4450c4 authored by Peng Tao's avatar Peng Tao Committed by Greg Kroah-Hartman

staging/lustre: fix build error on non-x86 platforms

dump_trace() is only available on X86. Without it, Lustre's own
watchdog is broken. We can only dump current task's stack.

The client-side this code is much less likely to hit deadlocks and
it's probably OK to drop this altogether, since we hardly have any
ptlrpc threads on clients, most notable ones are ldlm cb threads
that should not really be blocking on the client anyway.

Remove libcfs watchdog for now, until the upstream kernel watchdog
can detect distributed deadlocks and dump other kernel threads.

Cc: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: default avatarPeng Tao <tao.peng@emc.com>
Signed-off-by: default avatarAndreas Dilger <andreas.dilger@intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 4b1a25f0
......@@ -117,31 +117,6 @@ int libcfs_sock_write(socket_t *sock, void *buffer, int nob, int timeout);
int libcfs_sock_read(socket_t *sock, void *buffer, int nob, int timeout);
void libcfs_sock_release(socket_t *sock);
/* libcfs watchdogs */
struct lc_watchdog;
/* Add a watchdog which fires after "time" milliseconds of delay. You have to
* touch it once to enable it. */
struct lc_watchdog *lc_watchdog_add(int time,
void (*cb)(pid_t pid, void *),
void *data);
/* Enables a watchdog and resets its timer. */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout);
#define CFS_GET_TIMEOUT(svc) (max_t(int, obd_timeout, \
AT_OFF ? 0 : at_get(&svc->srv_at_estimate)) * \
svc->srv_watchdog_factor)
/* Disable a watchdog; touch it to restart it. */
void lc_watchdog_disable(struct lc_watchdog *lcw);
/* Clean up the watchdog */
void lc_watchdog_delete(struct lc_watchdog *lcw);
/* Dump a debug log */
void lc_watchdog_dumplog(pid_t pid, void *data);
/* need both kernel and user-land acceptor */
#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512
#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023
......
......@@ -210,7 +210,6 @@ do { \
#define ntohs(x) ___ntohs(x)
#endif
void libcfs_debug_dumpstack(task_t *tsk);
void libcfs_run_upcall(char **argv);
void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *);
void libcfs_debug_dumplog(void);
......
......@@ -93,11 +93,8 @@ static inline void __client_obd_list_lock(client_obd_lock_t *lock,
lock, task->comm, task->pid,
lock->func, lock->line,
(jiffies - lock->time) / HZ);
LCONSOLE_WARN("====== for process holding the "
"lock =====\n");
libcfs_debug_dumpstack(task);
LCONSOLE_WARN("====== for current process =====\n");
libcfs_debug_dumpstack(NULL);
dump_stack();
LCONSOLE_WARN("====== end =======\n");
cfs_pause(1000 * HZ);
}
......
......@@ -2322,8 +2322,13 @@ struct ptlrpc_thread {
pid_t t_pid;
/**
* put watchdog in the structure per thread b=14840
*
* Lustre watchdog is removed for client in the hope
* of a generic watchdog can be merged in kernel.
* When that happens, we should add below back.
*
* struct lc_watchdog *t_watchdog;
*/
struct lc_watchdog *t_watchdog;
/**
* the svc this thread belonged to b=18582
*/
......
......@@ -1548,7 +1548,7 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
break;
default:
LDLM_ERROR(lock, "Unknown LVB type: %d\n", lock->l_lvb_type);
libcfs_debug_dumpstack(NULL);
dump_stack();
RETURN(-EINVAL);
}
......
......@@ -11,7 +11,7 @@ libcfs-linux-objs += linux-crypto-adler.o
libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
libcfs-all-objs := debug.o fail.o nidstrings.o module.o tracefile.o \
watchdog.o libcfs_string.o hash.o kernel_user_comm.o \
libcfs_string.o hash.o kernel_user_comm.o \
prng.o workitem.o upcall_cache.o libcfs_cpu.o \
libcfs_mem.o libcfs_lock.o
......
......@@ -168,7 +168,7 @@ void lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
/* not reached */
}
libcfs_debug_dumpstack(NULL);
dump_stack();
if (!libcfs_panic_on_lbug)
libcfs_debug_dumplog();
libcfs_run_lbug_upcall(msgdata);
......@@ -179,48 +179,6 @@ void lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
schedule();
}
#include <linux/nmi.h>
#include <asm/stacktrace.h>
static int print_trace_stack(void *data, char *name)
{
printk(" <%s> ", name);
return 0;
}
# define RELIABLE reliable
# define DUMP_TRACE_CONST const
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
char fmt[32];
touch_nmi_watchdog();
sprintf(fmt, " [<%016lx>] %s%%s\n", addr, RELIABLE ? "": "? ");
__print_symbol(fmt, addr);
}
static DUMP_TRACE_CONST struct stacktrace_ops print_trace_ops = {
.stack = print_trace_stack,
.address = print_trace_address,
.walk_stack = print_context_stack,
};
void libcfs_debug_dumpstack(struct task_struct *tsk)
{
/* dump_stack() */
/* show_trace() */
if (tsk == NULL)
tsk = current;
printk("Pid: %d, comm: %.20s\n", tsk->pid, tsk->comm);
/* show_trace_log_lvl() */
printk("\nCall Trace:\n");
dump_trace(tsk, NULL, NULL,
0,
&print_trace_ops, NULL);
printk("\n");
}
task_t *libcfs_current(void)
{
CWARN("current task struct is %p\n", current);
......@@ -255,7 +213,6 @@ void libcfs_unregister_panic_notifier(void)
atomic_notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier);
}
EXPORT_SYMBOL(libcfs_debug_dumpstack);
EXPORT_SYMBOL(libcfs_current);
......
This diff is collapsed.
......@@ -300,7 +300,7 @@ void ll_invalidate_aliases(struct inode *inode)
CERROR("called on root (?) dentry=%p, inode=%p "
"ino=%lu\n", dentry, inode, inode->i_ino);
lustre_dump_dentry(dentry, 1);
libcfs_debug_dumpstack(NULL);
dump_stack();
}
d_lustre_invalidate(dentry, 0);
......
......@@ -132,7 +132,7 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
* add dirty pages into cache during truncate */
CERROR("Proc %s is dirting page w/o inode lock, this"
"will break truncate.\n", current->comm);
libcfs_debug_dumpstack(NULL);
dump_stack();
LBUG();
return ERR_PTR(-EIO);
}
......
......@@ -785,7 +785,7 @@ static void osc_req_attr_set(const struct lu_env *env,
"no cover page!\n");
CL_PAGE_DEBUG(D_ERROR, env, apage,
"dump uncover page!\n");
libcfs_debug_dumpstack(NULL);
dump_stack();
LBUG();
}
......
......@@ -2254,7 +2254,9 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
ptlrpc_retry_rqbds, svcpt);
/* XXX: Add this back when libcfs watchdog is merged upstream
lc_watchdog_disable(thread->t_watchdog);
*/
cond_resched();
......@@ -2268,8 +2270,10 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
if (ptlrpc_thread_stopping(thread))
return -EINTR;
/*
lc_watchdog_touch(thread->t_watchdog,
ptlrpc_server_get_timeout(svcpt));
*/
return 0;
}
......@@ -2372,8 +2376,10 @@ static int ptlrpc_main(void *arg)
/* wake up our creator in case he's still waiting. */
wake_up(&thread->t_ctl_waitq);
/*
thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
NULL, NULL);
*/
spin_lock(&svcpt->scp_rep_lock);
list_add(&rs->rs_list, &svcpt->scp_rep_idle);
......@@ -2428,8 +2434,10 @@ static int ptlrpc_main(void *arg)
}
}
/*
lc_watchdog_delete(thread->t_watchdog);
thread->t_watchdog = NULL;
*/
out_srv_fini:
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment