/*
 * Kernel support for the ptrace() and syscall tracing interfaces.
 *
 * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
 * Copyright (C) 2000 Matthew Wilcox <matthew@wil.cx>
 * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/personality.h>
#include <linux/security.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/offsets.h>

/* These are used in entry.S, syscall_restore_rfi.  We need to record the
 * current stepping mode somewhere other than in PSW, because there is no
 * concept of saving and restoring the user's PSW over a syscall.  We choose
 * to use these two bits in task->ptrace.  These bits must not clash with
 * any PT_* defined in include/linux/sched.h, and must match with the bit
 * tests in entry.S
 */
#define PT_SINGLESTEP	0x10000
#define PT_BLOCKSTEP	0x20000

/* PSW bits we allow the debugger to modify */
#define USER_PSW_BITS	(PSW_N | PSW_V | PSW_CB)

/* Debug tracing is compiled out by default; turn the #undef below into a
 * #define to enable it.
 */
#undef DEBUG_PTRACE

/* DBG() takes a single, fully parenthesised printk argument list, e.g.
 * DBG(("fmt %d\n", val)).  It expands to "printk (args)" when DEBUG_PTRACE
 * is defined and to nothing otherwise.
 */
#ifdef DEBUG_PTRACE
#define DBG(x)	printk x
#else
#define DBG(x)
#endif

#ifdef __LP64__

/* True when the task referenced by a variable named `child` has the
 * PER_LINUX_32BIT personality.  The macro names `child` directly, so it
 * can only be used where such a variable is in scope.
 */
#define CHILD_IS_32BIT	(child->personality == PER_LINUX_32BIT)

/* Translate a byte offset into the 32 bit view of pt_regs into the
 * corresponding byte offset in the 64 bit kernel's pt_regs.  For example,
 * a 32 bit gdb under a 64 bit kernel will request offset 12 if it wants
 * gr3, but the lower 32 bits of the 64 bit kernel's view of gr3 are at
 * offset 28 (3*8 + 4).
 *
 * This relies on a 32 bit pt_regs being comprised of 32 bit values except
 * for the fp registers, which (a) are 64 bits, and (b) follow the gr
 * registers at the start of pt_regs.  The 32 bit pt_regs is therefore half
 * the size of the 64 bit pt_regs, plus 32*4 to allow for fr[] being 64 bit
 * in both cases.  That gives three regions in the 32 bit view:
 *
 *   [0, 32*4)                     gr[0..31], 4 bytes each
 *   [32*4, 32*4 + 32*8)           fr[0..31], 8 bytes each
 *   [32*4 + 32*8, end of regs)    remaining fields, 4 bytes each
 *
 * The region tests use strict upper bounds: offset 32*4 is the first byte
 * of fr[0], not a gr slot, and offset 32*4 + 32*8 is the first word after
 * fr[], not part of it.
 *
 * Returns the translated offset (pointing at the low 32 bits for the
 * widened fields), or -1 if @offset is negative or beyond the 32 bit
 * pt_regs.
 */

static long translate_usr_offset(long offset)
{
	if (offset < 0)
		return -1;
	else if (offset < 32*4)		/* gr[0..31]: widened, low word at +4 */
		return offset * 2 + 4;
	else if (offset < 32*4+32*8)	/* fr[0..31]: same size, shifted by the wider gr[] */
		return offset + 32*4;
	else if (offset < (long) (sizeof(struct pt_regs)/2 + 32*4))
		/* trailing fields: widened, minus fr[] which did not double */
		return offset * 2 + 4 - 32*8;
	else
		return -1;
}
#endif

/*
 * Called by kernel/ptrace.c when detaching.
 *
 * Make sure single step bits etc are not set.
 */
void ptrace_disable(struct task_struct *child)
{
	/* Look the child's saved PSW up once and reuse the pointer. */
	__typeof__(pa_psw(child)) psw = pa_psw(child);

	/* Switch off every PSW bit the stepping requests in this file
	 * program (r, t, h, l), so none of those traps stays armed.
	 */
	psw->r = 0;
	psw->t = 0;
	psw->h = 0;
	psw->l = 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
91 92 93 94
/*
 * sys_ptrace - parisc entry point for the ptrace() system call.
 *
 * @request: PTRACE_* operation to perform.
 * @pid:     pid of the task to operate on (not used by PTRACE_TRACEME).
 * @addr:    request-specific address or pt_regs offset.
 * @data:    request-specific value, user pointer, or signal number.
 *
 * Runs under the big kernel lock.  PTRACE_TRACEME marks the caller as
 * traced.  Every other request looks up @pid, takes a reference on the
 * task and, except for PTRACE_ATTACH, requires ptrace_check_attach() to
 * succeed before acting.  Requests that are not handled here are passed
 * on to ptrace_request().
 *
 * Returns 0 on success or a negative errno: -EPERM (caller already traced,
 * or @pid is 1), -ESRCH (no such task), -EIO (bad address, offset or
 * signal number, or a failed transfer), or whatever the helper that
 * handled the request returned.
 */
long sys_ptrace(long request, pid_t pid, long addr, long data)
{
	struct task_struct *child;
	long ret;
#ifdef DEBUG_PTRACE
	/* addr and data may be rewritten below; keep the originals for DBG() */
	long oaddr=addr, odata=data;
#endif

	lock_kernel();
	ret = -EPERM;
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;

		ret = security_ptrace(current->parent, current);
		if (ret)
			goto out;

		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}

	/* Look the target up and pin it before dropping tasklist_lock. */
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;
	ret = -EPERM;
	if (pid == 1)		/* no messing around with init! */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (ret < 0)
		goto out_tsk;

	switch (request) {
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		int copied;

#ifdef __LP64__
		if (CHILD_IS_32BIT) {
			/* 32 bit child: transfer a 32 bit word and ignore
			 * the upper half of addr.
			 */
			unsigned int tmp;

			addr &= 0xffffffffL;
			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
			ret = -EIO;
			if (copied != sizeof(tmp))
				goto out_tsk;
			ret = put_user(tmp,(unsigned int *) data);
			DBG(("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
				request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
				pid, oaddr, odata, ret, tmp));
		}
		else
#endif
		{
			unsigned long tmp;

			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
			ret = -EIO;
			if (copied != sizeof(tmp))
				goto out_tsk;
			ret = put_user(tmp,(unsigned long *) data);
		}
		goto out_tsk;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		/* ret stays 0 only if the whole word was written. */
		ret = 0;
#ifdef __LP64__
		if (CHILD_IS_32BIT) {
			unsigned int tmp = (unsigned int)data;
			DBG(("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
				request == PTRACE_POKETEXT ? "TEXT" : "DATA",
				pid, oaddr, odata));
			addr &= 0xffffffffL;
			if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
				goto out_tsk;
		}
		else
#endif
		{
			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
				goto out_tsk;
		}
		ret = -EIO;
		goto out_tsk;

	/* Read the word at location addr in the USER area.  For ptraced
	   processes, the kernel saves all regs on a syscall. */
	case PTRACE_PEEKUSR: {
		ret = -EIO;
#ifdef __LP64__
		if (CHILD_IS_32BIT) {
			unsigned int tmp;

			if (addr & (sizeof(int)-1))
				goto out_tsk;
			/* Map the 32 bit pt_regs offset onto the kernel's
			 * 64 bit pt_regs; negative means out of range.
			 */
			if ((addr = translate_usr_offset(addr)) < 0)
				goto out_tsk;

			tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
			ret = put_user(tmp, (unsigned int *) data);
			DBG(("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
				pid, oaddr, odata, ret, addr, tmp));
		}
		else
#endif
		{
			unsigned long tmp;

			/* Offset must be word aligned and inside pt_regs. */
			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
				goto out_tsk;
			tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
			ret = put_user(tmp, (unsigned long *) data);
		}
		goto out_tsk;
	}

	/* Write the word at location addr in the USER area.  This will need
	   to change when the kernel no longer saves all regs on a syscall.
	   FIXME.  There is a problem at the moment in that r3-r18 are only
	   saved if the process is ptraced on syscall entry, and even then
	   those values are overwritten by actual register values on syscall
	   exit. */
	case PTRACE_POKEUSR:
		ret = -EIO;
		/* Some register values written here may be ignored in
		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
		 * r31/r31+4, and not with the values in pt_regs.
		 */
		if (addr == PT_PSW) {
			/* PT_PSW=0, so this is valid for 32 bit processes
			 * under 64 bit kernels.
			 *
			 * Allow writing to Nullify, Divide-step-correction,
			 * and carry/borrow bits.
			 * BEWARE, if you set N, and then single step, it won't
			 * stop on the nullified instruction.
			 */
			DBG(("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
				pid, oaddr, odata));
			data &= USER_PSW_BITS;
			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
			task_regs(child)->gr[0] |= data;
			ret = 0;
			goto out_tsk;
		}
#ifdef __LP64__
		if (CHILD_IS_32BIT) {
			if (addr & (sizeof(int)-1))
				goto out_tsk;
			if ((addr = translate_usr_offset(addr)) < 0)
				goto out_tsk;
			DBG(("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
				pid, oaddr, odata, addr));
			/* A 32 bit tracer writes each 64 bit fp register as
			 * two words, so the second word of fr31 (PT_FR31 + 4)
			 * must be accepted as well.
			 */
			if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
				/* Special case, fp regs are 64 bits anyway */
				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
				ret = 0;
			}
			else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
					addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
					addr == PT_SAR+4) {
				/* Zero the top 32 bits */
				*(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
				ret = 0;
			}
			goto out_tsk;
		}
		else
#endif
		{
			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
				goto out_tsk;
			/* Only gr1-gr31, the iaoq pair, the fp registers
			 * and sar are writable; anything else gets -EIO.
			 */
			if ((addr >= PT_GR1 && addr <= PT_GR31) ||
					addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
					(addr >= PT_FR0 && addr <= PT_FR31) ||
					addr == PT_SAR) {
				*(unsigned long *) ((char *) task_regs(child) + addr) = data;
				ret = 0;
			}
			goto out_tsk;
		}

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT:
		ret = -EIO;
		DBG(("sys_ptrace(%s)\n",
			request == PTRACE_SYSCALL ? "SYSCALL" : "CONT"));
		/* data is the signal number to continue with. */
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
		if (request == PTRACE_SYSCALL) {
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		} else {
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		}
		child->exit_code = data;
		goto out_wake_notrap;

	case PTRACE_KILL:
		/*
		 * make the child exit.  Best I can do is send it a
		 * sigkill.  perhaps it should be put in the status
		 * that it wants to exit.
		 */
		DBG(("sys_ptrace(KILL)\n"));
		if (child->state == TASK_ZOMBIE)	/* already dead */
			goto out_tsk;
		child->exit_code = SIGKILL;
		goto out_wake_notrap;

	case PTRACE_SINGLEBLOCK:
		DBG(("sys_ptrace(SINGLEBLOCK)\n"));
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->ptrace &= ~PT_SINGLESTEP;
		child->ptrace |= PT_BLOCKSTEP;
		child->exit_code = data;

		/* Enable taken branch trap. */
		pa_psw(child)->r = 0;
		pa_psw(child)->t = 1;
		pa_psw(child)->h = 0;
		pa_psw(child)->l = 0;
		goto out_wake;

	case PTRACE_SINGLESTEP:
		DBG(("sys_ptrace(SINGLESTEP)\n"));
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->ptrace &= ~PT_BLOCKSTEP;
		child->ptrace |= PT_SINGLESTEP;
		child->exit_code = data;

		if (pa_psw(child)->n) {
			struct siginfo si;

			/* Nullified, just crank over the queue. */
			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
			pa_psw(child)->n = 0;
			pa_psw(child)->x = 0;
			pa_psw(child)->y = 0;
			pa_psw(child)->z = 0;
			pa_psw(child)->b = 0;
			ptrace_disable(child);
			/* The step is already complete, so force a SIGTRAP
			 * on the child instead of arming a trap.
			 * NOTE(review): an earlier comment here said "don't
			 * wake up the child", yet the code goes to out_wake
			 * and does wake it -- presumably so it can take the
			 * forced signal; confirm.
			 */
			si.si_code = TRAP_TRACE;
			si.si_addr = (void *) (task_regs(child)->iaoq[0] & ~3);
			si.si_signo = SIGTRAP;
			si.si_errno = 0;
			force_sig_info(SIGTRAP, &si, child);
			goto out_wake;
		}

		/* Enable recovery counter traps.  The recovery counter
		 * itself will be set to zero on a task switch.  If the
		 * task is suspended on a syscall then the syscall return
		 * path will overwrite the recovery counter with a suitable
		 * value such that it traps once back in user space.  We
		 * disable interrupts in the childs PSW here also, to avoid
		 * interrupts while the recovery counter is decrementing.
		 */
		pa_psw(child)->r = 1;
		pa_psw(child)->t = 0;
		pa_psw(child)->h = 0;
		pa_psw(child)->l = 0;
		/* give it a chance to run. */
		goto out_wake;

	case PTRACE_DETACH:
		ret = ptrace_detach(child, data);
		goto out_tsk;

	default:
		ret = ptrace_request(child, request, addr, data);
		goto out_tsk;
	}

	/* Exit ladder: each label falls through to the ones below it. */
out_wake_notrap:
	/* Resume with all PSW trap bits cleared. */
	ptrace_disable(child);
out_wake:
	wake_up_process(child);
	ret = 0;
out_tsk:
	/* Drop the reference taken after find_task_by_pid(). */
	put_task_struct(child);
out:
	unlock_kernel();
	DBG(("sys_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
		request, pid, oaddr, odata, ret));
	return ret;
}

void syscall_trace(void)
{
Matthew Wilcox's avatar
Matthew Wilcox committed
414 415 416
	if (!test_thread_flag(TIF_SYSCALL_TRACE))
		return;
	if (!(current->ptrace & PT_PTRACED))
Linus Torvalds's avatar
Linus Torvalds committed
417
		return;
418 419
	current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
					? 0x80 : 0);
Linus Torvalds's avatar
Linus Torvalds committed
420 421 422 423 424 425 426 427 428 429 430 431 432
	current->state = TASK_STOPPED;
	notify_parent(current, SIGCHLD);
	schedule();
	/*
	 * this isn't the same as continuing with a signal, but it will do
	 * for normal use.  strace only continues with a signal if the
	 * stopping signal is not SIGTRAP.  -brl
	 */
	if (current->exit_code) {
		send_sig(current->exit_code, current, 1);
		current->exit_code = 0;
	}
}