/*
 * This file implements the perfmon-2 subsystem which is used
 * to program the IA-64 Performance Monitoring Unit (PMU).
 *
 * The initial version of perfmon.c was written by
 * Ganesh Venkitachalam, IBM Corp.
 *
 * Then it was modified for perfmon-1.x by Stephane Eranian and 
 * David Mosberger, Hewlett Packard Co.
 * 
 * Version Perfmon-2.x is a rewrite of perfmon-1.x
 * by Stephane Eranian, Hewlett Packard Co. 
 *
 * Copyright (C) 1999-2003  Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * More information about perfmon available at:
 * 	http://www.hpl.hp.com/research/linux/perfmon
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/version.h>

#include <asm/bitops.h>
#include <asm/errno.h>
#include <asm/intrinsics.h>
#include <asm/page.h>
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h>

#ifdef CONFIG_PERFMON
/*
 * perfmon context state
 */
#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */
#define PFM_CTX_TERMINATED	5	/* the task the context was loaded onto is gone */
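
/*
 * rough state-transition sketch (illustrative, not exhaustive):
 *
 *	UNLOADED -> LOADED      context attached to a task (or CPU)
 *	LOADED   -> MASKED      a counter overflowed and monitoring is
 *	                        masked until the notification is handled
 *	MASKED   -> LOADED      monitoring restarted
 *	any      -> ZOMBIE      owner is closing the context
 *	LOADED   -> TERMINATED  the monitored task exited
 */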

#define PFM_INVALID_ACTIVATION	(~0UL)

/*
 * depth of message queue
 */
#define PFM_MAX_MSGS		32
#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
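
/*
 * the message queue is a circular buffer: head == tail means empty,
 * and one slot is sacrificed to tell full from empty, so at most
 * PFM_MAX_MSGS-1 messages can be pending (see pfm_get_new_msg(),
 * which refuses to advance the tail onto the head)
 */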

/*
 * type of a PMU register (bitmask).
 * bitmask structure:
 * 	bit0   : register implemented
 * 	bit1   : end marker
 * 	bit2-3 : reserved
 * 	bit4   : pmc has pmc.pm
 * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
 * 	bit6-7 : register type
 * 	bit8-31: reserved
 */
#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
#define PFM_REG_IMPL		0x1 /* register implemented */
#define PFM_REG_END		0x2 /* end marker */
#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR|PFM_REG_IMPL) /* a monitor + pmc.oi+ PMD used as a counter */
#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */

#define PMC_IS_LAST(i)	(pmu_conf.pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i)	(pmu_conf.pmd_desc[i].type & PFM_REG_END)

#define PFM_IS_DISABLED() (pmu_conf.enabled == 0)

#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)

/* i assumed unsigned */
#define PMC_IS_IMPL(i)	  (i < PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i)	  (i < PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL))

/* XXX: these assume that register i is implemented */
#define PMD_IS_COUNTING(i) ((pmu_conf.pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf.pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i)  ((pmu_conf.pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i)  ((pmu_conf.pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)

#define PMC_DFL_VAL(i)     pmu_conf.pmc_desc[i].default_value
#define PMC_RSVD_MASK(i)   pmu_conf.pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i)	   pmu_conf.pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i)	   pmu_conf.pmc_desc[i].dep_pmd[0]

/* k assumed unsigned (up to 64 registers) */
#define IBR_IS_IMPL(k)	  (k < IA64_NUM_DBG_REGS)
#define DBR_IS_IMPL(k)	  (k < IA64_NUM_DBG_REGS)

#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h)		(h)->ctx_task

/* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)

#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)

#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
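
/*
 * the (n)>>6 / (n) % 64 arithmetic above is plain 64-bit bitmap
 * indexing: bit n lives in word n/64 at bit position n%64. With at
 * most 64 debug registers only element [0] is ever touched here.
 */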
#define PFM_CODE_RR	0	/* requesting code range restriction */
#define PFM_DATA_RR	1	/* requesting data range restriction */

#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)

/*
 * context protection macros
 * in SMP:
 * 	- we need to protect against CPU concurrency (spin_lock)
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 * in UP:
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 *
 * spin_lock_irqsave()/spin_lock_irqrestore():
 * 	in SMP: local_irq_disable + spin_lock
 * 	in UP : local_irq_disable
 *
 * spin_lock()/spin_unlock():
 * 	in UP : removed automatically
 * 	in SMP: protect against context accesses from other CPU. interrupts
 * 	        are not masked. This is useful for the PMU interrupt handler
 * 	        because we know we will not get PMU concurrency in that code.
 */
#define PROTECT_CTX(c, f) \
	do {  \
		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
	} while(0)

#define UNPROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

#define PROTECT_CTX_NOPRINT(c, f) \
	do {  \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
	} while(0)


#define UNPROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)


#define PROTECT_CTX_NOIRQ(c) \
	do {  \
		spin_lock(&(c)->ctx_lock); \
	} while(0)

#define UNPROTECT_CTX_NOIRQ(c) \
	do { \
		spin_unlock(&(c)->ctx_lock); \
	} while(0)
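
/*
 * illustrative usage pattern (not tied to a specific call site):
 *
 *	unsigned long flags;
 *
 *	PROTECT_CTX(ctx, flags);
 *	... update ctx state: safe from other CPUs and from the
 *	    PMU overflow interrupt on this CPU ...
 *	UNPROTECT_CTX(ctx, flags);
 */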


#ifdef CONFIG_SMP

#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()

#else /* !CONFIG_SMP */
#define SET_ACTIVATION(t) 	do {} while(0)
#define GET_ACTIVATION(t) 	do {} while(0)
#define INC_ACTIVATION(t) 	do {} while(0)
#endif /* CONFIG_SMP */
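
/*
 * lazy-restore sketch: each CPU keeps a monotonically increasing
 * activation number. When a context is loaded, SET_ACTIVATION()
 * records the CPU's current number in ctx_last_activation. On a
 * later context switch in, if that number still matches the CPU's
 * current one (and the CPU matches ctx_last_cpu), the PMU still
 * holds this context's state and the register reload can be skipped.
 */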

#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)

#define LOCK_PFS()	    	spin_lock(&pfm_sessions.pfs_lock)
#define UNLOCK_PFS()	    	spin_unlock(&pfm_sessions.pfs_lock)

#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
/*
 * cmp0 must be the value of pmc0
 */
#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
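
/*
 * bit 0 of pmc0 is the freeze bit, the remaining bits flag which
 * counters overflowed; masking off bit 0 therefore tells whether
 * at least one counter overflowed (see pfm_freeze_pmu(), which
 * freezes the PMU by writing 1 into pmc0)
 */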

#define PFMFS_MAGIC 0xa0b4d889

/*
 * debugging
 */
#define DPRINT(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
	} while (0)

#define DPRINT_ovfl(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl > 0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
	} while (0)
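
/*
 * note the double parentheses at the call sites: DPRINT((...))
 * passes the whole printk argument list as a single macro argument,
 * avoiding the need for a variadic macro, e.g.:
 *
 *	DPRINT(("ctx=%p state=%d\n", ctx, ctx->ctx_state));
 */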
/*
 * Architected PMC structure
 */
typedef struct {
	unsigned long pmc_plm:4;	/* privilege level mask */
	unsigned long pmc_ev:1;		/* external visibility */
	unsigned long pmc_oi:1;		/* overflow interrupt */
	unsigned long pmc_pm:1;		/* privileged monitor */
	unsigned long pmc_ig1:1;	/* reserved */
	unsigned long pmc_es:8;		/* event select */
	unsigned long pmc_ig2:48;	/* reserved */
} pfm_monitor_t;

/*
 * 64-bit software counter structure
 *
 * the next_reset_type is applied to the next call to pfm_reset_regs()
 */
typedef struct {
	unsigned long	val;		/* virtual 64bit counter value */
	unsigned long	lval;		/* last reset value */
	unsigned long	long_reset;	/* reset value on sampling overflow */
	unsigned long	short_reset;    /* reset value on overflow */
	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflows */
	unsigned long	seed;		/* seed for random-number generator */
	unsigned long	mask;		/* mask for random-number generator */
	unsigned int 	flags;		/* notify/do not notify */
	int 		next_reset_type;/* PFM_PMD_NO_RESET, PFM_PMD_LONG_RESET, PFM_PMD_SHORT_RESET */
	unsigned long	eventid;	/* overflow event identifier */
} pfm_counter_t;

/*
 * context flags
 */
typedef struct {
	unsigned int block:1;		/* when 1, task will block on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
	unsigned int is_sampling:1;	/* true if using a custom format */
	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
	unsigned int unsecure:1;	/* when 1, use fast (unsecure) ctxsw (psr.sp not enforced) */
	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
	unsigned int no_msg:1;		/* no message sent on overflow */
	unsigned int reserved:22;
} pfm_context_flags_t;

#define PFM_TRAP_REASON_NONE		0x0	/* default value */
#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */


/*
 * perfmon context: encapsulates all the state of a monitoring session
 */

typedef struct pfm_context {
	spinlock_t		ctx_lock;		/* context protection */

	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */

	struct task_struct 	*ctx_task;		/* task to which context is attached */

	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */

	struct semaphore	ctx_restart_sem;   	/* used for blocking notification mode */

	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */

	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */

	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */

	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */

	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */

	u64			ctx_saved_psr_up;	/* only contains psr.up value */

	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */

	int			ctx_fd;			/* file descriptor used by this context */

	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

	wait_queue_head_t 	ctx_msgq_wait;
	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
	int			ctx_msgq_head;
	int			ctx_msgq_tail;
	struct fasync_struct	*ctx_async_queue;

	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
} pfm_context_t;

/*
 * verify that a file really belongs to perfmon by
 * checking its file operations
 */
#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)

#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)

#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v)	do {} while(0)
#define GET_LAST_CPU(ctx)	do {} while(0)
#endif


#define ctx_fl_block		ctx_flags.block
#define ctx_fl_system		ctx_flags.system
#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
#define ctx_fl_is_sampling	ctx_flags.is_sampling
#define ctx_fl_excl_idle	ctx_flags.excl_idle
#define ctx_fl_unsecure		ctx_flags.unsecure
#define ctx_fl_going_zombie	ctx_flags.going_zombie
#define ctx_fl_trap_reason	ctx_flags.trap_reason
#define ctx_fl_no_msg		ctx_flags.no_msg

#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0)
#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking

/*
 * global information about all sessions
 * mostly used to synchronize between system wide and per-process
 */
typedef struct {
	spinlock_t		pfs_lock;		   /* lock the structure */

	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
} pfm_session_t;

/*
 * information about a PMC or PMD.
 * dep_pmd[]: a bitmask of dependent PMD registers
 * dep_pmc[]: a bitmask of dependent PMC registers
 */
typedef struct {
	unsigned int		type;
	int			pm_pos;
	unsigned long		default_value;	/* power-on default value */
	unsigned long		reserved_mask;	/* bitmask of reserved bits */
	int			(*read_check)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
	int			(*write_check)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
	unsigned long		dep_pmd[4];
	unsigned long		dep_pmc[4];
} pfm_reg_desc_t;
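
/*
 * read_check/write_check, when non-NULL, give a PMU model the chance
 * to validate or adjust a value on its way between user level and
 * the register, e.g. to enforce the reserved_mask on a write
 */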

/* assume cnum is a valid monitor */
#define PMC_PM(cnum, val)	(((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1)
#define PMC_WR_FUNC(cnum)	(pmu_conf.pmc_desc[cnum].write_check)
#define PMD_WR_FUNC(cnum)	(pmu_conf.pmd_desc[cnum].write_check)
#define PMD_RD_FUNC(cnum)	(pmu_conf.pmd_desc[cnum].read_check)

/*
 * This structure is initialized at boot time and contains
 * a description of the PMU main characteristics.
 */
typedef struct {
	unsigned long  ovfl_val;	/* overflow value for counters */

	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */

	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */

	char	      *pmu_name;	/* PMU family name */
	unsigned int  enabled;		/* indicates if perfmon initialized properly */
	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */

	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */

	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
427 428
} pmu_config_t;

/*
 * debug register related type definitions
 */
typedef struct {
	unsigned long ibr_mask:56;
	unsigned long ibr_plm:4;
	unsigned long ibr_ig:3;
	unsigned long ibr_x:1;
} ibr_mask_reg_t;

typedef struct {
	unsigned long dbr_mask:56;
	unsigned long dbr_plm:4;
	unsigned long dbr_ig:2;
	unsigned long dbr_w:1;
	unsigned long dbr_r:1;
} dbr_mask_reg_t;

typedef union {
	unsigned long  val;
	ibr_mask_reg_t ibr;
	dbr_mask_reg_t dbr;
} dbreg_t;


/*
 * perfmon command descriptions
 */
typedef struct {
	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	char		*cmd_name;
	int		cmd_flags;
	unsigned int	cmd_narg;
	size_t		cmd_argsize;
	int		(*cmd_getsize)(void *arg, size_t *sz);
} pfm_cmd_desc_t;

#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */


#define PFM_CMD_IDX(cmd)	(cmd)
#define PFM_CMD_IS_VALID(cmd)	((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \
				  && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL)

#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_name
#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_STOP)

#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
#define PFM_CMD_NARG(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
#define PFM_CMD_ARG_SIZE(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
#define PFM_CMD_GETSIZE(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_getsize)

typedef struct {
	int	debug;		/* turn on/off debugging via syslog */
	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
	int 	debug_pfm_read;
} pfm_sysctl_t;

typedef struct {
	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
	unsigned long pfm_sysupdt_count;
	unsigned long pfm_sysupdt_cycles;
	unsigned long pfm_smpl_handler_calls;
	unsigned long pfm_smpl_handler_cycles;
	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
} pfm_stats_t;

/*
 * perfmon internal variables
 */
static pfm_stats_t		pfm_stats[NR_CPUS];
static pfm_session_t		pfm_sessions;	/* global sessions information */

static struct proc_dir_entry 	*perfmon_dir;
static pfm_uuid_t		pfm_null_uuid = {0,};

static spinlock_t		pfm_smpl_fmt_lock;
static pfm_buffer_fmt_t		*pfm_buffer_fmt_list;
#define LOCK_BUF_FMT_LIST()	    spin_lock(&pfm_smpl_fmt_lock)
#define UNLOCK_BUF_FMT_LIST()	    spin_unlock(&pfm_smpl_fmt_lock)

/* sysctl() controls */
static pfm_sysctl_t pfm_sysctl;
int pfm_debug_var;

static ctl_table pfm_ctl_table[]={
	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
	{ 0, },
};
static ctl_table pfm_sysctl_dir[] = {
	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
 	{0,},
};
static ctl_table pfm_sysctl_root[] = {
	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
 	{0,},
};
static struct ctl_table_header *pfm_sysctl_header;

static void pfm_vm_close(struct vm_area_struct * area);

static struct vm_operations_struct pfm_vm_ops = {
	.close = pfm_vm_close
};

#define pfm_wait_task_inactive(t)	wait_task_inactive(t)
#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
#define pfm_get_cpu_data(a,b)		per_cpu(a, b)
typedef	irqreturn_t	pfm_irq_handler_t;
#define PFM_IRQ_HANDLER_RET(v)	do {  \
		put_cpu_no_resched(); \
		return IRQ_HANDLED;   \
	} while(0)

static inline void
pfm_put_task(struct task_struct *task)
{
	if (task != current) put_task_struct(task);
}

static inline void
pfm_set_task_notify(struct task_struct *task)
{
	struct thread_info *info;

	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
	set_bit(TIF_NOTIFY_RESUME, &info->flags);
}

static inline void
pfm_clear_task_notify(void)
{
	clear_thread_flag(TIF_NOTIFY_RESUME);
}

static inline void
pfm_reserve_page(unsigned long a)
{
	SetPageReserved(vmalloc_to_page((void *)a));
}
static inline void
pfm_unreserve_page(unsigned long a)
{
	ClearPageReserved(vmalloc_to_page((void*)a));
}
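
/*
 * the sampling buffer pages are remapped into user space, so they
 * are marked reserved while mapped: this keeps the VM from treating
 * them as ordinary pageable memory (remap_page_range() historically
 * expects reserved pages for this kind of mapping)
 */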

static inline int
pfm_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot)
{
	return remap_page_range(vma, from, phys_addr, size, prot);
}

static inline unsigned long
pfm_protect_ctx_ctxsw(pfm_context_t *x)
{
	spin_lock(&(x)->ctx_lock);
	return 0UL;
}

static inline void
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
{
	spin_unlock(&(x)->ctx_lock);
}
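
/*
 * at context-switch time interrupts are already disabled, so a
 * plain spin_lock is enough here. The dummy 0UL return value of
 * pfm_protect_ctx_ctxsw() stands in for the flags returned by the
 * irqsave variant, so callers can treat both flavors uniformly.
 */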

static inline unsigned int
pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
{
	return do_munmap(mm, addr, len);
}

static inline unsigned long 
pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
{
	return get_unmapped_area(file, addr, len, pgoff, flags);
}


static struct super_block *
pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
}

static struct file_system_type pfm_fs_type = {
	.name     = "pfmfs",
	.get_sb   = pfmfs_get_sb,
	.kill_sb  = kill_anon_super,
};

DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
DEFINE_PER_CPU(unsigned long, pmu_activation_number);


/* forward declaration */
static struct file_operations pfm_file_ops;

/*
 * forward declarations
 */
#ifndef CONFIG_SMP
static void pfm_lazy_save_regs (struct task_struct *ta);
#endif

/*
 * the HP simulator must be first because
 * CONFIG_IA64_HP_SIM is independent of CONFIG_MCKINLEY or CONFIG_ITANIUM
 */
#if defined(CONFIG_IA64_HP_SIM)
#include "perfmon_hpsim.h"
#elif   defined(CONFIG_ITANIUM)
#include "perfmon_itanium.h"
#elif defined(CONFIG_MCKINLEY)
#include "perfmon_mckinley.h"
#else
#include "perfmon_generic.h"
#endif

static int pfm_end_notify_user(pfm_context_t *ctx);

static inline void
pfm_clear_psr_pp(void)
{
	ia64_rsm(IA64_PSR_PP);
	ia64_srlz_i();
}

static inline void
pfm_set_psr_pp(void)
{
	ia64_ssm(IA64_PSR_PP);
	ia64_srlz_i();
}

static inline void
pfm_clear_psr_up(void)
{
	ia64_rsm(IA64_PSR_UP);
	ia64_srlz_i();
}

static inline void
pfm_set_psr_up(void)
{
	ia64_ssm(IA64_PSR_UP);
	ia64_srlz_i();
}

static inline unsigned long
pfm_get_psr(void)
{
	unsigned long tmp;
	tmp = ia64_getreg(_IA64_REG_PSR);
	ia64_srlz_i();
	return tmp;
}

static inline void
pfm_set_psr_l(unsigned long val)
{
	ia64_setreg(_IA64_REG_PSR_L, val);
	ia64_srlz_i();
}

static inline void
pfm_freeze_pmu(void)
{
	ia64_set_pmc(0, 1UL);
	ia64_srlz_d();
}

static inline void
pfm_unfreeze_pmu(void)
{
	ia64_set_pmc(0, 0UL);
	ia64_srlz_d();
}
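
/*
 * writing 1 into pmc0 sets the freeze bit: all counters and
 * overflow interrupt generation stop until the bit is cleared
 * again. The ia64_srlz_d() makes the update visible before any
 * subsequent data access to the PMU.
 */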

/*
 * PMD[i] must be a counter. no check is made
 */
static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val);
}

/*
 * PMD[i] must be a counter. no check is made
 */
static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
	ctx->ctx_pmds[i].val = val  & ~pmu_conf.ovfl_val;
	/*
	 * writing to the unimplemented part is ignored, so we do not need to
	 * mask off top part
	 */
	ia64_set_pmd(i, val & pmu_conf.ovfl_val);
}
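
/*
 * worked example (assuming a 47-bit hardware counter, i.e.
 * pmu_conf.ovfl_val == (1UL<<47)-1): after
 *
 *	pfm_write_soft_counter(ctx, i, 0x0001000000000abcUL);
 *
 * ctx->ctx_pmds[i].val holds 0x0001000000000000 (the software,
 * upper part) and the hardware PMD holds 0xabc (the lower part);
 * pfm_read_soft_counter() adds the two halves back together.
 */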

static pfm_msg_t *
pfm_get_new_msg(pfm_context_t *ctx)
{
	int idx, next;

	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;

	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
	if (next == ctx->ctx_msgq_head) return NULL;

	idx = ctx->ctx_msgq_tail;
	ctx->ctx_msgq_tail = next;

	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));

	return ctx->ctx_msgq+idx;
}

static pfm_msg_t *
pfm_get_next_msg(pfm_context_t *ctx)
{
	pfm_msg_t *msg;

	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

	if (PFM_CTXQ_EMPTY(ctx)) return NULL;

	/*
	 * get oldest message
	 */
	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;

	/*
	 * and move forward
	 */
	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;

	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));

	return msg;
}

static void
pfm_reset_msgq(pfm_context_t *ctx)
{
	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
	DPRINT(("ctx=%p msgq reset\n", ctx));
}


Linus Torvalds's avatar
Linus Torvalds committed
795
/* Here we want the physical address of the memory.
796 797
 * This is used when initializing the contents of the
 * area and marking the pages as reserved.
Linus Torvalds's avatar
Linus Torvalds committed
798
 */
Linus Torvalds's avatar
Linus Torvalds committed
799
static inline unsigned long
David Mosberger's avatar
David Mosberger committed
800
pfm_kvirt_to_pa(unsigned long adr)
Linus Torvalds's avatar
Linus Torvalds committed
801
{
Linus Torvalds's avatar
Linus Torvalds committed
802
	__u64 pa = ia64_tpa(adr);
Linus Torvalds's avatar
Linus Torvalds committed
803 804 805 806
	return pa;
}

static void *
David Mosberger's avatar
David Mosberger committed
807
pfm_rvmalloc(unsigned long size)
Linus Torvalds's avatar
Linus Torvalds committed
808 809
{
	void *mem;
810
	unsigned long addr;
Linus Torvalds's avatar
Linus Torvalds committed
811

812 813
	size = PAGE_ALIGN(size);
	mem  = vmalloc(size);
Linus Torvalds's avatar
Linus Torvalds committed
814
	if (mem) {
David Mosberger's avatar
David Mosberger committed
815
		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
816 817
		memset(mem, 0, size);
		addr = (unsigned long)mem;
Linus Torvalds's avatar
Linus Torvalds committed
818
		while (size > 0) {
819 820
			pfm_reserve_page(addr);
			addr+=PAGE_SIZE;
Linus Torvalds's avatar
Linus Torvalds committed
821 822 823 824 825 826 827
			size-=PAGE_SIZE;
		}
	}
	return mem;
}

static void
David Mosberger's avatar
David Mosberger committed
828
pfm_rvfree(void *mem, unsigned long size)
Linus Torvalds's avatar
Linus Torvalds committed
829
{
830
	unsigned long addr;
Linus Torvalds's avatar
Linus Torvalds committed
831 832

	if (mem) {
833 834
		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
		addr = (unsigned long) mem;
835
		while ((long) size > 0) {
836 837
			pfm_unreserve_page(addr);
			addr+=PAGE_SIZE;
Linus Torvalds's avatar
Linus Torvalds committed
838 839 840 841
			size-=PAGE_SIZE;
		}
		vfree(mem);
	}
David Mosberger's avatar
David Mosberger committed
842 843 844
	return;
}

845 846
static pfm_context_t *
pfm_context_alloc(void)
David Mosberger's avatar
David Mosberger committed
847
{
848
	pfm_context_t *ctx;
David Mosberger's avatar
David Mosberger committed
849

850 851 852 853 854
	/* allocate context descriptor */
	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
	if (ctx) {
		memset(ctx, 0, sizeof(pfm_context_t));
		DPRINT(("alloc ctx @%p\n", ctx));
David Mosberger's avatar
David Mosberger committed
855
	}
856 857
	return ctx;
}
David Mosberger's avatar
David Mosberger committed
858

859 860 861 862 863 864
static void
pfm_context_free(pfm_context_t *ctx)
{
	if (ctx) {
		DPRINT(("free ctx @%p\n", ctx));
		kfree(ctx);
David Mosberger's avatar
David Mosberger committed
865 866 867
	}
}

868 869
static void
pfm_mask_monitoring(struct task_struct *task)
David Mosberger's avatar
David Mosberger committed
870
{
871 872 873 874
	pfm_context_t *ctx = PFM_GET_CTX(task);
	struct thread_struct *th = &task->thread;
	unsigned long mask, val;
	int i;
David Mosberger's avatar
David Mosberger committed
875

876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
	DPRINT(("[%d] masking monitoring for [%d]\n", current->pid, task->pid));

	/*
	 * monitoring can only be masked as a result of a valid
	 * counter overflow. In UP, it means that the PMU still
	 * has an owner. Note that the owner can be different
	 * from the current task. However the PMU state belongs
	 * to the owner.
	 * In SMP, a valid overflow only happens when task is
	 * current. Therefore if we come here, we know that
	 * the PMU state belongs to the current task, therefore
	 * we can access the live registers.
	 *
	 * So in both cases, the live register contains the owner's
	 * state. We can ONLY touch the PMU registers and NOT the PSR.
	 *
	 * As a consequence to this call, the thread->pmds[] array
	 * contains stale information which must be ignored
	 * when context is reloaded AND monitoring is active (see
	 * pfm_restart).
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;
		val = ia64_get_pmd(i);

		if (PMD_IS_COUNTING(i)) {
			/*
		 	 * we rebuild the full 64 bit value of the counter
		 	 */
			ctx->ctx_pmds[i].val += (val & pmu_conf.ovfl_val);
		} else {
			ctx->ctx_pmds[i].val = val;
		}
		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val & pmu_conf.ovfl_val));
	}
	/*
	 * mask monitoring by setting the privilege level to 0
	 * we cannot use psr.pp/psr.up for this, it is controlled by
	 * the user
	 *
	 * if task is current, modify actual registers, otherwise modify
	 * thread save state, i.e., what will be restored in pfm_load_regs()
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
		th->pmcs[i] &= ~0xfUL;
	}
	/*
	 * make all of this visible
	 */
	ia64_srlz_d();
}

/*
 * must always be done with task == current
 *
 * context must be in MASKED state when calling
 */
static void
pfm_restore_monitoring(struct task_struct *task)
{
	pfm_context_t *ctx = PFM_GET_CTX(task);
	struct thread_struct *th = &task->thread;
	unsigned long mask;
	unsigned long psr, val;
	int i, is_system;

	is_system = ctx->ctx_fl_system;

	if (task != current) {
		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
		return;
	}
	if (ctx->ctx_state != PFM_CTX_MASKED) {
		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
			task->pid, current->pid, ctx->ctx_state);
		return;
	}
	psr = pfm_get_psr();
	/*
	 * monitoring is masked via the PMC.
	 * As we restore their value, we do not want each counter to
	 * restart right away. We stop monitoring using the PSR,
	 * restore the PMC (and PMD) and then re-establish the psr
	 * as it was. Note that there can be no pending overflow at
	 * this point, because monitoring was MASKED.
	 *
	 * system-wide session are pinned and self-monitoring
	 */
972
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
973
		/* disable dcr pp */
974
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
		pfm_clear_psr_pp();
	} else {
		pfm_clear_psr_up();
	}
	/*
	 * first, we restore the PMD
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;

		if (PMD_IS_COUNTING(i)) {
			/*
			 * we split the 64bit value according to
			 * counter width
			 */
			val = ctx->ctx_pmds[i].val & pmu_conf.ovfl_val;
			ctx->ctx_pmds[i].val &= ~pmu_conf.ovfl_val;
		} else {
			val = ctx->ctx_pmds[i].val;
		}
		ia64_set_pmd(i, val);

		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val));
	}
	/*
	 * restore the PMCs
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		th->pmcs[i] = ctx->ctx_pmcs[i];
		ia64_set_pmc(i, th->pmcs[i]);
		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
	}
	ia64_srlz_d();

	/*
	 * now restore PSR
	 */
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
		/* enable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
		ia64_srlz_i();
	}
	pfm_set_psr_l(psr);
}

static inline void
pfm_save_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;

	ia64_srlz_d();

	for (i=0; mask; i++, mask>>=1) {
		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
	}
}

/*
 * reload from thread state (used for ctxsw only)
 */
static inline void
pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;
	unsigned long val, ovfl_val = pmu_conf.ovfl_val;

	DPRINT(("mask=0x%lx\n", mask));
	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
		ia64_set_pmd(i, val);
		DPRINT(("pmd[%d]=0x%lx\n", i, val));
	}
	ia64_srlz_d();
}

/*
 * propagate PMD from context to thread-state
 */
static inline void
pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
{
	struct thread_struct *thread = &task->thread;
	unsigned long ovfl_val = pmu_conf.ovfl_val;
	unsigned long mask = ctx->ctx_all_pmds[0];
	unsigned long val;
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {

		val = ctx->ctx_pmds[i].val;

		/*
		 * We break up the 64 bit value into 2 pieces
		 * the lower bits go to the machine state in the
		 * thread (will be reloaded on ctxsw in).
		 * The upper part stays in the soft-counter.
		 */
		if (PMD_IS_COUNTING(i)) {
			ctx->ctx_pmds[i].val = val & ~ovfl_val;
			val &= ovfl_val;
		}
		thread->pmds[i] = val;

		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
			i,
			thread->pmds[i],
			ctx->ctx_pmds[i].val));
	}
}

/*
 * propagate PMC from context to thread-state
 */
static inline void
pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
{
	struct thread_struct *thread = &task->thread;
	unsigned long mask = ctx->ctx_all_pmcs[0];
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {
		/* masking 0 with ovfl_val yields 0 */
		thread->pmcs[i] = ctx->ctx_pmcs[i];
		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
	}
}



static inline void
pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
{
	int i;

	DPRINT(("mask=0x%lx\n", mask));
	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		ia64_set_pmc(i, pmcs[i]);
		DPRINT(("pmc[%d]=0x%lx\n", i, pmcs[i]));
	}
	ia64_srlz_d();
}

static inline void
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
{
	int i;

	for (i=0; i < nibrs; i++) {
		ia64_set_ibr(i, ibrs[i]);
	}
	ia64_srlz_i();
}

static inline void
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
{
	int i;

	for (i=0; i < ndbrs; i++) {
		ia64_set_dbr(i, dbrs[i]);
	}
	ia64_srlz_d();
}

static inline int
pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
{
	return memcmp(a, b, sizeof(pfm_uuid_t));
}

static inline int
pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
{
	int ret = 0;
	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
	return ret;
}


static inline int
pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
		     int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
		     int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
	return ret;
}



int
pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
{
	pfm_buffer_fmt_t *p;
	int ret = 0;

	/* some sanity checks */
	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;

	/* we need at least a handler */
	if (fmt->fmt_handler == NULL) return -EINVAL;

	/*
	 * XXX: need check validity of fmt_arg_size
	 */

	LOCK_BUF_FMT_LIST();
	p = pfm_buffer_fmt_list;


	while (p) {
		if (pfm_uuid_cmp(fmt->fmt_uuid, p->fmt_uuid) == 0) break;
		p = p->fmt_next;
	}

	if (p) {
		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
		ret = -EBUSY;
	} else {
		fmt->fmt_prev = NULL;
		fmt->fmt_next = pfm_buffer_fmt_list;
		pfm_buffer_fmt_list = fmt;
		printk(KERN_ERR "perfmon: added sampling format %s\n", fmt->fmt_name);
	}
	UNLOCK_BUF_FMT_LIST();

	return ret;
}

int
pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t *p;
	int ret = 0;

	LOCK_BUF_FMT_LIST();
	p = pfm_buffer_fmt_list;
	while (p) {
		if (memcmp(uuid, p->fmt_uuid, sizeof(pfm_uuid_t)) == 0) break;
		p = p->fmt_next;
	}
	if (p) {
		if (p->fmt_prev)
			p->fmt_prev->fmt_next = p->fmt_next;
		else
			pfm_buffer_fmt_list = p->fmt_next;

		if (p->fmt_next)
			p->fmt_next->fmt_prev = p->fmt_prev;

		printk(KERN_ERR "perfmon: removed sampling format: %s\n",  p->fmt_name);
		p->fmt_next = p->fmt_prev = NULL;
	} else {
		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
		ret = -EINVAL;
	}
	UNLOCK_BUF_FMT_LIST();

	return ret;

}

/*
 * find a buffer format based on its uuid
 */
static pfm_buffer_fmt_t *
pfm_find_buffer_fmt(pfm_uuid_t uuid, int nolock)
{
	pfm_buffer_fmt_t *p;

	LOCK_BUF_FMT_LIST();
	for (p = pfm_buffer_fmt_list; p ; p = p->fmt_next) {
		if (pfm_uuid_cmp(uuid, p->fmt_uuid) == 0) break;
	}

	UNLOCK_BUF_FMT_LIST();

	return p;
}

static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
{
	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS();

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	if (is_syswide) {
		/*
		 * cannot mix system wide and per-task sessions
		 */
		if (pfm_sessions.pfs_task_sessions > 0UL) {
			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
			  	pfm_sessions.pfs_task_sessions));
			goto abort;
		}

		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;

		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));

		pfm_sessions.pfs_sys_session[cpu] = task;

		pfm_sessions.pfs_sys_sessions++;

	} else {
		if (pfm_sessions.pfs_sys_sessions) goto abort;
		pfm_sessions.pfs_task_sessions++;
	}

	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	UNLOCK_PFS();

	return 0;

error_conflict:
	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
  		pfm_sessions.pfs_sys_session[cpu]->pid,
		smp_processor_id()));
abort:
	UNLOCK_PFS();

	return -EBUSY;

}

static int
pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
{

	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS();

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));


	if (is_syswide) {
		pfm_sessions.pfs_sys_session[cpu] = NULL;
		/*
		 * would not work with perfmon+more than one bit in cpu_mask
		 */
		if (ctx && ctx->ctx_fl_using_dbreg) {
			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
			} else {
				pfm_sessions.pfs_sys_use_dbregs--;
			}
		}
		pfm_sessions.pfs_sys_sessions--;
	} else {
		pfm_sessions.pfs_task_sessions--;
	}
	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	UNLOCK_PFS();

	return 0;
}

/*
 * removes virtual mapping of the sampling buffer.
 * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
 * a PROTECT_CTX() section.
 */
static int
pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
{
	int r;

	/* sanity checks */
	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
		return -EINVAL;
	}

	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));

	/*
	 * does the actual unmapping
	 */
	down_write(&task->mm->mmap_sem);

	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));

	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);

	up_write(&task->mm->mmap_sem);
	if (r != 0) {
		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
	}

	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));

	return 0;
}

/*
 * free actual physical storage used by sampling buffer
 */
#if 0
static int
pfm_free_smpl_buffer(pfm_context_t *ctx)
{
	pfm_buffer_fmt_t *fmt;

	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;

	/*
	 * we won't use the buffer format anymore
	 */
	fmt = ctx->ctx_buf_fmt;

	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
		ctx->ctx_smpl_hdr,
		ctx->ctx_smpl_size,
		ctx->ctx_smpl_vaddr));

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

	/*
	 * free the buffer
	 */
	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);

	ctx->ctx_smpl_hdr  = NULL;
	ctx->ctx_smpl_size = 0UL;

	return 0;

invalid_free:
	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
	return -EINVAL;
}
#endif

static inline void
pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
{
	if (fmt == NULL) return;

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

}

/*
 * pfmfs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pfm: will go nicely and kill the special-casing in procfs.
 */
static struct vfsmount *pfmfs_mnt;

static int __init
init_pfm_fs(void)
{
	int err = register_filesystem(&pfm_fs_type);
	if (!err) {
		pfmfs_mnt = kern_mount(&pfm_fs_type);
		err = PTR_ERR(pfmfs_mnt);
		if (IS_ERR(pfmfs_mnt))
			unregister_filesystem(&pfm_fs_type);
		else
			err = 0;
	}
	return err;
}

static void __exit
exit_pfm_fs(void)
{
	unregister_filesystem(&pfm_fs_type);
	mntput(pfmfs_mnt);
}

static loff_t
pfm_lseek(struct file *file, loff_t offset, int whence)
{
	DPRINT(("pfm_lseek called\n"));
	return -ESPIPE;
}

static ssize_t
pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos)
{
	pfm_context_t *ctx;
	pfm_msg_t *msg;
	ssize_t ret;
	unsigned long flags;
	DECLARE_WAITQUEUE(wait, current);

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", current->pid);
		return -EINVAL;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
		return -EINVAL;
	}

	/*
	 * check even when there is no message
	 */
	if (size < sizeof(pfm_msg_t)) {
		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
		return -EINVAL;
	}
	/*
	 * seeks are not allowed on message queues
	 */
	if (ppos != &filp->f_pos) return -ESPIPE;

	PROTECT_CTX(ctx, flags);

  	/*
	 * put ourselves on the wait queue
	 */
  	add_wait_queue(&ctx->ctx_msgq_wait, &wait);


  	for(;;) {
		/*
		 * check wait queue
		 */

  		set_current_state(TASK_INTERRUPTIBLE);

		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

		ret = 0;
		if(PFM_CTXQ_EMPTY(ctx) == 0) break;

		UNPROTECT_CTX(ctx, flags);

		/*
		 * check non-blocking read
		 */
		ret = -EAGAIN;
		if (filp->f_flags & O_NONBLOCK) break;

		/*
		 * check pending signals
		 */
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		/*
		 * no message, so wait
		 */
		schedule();

		PROTECT_CTX(ctx, flags);
	}
	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
  	set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);

	if (ret < 0) goto abort;

	ret = -EINVAL;
	msg = pfm_get_next_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
		goto abort_locked;
	}

	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));

	ret = -EFAULT;
	if (copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);

abort_locked:
	UNPROTECT_CTX(ctx, flags);
abort:
	return ret;
}
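
/*
 * Illustrative only, hence the #if 0: a minimal sketch of how a user-level
 * monitoring tool might consume this message queue. The context fd obtained
 * via perfmonctl() and the user-side header are assumptions made for the
 * example; a read shorter than sizeof(pfm_msg_t) is rejected (size check above).
 */
#if 0
#include <poll.h>
#include <unistd.h>
#include <perfmon/perfmon.h>	/* hypothetical user-level header */

static void
wait_for_msg(int ctx_fd)
{
	pfm_msg_t msg;
	struct pollfd pfd = { .fd = ctx_fd, .events = POLLIN };

	/* pfm_poll() reports POLLIN|POLLRDNORM once a message is queued */
	poll(&pfd, 1, -1);

	/* pfm_read() only returns complete pfm_msg_t messages */
	if (read(ctx_fd, &msg, sizeof(msg)) == sizeof(msg)) {
		/* msg.pfm_gen_msg.msg_type distinguishes overflow from end */
	}
}
#endif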

static ssize_t
pfm_write(struct file *file, const char *ubuf,
			  size_t size, loff_t *ppos)
{
	DPRINT(("pfm_write called\n"));
	return -EINVAL;
}

static unsigned int
pfm_poll(struct file *filp, poll_table * wait)
{
	pfm_context_t *ctx;
	unsigned long flags;
	unsigned int mask = 0;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
		return 0;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
		return 0;
	}


	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));

	poll_wait(filp, &ctx->ctx_msgq_wait, wait);

	PROTECT_CTX(ctx, flags);

	if (PFM_CTXQ_EMPTY(ctx) == 0)
		mask = POLLIN | POLLRDNORM;

	UNPROTECT_CTX(ctx, flags);

	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));

	return mask;
}

static int
pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
{
	DPRINT(("pfm_ioctl called\n"));
	return -EINVAL;
}

/*
 * context is locked when coming here
 */
static inline int
pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
{
	int ret;

	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);

	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		current->pid,
		fd,
		on,
		ctx->ctx_async_queue, ret));

	return ret;
}

static int
pfm_fasync(int fd, struct file *filp, int on)
{
	pfm_context_t *ctx;
	unsigned long flags;
	int ret;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
		return -EBADF;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
		return -EBADF;
	}


	PROTECT_CTX(ctx, flags);

	ret = pfm_do_fasync(fd, filp, ctx, on);

	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		current->pid,
		fd,
		on,
		ctx->ctx_async_queue, ret));

	UNPROTECT_CTX(ctx, flags);

	return ret;
}

#ifdef CONFIG_SMP
/*
 * this function is exclusively called from pfm_close().
 * The context is not protected at that time, nor are interrupts
 * on the remote CPU. That's necessary to avoid deadlocks.
 */
static void
pfm_syswide_force_stop(void *info)
{
	pfm_context_t   *ctx = (pfm_context_t *)info;
	struct pt_regs *regs = ia64_task_regs(current);
	struct task_struct *owner;

	if (ctx->ctx_cpu != smp_processor_id()) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d  but on CPU%d\n",
			ctx->ctx_cpu,
			smp_processor_id());
		return;
	}
	owner = GET_PMU_OWNER();
	if (owner != ctx->ctx_task) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
			smp_processor_id(),
			owner->pid, ctx->ctx_task->pid);
		return;
	}
	if (GET_PMU_CTX() != ctx) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
			smp_processor_id(),
			GET_PMU_CTX(), ctx);
		return;
	}

	DPRINT(("[%d] on CPU%d forcing system wide stop for [%d]\n", current->pid, smp_processor_id(), ctx->ctx_task->pid));

	/*
	 * Update local PMU
	 */
	ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
	ia64_srlz_i();
	/*
	 * update local cpuinfo
	 */
	PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
	PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
	PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);

	pfm_clear_psr_pp();

	/*
	 * also stop monitoring in the local interrupted task
	 */
	ia64_psr(regs)->pp = 0;

	SET_PMU_OWNER(NULL, NULL);
}

static void
pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
{
	int ret;

	DPRINT(("[%d] calling CPU%d for cleanup\n", current->pid, ctx->ctx_cpu));
	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
	DPRINT(("[%d] called CPU%d for cleanup ret=%d\n", current->pid, ctx->ctx_cpu, ret));
}
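
/*
 * Note: smp_call_function_single() is called with wait=1, so the caller
 * synchronously waits until pfm_syswide_force_stop() has completed on the
 * target CPU. This is only safe because, as noted above, the context is
 * not locked and interrupts are enabled at the call site.
 */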
#endif /* CONFIG_SMP */

/*
 * called either on explicit close() or from exit_files().
 *
 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero (fput()), i.e.,
 * the last task to access the file. Nobody else can access the file at this point.
 *
 * When called from exit_files(), the VMA has been freed because exit_mm()
 * is executed before exit_files().
 *
 * When called from exit_files(), the current task is not yet ZOMBIE but we will
 * flush the PMU state to the context. This means that when we see the context
 * state as TERMINATED we are guaranteed to have the latest PMU state available,
 * even if the task itself is in the middle of being context-switched out.
 */
static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
static int
pfm_close(struct inode *inode, struct file *filp)
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
  	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_vaddr = NULL;
	void *smpl_buf_addr = NULL;
	int free_possible = 1;
	int state, is_system;

	{ u64 psr = pfm_get_psr();
	  BUG_ON((psr & IA64_PSR_I) == 0UL);
	}

	DPRINT(("pfm_close called private=%p\n", filp->private_data));

	if (!inode) {
		printk(KERN_ERR "pfm_close: NULL inode\n");
		return 0;
	}
	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_close: bad magic [%d]\n", current->pid);
		return -EBADF;
	}
	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
		return -EBADF;
	}

	PROTECT_CTX(ctx, flags);

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);

	/*
	 * remove our file from the async queue, if we use it
	 */
	if (filp->f_flags & FASYNC) {
		DPRINT(("[%d] before async_queue=%p\n", current->pid, ctx->ctx_async_queue));
		pfm_do_fasync (-1, filp, ctx, 0);
		DPRINT(("[%d] after async_queue=%p\n", current->pid, ctx->ctx_async_queue));
	}


	DPRINT(("[%d] ctx_state=%d\n", current->pid, state));

	if (state == PFM_CTX_UNLOADED || state == PFM_CTX_TERMINATED) {
		goto doit;
	}

	regs = ia64_task_regs(task);

	/*
	 * context still loaded/masked and self monitoring,
	 * we stop/unload and we destroy right here
	 *
	 * We always go here for system-wide sessions
	 */
	if (task == current) {
#ifdef CONFIG_SMP
		/*
		 * the task IS the owner but it migrated to another CPU: that's bad
		 * but we must handle this cleanly. Unfortunately, the kernel does
		 * not provide a mechanism to block migration (while the context is loaded).
		 *
		 * We need to release the resource on the ORIGINAL cpu.
		 */
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {

			DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));

			UNPROTECT_CTX(ctx, flags);

			pfm_syswide_cleanup_other_cpu(ctx);

			PROTECT_CTX(ctx, flags);

			/*
			 * short circuit pfm_context_unload();
			 */
			task->thread.pfm_context = NULL;
			ctx->ctx_task            = NULL;


			ctx->ctx_state = state = PFM_CTX_UNLOADED;

			pfm_unreserve_session(ctx, 1, ctx->ctx_cpu);

		} else
#endif /* CONFIG_SMP */
		{

			DPRINT(("forcing unload on [%d]\n", current->pid));
			/*
			 * stop and unload, returning with state UNLOADED
			 * and session unreserved.
			 */
			pfm_context_unload(ctx, NULL, 0, regs);

			ctx->ctx_state = PFM_CTX_TERMINATED;

			DPRINT(("[%d] ctx_state=%d\n", current->pid, state));
		}
		goto doit;
	}

	/*
	 * The task is currently blocked or will block after an overflow.
	 * We must force it to wake up to get out of the
	 * MASKED state and transition to the unloaded state by itself.
	 */
	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {

		/*
		 * set a "partial" zombie state to be checked
		 * upon return from down() in pfm_handle_work().
		 *
		 * We cannot use the ZOMBIE state, because it is checked
		 * by pfm_load_regs() which is called upon wakeup from down().
		 * In that case, it would free the context and then we would
		 * return to pfm_handle_work() which would access the
		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
		 * but visible to pfm_handle_work().
		 *
		 * For some window of time, we have a zombie context with
		 * ctx_state = MASKED and not ZOMBIE
		 */
		ctx->ctx_fl_going_zombie = 1;

		/*
		 * force task to wake up from MASKED state
		 */
		up(&ctx->ctx_restart_sem);

		DPRINT(("waking up ctx_state=%d for [%d]\n", state, current->pid));

		/*
		 * put ourselves to sleep waiting for the other
		 * task to report completion
		 *
		 * the context is protected by mutex, therefore there
		 * is no risk of being notified of completion before
		 * actually being on the waitq.
		 */
  		set_current_state(TASK_INTERRUPTIBLE);
  		add_wait_queue(&ctx->ctx_zombieq, &wait);

		UNPROTECT_CTX(ctx, flags);

		/*
		 * XXX: check for signals :
		 * 	- ok for explicit close
		 * 	- not ok when coming from exit_files()
		 */
      		schedule();


		PROTECT_CTX(ctx, flags);

		remove_wait_queue(&ctx->ctx_zombieq, &wait);
  		set_current_state(TASK_RUNNING);

		/*
		 * context is terminated at this point
		 */
		DPRINT(("after zombie wakeup ctx_state=%d for [%d]\n", state, current->pid));
	}
	else {
#ifdef CONFIG_SMP
		/*
		 * switch context to zombie state
		 */
		ctx->ctx_state = PFM_CTX_ZOMBIE;

		DPRINT(("zombie ctx for [%d]\n", task->pid));
		/*
		 * cannot free the context on the spot. deferred until
		 * the task notices the ZOMBIE state
		 */
		free_possible = 0;
#else
		pfm_context_unload(ctx, NULL, 0, regs);
#endif
	}

doit:	/* cannot assume task is defined from now on */

	/* reload state, may have changed during opening of critical section */
	state = ctx->ctx_state;

	/*
	 * the context is still attached to a task (possibly current)
	 * we cannot destroy it right now
	 */
	/*
	 * remove virtual mapping, if any. will be NULL when
	 * called from exit_files().
	 */
	if (ctx->ctx_smpl_vaddr) {
		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
		smpl_buf_size  = ctx->ctx_smpl_size;
		ctx->ctx_smpl_vaddr = NULL;
	}

	/*
	 * we must free the sampling buffer right here because
	 * we cannot rely on it being cleaned up later by the
	 * monitored task. It is not possible to free vmalloc'ed
	 * memory in pfm_load_regs(). Instead, we remove the buffer
	 * now. Should there be subsequent PMU overflows originally
	 * meant for sampling, they will be converted to spurious
	 * and that's fine because the monitoring tool is gone anyway.
	 */
	if (ctx->ctx_smpl_hdr) {
		smpl_buf_addr = ctx->ctx_smpl_hdr;
		smpl_buf_size = ctx->ctx_smpl_size;
		/* no more sampling */
		ctx->ctx_smpl_hdr = NULL;
	}

	DPRINT(("[%d] ctx_state=%d free_possible=%d vaddr=%p addr=%p size=%lu\n",
		current->pid,
		state,
		free_possible,
		smpl_buf_vaddr,
		smpl_buf_addr,
		smpl_buf_size));

	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);

	/*
	 * UNLOADED and TERMINATED mean that the session has already been
	 * unreserved.
	 */
	if (state == PFM_CTX_ZOMBIE) {
		pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu);
	}

	/*
	 * disconnect file descriptor from context must be done
	 * before we unlock.
	 */
	filp->private_data = NULL;

	/*
	 * if we free on the spot, the context is now completely unreachable
	 * from the caller's side. The monitored task side is also cut, so we
	 * can free it safely.
	 *
	 * If we have a deferred free, only the caller side is disconnected.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * if there was a mapping, then we systematically remove it
	 * at this point. Cannot be done inside critical section
	 * because some VM function reenables interrupts.
	 *
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
	if (smpl_buf_addr)  pfm_rvfree(smpl_buf_addr, smpl_buf_size);

	/*
	 * return the memory used by the context
	 */
	if (free_possible) pfm_context_free(ctx);

	return 0;
}

static int
pfm_no_open(struct inode *irrelevant, struct file *dontcare)
{
	DPRINT(("pfm_no_open called\n"));
	return -ENXIO;
}

static struct file_operations pfm_file_ops = {
	.llseek   = pfm_lseek,
	.read     = pfm_read,
	.write    = pfm_write,
	.poll     = pfm_poll,
	.ioctl    = pfm_ioctl,
	.open     = pfm_no_open,	/* special open code to disallow open via /proc */
	.fasync   = pfm_fasync,
	.release  = pfm_close
};

static int
pfmfs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pfmfs_dentry_operations = {
	.d_delete = pfmfs_delete_dentry,
};


static int
pfm_alloc_fd(struct file **cfile)
{
	int fd, ret = 0;
	struct file *file = NULL;
	struct inode * inode;
	char name[32];
	struct qstr this;

	fd = get_unused_fd();
	if (fd < 0) return -ENFILE;

	ret = -ENFILE;

	file = get_empty_filp();
	if (!file) goto out;

	/*
	 * allocate a new inode
	 */
	inode = new_inode(pfmfs_mnt->mnt_sb);
	if (!inode) goto out;

	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));

	inode->i_sb   = pfmfs_mnt->mnt_sb;
	inode->i_mode = S_IFCHR|S_IRUGO;
	inode->i_sock = 0;
	inode->i_uid  = current->fsuid;
	inode->i_gid  = current->fsgid;

	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len  = strlen(name);
	this.hash = inode->i_ino;

	ret = -ENOMEM;

	/*
	 * allocate a new dcache entry
	 */
	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
	if (!file->f_dentry) goto out;

	file->f_dentry->d_op = &pfmfs_dentry_operations;

	d_add(file->f_dentry, inode);
	file->f_vfsmnt = mntget(pfmfs_mnt);

	file->f_op    = &pfm_file_ops;
	file->f_mode  = FMODE_READ;
	file->f_flags = O_RDONLY;
	file->f_pos   = 0;

	/*
	 * may have to delay until context is attached?
	 */
	fd_install(fd, file);

	/*
	 * the file structure we will use
	 */
	*cfile = file;

	return fd;
out:
	if (file) put_filp(file);
	put_unused_fd(fd);
	return ret;
}
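
/*
 * Note: pfm_alloc_fd() hand-rolls an anonymous-inode style descriptor: a
 * file with no on-disk object, living on the internal pfmfs mount, so that
 * each context gets regular fd semantics (read, poll, fasync, close-on-exit)
 * without exposing a user-mountable filesystem.
 */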

static void
pfm_free_fd(int fd, struct file *file)
{
	if (file) put_filp(file);
	put_unused_fd(fd);
}

/*
 * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
 * attached to the context AND the current task has a mapping for it, i.e., it is the original
 * creator of the context.
 *
 * This function is used to remember the fact that the vma describing the sampling buffer
 * has now been removed. It can only be called when no other tasks share the same mm context.
 *
 */
static void
pfm_vm_close(struct vm_area_struct *vma)
{
	pfm_context_t *ctx = (pfm_context_t *)vma->vm_private_data;
	unsigned long flags;

	PROTECT_CTX(ctx, flags);
	ctx->ctx_smpl_vaddr = NULL;
	UNPROTECT_CTX(ctx, flags);
	DPRINT(("[%d] clearing vaddr for ctx %p\n", current->pid, ctx));
}

static int
pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
{
	unsigned long page;

	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));

	while (size > 0) {
		page = pfm_kvirt_to_pa(buf);

		if (pfm_remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;
		addr  += PAGE_SIZE;
		buf   += PAGE_SIZE;
		size  -= PAGE_SIZE;
	}
	return 0;
}
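
/*
 * Note: the buffer comes from pfm_rvmalloc(), i.e., vmalloc'ed memory that
 * is virtually contiguous but physically scattered. This is why the loop
 * above maps one PAGE_SIZE chunk at a time, translating each kernel virtual
 * page to its physical address before inserting it into the user mapping.
 */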

/*
 * allocates a sampling buffer and remaps it into the user address space of the task
 */
static int
pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma = NULL;
	unsigned long size;
	void *smpl_buf;

	/*
	 * the fixed header + requested size, aligned to page boundary
	 */
	size = PAGE_ALIGN(rsize);

	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));

	/*
	 * check requested size to avoid Denial-of-service attacks
	 * XXX: may have to refine this test
	 * Check against address space limit.
	 *
	 * if ((mm->total_vm << PAGE_SHIFT) + len > task->rlim[RLIMIT_AS].rlim_cur)
	 * 	return -ENOMEM;
	 */
	if (size > task->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;

	/*
	 * We do the easy-to-undo allocations first.
	 *
	 * pfm_rvmalloc() clears the buffer, so there is no leak
	 */
	smpl_buf = pfm_rvmalloc(size);
	if (smpl_buf == NULL) {
		DPRINT(("Can't allocate sampling buffer\n"));
		return -ENOMEM;
	}

	DPRINT(("[%d] smpl_buf @%p\n", current->pid, smpl_buf));

	/* allocate vma */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma) {
		DPRINT(("Cannot allocate vma\n"));
		goto error_kmem;
	}
	/*
	 * partially initialize the vma for the sampling buffer
	 *
	 * The VM_DONTCOPY flag is very important as it ensures that the mapping
	 * will never be inherited for any child process (via fork()) which is always
	 * what we want.
	 */
	vma->vm_mm	     = mm;
	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_RESERVED|VM_DONTCOPY;
	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
	vma->vm_ops	     = &pfm_vm_ops;
	vma->vm_pgoff	     = 0;
	vma->vm_file	     = NULL;
	vma->vm_private_data = ctx;	/* information needed by the pfm_vm_close() function */

	/*
	 * Now we have everything we need and we can initialize
	 * and connect all the data structures
	 */
	ctx->ctx_smpl_hdr   = smpl_buf;
	ctx->ctx_smpl_size  = size; /* aligned size */

	/*
	 * Let's do the difficult operations next.
	 *
	 * now we atomically find some area in the address space and
	 * remap the buffer into it.
	 */
	down_write(&task->mm->mmap_sem);

	/* find some free area in address space, must have mmap sem held */
	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
	if (vma->vm_start == 0UL) {
		DPRINT(("Cannot find unmapped area for size %ld\n", size));
		up_write(&task->mm->mmap_sem);
		goto error;
	}
	vma->vm_end = vma->vm_start + size;

	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));

	/* can only be applied to current task, need to have the mm semaphore held when called */
	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
		DPRINT(("Can't remap buffer\n"));
		up_write(&task->mm->mmap_sem);
		goto error;
	}

	/*
	 * now insert the vma in the vm list for the process, must be
	 * done with mmap lock held
	 */
	insert_vm_struct(mm, vma);

	mm->total_vm += size >> PAGE_SHIFT;

	up_write(&task->mm->mmap_sem);

	/*
	 * keep track of user level virtual address
	 */
	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
	*(unsigned long *)user_vaddr = vma->vm_start;

	return 0;

error:
	kmem_cache_free(vm_area_cachep, vma);
error_kmem:
	pfm_rvfree(smpl_buf, size);

	return -ENOMEM;
}

/*
 * XXX: do something better here
 */
static int
pfm_bad_permissions(struct task_struct *task)
{
	/* inspired by ptrace_attach() */
	DPRINT(("[%d] cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
		current->pid,
		current->uid,
		current->gid,
		task->euid,
		task->suid,
		task->uid,
		task->egid,
		task->sgid));

	return ((current->uid != task->euid)
	    || (current->uid != task->suid)
	    || (current->uid != task->uid)
	    || (current->gid != task->egid)
	    || (current->gid != task->sgid)
	    || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
}

static int
pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
{
	int ctx_flags;

	/* valid signal */

	ctx_flags = pfx->ctx_flags;

	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {

		/*
		 * cannot block in this mode
		 */
		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
			DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
			return -EINVAL;
		}
	}
	/* probably more to add here */

	return 0;
}

static int
pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
		     unsigned int cpu, pfarg_context_t *arg)
{
	pfm_buffer_fmt_t *fmt = NULL;
	unsigned long size = 0UL;
	void *uaddr = NULL;
	void *fmt_arg = NULL;
	int ret = 0;
#define PFM_CTXARG_BUF_ARG(a)	(pfm_buffer_fmt_t *)(a+1)

	/* invoke and lock buffer format, if found */
	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id, 0);
	if (fmt == NULL) {
		DPRINT(("[%d] cannot find buffer format\n", task->pid));
		return -EINVAL;
	}

	/*
	 * buffer argument MUST be contiguous to pfarg_context_t
	 */
	if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);

	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);

	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));

	if (ret) goto error;

	/* link buffer format and context */
	ctx->ctx_buf_fmt = fmt;

	/*
	 * check if buffer format wants to use perfmon buffer allocation/mapping service
	 */
	ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
	if (ret) goto error;

	if (size) {
		/*
		 * buffer is always remapped into the caller's address space
		 */
		ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr);
		if (ret) goto error;

		/* keep track of user address of buffer */
		arg->ctx_smpl_vaddr = uaddr;
	}
	ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);

error:
	return ret;
}
static void
pfm_reset_pmu_state(pfm_context_t *ctx)
{
	int i;

	/*
	 * install reset values for PMC.
	 */
	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
	}
	/*
	 * PMD registers are set to 0UL when the context is memset()'ed
	 */

	/*
	 * On context switched restore, we must restore ALL pmc and ALL pmd even
	 * when they are not actively used by the task. In UP, the incoming process
	 * may otherwise pick up left over PMC, PMD state from the previous process.
	 * As opposed to PMD, stale PMC can cause harm to the incoming
	 * process because they may change what is being measured.
	 * Therefore, we must systematically reinstall the entire
	 * PMC state. In SMP, the same thing is possible on the
	 * same CPU but also between 2 CPUs.
	 *
	 * The problem with PMD is information leaking especially
	 * to user level when psr.sp=0
	 *
	 * There is unfortunately no easy way to avoid this problem
	 * on either UP or SMP. This definitely slows down the
	 * pfm_load_regs() function.
	 */

	/*
	 * bitmask of all PMCs accessible to this context
	 *
	 * PMC0 is treated differently.
	 */
	ctx->ctx_all_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1;

	/*
	 * bitmask of all PMDs that are accessible to this context
	 */
	ctx->ctx_all_pmds[0] = pmu_conf.impl_pmds[0];

	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0], ctx->ctx_all_pmds[0]));

	/*
	 * useful in case of re-enable after disable
	 */
	ctx->ctx_used_ibrs[0] = 0UL;
	ctx->ctx_used_dbrs[0] = 0UL;
}
static int
pfm_ctx_getsize(void *arg, size_t *sz)
{
	pfarg_context_t *req = (pfarg_context_t *)arg;
	pfm_buffer_fmt_t *fmt;

	*sz = 0;

	if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;

	/* no buffer locking here, will be called again */
	fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id, 1);
	if (fmt == NULL) {
		DPRINT(("cannot find buffer format\n"));
		return -EINVAL;
	}
	/* get just enough to copy in user parameters */
	*sz = fmt->fmt_arg_size;
	DPRINT(("arg_size=%lu\n", *sz));
	return 0;
}



/*
 * cannot attach if :
 * 	- kernel task
 * 	- task not owned by caller
 * 	- task incompatible with context mode
 */
static int
pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
{
	/*
	 * no kernel task or task not owned by caller
	 */
	if (task->mm == NULL) {
		DPRINT(("[%d] task [%d] has no memory context (kernel thread)\n", current->pid, task->pid));
		return -EPERM;
	}
	if (pfm_bad_permissions(task)) {
		DPRINT(("[%d] no permission to attach to [%d]\n", current->pid, task->pid));
		return -EPERM;
	}
	/*
	 * cannot block in self-monitoring mode
	 */
	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
		return -EINVAL;
	}

	if (task->state == TASK_ZOMBIE) {
		DPRINT(("[%d] cannot attach to zombie task [%d]\n", current->pid, task->pid));
		return -EBUSY;
	}

	/*
	 * always ok for self
	 */
	if (task == current) return 0;

	if (task->state != TASK_STOPPED) {
		DPRINT(("[%d] cannot attach to non-stopped task [%d] state=%ld\n", current->pid, task->pid, task->state));
		return -EBUSY;
	}
	/*
	 * make sure the task is off any CPU
	 */
	pfm_wait_task_inactive(task);

	/* more to come... */

	return 0;
}

static int
pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
{
	struct task_struct *p = current;
	int ret;

	/* XXX: need to add more checks here */
	if (pid < 2) return -EPERM;

	if (pid != current->pid) {

		read_lock(&tasklist_lock);

		p = find_task_by_pid(pid);

		/* make sure task cannot go away while we operate on it */
		if (p) get_task_struct(p);

		read_unlock(&tasklist_lock);

		if (p == NULL) return -ESRCH;
	}

	ret = pfm_task_incompatible(ctx, p);
	if (ret == 0) {
		*task = p;
	} else if (p != current) {
		pfm_put_task(p);
	}
	return ret;
}



static int
pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_context_t *req = (pfarg_context_t *)arg;
	struct file *filp;
	int ctx_flags;
	int ret;

	/* let's check the arguments first */
	ret = pfarg_is_sane(current, req);
	if (ret < 0) return ret;

	ctx_flags = req->ctx_flags;

	ret = -ENOMEM;

	ctx = pfm_context_alloc();
	if (!ctx) goto error;

	req->ctx_fd = ctx->ctx_fd = pfm_alloc_fd(&filp);
	if (req->ctx_fd < 0) goto error_file;

	/*
	 * attach context to file
	 */
	filp->private_data = ctx;
	/*
	 * does the user want to sample?
	 */
	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
		ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
		if (ret) goto buffer_error;
	}
	/*
	 * init context protection lock
	 */
	spin_lock_init(&ctx->ctx_lock);
	/*
	 * context is unloaded
	 */
	ctx->ctx_state = PFM_CTX_UNLOADED;
	/*
	 * initialization of context's flags
	 */
	ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
	ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
	ctx->ctx_fl_unsecure	= (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
	ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
	ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
	/*
	 * will move to set properties
	 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
	 */
	/*
	 * init restart semaphore to locked
	 */
	sema_init(&ctx->ctx_restart_sem, 0);
	/*
	 * activation is used in SMP only
	 */
	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
	SET_LAST_CPU(ctx, -1);
	/*
	 * initialize notification message queue
	 */
	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
	init_waitqueue_head(&ctx->ctx_msgq_wait);
	init_waitqueue_head(&ctx->ctx_zombieq);

	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d unsecure=%d no_msg=%d ctx_fd=%d \n",
		ctx,
		ctx_flags,
		ctx->ctx_fl_system,
		ctx->ctx_fl_block,
		ctx->ctx_fl_excl_idle,
		ctx->ctx_fl_unsecure,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd));

	/*
	 * initialize soft PMU state
	 */
	pfm_reset_pmu_state(ctx);

	return 0;

buffer_error:
	pfm_free_fd(ctx->ctx_fd, filp);

	if (ctx->ctx_buf_fmt) {
		pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
	}
error_file:
	pfm_context_free(ctx);

error:
	return ret;
}
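
/*
 * Illustrative only, hence the #if 0: a minimal user-level sketch of context
 * creation through perfmonctl(), assuming the standard perfmon-2 calling
 * convention; the user-side header is an assumption. With ctx_smpl_buf_id
 * left as the null UUID, no sampling format is set up and only the fd and
 * message queue initialization above runs.
 */
#if 0
#include <string.h>
#include <perfmon/perfmon.h>	/* hypothetical user-level header */

static int
create_context(void)
{
	pfarg_context_t ctx;

	memset(&ctx, 0, sizeof(ctx));	/* null UUID => no sampling buffer */

	if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1)
		return -1;

	return ctx.ctx_fd;	/* fd backed by pfmfs, see pfm_alloc_fd() */
}
#endif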

static inline unsigned long
pfm_new_counter_value(pfm_counter_t *reg, int is_long_reset)
{
	unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
	unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
	extern unsigned long carta_random32 (unsigned long seed);

	if (reg->flags & PFM_REGFL_RANDOM) {
		new_seed = carta_random32(old_seed);
		val -= (old_seed & mask);	/* counter values are negative numbers! */
		if ((mask >> 32) != 0)
			/* construct a full 64-bit random value: */
			new_seed |= carta_random32(old_seed >> 32) << 32;
		reg->seed = new_seed;
	}
	reg->lval = val;
	return val;
}
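
/*
 * Worked example (informative): IA-64 counters count upward and interrupt
 * on overflow, so a sampling period p is programmed as the reset value -p
 * (a "negative" number in the counter's width). With PFM_REGFL_RANDOM, the
 * code above subtracts (seed & mask) from that reset value, making the
 * effective period p + (seed & mask): the mask bounds the random spread,
 * which helps avoid sampling in lockstep with periodic program behavior.
 */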

static void
pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
{
	unsigned long mask = ovfl_regs[0];
	unsigned long reset_others = 0UL;
	unsigned long val;
	int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);

	DPRINT_ovfl(("ovfl_regs=0x%lx flag=%d\n", ovfl_regs[0], flag));

	/*
	 * now restore reset value on sampling overflowed counters
	 */
	mask >>= PMU_FIRST_COUNTER;
	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {

		if ((mask & 0x1UL) == 0UL) continue;

		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
		reset_others        |= ctx->ctx_pmds[i].reset_pmds[0];

		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
	}

	/*
	 * Now take care of resetting the other registers
	 */
	for(i = 0; reset_others; i++, reset_others >>= 1) {

		if ((reset_others & 0x1) == 0) continue;

		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);

		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
			  is_long_reset ? "long" : "short", i, val));
	}
}

static void
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
{
	unsigned long mask = ovfl_regs[0];
	unsigned long reset_others = 0UL;
	unsigned long val;
	int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);

	DPRINT_ovfl(("ovfl_regs=0x%lx flag=%d\n", ovfl_regs[0], flag));

	if (flag == PFM_PMD_NO_RESET) return;

	if (ctx->ctx_state == PFM_CTX_MASKED) {
		pfm_reset_regs_masked(ctx, ovfl_regs, flag);
		return;
	}

	/*
	 * now restore reset value on sampling overflowed counters
	 */
	mask >>= PMU_FIRST_COUNTER;
	for (i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {

		if ((mask & 0x1UL) == 0UL) continue;

		val           = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
		reset_others |= ctx->ctx_pmds[i].reset_pmds[0];

		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));

		pfm_write_soft_counter(ctx, i, val);
	}

	/*
	 * Now take care of resetting the other registers
	 */
	for (i = 0; reset_others; i++, reset_others >>= 1) {

		if ((reset_others & 0x1) == 0) continue;

		val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);

		if (PMD_IS_COUNTING(i)) {
			pfm_write_soft_counter(ctx, i, val);
		} else {
			ia64_set_pmd(i, val);
		}
		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
			  is_long_reset ? "long" : "short", i, val));
	}
	ia64_srlz_d();
}

static int
pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct thread_struct *thread = NULL;
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
	unsigned long value;
	unsigned long smpl_pmds, reset_pmds;
	unsigned int cnum, reg_flags, flags;
	int i, can_access_pmu = 0, is_loaded, is_system;
	int is_monitor, is_counting, state;
	int ret = -EINVAL;
#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))

	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;

	if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL;

	if (is_loaded) {
		thread = &ctx->ctx_task->thread;
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
			DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
			return -EBUSY;
		}
		can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0;
	}

	for (i = 0; i < count; i++, req++) {

		cnum       = req->reg_num;
		reg_flags  = req->reg_flags;
		value      = req->reg_value;
		smpl_pmds  = req->reg_smpl_pmds[0];
		reset_pmds = req->reg_reset_pmds[0];
		flags      = 0;

		is_counting = PMC_IS_COUNTING(cnum);
		is_monitor  = PMC_IS_MONITOR(cnum);

		/*
		 * we reject all non implemented PMC as well
		 * as attempts to modify PMC[0-3] which are used
		 * as status registers by the PMU
		 */
		if (PMC_IS_IMPL(cnum) == 0 || PMC_IS_CONTROL(cnum)) {
			DPRINT(("pmc%u is unimplemented or invalid\n", cnum));
			goto error;
		}
		/*
		 * If the PMC is a monitor, then if the value is not the default:
		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
		 * 	- per-task           : PMCx.pm=0 (user monitor)
		 */
		if ((is_monitor || is_counting) && value != PMC_DFL_VAL(cnum) && PFM_CHECK_PMC_PM(ctx, cnum, value)) {
			DPRINT(("pmc%u pmc_pm=%ld fl_system=%d\n",
				cnum,
				PMC_PM(cnum, value),
				ctx->ctx_fl_system));
			goto error;
		}

		if (is_counting) {
			pfm_monitor_t *p = (pfm_monitor_t *)&value;
			/*
			 * enforce generation of overflow interrupt. Necessary on all
			 * CPUs.
			 */
			p->pmc_oi = 1;

			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
				flags |= PFM_REGFL_OVFL_NOTIFY;
			}

			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;

			/* verify validity of smpl_pmds */
			if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) {
				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
				goto error;
			}

			/* verify validity of reset_pmds */
			if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) {
				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
				goto error;
			}
		} else {
			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
				goto error;
			}
			/* eventid on non-counting monitors are ignored */
		}

		/*
		 * execute write checker, if any
		 */
		if (PMC_WR_FUNC(cnum)) {
			ret = PMC_WR_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &value, regs);
			if (ret) goto error;
			ret = -EINVAL;
		}

		/*
		 * no error on this register
		 */
		PFM_REG_RETFLAG_SET(req->reg_flags, 0);

		/*
		 * Now we commit the changes to the software state
		 */

		/*
		 * update overflow information
		 */
		if (is_counting) {
			/*
			 * full flag update each time a register is programmed
			 */
			ctx->ctx_pmds[cnum].flags = flags;

			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;

			/*
			 * Mark all PMDS to be accessed as used.
			 *
			 * We do not keep track of PMC because we have to
			 * systematically restore ALL of them.
			 *
			 * We do not update the used_monitors mask, because
			 * if we have not programmed them, then they will be in
			 * a quiescent state, therefore we will not need to
			 * mask/restore them when the context is MASKED.
			 */
			CTX_USED_PMD(ctx, reset_pmds);
			CTX_USED_PMD(ctx, smpl_pmds);
			/*
			 * make sure we do not try to reset on
			 * restart because we have established new values
			 */
			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
		}
		/*
		 * Needed in case the user does not initialize the equivalent
		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
		 * possible leak here.
		 */
		CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]);

		/*
		 * keep track of the monitor PMC that we are using.
		 * we save the value of the pmc in ctx_pmcs[] and if
		 * the monitoring is not stopped for the context we also
		 * place it in the saved state area so that it will be
		 * picked up later by the context switch code.
		 *
		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
		 *
		 * The value in t->pmc[] may be modified on overflow, i.e., when
		 * monitoring needs to be stopped.
		 */
		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);

		/*
		 * update context state
		 */
		ctx->ctx_pmcs[cnum] = value;

		if (is_loaded) {
			/*
			 * write thread state
			 */
			if (is_system == 0) thread->pmcs[cnum] = value;

			/*
			 * write hardware register if we can
			 */
			if (can_access_pmu) {
				ia64_set_pmc(cnum, value);
			}
#ifdef CONFIG_SMP
			else {
				/*
				 * per-task SMP only here
				 *
				 * we are guaranteed that the task is not running on the other CPU,
				 * we indicate that this PMC will need to be reloaded if the task
				 * is rescheduled on the CPU it ran last on.
				 */
				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
			}
#endif
		}

		DPRINT(("pmc[%u]=0x%lx loaded=%d access_pmu=%d all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
			  cnum,
			  value,
			  is_loaded,
			  can_access_pmu,
			  ctx->ctx_all_pmcs[0],
			  ctx->ctx_used_pmds[0],
			  ctx->ctx_pmds[cnum].eventid,
			  smpl_pmds,
			  reset_pmds,
			  ctx->ctx_reload_pmcs[0],
			  ctx->ctx_used_monitors[0],
			  ctx->ctx_ovfl_regs[0]));
	}

	/*
	 * make sure the changes are visible
	 */
	if (can_access_pmu) ia64_srlz_d();

	return 0;
error:
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);

	req->reg_flags = PFM_REG_RETFL_EINVAL;

	DPRINT(("pmc[%u]=0x%lx error %d\n", cnum, value, ret));

	return ret;
}
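
/*
 * Illustrative only, hence the #if 0: programming one counting monitor from
 * user level with the checks above in mind, assuming the perfmon-2
 * perfmonctl() convention and a context fd from PFM_CREATE_CONTEXT. The
 * PMC4/PMD4 pairing is PMU-model specific and purely an example.
 */
#if 0
#include <string.h>
#include <perfmon/perfmon.h>	/* hypothetical user-level header */

static int
program_counter(int ctx_fd, unsigned long event_sel)
{
	pfarg_reg_t pc, pd;

	memset(&pc, 0, sizeof(pc));
	memset(&pd, 0, sizeof(pd));

	pc.reg_num   = 4;		/* first generic counter on Itanium */
	pc.reg_value = event_sel;	/* pmc.oi is forced to 1 by the kernel */

	pd.reg_num   = 4;		/* the PMD paired with PMC4 */
	pd.reg_value = 0UL;		/* start counting from zero */

	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, &pc, 1) == -1) return -1;
	return perfmonctl(ctx_fd, PFM_WRITE_PMDS, &pd, 1);
}
#endif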

static int
3083
pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
3084
{
3085 3086
	struct thread_struct *thread = NULL;
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
3087
	unsigned long value, hw_value;
David Mosberger's avatar
David Mosberger committed
3088
	unsigned int cnum;
3089 3090
	int i, can_access_pmu = 0, state;
	int is_counting, is_loaded, is_system;
3091
	int ret = -EINVAL;
David Mosberger's avatar
David Mosberger committed
3092 3093


3094 3095 3096 3097 3098
	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;

	if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
3099

3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110
	/*
	 * on both UP and SMP, we can only write to the PMC when the task is
	 * the owner of the local PMU.
	 */
	if (is_loaded) {
		thread = &ctx->ctx_task->thread;
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
3111
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
3112 3113 3114
			DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
			return -EBUSY;
		}
3115
		can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0;
3116
	}
3117

Linus Torvalds's avatar
Linus Torvalds committed
3118 3119
	for (i = 0; i < count; i++, req++) {

3120 3121
		cnum  = req->reg_num;
		value = req->reg_value;
3122

		if (!PMD_IS_IMPL(cnum)) {
			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
			goto abort_mission;
		}
		is_counting = PMD_IS_COUNTING(cnum);

		/*
		 * execute write checker, if any
		 */
		if (PMD_WR_FUNC(cnum)) {
			unsigned long v = value;

			ret = PMD_WR_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &v, regs);
			if (ret) goto abort_mission;

			value = v;
			ret   = -EINVAL;
		}

		/*
		 * no error on this register
		 */
		PFM_REG_RETFLAG_SET(req->reg_flags, 0);

		/*
		 * now commit changes to software state
		 */
		hw_value = value;

		/*
		 * update virtualized (64bits) counter
		 */
		if (is_counting) {
			/*
			 * write context state
			 */
			ctx->ctx_pmds[cnum].lval = value;

			/*
			 * when the context is loaded we use the split value
			 */
			if (is_loaded) {
				hw_value = value &  pmu_conf.ovfl_val;
				value    = value & ~pmu_conf.ovfl_val;
			}

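			/*
			 * Illustration (assuming, e.g., 47-bit hardware counters,
			 * i.e. ovfl_val = (1UL << 47) - 1): the low bits of the
			 * requested 64-bit value are programmed into the hardware
			 * PMD (hw_value) while the high bits stay in ctx_pmds[].val;
			 * pfm_read_pmds() adds the two back together, which is what
			 * makes each counter appear to be a full 64-bit register.
			 */
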
			/*
			 * update sampling periods
			 */
			ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
			ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
			/*
			 * update randomization parameters
			 */
			ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
			ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
		}

		/*
		 * update context value
		 */
		ctx->ctx_pmds[cnum].val  = value;

		/*
		 * Keep track of what we use
		 *
		 * We do not keep track of PMC because we have to
		 * systematically restore ALL of them.
		 */
		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
		/*
		 * mark this PMD register used as well
		 */
		CTX_USED_PMD(ctx, RDEP(cnum));

		/*
		 * make sure we do not try to reset on
		 * restart because we have established new values
		 */
		if (is_counting && state == PFM_CTX_MASKED) {
			ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
		}

		if (is_loaded) {
			/*
		 	 * write thread state
		 	 */
			if (is_system == 0) thread->pmds[cnum] = hw_value;

			/*
			 * write hardware register if we can
			 */
			if (can_access_pmu) {
				ia64_set_pmd(cnum, hw_value);
			} else {
#ifdef CONFIG_SMP
				/*
			 	 * we are guaranteed that the task is not running on the other CPU,
			 	 * we indicate that this PMD will need to be reloaded if the task
			 	 * is rescheduled on the CPU it ran last on.
			 	 */
				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
#endif
			}
		}

		DPRINT(("pmd[%u]=0x%lx loaded=%d access_pmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx "
			  "long_reset=0x%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
			cnum,
			value,
			is_loaded,
			can_access_pmu,
			hw_value,
			ctx->ctx_pmds[cnum].val,
			ctx->ctx_pmds[cnum].short_reset,
			ctx->ctx_pmds[cnum].long_reset,
			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
			ctx->ctx_used_pmds[0],
			ctx->ctx_pmds[cnum].reset_pmds[0],
			ctx->ctx_reload_pmds[0],
			ctx->ctx_all_pmds[0],
			ctx->ctx_ovfl_regs[0]));
	}

	/*
	 * make changes visible
	 */
	if (can_access_pmu) ia64_srlz_d();

	return 0;

abort_mission:
	/*
	 * for now, we have only one possibility for error
	 */
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);

	/*
	 * we change the return value to EFAULT in case we cannot write the register
	 * return code back to user space. The caller must first correct that error;
	 * a resubmission of the request will then yield the EINVAL recorded here.
	 */
	req->reg_flags = PFM_REG_RETFL_EINVAL;

	DPRINT(("pmd[%u]=0x%lx ret %d\n", cnum, value, ret));

	return ret;
}
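
/*
 * Usage sketch (illustrative only): arming a counting PMD so that it
 * overflows after 1000 events; the register number is hypothetical.
 *
 *	pfarg_reg_t pd;
 *	memset(&pd, 0, sizeof(pd));
 *	pd.reg_num        = 4;			// hypothetical PMD4
 *	pd.reg_value      = ~0UL - 1000 + 1;	// overflow after 1000 events
 *	pd.reg_long_reset = ~0UL - 1000 + 1;	// value reloaded on restart
 *	if (perfmonctl(fd, PFM_WRITE_PMDS, &pd, 1) == -1)
 *		perror("PFM_WRITE_PMDS");
 */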

/*
 * Because of PROTECT_CTX(), interrupts are masked while we are in this function.
 * Therefore we know we do not have to worry about the PMU overflow interrupt. If an
 * interrupt is delivered during the call, it will be kept pending until we leave, making
 * it appear as if it had been generated at the UNPROTECT_CTX(). At least we are
 * guaranteed to return consistent data to the user; it may simply be old. It is not
 * trivial to treat the overflow while inside the call because you may end up in
 * some module sampling buffer code causing deadlocks.
 */
static int
pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct thread_struct *thread = NULL;
	unsigned long val = 0UL, lval;
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
	unsigned int cnum, reg_flags = 0;
	int i, can_access_pmu = 0, state;
	int is_loaded, is_system;
	int ret = -EINVAL;

	/*
	 * access is possible when loaded only for
	 * self-monitoring tasks or in UP mode
	 */

	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;

	if (state == PFM_CTX_ZOMBIE) return -EINVAL;

	if (is_loaded) {
		thread = &ctx->ctx_task->thread;
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
			DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
			return -EBUSY;
		}
		/*
		 * this can be true when not self-monitoring only in UP
		 */
		can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0;

		if (can_access_pmu) ia64_srlz_d();
	}

	DPRINT(("enter loaded=%d access_pmu=%d ctx_state=%d\n",
		is_loaded,
		can_access_pmu,
		ctx->ctx_state));
	/*
	 * on both UP and SMP, we can only read the PMD from the hardware register when
	 * the task is the owner of the local PMU.
	 */

	for (i = 0; i < count; i++, req++) {

		lval        = 0UL;
		cnum        = req->reg_num;
		reg_flags   = req->reg_flags;

		if (!PMD_IS_IMPL(cnum)) goto error;
		/*
		 * we can only read the register that we use. That includes
		 * the one we explicitly initialize AND the one we want included
		 * in the sampling buffer (smpl_regs).
		 *
		 * Having this restriction allows optimization in the ctxsw routine
		 * without compromising security (leaks)
		 */
		if (!CTX_IS_USED_PMD(ctx, cnum)) goto error;

		/*
		 * If the task is not the current one, then we check if the
		 * PMU state is still in the local live register due to lazy ctxsw.
		 * If true, then we read directly from the registers.
		 */
		if (can_access_pmu) {
			val = ia64_get_pmd(cnum);
		} else {
			/*
			 * context has been saved
			 * if context is zombie, then task does not exist anymore.
			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
			 */
			val = state == PFM_CTX_LOADED ? thread->pmds[cnum] : 0UL;
		}

		if (PMD_IS_COUNTING(cnum)) {
			/*
			 * XXX: need to check for overflow when loaded
			 */
			val &= pmu_conf.ovfl_val;
			val += ctx->ctx_pmds[cnum].val;

			lval = ctx->ctx_pmds[cnum].lval;
		}

		/*
		 * execute read checker, if any
		 */
		if (PMD_RD_FUNC(cnum)) {
			unsigned long v = val;
			ret = PMD_RD_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &v, regs);
			if (ret) goto error;
			val = v;
			ret = -EINVAL;
		}

		PFM_REG_RETFLAG_SET(reg_flags, 0);

		DPRINT(("pmd[%u]=0x%lx loaded=%d access_pmu=%d ctx_state=%d\n",
			cnum,
			val,
			is_loaded,
			can_access_pmu,
			ctx->ctx_state));

		/*
		 * update register return value, abort all if problem during copy.
		 * we only modify the reg_flags field. no check mode is fine because
		 * access has been verified upfront in sys_perfmonctl().
		 */
		req->reg_value            = val;
		req->reg_flags            = reg_flags;
		req->reg_last_reset_val   = lval;
	}

	return 0;

error:
	PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL);

	req->reg_flags = PFM_REG_RETFL_EINVAL;

	DPRINT(("error pmd[%u]=0x%lx\n", cnum, val));

	return ret;
}
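
/*
 * Usage sketch (illustrative only): reading back the virtualized 64-bit
 * value of a counting PMD; the register number is hypothetical.
 *
 *	pfarg_reg_t pd;
 *	memset(&pd, 0, sizeof(pd));
 *	pd.reg_num = 4;			// hypothetical PMD4
 *	if (perfmonctl(fd, PFM_READ_PMDS, &pd, 1) == 0)
 *		printf("pmd4=0x%lx\n", pd.reg_value);
 */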

long
pfm_mod_write_pmcs(struct task_struct *task, pfarg_reg_t *req, unsigned int nreq, struct pt_regs *regs)
{
	pfm_context_t *ctx;

	if (task == NULL || req == NULL) return -EINVAL;

 	ctx = task->thread.pfm_context;

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
	 */
	if (task != current) return -EBUSY;

	return pfm_write_pmcs(ctx, req, nreq, regs);
}

long
pfm_mod_read_pmds(struct task_struct *task, pfarg_reg_t *req, unsigned int nreq, struct pt_regs *regs)
{
	pfm_context_t *ctx;

	if (task == NULL || req == NULL) return -EINVAL;

 	//ctx = task->thread.pfm_context;
 	ctx = GET_PMU_CTX();

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
	 */
	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;

	return pfm_read_pmds(ctx, req, nreq, regs);
}

long
pfm_mod_fast_read_pmds(struct task_struct *task, unsigned long mask[4], unsigned long *addr, struct pt_regs *regs)
{
	pfm_context_t *ctx;
	unsigned long m, val;
	unsigned int j;

	if (task == NULL || addr == NULL) return -EINVAL;

 	//ctx = task->thread.pfm_context;
 	ctx = GET_PMU_CTX();

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
	 */
	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;

	m = mask[0];
	for (j=0; m; m >>=1, j++) {

		if ((m & 0x1) == 0) continue;

		if (!(PMD_IS_IMPL(j)  && CTX_IS_USED_PMD(ctx, j)) ) return -EINVAL;

		if (PMD_IS_COUNTING(j)) {
			val = pfm_read_soft_counter(ctx, j);
		} else {
			val = ia64_get_pmd(j);
		}

		*addr++ = val;

		/* XXX: should call read checker routine? */
		DPRINT(("single_read_pmd[%u]=0x%lx\n", j, val));
	}
	return 0;
}
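
/*
 * Note: the pfm_mod_*() entry points above are intended for kernel code
 * (e.g. a custom sampling-buffer format module) that needs register
 * access from the overflow handler; they bypass the file-descriptor
 * interface but keep the "current task or system-wide" restriction.
 */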

/*
 * Only call this function when a process is trying to
 * write the debug registers (reading is always allowed)
 */
int
pfm_use_debug_registers(struct task_struct *task)
{
	pfm_context_t *ctx = task->thread.pfm_context;
	int ret = 0;

	if (pmu_conf.use_rr_dbregs == 0) return 0;

	DPRINT(("called for [%d]\n", task->pid));

	/*
	 * do it only once
	 */
	if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;

	/*
	 * Even on SMP, we do not need to use an atomic here because
	 * the only way in is via ptrace() and this is possible only when the
	 * process is stopped. Even in the case where the ctxsw out is not totally
	 * completed by the time we come here, there is no way the 'stopped' process
	 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
	 * So this is always safe.
	 */
	if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;

	LOCK_PFS();

	/*
	 * We cannot allow setting breakpoints when system wide monitoring
	 * sessions are using the debug registers.
	 */
	if (pfm_sessions.pfs_sys_use_dbregs > 0)
		ret = -1;
	else
		pfm_sessions.pfs_ptrace_use_dbregs++;

	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
		  pfm_sessions.pfs_ptrace_use_dbregs,
		  pfm_sessions.pfs_sys_use_dbregs,
		  task->pid, ret));

	UNLOCK_PFS();

	return ret;
}

/*
 * This function is called for every task that exits with the
 * IA64_THREAD_DBG_VALID set. This indicates a task which was
 * able to use the debug registers for debugging purposes via
 * ptrace(). Therefore we know it was not using them for
 * performance monitoring, so we only decrement the number
 * of "ptraced" debug register users to keep the count up to date.
 */
int
pfm_release_debug_registers(struct task_struct *task)
{
	int ret;

	if (pmu_conf.use_rr_dbregs == 0) return 0;

	LOCK_PFS();
	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
		ret = -1;
	} else {
		pfm_sessions.pfs_ptrace_use_dbregs--;
		ret = 0;
	}
	UNLOCK_PFS();

	return ret;
}

static int
pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct task_struct *task;
	pfm_buffer_fmt_t *fmt;
	pfm_ovfl_ctrl_t rst_ctrl;
	int state, is_system;
	int ret = 0;

	state     = ctx->ctx_state;
	fmt       = ctx->ctx_buf_fmt;
	is_system = ctx->ctx_fl_system;
	task      = PFM_CTX_TASK(ctx);

	switch(state) {
		case PFM_CTX_MASKED:
			break;
		case PFM_CTX_LOADED: 
			if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
			/* fall through */
		case PFM_CTX_UNLOADED:
		case PFM_CTX_ZOMBIE:
			DPRINT(("invalid state=%d\n", state));
			return -EBUSY;
		case PFM_CTX_TERMINATED:
			DPRINT(("context is terminated, nothing to do\n"));
			return 0;
		default:
			DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
			return -EINVAL;
	}

	/*
 	 * In system wide and when the context is loaded, access can only happen
 	 * when the caller is running on the CPU being monitored by the session.
 	 * It does not have to be the owner (ctx_task) of the context per se.
 	 */
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
		DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
		return -EBUSY;
	}

	/* sanity check */
	if (unlikely(task == NULL)) {
		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
		return -EINVAL;
	}

	if (task == current || is_system) {

		fmt = ctx->ctx_buf_fmt;

		DPRINT(("restarting self %d ovfl=0x%lx\n",
			task->pid,
			ctx->ctx_ovfl_regs[0]));

		if (CTX_HAS_SMPL(ctx)) {

			prefetch(ctx->ctx_smpl_hdr);

			rst_ctrl.bits.mask_monitoring = 0;
			rst_ctrl.bits.reset_ovfl_pmds = 1;

			if (state == PFM_CTX_LOADED)
				ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
			else
				ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
		} else {
			rst_ctrl.bits.mask_monitoring = 0;
			rst_ctrl.bits.reset_ovfl_pmds = 1;
		}

		if (ret == 0) {
			if (rst_ctrl.bits.reset_ovfl_pmds)
				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
			if (rst_ctrl.bits.mask_monitoring == 0) {
				DPRINT(("resuming monitoring for [%d]\n", task->pid));

				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
			} else {
				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));

				// cannot use pfm_stop_monitoring(task, regs);
			}
		}
		/*
		 * clear overflowed PMD mask to remove any stale information
		 */
		ctx->ctx_ovfl_regs[0] = 0UL;
		/*
		 * back to LOADED state
		 */
		ctx->ctx_state = PFM_CTX_LOADED;
		return 0;
	}
	/* restart another task */

	/*
	 * if blocking, then post the semaphore.
	 * if non-blocking, then we ensure that the task will go into
	 * pfm_handle_work() before returning to user mode.
	 * We cannot explicitly reset another task; it MUST always
	 * be done by the task itself. This works for system wide because
	 * the tool that is controlling the session is doing "self-monitoring".
	 *
	 * XXX: what if the task never goes back to user?
	 *
	 */
	if (CTX_OVFL_NOBLOCK(ctx) == 0) {
		DPRINT(("unblocking [%d] \n", task->pid));
		up(&ctx->ctx_restart_sem);
	} else {
		DPRINT(("[%d] armed exit trap\n", task->pid));

		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;

		PFM_SET_WORK_PENDING(task, 1);

		pfm_set_task_notify(task);

		/*
		 * XXX: send reschedule if task runs on another CPU
		 */
	}
	return 0;
}
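
/*
 * Usage sketch (illustrative only): after consuming an overflow message
 * read from the context file descriptor, the monitoring tool resumes the
 * session with:
 *
 *	if (perfmonctl(fd, PFM_RESTART, NULL, 0) == -1)
 *		perror("PFM_RESTART");
 *
 * For a blocked task this releases ctx_restart_sem; for a non-blocking
 * one it arms PFM_TRAP_REASON_RESET so the reset happens in
 * pfm_handle_work() on the next return to user mode, as coded above.
 */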

static int
pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	unsigned int m = *(unsigned int *)arg;

	pfm_sysctl.debug = m == 0 ? 0 : 1;

	pfm_debug_var = pfm_sysctl.debug;

	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");

	if (m == 0) {
		memset(pfm_stats, 0, sizeof(pfm_stats));
		for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL;
	}

	return 0;
}

static int
pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct thread_struct *thread = NULL;
	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
	dbreg_t dbreg;
	unsigned int rnum;
	int first_time;
	int ret = 0, state;
	int i, can_access_pmu = 0;
	int is_system, is_loaded;

	if (pmu_conf.use_rr_dbregs == 0) return -EINVAL;

	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;

	if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL;

	/*
	 * on both UP and SMP, we can only write to the debug registers when the
	 * task is the owner of the local PMU.
	 */
	if (is_loaded) {
		thread = &ctx->ctx_task->thread;
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
			DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
			return -EBUSY;
		}
		can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0;
	}

	/*
	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
	 * ensuring that no real breakpoint can be installed via this call.
	 *
	 * IMPORTANT: regs can be NULL in this function
	 */

	first_time = ctx->ctx_fl_using_dbreg == 0;

	/*
	 * don't bother if we are loaded and task is being debugged
	 */
	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
		DPRINT(("debug registers already in use for [%d]\n", ctx->ctx_task->pid));
		return -EBUSY;
	}

	/*
	 * check for debug registers in system wide mode
	 *
	 * We make the reservation even when context is not loaded
	 * to make sure we get our slot. Note that the PFM_LOAD_CONTEXT
	 * may still fail if the task has DBG_VALID set.
	 */
	LOCK_PFS();

	if (first_time && is_system) {
		if (pfm_sessions.pfs_ptrace_use_dbregs)
			ret = -EBUSY;
		else
			pfm_sessions.pfs_sys_use_dbregs++;
	}

	UNLOCK_PFS();

	if (ret != 0) return ret;

	/*
	 * mark ourself as user of the debug registers for
	 * perfmon purposes.
	 */
	ctx->ctx_fl_using_dbreg = 1;

	/*
 	 * clear hardware registers to make sure we don't
 	 * pick up stale state.
	 *
	 * for a system wide session, we do not use
	 * thread.dbr, thread.ibr because this process
	 * never leaves the current CPU and the state
	 * is shared by all processes running on it
 	 */
	if (first_time && can_access_pmu) {
		DPRINT(("[%d] clearing ibrs, dbrs\n", ctx->ctx_task->pid));
		for (i=0; i < pmu_conf.num_ibrs; i++) {
			ia64_set_ibr(i, 0UL);
			ia64_srlz_i();
		}
		ia64_srlz_i();
		for (i=0; i < pmu_conf.num_dbrs; i++) {
			ia64_set_dbr(i, 0UL);
			ia64_srlz_d();
		}
		ia64_srlz_d();
	}

	/*
	 * Now install the values into the registers
	 */
	for (i = 0; i < count; i++, req++) {

		rnum      = req->dbreg_num;
		dbreg.val = req->dbreg_value;

		ret = -EINVAL;

		if ((mode == PFM_CODE_RR && !IBR_IS_IMPL(rnum)) || ((mode == PFM_DATA_RR) && !DBR_IS_IMPL(rnum))) {
			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
				  rnum, dbreg.val, mode, i, count));

			goto abort_mission;
		}

		/*
		 * make sure we do not install enabled breakpoint
		 */
		if (rnum & 0x1) {
			if (mode == PFM_CODE_RR)
				dbreg.ibr.ibr_x = 0;
			else
				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
		}

		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);

		/*
		 * Debug registers, just like PMC, can only be modified
		 * by a kernel call. Moreover, perfmon() access to those
		 * registers are centralized in this routine. The hardware
		 * does not modify the value of these registers, therefore,
		 * if we save them as they are written, we can avoid having
		 * to save them on context switch out. This is made possible
		 * by the fact that when perfmon uses debug registers, ptrace()
		 * won't be able to modify them concurrently.
		 */
		if (mode == PFM_CODE_RR) {
			CTX_USED_IBR(ctx, rnum);

			if (can_access_pmu) ia64_set_ibr(rnum, dbreg.val);

			ctx->ctx_ibrs[rnum] = dbreg.val;

			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x is_loaded=%d access_pmu=%d\n",
				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
		} else {
			CTX_USED_DBR(ctx, rnum);

			if (can_access_pmu) ia64_set_dbr(rnum, dbreg.val);

			ctx->ctx_dbrs[rnum] = dbreg.val;

			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x is_loaded=%d access_pmu=%d\n",
				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
		}
	}

	return 0;

abort_mission:
	/*
	 * in case it was our first attempt, we undo the global modifications
	 */
	if (first_time) {
		LOCK_PFS();
		if (ctx->ctx_fl_system) {
			pfm_sessions.pfs_sys_use_dbregs--;
		}
		UNLOCK_PFS();
		ctx->ctx_fl_using_dbreg = 0;
	}
	/*
	 * install error return flag
	 */
	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);

	return ret;
}

static int
pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
}

static int
pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
}
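
/*
 * Usage sketch (illustrative only): restricting monitoring to a code
 * range by programming an IBR pair through the entry points above. The
 * pairing of a start address with a mask register is an assumption here;
 * text_start and range_mask are hypothetical values.
 *
 *	pfarg_dbreg_t db[2];
 *	memset(db, 0, sizeof(db));
 *	db[0].dbreg_num   = 0;			// ibr0: start address
 *	db[0].dbreg_value = text_start;
 *	db[1].dbreg_num   = 1;			// ibr1: mask/config
 *	db[1].dbreg_value = range_mask;
 *	if (perfmonctl(fd, PFM_WRITE_IBRS, db, 2) == -1)
 *		perror("PFM_WRITE_IBRS");
 */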

static int
pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_features_t *req = (pfarg_features_t *)arg;

	req->ft_version = PFM_VERSION;
	return 0;
}

static int
pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct pt_regs *tregs;
	struct task_struct *task = PFM_CTX_TASK(ctx);
	int state, is_system;

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	if (state != PFM_CTX_LOADED && state != PFM_CTX_MASKED) return -EINVAL;

	/*
 	 * In system wide and when the context is loaded, access can only happen
 	 * when the caller is running on the CPU being monitored by the session.
 	 * It does not have to be the owner (ctx_task) of the context per se.
 	 */
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
		DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
		return -EBUSY;
	}

	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
	if (is_system) {
		/*
		 * Update local PMU first
		 *
		 * disable dcr pp
		 */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
		ia64_srlz_i();

		/*
		 * update local cpuinfo
		 */
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);

		/*
		 * stop monitoring, does srlz.i
		 */
		pfm_clear_psr_pp();

		/*
		 * stop monitoring in the caller
		 */
		ia64_psr(regs)->pp = 0;

		return 0;
	}
	/*
	 * per-task mode
	 */

	if (task == current) {
		/* stop monitoring  at kernel level */
		pfm_clear_psr_up();

		/*
	 	 * stop monitoring at the user level
	 	 */
		ia64_psr(regs)->up = 0;
	} else {
		tregs = ia64_task_regs(task);

		/*
	 	 * stop monitoring at the user level
	 	 */
		ia64_psr(tregs)->up = 0;

		/*
		 * monitoring disabled in kernel at next reschedule
		 */
		ctx->ctx_saved_psr_up = 0;
		DPRINT(("pfm_stop: current [%d] task=[%d]\n", current->pid, task->pid));
	}
	return 0;
}

static int
pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct pt_regs *tregs;
	int state, is_system;

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	if (state != PFM_CTX_LOADED) return -EINVAL;

	/*
 	 * In system wide and when the context is loaded, access can only happen
 	 * when the caller is running on the CPU being monitored by the session.
 	 * It does not have to be the owner (ctx_task) of the context per se.
 	 */
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
		DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
		return -EBUSY;
	}
	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
	if (is_system) {

		/*
		 * set user level psr.pp for the caller
		 */
		ia64_psr(regs)->pp = 1;
		/*
		 * now update the local PMU and cpuinfo
		 */
		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);

		/*
		 * start monitoring at kernel level
		 */
		pfm_set_psr_pp();

		/* enable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
		ia64_srlz_i();

		return 0;
	}

	/*
	 * per-process mode
	 */

	if (ctx->ctx_task == current) {

		/* start monitoring at kernel level */
		pfm_set_psr_up();
		/*
		 * activate monitoring at user level
		 */
		ia64_psr(regs)->up = 1;

	} else {
		tregs = ia64_task_regs(ctx->ctx_task);

		/*
		 * start monitoring at the kernel level the next
		 * time the task is scheduled
		 */
		ctx->ctx_saved_psr_up = IA64_PSR_UP;

		/*
		 * activate monitoring at user level
		 */
		ia64_psr(tregs)->up = 1;
	}
	return 0;
}
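
/*
 * Usage sketch (illustrative only): a self-monitoring task brackets the
 * measured section with the two calls above; no argument vector is needed.
 *
 *	perfmonctl(fd, PFM_START, NULL, 0);
 *	run_workload();				// hypothetical measured code
 *	perfmonctl(fd, PFM_STOP, NULL, 0);
 */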

static int
pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
	unsigned int cnum;
	int i;
	int ret = -EINVAL;

	for (i = 0; i < count; i++, req++) {

		cnum = req->reg_num;

		if (!PMC_IS_IMPL(cnum)) goto abort_mission;

		req->reg_value = PMC_DFL_VAL(cnum);

		PFM_REG_RETFLAG_SET(req->reg_flags, 0);

		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
	}
	return 0;

abort_mission:
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
	return ret;
}

static int
pfm_check_task_exist(pfm_context_t *ctx)
{
	struct task_struct *g, *t;
	int ret = -ESRCH;

	read_lock(&tasklist_lock);

	do_each_thread (g, t) {
		if (t->thread.pfm_context == ctx) {
			ret = 0;
			break;
		}
	} while_each_thread (g, t);

	read_unlock(&tasklist_lock);

	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));

	return ret;
}

static int
pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct task_struct *task;
	struct thread_struct *thread;
	struct pfm_context_t *old;
#ifndef CONFIG_SMP
	struct task_struct *owner_task = NULL;
#endif
	pfarg_load_t *req = (pfarg_load_t *)arg;
	unsigned long *pmcs_source, *pmds_source;
	int the_cpu;
	int ret = 0;
	int state, is_system;

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;
	/*
	 * can only load from unloaded or terminated state
	 */
	if (state != PFM_CTX_UNLOADED && state != PFM_CTX_TERMINATED) {
		DPRINT(("[%d] cannot load to [%d], invalid ctx_state=%d\n",
			current->pid,
			req->load_pid,
			ctx->ctx_state));
		return -EINVAL;
	}

	DPRINT(("load_pid [%d]\n", req->load_pid));

	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
		DPRINT(("cannot use blocking mode on self for [%d]\n", current->pid));
		return -EINVAL;
	}

	ret = pfm_get_task(ctx, req->load_pid, &task);
	if (ret) {
		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
		return ret;
	}

	ret = -EINVAL;

	/*
	 * system wide is self monitoring only
	 */
	if (is_system && task != current) {
		DPRINT(("system wide is self monitoring only current=%d load_pid=%d\n",
			current->pid,
			req->load_pid));
		goto error;
	}

	thread = &task->thread;

	ret = -EBUSY;

	/*
	 * cannot load a context which is using range restrictions,
	 * into a task that is being debugged.
	 */
	if (ctx->ctx_fl_using_dbreg && (thread->flags & IA64_THREAD_DBG_VALID)) {
		DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
		goto error;
	}

	/*
	 * SMP system-wide monitoring implies self-monitoring.
	 *
	 * The programming model expects the task to
	 * be pinned on a CPU throughout the session.
	 * Here we take note of the current CPU at the
	 * time the context is loaded. No call from
	 * another CPU will be allowed.
	 *
	 * The pinning via sched_setaffinity()
	 * must be done by the calling task prior
	 * to this call.
	 *
	 * systemwide: keep track of CPU this session is supposed to run on
	 */
	the_cpu = ctx->ctx_cpu = smp_processor_id();

	/*
	 * now reserve the session
	 */
	ret = pfm_reserve_session(current, is_system, the_cpu);
	if (ret) goto error;

	ret = -EBUSY;
	/*
	 * task is necessarily stopped at this point.
	 *
	 * If the previous context was zombie, then it got removed in
	 * pfm_save_regs(). Therefore we should not see it here.
	 * If we see a context, then this is an active context
	 *
	 * XXX: needs to be atomic
	 */
	DPRINT(("[%d] before cmpxchg() old_ctx=%p new_ctx=%p\n",
		current->pid, 
		thread->pfm_context, ctx));

	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
	if (old != NULL) {
		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
		goto error_unres;
	}

	pfm_reset_msgq(ctx);

	ctx->ctx_state = PFM_CTX_LOADED;

	/*
	 * link context to task
	 */
	ctx->ctx_task = task;

	if (is_system) {
		/*
		 * we load as stopped
		 */
		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);

		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
	} else {
		thread->flags |= IA64_THREAD_PM_VALID;
	}

	/*
	 * propagate into thread-state
	 */
	pfm_copy_pmds(task, ctx);
	pfm_copy_pmcs(task, ctx);

	pmcs_source = thread->pmcs;
	pmds_source = thread->pmds;

	/*
	 * always the case for system-wide
	 */
	if (task == current) {

		if (is_system == 0) {

			/* allow user level control */
			ia64_psr(regs)->sp = 0;
			DPRINT(("clearing psr.sp for [%d]\n", task->pid));

			SET_LAST_CPU(ctx, smp_processor_id());
			INC_ACTIVATION();
			SET_ACTIVATION(ctx);
#ifndef CONFIG_SMP
			/*
			 * push the other task out, if any
			 */
			owner_task = GET_PMU_OWNER();
			if (owner_task) pfm_lazy_save_regs(owner_task);
#endif
		}
		/*
		 * load all PMD from ctx to PMU (as opposed to thread state)
		 * restore all PMC from ctx to PMU
		 */
		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);

		ctx->ctx_reload_pmcs[0] = 0UL;
		ctx->ctx_reload_pmds[0] = 0UL;

		/*
		 * guaranteed safe by earlier check against DBG_VALID
		 */
		if (ctx->ctx_fl_using_dbreg) {
			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf.num_ibrs);
			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs);
		}
		/*
		 * set new ownership
		 */
		SET_PMU_OWNER(task, ctx);

		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
	} else {
		/*
		 * when not current, task MUST be stopped, so this is safe
		 */
		regs = ia64_task_regs(task);

		/* force a full reload */
		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
		SET_LAST_CPU(ctx, -1);

		/* initial saved psr (stopped) */
		ctx->ctx_saved_psr_up = 0UL;
		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;

		if (ctx->ctx_fl_unsecure) {
			ia64_psr(regs)->sp = 0;
			DPRINT(("context unsecured for [%d]\n", task->pid));
		}
	}

	ret = 0;

error_unres:
	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
error:
	/*
	 * release task, there is now a link with the context
	 */
	if (is_system == 0 && task != current) {
		pfm_put_task(task);

		if (ret == 0) {
			ret = pfm_check_task_exist(ctx);
			if (ret) {
				ctx->ctx_state = PFM_CTX_UNLOADED;
				ctx->ctx_task  = NULL;
			}
		}
	}
	return ret;
}
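
/*
 * Usage sketch (illustrative only): attaching a freshly created context
 * to a ptrace-stopped target before programming its registers.
 *
 *	pfarg_load_t ld;
 *	memset(&ld, 0, sizeof(ld));
 *	ld.load_pid = target_pid;		// hypothetical stopped task
 *	if (perfmonctl(fd, PFM_LOAD_CONTEXT, &ld, 1) == -1)
 *		perror("PFM_LOAD_CONTEXT");
 */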

/*
 * in this function, we do not need to increase the use count
 * for the task via get_task_struct(), because we hold the
 * context lock. If the task were to disappear while having
 * a context attached, it would go through pfm_exit_thread()
 * which also grabs the context lock  and would therefore be blocked
 * until we are here.
 */
static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);

static int
pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct task_struct *task = PFM_CTX_TASK(ctx);
	struct pt_regs *tregs;
	int state, is_system;

	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	/*
	 * unload only when necessary
	 */
	if (state == PFM_CTX_TERMINATED || state == PFM_CTX_UNLOADED) {
		DPRINT(("[%d] ctx_state=%d, nothing to do\n", current->pid, ctx->ctx_state));
		return 0;
	}

	/*
	 * clear psr and dcr bits
	 */
	pfm_stop(ctx, NULL, 0, regs);

	ctx->ctx_state = state = PFM_CTX_UNLOADED;

	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
	if (is_system) {

		/*
		 * Update cpuinfo
		 *
		 * local PMU is taken care of in pfm_stop()
		 */
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);

		/*
		 * save PMDs in context
		 * release ownership
		 */
		pfm_flush_pmds(current, ctx);

		/*
		 * at this point we are done with the PMU
		 * so we can unreserve the resource.
		 */
		pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);

		/*
		 * disconnect context from task
		 */
		task->thread.pfm_context = NULL;
		/*
		 * disconnect task from context
		 */
		ctx->ctx_task = NULL;

		/*
		 * There is nothing more to cleanup here.
		 */
		return 0;
	}

	/*
	 * per-task mode
	 */
	tregs = task == current ? regs : ia64_task_regs(task);

	if (task == current || ctx->ctx_fl_unsecure) {
		/*
		 * cancel user level control
		 */
		ia64_psr(regs)->sp = 1;

		DPRINT(("setting psr.sp for [%d]\n", task->pid));
	}
	/*
	 * save PMDs to context
	 * release ownership
	 */
	pfm_flush_pmds(task, ctx);

	/*
	 * at this point we are done with the PMU
	 * so we can unreserve the resource.
	 */
	pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);

	/*
	 * reset activation counter and psr
	 */
	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
	SET_LAST_CPU(ctx, -1);

	/*
	 * PMU state will not be restored
	 */
	task->thread.flags &= ~IA64_THREAD_PM_VALID;

	/*
	 * break links between context and task
	 */
	task->thread.pfm_context  = NULL;
	ctx->ctx_task             = NULL;

	PFM_SET_WORK_PENDING(task, 0);
	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;

	DPRINT(("disconnected [%d] from context\n", task->pid));

	return 0;
}
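
/*
 * Usage sketch (illustrative only): detaching the context once the
 * measurement is over; the virtualized counts saved in the context
 * are assumed to remain readable afterwards.
 *
 *	perfmonctl(fd, PFM_STOP, NULL, 0);
 *	if (perfmonctl(fd, PFM_UNLOAD_CONTEXT, NULL, 0) == -1)
 *		perror("PFM_UNLOAD_CONTEXT");
 */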

static void
pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
{
	struct task_struct *task = ctx->ctx_task;

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;

	if (GET_PMU_OWNER() == task) {
		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
		SET_PMU_OWNER(NULL, NULL);
	}

	/*
	 * disconnect the task from the context and vice-versa
	 */
	PFM_SET_WORK_PENDING(task, 0);

	task->thread.pfm_context  = NULL;
	task->thread.flags       &= ~IA64_THREAD_PM_VALID;

	DPRINT(("context <%d> force cleanup for [%d] by [%d]\n", ctx->ctx_fd, task->pid, current->pid));
}

/*
 * called only from exit_thread(): task == current
 */
void
pfm_exit_thread(struct task_struct *task)
{
	pfm_context_t *ctx;
	unsigned long flags;
	struct pt_regs *regs = ia64_task_regs(task);
	int ret, state;
	int free_ok = 0;

	ctx = PFM_GET_CTX(task);

	PROTECT_CTX(ctx, flags);

	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));

	state = ctx->ctx_state;
	switch(state) {
		case PFM_CTX_UNLOADED:
			/*
	 		 * come here only if attached
	 		 */
			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
			break;
		case PFM_CTX_LOADED:
		case PFM_CTX_MASKED:
			ret = pfm_context_unload(ctx, NULL, 0, regs);
			if (ret) {
				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, ctx->ctx_state, ret);
			}
			ctx->ctx_state = PFM_CTX_TERMINATED;
			DPRINT(("ctx terminated by [%d]\n", task->pid));

			pfm_end_notify_user(ctx);
			break;
		case PFM_CTX_ZOMBIE:
			pfm_clear_psr_up();

			BUG_ON(ctx->ctx_smpl_hdr);

			pfm_force_cleanup(ctx, regs);

			free_ok = 1;
			break;
		default:
			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
			break;
	}
	{ u64 psr = pfm_get_psr();
	  BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	  BUG_ON(GET_PMU_OWNER());
	}
	UNPROTECT_CTX(ctx, flags);

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (free_ok) pfm_context_free(ctx);
}

/*
 * functions MUST be listed in the increasing order of their index (see perfmon.h)
 */
#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
#define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
#define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
#define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}

static pfm_cmd_desc_t pfm_cmd_tab[]={
/* 0  */PFM_CMD_NONE,
/* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
/* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
/* 6  */PFM_CMD_NONE,
/* 7  */PFM_CMD_NONE,
/* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
/* 9  */PFM_CMD_NONE,
/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
/* 11 */PFM_CMD_NONE,
/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
/* 14 */PFM_CMD_NONE,
/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
/* 18 */PFM_CMD_NONE,
/* 19 */PFM_CMD_NONE,
/* 20 */PFM_CMD_NONE,
/* 21 */PFM_CMD_NONE,
/* 22 */PFM_CMD_NONE,
/* 23 */PFM_CMD_NONE,
/* 24 */PFM_CMD_NONE,
/* 25 */PFM_CMD_NONE,
/* 26 */PFM_CMD_NONE,
/* 27 */PFM_CMD_NONE,
/* 28 */PFM_CMD_NONE,
/* 29 */PFM_CMD_NONE,
/* 30 */PFM_CMD_NONE,
/* 31 */PFM_CMD_NONE,
/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
};
#define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
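
/*
 * Dispatch note: sys_perfmonctl() below indexes this table with
 * PFM_CMD_IDX(cmd) and invokes the handler as
 *
 *	ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(ctx, args_k, count, regs);
 *
 * which is why each entry MUST sit at the index matching its command code.
 */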

static int
pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
{
	struct task_struct *task;
	int state;

	state = ctx->ctx_state;

	task = PFM_CTX_TASK(ctx);
	if (task == NULL) {
		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
		return 0;
	}

	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
				ctx->ctx_fd,
				state,
				task->pid,
				task->state, PFM_CMD_STOPPED(cmd)));

	/*
	 * self-monitoring always ok.
	 *
	 * for system-wide the caller can either be the creator of the
	 * context (the one to which the context is attached) OR
	 * a task running on the same CPU as the session.
	 */
	if (task == current || ctx->ctx_fl_system) return 0;

	if (state == PFM_CTX_ZOMBIE) return -EINVAL;

	/*
	 * context is UNLOADED, MASKED, TERMINATED: we are safe to go
	 */
	if (state != PFM_CTX_LOADED) return 0;

	/*
	 * context is loaded, we must make sure the task is stopped
	 * We could lift this restriction for UP but it would mean that
	 * the user has no guarantee the task would not run between
	 * two successive calls to perfmonctl(). That's probably OK.
	 * If this user wants to ensure the task does not run, then
	 * the task must be stopped.
	 */
	if (PFM_CMD_STOPPED(cmd) && task->state != TASK_STOPPED) {
		DPRINT(("[%d] task not in stopped state\n", task->pid));
		return -EBUSY;
	}

	UNPROTECT_CTX(ctx, flags);

	pfm_wait_task_inactive(task);

	PROTECT_CTX(ctx, flags);

	return 0;
}

/*
 * system-call entry point (must return long)
 */
asmlinkage long
sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
David Mosberger's avatar
David Mosberger committed
4701
		long arg8, long stack)
Linus Torvalds's avatar
Linus Torvalds committed
4702
{
David Mosberger's avatar
David Mosberger committed
4703
	struct pt_regs *regs = (struct pt_regs *)&stack;
4704 4705 4706 4707 4708 4709 4710 4711
	struct file *file = NULL;
	pfm_context_t *ctx = NULL;
	unsigned long flags = 0UL;
	void *args_k = NULL;
	long ret; /* will expand int return types */
	size_t base_sz, sz, xtra_sz = 0;
	int narg, completed_args = 0, call_made = 0;
#define PFM_MAX_ARGSIZE	4096
Linus Torvalds's avatar
Linus Torvalds committed
4712

4713
	/*
David Mosberger's avatar
David Mosberger committed
4714
	 * reject any call if perfmon was disabled at initialization time
David Mosberger's avatar
David Mosberger committed
4715
	 mask*/
David Mosberger's avatar
David Mosberger committed
4716
	if (PFM_IS_DISABLED()) return -ENOSYS;
Linus Torvalds's avatar
Linus Torvalds committed
4717

4718 4719 4720 4721
	if (unlikely(PFM_CMD_IS_VALID(cmd) == 0)) {
		DPRINT(("[%d] invalid cmd=%d\n", current->pid, cmd));
		return -EINVAL;
	}
Linus Torvalds's avatar
Linus Torvalds committed
4722

4723 4724 4725 4726 4727 4728
	DPRINT(("cmd=%s idx=%d valid=%d narg=0x%x argsz=%lu count=%d\n",
		PFM_CMD_NAME(cmd),
		PFM_CMD_IDX(cmd),
		PFM_CMD_IS_VALID(cmd),
		PFM_CMD_NARG(cmd),
		PFM_CMD_ARG_SIZE(cmd), count));
Linus Torvalds's avatar
Linus Torvalds committed
4729

4730 4731 4732
	/*
	 * check if number of arguments matches what the command expects
	 */
David Mosberger's avatar
David Mosberger committed
4733
	narg = PFM_CMD_NARG(cmd);
4734 4735
	if ((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))
		return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
4736

4737 4738
	/* get single argument size */
	base_sz = PFM_CMD_ARG_SIZE(cmd);
Linus Torvalds's avatar
Linus Torvalds committed
4739

4740 4741 4742 4743 4744 4745 4746 4747 4748
restart_args:
	sz = xtra_sz + base_sz*count;
	/*
	 * limit abuse to min page size
	 */
	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
		return -E2BIG;
	}
Linus Torvalds's avatar
Linus Torvalds committed
4749

4750 4751 4752 4753 4754 4755 4756
	/*
	 * allocate default-sized argument buffer
	 */
	if (count && args_k == NULL) {
		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
		if (args_k == NULL) return -ENOMEM;
	}
David Mosberger's avatar
David Mosberger committed
4757

4758
	ret = -EFAULT;
David Mosberger's avatar
David Mosberger committed
4759

4760 4761 4762 4763 4764 4765 4766 4767 4768
	/*
	 * copy arguments
	 *
	 * assume sz = 0 for command without parameters
	 */
	if (sz && copy_from_user(args_k, arg, sz)) {
		DPRINT(("[%d] cannot copy_from_user %lu bytes @%p\n", current->pid, sz, arg));
		goto error_args;
	}
David Mosberger's avatar
David Mosberger committed
4769

4770 4771 4772 4773 4774 4775 4776 4777 4778
	/*
	 * check if command supports extra parameters
	 */
	if (completed_args == 0 && PFM_CMD_GETSIZE(cmd)) {
		/*
		 * get extra parameters size (based on main argument)
		 */
		ret = PFM_CMD_GETSIZE(cmd)(args_k, &xtra_sz);
		if (ret) goto error_args;
David Mosberger's avatar
David Mosberger committed
4779

4780
		completed_args = 1;
David Mosberger's avatar
David Mosberger committed
4781

4782
		DPRINT(("[%d] restart_args sz=%lu xtra_sz=%lu\n", current->pid, sz, xtra_sz));
David Mosberger's avatar
David Mosberger committed
4783

4784 4785 4786
		/* retry if necessary */
		if (xtra_sz) goto restart_args;
	}
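
	/*
	 * at this point args_k holds base_sz*count bytes of fixed
	 * arguments followed, when the command defines a getsize
	 * callback, by xtra_sz bytes of variable-size payload copied
	 * in by the second pass through restart_args.
	 */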

	if (PFM_CMD_USE_FD(cmd))  {

		ret = -EBADF;

		file = fget(fd);
		if (file == NULL) {
			DPRINT(("[%d] invalid fd %d\n", current->pid, fd));
			goto error_args;
		}
		if (PFM_IS_FILE(file) == 0) {
			DPRINT(("[%d] fd %d not related to perfmon\n", current->pid, fd));
			goto error_args;
		}

		ctx = (pfm_context_t *)file->private_data;
		if (ctx == NULL) {
			DPRINT(("[%d] no context for fd %d\n", current->pid, fd));
			goto error_args;
		}

		PROTECT_CTX(ctx, flags);

		/*
		 * check task is stopped
		 */
		ret = pfm_check_task_state(ctx, cmd, flags);
		if (ret) goto abort_locked;
	}

	ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(ctx, args_k, count, regs);

	call_made = 1;

abort_locked:
	if (ctx) {
		DPRINT(("[%d] context unlocked\n", current->pid));
		UNPROTECT_CTX(ctx, flags);
		fput(file);
	}

	/* copy argument back to user, if needed */
	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;

error_args:
	if (args_k) kfree(args_k);

	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));

	return ret;
}

static void
pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
{
	pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
	pfm_ovfl_ctrl_t rst_ctrl;
	int ret = 0;

	/*
	 * Unlock sampling buffer and reset index atomically
	 * XXX: not really needed when blocking
	 */
	if (CTX_HAS_SMPL(ctx)) {

		rst_ctrl.bits.mask_monitoring = 0;
		rst_ctrl.bits.reset_ovfl_pmds = 1;

		/* XXX: check return value */
		if (fmt->fmt_restart)
			ret = (*fmt->fmt_restart)(current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
	} else {
		rst_ctrl.bits.mask_monitoring = 0;
		rst_ctrl.bits.reset_ovfl_pmds = 1;
	}

	if (ret == 0) {
		if (rst_ctrl.bits.reset_ovfl_pmds) {
			pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
		}
		if (rst_ctrl.bits.mask_monitoring == 0) {
			DPRINT(("resuming monitoring\n"));
			if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
		} else {
			DPRINT(("stopping monitoring\n"));
			//pfm_stop_monitoring(current, regs);
		}
		ctx->ctx_state = PFM_CTX_LOADED;
	}
}

/*
 * context MUST BE LOCKED when calling
 * can only be called for current
 */
static void
pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
{
	if (ctx->ctx_fl_system) {
		printk(KERN_ERR "perfmon: pfm_context_force_terminate [%d] is system-wide\n", current->pid);
		return;
	}
	/*
	 * we stop the whole thing, we do not need to flush
	 * we know we WERE masked
	 */
	pfm_clear_psr_up();
	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;

	/*
	 * disconnect the task from the context and vice-versa
	 */
	current->thread.pfm_context  = NULL;
	current->thread.flags       &= ~IA64_THREAD_PM_VALID;
	ctx->ctx_task = NULL;

	/*
	 * switch to terminated state
	 */
	ctx->ctx_state = PFM_CTX_TERMINATED;

	DPRINT(("context <%d> terminated for [%d]\n", ctx->ctx_fd, current->pid));

	/*
	 * and wakeup controlling task, indicating we are now disconnected
	 */
	wake_up_interruptible(&ctx->ctx_zombieq);

	/*
	 * given that context is still locked, the controlling
	 * task will only get access when we return from
	 * pfm_handle_work().
	 */
}

static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);

void
pfm_handle_work(void)
{
	pfm_context_t *ctx;
	struct pt_regs *regs;
	unsigned long flags;
	unsigned long ovfl_regs;
	unsigned int reason;
	int ret;

	ctx = PFM_GET_CTX(current);
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
		return;
	}

	PROTECT_CTX(ctx, flags);

	PFM_SET_WORK_PENDING(current, 0);

	pfm_clear_task_notify();

	regs = ia64_task_regs(current);

	/*
	 * extract reason for being here and clear
	 */
	reason = ctx->ctx_fl_trap_reason;
	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;

	DPRINT(("[%d] reason=%d\n", current->pid, reason));

	/*
	 * must be done before we check non-blocking mode
	 */
	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;

	ovfl_regs = ctx->ctx_ovfl_regs[0];

	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;

	UNPROTECT_CTX(ctx, flags);

	DPRINT(("before block sleeping\n"));

	/*
	 * may go through without blocking on SMP systems
	 * if restart has been received already by the time we call down()
	 */
	ret = down_interruptible(&ctx->ctx_restart_sem);

	DPRINT(("after block sleeping ret=%d\n", ret));

	PROTECT_CTX(ctx, flags);

	if (ctx->ctx_fl_going_zombie) {
do_zombie:
		DPRINT(("context is zombie, bailing out\n"));
		pfm_context_force_terminate(ctx, regs);
		goto nothing_to_do;
	}
	/*
	 * in case of interruption of down() we don't restart anything
	 */
	if (ret < 0) goto nothing_to_do;

skip_blocking:
	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
	ctx->ctx_ovfl_regs[0] = 0UL;

nothing_to_do:

	UNPROTECT_CTX(ctx, flags);
}
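
/*
 * User-level counterpart to the blocking path above (illustrative
 * sketch only): the controlling tool reads a notification message from
 * the context descriptor, processes the samples, then issues
 * PFM_RESTART, which ups ctx_restart_sem and releases the blocked task:
 *
 *	pfm_msg_t msg;
 *	read(ctx_fd, &msg, sizeof(msg));
 *	... consume sampling buffer ...
 *	perfmonctl(ctx_fd, PFM_RESTART, NULL, 0);
 */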

static int
pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
{
	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
		DPRINT(("ignoring overflow notification, owner is zombie\n"));
		return 0;
	}

	DPRINT(("[%d] waking up somebody\n", current->pid));

	if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);

	/*
	 * safe, we are not in intr handler, nor in ctxsw when
	 * we come here
	 */
	kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);

	return 0;
}

static int
pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
{
	pfm_msg_t *msg = NULL;

	if (ctx->ctx_fl_no_msg == 0) {
		msg = pfm_get_new_msg(ctx);
		if (msg == NULL) {
			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
			return -1;
		}

		msg->pfm_ovfl_msg.msg_type         = PFM_MSG_OVFL;
		msg->pfm_ovfl_msg.msg_ctx_fd       = ctx->ctx_fd;
		msg->pfm_ovfl_msg.msg_active_set   = 0;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
		msg->pfm_ovfl_msg.msg_tstamp       = ia64_get_itc(); /* relevant on UP only */
	}

	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d pid=%d ovfl_pmds=0x%lx\n",
		msg,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd,
		current->pid,
		ovfl_pmds));

	return pfm_notify_user(ctx, msg);
}

static int
pfm_end_notify_user(pfm_context_t *ctx)
{
	pfm_msg_t *msg;

	msg = pfm_get_new_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
		return -1;
	}

	msg->pfm_end_msg.msg_type    = PFM_MSG_END;
	msg->pfm_end_msg.msg_ctx_fd  = ctx->ctx_fd;
	msg->pfm_ovfl_msg.msg_tstamp = ia64_get_itc(); /* relevant on UP only */

	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d pid=%d\n",
		msg,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd, current->pid));

	return pfm_notify_user(ctx, msg);
}

/*
 * main overflow processing routine.
 * it can be called from the interrupt path or explicitly during the context switch code
 */
static void
pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
{
	pfm_ovfl_arg_t ovfl_arg;
	unsigned long mask;
	unsigned long old_val;
	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL;
	unsigned long tstamp;
	pfm_ovfl_ctrl_t	ovfl_ctrl;
	unsigned int i, has_smpl;
	int must_notify = 0;

	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;

	/*
	 * sanity test. Should never happen
	 */
	if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;

	tstamp = ia64_get_itc();

	mask = pmc0 >> PMU_FIRST_COUNTER;

	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
		     "used_pmds=0x%lx reload_pmcs=0x%lx\n",
			pmc0,
			task ? task->pid: -1,
			(regs ? regs->cr_iip : 0),
			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
			ctx->ctx_used_pmds[0],
			ctx->ctx_reload_pmcs[0]));

	has_smpl = CTX_HAS_SMPL(ctx);

	/*
	 * first we update the virtual counters
	 * assume there was a prior ia64_srlz_d() issued
	 */
	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {

		/* skip pmd which did not overflow */
		if ((mask & 0x1) == 0) continue;

		DPRINT_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx ctx_pmd=0x%lx\n",
			i, ia64_get_pmd(i), ctx->ctx_pmds[i].val));

		/*
		 * Note that the pmd is not necessarily 0 at this point as qualified events
		 * may have happened before the PMU was frozen. The residual count is not
		 * taken into consideration here but will be with any read of the pmd via
		 * pfm_read_pmds().
		 */
		old_val               = ctx->ctx_pmds[i].val;
		ctx->ctx_pmds[i].val += 1 + pmu_conf.ovfl_val;
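
		/*
		 * worked example: with 47-bit hardware counters,
		 * pmu_conf.ovfl_val is 2^47-1, so each hardware wrap adds
		 * 2^47 to the 64-bit software value kept in ctx_pmds[i].val
		 * (the actual counter width is PMU model specific).
		 */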

		/*
		 * check for overflow condition
		 */
		if (likely(old_val > ctx->ctx_pmds[i].val)) {
			ovfl_pmds |= 1UL << i;
			if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
		}

		DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx smpl_pmds=0x%lx\n",
			i, ctx->ctx_pmds[i].val, old_val,
			ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify, smpl_pmds));
	}

	/*
	 * there was no 64-bit overflow, nothing else to do
	 */
	if (ovfl_pmds == 0UL) return;

	/*
	 * reset all control bits
	 */
	ovfl_ctrl.val = 0;

	/*
	 * if a sampling format module exists, then we "cache" the overflow by
	 * calling the module's handler() routine.
	 */
	if (has_smpl) {
		unsigned long start_cycles, end_cycles;
		unsigned long pmd_mask;
		int j, k, ret = 0;
		int this_cpu = smp_processor_id();

		pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;

		prefetch(ctx->ctx_smpl_hdr);

		for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {

			mask = 1UL << i;

			if ((pmd_mask & 0x1) == 0) continue;

			ovfl_arg.ovfl_pmd      = (unsigned char )i;
			ovfl_arg.ovfl_notify   = ovfl_notify & mask ? 1 : 0;
			ovfl_arg.active_set    = 0;
			ovfl_arg.ovfl_ctrl.val = 0; /* module must fill in all fields */
			ovfl_arg.smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];

			ovfl_arg.pmd_value      = ctx->ctx_pmds[i].val;
			ovfl_arg.pmd_last_reset = ctx->ctx_pmds[i].lval;
			ovfl_arg.pmd_eventid    = ctx->ctx_pmds[i].eventid;

			/*
		 	 * copy values of pmds of interest. Sampling format may copy them
		 	 * into sampling buffer.
		 	 */
			if (smpl_pmds) {
				for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
					if ((smpl_pmds & 0x1) == 0) continue;
					ovfl_arg.smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
				}
			}

			pfm_stats[this_cpu].pfm_smpl_handler_calls++;

			start_cycles = ia64_get_itc();

			/*
		 	 * call custom buffer format record (handler) routine
		 	 */
			ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, &ovfl_arg, regs, tstamp);

			end_cycles = ia64_get_itc();

			/*
			 * For those controls, we take the union because they have
			 * an all or nothing behavior.
			 */
			ovfl_ctrl.bits.notify_user     |= ovfl_arg.ovfl_ctrl.bits.notify_user;
			ovfl_ctrl.bits.block_task      |= ovfl_arg.ovfl_ctrl.bits.block_task;
			ovfl_ctrl.bits.mask_monitoring |= ovfl_arg.ovfl_ctrl.bits.mask_monitoring;
			ovfl_ctrl.bits.reset_ovfl_pmds |= ovfl_arg.ovfl_ctrl.bits.reset_ovfl_pmds; /* yes or no */

			pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
		}
		/*
		 * when the module cannot handle the rest of the overflows, we abort right here
		 */
		if (ret && pmd_mask) {
			DPRINT(("current [%d] handler aborts leftover ovfl_pmds=0x%lx\n",
				current->pid,
				pmd_mask<<PMU_FIRST_COUNTER));
		}
	} else {
		/*
		 * when no sampling module is used, then the default
		 * is to notify on overflow if requested by user
		 */
		ovfl_ctrl.bits.notify_user     = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.block_task      = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
		ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
	}
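
	/*
	 * Illustrative sketch of the handler contract assumed above (see
	 * pfm_buffer_fmt_t): a format module returns 0 to keep consuming
	 * overflows and non-zero to abort the walk, and should set every
	 * ovfl_ctrl bit since the caller ORs them together:
	 *
	 *	static int my_fmt_handler(struct task_struct *task, void *buf,
	 *			pfm_ovfl_arg_t *arg, struct pt_regs *regs,
	 *			unsigned long stamp)
	 *	{
	 *		... append a record to buf ...
	 *		arg->ovfl_ctrl.bits.notify_user     = 0;
	 *		arg->ovfl_ctrl.bits.block_task      = 0;
	 *		arg->ovfl_ctrl.bits.mask_monitoring = 0;
	 *		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
	 *		return 0;
	 *	}
	 *
	 * my_fmt_handler is a hypothetical name, not a format shipped here.
	 */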


	/*
	 * if we (still) have some overflowed PMD but no notification is requested
	 * then we use the short reset period.
	 */
	if (ovfl_ctrl.bits.reset_ovfl_pmds) {
		unsigned long bm = ovfl_pmds;
		pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
	}

	if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
		/*
		 * keep track of what to reset when unblocking
		 */
		ctx->ctx_ovfl_regs[0] = ovfl_pmds;

		/*
		 * check for blocking context
		 */
		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {

			ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;

			/*
			 * set the perfmon specific pending work for the task
			 */
			PFM_SET_WORK_PENDING(task, 1);

			/*
			 * when coming from ctxsw, current still points to the
			 * previous task, therefore we must work with task and not current.
			 */
			pfm_set_task_notify(task);
		}
		/*
		 * defer until state is changed (shorten spin window). the context is locked
		 * anyway, so the signal receiver would come spin for nothing.
		 */
		must_notify = 1;
	}

	DPRINT_ovfl(("current [%d] owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
			current->pid,
			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
			PFM_GET_WORK_PENDING(task),
			ctx->ctx_fl_trap_reason,
			ovfl_pmds,
			ovfl_notify,
			ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
	/*
	 * in case monitoring must be stopped, we toggle the psr bits
	 */
	if (ovfl_ctrl.bits.mask_monitoring) {
		pfm_mask_monitoring(task);
		ctx->ctx_state = PFM_CTX_MASKED;
	}

	/*
	 * send notification now
	 */
	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);

	return;

sanity_check:
	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
			smp_processor_id(),
			task ? task->pid : -1,
			pmc0);
	return;

stop_monitoring:
	/*
	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
	 * come here as zombie only if the task is the current task. In which case, we
	 * can access the PMU hardware directly.
	 *
	 * Note that zombies do have PM_VALID set. So here we do the minimal.
	 *
	 * In case the context was zombified it could not be reclaimed at the time
	 * the monitoring program exited. At this point, the PMU reservation has been
	 * returned, the sampling buffer has been freed. We must convert this call
	 * into a spurious interrupt. However, we must also avoid infinite overflows
	 * by stopping monitoring for this task. We can only come here for a per-task
	 * context. All we need to do is to stop monitoring using the psr bits which
	 * are always task private. By re-enabling secure monitoring, we ensure that
	 * the monitored task will not be able to re-activate monitoring.
	 * The task will eventually be context switched out, at which point the context
	 * will be reclaimed (that includes releasing ownership of the PMU).
	 *
	 * So there might be a window of time where the number of per-task sessions is zero
	 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
	 * context. This is safe because if a per-task session comes in, it will push this one
	 * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide
	 * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
	 * also push our zombie context out.
	 *
	 * Overall pretty hairy stuff....
	 */
	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
	pfm_clear_psr_up();
	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;
	return;
}

static int
pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
{
	struct task_struct *task;
	pfm_context_t *ctx;
	unsigned long flags;
	u64 pmc0;
	int this_cpu = smp_processor_id();
	int retval = 0;

	pfm_stats[this_cpu].pfm_ovfl_intr_count++;

	/*
	 * srlz.d done before arriving here
	 */
	pmc0 = ia64_get_pmc(0);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

	/*
	 * if we have some pending bits set
	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
	 */
	if (PMC0_HAS_OVFL(pmc0) && task) {
		/*
		 * we assume that pmc0.fr is always set here
		 */

		/* sanity check */
		if (!ctx) goto report_spurious;

		if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
			printk("perfmon: current [%d] owner = [%d] PMVALID=0 state=%d\n", current->pid, task->pid, ctx->ctx_state);
			goto report_spurious;
		}

		PROTECT_CTX_NOPRINT(ctx, flags);

		pfm_overflow_handler(task, ctx, pmc0, regs);

		UNPROTECT_CTX_NOPRINT(ctx, flags);

	} else {
		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
		retval = -1;
	}
	/*
	 * keep it unfrozen at all times
	 */
	pfm_unfreeze_pmu();

	return retval;

report_spurious:
	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
		this_cpu, task->pid);
	pfm_unfreeze_pmu();
	return -1;
}

static pfm_irq_handler_t
pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
{
	unsigned long start_cycles, total_cycles;
	unsigned long min, max;
	int this_cpu;
	int ret;

	this_cpu = smp_processor_id();
	min      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
	max      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;

	start_cycles = ia64_get_itc();

	ret = pfm_do_interrupt_handler(irq, arg, regs);

	total_cycles = ia64_get_itc();

	/*
	 * don't measure spurious interrupts
	 */
	if (likely(ret == 0)) {
		total_cycles -= start_cycles;

		if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
		if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;

		pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
	}
	PFM_IRQ_HANDLER_RET();
}


/* for debug only */
static int
pfm_proc_info(char *page)
{
	char *p = page;
	pfm_buffer_fmt_t *b;
	unsigned long psr;
	int i;

	p += sprintf(p, "model                     : %s\n", pmu_conf.pmu_name);
	p += sprintf(p, "fastctxsw                 : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
	p += sprintf(p, "ovfl_mask                 : 0x%lx\n", pmu_conf.ovfl_val);

	for(i=0; i < NR_CPUS; i++) {
		if (cpu_online(i) == 0) continue;
		p += sprintf(p, "CPU%-2d overflow intrs      : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_count);
		p += sprintf(p, "CPU%-2d overflow cycles     : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_cycles);
		p += sprintf(p, "CPU%-2d overflow min        : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_cycles_min);
		p += sprintf(p, "CPU%-2d overflow max        : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_cycles_max);
		p += sprintf(p, "CPU%-2d smpl handler calls  : %lu\n", i, pfm_stats[i].pfm_smpl_handler_calls);
		p += sprintf(p, "CPU%-2d smpl handler cycles : %lu\n", i, pfm_stats[i].pfm_smpl_handler_cycles);
		p += sprintf(p, "CPU%-2d spurious intrs      : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count);
		p += sprintf(p, "CPU%-2d sysupdt count       : %lu\n", i, pfm_stats[i].pfm_sysupdt_count);
		p += sprintf(p, "CPU%-2d sysupdt cycles      : %lu\n", i, pfm_stats[i].pfm_sysupdt_cycles);
		p += sprintf(p, "CPU%-2d syst_wide           : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_SYST_WIDE ? 1 : 0);
		p += sprintf(p, "CPU%-2d dcr_pp              : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_DCR_PP ? 1 : 0);
		p += sprintf(p, "CPU%-2d exclude idle        : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0);
		p += sprintf(p, "CPU%-2d owner               : %d\n" , i, pfm_get_cpu_data(pmu_owner, i) ? pfm_get_cpu_data(pmu_owner, i)->pid: -1);
		p += sprintf(p, "CPU%-2d context             : %p\n" , i, pfm_get_cpu_data(pmu_ctx, i));
		p += sprintf(p, "CPU%-2d activations         : %lu\n", i, pfm_get_cpu_data(pmu_activation_number,i));
	}

	if (num_online_cpus() == 1)
	{
		psr = pfm_get_psr();
		ia64_srlz_d();
		p += sprintf(p, "CPU%-2d psr                 : 0x%lx\n", smp_processor_id(), psr);
		p += sprintf(p, "CPU%-2d pmc0                : 0x%lx\n", smp_processor_id(), ia64_get_pmc(0));
		for(i=4; i < 8; i++) {
			p += sprintf(p, "CPU%-2d pmc%u                : 0x%lx\n", smp_processor_id(), i, ia64_get_pmc(i));
			p += sprintf(p, "CPU%-2d pmd%u                : 0x%lx\n", smp_processor_id(), i, ia64_get_pmd(i));
		}
	}

	LOCK_PFS();
	p += sprintf(p, "proc_sessions             : %u\n"
		"sys_sessions              : %u\n"
		"sys_use_dbregs            : %u\n"
		"ptrace_use_dbregs         : %u\n",
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		pfm_sessions.pfs_ptrace_use_dbregs);
	UNLOCK_PFS();

	LOCK_BUF_FMT_LIST();

	for (b = pfm_buffer_fmt_list; b ; b = b->fmt_next) {
		p += sprintf(p, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
				b->fmt_uuid[0],
				b->fmt_uuid[1],
				b->fmt_uuid[2],
				b->fmt_uuid[3],
				b->fmt_uuid[4],
				b->fmt_uuid[5],
				b->fmt_uuid[6],
				b->fmt_uuid[7],
				b->fmt_uuid[8],
				b->fmt_uuid[9],
				b->fmt_uuid[10],
				b->fmt_uuid[11],
				b->fmt_uuid[12],
				b->fmt_uuid[13],
				b->fmt_uuid[14],
				b->fmt_uuid[15],
				b->fmt_name);
	}
	UNLOCK_BUF_FMT_LIST();

	return p - page;
}

/* /proc interface, for debug only */
static int
perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
{
	int len = pfm_proc_info(page);
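
	/*
	 * old-style one-page /proc read protocol (pre-seq_file): *start
	 * and *eof tell the caller which slice of the page to copy out
	 * and whether more data remains for a subsequent read.
	 */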

	if (len <= off+count) *eof = 1;

	*start = page + off;
	len   -= off;

	if (len>count) len = count;
	if (len<0) len = 0;

	return len;
}

/*
 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
 * is active or inactive based on mode. We must rely on the value in
 * local_cpu_data->pfm_syst_info
 */
void
pfm_do_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
{
	struct pt_regs *regs;
	unsigned long dcr;
	unsigned long dcr_pp;

	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;

	/*
	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
	 * on every CPU, so we can rely on the pid to identify the idle task.
	 */
	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
		regs = ia64_task_regs(task);
		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
		return;
	}
	/*
	 * if monitoring has started
	 */
	if (dcr_pp) {
		dcr = ia64_getreg(_IA64_REG_CR_DCR);
		/*
		 * context switching in?
		 */
		if (is_ctxswin) {
			/* mask monitoring for the idle task */
			ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
			pfm_clear_psr_pp();
			ia64_srlz_i();
			return;
		}
		/*
		 * context switching out
		 * restore monitoring for next task
		 *
		 * Due to inlining this odd if-then-else construction generates
		 * better code.
		 */
		ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
		pfm_set_psr_pp();
		ia64_srlz_i();
	}
}
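
/*
 * Explanatory note: system-wide monitoring is gated by psr.pp, and
 * cr.dcr.pp is the value psr.pp is reloaded from on interruption, which
 * is why the code above toggles both together when switching the idle
 * task in or out under PFM_CPUINFO_EXCL_IDLE.
 */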

void
pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
{
	unsigned long start, end;

	pfm_stats[smp_processor_id()].pfm_sysupdt_count++;
	start = ia64_get_itc();

	pfm_do_syst_wide_update_task(task, info, is_ctxswin);

	end = ia64_get_itc();
	pfm_stats[smp_processor_id()].pfm_sysupdt_cycles += end-start;
}

#ifdef CONFIG_SMP
void
pfm_save_regs(struct task_struct *task)
{
	pfm_context_t *ctx;
	struct thread_struct *t;
	unsigned long flags;
	u64 psr;

	ctx = PFM_GET_CTX(task);
	if (ctx == NULL) goto save_error;
	t = &task->thread;

	/*
 	 * we always come here with interrupts ALREADY disabled by
 	 * the scheduler. So we simply need to protect against concurrent
	 * access, not CPU concurrency.
	 */
	flags = pfm_protect_ctx_ctxsw(ctx);

	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
		struct pt_regs *regs = ia64_task_regs(task);

		pfm_clear_psr_up();

		DPRINT(("ctx zombie, forcing cleanup for [%d]\n", task->pid));

		pfm_force_cleanup(ctx, regs);

		BUG_ON(ctx->ctx_smpl_hdr);

		pfm_unprotect_ctx_ctxsw(ctx, flags);

		pfm_context_free(ctx);
		return;
	}

	/*
	 * sanity check
	 */
	if (ctx->ctx_last_activation != GET_ACTIVATION()) {
		printk("ctx_activation=%lu activation=%lu state=%d: no save\n",
				ctx->ctx_last_activation,
				GET_ACTIVATION(), ctx->ctx_state);

		pfm_unprotect_ctx_ctxsw(ctx, flags);

		return;
	}

	/*
	 * save current PSR: needed because we modify it
	 */
	ia64_srlz_d();
	psr = pfm_get_psr();

	BUG_ON(psr & (IA64_PSR_I));

	/*
	 * stop monitoring:
	 * This is the last instruction which may generate an overflow
	 *
	 * We do not need to set psr.sp because, it is irrelevant in kernel.
	 * It will be restored from ipsr when going back to user level
	 */
	pfm_clear_psr_up();

	/*
	 * keep a copy of psr.up (for reload)
	 */
	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;

	{ u64 foo = pfm_get_psr();
	  BUG_ON(foo & ((IA64_PSR_UP|IA64_PSR_PP)));
	}

	/*
	 * release ownership of this PMU.
	 * PM interrupts are masked, so nothing
	 * can happen.
	 */
	SET_PMU_OWNER(NULL, NULL);

	/*
	 * we systematically save the PMD as we have no
	 * guarantee we will be scheduled at that same
	 * CPU again.
	 */
	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);

	/*
	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
	 * we will need it on the restore path to check
	 * for pending overflow.
	 */
	t->pmcs[0] = ia64_get_pmc(0);

	/*
	 * unfreeze PMU if it had pending overflows
	 */
	if (t->pmcs[0] & ~1UL) pfm_unfreeze_pmu();

	/*
	 * finally, unmask interrupts and allow context
	 * access.
	 * Any pending overflow interrupt may be delivered
	 * here and will be treated as spurious because we
	 * have no PMU owner anymore.
	 */
	pfm_unprotect_ctx_ctxsw(ctx, flags);

	return;

save_error:
	printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
		smp_processor_id(), task->pid,
		task->thread.flags & IA64_THREAD_PM_VALID);
}
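
/*
 * Design note: on SMP the PMD state is saved eagerly on every switch-out
 * because the task may next be scheduled on another CPU; the UP build
 * below can instead defer the save to pfm_lazy_save_regs() until some
 * other context actually claims the PMU.
 */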

#else /* !CONFIG_SMP */

/*
 * in 2.5, interrupts are masked when we come here
 */
void
pfm_save_regs(struct task_struct *task)
{
	pfm_context_t *ctx;
	u64 psr;

	ctx = PFM_GET_CTX(task);
	if (ctx == NULL) goto save_error;

	/*
	 * save current PSR: needed because we modify it
	 */
	psr = pfm_get_psr();

	/*
	 * stop monitoring:
	 * This is the last instruction which may generate an overflow
	 *
	 * We do not need to set psr.sp because, it is irrelevant in kernel.
	 * It will be restored from ipsr when going back to user level
	 */
	pfm_clear_psr_up();

	/*
	 * keep a copy of psr.up (for reload)
	 */
	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;

#if 1
	{ u64 foo = pfm_get_psr();
	  BUG_ON(foo & (IA64_PSR_I));
	  BUG_ON(foo & ((IA64_PSR_UP|IA64_PSR_PP)));
	}
#endif
	return;
save_error:
	printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
		smp_processor_id(), task->pid,
		task->thread.flags & IA64_THREAD_PM_VALID);
}

static void
pfm_lazy_save_regs (struct task_struct *task)
{
	pfm_context_t *ctx;
	struct thread_struct *t;
	unsigned long flags;

#if 1
	{ u64 foo  = pfm_get_psr();
	  BUG_ON(foo & IA64_PSR_UP);
	}
#endif

	ctx = PFM_GET_CTX(task);
	t   = &task->thread;

	DPRINT(("on [%d] used_pmds=0x%lx\n", task->pid, ctx->ctx_used_pmds[0]));

	/*
	 * we need to mask PMU overflow here to
	 * make sure that we maintain pmc0 until
	 * we save it. overflow interrupts are
	 * treated as spurious if there is no
	 * owner.
	 *
	 * XXX: I don't think this is necessary
	 */
	PROTECT_CTX(ctx,flags);

	/*
	 * release ownership of this PMU.
	 * must be done before we save the registers.
	 *
	 * after this call any PMU interrupt is treated
	 * as spurious.
	 */
	SET_PMU_OWNER(NULL, NULL);

	/*
	 * save all the pmds we use
	 */
	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);

	/*
	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
	 * it is needed to check for pending overflow
	 * on the restore path
	 */
	t->pmcs[0] = ia64_get_pmc(0);

	/*
	 * unfreeze PMU if it had pending overflows
	 */
	if (t->pmcs[0] & ~1UL) pfm_unfreeze_pmu();

	/*
	 * now we can unmask PMU interrupts, they will
	 * be treated as purely spurious and we will not
	 * lose any information
	 */
	UNPROTECT_CTX(ctx,flags);
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_SMP
void
pfm_load_regs (struct task_struct *task)
{
	pfm_context_t *ctx;
	struct thread_struct *t;
	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
	unsigned long flags;
	u64 psr, psr_up;

	ctx = PFM_GET_CTX(task);
	if (unlikely(ctx == NULL)) {
		printk(KERN_ERR "perfmon: pfm_load_regs() null context\n");
		return;
	}

	BUG_ON(GET_PMU_OWNER());

	t     = &task->thread;
	/*
	 * possible on unload
	 */
	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) {
		printk("[%d] PM_VALID=0, nothing to do\n", task->pid);
		return;
	}

	/*
 	 * we always come here with interrupts ALREADY disabled by
 	 * the scheduler. So we simply need to protect against concurrent
	 * access, not CPU concurrency.
	 */
	flags = pfm_protect_ctx_ctxsw(ctx);
	psr   = pfm_get_psr();

#if 1
	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	BUG_ON(psr & IA64_PSR_I);
#endif

	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
		struct pt_regs *regs = ia64_task_regs(task);

		BUG_ON(ctx->ctx_smpl_hdr);

		DPRINT(("ctx zombie, forcing cleanup for [%d]\n", task->pid));

		pfm_force_cleanup(ctx, regs);

		pfm_unprotect_ctx_ctxsw(ctx, flags);

		/*
		 * this one (kmalloc'ed) is fine with interrupts disabled
		 */
		pfm_context_free(ctx);

		return;
	}

	/*
	 * we restore ALL the debug registers to avoid picking up
	 * stale state.
	 */
	if (ctx->ctx_fl_using_dbreg) {
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf.num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs);
	}
	/*
	 * retrieve saved psr.up
	 */
	psr_up = ctx->ctx_saved_psr_up;

	/*
	 * if we were the last user of the PMU on that CPU,
	 * then nothing to do except restore psr
	 */
	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {

		/*
		 * retrieve partial reload masks (due to user modifications)
		 */
		pmc_mask = ctx->ctx_reload_pmcs[0];
		pmd_mask = ctx->ctx_reload_pmds[0];

		if (pmc_mask || pmd_mask) DPRINT(("partial reload [%d] pmd_mask=0x%lx pmc_mask=0x%lx\n", task->pid, pmd_mask, pmc_mask));
	} else {
		/*
	 	 * To avoid leaking information to the user level when psr.sp=0,
	 	 * we must reload ALL implemented pmds (even the ones we don't use).
	 	 * In the kernel we only allow PFM_READ_PMDS on registers which
	 	 * we initialized or requested (sampling) so there is no risk there.
	 	 */
		pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];

		/*
	 	 * ALL accessible PMCs are systematically reloaded, unused registers
	 	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
	 	 * up stale configuration.
	 	 *
	 	 * PMC0 is never in the mask. It is always restored separately.
	 	 */
		pmc_mask = ctx->ctx_all_pmcs[0];

		DPRINT(("full reload for [%d] activation=%lu last_activation=%lu last_cpu=%d pmd_mask=0x%lx pmc_mask=0x%lx\n",
			task->pid,
			GET_ACTIVATION(), ctx->ctx_last_activation,
			GET_LAST_CPU(ctx), pmd_mask, pmc_mask));

	}
	/*
	 * when context is MASKED, we will restore PMC with plm=0
	 * and PMD with stale information, but that's ok, nothing
	 * will be captured.
	 *
	 * XXX: optimize here
	 */
	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);

	/*
	 * check for pending overflow at the time the state
	 * was saved.
	 */
	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
		struct pt_regs *regs = ia64_task_regs(task);
		pfm_overflow_handler(task, ctx, t->pmcs[0], regs);
	}

	/*
	 * we clear PMC0, to ensure that any in flight interrupt
	 * will not be attributed to the new context we are installing
	 * because the actual overflow has been processed above already.
	 * No real effect until we unmask interrupts at the end of the
	 * function.
	 */
	pfm_unfreeze_pmu();

	/*
	 * we just did a reload, so we reset the partial reload fields
	 */
	ctx->ctx_reload_pmcs[0] = 0UL;
	ctx->ctx_reload_pmds[0] = 0UL;

	SET_LAST_CPU(ctx, smp_processor_id());

	/*
	 * bump activation value for this PMU
	 */
	INC_ACTIVATION();
	/*
	 * record current activation for this context
	 */
	SET_ACTIVATION(ctx);

	/*
	 * establish new ownership. Interrupts
	 * are still masked at this point.
	 */
	SET_PMU_OWNER(task, ctx);

	/*
	 * restore the psr.up bit
	 */
	if (likely(psr_up)) pfm_set_psr_up();

	/*
	 * allow concurrent access to context
	 */
	pfm_unprotect_ctx_ctxsw(ctx, flags);
}
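
/*
 * Explanatory note: the ctx_last_activation / per-CPU activation-number
 * pair is what detects "our state is still live on this CPU": every new
 * owner bumps INC_ACTIVATION(), so a context that lost the PMU fails the
 * match above and takes the full-reload path instead of the partial one.
 */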
6008 6009 6010 6011
#else /*  !CONFIG_SMP */
/*
 * reload PMU state for UP kernels
 * in 2.5 we come here with interrupts disabled
Linus Torvalds's avatar
Linus Torvalds committed
6012
 */
Linus Torvalds's avatar
Linus Torvalds committed
6013
void
6014
pfm_load_regs (struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
6015
{
6016 6017 6018 6019
	struct thread_struct *t;
	pfm_context_t *ctx;
	struct task_struct *owner;
	unsigned long pmd_mask, pmc_mask;
David Mosberger's avatar
David Mosberger committed
6020
	u64 psr, psr_up;
Linus Torvalds's avatar
Linus Torvalds committed
6021

David Mosberger's avatar
David Mosberger committed
6022 6023 6024 6025
	owner = GET_PMU_OWNER();
	ctx   = PFM_GET_CTX(task);
	t     = &task->thread;
	psr   = pfm_get_psr();
David Mosberger's avatar
David Mosberger committed
6026

6027
#if 1
David Mosberger's avatar
David Mosberger committed
6028 6029
	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	BUG_ON(psr & IA64_PSR_I);
6030
#endif
David Mosberger's avatar
David Mosberger committed
6031

6032 6033 6034
	/*
	 * we restore ALL the debug registers to avoid picking up
	 * stale state.
David Mosberger's avatar
David Mosberger committed
6035
	 *
6036 6037 6038 6039 6040 6041 6042
	 * This must be done even when the task is still the owner
	 * as the registers may have been modified via ptrace()
	 * (not perfmon) by the previous task.
	 */
	if (ctx->ctx_fl_using_dbreg) {
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf.num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs);
David Mosberger's avatar
David Mosberger committed
6043
	}
Linus Torvalds's avatar
Linus Torvalds committed
6044

6045
	/*
David Mosberger's avatar
David Mosberger committed
6046
	 * retrieved saved psr.up
6047
	 */
David Mosberger's avatar
David Mosberger committed
6048
	psr_up = ctx->ctx_saved_psr_up;
David Mosberger's avatar
David Mosberger committed
6049

6050 6051 6052 6053 6054 6055 6056 6057 6058
	/*
	 * short path, our state is still there, just
	 * need to restore psr and we go
	 *
	 * we do not touch either PMC nor PMD. the psr is not touched
	 * by the overflow_handler. So we are safe w.r.t. to interrupt
	 * concurrency even without interrupt masking.
	 */
	if (likely(owner == task)) {
David Mosberger's avatar
David Mosberger committed
6059
		if (likely(psr_up)) pfm_set_psr_up();
6060
		return;
Linus Torvalds's avatar
Linus Torvalds committed
6061 6062
	}

6063
	DPRINT(("reload for [%d] owner=%d\n", task->pid, owner ? owner->pid : -1));
Linus Torvalds's avatar
Linus Torvalds committed
6064

6065 6066 6067 6068 6069 6070 6071
	/*
	 * someone else is still using the PMU, first push it out and
	 * then we'll be able to install our stuff !
	 *
	 * Upon return, there will be no owner for the current PMU
	 */
	if (owner) pfm_lazy_save_regs(owner);
Linus Torvalds's avatar
Linus Torvalds committed
6072

6073 6074 6075 6076 6077
	/*
	 * To avoid leaking information to the user level when psr.sp=0,
	 * we must reload ALL implemented pmds (even the ones we don't use).
	 * In the kernel we only allow PFM_READ_PMDS on registers which
	 * we initialized or requested (sampling) so there is no risk there.
Linus Torvalds's avatar
Linus Torvalds committed
6078
	 */
6079
	pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
David Mosberger's avatar
David Mosberger committed
6080

6081 6082 6083 6084 6085 6086 6087 6088
	/*
	 * ALL accessible PMCs are systematically reloaded, unused registers
	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
	 * up stale configuration.
	 *
	 * PMC0 is never in the mask. It is always restored separately
	 */
	pmc_mask = ctx->ctx_all_pmcs[0];
David Mosberger's avatar
David Mosberger committed
6089

6090 6091
	pfm_restore_pmds(t->pmds, pmd_mask);
	pfm_restore_pmcs(t->pmcs, pmc_mask);
David Mosberger's avatar
David Mosberger committed
6092

6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105
	/*
	 * Check for pending overflow when state was last saved.
	 * invoked handler is overflow status bits set.
	 *
	 * Any PMU overflow in flight at this point, will still
	 * be treated as spurious because we have no declared
	 * owner. Note that the first level interrupt handler
	 * DOES NOT TOUCH any PMC except PMC0 for which we have
	 * a copy already.
	 */
	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
		struct pt_regs *regs = ia64_task_regs(task);
		pfm_overflow_handler(task, ctx, t->pmcs[0], regs);
David Mosberger's avatar
David Mosberger committed
6106
	}
6107

David Mosberger's avatar
David Mosberger committed
6108
	/*
6109 6110 6111 6112 6113
	 * we clear PMC0, to ensure that any in flight interrupt
	 * will not be attributed to the new context we are installing
	 * because the actual overflow has been processed above already.
	 *
	 * This is an atomic operation.
David Mosberger's avatar
David Mosberger committed
6114
	 */
6115
	pfm_unfreeze_pmu();
	/*
	 * establish new ownership. If there was an in-flight
	 * overflow interrupt, it will be treated as spurious
	 * before and after the call, because no overflow
	 * status bit can possibly be set. No new overflow
	 * can be generated because, at this point, psr.up
	 * is still cleared.
	 */
	SET_PMU_OWNER(task, ctx);
	/*
	 * restore the psr. This is the point at which
	 * new overflow interrupts can be generated again.
	 */
	if (likely(psr_up)) pfm_set_psr_up();
}
#endif /* CONFIG_SMP */

/*
 * this function assumes monitoring is stopped
 */
static void
pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
{
	u64 pmc0;
	unsigned long mask2, val, pmd_val;
	int i, can_access_pmu = 0;
	int is_self;
	/*
	 * is the caller the task being monitored (or which initiated the
	 * session for system wide measurements)
	 */
	is_self = ctx->ctx_task == task ? 1 : 0;
#ifdef CONFIG_SMP
	if (task == current) {
#else
	/*
	 * in UP, the state can still be in the registers
	 */
	if (task == current || GET_PMU_OWNER() == task) {
#endif
		can_access_pmu = 1;
		/*
		 * Mark the PMU as not owned.
		 * This will cause the interrupt handler to do nothing in case an
		 * overflow interrupt was in-flight.
		 * This also guarantees that pmc0 will contain the final state.
		 * It virtually gives us full control over overflow processing
		 * from that point on.
		 */
		SET_PMU_OWNER(NULL, NULL);
		/*
		 * read current overflow status:
		 *
		 * we are guaranteed to read the final stable state
		 */
		ia64_srlz_d();
		pmc0 = ia64_get_pmc(0); /* slow */
		/*
		 * reset freeze bit, overflow status information destroyed
		 */
		pfm_unfreeze_pmu();
	} else {
		pmc0 = task->thread.pmcs[0];
		/*
		 * clear whatever overflow status bits there were
		 */
		task->thread.pmcs[0] &= ~0x1;
	}
	/*
	 * we save all the used pmds
	 * we take care of overflows for counting PMDs
	 *
	 * XXX: sampling situation is not taken into account here
	 */
	mask2 = ctx->ctx_used_pmds[0];
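	/* walk the used-PMD bitmap one bit per iteration; i tracks the register index */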
	for (i = 0; mask2; i++, mask2>>=1) {
		/* skip unused pmds */
		if ((mask2 & 0x1) == 0) continue;

		/*
		 * can_access_pmu is always true in system-wide mode
		 */
		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
		if (PMD_IS_COUNTING(i)) {
			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
				task->pid,
				i,
				ctx->ctx_pmds[i].val,
				val & pmu_conf.ovfl_val));

			/*
			 * we rebuild the full 64 bit value of the counter
			 */
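			/*
			 * e.g., on a PMU with 47-bit counters ovfl_val would be
			 * 2^47-1: the hardware register carries only the low
			 * bits, ctx_pmds[i].val carries the rest.
			 */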
			val = ctx->ctx_pmds[i].val + (val & pmu_conf.ovfl_val);
			/*
			 * now everything is in ctx_pmds[] and we need
			 * to clear the saved context from save_regs() such that
			 * pfm_read_pmds() gets the correct value
			 */
			pmd_val = 0UL;
			/*
			 * take care of overflow inline
			 */
			if (pmc0 & (1UL << i)) {
				val += 1 + pmu_conf.ovfl_val;
				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
			}
		}

		DPRINT(("[%d] is_self=%d ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, is_self, i, val, pmd_val));

		if (is_self) task->thread.pmds[i] = pmd_val;
		ctx->ctx_pmds[i].val = val;
	}
}

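/*
 * PMU overflow interrupt descriptor. pfm_interrupt_handler() runs with
 * interrupts disabled (SA_INTERRUPT); the action is hooked onto the
 * dedicated IA64_PERFMON_VECTOR by pfm_init_percpu() below.
 */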
static struct irqaction perfmon_irqaction = {
	.handler = pfm_interrupt_handler,
	.flags   = SA_INTERRUPT,
	.name    = "perfmon"
};

/*
 * perfmon initialization routine, called from the initcall() table
 */
static int init_pfm_fs(void);

int __init
pfm_init(void)
{
	unsigned int n, n_counters, i;
	printk("perfmon: version %u.%u IRQ %u\n",
		PFM_VERSION_MAJ,
		PFM_VERSION_MIN,
		IA64_PERFMON_VECTOR);
	/*
	 * PMU type sanity check
	 * XXX: maybe better to implement autodetection (but then we have a larger kernel)
	 */
	if (local_cpu_data->family != pmu_conf.pmu_family) {
		printk(KERN_INFO "perfmon: disabled, kernel only supports %s PMU family\n", pmu_conf.pmu_name);
		return -ENODEV;
	}

	/*
	 * compute the number of implemented PMD/PMC from the
	 * description tables
	 */
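	/*
	 * impl_pmcs/impl_pmds are arrays of 64-bit words: register i maps to
	 * bit (i & 63) of word (i >> 6).
	 */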
	n = 0;
	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63);
		n++;
	}
	pmu_conf.num_pmcs = n;

	n = 0; n_counters = 0;
	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63);
		n++;
		if (PMD_IS_COUNTING(i)) n_counters++;
	}
	pmu_conf.num_pmds      = n;
	pmu_conf.num_counters  = n_counters;
	/*
	 * sanity checks on the number of debug registers
	 */
	if (pmu_conf.use_rr_dbregs) {
		if (pmu_conf.num_ibrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf.num_ibrs);
			return -1;
		}
		if (pmu_conf.num_dbrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf.num_ibrs);
			return -1;
		}
	}

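	/* ffz(ovfl_val) = index of the first zero bit = implemented counter width in bits */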
	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
	       pmu_conf.pmu_name,
6312
	       pmu_conf.num_pmcs,
6313 6314 6315
	       pmu_conf.num_pmds,
	       pmu_conf.num_counters,
	       ffz(pmu_conf.ovfl_val));

	/* sanity check */
	if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
		return -1;
	}

	/*
	 * create /proc/perfmon (mostly for debugging purposes)
	 */
	perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
	if (perfmon_dir == NULL) {
		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
		return -1;
	}
	/*
	 * create /proc/sys/kernel/perfmon (for debugging purposes)
	 */
	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);

	/*
	 * initialize all our spinlocks
	 */
	spin_lock_init(&pfm_sessions.pfs_lock);
	spin_lock_init(&pfm_smpl_fmt_lock);

	init_pfm_fs();

	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;

	/* we are all set */
	pmu_conf.enabled = 1;

	return 0;
}
__initcall(pfm_init);

void
pfm_init_percpu (void)
{
	int i;

	/*
	 * make sure no measurement is active
	 * (may inherit programmed PMCs from EFI).
	 */
	pfm_clear_psr_pp();
	pfm_clear_psr_up();


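	/*
	 * the irqaction need only be registered once (by the boot CPU);
	 * cr.pmv is programmed on every CPU below.
	 */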
	if (smp_processor_id() == 0)
		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);

	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
	ia64_srlz_d();

	/*
	 * we first initialize the PMU to a stable state.
	 * the values may have been changed from their power-up
	 * values by software executed before the kernel took over.
	 *
	 * At this point, pmu_conf has not yet been initialized
	 *
	 * On McKinley, this code is ineffective until PMC4 is initialized
	 * but that's all right because we take care of pmc0 later.
	 *
	 * XXX: potential problems with pmc1.
	 */
	for (i=1; PMC_IS_LAST(i) == 0;  i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		ia64_set_pmc(i, PMC_DFL_VAL(i));
	}

	for (i=0; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		ia64_set_pmd(i, 0UL);
	}

	/*
	 * we run with the PMU not frozen at all times
	 */
	pfm_unfreeze_pmu();
}

/*
 * used for debug purposes only
 */
void
dump_pmu_state(void)
{
	struct task_struct *task;
	struct thread_struct *t;
	pfm_context_t *ctx;
	unsigned long psr;
	int i;

	printk("current [%d] %s\n", current->pid, current->comm);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

	printk("owner [%d] ctx=%p\n", task ? task->pid : -1, ctx);
	psr = pfm_get_psr();

	printk("psr.pp=%ld psr.up=%ld\n", (psr >> IA64_PSR_PP_BIT) &0x1UL, (psr >> IA64_PSR_PP_BIT)&0x1UL);

	t = &current->thread;

	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		printk("pmc[%d]=0x%lx tpmc=0x%lx\n", i, ia64_get_pmc(i), t->pmcs[i]);
	}

	for (i=1; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		printk("pmd[%d]=0x%lx tpmd=0x%lx\n", i, ia64_get_pmd(i), t->pmds[i]);
	}
	if (ctx) {
		printk("ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
				ctx->ctx_state,
				ctx->ctx_smpl_vaddr,
				ctx->ctx_smpl_hdr,
				ctx->ctx_msgq_head,
				ctx->ctx_msgq_tail,
				ctx->ctx_saved_psr_up);
	}
}

/*
 * called from process.c:copy_thread(). task is new child.
 */
void
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
	struct thread_struct *thread;

	DPRINT(("perfmon: pfm_inherit clearing state for [%d] current [%d]\n", task->pid, current->pid));

	thread = &task->thread;

	/*
	 * cut links inherited from parent (current)
	 */
	thread->pfm_context = NULL;

	PFM_SET_WORK_PENDING(task, 0);

	/*
	 * restore default psr settings
	 */
	ia64_psr(regs)->pp = ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;
}
#else  /* !CONFIG_PERFMON */
asmlinkage long
sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
		long arg8, long stack)
{
	return -ENOSYS;
}
#endif /* CONFIG_PERFMON */