perfmon.c 165 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
2
 * This file implements the perfmon-2 subsystem which is used
David Mosberger's avatar
David Mosberger committed
3
 * to program the IA-64 Performance Monitoring Unit (PMU).
Linus Torvalds's avatar
Linus Torvalds committed
4
 *
5 6
 * The initial version of perfmon.c was written by
 * Ganesh Venkitachalam, IBM Corp.
David Mosberger's avatar
David Mosberger committed
7
 *
8 9 10 11 12
 * Then it was modified for perfmon-1.x by Stephane Eranian and 
 * David Mosberger, Hewlett Packard Co.
 * 
 * Version Perfmon-2.x is a rewrite of perfmon-1.x
 * by Stephane Eranian, Hewlett Packard Co. 
David Mosberger's avatar
David Mosberger committed
13
 *
14
 * Copyright (C) 1999-2003  Hewlett Packard Co
David Mosberger's avatar
David Mosberger committed
15 16
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
17 18 19
 *
 * More information about perfmon available at:
 * 	http://www.hpl.hp.com/research/linux/perfmon
Linus Torvalds's avatar
Linus Torvalds committed
20 21 22
 */

#include <linux/config.h>
David Mosberger's avatar
David Mosberger committed
23
#include <linux/module.h>
Linus Torvalds's avatar
Linus Torvalds committed
24 25 26 27 28
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
29
#include <linux/seq_file.h>
Linus Torvalds's avatar
Linus Torvalds committed
30 31 32
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
33
#include <linux/sysctl.h>
34 35 36 37 38 39 40
#include <linux/list.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/version.h>
Linus Torvalds's avatar
Linus Torvalds committed
41

Linus Torvalds's avatar
Linus Torvalds committed
42
#include <asm/bitops.h>
Linus Torvalds's avatar
Linus Torvalds committed
43
#include <asm/errno.h>
44
#include <asm/intrinsics.h>
Linus Torvalds's avatar
Linus Torvalds committed
45 46
#include <asm/page.h>
#include <asm/perfmon.h>
Linus Torvalds's avatar
Linus Torvalds committed
47
#include <asm/processor.h>
Linus Torvalds's avatar
Linus Torvalds committed
48 49
#include <asm/signal.h>
#include <asm/system.h>
Linus Torvalds's avatar
Linus Torvalds committed
50
#include <asm/uaccess.h>
51
#include <asm/delay.h>
Linus Torvalds's avatar
Linus Torvalds committed
52 53

#ifdef CONFIG_PERFMON
Linus Torvalds's avatar
Linus Torvalds committed
54
/*
55
 * perfmon context state
Linus Torvalds's avatar
Linus Torvalds committed
56
 */
57 58 59 60
#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */
Linus Torvalds's avatar
Linus Torvalds committed
61

62
#define PFM_INVALID_ACTIVATION	(~0UL)
Linus Torvalds's avatar
Linus Torvalds committed
63 64

/*
65
 * depth of message queue
Linus Torvalds's avatar
Linus Torvalds committed
66
 */
67 68
#define PFM_MAX_MSGS		32
#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
69 70 71 72 73

/*
 * type of a PMU register (bitmask).
 * bitmask structure:
 * 	bit0   : register implemented
74
 * 	bit1   : end marker
75
 * 	bit2-3 : reserved
76 77 78
 * 	bit4   : pmc has pmc.pm
 * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
 * 	bit6-7 : register type
79 80
 * 	bit8-31: reserved
 */
81
#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
82 83 84
#define PFM_REG_IMPL		0x1 /* register implemented */
#define PFM_REG_END		0x2 /* end marker */
#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
85
#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
86 87 88
#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
89

90 91
#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)
David Mosberger's avatar
David Mosberger committed
92

93
#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
Linus Torvalds's avatar
Linus Torvalds committed
94

95
/* i assumed unsigned */
96 97
#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
Linus Torvalds's avatar
Linus Torvalds committed
98

99
/* XXX: these assume that register i is implemented */
100 101 102 103
#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)
David Mosberger's avatar
David Mosberger committed
104

105 106 107 108
#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]
109

110 111
#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS
David Mosberger's avatar
David Mosberger committed
112 113

#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
114 115 116
#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h)		(h)->ctx_task

117 118
#define PMU_PMC_OI		5 /* position of pmc.oi bit */

119 120 121 122
/* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)

123
#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
David Mosberger's avatar
David Mosberger committed
124 125 126 127

#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
128 129 130 131 132 133 134
#define PFM_CODE_RR	0	/* requesting code range restriction */
#define PFM_DATA_RR	1	/* requestion data range restriction */

#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)

135 136
#define RDEP(x)	(1UL<<(x))

137 138 139 140 141 142 143 144
/*
 * context protection macros
 * in SMP:
 * 	- we need to protect against CPU concurrency (spin_lock)
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 * in UP:
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 *
145
 * spin_lock_irqsave()/spin_lock_irqrestore():
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
 * 	in SMP: local_irq_disable + spin_lock
 * 	in UP : local_irq_disable
 *
 * spin_lock()/spin_lock():
 * 	in UP : removed automatically
 * 	in SMP: protect against context accesses from other CPU. interrupts
 * 	        are not masked. This is useful for the PMU interrupt handler
 * 	        because we know we will not get PMU concurrency in that code.
 */
#define PROTECT_CTX(c, f) \
	do {  \
		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
	} while(0)

#define UNPROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

#define PROTECT_CTX_NOPRINT(c, f) \
	do {  \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
	} while(0)


#define UNPROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)


#define PROTECT_CTX_NOIRQ(c) \
	do {  \
		spin_lock(&(c)->ctx_lock); \
	} while(0)

#define UNPROTECT_CTX_NOIRQ(c) \
	do { \
		spin_unlock(&(c)->ctx_lock); \
	} while(0)


#ifdef CONFIG_SMP
David Mosberger's avatar
David Mosberger committed
192

193 194 195
#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()
David Mosberger's avatar
David Mosberger committed
196

197 198 199 200 201
#else /* !CONFIG_SMP */
#define SET_ACTIVATION(t) 	do {} while(0)
#define GET_ACTIVATION(t) 	do {} while(0)
#define INC_ACTIVATION(t) 	do {} while(0)
#endif /* CONFIG_SMP */
David Mosberger's avatar
David Mosberger committed
202

203 204 205 206
#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)

207 208
#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
David Mosberger's avatar
David Mosberger committed
209 210

#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
Linus Torvalds's avatar
Linus Torvalds committed
211

212 213 214 215 216
/*
 * cmp0 must be the value of pmc0
 */
#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)

David Mosberger's avatar
David Mosberger committed
217 218
#define PFMFS_MAGIC 0xa0b4d889

Linus Torvalds's avatar
Linus Torvalds committed
219
/*
David Mosberger's avatar
David Mosberger committed
220
 * debugging
Linus Torvalds's avatar
Linus Torvalds committed
221
 */
222 223
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
224
#define DPRINT(a) \
David Mosberger's avatar
David Mosberger committed
225
	do { \
226
		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
David Mosberger's avatar
David Mosberger committed
227
	} while (0)
Linus Torvalds's avatar
Linus Torvalds committed
228

229
#define DPRINT_ovfl(a) \
230
	do { \
231
		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
232
	} while (0)
233
#endif
Linus Torvalds's avatar
Linus Torvalds committed
234

David Mosberger's avatar
David Mosberger committed
235 236
/*
 * 64-bit software counter structure
David Mosberger's avatar
David Mosberger committed
237 238
 *
 * the next_reset_type is applied to the next call to pfm_reset_regs()
David Mosberger's avatar
David Mosberger committed
239
 */
Linus Torvalds's avatar
Linus Torvalds committed
240
typedef struct {
241 242 243 244 245 246 247 248 249 250
	unsigned long	val;		/* virtual 64bit counter value */
	unsigned long	lval;		/* last reset value */
	unsigned long	long_reset;	/* reset value on sampling overflow */
	unsigned long	short_reset;    /* reset value on overflow */
	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflow */
	unsigned long	seed;		/* seed for random-number generator */
	unsigned long	mask;		/* mask for random-number generator */
	unsigned int 	flags;		/* notify/do not notify */
	unsigned long	eventid;	/* overflow event identifier */
Linus Torvalds's avatar
Linus Torvalds committed
251 252 253
} pfm_counter_t;

/*
254
 * context flags
Linus Torvalds's avatar
Linus Torvalds committed
255 256
 */
typedef struct {
David Mosberger's avatar
David Mosberger committed
257 258 259
	unsigned int block:1;		/* when 1, task will blocked on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
260
	unsigned int is_sampling:1;	/* true if using a custom format */
261
	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
262 263 264
	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
	unsigned int no_msg:1;		/* no message sent on overflow */
265
	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
266
	unsigned int reserved:22;
Linus Torvalds's avatar
Linus Torvalds committed
267 268
} pfm_context_flags_t;

269
#define PFM_TRAP_REASON_NONE		0x0	/* default value */
270 271 272
#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */

273

David Mosberger's avatar
David Mosberger committed
274 275 276
/*
 * perfmon context: encapsulates all the state of a monitoring session
 */
277

Linus Torvalds's avatar
Linus Torvalds committed
278
typedef struct pfm_context {
279
	spinlock_t		ctx_lock;		/* context protection */
Linus Torvalds's avatar
Linus Torvalds committed
280

281 282
	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */
Linus Torvalds's avatar
Linus Torvalds committed
283

284
	struct task_struct 	*ctx_task;		/* task to which context is attached */
Linus Torvalds's avatar
Linus Torvalds committed
285

David Mosberger's avatar
David Mosberger committed
286
	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
Linus Torvalds's avatar
Linus Torvalds committed
287

David Mosberger's avatar
David Mosberger committed
288
	struct semaphore	ctx_restart_sem;   	/* use for blocking notification mode */
Linus Torvalds's avatar
Linus Torvalds committed
289

290 291 292 293 294 295 296 297 298 299 300 301 302 303
	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */

	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */

	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */

	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
304

305
	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
Linus Torvalds's avatar
Linus Torvalds committed
306

David Mosberger's avatar
David Mosberger committed
307
	u64			ctx_saved_psr_up;	/* only contains psr.up value */
Linus Torvalds's avatar
Linus Torvalds committed
308

309 310 311
	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
Linus Torvalds's avatar
Linus Torvalds committed
312

313
	int			ctx_fd;			/* file descriptor used my this context */
314
	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */
Linus Torvalds's avatar
Linus Torvalds committed
315

316 317 318 319 320 321 322 323 324 325 326 327
	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

	wait_queue_head_t 	ctx_msgq_wait;
	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
	int			ctx_msgq_head;
	int			ctx_msgq_tail;
	struct fasync_struct	*ctx_async_queue;

	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
David Mosberger's avatar
David Mosberger committed
328
} pfm_context_t;
Linus Torvalds's avatar
Linus Torvalds committed
329

330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
/*
 * magic number used to verify that structure is really
 * a perfmon context
 */
#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)

#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)

#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v)	do {} while(0)
#define GET_LAST_CPU(ctx)	do {} while(0)
#endif


David Mosberger's avatar
David Mosberger committed
347 348 349
#define ctx_fl_block		ctx_flags.block
#define ctx_fl_system		ctx_flags.system
#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
350
#define ctx_fl_is_sampling	ctx_flags.is_sampling
351
#define ctx_fl_excl_idle	ctx_flags.excl_idle
352 353 354
#define ctx_fl_going_zombie	ctx_flags.going_zombie
#define ctx_fl_trap_reason	ctx_flags.trap_reason
#define ctx_fl_no_msg		ctx_flags.no_msg
355
#define ctx_fl_can_restart	ctx_flags.can_restart
356 357 358

#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking
Linus Torvalds's avatar
Linus Torvalds committed
359

David Mosberger's avatar
David Mosberger committed
360 361 362 363 364
/*
 * global information about all sessions
 * mostly used to synchronize between system wide and per-process
 */
typedef struct {
365
	spinlock_t		pfs_lock;		   /* lock the structure */
Linus Torvalds's avatar
Linus Torvalds committed
366

367
	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
368 369 370
	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
371
	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
David Mosberger's avatar
David Mosberger committed
372
} pfm_session_t;
Linus Torvalds's avatar
Linus Torvalds committed
373

374 375
/*
 * information about a PMC or PMD.
376
 * dep_pmd[]: a bitmask of dependent PMD registers
377 378
 * dep_pmc[]: a bitmask of dependent PMC registers
 */
379
typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
380
typedef struct {
381
	unsigned int		type;
382
	int			pm_pos;
383 384
	unsigned long		default_value;	/* power-on default value */
	unsigned long		reserved_mask;	/* bitmask of reserved bits */
385 386
	pfm_reg_check_t		read_check;
	pfm_reg_check_t		write_check;
387 388 389
	unsigned long		dep_pmd[4];
	unsigned long		dep_pmc[4];
} pfm_reg_desc_t;
390

391
/* assume cnum is a valid monitor */
392
#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
393 394 395

/*
 * This structure is initialized at boot time and contains
396
 * a description of the PMU main characteristics.
397 398 399 400 401 402 403 404
 *
 * If the probe function is defined, detection is based
 * on its return value: 
 * 	- 0 means recognized PMU
 * 	- anything else means not supported
 * When the probe function is not defined, then the pmu_family field
 * is used and it must match the host CPU family such that:
 * 	- cpu->family & config->pmu_family != 0
405 406
 */
typedef struct {
407 408
	unsigned long  ovfl_val;	/* overflow value for counters */

409 410
	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
411 412 413 414 415 416 417 418

	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */

	char	      *pmu_name;	/* PMU family name */
	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
419
	unsigned int  flags;		/* pmu specific flags */
420 421 422
	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
423
	int           (*probe)(void);   /* customized probe routine */
424
	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
425
} pmu_config_t;
426 427 428 429
/*
 * PMU specific flags
 */
#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */
430

David Mosberger's avatar
David Mosberger committed
431
/*
432
 * debug register related type definitions
David Mosberger's avatar
David Mosberger committed
433 434
 */
typedef struct {
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
	unsigned long ibr_mask:56;
	unsigned long ibr_plm:4;
	unsigned long ibr_ig:3;
	unsigned long ibr_x:1;
} ibr_mask_reg_t;

typedef struct {
	unsigned long dbr_mask:56;
	unsigned long dbr_plm:4;
	unsigned long dbr_ig:2;
	unsigned long dbr_w:1;
	unsigned long dbr_r:1;
} dbr_mask_reg_t;

typedef union {
	unsigned long  val;
	ibr_mask_reg_t ibr;
	dbr_mask_reg_t dbr;
} dbreg_t;

Linus Torvalds's avatar
Linus Torvalds committed
455 456

/*
David Mosberger's avatar
David Mosberger committed
457
 * perfmon command descriptions
Linus Torvalds's avatar
Linus Torvalds committed
458 459
 */
typedef struct {
460 461
	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	char		*cmd_name;
David Mosberger's avatar
David Mosberger committed
462 463 464
	int		cmd_flags;
	unsigned int	cmd_narg;
	size_t		cmd_argsize;
465
	int		(*cmd_getsize)(void *arg, size_t *sz);
David Mosberger's avatar
David Mosberger committed
466
} pfm_cmd_desc_t;
Linus Torvalds's avatar
Linus Torvalds committed
467

468 469 470 471
#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */
Linus Torvalds's avatar
Linus Torvalds committed
472 473


474 475 476 477 478
#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
Linus Torvalds's avatar
Linus Torvalds committed
479

David Mosberger's avatar
David Mosberger committed
480 481
#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */

482 483 484 485
typedef struct {
	int	debug;		/* turn on/off debugging via syslog */
	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
486
	int	expert_mode;	/* turn on/off value checking */
487
	int 	debug_pfm_read;
488 489 490
} pfm_sysctl_t;

typedef struct {
491
	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
492
	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
493 494 495 496 497 498
	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
	unsigned long pfm_smpl_handler_calls;
	unsigned long pfm_smpl_handler_cycles;
499
	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
500
} pfm_stats_t;
David Mosberger's avatar
David Mosberger committed
501 502 503

/*
 * perfmon internal variables
Linus Torvalds's avatar
Linus Torvalds committed
504
 */
505 506
static pfm_stats_t		pfm_stats[NR_CPUS];
static pfm_session_t		pfm_sessions;	/* global sessions information */
507

508 509 510
static struct proc_dir_entry 	*perfmon_dir;
static pfm_uuid_t		pfm_null_uuid = {0,};

511 512
static spinlock_t		pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);
513

514 515
static pmu_config_t		*pmu_conf;

516 517
/* sysctl() controls */
static pfm_sysctl_t pfm_sysctl;
518
int pfm_debug_var;
519 520 521 522 523

static ctl_table pfm_ctl_table[]={
	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
524
	{4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
525 526 527 528 529 530 531 532 533 534 535
	{ 0, },
};
static ctl_table pfm_sysctl_dir[] = {
	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
 	{0,},
};
static ctl_table pfm_sysctl_root[] = {
	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
 	{0,},
};
static struct ctl_table_header *pfm_sysctl_header;
536

537 538
static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
static int pfm_flush(struct file *filp);
Linus Torvalds's avatar
Linus Torvalds committed
539

540
#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
#define pfm_get_cpu_data(a,b)		per_cpu(a, b)

static inline void
pfm_put_task(struct task_struct *task)
{
	if (task != current) put_task_struct(task);
}

static inline void
pfm_set_task_notify(struct task_struct *task)
{
	struct thread_info *info;

	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
	set_bit(TIF_NOTIFY_RESUME, &info->flags);
}

static inline void
pfm_clear_task_notify(void)
{
	clear_thread_flag(TIF_NOTIFY_RESUME);
}

static inline void
pfm_reserve_page(unsigned long a)
{
	SetPageReserved(vmalloc_to_page((void *)a));
}
static inline void
pfm_unreserve_page(unsigned long a)
{
	ClearPageReserved(vmalloc_to_page((void*)a));
}

static inline int
pfm_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot)
{
	return remap_page_range(vma, from, phys_addr, size, prot);
}

static inline unsigned long
pfm_protect_ctx_ctxsw(pfm_context_t *x)
{
David Mosberger's avatar
David Mosberger committed
584
	spin_lock(&(x)->ctx_lock);
585 586 587 588 589 590 591 592 593
	return 0UL;
}

static inline unsigned long
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
{
	spin_unlock(&(x)->ctx_lock);
}

David Mosberger's avatar
David Mosberger committed
594 595 596 597 598 599 600 601 602 603 604 605 606 607
static inline unsigned int
pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
{
	return do_munmap(mm, addr, len);
}

static inline unsigned long 
pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
{
	return get_unmapped_area(file, addr, len, pgoff, flags);
}


static struct super_block *
608
pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
David Mosberger's avatar
David Mosberger committed
609 610 611 612 613 614 615 616 617
{
	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
}

static struct file_system_type pfm_fs_type = {
	.name     = "pfmfs",
	.get_sb   = pfmfs_get_sb,
	.kill_sb  = kill_anon_super,
};
618

619 620 621 622
DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
DEFINE_PER_CPU(unsigned long, pmu_activation_number);
Linus Torvalds's avatar
Linus Torvalds committed
623

624

625 626
/* forward declaration */
static struct file_operations pfm_file_ops;
Linus Torvalds's avatar
Linus Torvalds committed
627

Linus Torvalds's avatar
Linus Torvalds committed
628
/*
David Mosberger's avatar
David Mosberger committed
629
 * forward declarations
Linus Torvalds's avatar
Linus Torvalds committed
630
 */
631
#ifndef CONFIG_SMP
David Mosberger's avatar
David Mosberger committed
632
static void pfm_lazy_save_regs (struct task_struct *ta);
633
#endif
David Mosberger's avatar
David Mosberger committed
634

635
void dump_pmu_state(const char *);
636
static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
637

638 639 640
#include "perfmon_itanium.h"
#include "perfmon_mckinley.h"
#include "perfmon_generic.h"
641 642 643 644 645 646 647 648

static pmu_config_t *pmu_confs[]={
	&pmu_conf_mck,
	&pmu_conf_ita,
	&pmu_conf_gen, /* must be last */
	NULL
};

649

650 651
static int pfm_end_notify_user(pfm_context_t *ctx);

652 653 654
static inline void
pfm_clear_psr_pp(void)
{
655
	ia64_rsm(IA64_PSR_PP);
656
	ia64_srlz_i();
657 658 659 660 661
}

static inline void
pfm_set_psr_pp(void)
{
662
	ia64_ssm(IA64_PSR_PP);
663
	ia64_srlz_i();
664 665 666 667 668
}

static inline void
pfm_clear_psr_up(void)
{
669
	ia64_rsm(IA64_PSR_UP);
670
	ia64_srlz_i();
671 672 673 674 675
}

static inline void
pfm_set_psr_up(void)
{
676
	ia64_ssm(IA64_PSR_UP);
677
	ia64_srlz_i();
678 679 680 681 682 683
}

static inline unsigned long
pfm_get_psr(void)
{
	unsigned long tmp;
684 685
	tmp = ia64_getreg(_IA64_REG_PSR);
	ia64_srlz_i();
686 687 688 689 690 691
	return tmp;
}

static inline void
pfm_set_psr_l(unsigned long val)
{
692 693
	ia64_setreg(_IA64_REG_PSR_L, val);
	ia64_srlz_i();
694 695
}

696 697 698 699 700 701 702 703 704 705 706 707 708
static inline void
pfm_freeze_pmu(void)
{
	ia64_set_pmc(0,1UL);
	ia64_srlz_d();
}

static inline void
pfm_unfreeze_pmu(void)
{
	ia64_set_pmc(0,0UL);
	ia64_srlz_d();
}
709

710 711 712 713 714 715 716
static inline void
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
{
	int i;

	for (i=0; i < nibrs; i++) {
		ia64_set_ibr(i, ibrs[i]);
717
		ia64_dv_serialize_instruction();
718 719 720 721 722 723 724 725 726 727 728
	}
	ia64_srlz_i();
}

static inline void
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
{
	int i;

	for (i=0; i < ndbrs; i++) {
		ia64_set_dbr(i, dbrs[i]);
729
		ia64_dv_serialize_data();
730 731 732 733
	}
	ia64_srlz_d();
}

734 735 736
/*
 * PMD[i] must be a counter. no check is made
 */
David Mosberger's avatar
David Mosberger committed
737 738 739
static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
740
	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
David Mosberger's avatar
David Mosberger committed
741 742
}

743 744 745
/*
 * PMD[i] must be a counter. no check is made
 */
David Mosberger's avatar
David Mosberger committed
746 747 748
static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
749
	unsigned long ovfl_val = pmu_conf->ovfl_val;
750 751

	ctx->ctx_pmds[i].val = val  & ~ovfl_val;
David Mosberger's avatar
David Mosberger committed
752 753 754 755
	/*
	 * writing to unimplemented part is ignore, so we do not need to
	 * mask off top part
	 */
756
	ia64_set_pmd(i, val & ovfl_val);
Linus Torvalds's avatar
Linus Torvalds committed
757 758
}

759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
static pfm_msg_t *
pfm_get_new_msg(pfm_context_t *ctx)
{
	int idx, next;

	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;

	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
	if (next == ctx->ctx_msgq_head) return NULL;

 	idx = 	ctx->ctx_msgq_tail;
	ctx->ctx_msgq_tail = next;

	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));

	return ctx->ctx_msgq+idx;
}

static pfm_msg_t *
pfm_get_next_msg(pfm_context_t *ctx)
Linus Torvalds's avatar
Linus Torvalds committed
779
{
780 781 782 783 784 785 786 787 788 789 790
	pfm_msg_t *msg;

	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

	if (PFM_CTXQ_EMPTY(ctx)) return NULL;

	/*
	 * get oldest message
	 */
	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;

Linus Torvalds's avatar
Linus Torvalds committed
791
	/*
792
	 * and move forward
Linus Torvalds's avatar
Linus Torvalds committed
793
	 */
794 795 796 797 798 799 800 801 802 803 804 805
	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;

	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));

	return msg;
}

static void
pfm_reset_msgq(pfm_context_t *ctx)
{
	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
	DPRINT(("ctx=%p msgq reset\n", ctx));
Linus Torvalds's avatar
Linus Torvalds committed
806 807
}

808

Linus Torvalds's avatar
Linus Torvalds committed
809
/* Here we want the physical address of the memory.
810 811
 * This is used when initializing the contents of the
 * area and marking the pages as reserved.
Linus Torvalds's avatar
Linus Torvalds committed
812
 */
Linus Torvalds's avatar
Linus Torvalds committed
813
static inline unsigned long
David Mosberger's avatar
David Mosberger committed
814
pfm_kvirt_to_pa(unsigned long adr)
Linus Torvalds's avatar
Linus Torvalds committed
815
{
Linus Torvalds's avatar
Linus Torvalds committed
816
	__u64 pa = ia64_tpa(adr);
Linus Torvalds's avatar
Linus Torvalds committed
817 818 819 820
	return pa;
}

static void *
David Mosberger's avatar
David Mosberger committed
821
pfm_rvmalloc(unsigned long size)
Linus Torvalds's avatar
Linus Torvalds committed
822 823
{
	void *mem;
824
	unsigned long addr;
Linus Torvalds's avatar
Linus Torvalds committed
825

826 827
	size = PAGE_ALIGN(size);
	mem  = vmalloc(size);
Linus Torvalds's avatar
Linus Torvalds committed
828
	if (mem) {
David Mosberger's avatar
David Mosberger committed
829
		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
830 831
		memset(mem, 0, size);
		addr = (unsigned long)mem;
Linus Torvalds's avatar
Linus Torvalds committed
832
		while (size > 0) {
833 834
			pfm_reserve_page(addr);
			addr+=PAGE_SIZE;
Linus Torvalds's avatar
Linus Torvalds committed
835 836 837 838 839 840 841
			size-=PAGE_SIZE;
		}
	}
	return mem;
}

static void
David Mosberger's avatar
David Mosberger committed
842
pfm_rvfree(void *mem, unsigned long size)
Linus Torvalds's avatar
Linus Torvalds committed
843
{
844
	unsigned long addr;
Linus Torvalds's avatar
Linus Torvalds committed
845 846

	if (mem) {
847 848
		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
		addr = (unsigned long) mem;
849
		while ((long) size > 0) {
850 851
			pfm_unreserve_page(addr);
			addr+=PAGE_SIZE;
Linus Torvalds's avatar
Linus Torvalds committed
852 853 854 855
			size-=PAGE_SIZE;
		}
		vfree(mem);
	}
David Mosberger's avatar
David Mosberger committed
856 857 858
	return;
}

859 860
static pfm_context_t *
pfm_context_alloc(void)
David Mosberger's avatar
David Mosberger committed
861
{
862
	pfm_context_t *ctx;
David Mosberger's avatar
David Mosberger committed
863

864 865 866 867
	/* 
	 * allocate context descriptor 
	 * must be able to free with interrupts disabled
	 */
868 869 870 871
	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
	if (ctx) {
		memset(ctx, 0, sizeof(pfm_context_t));
		DPRINT(("alloc ctx @%p\n", ctx));
David Mosberger's avatar
David Mosberger committed
872
	}
873 874
	return ctx;
}
David Mosberger's avatar
David Mosberger committed
875

876 877 878 879 880 881
static void
pfm_context_free(pfm_context_t *ctx)
{
	if (ctx) {
		DPRINT(("free ctx @%p\n", ctx));
		kfree(ctx);
David Mosberger's avatar
David Mosberger committed
882 883 884
	}
}

885 886
static void
pfm_mask_monitoring(struct task_struct *task)
David Mosberger's avatar
David Mosberger committed
887
{
888 889
	pfm_context_t *ctx = PFM_GET_CTX(task);
	struct thread_struct *th = &task->thread;
890
	unsigned long mask, val, ovfl_mask;
891
	int i;
David Mosberger's avatar
David Mosberger committed
892

893
	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
894

895
	ovfl_mask = pmu_conf->ovfl_val;
896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924
	/*
	 * monitoring can only be masked as a result of a valid
	 * counter overflow. In UP, it means that the PMU still
	 * has an owner. Note that the owner can be different
	 * from the current task. However the PMU state belongs
	 * to the owner.
	 * In SMP, a valid overflow only happens when task is
	 * current. Therefore if we come here, we know that
	 * the PMU state belongs to the current task, therefore
	 * we can access the live registers.
	 *
	 * So in both cases, the live register contains the owner's
	 * state. We can ONLY touch the PMU registers and NOT the PSR.
	 *
	 * As a consequence to this call, the thread->pmds[] array
	 * contains stale information which must be ignored
	 * when context is reloaded AND monitoring is active (see
	 * pfm_restart).
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;
		val = ia64_get_pmd(i);

		if (PMD_IS_COUNTING(i)) {
			/*
		 	 * we rebuild the full 64 bit value of the counter
		 	 */
925
			ctx->ctx_pmds[i].val += (val & ovfl_mask);
926 927 928
		} else {
			ctx->ctx_pmds[i].val = val;
		}
929
		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
930 931
			i,
			ctx->ctx_pmds[i].val,
932
			val & ovfl_mask));
933 934 935 936 937 938 939 940 941 942 943 944 945 946
	}
	/*
	 * mask monitoring by setting the privilege level to 0
	 * we cannot use psr.pp/psr.up for this, it is controlled by
	 * the user
	 *
	 * if task is current, modify actual registers, otherwise modify
	 * thread save state, i.e., what will be restored in pfm_load_regs()
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
		th->pmcs[i] &= ~0xfUL;
947
		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
	}
	/*
	 * make all of this visible
	 */
	ia64_srlz_d();
}

/*
 * must always be done with task == current
 *
 * context must be in MASKED state when calling
 */
static void
pfm_restore_monitoring(struct task_struct *task)
{
	pfm_context_t *ctx = PFM_GET_CTX(task);
	struct thread_struct *th = &task->thread;
965
	unsigned long mask, ovfl_mask;
966
	unsigned long psr, val;
967 968 969
	int i, is_system;

	is_system = ctx->ctx_fl_system;
970
	ovfl_mask = pmu_conf->ovfl_val;
971 972 973 974 975

	if (task != current) {
		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
		return;
	}
976
	if (ctx->ctx_state != PFM_CTX_MASKED) {
977 978 979 980 981 982 983 984 985 986 987 988 989 990 991
		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
			task->pid, current->pid, ctx->ctx_state);
		return;
	}
	psr = pfm_get_psr();
	/*
	 * monitoring is masked via the PMC.
	 * As we restore their value, we do not want each counter to
	 * restart right away. We stop monitoring using the PSR,
	 * restore the PMC (and PMD) and then re-establish the psr
	 * as it was. Note that there can be no pending overflow at
	 * this point, because monitoring was MASKED.
	 *
	 * system-wide session are pinned and self-monitoring
	 */
992
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
993
		/* disable dcr pp */
994
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
		pfm_clear_psr_pp();
	} else {
		pfm_clear_psr_up();
	}
	/*
	 * first, we restore the PMD
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;

		if (PMD_IS_COUNTING(i)) {
			/*
			 * we split the 64bit value according to
			 * counter width
			 */
1012 1013
			val = ctx->ctx_pmds[i].val & ovfl_mask;
			ctx->ctx_pmds[i].val &= ~ovfl_mask;
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
		} else {
			val = ctx->ctx_pmds[i].val;
		}
		ia64_set_pmd(i, val);

		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val));
	}
	/*
	 * restore the PMCs
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		th->pmcs[i] = ctx->ctx_pmcs[i];
		ia64_set_pmc(i, th->pmcs[i]);
		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
	}
	ia64_srlz_d();

1036 1037 1038 1039 1040
	/*
	 * must restore DBR/IBR because could be modified while masked
	 * XXX: need to optimize 
	 */
	if (ctx->ctx_fl_using_dbreg) {
1041 1042
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
1043 1044
	}

1045 1046 1047
	/*
	 * now restore PSR
	 */
1048
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
1049
		/* enable dcr pp */
1050
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
		ia64_srlz_i();
	}
	pfm_set_psr_l(psr);
}

static inline void
pfm_save_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;

	ia64_srlz_d();

	for (i=0; mask; i++, mask>>=1) {
		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
	}
}

/*
 * reload from thread state (used for ctxw only)
 */
static inline void
pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;
1075
	unsigned long val, ovfl_val = pmu_conf->ovfl_val;
1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091

	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
		ia64_set_pmd(i, val);
	}
	ia64_srlz_d();
}

/*
 * propagate PMD from context to thread-state
 */
static inline void
pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
{
	struct thread_struct *thread = &task->thread;
1092
	unsigned long ovfl_val = pmu_conf->ovfl_val;
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
	unsigned long mask = ctx->ctx_all_pmds[0];
	unsigned long val;
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {

		val = ctx->ctx_pmds[i].val;

		/*
		 * We break up the 64 bit value into 2 pieces
		 * the lower bits go to the machine state in the
		 * thread (will be reloaded on ctxsw in).
		 * The upper part stays in the soft-counter.
		 */
		if (PMD_IS_COUNTING(i)) {
			ctx->ctx_pmds[i].val = val & ~ovfl_val;
			 val &= ovfl_val;
		}
		thread->pmds[i] = val;

		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
			i,
			thread->pmds[i],
			ctx->ctx_pmds[i].val));
	}
}

/*
 * propagate PMC from context to thread-state
 */
static inline void
pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
{
	struct thread_struct *thread = &task->thread;
	unsigned long mask = ctx->ctx_all_pmcs[0];
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {
		/* masking 0 with ovfl_val yields 0 */
		thread->pmcs[i] = ctx->ctx_pmcs[i];
		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
	}
}



static inline void
pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
{
	int i;

	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		ia64_set_pmc(i, pmcs[i]);
	}
	ia64_srlz_d();
}

static inline int
pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
{
	return memcmp(a, b, sizeof(pfm_uuid_t));
}

static inline int
pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
{
	int ret = 0;
	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
	return ret;
}


static inline int
pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
		     int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
		     int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
	return ret;
}

1212 1213 1214 1215 1216
static pfm_buffer_fmt_t *
__pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
	struct list_head * pos;
	pfm_buffer_fmt_t * entry;
1217

1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
	list_for_each(pos, &pfm_buffer_fmt_list) {
		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
			return entry;
	}
	return NULL;
}
 
/*
 * find a buffer format based on its uuid
 */
static pfm_buffer_fmt_t *
pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t * fmt;
	spin_lock(&pfm_buffer_fmt_lock);
	fmt = __pfm_find_buffer_fmt(uuid);
	spin_unlock(&pfm_buffer_fmt_lock);
	return fmt;
}
 
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253
int
pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
{
	int ret = 0;

	/* some sanity checks */
	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;

	/* we need at least a handler */
	if (fmt->fmt_handler == NULL) return -EINVAL;

	/*
	 * XXX: need check validity of fmt_arg_size
	 */

1254
	spin_lock(&pfm_buffer_fmt_lock);
1255

1256
	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
1257 1258
		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
		ret = -EBUSY;
1259 1260 1261 1262
		goto out;
	} 
	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
1263

1264 1265 1266
out:
	spin_unlock(&pfm_buffer_fmt_lock);
 	return ret;
1267
}
David Mosberger's avatar
David Mosberger committed
1268
EXPORT_SYMBOL(pfm_register_buffer_fmt);
1269 1270 1271 1272

int
pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
{
1273
	pfm_buffer_fmt_t *fmt;
1274 1275
	int ret = 0;

1276
	spin_lock(&pfm_buffer_fmt_lock);
1277

1278 1279
	fmt = __pfm_find_buffer_fmt(uuid);
	if (!fmt) {
1280 1281
		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
		ret = -EINVAL;
1282
		goto out;
1283
	}
1284 1285
	list_del_init(&fmt->fmt_list);
	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
1286

1287 1288
out:
	spin_unlock(&pfm_buffer_fmt_lock);
1289 1290 1291
	return ret;

}
David Mosberger's avatar
David Mosberger committed
1292
EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
1293 1294 1295 1296

static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
{
1297
	unsigned long flags;
1298 1299 1300
	/*
	 * validy checks on cpu_mask have been done upstream
	 */
1301
	LOCK_PFS(flags);
1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	if (is_syswide) {
		/*
		 * cannot mix system wide and per-task sessions
		 */
		if (pfm_sessions.pfs_task_sessions > 0UL) {
			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
			  	pfm_sessions.pfs_task_sessions));
			goto abort;
		}

		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;

		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));

		pfm_sessions.pfs_sys_session[cpu] = task;

		pfm_sessions.pfs_sys_sessions++ ;

	} else {
		if (pfm_sessions.pfs_sys_sessions) goto abort;
		pfm_sessions.pfs_task_sessions++;
	}

	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

1340
	UNLOCK_PFS(flags);
1341 1342 1343 1344 1345 1346 1347 1348

	return 0;

error_conflict:
	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
  		pfm_sessions.pfs_sys_session[cpu]->pid,
		smp_processor_id()));
abort:
1349
	UNLOCK_PFS(flags);
1350 1351 1352 1353 1354 1355 1356 1357

	return -EBUSY;

}

static int
pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
{
1358
	unsigned long flags;
1359 1360 1361
	/*
	 * validy checks on cpu_mask have been done upstream
	 */
1362
	LOCK_PFS(flags);
1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));


	if (is_syswide) {
		pfm_sessions.pfs_sys_session[cpu] = NULL;
		/*
		 * would not work with perfmon+more than one bit in cpu_mask
		 */
		if (ctx && ctx->ctx_fl_using_dbreg) {
			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
			} else {
				pfm_sessions.pfs_sys_use_dbregs--;
			}
		}
		pfm_sessions.pfs_sys_sessions--;
	} else {
		pfm_sessions.pfs_task_sessions--;
	}
	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

1395
	UNLOCK_PFS(flags);
1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424

	return 0;
}

/*
 * removes virtual mapping of the sampling buffer.
 * IMPORTANT: cannot be called with interrupts disable, e.g. inside
 * a PROTECT_CTX() section.
 */
static int
pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
{
	int r;

	/* sanity checks */
	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
		return -EINVAL;
	}

	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));

	/*
	 * does the actual unmapping
	 */
	down_write(&task->mm->mmap_sem);

	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));

David Mosberger's avatar
David Mosberger committed
1425
	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522

	up_write(&task->mm->mmap_sem);
	if (r !=0) {
		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
	}

	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));

	return 0;
}

/*
 * free actual physical storage used by sampling buffer
 */
#if 0
static int
pfm_free_smpl_buffer(pfm_context_t *ctx)
{
	pfm_buffer_fmt_t *fmt;

	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;

	/*
	 * we won't use the buffer format anymore
	 */
	fmt = ctx->ctx_buf_fmt;

	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
		ctx->ctx_smpl_hdr,
		ctx->ctx_smpl_size,
		ctx->ctx_smpl_vaddr));

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

	/*
	 * free the buffer
	 */
	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);

	ctx->ctx_smpl_hdr  = NULL;
	ctx->ctx_smpl_size = 0UL;

	return 0;

invalid_free:
	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
	return -EINVAL;
}
#endif

static inline void
pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
{
	if (fmt == NULL) return;

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

}

/*
 * pfmfs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pfm: will go nicely and kill the special-casing in procfs.
 */
static struct vfsmount *pfmfs_mnt;

static int __init
init_pfm_fs(void)
{
	int err = register_filesystem(&pfm_fs_type);
	if (!err) {
		pfmfs_mnt = kern_mount(&pfm_fs_type);
		err = PTR_ERR(pfmfs_mnt);
		if (IS_ERR(pfmfs_mnt))
			unregister_filesystem(&pfm_fs_type);
		else
			err = 0;
	}
	return err;
}

static void __exit
exit_pfm_fs(void)
{
	unregister_filesystem(&pfm_fs_type);
	mntput(pfmfs_mnt);
}

static loff_t
pfm_lseek(struct file *file, loff_t offset, int whence)
{
	DPRINT(("pfm_lseek called\n"));
	return -ESPIPE;
}

static ssize_t
David Mosberger's avatar
David Mosberger committed
1523
pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos)
1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669
{
	pfm_context_t *ctx;
	pfm_msg_t *msg;
	ssize_t ret;
	unsigned long flags;
  	DECLARE_WAITQUEUE(wait, current);
	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
		return -EINVAL;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
		return -EINVAL;
	}

	/*
	 * check even when there is no message
	 */
	if (size < sizeof(pfm_msg_t)) {
		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
		return -EINVAL;
	}
	/*
	 * seeks are not allowed on message queues
	 */
	if (ppos != &filp->f_pos) return -ESPIPE;

	PROTECT_CTX(ctx, flags);

  	/*
	 * put ourselves on the wait queue
	 */
  	add_wait_queue(&ctx->ctx_msgq_wait, &wait);


  	for(;;) {
		/*
		 * check wait queue
		 */

  		set_current_state(TASK_INTERRUPTIBLE);

		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

		ret = 0;
		if(PFM_CTXQ_EMPTY(ctx) == 0) break;

		UNPROTECT_CTX(ctx, flags);

		/*
		 * check non-blocking read
		 */
      		ret = -EAGAIN;
		if(filp->f_flags & O_NONBLOCK) break;

		/*
		 * check pending signals
		 */
		if(signal_pending(current)) {
			ret = -EINTR;
			break;
		}
      		/*
		 * no message, so wait
		 */
      		schedule();

		PROTECT_CTX(ctx, flags);
	}
	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
  	set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);

	if (ret < 0) goto abort;

	ret = -EINVAL;
	msg = pfm_get_next_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
		goto abort_locked;
	}

	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));

	ret = -EFAULT;
  	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);

abort_locked:
	UNPROTECT_CTX(ctx, flags);
abort:
	return ret;
}

static ssize_t
pfm_write(struct file *file, const char *ubuf,
			  size_t size, loff_t *ppos)
{
	DPRINT(("pfm_write called\n"));
	return -EINVAL;
}

static unsigned int
pfm_poll(struct file *filp, poll_table * wait)
{
	pfm_context_t *ctx;
	unsigned long flags;
	unsigned int mask = 0;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
		return 0;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
		return 0;
	}


	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));

	poll_wait(filp, &ctx->ctx_msgq_wait, wait);

	PROTECT_CTX(ctx, flags);

	if (PFM_CTXQ_EMPTY(ctx) == 0)
		mask =  POLLIN | POLLRDNORM;

	UNPROTECT_CTX(ctx, flags);

	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));

	return mask;
}

static int
pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
{
	DPRINT(("pfm_ioctl called\n"));
	return -EINVAL;
}

/*
1670
 * context is locked when coming here and interrupts are disabled
1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710
 */
static inline int
pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
{
	int ret;

	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);

	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		current->pid,
		fd,
		on,
		ctx->ctx_async_queue, ret));

	return ret;
}

static int
pfm_fasync(int fd, struct file *filp, int on)
{
	pfm_context_t *ctx;
	unsigned long flags;
	int ret;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
		return -EBADF;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
		return -EBADF;
	}


	PROTECT_CTX(ctx, flags);

	ret = pfm_do_fasync(fd, filp, ctx, on);

1711
	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732
		fd,
		on,
		ctx->ctx_async_queue, ret));

	UNPROTECT_CTX(ctx, flags);

	return ret;
}

#ifdef CONFIG_SMP
/*
 * this function is exclusively called from pfm_close().
 * The context is not protected at that time, nor are interrupts
 * on the remote CPU. That's necessary to avoid deadlocks.
 */
static void
pfm_syswide_force_stop(void *info)
{
	pfm_context_t   *ctx = (pfm_context_t *)info;
	struct pt_regs *regs = ia64_task_regs(current);
	struct task_struct *owner;
1733 1734
	unsigned long flags;
	int ret;
1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755

	if (ctx->ctx_cpu != smp_processor_id()) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d  but on CPU%d\n",
			ctx->ctx_cpu,
			smp_processor_id());
		return;
	}
	owner = GET_PMU_OWNER();
	if (owner != ctx->ctx_task) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
			smp_processor_id(),
			owner->pid, ctx->ctx_task->pid);
		return;
	}
	if (GET_PMU_CTX() != ctx) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
			smp_processor_id(),
			GET_PMU_CTX(), ctx);
		return;
	}

1756
	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
David Mosberger's avatar
David Mosberger committed
1757
	/*
1758 1759 1760
	 * the context is already protected in pfm_close(), we simply
	 * need to mask interrupts to avoid a PMU interrupt race on
	 * this CPU
David Mosberger's avatar
David Mosberger committed
1761
	 */
1762
	local_irq_save(flags);
1763

1764 1765 1766 1767
	ret = pfm_context_unload(ctx, NULL, 0, regs);
	if (ret) {
		DPRINT(("context_unload returned %d\n", ret));
	}
1768 1769

	/*
1770
	 * unmask interrupts, PMU interrupts are now spurious here
1771
	 */
1772
	local_irq_restore(flags);
1773 1774 1775 1776 1777 1778 1779
}

static void
pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
{
	int ret;

1780
	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
1781
	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
1782
	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
1783 1784 1785 1786
}
#endif /* CONFIG_SMP */

/*
1787 1788
 * called for each close(). Partially free resources.
 * When caller is self-monitoring, the context is unloaded.
1789 1790
 */
static int
1791
pfm_flush(struct file *filp)
1792 1793 1794 1795 1796 1797 1798
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_vaddr = NULL;
1799
	int state, is_system;
1800 1801

	if (PFM_IS_FILE(filp) == 0) {
1802
		DPRINT(("bad magic for\n"));
1803 1804
		return -EBADF;
	}
David Mosberger's avatar
David Mosberger committed
1805

1806 1807
	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
1808
		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
1809 1810
		return -EBADF;
	}
1811

1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825
	/*
	 * remove our file from the async queue, if we use this mode.
	 * This can be done without the context being protected. We come
	 * here when the context has become unreacheable by other tasks.
	 *
	 * We may still have active monitoring at this point and we may
	 * end up in pfm_overflow_handler(). However, fasync_helper()
	 * operates with interrupts disabled and it cleans up the
	 * queue. If the PMU handler is called prior to entering
	 * fasync_helper() then it will send a signal. If it is
	 * invoked after, it will find an empty queue and no
	 * signal will be sent. In both case, we are safe
	 */
	if (filp->f_flags & FASYNC) {
1826
		DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
1827 1828
		pfm_do_fasync (-1, filp, ctx, 0);
	}
1829 1830 1831

	PROTECT_CTX(ctx, flags);

1832 1833 1834 1835
	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);
1836
	regs = ia64_task_regs(task);
1837

1838 1839
	DPRINT(("ctx_state=%d is_current=%d\n",
		state,
1840
		task == current ? 1 : 0));
1841

1842 1843 1844
	/*
	 * if state == UNLOADED, then task is NULL
	 */
1845 1846

	/*
1847
	 * we must stop and unload because we are losing access to the context.
1848 1849 1850 1851 1852 1853 1854 1855 1856 1857
	 */
	if (task == current) {
#ifdef CONFIG_SMP
		/*
		 * the task IS the owner but it migrated to another CPU: that's bad
		 * but we must handle this cleanly. Unfortunately, the kernel does
		 * not provide a mechanism to block migration (while the context is loaded).
		 *
		 * We need to release the resource on the ORIGINAL cpu.
		 */
1858
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
1859

1860 1861 1862 1863 1864
			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
			/*
			 * keep context protected but unmask interrupt for IPI
			 */
			local_irq_restore(flags);
1865 1866 1867 1868

			pfm_syswide_cleanup_other_cpu(ctx);

			/*
1869
			 * restore interrupt masking
1870
			 */
1871
			local_irq_save(flags);
1872

1873 1874 1875
			/*
			 * context is unloaded at this point
			 */
1876 1877 1878 1879
		} else
#endif /* CONFIG_SMP */
		{

1880
			DPRINT(("forcing unload\n"));
1881 1882 1883 1884 1885 1886
			/*
		 	* stop and unload, returning with state UNLOADED
		 	* and session unreserved.
		 	*/
			pfm_context_unload(ctx, NULL, 0, regs);

1887
			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
1888 1889 1890
		}
	}

1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981
	/*
	 * remove virtual mapping, if any, for the calling task.
	 * cannot reset ctx field until last user is calling close().
	 *
	 * ctx_smpl_vaddr must never be cleared because it is needed
	 * by every task with access to the context
	 *
	 * When called from do_exit(), the mm context is gone already, therefore
	 * mm is NULL, i.e., the VMA is already gone  and we do not have to
	 * do anything here
	 */
	if (ctx->ctx_smpl_vaddr && current->mm) {
		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
		smpl_buf_size  = ctx->ctx_smpl_size;
	}

	UNPROTECT_CTX(ctx, flags);

	/*
	 * if there was a mapping, then we systematically remove it
	 * at this point. Cannot be done inside critical section
	 * because some VM function reenables interrupts.
	 *
	 */
	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);

	return 0;
}
/*
 * called either on explicit close() or from exit_files(). 
 * Only the LAST user of the file gets to this point, i.e., it is
 * called only ONCE.
 *
 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 
 * (fput()),i.e, last task to access the file. Nobody else can access the 
 * file at this point.
 *
 * When called from exit_files(), the VMA has been freed because exit_mm()
 * is executed before exit_files().
 *
 * When called from exit_files(), the current task is not yet ZOMBIE but we
 * flush the PMU state to the context. 
 */
static int
pfm_close(struct inode *inode, struct file *filp)
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
  	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_addr = NULL;
	int free_possible = 1;
	int state, is_system;

	DPRINT(("pfm_close called private=%p\n", filp->private_data));

	if (PFM_IS_FILE(filp) == 0) {
		DPRINT(("bad magic\n"));
		return -EBADF;
	}
	
	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
		return -EBADF;
	}

	PROTECT_CTX(ctx, flags);

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);
	regs = ia64_task_regs(task);

	DPRINT(("ctx_state=%d is_current=%d\n", 
		state,
		task == current ? 1 : 0));

	/*
	 * if task == current, then pfm_flush() unloaded the context
	 */
	if (state == PFM_CTX_UNLOADED) goto doit;

	/*
	 * context is loaded/masked and task != current, we need to
	 * either force an unload or go zombie
	 */

1982 1983 1984
	/*
	 * The task is currently blocked or will block after an overflow.
	 * we must force it to wakeup to get out of the
1985 1986 1987
	 * MASKED state and transition to the unloaded state by itself.
	 *
	 * This situation is only possible for per-task mode
1988
	 */
1989
	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
1990 1991 1992 1993 1994 1995 1996

		/*
		 * set a "partial" zombie state to be checked
		 * upon return from down() in pfm_handle_work().
		 *
		 * We cannot use the ZOMBIE state, because it is checked
		 * by pfm_load_regs() which is called upon wakeup from down().
1997
		 * In such case, it would free the context and then we would
1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
		 * return to pfm_handle_work() which would access the
		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
		 * but visible to pfm_handle_work().
		 *
		 * For some window of time, we have a zombie context with
		 * ctx_state = MASKED  and not ZOMBIE
		 */
		ctx->ctx_fl_going_zombie = 1;

		/*
		 * force task to wake up from MASKED state
		 */
		up(&ctx->ctx_restart_sem);

2012
		DPRINT(("waking up ctx_state=%d\n", state));
2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036

		/*
		 * put ourself to sleep waiting for the other
		 * task to report completion
		 *
		 * the context is protected by mutex, therefore there
		 * is no risk of being notified of completion before
		 * begin actually on the waitq.
		 */
  		set_current_state(TASK_INTERRUPTIBLE);
  		add_wait_queue(&ctx->ctx_zombieq, &wait);

		UNPROTECT_CTX(ctx, flags);

		/*
		 * XXX: check for signals :
		 * 	- ok of explicit close
		 * 	- not ok when coming from exit_files()
		 */
      		schedule();


		PROTECT_CTX(ctx, flags);

2037

2038 2039 2040 2041
		remove_wait_queue(&ctx->ctx_zombieq, &wait);
  		set_current_state(TASK_RUNNING);

		/*
2042
		 * context is unloaded at this point
2043
		 */
2044
		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
2045
	}
2046
	else if (task != current) {
2047 2048 2049 2050
#ifdef CONFIG_SMP
		/*
	 	 * switch context to zombie state
	 	 */
2051
		ctx->ctx_state = PFM_CTX_ZOMBIE;
2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063

		DPRINT(("zombie ctx for [%d]\n", task->pid));
		/*
		 * cannot free the context on the spot. deferred until
		 * the task notices the ZOMBIE state
		 */
		free_possible = 0;
#else
		pfm_context_unload(ctx, NULL, 0, regs);
#endif
	}

2064
doit:
2065 2066 2067
	/* reload state, may have changed during  opening of critical section */
	state = ctx->ctx_state;

2068 2069 2070 2071 2072 2073
	/*
	 * the context is still attached to a task (possibly current)
	 * we cannot destroy it right now
	 */

	/*
2074
	 * we must free the sampling buffer right here because
2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086
	 * we cannot rely on it being cleaned up later by the
	 * monitored task. It is not possible to free vmalloc'ed
	 * memory in pfm_load_regs(). Instead, we remove the buffer
	 * now. should there be subsequent PMU overflow originally
	 * meant for sampling, the will be converted to spurious
	 * and that's fine because the monitoring tools is gone anyway.
	 */
	if (ctx->ctx_smpl_hdr) {
		smpl_buf_addr = ctx->ctx_smpl_hdr;
		smpl_buf_size = ctx->ctx_smpl_size;
		/* no more sampling */
		ctx->ctx_smpl_hdr = NULL;
2087
		ctx->ctx_fl_is_sampling = 0;
2088 2089
	}

2090
	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
2091
		state,
2092 2093 2094 2095 2096 2097 2098
		free_possible,
		smpl_buf_addr,
		smpl_buf_size));

	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);

	/*
2099
	 * UNLOADED that the session has already been unreserved.
2100
	 */
2101
	if (state == PFM_CTX_ZOMBIE) {
2102
		pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
David Mosberger's avatar
David Mosberger committed
2103 2104
	}

2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129
	/*
	 * disconnect file descriptor from context must be done
	 * before we unlock.
	 */
	filp->private_data = NULL;

	/*
	 * if we free on the spot, the context is now completely unreacheable
	 * from the callers side. The monitored task side is also cut, so we
	 * can freely cut.
	 *
	 * If we have a deferred free, only the caller side is disconnected.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (smpl_buf_addr)  pfm_rvfree(smpl_buf_addr, smpl_buf_size);

	/*
	 * return the memory used by the context
	 */
	if (free_possible) pfm_context_free(ctx);
David Mosberger's avatar
David Mosberger committed
2130 2131

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
2132 2133
}

2134 2135
static int
pfm_no_open(struct inode *irrelevant, struct file *dontcare)
Linus Torvalds's avatar
Linus Torvalds committed
2136
{
2137 2138 2139
	DPRINT(("pfm_no_open called\n"));
	return -ENXIO;
}
Linus Torvalds's avatar
Linus Torvalds committed
2140

2141 2142


2143 2144 2145 2146 2147 2148 2149 2150
static struct file_operations pfm_file_ops = {
	.llseek   = pfm_lseek,
	.read     = pfm_read,
	.write    = pfm_write,
	.poll     = pfm_poll,
	.ioctl    = pfm_ioctl,
	.open     = pfm_no_open,	/* special open code to disallow open via /proc */
	.fasync   = pfm_fasync,
2151 2152
	.release  = pfm_close,
	.flush	  = pfm_flush
2153 2154 2155 2156 2157 2158 2159
};

static int
pfmfs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
2160

2161
static struct dentry_operations pfmfs_dentry_operations = {
2162
	.d_delete = pfmfs_delete_dentry,
2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213
};


static int
pfm_alloc_fd(struct file **cfile)
{
	int fd, ret = 0;
	struct file *file = NULL;
	struct inode * inode;
	char name[32];
	struct qstr this;

	fd = get_unused_fd();
	if (fd < 0) return -ENFILE;

	ret = -ENFILE;

	file = get_empty_filp();
	if (!file) goto out;

	/*
	 * allocate a new inode
	 */
	inode = new_inode(pfmfs_mnt->mnt_sb);
	if (!inode) goto out;

	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));

	inode->i_sb   = pfmfs_mnt->mnt_sb;
	inode->i_mode = S_IFCHR|S_IRUGO;
	inode->i_sock = 0;
	inode->i_uid  = current->fsuid;
	inode->i_gid  = current->fsgid;

	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len  = strlen(name);
	this.hash = inode->i_ino;

	ret = -ENOMEM;

	/*
	 * allocate a new dcache entry
	 */
	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
	if (!file->f_dentry) goto out;

	file->f_dentry->d_op = &pfmfs_dentry_operations;

	d_add(file->f_dentry, inode);
	file->f_vfsmnt = mntget(pfmfs_mnt);
2214
	file->f_mapping = inode->i_mapping;
2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235

	file->f_op    = &pfm_file_ops;
	file->f_mode  = FMODE_READ;
	file->f_flags = O_RDONLY;
	file->f_pos   = 0;

	/*
	 * may have to delay until context is attached?
	 */
	fd_install(fd, file);

	/*
	 * the file structure we will use
	 */
	*cfile = file;

	return fd;
out:
	if (file) put_filp(file);
	put_unused_fd(fd);
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
2236 2237 2238
}

static void
2239 2240 2241 2242 2243 2244
pfm_free_fd(int fd, struct file *file)
{
	if (file) put_filp(file);
	put_unused_fd(fd);
}

Linus Torvalds's avatar
Linus Torvalds committed
2245
static int
Linus Torvalds's avatar
Linus Torvalds committed
2246
pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
Linus Torvalds's avatar
Linus Torvalds committed
2247
{
Linus Torvalds's avatar
Linus Torvalds committed
2248
	unsigned long page;
Linus Torvalds's avatar
Linus Torvalds committed
2249

2250
	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
David Mosberger's avatar
David Mosberger committed
2251

Linus Torvalds's avatar
Linus Torvalds committed
2252
	while (size > 0) {
David Mosberger's avatar
David Mosberger committed
2253
		page = pfm_kvirt_to_pa(buf);
Linus Torvalds's avatar
Linus Torvalds committed
2254

2255
		if (pfm_remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;
2256

Linus Torvalds's avatar
Linus Torvalds committed
2257 2258 2259 2260 2261 2262
		addr  += PAGE_SIZE;
		buf   += PAGE_SIZE;
		size  -= PAGE_SIZE;
	}
	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
2263

Linus Torvalds's avatar
Linus Torvalds committed
2264
/*
2265
 * allocate a sampling buffer and remaps it into the user address space of the task
Linus Torvalds's avatar
Linus Torvalds committed
2266 2267
 */
static int
2268
pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
Linus Torvalds's avatar
Linus Torvalds committed
2269
{
2270
	struct mm_struct *mm = task->mm;
David Mosberger's avatar
David Mosberger committed
2271
	struct vm_area_struct *vma = NULL;
2272
	unsigned long size;
Linus Torvalds's avatar
Linus Torvalds committed
2273
	void *smpl_buf;
Linus Torvalds's avatar
Linus Torvalds committed
2274 2275


Linus Torvalds's avatar
Linus Torvalds committed
2276
	/*
2277
	 * the fixed header + requested size and align to page boundary
Linus Torvalds's avatar
Linus Torvalds committed
2278
	 */
2279
	size = PAGE_ALIGN(rsize);
2280

2281
	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
2282

Linus Torvalds's avatar
Linus Torvalds committed
2283 2284
	/*
	 * check requested size to avoid Denial-of-service attacks
2285
	 * XXX: may have to refine this test
David Mosberger's avatar
David Mosberger committed
2286 2287
	 * Check against address space limit.
	 *
2288
	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
David Mosberger's avatar
David Mosberger committed
2289
	 * 	return -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
2290
	 */
2291
	if (size > task->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;
Linus Torvalds's avatar
Linus Torvalds committed
2292

Linus Torvalds's avatar
Linus Torvalds committed
2293
	/*
David Mosberger's avatar
David Mosberger committed
2294 2295 2296
	 * We do the easy to undo allocations first.
 	 *
	 * pfm_rvmalloc(), clears the buffer, so there is no leak
Linus Torvalds's avatar
Linus Torvalds committed
2297
	 */
David Mosberger's avatar
David Mosberger committed
2298 2299
	smpl_buf = pfm_rvmalloc(size);
	if (smpl_buf == NULL) {
2300
		DPRINT(("Can't allocate sampling buffer\n"));
David Mosberger's avatar
David Mosberger committed
2301 2302
		return -ENOMEM;
	}
Linus Torvalds's avatar
Linus Torvalds committed
2303

2304
	DPRINT(("smpl_buf @%p\n", smpl_buf));
Linus Torvalds's avatar
Linus Torvalds committed
2305

Linus Torvalds's avatar
Linus Torvalds committed
2306 2307
	/* allocate vma */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
David Mosberger's avatar
David Mosberger committed
2308
	if (!vma) {
2309
		DPRINT(("Cannot allocate vma\n"));
2310
		goto error_kmem;
David Mosberger's avatar
David Mosberger committed
2311
	}
Andrew Morton's avatar
Andrew Morton committed
2312 2313
	memset(vma, 0, sizeof(*vma));

Linus Torvalds's avatar
Linus Torvalds committed
2314
	/*
David Mosberger's avatar
David Mosberger committed
2315
	 * partially initialize the vma for the sampling buffer
2316 2317
	 *
	 * The VM_DONTCOPY flag is very important as it ensures that the mapping
2318
	 * will never be inherited for any child process (via fork()) which is always
2319
	 * what we want.
Linus Torvalds's avatar
Linus Torvalds committed
2320
	 */
2321
	vma->vm_mm	     = mm;
2322
	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
David Mosberger's avatar
David Mosberger committed
2323 2324 2325 2326 2327 2328 2329
	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */

	/*
	 * Now we have everything we need and we can initialize
	 * and connect all the data structures
	 */

2330 2331
	ctx->ctx_smpl_hdr   = smpl_buf;
	ctx->ctx_smpl_size  = size; /* aligned size */
Linus Torvalds's avatar
Linus Torvalds committed
2332

David Mosberger's avatar
David Mosberger committed
2333 2334 2335 2336 2337 2338
	/*
	 * Let's do the difficult operations next.
	 *
	 * now we atomically find some area in the address space and
	 * remap the buffer in it.
	 */
2339
	down_write(&task->mm->mmap_sem);
Linus Torvalds's avatar
Linus Torvalds committed
2340

David Mosberger's avatar
David Mosberger committed
2341
	/* find some free area in address space, must have mmap sem held */
David Mosberger's avatar
David Mosberger committed
2342
	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
David Mosberger's avatar
David Mosberger committed
2343
	if (vma->vm_start == 0UL) {
2344 2345
		DPRINT(("Cannot find unmapped area for size %ld\n", size));
		up_write(&task->mm->mmap_sem);
David Mosberger's avatar
David Mosberger committed
2346 2347 2348 2349
		goto error;
	}
	vma->vm_end = vma->vm_start + size;

2350
	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
2351

2352
	/* can only be applied to current task, need to have the mm semaphore held when called */
David Mosberger's avatar
David Mosberger committed
2353
	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
2354 2355
		DPRINT(("Can't remap buffer\n"));
		up_write(&task->mm->mmap_sem);
David Mosberger's avatar
David Mosberger committed
2356 2357
		goto error;
	}
Linus Torvalds's avatar
Linus Torvalds committed
2358

Linus Torvalds's avatar
Linus Torvalds committed
2359
	/*
David Mosberger's avatar
David Mosberger committed
2360 2361
	 * now insert the vma in the vm list for the process, must be
	 * done with mmap lock held
Linus Torvalds's avatar
Linus Torvalds committed
2362 2363
	 */
	insert_vm_struct(mm, vma);
Linus Torvalds's avatar
Linus Torvalds committed
2364

Linus Torvalds's avatar
Linus Torvalds committed
2365
	mm->total_vm  += size >> PAGE_SHIFT;
Linus Torvalds's avatar
Linus Torvalds committed
2366

2367
	up_write(&task->mm->mmap_sem);
David Mosberger's avatar
David Mosberger committed
2368

Linus Torvalds's avatar
Linus Torvalds committed
2369
	/*
2370
	 * keep track of user level virtual address
Linus Torvalds's avatar
Linus Torvalds committed
2371
	 */
2372 2373
	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
	*(unsigned long *)user_vaddr = vma->vm_start;
Linus Torvalds's avatar
Linus Torvalds committed
2374 2375 2376

	return 0;

David Mosberger's avatar
David Mosberger committed
2377
error:
2378 2379 2380
	kmem_cache_free(vm_area_cachep, vma);
error_kmem:
	pfm_rvfree(smpl_buf, size);
2381

2382
	return -ENOMEM;
2383 2384
}

David Mosberger's avatar
David Mosberger committed
2385 2386 2387 2388 2389 2390
/*
 * XXX: do something better here
 */
static int
pfm_bad_permissions(struct task_struct *task)
{
2391
	/* inspired by ptrace_attach() */
2392
	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406
		current->uid,
		current->gid,
		task->euid,
		task->suid,
		task->uid,
		task->egid,
		task->sgid));

	return ((current->uid != task->euid)
	    || (current->uid != task->suid)
	    || (current->uid != task->uid)
	    || (current->gid != task->egid)
	    || (current->gid != task->sgid)
	    || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
David Mosberger's avatar
David Mosberger committed
2407 2408
}

Linus Torvalds's avatar
Linus Torvalds committed
2409
static int
2410
pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
Linus Torvalds's avatar
Linus Torvalds committed
2411
{
Linus Torvalds's avatar
Linus Torvalds committed
2412 2413
	int ctx_flags;

Linus Torvalds's avatar
Linus Torvalds committed
2414
	/* valid signal */
Linus Torvalds's avatar
Linus Torvalds committed
2415

David Mosberger's avatar
David Mosberger committed
2416
	ctx_flags = pfx->ctx_flags;
Linus Torvalds's avatar
Linus Torvalds committed
2417

Linus Torvalds's avatar
Linus Torvalds committed
2418
	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
2419

David Mosberger's avatar
David Mosberger committed
2420
		/*
2421
		 * cannot block in this mode
David Mosberger's avatar
David Mosberger committed
2422
		 */
2423 2424
		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
			DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
Linus Torvalds's avatar
Linus Torvalds committed
2425 2426
			return -EINVAL;
		}
David Mosberger's avatar
David Mosberger committed
2427
	} else {
2428
	}
Linus Torvalds's avatar
Linus Torvalds committed
2429
	/* probably more to add here */
Linus Torvalds's avatar
Linus Torvalds committed
2430

Linus Torvalds's avatar
Linus Torvalds committed
2431
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
2432
}
Linus Torvalds's avatar
Linus Torvalds committed
2433

Linus Torvalds's avatar
Linus Torvalds committed
2434
static int
2435 2436
pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
		     unsigned int cpu, pfarg_context_t *arg)
Linus Torvalds's avatar
Linus Torvalds committed
2437
{
2438 2439
	pfm_buffer_fmt_t *fmt = NULL;
	unsigned long size = 0UL;
Linus Torvalds's avatar
Linus Torvalds committed
2440
	void *uaddr = NULL;
2441 2442 2443
	void *fmt_arg = NULL;
	int ret = 0;
#define PFM_CTXARG_BUF_ARG(a)	(pfm_buffer_fmt_t *)(a+1)
Linus Torvalds's avatar
Linus Torvalds committed
2444

2445
	/* invoke and lock buffer format, if found */
2446
	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
2447 2448 2449 2450
	if (fmt == NULL) {
		DPRINT(("[%d] cannot find buffer format\n", task->pid));
		return -EINVAL;
	}
David Mosberger's avatar
David Mosberger committed
2451 2452

	/*
2453
	 * buffer argument MUST be contiguous to pfarg_context_t
David Mosberger's avatar
David Mosberger committed
2454
	 */
2455
	if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
Linus Torvalds's avatar
Linus Torvalds committed
2456

2457
	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
Linus Torvalds's avatar
Linus Torvalds committed
2458

2459
	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
Linus Torvalds's avatar
Linus Torvalds committed
2460

2461
	if (ret) goto error;
David Mosberger's avatar
David Mosberger committed
2462

2463 2464
	/* link buffer format and context */
	ctx->ctx_buf_fmt = fmt;
David Mosberger's avatar
David Mosberger committed
2465

2466 2467 2468 2469 2470
	/*
	 * check if buffer format wants to use perfmon buffer allocation/mapping service
	 */
	ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
	if (ret) goto error;
Linus Torvalds's avatar
Linus Torvalds committed
2471

2472 2473 2474 2475 2476 2477
	if (size) {
		/*
		 * buffer is always remapped into the caller's address space
		 */
		ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr);
		if (ret) goto error;
Linus Torvalds's avatar
Linus Torvalds committed
2478

2479 2480 2481 2482
		/* keep track of user address of buffer */
		arg->ctx_smpl_vaddr = uaddr;
	}
	ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
David Mosberger's avatar
David Mosberger committed
2483

2484 2485 2486
error:
	return ret;
}
Linus Torvalds's avatar
Linus Torvalds committed
2487

2488 2489 2490 2491
static void
pfm_reset_pmu_state(pfm_context_t *ctx)
{
	int i;
Linus Torvalds's avatar
Linus Torvalds committed
2492

2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503
	/*
	 * install reset values for PMC.
	 */
	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
	}
	/*
	 * PMD registers are set to 0UL when the context in memset()
	 */
Linus Torvalds's avatar
Linus Torvalds committed
2504

2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521
	/*
	 * On context switched restore, we must restore ALL pmc and ALL pmd even
	 * when they are not actively used by the task. In UP, the incoming process
	 * may otherwise pick up left over PMC, PMD state from the previous process.
	 * As opposed to PMD, stale PMC can cause harm to the incoming
	 * process because they may change what is being measured.
	 * Therefore, we must systematically reinstall the entire
	 * PMC state. In SMP, the same thing is possible on the
	 * same CPU but also on between 2 CPUs.
	 *
	 * The problem with PMD is information leaking especially
	 * to user level when psr.sp=0
	 *
	 * There is unfortunately no easy way to avoid this problem
	 * on either UP or SMP. This definitively slows down the
	 * pfm_load_regs() function.
	 */
Linus Torvalds's avatar
Linus Torvalds committed
2522

2523 2524 2525 2526 2527
	 /*
	  * bitmask of all PMCs accessible to this context
	  *
	  * PMC0 is treated differently.
	  */
2528
	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
Linus Torvalds's avatar
Linus Torvalds committed
2529

2530 2531 2532
	/*
	 * bitmask of all PMDs that are accesible to this context
	 */
2533
	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
Linus Torvalds's avatar
Linus Torvalds committed
2534

2535
	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
David Mosberger's avatar
David Mosberger committed
2536

2537 2538 2539 2540 2541 2542
	/*
	 * useful in case of re-enable after disable
	 */
	ctx->ctx_used_ibrs[0] = 0UL;
	ctx->ctx_used_dbrs[0] = 0UL;
}
David Mosberger's avatar
David Mosberger committed
2543

2544 2545 2546 2547 2548
static int
pfm_ctx_getsize(void *arg, size_t *sz)
{
	pfarg_context_t *req = (pfarg_context_t *)arg;
	pfm_buffer_fmt_t *fmt;
David Mosberger's avatar
David Mosberger committed
2549

2550
	*sz = 0;
Linus Torvalds's avatar
Linus Torvalds committed
2551

2552
	if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
Linus Torvalds's avatar
Linus Torvalds committed
2553

2554
	fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
2555 2556 2557 2558 2559 2560 2561
	if (fmt == NULL) {
		DPRINT(("cannot find buffer format\n"));
		return -EINVAL;
	}
	/* get just enough to copy in user parameters */
	*sz = fmt->fmt_arg_size;
	DPRINT(("arg_size=%lu\n", *sz));
David Mosberger's avatar
David Mosberger committed
2562

2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580
	return 0;
}



/*
 * cannot attach if :
 * 	- kernel task
 * 	- task not owned by caller
 * 	- task incompatible with context mode
 */
static int
pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
{
	/*
	 * no kernel task or task not owner by caller
	 */
	if (task->mm == NULL) {
2581
		DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
2582 2583 2584
		return -EPERM;
	}
	if (pfm_bad_permissions(task)) {
2585
		DPRINT(("no permission to attach to  [%d]\n", task->pid));
2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596
		return -EPERM;
	}
	/*
	 * cannot block in self-monitoring mode
	 */
	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
		return -EINVAL;
	}

	if (task->state == TASK_ZOMBIE) {
2597
		DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
2598
		return -EBUSY;
Linus Torvalds's avatar
Linus Torvalds committed
2599
	}
Linus Torvalds's avatar
Linus Torvalds committed
2600

Linus Torvalds's avatar
Linus Torvalds committed
2601
	/*
2602
	 * always ok for self
Linus Torvalds's avatar
Linus Torvalds committed
2603
	 */
2604 2605 2606
	if (task == current) return 0;

	if (task->state != TASK_STOPPED) {
2607
		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
2608
		return -EBUSY;
Linus Torvalds's avatar
Linus Torvalds committed
2609
	}
2610 2611 2612
	/*
	 * make sure the task is off any CPU
	 */
2613
	wait_task_inactive(task);
2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629

	/* more to come... */

	return 0;
}

static int
pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
{
	struct task_struct *p = current;
	int ret;

	/* XXX: need to add more checks here */
	if (pid < 2) return -EPERM;

	if (pid != current->pid) {
Linus Torvalds's avatar
Linus Torvalds committed
2630

2631 2632 2633 2634 2635 2636 2637 2638
		read_lock(&tasklist_lock);

		p = find_task_by_pid(pid);

		/* make sure task cannot go away while we operate on it */
		if (p) get_task_struct(p);

		read_unlock(&tasklist_lock);
Linus Torvalds's avatar
Linus Torvalds committed
2639

2640 2641
		if (p == NULL) return -ESRCH;
	}
Linus Torvalds's avatar
Linus Torvalds committed
2642

2643 2644 2645 2646 2647
	ret = pfm_task_incompatible(ctx, p);
	if (ret == 0) {
		*task = p;
	} else if (p != current) {
		pfm_put_task(p);
Linus Torvalds's avatar
Linus Torvalds committed
2648
	}
2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674
	return ret;
}



static int
pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_context_t *req = (pfarg_context_t *)arg;
	struct file *filp;
	int ctx_flags;
	int ret;

	/* let's check the arguments first */
	ret = pfarg_is_sane(current, req);
	if (ret < 0) return ret;

	ctx_flags = req->ctx_flags;

	ret = -ENOMEM;

	ctx = pfm_context_alloc();
	if (!ctx) goto error;

	req->ctx_fd = ctx->ctx_fd = pfm_alloc_fd(&filp);
	if (req->ctx_fd < 0) goto error_file;
2675

David Mosberger's avatar
David Mosberger committed
2676
	/*
2677
	 * attach context to file
David Mosberger's avatar
David Mosberger committed
2678
	 */
2679
	filp->private_data = ctx;
David Mosberger's avatar
David Mosberger committed
2680

2681 2682 2683 2684 2685 2686 2687
	/*
	 * does the user want to sample?
	 */
	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
		ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
		if (ret) goto buffer_error;
	}
David Mosberger's avatar
David Mosberger committed
2688

2689 2690 2691 2692
	/*
	 * init context protection lock
	 */
	spin_lock_init(&ctx->ctx_lock);
Linus Torvalds's avatar
Linus Torvalds committed
2693

2694 2695 2696
	/*
	 * context is unloaded
	 */
2697
	ctx->ctx_state = PFM_CTX_UNLOADED;
Linus Torvalds's avatar
Linus Torvalds committed
2698

2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709
	/*
	 * initialization of context's flags
	 */
	ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
	ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
	ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
	ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
	/*
	 * will move to set properties
	 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
	 */
Linus Torvalds's avatar
Linus Torvalds committed
2710

2711 2712 2713 2714
	/*
	 * init restart semaphore to locked
	 */
	sema_init(&ctx->ctx_restart_sem, 0);
David Mosberger's avatar
David Mosberger committed
2715

2716 2717 2718 2719 2720
	/*
	 * activation is used in SMP only
	 */
	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
	SET_LAST_CPU(ctx, -1);
Linus Torvalds's avatar
Linus Torvalds committed
2721

Linus Torvalds's avatar
Linus Torvalds committed
2722
	/*
2723
	 * initialize notification message queue
Linus Torvalds's avatar
Linus Torvalds committed
2724
	 */
2725 2726 2727 2728
	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
	init_waitqueue_head(&ctx->ctx_msgq_wait);
	init_waitqueue_head(&ctx->ctx_zombieq);

2729
	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
2730 2731 2732 2733 2734 2735 2736 2737
		ctx,
		ctx_flags,
		ctx->ctx_fl_system,
		ctx->ctx_fl_block,
		ctx->ctx_fl_excl_idle,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd));

Linus Torvalds's avatar
Linus Torvalds committed
2738
	/*
2739
	 * initialize soft PMU state
Linus Torvalds's avatar
Linus Torvalds committed
2740
	 */
2741
	pfm_reset_pmu_state(ctx);
Linus Torvalds's avatar
Linus Torvalds committed
2742

Linus Torvalds's avatar
Linus Torvalds committed
2743 2744 2745
	return 0;

buffer_error:
2746 2747 2748 2749 2750 2751
	pfm_free_fd(ctx->ctx_fd, filp);

	if (ctx->ctx_buf_fmt) {
		pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
	}
error_file:
Linus Torvalds's avatar
Linus Torvalds committed
2752
	pfm_context_free(ctx);
2753

2754
error:
Linus Torvalds's avatar
Linus Torvalds committed
2755
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
2756 2757
}

2758
static inline unsigned long
2759
pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776
{
	unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
	unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
	extern unsigned long carta_random32 (unsigned long seed);

	if (reg->flags & PFM_REGFL_RANDOM) {
		new_seed = carta_random32(old_seed);
		val -= (old_seed & mask);	/* counter values are negative numbers! */
		if ((mask >> 32) != 0)
			/* construct a full 64-bit random value: */
			new_seed |= carta_random32(old_seed >> 32) << 32;
		reg->seed = new_seed;
	}
	reg->lval = val;
	return val;
}

2777
static void
2778
pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
2779 2780 2781 2782
{
	unsigned long mask = ovfl_regs[0];
	unsigned long reset_others = 0UL;
	unsigned long val;
2783
	int i;
2784 2785 2786 2787 2788 2789 2790

	/*
	 * now restore reset value on sampling overflowed counters
	 */
	mask >>= PMU_FIRST_COUNTER;
	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {

David Mosberger's avatar
David Mosberger committed
2791 2792 2793 2794 2795 2796
		if ((mask & 0x1UL) == 0UL) continue;

		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
		reset_others        |= ctx->ctx_pmds[i].reset_pmds[0];

		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812
	}

	/*
	 * Now take care of resetting the other registers
	 */
	for(i = 0; reset_others; i++, reset_others >>= 1) {

		if ((reset_others & 0x1) == 0) continue;

		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);

		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
			  is_long_reset ? "long" : "short", i, val));
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
2813
static void
2814
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
Linus Torvalds's avatar
Linus Torvalds committed
2815
{
David Mosberger's avatar
David Mosberger committed
2816 2817 2818
	unsigned long mask = ovfl_regs[0];
	unsigned long reset_others = 0UL;
	unsigned long val;
2819
	int i;
2820

2821
	DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
2822

2823
	if (ctx->ctx_state == PFM_CTX_MASKED) {
2824
		pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
2825 2826 2827
		return;
	}

Linus Torvalds's avatar
Linus Torvalds committed
2828 2829 2830
	/*
	 * now restore reset value on sampling overflowed counters
	 */
David Mosberger's avatar
David Mosberger committed
2831 2832 2833
	mask >>= PMU_FIRST_COUNTER;
	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {

David Mosberger's avatar
David Mosberger committed
2834
		if ((mask & 0x1UL) == 0UL) continue;
Linus Torvalds's avatar
Linus Torvalds committed
2835

David Mosberger's avatar
David Mosberger committed
2836 2837 2838 2839 2840 2841
		val           = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
		reset_others |= ctx->ctx_pmds[i].reset_pmds[0];

		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));

		pfm_write_soft_counter(ctx, i, val);
David Mosberger's avatar
David Mosberger committed
2842
	}
Linus Torvalds's avatar
Linus Torvalds committed
2843

David Mosberger's avatar
David Mosberger committed
2844 2845 2846 2847 2848 2849 2850
	/*
	 * Now take care of resetting the other registers
	 */
	for(i = 0; reset_others; i++, reset_others >>= 1) {

		if ((reset_others & 0x1) == 0) continue;

2851
		val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
David Mosberger's avatar
David Mosberger committed
2852 2853 2854 2855 2856

		if (PMD_IS_COUNTING(i)) {
			pfm_write_soft_counter(ctx, i, val);
		} else {
			ia64_set_pmd(i, val);
Linus Torvalds's avatar
Linus Torvalds committed
2857
		}
2858
		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
2859
			  is_long_reset ? "long" : "short", i, val));
Linus Torvalds's avatar
Linus Torvalds committed
2860
	}
2861
	ia64_srlz_d();
Linus Torvalds's avatar
Linus Torvalds committed
2862 2863 2864
}

static int
2865
pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
2866
{
2867
	struct thread_struct *thread = NULL;
2868
	struct task_struct *task;
2869
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
2870 2871 2872
	unsigned long value, pmc_pm;
	unsigned long smpl_pmds, reset_pmds, impl_pmds;
	unsigned int cnum, reg_flags, flags, pmc_type;
2873
	int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
2874
	int is_monitor, is_counting, state;
2875
	int ret = -EINVAL;
2876
	pfm_reg_check_t	wr_func;
2877
#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
David Mosberger's avatar
David Mosberger committed
2878

2879 2880 2881
	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;
2882
	task      = ctx->ctx_task;
2883
	impl_pmds = pmu_conf->impl_pmds[0];
2884

2885
	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
David Mosberger's avatar
David Mosberger committed
2886

2887
	if (is_loaded) {
2888
		thread = &task->thread;
2889 2890 2891 2892 2893
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
2894
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
2895
			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
2896 2897
			return -EBUSY;
		}
2898
		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
2899
	}
2900
	expert_mode = pfm_sysctl.expert_mode; 
Linus Torvalds's avatar
Linus Torvalds committed
2901 2902 2903

	for (i = 0; i < count; i++, req++) {

2904 2905 2906 2907 2908
		cnum       = req->reg_num;
		reg_flags  = req->reg_flags;
		value      = req->reg_value;
		smpl_pmds  = req->reg_smpl_pmds[0];
		reset_pmds = req->reg_reset_pmds[0];
2909
		flags      = 0;
Linus Torvalds's avatar
Linus Torvalds committed
2910

2911 2912 2913 2914 2915 2916

		if (cnum >= PMU_MAX_PMCS) {
			DPRINT(("pmc%u is invalid\n", cnum));
			goto error;
		}

2917 2918
		pmc_type   = pmu_conf->pmc_desc[cnum].type;
		pmc_pm     = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
2919 2920
		is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
		is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
2921

2922
		/*
David Mosberger's avatar
David Mosberger committed
2923 2924 2925 2926
		 * we reject all non implemented PMC as well
		 * as attempts to modify PMC[0-3] which are used
		 * as status registers by the PMU
		 */
2927 2928
		if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
			DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
2929
			goto error;
Linus Torvalds's avatar
Linus Torvalds committed
2930
		}
2931
		wr_func = pmu_conf->pmc_desc[cnum].write_check;
David Mosberger's avatar
David Mosberger committed
2932
		/*
2933 2934 2935
		 * If the PMC is a monitor, then if the value is not the default:
		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
		 * 	- per-task           : PMCx.pm=0 (user monitor)
David Mosberger's avatar
David Mosberger committed
2936
		 */
2937 2938
		if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
			DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
2939
				cnum,
2940 2941
				pmc_pm,
				is_system));
2942
			goto error;
David Mosberger's avatar
David Mosberger committed
2943 2944
		}

2945
		if (is_counting) {
2946 2947 2948 2949
			/*
		 	 * enforce generation of overflow interrupt. Necessary on all
		 	 * CPUs.
		 	 */
2950
			value |= 1 << PMU_PMC_OI;
2951

2952 2953
			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
				flags |= PFM_REGFL_OVFL_NOTIFY;
Linus Torvalds's avatar
Linus Torvalds committed
2954
			}
2955

2956 2957
			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;

2958
			/* verify validity of smpl_pmds */
2959
			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
2960 2961 2962 2963
				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
				goto error;
			}

2964
			/* verify validity of reset_pmds */
2965
			if ((reset_pmds & impl_pmds) != reset_pmds) {
2966
				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
2967 2968
				goto error;
			}
2969 2970 2971
		} else {
			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
2972
				goto error;
2973 2974
			}
			/* eventid on non-counting monitors are ignored */
Linus Torvalds's avatar
Linus Torvalds committed
2975
		}
2976

2977 2978 2979
		/*
		 * execute write checker, if any
		 */
2980 2981
		if (likely(expert_mode == 0 && wr_func)) {
			ret = (*wr_func)(task, ctx, cnum, &value, regs);
2982 2983 2984
			if (ret) goto error;
			ret = -EINVAL;
		}
Linus Torvalds's avatar
Linus Torvalds committed
2985

2986 2987 2988
		/*
		 * no error on this register
		 */
2989
		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
Linus Torvalds's avatar
Linus Torvalds committed
2990

David Mosberger's avatar
David Mosberger committed
2991
		/*
2992
		 * Now we commit the changes to the software state
David Mosberger's avatar
David Mosberger committed
2993 2994
		 */

2995 2996
		/*
		 * update overflow information
David Mosberger's avatar
David Mosberger committed
2997
		 */
2998
		if (is_counting) {
2999 3000 3001 3002 3003 3004 3005 3006
			/*
		 	 * full flag update each time a register is programmed
		 	 */
			ctx->ctx_pmds[cnum].flags = flags;

			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
3007

3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018
			/*
			 * Mark all PMDS to be accessed as used.
			 *
			 * We do not keep track of PMC because we have to
			 * systematically restore ALL of them.
			 *
			 * We do not update the used_monitors mask, because
			 * if we have not programmed them, then will be in
			 * a quiescent state, therefore we will not need to
			 * mask/restore then when context is MASKED.
			 */
3019
			CTX_USED_PMD(ctx, reset_pmds);
3020 3021 3022 3023 3024
			CTX_USED_PMD(ctx, smpl_pmds);
			/*
		 	 * make sure we do not try to reset on
		 	 * restart because we have established new values
		 	 */
3025
			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
3026
		}
3027 3028
		/*
		 * Needed in case the user does not initialize the equivalent
3029 3030
		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
		 * possible leak here.
3031
		 */
3032
		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
3033

3034 3035 3036 3037 3038 3039 3040 3041 3042
		/*
		 * keep track of the monitor PMC that we are using.
		 * we save the value of the pmc in ctx_pmcs[] and if
		 * the monitoring is not stopped for the context we also
		 * place it in the saved state area so that it will be
		 * picked up later by the context switch code.
		 *
		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
		 *
3043
		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
3044 3045 3046 3047 3048 3049
		 * monitoring needs to be stopped.
		 */
		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);

		/*
		 * update context state
David Mosberger's avatar
David Mosberger committed
3050
		 */
3051
		ctx->ctx_pmcs[cnum] = value;
David Mosberger's avatar
David Mosberger committed
3052

3053 3054 3055 3056
		if (is_loaded) {
			/*
			 * write thread state
			 */
3057
			if (is_system == 0) thread->pmcs[cnum] = value;
David Mosberger's avatar
David Mosberger committed
3058

3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077
			/*
			 * write hardware register if we can
			 */
			if (can_access_pmu) {
				ia64_set_pmc(cnum, value);
			}
#ifdef CONFIG_SMP
			else {
				/*
				 * per-task SMP only here
				 *
			 	 * we are guaranteed that the task is not running on the other CPU,
			 	 * we indicate that this PMD will need to be reloaded if the task
			 	 * is rescheduled on the CPU it ran last on.
			 	 */
				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
			}
#endif
		}
David Mosberger's avatar
David Mosberger committed
3078

3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091
		DPRINT(("pmc[%u]=0x%lx loaded=%d access_pmu=%d all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
			  cnum,
			  value,
			  is_loaded,
			  can_access_pmu,
			  ctx->ctx_all_pmcs[0],
			  ctx->ctx_used_pmds[0],
			  ctx->ctx_pmds[cnum].eventid,
			  smpl_pmds,
			  reset_pmds,
			  ctx->ctx_reload_pmcs[0],
			  ctx->ctx_used_monitors[0],
			  ctx->ctx_ovfl_regs[0]));
David Mosberger's avatar
David Mosberger committed
3092
	}
3093

3094 3095 3096 3097
	/*
	 * make sure the changes are visible
	 */
	if (can_access_pmu) ia64_srlz_d();
3098

3099
	return 0;
3100
error:
3101
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
David Mosberger's avatar
David Mosberger committed
3102
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
3103 3104 3105
}

static int
3106
pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
3107
{
3108
	struct thread_struct *thread = NULL;
3109
	struct task_struct *task;
3110
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
3111
	unsigned long value, hw_value, ovfl_mask;
David Mosberger's avatar
David Mosberger committed
3112
	unsigned int cnum;
3113
	int i, can_access_pmu = 0, state;
3114
	int is_counting, is_loaded, is_system, expert_mode;
3115
	int ret = -EINVAL;
3116
	pfm_reg_check_t wr_func;
David Mosberger's avatar
David Mosberger committed
3117 3118


3119 3120 3121
	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;
3122
	ovfl_mask = pmu_conf->ovfl_val;
3123
	task      = ctx->ctx_task;
3124

3125
	if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
3126

3127 3128 3129 3130
	/*
	 * on both UP and SMP, we can only write to the PMC when the task is
	 * the owner of the local PMU.
	 */
3131 3132
	if (likely(is_loaded)) {
		thread = &task->thread;
3133 3134 3135 3136 3137
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
3138
		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3139
			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3140 3141
			return -EBUSY;
		}
3142
		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3143
	}
3144
	expert_mode = pfm_sysctl.expert_mode; 
3145

Linus Torvalds's avatar
Linus Torvalds committed
3146 3147
	for (i = 0; i < count; i++, req++) {

3148 3149
		cnum  = req->reg_num;
		value = req->reg_value;
3150

David Mosberger's avatar
David Mosberger committed
3151
		if (!PMD_IS_IMPL(cnum)) {
3152
			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
David Mosberger's avatar
David Mosberger committed
3153 3154
			goto abort_mission;
		}
3155
		is_counting = PMD_IS_COUNTING(cnum);
3156
		wr_func     = pmu_conf->pmd_desc[cnum].write_check;
Linus Torvalds's avatar
Linus Torvalds committed
3157

3158 3159 3160
		/*
		 * execute write checker, if any
		 */
3161
		if (unlikely(expert_mode == 0 && wr_func)) {
3162
			unsigned long v = value;
3163

3164
			ret = (*wr_func)(task, ctx, cnum, &v, regs);
3165
			if (ret) goto abort_mission;
3166

3167
			value = v;
3168
			ret   = -EINVAL;
3169
		}
3170

3171 3172 3173
		/*
		 * no error on this register
		 */
3174
		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
David Mosberger's avatar
David Mosberger committed
3175 3176

		/*
3177
		 * now commit changes to software state
David Mosberger's avatar
David Mosberger committed
3178
		 */
3179
		hw_value = value;
3180

3181 3182 3183 3184 3185 3186 3187 3188
		/*
		 * update virtualized (64bits) counter
		 */
		if (is_counting) {
			/*
			 * write context state
			 */
			ctx->ctx_pmds[cnum].lval = value;
3189

3190 3191 3192 3193
			/*
			 * when context is load we use the split value
			 */
			if (is_loaded) {
3194 3195
				hw_value = value &  ovfl_mask;
				value    = value & ~ovfl_mask;
3196
			}
Linus Torvalds's avatar
Linus Torvalds committed
3197
		}
3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208
		/*
		 * update reset values (not just for counters)
		 */
		ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
		ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;

		/*
		 * update randomization parameters (not just for counters)
		 */
		ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
		ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
David Mosberger's avatar
David Mosberger committed
3209

3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221
		/*
		 * update context value
		 */
		ctx->ctx_pmds[cnum].val  = value;

		/*
		 * Keep track of what we use
		 *
		 * We do not keep track of PMC because we have to
		 * systematically restore ALL of them.
		 */
		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
3222

3223 3224 3225
		/*
		 * mark this PMD register used as well
		 */
David Mosberger's avatar
David Mosberger committed
3226
		CTX_USED_PMD(ctx, RDEP(cnum));
Linus Torvalds's avatar
Linus Torvalds committed
3227

3228 3229 3230 3231
		/*
		 * make sure we do not try to reset on
		 * restart because we have established new values
		 */
3232
		if (is_counting && state == PFM_CTX_MASKED) {
3233 3234 3235 3236 3237 3238 3239
			ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
		}

		if (is_loaded) {
			/*
		 	 * write thread state
		 	 */
3240
			if (is_system == 0) thread->pmds[cnum] = hw_value;
Linus Torvalds's avatar
Linus Torvalds committed
3241

3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280
			/*
			 * write hardware register if we can
			 */
			if (can_access_pmu) {
				ia64_set_pmd(cnum, hw_value);
			} else {
#ifdef CONFIG_SMP
				/*
			 	 * we are guaranteed that the task is not running on the other CPU,
			 	 * we indicate that this PMD will need to be reloaded if the task
			 	 * is rescheduled on the CPU it ran last on.
			 	 */
				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
#endif
			}
		}

		DPRINT(("pmd[%u]=0x%lx loaded=%d access_pmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx "
			  "long_reset=0x%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
			cnum,
			value,
			is_loaded,
			can_access_pmu,
			hw_value,
			ctx->ctx_pmds[cnum].val,
			ctx->ctx_pmds[cnum].short_reset,
			ctx->ctx_pmds[cnum].long_reset,
			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
			ctx->ctx_used_pmds[0],
			ctx->ctx_pmds[cnum].reset_pmds[0],
			ctx->ctx_reload_pmds[0],
			ctx->ctx_all_pmds[0],
			ctx->ctx_ovfl_regs[0]));
	}

	/*
	 * make changes visible
	 */
	if (can_access_pmu) ia64_srlz_d();
3281

3282 3283 3284 3285 3286 3287
	return 0;

abort_mission:
	/*
	 * for now, we have only one possibility for error
	 */
3288
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
David Mosberger's avatar
David Mosberger committed
3289
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
3290 3291
}

3292 3293 3294 3295 3296 3297 3298 3299 3300
/*
 * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function.
 * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an
 * interrupt is delivered during the call, it will be kept pending until we leave, making
 * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are
 * guaranteed to return consistent data to the user, it may simply be old. It is not
 * trivial to treat the overflow while inside the call because you may end up in
 * some module sampling buffer code causing deadlocks.
 */
Linus Torvalds's avatar
Linus Torvalds committed
3301
static int
3302
pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
3303
{
3304
	struct thread_struct *thread = NULL;
3305 3306
	struct task_struct *task;
	unsigned long val = 0UL, lval, ovfl_mask, sval;
3307 3308
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
	unsigned int cnum, reg_flags = 0;
3309
	int i, can_access_pmu = 0, state;
3310
	int is_loaded, is_system, is_counting, expert_mode;
3311
	int ret = -EINVAL;
3312
	pfm_reg_check_t rd_func;
3313

Linus Torvalds's avatar
Linus Torvalds committed
3314
	/*
3315 3316
	 * access is possible when loaded only for
	 * self-monitoring tasks or in UP mode
Linus Torvalds's avatar
Linus Torvalds committed
3317
	 */
3318 3319 3320 3321

	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;
3322
	ovfl_mask = pmu_conf->ovfl_val;
3323
	task      = ctx->ctx_task;
3324 3325

	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
3326

3327 3328
	if (likely(is_loaded)) {
		thread = &task->thread;
3329 3330 3331 3332 3333
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
3334
		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3335
			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3336 3337
			return -EBUSY;
		}
3338 3339 3340
		/*
		 * this can be true when not self-monitoring only in UP
		 */
3341
		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3342 3343

		if (can_access_pmu) ia64_srlz_d();
3344
	}
3345
	expert_mode = pfm_sysctl.expert_mode; 
3346

3347
	DPRINT(("loaded=%d access_pmu=%d ctx_state=%d\n",
3348 3349
		is_loaded,
		can_access_pmu,
3350
		state));
Linus Torvalds's avatar
Linus Torvalds committed
3351

3352 3353 3354 3355
	/*
	 * on both UP and SMP, we can only read the PMD from the hardware register when
	 * the task is the owner of the local PMU.
	 */
David Mosberger's avatar
David Mosberger committed
3356

Linus Torvalds's avatar
Linus Torvalds committed
3357
	for (i = 0; i < count; i++, req++) {
3358

3359 3360
		cnum        = req->reg_num;
		reg_flags   = req->reg_flags;
3361

3362
		if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
3363 3364
		/*
		 * we can only read the register that we use. That includes
3365
		 * the one we explicitely initialize AND the one we want included
3366 3367 3368 3369 3370
		 * in the sampling buffer (smpl_regs).
		 *
		 * Having this restriction allows optimization in the ctxsw routine
		 * without compromising security (leaks)
		 */
3371 3372 3373 3374 3375
		if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;

		sval        = ctx->ctx_pmds[cnum].val;
		lval        = ctx->ctx_pmds[cnum].lval;
		is_counting = PMD_IS_COUNTING(cnum);
Linus Torvalds's avatar
Linus Torvalds committed
3376

David Mosberger's avatar
David Mosberger committed
3377 3378 3379 3380 3381
		/*
		 * If the task is not the current one, then we check if the
		 * PMU state is still in the local live register due to lazy ctxsw.
		 * If true, then we read directly from the registers.
		 */
3382
		if (can_access_pmu){
David Mosberger's avatar
David Mosberger committed
3383
			val = ia64_get_pmd(cnum);
David Mosberger's avatar
David Mosberger committed
3384
		} else {
3385 3386 3387 3388 3389
			/*
			 * context has been saved
			 * if context is zombie, then task does not exist anymore.
			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
			 */
3390
			val = is_loaded ? thread->pmds[cnum] : 0UL;
David Mosberger's avatar
David Mosberger committed
3391
		}
3392
		rd_func = pmu_conf->pmd_desc[cnum].read_check;
3393

3394
		if (is_counting) {
Linus Torvalds's avatar
Linus Torvalds committed
3395
			/*
3396
			 * XXX: need to check for overflow when loaded
Linus Torvalds's avatar
Linus Torvalds committed
3397
			 */
3398
			val &= ovfl_mask;
3399
			val += sval;
3400
		}
3401 3402 3403 3404

		/*
		 * execute read checker, if any
		 */
3405
		if (unlikely(expert_mode == 0 && rd_func)) {
3406
			unsigned long v = val;
3407
			ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
3408
			if (ret) goto error;
3409
			val = v;
3410
			ret = -EINVAL;
3411 3412
		}

3413
		PFM_REG_RETFLAG_SET(reg_flags, 0);
3414

3415
		DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
3416

3417 3418 3419 3420 3421
		/*
		 * update register return value, abort all if problem during copy.
		 * we only modify the reg_flags field. no check mode is fine because
		 * access has been verified upfront in sys_perfmonctl().
		 */
3422 3423 3424
		req->reg_value            = val;
		req->reg_flags            = reg_flags;
		req->reg_last_reset_val   = lval;
Linus Torvalds's avatar
Linus Torvalds committed
3425
	}
3426

Linus Torvalds's avatar
Linus Torvalds committed
3427
	return 0;
3428

3429
error:
3430
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
3431 3432 3433
	return ret;
}

3434 3435
int
pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3436 3437 3438
{
	pfm_context_t *ctx;

3439
	if (req == NULL) return -EINVAL;
3440

3441
 	ctx = GET_PMU_CTX();
3442 3443 3444 3445 3446 3447

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
3448
	 */
3449
	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
David Mosberger's avatar
David Mosberger committed
3450

3451 3452
	return pfm_write_pmcs(ctx, req, nreq, regs);
}
David Mosberger's avatar
David Mosberger committed
3453
EXPORT_SYMBOL(pfm_mod_write_pmcs);
3454

3455 3456
int
pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3457 3458 3459
{
	pfm_context_t *ctx;

3460
	if (req == NULL) return -EINVAL;
3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473

 	ctx = GET_PMU_CTX();

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
	 */
	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;

	return pfm_read_pmds(ctx, req, nreq, regs);
}
David Mosberger's avatar
David Mosberger committed
3474
EXPORT_SYMBOL(pfm_mod_read_pmds);
3475

David Mosberger's avatar
David Mosberger committed
3476 3477 3478 3479 3480 3481 3482 3483
/*
 * Only call this function when a process it trying to
 * write the debug registers (reading is always allowed)
 */
int
pfm_use_debug_registers(struct task_struct *task)
{
	pfm_context_t *ctx = task->thread.pfm_context;
3484
	unsigned long flags;
David Mosberger's avatar
David Mosberger committed
3485 3486
	int ret = 0;

3487
	if (pmu_conf->use_rr_dbregs == 0) return 0;
3488 3489

	DPRINT(("called for [%d]\n", task->pid));
David Mosberger's avatar
David Mosberger committed
3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505

	/*
	 * do it only once
	 */
	if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;

	/*
	 * Even on SMP, we do not need to use an atomic here because
	 * the only way in is via ptrace() and this is possible only when the
	 * process is stopped. Even in the case where the ctxsw out is not totally
	 * completed by the time we come here, there is no way the 'stopped' process
	 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
	 * So this is always safe.
	 */
	if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;

3506
	LOCK_PFS(flags);
David Mosberger's avatar
David Mosberger committed
3507 3508

	/*
David Mosberger's avatar
David Mosberger committed
3509 3510
	 * We cannot allow setting breakpoints when system wide monitoring
	 * sessions are using the debug registers.
David Mosberger's avatar
David Mosberger committed
3511 3512 3513 3514 3515 3516
	 */
	if (pfm_sessions.pfs_sys_use_dbregs> 0)
		ret = -1;
	else
		pfm_sessions.pfs_ptrace_use_dbregs++;

3517 3518 3519
	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
		  pfm_sessions.pfs_ptrace_use_dbregs,
		  pfm_sessions.pfs_sys_use_dbregs,
David Mosberger's avatar
David Mosberger committed
3520 3521
		  task->pid, ret));

3522
	UNLOCK_PFS(flags);
David Mosberger's avatar
David Mosberger committed
3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537

	return ret;
}

/*
 * This function is called for every task that exits with the
 * IA64_THREAD_DBG_VALID set. This indicates a task which was
 * able to use the debug registers for debugging purposes via
 * ptrace(). Therefore we know it was not using them for
 * perfmormance monitoring, so we only decrement the number
 * of "ptraced" debug register users to keep the count up to date
 */
int
pfm_release_debug_registers(struct task_struct *task)
{
3538
	unsigned long flags;
David Mosberger's avatar
David Mosberger committed
3539 3540
	int ret;

3541
	if (pmu_conf->use_rr_dbregs == 0) return 0;
3542

3543
	LOCK_PFS(flags);
David Mosberger's avatar
David Mosberger committed
3544
	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
3545
		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
David Mosberger's avatar
David Mosberger committed
3546 3547 3548 3549 3550
		ret = -1;
	}  else {
		pfm_sessions.pfs_ptrace_use_dbregs--;
		ret = 0;
	}
3551
	UNLOCK_PFS(flags);
David Mosberger's avatar
David Mosberger committed
3552 3553 3554

	return ret;
}
Linus Torvalds's avatar
Linus Torvalds committed
3555 3556

static int
3557
pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
3558
{
3559 3560 3561
	struct task_struct *task;
	pfm_buffer_fmt_t *fmt;
	pfm_ovfl_ctrl_t rst_ctrl;
David Mosberger's avatar
David Mosberger committed
3562
	int state, is_system;
3563 3564
	int ret = 0;

David Mosberger's avatar
David Mosberger committed
3565
	state     = ctx->ctx_state;
3566
	fmt       = ctx->ctx_buf_fmt;
David Mosberger's avatar
David Mosberger committed
3567 3568
	is_system = ctx->ctx_fl_system;
	task      = PFM_CTX_TASK(ctx);
3569

David Mosberger's avatar
David Mosberger committed
3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582
	switch(state) {
		case PFM_CTX_MASKED:
			break;
		case PFM_CTX_LOADED: 
			if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
			/* fall through */
		case PFM_CTX_UNLOADED:
		case PFM_CTX_ZOMBIE:
			DPRINT(("invalid state=%d\n", state));
			return -EBUSY;
		default:
			DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
			return -EINVAL;
3583
	}
Linus Torvalds's avatar
Linus Torvalds committed
3584

3585 3586 3587 3588 3589
	/*
 	 * In system wide and when the context is loaded, access can only happen
 	 * when the caller is running on the CPU being monitored by the session.
 	 * It does not have to be the owner (ctx_task) of the context per se.
 	 */
David Mosberger's avatar
David Mosberger committed
3590
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
3591
		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3592 3593 3594 3595 3596 3597 3598 3599 3600
		return -EBUSY;
	}

	/* sanity check */
	if (unlikely(task == NULL)) {
		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
		return -EINVAL;
	}

David Mosberger's avatar
David Mosberger committed
3601
	if (task == current || is_system) {
3602 3603 3604 3605 3606

		fmt = ctx->ctx_buf_fmt;

		DPRINT(("restarting self %d ovfl=0x%lx\n",
			task->pid,
3607
			ctx->ctx_ovfl_regs[0]));
David Mosberger's avatar
David Mosberger committed
3608

3609
		if (CTX_HAS_SMPL(ctx)) {
Linus Torvalds's avatar
Linus Torvalds committed
3610

3611
			prefetch(ctx->ctx_smpl_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
3612

David Mosberger's avatar
David Mosberger committed
3613
			rst_ctrl.bits.mask_monitoring = 0;
3614
			rst_ctrl.bits.reset_ovfl_pmds = 0;
Linus Torvalds's avatar
Linus Torvalds committed
3615

David Mosberger's avatar
David Mosberger committed
3616
			if (state == PFM_CTX_LOADED)
3617 3618 3619 3620
				ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
			else
				ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
		} else {
David Mosberger's avatar
David Mosberger committed
3621 3622
			rst_ctrl.bits.mask_monitoring = 0;
			rst_ctrl.bits.reset_ovfl_pmds = 1;
Linus Torvalds's avatar
Linus Torvalds committed
3623 3624
		}

3625
		if (ret == 0) {
David Mosberger's avatar
David Mosberger committed
3626 3627
			if (rst_ctrl.bits.reset_ovfl_pmds)
				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
3628

David Mosberger's avatar
David Mosberger committed
3629
			if (rst_ctrl.bits.mask_monitoring == 0) {
3630 3631
				DPRINT(("resuming monitoring for [%d]\n", task->pid));

3632
				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
3633 3634 3635 3636 3637 3638 3639 3640 3641 3642
			} else {
				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));

				// cannot use pfm_stop_monitoring(task, regs);
			}
		}
		/*
		 * clear overflowed PMD mask to remove any stale information
		 */
		ctx->ctx_ovfl_regs[0] = 0UL;
Linus Torvalds's avatar
Linus Torvalds committed
3643

3644 3645 3646
		/*
		 * back to LOADED state
		 */
3647
		ctx->ctx_state = PFM_CTX_LOADED;
3648

3649 3650 3651 3652 3653
		/*
		 * XXX: not really useful for self monitoring
		 */
		ctx->ctx_fl_can_restart = 0;

Linus Torvalds's avatar
Linus Torvalds committed
3654
		return 0;
3655
	}
3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672

	/* 
	 * restart another task
	 */

	/*
	 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 
	 * one is seen by the task.
	 */
	if (state == PFM_CTX_MASKED) {
		if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
		/*
		 * will prevent subsequent restart before this one is
		 * seen by other task
		 */
		ctx->ctx_fl_can_restart = 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
3673

David Mosberger's avatar
David Mosberger committed
3674
	/*
3675 3676 3677 3678 3679 3680 3681
	 * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e.
	 * the task is blocked or on its way to block. That's the normal
	 * restart path. If the monitoring is not masked, then the task
	 * can be actively monitoring and we cannot directly intervene.
	 * Therefore we use the trap mechanism to catch the task and
	 * force it to reset the buffer/reset PMDs.
	 *
David Mosberger's avatar
David Mosberger committed
3682
	 * if non-blocking, then we ensure that the task will go into
3683
	 * pfm_handle_work() before returning to user mode.
3684
	 *
3685
	 * We cannot explicitely reset another task, it MUST always
David Mosberger's avatar
David Mosberger committed
3686
	 * be done by the task itself. This works for system wide because
3687 3688
	 * the tool that is controlling the session is logically doing 
	 * "self-monitoring".
David Mosberger's avatar
David Mosberger committed
3689
	 */
3690
	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
3691 3692
		DPRINT(("unblocking [%d] \n", task->pid));
		up(&ctx->ctx_restart_sem);
David Mosberger's avatar
David Mosberger committed
3693
	} else {
3694 3695
		DPRINT(("[%d] armed exit trap\n", task->pid));

3696
		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717

		PFM_SET_WORK_PENDING(task, 1);

		pfm_set_task_notify(task);

		/*
		 * XXX: send reschedule if task runs on another CPU
		 */
	}
	return 0;
}

static int
pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	unsigned int m = *(unsigned int *)arg;

	pfm_sysctl.debug = m == 0 ? 0 : 1;

	pfm_debug_var = pfm_sysctl.debug;

David Mosberger's avatar
David Mosberger committed
3718
	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
3719

David Mosberger's avatar
David Mosberger committed
3720
	if (m == 0) {
3721 3722 3723 3724 3725 3726
		memset(pfm_stats, 0, sizeof(pfm_stats));
		for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL;
	}
	return 0;
}

3727 3728 3729
/*
 * arg can be NULL and count can be zero for this function
 */
3730 3731 3732 3733
static int
pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct thread_struct *thread = NULL;
3734
	struct task_struct *task;
3735
	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
3736
	unsigned long flags;
3737 3738 3739
	dbreg_t dbreg;
	unsigned int rnum;
	int first_time;
3740 3741 3742
	int ret = 0, state;
	int i, can_access_pmu = 0;
	int is_system, is_loaded;
3743

3744
	if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
3745

3746 3747 3748
	state     = ctx->ctx_state;
	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
	is_system = ctx->ctx_fl_system;
3749
	task      = ctx->ctx_task;
3750

3751
	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
3752 3753 3754 3755 3756 3757

	/*
	 * on both UP and SMP, we can only write to the PMC when the task is
	 * the owner of the local PMU.
	 */
	if (is_loaded) {
3758
		thread = &task->thread;
3759 3760 3761 3762 3763
		/*
		 * In system wide and when the context is loaded, access can only happen
		 * when the caller is running on the CPU being monitored by the session.
		 * It does not have to be the owner (ctx_task) of the context per se.
		 */
3764
		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3765
			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3766 3767
			return -EBUSY;
		}
3768
		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783
	}

	/*
	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
	 * ensuring that no real breakpoint can be installed via this call.
	 *
	 * IMPORTANT: regs can be NULL in this function
	 */

	first_time = ctx->ctx_fl_using_dbreg == 0;

	/*
	 * don't bother if we are loaded and task is being debugged
	 */
	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
3784
		DPRINT(("debug registers already in use for [%d]\n", task->pid));
3785 3786 3787 3788 3789 3790
		return -EBUSY;
	}

	/*
	 * check for debug registers in system wide mode
	 *
3791 3792 3793
	 * If though a check is done in pfm_context_load(),
	 * we must repeat it here, in case the registers are
	 * written after the context is loaded
3794
	 */
3795
	if (is_loaded) {
3796
		LOCK_PFS(flags);
3797

3798 3799 3800 3801 3802 3803
		if (first_time && is_system) {
			if (pfm_sessions.pfs_ptrace_use_dbregs)
				ret = -EBUSY;
			else
				pfm_sessions.pfs_sys_use_dbregs++;
		}
3804
		UNLOCK_PFS(flags);
3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824
	}

	if (ret != 0) return ret;

	/*
	 * mark ourself as user of the debug registers for
	 * perfmon purposes.
	 */
	ctx->ctx_fl_using_dbreg = 1;

	/*
 	 * clear hardware registers to make sure we don't
 	 * pick up stale state.
	 *
	 * for a system wide session, we do not use
	 * thread.dbr, thread.ibr because this process
	 * never leaves the current CPU and the state
	 * is shared by all processes running on it
 	 */
	if (first_time && can_access_pmu) {
3825
		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
3826
		for (i=0; i < pmu_conf->num_ibrs; i++) {
3827
			ia64_set_ibr(i, 0UL);
3828
			ia64_dv_serialize_instruction();
3829 3830
		}
		ia64_srlz_i();
3831
		for (i=0; i < pmu_conf->num_dbrs; i++) {
3832
			ia64_set_dbr(i, 0UL);
3833
			ia64_dv_serialize_data();
3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847
		}
		ia64_srlz_d();
	}

	/*
	 * Now install the values into the registers
	 */
	for (i = 0; i < count; i++, req++) {

		rnum      = req->dbreg_num;
		dbreg.val = req->dbreg_value;

		ret = -EINVAL;

3848
		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879
			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
				  rnum, dbreg.val, mode, i, count));

			goto abort_mission;
		}

		/*
		 * make sure we do not install enabled breakpoint
		 */
		if (rnum & 0x1) {
			if (mode == PFM_CODE_RR)
				dbreg.ibr.ibr_x = 0;
			else
				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
		}

		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);

		/*
		 * Debug registers, just like PMC, can only be modified
		 * by a kernel call. Moreover, perfmon() access to those
		 * registers are centralized in this routine. The hardware
		 * does not modify the value of these registers, therefore,
		 * if we save them as they are written, we can avoid having
		 * to save them on context switch out. This is made possible
		 * by the fact that when perfmon uses debug registers, ptrace()
		 * won't be able to modify them concurrently.
		 */
		if (mode == PFM_CODE_RR) {
			CTX_USED_IBR(ctx, rnum);

3880 3881 3882 3883
			if (can_access_pmu) {
				ia64_set_ibr(rnum, dbreg.val);
				ia64_dv_serialize_instruction();
			}
3884 3885 3886 3887 3888 3889 3890 3891

			ctx->ctx_ibrs[rnum] = dbreg.val;

			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x is_loaded=%d access_pmu=%d\n",
				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
		} else {
			CTX_USED_DBR(ctx, rnum);

3892 3893 3894 3895
			if (can_access_pmu) {
				ia64_set_dbr(rnum, dbreg.val);
				ia64_dv_serialize_data();
			}
3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909
			ctx->ctx_dbrs[rnum] = dbreg.val;

			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x is_loaded=%d access_pmu=%d\n",
				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
		}
	}

	return 0;

abort_mission:
	/*
	 * in case it was our first attempt, we undo the global modifications
	 */
	if (first_time) {
3910
		LOCK_PFS(flags);
3911 3912 3913
		if (ctx->ctx_fl_system) {
			pfm_sessions.pfs_sys_use_dbregs--;
		}
3914
		UNLOCK_PFS(flags);
3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936
		ctx->ctx_fl_using_dbreg = 0;
	}
	/*
	 * install error return flag
	 */
	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);

	return ret;
}

static int
pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
}

static int
pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
}

3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979
int
pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
{
	pfm_context_t *ctx;

	if (req == NULL) return -EINVAL;

 	ctx = GET_PMU_CTX();

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
	 */
	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;

	return pfm_write_ibrs(ctx, req, nreq, regs);
}
EXPORT_SYMBOL(pfm_mod_write_ibrs);

int
pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
{
	pfm_context_t *ctx;

	if (req == NULL) return -EINVAL;

 	ctx = GET_PMU_CTX();

	if (ctx == NULL) return -EINVAL;

	/*
	 * for now limit to current task, which is enough when calling
	 * from overflow handler
	 */
	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;

	return pfm_write_dbrs(ctx, req, nreq, regs);
}
EXPORT_SYMBOL(pfm_mod_write_dbrs);


3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992
static int
pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_features_t *req = (pfarg_features_t *)arg;

	req->ft_version = PFM_VERSION;
	return 0;
}

static int
pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct pt_regs *tregs;
David Mosberger's avatar
David Mosberger committed
3993
	struct task_struct *task = PFM_CTX_TASK(ctx);
3994
	int state, is_system;
3995

3996 3997
	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;
3998

3999
	if (state != PFM_CTX_LOADED && state != PFM_CTX_MASKED) return -EINVAL;
4000 4001 4002 4003 4004 4005

	/*
 	 * In system wide and when the context is loaded, access can only happen
 	 * when the caller is running on the CPU being monitored by the session.
 	 * It does not have to be the owner (ctx_task) of the context per se.
 	 */
4006
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
4007
		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
4008 4009
		return -EBUSY;
	}
4010
	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
4011 4012 4013
		PFM_CTX_TASK(ctx)->pid,
		state,
		is_system));
4014 4015 4016 4017 4018
	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
4019
	if (is_system) {
4020 4021 4022 4023 4024
		/*
		 * Update local PMU first
		 *
		 * disable dcr pp
		 */
4025
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043
		ia64_srlz_i();

		/*
		 * update local cpuinfo
		 */
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);

		/*
		 * stop monitoring, does srlz.i
		 */
		pfm_clear_psr_pp();

		/*
		 * stop monitoring in the caller
		 */
		ia64_psr(regs)->pp = 0;

		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
4044 4045
	}
	/*
4046
	 * per-task mode
Linus Torvalds's avatar
Linus Torvalds committed
4047 4048
	 */

David Mosberger's avatar
David Mosberger committed
4049
	if (task == current) {
4050 4051 4052 4053 4054 4055 4056 4057
		/* stop monitoring  at kernel level */
		pfm_clear_psr_up();

		/*
	 	 * stop monitoring at the user level
	 	 */
		ia64_psr(regs)->up = 0;
	} else {
David Mosberger's avatar
David Mosberger committed
4058
		tregs = ia64_task_regs(task);
4059 4060 4061 4062 4063 4064 4065 4066 4067

		/*
	 	 * stop monitoring at the user level
	 	 */
		ia64_psr(tregs)->up = 0;

		/*
		 * monitoring disabled in kernel at next reschedule
		 */
David Mosberger's avatar
David Mosberger committed
4068
		ctx->ctx_saved_psr_up = 0;
4069
		DPRINT(("task=[%d]\n", task->pid));
Linus Torvalds's avatar
Linus Torvalds committed
4070 4071 4072 4073
	}
	return 0;
}

4074

Linus Torvalds's avatar
Linus Torvalds committed
4075
static int
4076
pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
4077
{
4078
	struct pt_regs *tregs;
4079
	int state, is_system;
Linus Torvalds's avatar
Linus Torvalds committed
4080

4081 4082
	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;
Linus Torvalds's avatar
Linus Torvalds committed
4083

4084
	if (state != PFM_CTX_LOADED) return -EINVAL;
4085

4086 4087 4088 4089 4090
	/*
 	 * In system wide and when the context is loaded, access can only happen
 	 * when the caller is running on the CPU being monitored by the session.
 	 * It does not have to be the owner (ctx_task) of the context per se.
 	 */
4091
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
4092
		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
4093 4094
		return -EBUSY;
	}
4095

4096 4097 4098 4099 4100
	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
4101
	if (is_system) {
4102

4103 4104 4105 4106
		/*
		 * set user level psr.pp for the caller
		 */
		ia64_psr(regs)->pp = 1;
4107

4108 4109 4110 4111 4112 4113 4114 4115 4116
		/*
		 * now update the local PMU and cpuinfo
		 */
		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);

		/*
		 * start monitoring at kernel level
		 */
		pfm_set_psr_pp();
4117

4118
		/* enable dcr pp */
4119
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
4120 4121
		ia64_srlz_i();

4122 4123
		return 0;
	}
4124

4125 4126 4127
	/*
	 * per-process mode
	 */
4128

4129
	if (ctx->ctx_task == current) {
4130

4131 4132
		/* start monitoring at kernel level */
		pfm_set_psr_up();
Linus Torvalds's avatar
Linus Torvalds committed
4133

4134 4135 4136 4137 4138 4139 4140
		/*
		 * activate monitoring at user level
		 */
		ia64_psr(regs)->up = 1;

	} else {
		tregs = ia64_task_regs(ctx->ctx_task);
4141 4142

		/*
4143 4144
		 * start monitoring at the kernel level the next
		 * time the task is scheduled
4145
		 */
David Mosberger's avatar
David Mosberger committed
4146
		ctx->ctx_saved_psr_up = IA64_PSR_UP;
4147 4148 4149 4150 4151

		/*
		 * activate monitoring at user level
		 */
		ia64_psr(tregs)->up = 1;
Linus Torvalds's avatar
Linus Torvalds committed
4152
	}
4153 4154
	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
4155

4156
static int
4157 4158 4159 4160 4161 4162
pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
	unsigned int cnum;
	int i;
	int ret = -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
4163

4164
	for (i = 0; i < count; i++, req++) {
4165

4166
		cnum = req->reg_num;
4167

4168
		if (!PMC_IS_IMPL(cnum)) goto abort_mission;
4169

4170
		req->reg_value = PMC_DFL_VAL(cnum);
4171

4172
		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
4173

4174 4175
		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
	}
4176
	return 0;
4177 4178 4179 4180

abort_mission:
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
	return ret;
4181 4182
}

David Mosberger's avatar
David Mosberger committed
4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204
static int
pfm_check_task_exist(pfm_context_t *ctx)
{
	struct task_struct *g, *t;
	int ret = -ESRCH;

	read_lock(&tasklist_lock);

	do_each_thread (g, t) {
		if (t->thread.pfm_context == ctx) {
			ret = 0;
			break;
		}
	} while_each_thread (g, t);

	read_unlock(&tasklist_lock);

	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));

	return ret;
}

4205
static int
4206
pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4207
{
4208 4209 4210
	struct task_struct *task;
	struct thread_struct *thread;
	struct pfm_context_t *old;
4211
	unsigned long flags;
4212 4213 4214 4215 4216 4217 4218
#ifndef CONFIG_SMP
	struct task_struct *owner_task = NULL;
#endif
	pfarg_load_t *req = (pfarg_load_t *)arg;
	unsigned long *pmcs_source, *pmds_source;
	int the_cpu;
	int ret = 0;
4219
	int state, is_system, set_dbregs = 0;
4220

4221 4222
	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;
4223
	/*
4224
	 * can only load from unloaded or terminated state
4225
	 */
4226 4227
	if (state != PFM_CTX_UNLOADED) {
		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
4228 4229 4230 4231
			req->load_pid,
			ctx->ctx_state));
		return -EINVAL;
	}
4232

4233
	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
4234

4235
	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
4236
		DPRINT(("cannot use blocking mode on self\n"));
4237 4238 4239 4240 4241 4242 4243
		return -EINVAL;
	}

	ret = pfm_get_task(ctx, req->load_pid, &task);
	if (ret) {
		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
		return ret;
4244
	}
Linus Torvalds's avatar
Linus Torvalds committed
4245

4246 4247
	ret = -EINVAL;

David Mosberger's avatar
David Mosberger committed
4248
	/*
4249
	 * system wide is self monitoring only
David Mosberger's avatar
David Mosberger committed
4250
	 */
4251
	if (is_system && task != current) {
4252
		DPRINT(("system wide is self monitoring only load_pid=%d\n",
4253 4254
			req->load_pid));
		goto error;
4255
	}
Linus Torvalds's avatar
Linus Torvalds committed
4256

4257
	thread = &task->thread;
Linus Torvalds's avatar
Linus Torvalds committed
4258

4259
	ret = 0;
David Mosberger's avatar
David Mosberger committed
4260
	/*
4261 4262
	 * cannot load a context which is using range restrictions,
	 * into a task that is being debugged.
David Mosberger's avatar
David Mosberger committed
4263
	 */
4264 4265 4266 4267 4268 4269
	if (ctx->ctx_fl_using_dbreg) {
		if (thread->flags & IA64_THREAD_DBG_VALID) {
			ret = -EBUSY;
			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
			goto error;
		}
4270
		LOCK_PFS(flags);
4271 4272 4273 4274 4275 4276 4277

		if (is_system) {
			if (pfm_sessions.pfs_ptrace_use_dbregs) {
				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
				ret = -EBUSY;
			} else {
				pfm_sessions.pfs_sys_use_dbregs++;
4278
				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
4279 4280 4281 4282
				set_dbregs = 1;
			}
		}

4283
		UNLOCK_PFS(flags);
4284 4285

		if (ret) goto error;
4286
	}
Linus Torvalds's avatar
Linus Torvalds committed
4287

David Mosberger's avatar
David Mosberger committed
4288
	/*
4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301
	 * SMP system-wide monitoring implies self-monitoring.
	 *
	 * The programming model expects the task to
	 * be pinned on a CPU throughout the session.
	 * Here we take note of the current CPU at the
	 * time the context is loaded. No call from
	 * another CPU will be allowed.
	 *
	 * The pinning via shed_setaffinity()
	 * must be done by the calling task prior
	 * to this call.
	 *
	 * systemwide: keep track of CPU this session is supposed to run on
David Mosberger's avatar
David Mosberger committed
4302
	 */
4303
	the_cpu = ctx->ctx_cpu = smp_processor_id();
Linus Torvalds's avatar
Linus Torvalds committed
4304

4305
	ret = -EBUSY;
David Mosberger's avatar
David Mosberger committed
4306
	/*
4307
	 * now reserve the session
David Mosberger's avatar
David Mosberger committed
4308
	 */
4309
	ret = pfm_reserve_session(current, is_system, the_cpu);
4310
	if (ret) goto error;
Linus Torvalds's avatar
Linus Torvalds committed
4311

David Mosberger's avatar
David Mosberger committed
4312
	/*
4313
	 * task is necessarily stopped at this point.
David Mosberger's avatar
David Mosberger committed
4314
	 *
4315 4316 4317 4318 4319
	 * If the previous context was zombie, then it got removed in
	 * pfm_save_regs(). Therefore we should not see it here.
	 * If we see a context, then this is an active context
	 *
	 * XXX: needs to be atomic
David Mosberger's avatar
David Mosberger committed
4320
	 */
4321
	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
4322 4323
		thread->pfm_context, ctx));

4324
	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
4325 4326 4327
	if (old != NULL) {
		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
		goto error_unres;
David Mosberger's avatar
David Mosberger committed
4328
	}
Linus Torvalds's avatar
Linus Torvalds committed
4329

4330 4331
	pfm_reset_msgq(ctx);

4332
	ctx->ctx_state = PFM_CTX_LOADED;
4333 4334 4335 4336

	/*
	 * link context to task
	 */
4337
	ctx->ctx_task = task;
Linus Torvalds's avatar
Linus Torvalds committed
4338

4339
	if (is_system) {
David Mosberger's avatar
David Mosberger committed
4340
		/*
4341 4342 4343 4344 4345 4346 4347 4348
		 * we load as stopped
		 */
		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);

		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
	} else {
		thread->flags |= IA64_THREAD_PM_VALID;
David Mosberger's avatar
David Mosberger committed
4349
	}
Linus Torvalds's avatar
Linus Torvalds committed
4350

4351 4352 4353 4354 4355 4356 4357 4358
	/*
	 * propagate into thread-state
	 */
	pfm_copy_pmds(task, ctx);
	pfm_copy_pmcs(task, ctx);

	pmcs_source = thread->pmcs;
	pmds_source = thread->pmds;
Linus Torvalds's avatar
Linus Torvalds committed
4359

David Mosberger's avatar
David Mosberger committed
4360
	/*
4361
	 * always the case for system-wide
David Mosberger's avatar
David Mosberger committed
4362
	 */
4363
	if (task == current) {
Linus Torvalds's avatar
Linus Torvalds committed
4364

4365
		if (is_system == 0) {
Linus Torvalds's avatar
Linus Torvalds committed
4366

4367 4368 4369
			/* allow user level control */
			ia64_psr(regs)->sp = 0;
			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
Linus Torvalds's avatar
Linus Torvalds committed
4370

4371 4372 4373 4374 4375 4376 4377 4378 4379 4380
			SET_LAST_CPU(ctx, smp_processor_id());
			INC_ACTIVATION();
			SET_ACTIVATION(ctx);
#ifndef CONFIG_SMP
			/*
			 * push the other task out, if any
			 */
			owner_task = GET_PMU_OWNER();
			if (owner_task) pfm_lazy_save_regs(owner_task);
#endif
David Mosberger's avatar
David Mosberger committed
4381 4382
		}
		/*
4383 4384
		 * load all PMD from ctx to PMU (as opposed to thread state)
		 * restore all PMC from ctx to PMU
David Mosberger's avatar
David Mosberger committed
4385
		 */
4386 4387
		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
Linus Torvalds's avatar
Linus Torvalds committed
4388

4389 4390
		ctx->ctx_reload_pmcs[0] = 0UL;
		ctx->ctx_reload_pmds[0] = 0UL;
Linus Torvalds's avatar
Linus Torvalds committed
4391

David Mosberger's avatar
David Mosberger committed
4392
		/*
4393
		 * guaranteed safe by earlier check against DBG_VALID
David Mosberger's avatar
David Mosberger committed
4394
		 */
4395
		if (ctx->ctx_fl_using_dbreg) {
4396 4397
			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
David Mosberger's avatar
David Mosberger committed
4398 4399
		}
		/*
4400
		 * set new ownership
David Mosberger's avatar
David Mosberger committed
4401
		 */
4402
		SET_PMU_OWNER(task, ctx);
David Mosberger's avatar
David Mosberger committed
4403

4404 4405 4406 4407 4408 4409
		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
	} else {
		/*
		 * when not current, task MUST be stopped, so this is safe
		 */
		regs = ia64_task_regs(task);
David Mosberger's avatar
David Mosberger committed
4410

4411 4412 4413
		/* force a full reload */
		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
		SET_LAST_CPU(ctx, -1);
David Mosberger's avatar
David Mosberger committed
4414

4415
		/* initial saved psr (stopped) */
David Mosberger's avatar
David Mosberger committed
4416
		ctx->ctx_saved_psr_up = 0UL;
4417 4418 4419 4420
		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
	}

	ret = 0;
David Mosberger's avatar
David Mosberger committed
4421

4422 4423 4424
error_unres:
	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
error:
4425 4426 4427 4428
	/*
	 * we must undo the dbregs setting (for system-wide)
	 */
	if (ret && set_dbregs) {
4429
		LOCK_PFS(flags);
4430
		pfm_sessions.pfs_sys_use_dbregs--;
4431
		UNLOCK_PFS(flags);
4432
	}
4433 4434 4435
	/*
	 * release task, there is now a link with the context
	 */
4436
	if (is_system == 0 && task != current) {
David Mosberger's avatar
David Mosberger committed
4437
		pfm_put_task(task);
David Mosberger's avatar
David Mosberger committed
4438

David Mosberger's avatar
David Mosberger committed
4439 4440 4441
		if (ret == 0) {
			ret = pfm_check_task_exist(ctx);
			if (ret) {
4442 4443
				ctx->ctx_state = PFM_CTX_UNLOADED;
				ctx->ctx_task  = NULL;
David Mosberger's avatar
David Mosberger committed
4444 4445 4446
			}
		}
	}
4447
	return ret;
David Mosberger's avatar
David Mosberger committed
4448 4449
}

4450 4451 4452 4453 4454 4455 4456 4457 4458
/*
 * in this function, we do not need to increase the use count
 * for the task via get_task_struct(), because we hold the
 * context lock. If the task were to disappear while having
 * a context attached, it would go through pfm_exit_thread()
 * which also grabs the context lock  and would therefore be blocked
 * until we are here.
 */
static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
David Mosberger's avatar
David Mosberger committed
4459 4460

static int
4461
pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
David Mosberger's avatar
David Mosberger committed
4462
{
David Mosberger's avatar
David Mosberger committed
4463
	struct task_struct *task = PFM_CTX_TASK(ctx);
4464
	struct pt_regs *tregs;
4465
	int prev_state, is_system;
4466
	int ret;
David Mosberger's avatar
David Mosberger committed
4467

4468
	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
David Mosberger's avatar
David Mosberger committed
4469

4470 4471
	prev_state = ctx->ctx_state;
	is_system  = ctx->ctx_fl_system;
4472

4473 4474
	/*
	 * unload only when necessary
David Mosberger's avatar
David Mosberger committed
4475
	 */
4476 4477
	if (prev_state == PFM_CTX_UNLOADED) {
		DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
4478 4479
		return 0;
	}
David Mosberger's avatar
David Mosberger committed
4480

4481 4482 4483
	/*
	 * clear psr and dcr bits
	 */
4484 4485
	ret = pfm_stop(ctx, NULL, 0, regs);
	if (ret) return ret;
4486

4487
	ctx->ctx_state = PFM_CTX_UNLOADED;
4488

4489 4490 4491 4492 4493
	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
4494
	if (is_system) {
Linus Torvalds's avatar
Linus Torvalds committed
4495

4496 4497 4498 4499 4500 4501 4502
		/*
		 * Update cpuinfo
		 *
		 * local PMU is taken care of in pfm_stop()
		 */
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
4503

4504 4505 4506 4507 4508
		/*
		 * save PMDs in context
		 * release ownership
		 */
		pfm_flush_pmds(current, ctx);
4509

4510 4511 4512 4513
		/*
		 * at this point we are done with the PMU
		 * so we can unreserve the resource.
		 */
4514 4515
		if (prev_state != PFM_CTX_ZOMBIE) 
			pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);
4516

4517 4518 4519 4520 4521 4522 4523 4524
		/*
		 * disconnect context from task
		 */
		task->thread.pfm_context = NULL;
		/*
		 * disconnect task from context
		 */
		ctx->ctx_task = NULL;
4525

4526 4527 4528 4529
		/*
		 * There is nothing more to cleanup here.
		 */
		return 0;
David Mosberger's avatar
David Mosberger committed
4530 4531
	}

4532 4533 4534 4535
	/*
	 * per-task mode
	 */
	tregs = task == current ? regs : ia64_task_regs(task);
David Mosberger's avatar
David Mosberger committed
4536

4537
	if (task == current) {
4538 4539 4540 4541
		/*
		 * cancel user level control
		 */
		ia64_psr(regs)->sp = 1;
4542

David Mosberger's avatar
David Mosberger committed
4543
		DPRINT(("setting psr.sp for [%d]\n", task->pid));
4544 4545 4546 4547 4548 4549
	}
	/*
	 * save PMDs to context
	 * release ownership
	 */
	pfm_flush_pmds(task, ctx);
David Mosberger's avatar
David Mosberger committed
4550

4551 4552 4553
	/*
	 * at this point we are done with the PMU
	 * so we can unreserve the resource.
4554 4555
	 *
	 * when state was ZOMBIE, we have already unreserved.
4556
	 */
4557 4558
	if (prev_state != PFM_CTX_ZOMBIE) 
		pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);
4559

4560 4561 4562 4563 4564
	/*
	 * reset activation counter and psr
	 */
	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
	SET_LAST_CPU(ctx, -1);
David Mosberger's avatar
David Mosberger committed
4565

4566 4567 4568 4569
	/*
	 * PMU state will not be restored
	 */
	task->thread.flags &= ~IA64_THREAD_PM_VALID;
David Mosberger's avatar
David Mosberger committed
4570

4571 4572 4573 4574 4575
	/*
	 * break links between context and task
	 */
	task->thread.pfm_context  = NULL;
	ctx->ctx_task             = NULL;
David Mosberger's avatar
David Mosberger committed
4576

4577
	PFM_SET_WORK_PENDING(task, 0);
4578 4579 4580 4581

	ctx->ctx_fl_trap_reason  = PFM_TRAP_REASON_NONE;
	ctx->ctx_fl_can_restart  = 0;
	ctx->ctx_fl_going_zombie = 0;
David Mosberger's avatar
David Mosberger committed
4582

4583
	DPRINT(("disconnected [%d] from context\n", task->pid));
David Mosberger's avatar
David Mosberger committed
4584

4585 4586
	return 0;
}
4587

4588

4589 4590
/*
 * called only from exit_thread(): task == current
4591
 * we come here only if current has a context attached (loaded or masked)
4592 4593 4594
 */
void
pfm_exit_thread(struct task_struct *task)
4595
{
4596 4597 4598
	pfm_context_t *ctx;
	unsigned long flags;
	struct pt_regs *regs = ia64_task_regs(task);
David Mosberger's avatar
David Mosberger committed
4599
	int ret, state;
4600
	int free_ok = 0;
4601

4602
	ctx = PFM_GET_CTX(task);
4603

4604
	PROTECT_CTX(ctx, flags);
4605

4606
	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
4607

David Mosberger's avatar
David Mosberger committed
4608 4609 4610 4611
	state = ctx->ctx_state;
	switch(state) {
		case PFM_CTX_UNLOADED:
			/*
4612 4613
	 		 * only comes to thios function if pfm_context is not NULL, i.e., cannot
			 * be in unloaded state
David Mosberger's avatar
David Mosberger committed
4614 4615 4616 4617 4618 4619 4620
	 		 */
			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
			break;
		case PFM_CTX_LOADED:
		case PFM_CTX_MASKED:
			ret = pfm_context_unload(ctx, NULL, 0, regs);
			if (ret) {
4621
				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
David Mosberger's avatar
David Mosberger committed
4622
			}
4623
			DPRINT(("ctx unloaded for current state was %d\n", state));
4624

David Mosberger's avatar
David Mosberger committed
4625 4626 4627
			pfm_end_notify_user(ctx);
			break;
		case PFM_CTX_ZOMBIE:
4628 4629 4630 4631
			ret = pfm_context_unload(ctx, NULL, 0, regs);
			if (ret) {
				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
			}
David Mosberger's avatar
David Mosberger committed
4632 4633 4634 4635 4636
			free_ok = 1;
			break;
		default:
			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
			break;
4637
	}
4638 4639
	UNPROTECT_CTX(ctx, flags);

4640 4641
	{ u64 psr = pfm_get_psr();
	  BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
David Mosberger's avatar
David Mosberger committed
4642
	  BUG_ON(GET_PMU_OWNER());
4643 4644
	  BUG_ON(ia64_psr(regs)->up);
	  BUG_ON(ia64_psr(regs)->pp);
4645
	}
4646

4647 4648 4649 4650 4651
	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (free_ok) pfm_context_free(ctx);
4652 4653
}

Linus Torvalds's avatar
Linus Torvalds committed
4654
/*
David Mosberger's avatar
David Mosberger committed
4655
 * functions MUST be listed in the increasing order of their index (see permfon.h)
Linus Torvalds's avatar
Linus Torvalds committed
4656
 */
4657 4658 4659 4660 4661 4662
#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
#define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
#define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
#define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}

David Mosberger's avatar
David Mosberger committed
4663
static pfm_cmd_desc_t pfm_cmd_tab[]={
4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697
/* 0  */PFM_CMD_NONE,
/* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
/* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
/* 6  */PFM_CMD_NONE,
/* 7  */PFM_CMD_NONE,
/* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
/* 9  */PFM_CMD_NONE,
/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
/* 11 */PFM_CMD_NONE,
/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
/* 14 */PFM_CMD_NONE,
/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
/* 18 */PFM_CMD_NONE,
/* 19 */PFM_CMD_NONE,
/* 20 */PFM_CMD_NONE,
/* 21 */PFM_CMD_NONE,
/* 22 */PFM_CMD_NONE,
/* 23 */PFM_CMD_NONE,
/* 24 */PFM_CMD_NONE,
/* 25 */PFM_CMD_NONE,
/* 26 */PFM_CMD_NONE,
/* 27 */PFM_CMD_NONE,
/* 28 */PFM_CMD_NONE,
/* 29 */PFM_CMD_NONE,
/* 30 */PFM_CMD_NONE,
/* 31 */PFM_CMD_NONE,
/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
David Mosberger's avatar
David Mosberger committed
4698
};
4699
#define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
David Mosberger's avatar
David Mosberger committed
4700

Linus Torvalds's avatar
Linus Torvalds committed
4701
static int
4702
pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
Linus Torvalds's avatar
Linus Torvalds committed
4703
{
4704
	struct task_struct *task;
4705
	int state, old_state;
4706

4707
recheck:
4708
	state = ctx->ctx_state;
4709
	task  = ctx->ctx_task;
4710 4711

	if (task == NULL) {
4712
		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
4713
		return 0;
David Mosberger's avatar
David Mosberger committed
4714
	}
4715 4716

	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
4717 4718 4719 4720
		ctx->ctx_fd,
		state,
		task->pid,
		task->state, PFM_CMD_STOPPED(cmd)));
4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731

	/*
	 * self-monitoring always ok.
	 *
	 * for system-wide the caller can either be the creator of the
	 * context (to one to which the context is attached to) OR
	 * a task running on the same CPU as the session.
	 */
	if (task == current || ctx->ctx_fl_system) return 0;

	/*
4732
	 * no command can operate on a zombie context
4733
	 */
4734 4735 4736 4737
	if (state == PFM_CTX_ZOMBIE) {
		DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
		return -EINVAL;
	}
4738

4739 4740 4741 4742
	/*
	 * if context is UNLOADED, MASKED we are safe to go
	 */
	if (state != PFM_CTX_LOADED) return 0;
4743 4744

	/*
4745
	 * context is LOADED, we must make sure the task is stopped
4746 4747 4748 4749 4750 4751
	 * We could lift this restriction for UP but it would mean that
	 * the user has no guarantee the task would not run between
	 * two successive calls to perfmonctl(). That's probably OK.
	 * If this user wants to ensure the task does not run, then
	 * the task must be stopped.
	 */
4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771
	if (PFM_CMD_STOPPED(cmd)) {
		if (task->state != TASK_STOPPED) {
			DPRINT(("[%d] task not in stopped state\n", task->pid));
			return -EBUSY;
		}
		/*
		 * task is now stopped, wait for ctxsw out
		 *
		 * This is an interesting point in the code.
		 * We need to unprotect the context because
		 * the pfm_save_regs() routines needs to grab
		 * the same lock. There are danger in doing
		 * this because it leaves a window open for
		 * another task to get access to the context
		 * and possibly change its state. The one thing
		 * that is not possible is for the context to disappear
		 * because we are protected by the VFS layer, i.e.,
		 * get_fd()/put_fd().
		 */
		old_state = state;
4772

4773
		UNPROTECT_CTX(ctx, flags);
4774

4775
		wait_task_inactive(task);
4776

4777
		PROTECT_CTX(ctx, flags);
4778

4779 4780 4781 4782 4783 4784 4785 4786
		/*
		 * we must recheck to verify if state has changed
		 */
		if (ctx->ctx_state != old_state) {
			DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
			goto recheck;
		}
	}
4787
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
4788 4789
}

4790 4791 4792
/*
 * system-call entry point (must return long)
 */
4793
asmlinkage long
4794
sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
David Mosberger's avatar
David Mosberger committed
4795
		long arg8, long stack)
Linus Torvalds's avatar
Linus Torvalds committed
4796
{
David Mosberger's avatar
David Mosberger committed
4797
	struct pt_regs *regs = (struct pt_regs *)&stack;
4798 4799 4800 4801 4802 4803
	struct file *file = NULL;
	pfm_context_t *ctx = NULL;
	unsigned long flags = 0UL;
	void *args_k = NULL;
	long ret; /* will expand int return types */
	size_t base_sz, sz, xtra_sz = 0;
4804 4805 4806
	int narg, completed_args = 0, call_made = 0, cmd_flags;
	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	int (*getsize)(void *arg, size_t *sz);
4807
#define PFM_MAX_ARGSIZE	4096
Linus Torvalds's avatar
Linus Torvalds committed
4808

4809
	/*
4810 4811
	 * reject any call if perfmon was disabled at initialization
	 */
4812
	if (unlikely(pmu_conf == NULL)) return -ENOSYS;
Linus Torvalds's avatar
Linus Torvalds committed
4813

4814
	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
4815
		DPRINT(("invalid cmd=%d\n", cmd));
4816 4817
		return -EINVAL;
	}
Linus Torvalds's avatar
Linus Torvalds committed
4818

4819 4820 4821 4822 4823 4824 4825
	func      = pfm_cmd_tab[cmd].cmd_func;
	narg      = pfm_cmd_tab[cmd].cmd_narg;
	base_sz   = pfm_cmd_tab[cmd].cmd_argsize;
	getsize   = pfm_cmd_tab[cmd].cmd_getsize;
	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;

	if (unlikely(func == NULL)) {
4826
		DPRINT(("invalid cmd=%d\n", cmd));
4827 4828 4829 4830
		return -EINVAL;
	}

	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
4831
		PFM_CMD_NAME(cmd),
4832 4833 4834
		cmd,
		narg,
		base_sz,
4835
		count));
Linus Torvalds's avatar
Linus Torvalds committed
4836

4837 4838 4839
	/*
	 * check if number of arguments matches what the command expects
	 */
4840
	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
4841
		return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
4842

4843 4844 4845 4846 4847 4848 4849 4850 4851
restart_args:
	sz = xtra_sz + base_sz*count;
	/*
	 * limit abuse to min page size
	 */
	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
		return -E2BIG;
	}
Linus Torvalds's avatar
Linus Torvalds committed
4852

4853 4854 4855
	/*
	 * allocate default-sized argument buffer
	 */
4856
	if (likely(count && args_k == NULL)) {
4857 4858 4859
		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
		if (args_k == NULL) return -ENOMEM;
	}
David Mosberger's avatar
David Mosberger committed
4860

4861
	ret = -EFAULT;
David Mosberger's avatar
David Mosberger committed
4862

4863 4864 4865 4866 4867 4868
	/*
	 * copy arguments
	 *
	 * assume sz = 0 for command without parameters
	 */
	if (sz && copy_from_user(args_k, arg, sz)) {
4869
		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
4870 4871
		goto error_args;
	}
David Mosberger's avatar
David Mosberger committed
4872

4873 4874 4875
	/*
	 * check if command supports extra parameters
	 */
4876
	if (completed_args == 0 && getsize) {
4877 4878 4879
		/*
		 * get extra parameters size (based on main argument)
		 */
4880
		ret = (*getsize)(args_k, &xtra_sz);
4881
		if (ret) goto error_args;
David Mosberger's avatar
David Mosberger committed
4882

4883
		completed_args = 1;
David Mosberger's avatar
David Mosberger committed
4884

4885
		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
David Mosberger's avatar
David Mosberger committed
4886

4887
		/* retry if necessary */
4888
		if (likely(xtra_sz)) goto restart_args;
4889
	}
4890

4891
	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
4892

4893
	ret = -EBADF;
David Mosberger's avatar
David Mosberger committed
4894

4895 4896
	file = fget(fd);
	if (unlikely(file == NULL)) {
4897
		DPRINT(("invalid fd %d\n", fd));
4898 4899 4900
		goto error_args;
	}
	if (unlikely(PFM_IS_FILE(file) == 0)) {
4901
		DPRINT(("fd %d not related to perfmon\n", fd));
4902 4903
		goto error_args;
	}
David Mosberger's avatar
David Mosberger committed
4904

4905 4906
	ctx = (pfm_context_t *)file->private_data;
	if (unlikely(ctx == NULL)) {
4907
		DPRINT(("no context for fd %d\n", fd));
4908 4909 4910
		goto error_args;
	}
	prefetch(&ctx->ctx_state);
David Mosberger's avatar
David Mosberger committed
4911

4912
	PROTECT_CTX(ctx, flags);
David Mosberger's avatar
David Mosberger committed
4913

4914 4915 4916 4917 4918
	/*
	 * check task is stopped
	 */
	ret = pfm_check_task_state(ctx, cmd, flags);
	if (unlikely(ret)) goto abort_locked;
Linus Torvalds's avatar
Linus Torvalds committed
4919

4920 4921
skip_fd:
	ret = (*func)(ctx, args_k, count, regs);
Linus Torvalds's avatar
Linus Torvalds committed
4922

4923
	call_made = 1;
4924

4925
abort_locked:
4926
	if (likely(ctx)) {
4927
		DPRINT(("context unlocked\n"));
4928 4929
		UNPROTECT_CTX(ctx, flags);
		fput(file);
4930 4931
	}

4932 4933
	/* copy argument back to user, if needed */
	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
4934

4935 4936
error_args:
	if (args_k) kfree(args_k);
4937

David Mosberger's avatar
David Mosberger committed
4938 4939
	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));

4940 4941
	return ret;
}
4942

4943 4944 4945 4946 4947
static void
pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
{
	pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
	pfm_ovfl_ctrl_t rst_ctrl;
4948
	int state;
4949
	int ret = 0;
Linus Torvalds's avatar
Linus Torvalds committed
4950

4951
	state = ctx->ctx_state;
4952 4953 4954 4955 4956
	/*
	 * Unlock sampling buffer and reset index atomically
	 * XXX: not really needed when blocking
	 */
	if (CTX_HAS_SMPL(ctx)) {
4957

David Mosberger's avatar
David Mosberger committed
4958
		rst_ctrl.bits.mask_monitoring = 0;
4959
		rst_ctrl.bits.reset_ovfl_pmds = 0;
4960

4961 4962 4963 4964
		if (state == PFM_CTX_LOADED)
			ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
		else
			ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
4965
	} else {
David Mosberger's avatar
David Mosberger committed
4966 4967
		rst_ctrl.bits.mask_monitoring = 0;
		rst_ctrl.bits.reset_ovfl_pmds = 1;
4968
	}
4969

4970
	if (ret == 0) {
David Mosberger's avatar
David Mosberger committed
4971 4972 4973 4974
		if (rst_ctrl.bits.reset_ovfl_pmds) {
			pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
		}
		if (rst_ctrl.bits.mask_monitoring == 0) {
4975
			DPRINT(("resuming monitoring\n"));
4976
			if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
4977 4978 4979
		} else {
			DPRINT(("stopping monitoring\n"));
			//pfm_stop_monitoring(current, regs);
4980
		}
4981
		ctx->ctx_state = PFM_CTX_LOADED;
4982 4983
	}
}
4984

4985 4986 4987 4988 4989 4990 4991 4992 4993 4994
/*
 * context MUST BE LOCKED when calling
 * can only be called for current
 */
static void
pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
{
	if (ctx->ctx_fl_system) {
		printk(KERN_ERR "perfmon: pfm_context_force_terminate [%d] is system-wide\n", current->pid);
		return;
4995 4996
	}
	/*
4997 4998 4999 5000 5001 5002
	 * we stop the whole thing, we do no need to flush
	 * we know we WERE masked
	 */
	pfm_clear_psr_up();
	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;
5003

5004 5005
	/*
	 * disconnect the task from the context and vice-versa
5006
	 */
5007 5008 5009 5010
	current->thread.pfm_context  = NULL;
	current->thread.flags       &= ~IA64_THREAD_PM_VALID;
	ctx->ctx_task = NULL;

5011
	DPRINT(("context terminated\n"));
5012 5013 5014 5015 5016

	/*
	 * and wakeup controlling task, indicating we are now disconnected
	 */
	wake_up_interruptible(&ctx->ctx_zombieq);
5017 5018

	/*
5019 5020 5021
	 * given that context is still locked, the controlling
	 * task will only get access when we return from
	 * pfm_handle_work().
5022
	 */
Linus Torvalds's avatar
Linus Torvalds committed
5023 5024
}

5025 5026
static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);

David Mosberger's avatar
David Mosberger committed
5027
void
5028
pfm_handle_work(void)
Linus Torvalds's avatar
Linus Torvalds committed
5029
{
5030 5031 5032 5033
	pfm_context_t *ctx;
	struct pt_regs *regs;
	unsigned long flags;
	unsigned long ovfl_regs;
5034
	unsigned int reason;
Linus Torvalds's avatar
Linus Torvalds committed
5035 5036
	int ret;

5037 5038
	ctx = PFM_GET_CTX(current);
	if (ctx == NULL) {
5039
		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
Linus Torvalds's avatar
Linus Torvalds committed
5040
		return;
Linus Torvalds's avatar
Linus Torvalds committed
5041
	}
5042 5043 5044 5045 5046 5047 5048 5049 5050

	PROTECT_CTX(ctx, flags);

	PFM_SET_WORK_PENDING(current, 0);

	pfm_clear_task_notify();

	regs = ia64_task_regs(current);

5051 5052 5053 5054 5055
	/*
	 * extract reason for being here and clear
	 */
	reason = ctx->ctx_fl_trap_reason;
	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
5056
	ovfl_regs = ctx->ctx_ovfl_regs[0];
5057

5058
	DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
5059 5060

	/*
5061
	 * must be done before we check for simple-reset mode
5062
	 */
5063
	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
5064

Linus Torvalds's avatar
Linus Torvalds committed
5065

5066 5067 5068 5069
	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;

	UNPROTECT_CTX(ctx, flags);
Linus Torvalds's avatar
Linus Torvalds committed
5070

5071
	DPRINT(("before block sleeping\n"));
Linus Torvalds's avatar
Linus Torvalds committed
5072 5073 5074 5075 5076 5077 5078

	/*
	 * may go through without blocking on SMP systems
	 * if restart has been received already by the time we call down()
	 */
	ret = down_interruptible(&ctx->ctx_restart_sem);

5079
	DPRINT(("after block sleeping ret=%d\n", ret));
Linus Torvalds's avatar
Linus Torvalds committed
5080

5081 5082
	PROTECT_CTX(ctx, flags);

5083 5084 5085 5086 5087 5088 5089 5090
	/*
	 * we need to read the ovfl_regs only after wake-up
	 * because we may have had pfm_write_pmds() in between
	 * and that can changed PMD values and therefore 
	 * ovfl_regs is reset for these new PMD values.
	 */
	ovfl_regs = ctx->ctx_ovfl_regs[0];

5091 5092 5093 5094 5095 5096
	if (ctx->ctx_fl_going_zombie) {
do_zombie:
		DPRINT(("context is zombie, bailing out\n"));
		pfm_context_force_terminate(ctx, regs);
		goto nothing_to_do;
	}
Linus Torvalds's avatar
Linus Torvalds committed
5097 5098 5099
	/*
	 * in case of interruption of down() we don't restart anything
	 */
5100
	if (ret < 0) goto nothing_to_do;
Linus Torvalds's avatar
Linus Torvalds committed
5101

5102 5103 5104
skip_blocking:
	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
	ctx->ctx_ovfl_regs[0] = 0UL;
Linus Torvalds's avatar
Linus Torvalds committed
5105

5106
nothing_to_do:
Linus Torvalds's avatar
Linus Torvalds committed
5107

5108
	UNPROTECT_CTX(ctx, flags);
Linus Torvalds's avatar
Linus Torvalds committed
5109 5110
}

David Mosberger's avatar
David Mosberger committed
5111
static int
5112
pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
David Mosberger's avatar
David Mosberger committed
5113
{
5114
	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
5115 5116 5117
		DPRINT(("ignoring overflow notification, owner is zombie\n"));
		return 0;
	}
David Mosberger's avatar
David Mosberger committed
5118

5119
	DPRINT(("waking up somebody\n"));
David Mosberger's avatar
David Mosberger committed
5120

5121
	if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);
David Mosberger's avatar
David Mosberger committed
5122 5123

	/*
5124 5125 5126 5127 5128 5129 5130
	 * safe, we are not in intr handler, nor in ctxsw when
	 * we come here
	 */
	kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);

	return 0;
}
David Mosberger's avatar
David Mosberger committed
5131

5132 5133 5134 5135
static int
pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
{
	pfm_msg_t *msg = NULL;
David Mosberger's avatar
David Mosberger committed
5136

5137 5138 5139 5140 5141 5142
	if (ctx->ctx_fl_no_msg == 0) {
		msg = pfm_get_new_msg(ctx);
		if (msg == NULL) {
			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
			return -1;
		}
David Mosberger's avatar
David Mosberger committed
5143

5144 5145 5146 5147
		msg->pfm_ovfl_msg.msg_type         = PFM_MSG_OVFL;
		msg->pfm_ovfl_msg.msg_ctx_fd       = ctx->ctx_fd;
		msg->pfm_ovfl_msg.msg_active_set   = 0;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
David Mosberger's avatar
David Mosberger committed
5148 5149 5150
		msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
5151
		msg->pfm_ovfl_msg.msg_tstamp       = 0UL;
5152
	}
David Mosberger's avatar
David Mosberger committed
5153

5154
	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
5155 5156 5157 5158
		msg,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd,
		ovfl_pmds));
David Mosberger's avatar
David Mosberger committed
5159

5160 5161
	return pfm_notify_user(ctx, msg);
}
David Mosberger's avatar
David Mosberger committed
5162

5163 5164 5165 5166
static int
pfm_end_notify_user(pfm_context_t *ctx)
{
	pfm_msg_t *msg;
David Mosberger's avatar
David Mosberger committed
5167

5168 5169 5170 5171
	msg = pfm_get_new_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
		return -1;
David Mosberger's avatar
David Mosberger committed
5172
	}
5173 5174
	/* no leak */
	memset(msg, 0, sizeof(*msg));
5175

5176 5177
	msg->pfm_end_msg.msg_type    = PFM_MSG_END;
	msg->pfm_end_msg.msg_ctx_fd  = ctx->ctx_fd;
5178
	msg->pfm_ovfl_msg.msg_tstamp = 0UL;
David Mosberger's avatar
David Mosberger committed
5179

5180
	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
5181 5182
		msg,
		ctx->ctx_fl_no_msg,
5183
		ctx->ctx_fd));
5184 5185

	return pfm_notify_user(ctx, msg);
David Mosberger's avatar
David Mosberger committed
5186 5187
}

Linus Torvalds's avatar
Linus Torvalds committed
5188 5189
/*
 * main overflow processing routine.
5190
 * it can be called from the interrupt path or explicitely during the context switch code
Linus Torvalds's avatar
Linus Torvalds committed
5191
 */
5192 5193
static void
pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
5194
{
5195
	pfm_ovfl_arg_t *ovfl_arg;
5196
	unsigned long mask;
5197 5198
	unsigned long old_val, ovfl_val, new_val;
	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
David Mosberger's avatar
David Mosberger committed
5199
	unsigned long tstamp;
5200
	pfm_ovfl_ctrl_t	ovfl_ctrl;
David Mosberger's avatar
David Mosberger committed
5201
	unsigned int i, has_smpl;
5202
	int must_notify = 0;
Linus Torvalds's avatar
Linus Torvalds committed
5203

5204
	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
5205

Linus Torvalds's avatar
Linus Torvalds committed
5206 5207 5208
	/*
	 * sanity test. Should never happen
	 */
5209
	if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
Linus Torvalds's avatar
Linus Torvalds committed
5210

5211
	tstamp   = ia64_get_itc();
5212
	mask     = pmc0 >> PMU_FIRST_COUNTER;
5213
	ovfl_val = pmu_conf->ovfl_val;
5214
	has_smpl = CTX_HAS_SMPL(ctx);
Linus Torvalds's avatar
Linus Torvalds committed
5215

David Mosberger's avatar
David Mosberger committed
5216
	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
5217
		     "used_pmds=0x%lx\n",
5218 5219 5220
			pmc0,
			task ? task->pid: -1,
			(regs ? regs->cr_iip : 0),
David Mosberger's avatar
David Mosberger committed
5221
			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
5222
			ctx->ctx_used_pmds[0]));
Linus Torvalds's avatar
Linus Torvalds committed
5223

5224

Linus Torvalds's avatar
Linus Torvalds committed
5225
	/*
5226 5227
	 * first we update the virtual counters
	 * assume there was a prior ia64_srlz_d() issued
Linus Torvalds's avatar
Linus Torvalds committed
5228
	 */
David Mosberger's avatar
David Mosberger committed
5229 5230 5231 5232
	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {

		/* skip pmd which did not overflow */
		if ((mask & 0x1) == 0) continue;
Linus Torvalds's avatar
Linus Torvalds committed
5233 5234

		/*
5235 5236 5237 5238
		 * Note that the pmd is not necessarily 0 at this point as qualified events
		 * may have happened before the PMU was frozen. The residual count is not
		 * taken into consideration here but will be with any read of the pmd via
		 * pfm_read_pmds().
Linus Torvalds's avatar
Linus Torvalds committed
5239
		 */
5240 5241 5242
		old_val              = new_val = ctx->ctx_pmds[i].val;
		new_val             += 1 + ovfl_val;
		ctx->ctx_pmds[i].val = new_val;
David Mosberger's avatar
David Mosberger committed
5243

Linus Torvalds's avatar
Linus Torvalds committed
5244
		/*
David Mosberger's avatar
David Mosberger committed
5245
		 * check for overflow condition
Linus Torvalds's avatar
Linus Torvalds committed
5246
		 */
5247
		if (likely(old_val > new_val)) {
David Mosberger's avatar
David Mosberger committed
5248
			ovfl_pmds |= 1UL << i;
5249
			if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
Linus Torvalds's avatar
Linus Torvalds committed
5250
		}
5251

5252 5253 5254 5255 5256 5257 5258
		DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
			i,
			new_val,
			old_val,
			ia64_get_pmd(i) & ovfl_val,
			ovfl_pmds,
			ovfl_notify));
Linus Torvalds's avatar
Linus Torvalds committed
5259
	}
Linus Torvalds's avatar
Linus Torvalds committed
5260

David Mosberger's avatar
David Mosberger committed
5261 5262 5263 5264 5265 5266 5267 5268 5269
	/*
	 * there was no 64-bit overflow, nothing else to do
	 */
	if (ovfl_pmds == 0UL) return;

	/* 
	 * reset all control bits
	 */
	ovfl_ctrl.val = 0;
5270
	reset_pmds    = 0UL;
5271

Linus Torvalds's avatar
Linus Torvalds committed
5272
	/*
David Mosberger's avatar
David Mosberger committed
5273 5274
	 * if a sampling format module exists, then we "cache" the overflow by 
	 * calling the module's handler() routine.
Linus Torvalds's avatar
Linus Torvalds committed
5275
	 */
David Mosberger's avatar
David Mosberger committed
5276 5277
	if (has_smpl) {
		unsigned long start_cycles, end_cycles;
5278
		unsigned long pmd_mask;
David Mosberger's avatar
David Mosberger committed
5279
		int j, k, ret = 0;
5280 5281
		int this_cpu = smp_processor_id();

5282 5283
		pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
		ovfl_arg = &ctx->ctx_ovfl_arg;
5284 5285 5286

		prefetch(ctx->ctx_smpl_hdr);

David Mosberger's avatar
David Mosberger committed
5287
		for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
5288

David Mosberger's avatar
David Mosberger committed
5289 5290 5291 5292
			mask = 1UL << i;

			if ((pmd_mask & 0x1) == 0) continue;

5293 5294 5295 5296 5297
			ovfl_arg->ovfl_pmd      = (unsigned char )i;
			ovfl_arg->ovfl_notify   = ovfl_notify & mask ? 1 : 0;
			ovfl_arg->active_set    = 0;
			ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
			ovfl_arg->smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
David Mosberger's avatar
David Mosberger committed
5298

5299 5300 5301
			ovfl_arg->pmd_value      = ctx->ctx_pmds[i].val;
			ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
			ovfl_arg->pmd_eventid    = ctx->ctx_pmds[i].eventid;
David Mosberger's avatar
David Mosberger committed
5302 5303 5304 5305 5306 5307 5308 5309

			/*
		 	 * copy values of pmds of interest. Sampling format may copy them
		 	 * into sampling buffer.
		 	 */
			if (smpl_pmds) {
				for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
					if ((smpl_pmds & 0x1) == 0) continue;
5310 5311
					ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
					DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
David Mosberger's avatar
David Mosberger committed
5312
				}
Linus Torvalds's avatar
Linus Torvalds committed
5313
			}
Linus Torvalds's avatar
Linus Torvalds committed
5314

David Mosberger's avatar
David Mosberger committed
5315
			pfm_stats[this_cpu].pfm_smpl_handler_calls++;
Linus Torvalds's avatar
Linus Torvalds committed
5316

David Mosberger's avatar
David Mosberger committed
5317
			start_cycles = ia64_get_itc();
Linus Torvalds's avatar
Linus Torvalds committed
5318

David Mosberger's avatar
David Mosberger committed
5319 5320 5321
			/*
		 	 * call custom buffer format record (handler) routine
		 	 */
5322
			ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
David Mosberger's avatar
David Mosberger committed
5323 5324 5325 5326 5327 5328 5329

			end_cycles = ia64_get_itc();

			/*
			 * For those controls, we take the union because they have
			 * an all or nothing behavior.
			 */
5330 5331 5332
			ovfl_ctrl.bits.notify_user     |= ovfl_arg->ovfl_ctrl.bits.notify_user;
			ovfl_ctrl.bits.block_task      |= ovfl_arg->ovfl_ctrl.bits.block_task;
			ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
5333 5334 5335
			/*
			 * build the bitmask of pmds to reset now
			 */
5336
			if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
Linus Torvalds's avatar
Linus Torvalds committed
5337

David Mosberger's avatar
David Mosberger committed
5338 5339 5340 5341 5342 5343
			pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
		}
		/*
		 * when the module cannot handle the rest of the overflows, we abort right here
		 */
		if (ret && pmd_mask) {
5344
			DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
David Mosberger's avatar
David Mosberger committed
5345 5346
				pmd_mask<<PMU_FIRST_COUNTER));
		}
5347 5348 5349 5350
		/*
		 * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
		 */
		ovfl_pmds &= ~reset_pmds;
David Mosberger's avatar
David Mosberger committed
5351 5352 5353 5354 5355 5356 5357 5358 5359
	} else {
		/*
		 * when no sampling module is used, then the default
		 * is to notify on overflow if requested by user
		 */
		ovfl_ctrl.bits.notify_user     = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.block_task      = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
		ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
5360 5361 5362 5363
		/*
		 * if needed, we reset all overflowed pmds
		 */
		if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
5364 5365
	}

5366
	DPRINT(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n",
5367 5368
		ovfl_pmds,
		reset_pmds));
David Mosberger's avatar
David Mosberger committed
5369
	/*
5370
	 * reset the requested PMD registers using the short reset values
David Mosberger's avatar
David Mosberger committed
5371
	 */
5372 5373
	if (reset_pmds) {
		unsigned long bm = reset_pmds;
David Mosberger's avatar
David Mosberger committed
5374
		pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
Linus Torvalds's avatar
Linus Torvalds committed
5375
	}
5376

David Mosberger's avatar
David Mosberger committed
5377
	if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
5378
		/*
5379
		 * keep track of what to reset when unblocking
5380
		 */
5381 5382
		ctx->ctx_ovfl_regs[0] = ovfl_pmds;

David Mosberger's avatar
David Mosberger committed
5383 5384 5385 5386
		/*
		 * check for blocking context 
		 */
		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
5387 5388 5389 5390

			ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;

			/*
David Mosberger's avatar
David Mosberger committed
5391
			 * set the perfmon specific checking pending work for the task
5392 5393 5394 5395 5396 5397 5398 5399 5400
			 */
			PFM_SET_WORK_PENDING(task, 1);

			/*
			 * when coming from ctxsw, current still points to the
			 * previous task, therefore we must work with task and not current.
			 */
			pfm_set_task_notify(task);
		}
5401
		/*
5402 5403
		 * defer until state is changed (shorten spin window). the context is locked
		 * anyway, so the signal receiver would come spin for nothing.
5404
		 */
5405
		must_notify = 1;
5406
	}
5407

5408
	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
5409 5410 5411 5412 5413
			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
			PFM_GET_WORK_PENDING(task),
			ctx->ctx_fl_trap_reason,
			ovfl_pmds,
			ovfl_notify,
David Mosberger's avatar
David Mosberger committed
5414
			ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
5415 5416 5417
	/*
	 * in case monitoring must be stopped, we toggle the psr bits
	 */
David Mosberger's avatar
David Mosberger committed
5418
	if (ovfl_ctrl.bits.mask_monitoring) {
5419
		pfm_mask_monitoring(task);
5420
		ctx->ctx_state = PFM_CTX_MASKED;
5421
		ctx->ctx_fl_can_restart = 1;
5422
	}
David Mosberger's avatar
David Mosberger committed
5423

Linus Torvalds's avatar
Linus Torvalds committed
5424
	/*
5425
	 * send notification now
Linus Torvalds's avatar
Linus Torvalds committed
5426
	 */
5427
	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
Linus Torvalds's avatar
Linus Torvalds committed
5428

5429 5430 5431 5432 5433 5434 5435 5436
	return;

sanity_check:
	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
			smp_processor_id(),
			task ? task->pid : -1,
			pmc0);
	return;
Linus Torvalds's avatar
Linus Torvalds committed
5437

5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471
stop_monitoring:
	/*
	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
	 * come here as zombie only if the task is the current task. In which case, we
	 * can access the PMU  hardware directly.
	 *
	 * Note that zombies do have PM_VALID set. So here we do the minimal.
	 *
	 * In case the context was zombified it could not be reclaimed at the time
	 * the monitoring program exited. At this point, the PMU reservation has been
	 * returned, the sampiing buffer has been freed. We must convert this call
	 * into a spurious interrupt. However, we must also avoid infinite overflows
	 * by stopping monitoring for this task. We can only come here for a per-task
	 * context. All we need to do is to stop monitoring using the psr bits which
	 * are always task private. By re-enabling secure montioring, we ensure that
	 * the monitored task will not be able to re-activate monitoring.
	 * The task will eventually be context switched out, at which point the context
	 * will be reclaimed (that includes releasing ownership of the PMU).
	 *
	 * So there might be a window of time where the number of per-task session is zero
	 * yet one PMU might have a owner and get at most one overflow interrupt for a zombie
	 * context. This is safe because if a per-task session comes in, it will push this one
	 * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide
	 * session is force on that CPU, given that we use task pinning, pfm_save_regs() will
	 * also push our zombie context out.
	 *
	 * Overall pretty hairy stuff....
	 */
	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
	pfm_clear_psr_up();
	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;
	return;
Linus Torvalds's avatar
Linus Torvalds committed
5472 5473
}

5474 5475
static int
pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
5476
{
David Mosberger's avatar
David Mosberger committed
5477
	struct task_struct *task;
5478
	pfm_context_t *ctx;
5479 5480 5481 5482
	unsigned long flags;
	u64 pmc0;
	int this_cpu = smp_processor_id();
	int retval = 0;
Linus Torvalds's avatar
Linus Torvalds committed
5483

5484
	pfm_stats[this_cpu].pfm_ovfl_intr_count++;
David Mosberger's avatar
David Mosberger committed
5485

5486
	/*
David Mosberger's avatar
David Mosberger committed
5487 5488
	 * srlz.d done before arriving here
	 */
5489 5490 5491 5492
	pmc0 = ia64_get_pmc(0);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();
Linus Torvalds's avatar
Linus Torvalds committed
5493 5494 5495

	/*
	 * if we have some pending bits set
5496
	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
Linus Torvalds's avatar
Linus Torvalds committed
5497
	 */
5498 5499
	if (PMC0_HAS_OVFL(pmc0) && task) {
		/*
5500
		 * we assume that pmc0.fr is always set here
Linus Torvalds's avatar
Linus Torvalds committed
5501
		 */
David Mosberger's avatar
David Mosberger committed
5502

5503
		/* sanity check */
5504
		if (!ctx) goto report_spurious1;
5505

5506 5507
		if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 
			goto report_spurious2;
5508 5509 5510 5511 5512 5513 5514

		PROTECT_CTX_NOPRINT(ctx, flags);

		pfm_overflow_handler(task, ctx, pmc0, regs);

		UNPROTECT_CTX_NOPRINT(ctx, flags);

Linus Torvalds's avatar
Linus Torvalds committed
5515
	} else {
5516 5517 5518 5519 5520 5521 5522 5523 5524 5525
		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
		retval = -1;
	}
	/*
	 * keep it unfrozen at all times
	 */
	pfm_unfreeze_pmu();

	return retval;

5526
report_spurious1:
5527 5528 5529 5530
	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
		this_cpu, task->pid);
	pfm_unfreeze_pmu();
	return -1;
5531 5532 5533 5534 5535 5536
report_spurious2:
	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
		this_cpu, 
		task->pid);
	pfm_unfreeze_pmu();
	return -1;
5537 5538
}

5539
static irqreturn_t
5540 5541
pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
{
David Mosberger's avatar
David Mosberger committed
5542
	unsigned long start_cycles, total_cycles;
5543 5544 5545 5546
	unsigned long min, max;
	int this_cpu;
	int ret;

5547
	this_cpu = get_cpu();
5548 5549 5550
	min      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
	max      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;

David Mosberger's avatar
David Mosberger committed
5551
	start_cycles = ia64_get_itc();
5552 5553 5554

	ret = pfm_do_interrupt_handler(irq, arg, regs);

David Mosberger's avatar
David Mosberger committed
5555
	total_cycles = ia64_get_itc();
5556 5557 5558 5559

	/*
	 * don't measure spurious interrupts
	 */
David Mosberger's avatar
David Mosberger committed
5560 5561 5562 5563 5564 5565 5566
	if (likely(ret == 0)) {
		total_cycles -= start_cycles;

		if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
		if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;

		pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
Linus Torvalds's avatar
Linus Torvalds committed
5567
	}
5568 5569
	put_cpu_no_resched();
	return IRQ_HANDLED;
Linus Torvalds's avatar
Linus Torvalds committed
5570 5571
}

5572 5573 5574
/*
 * /proc/perfmon interface, for debug only
 */
5575

5576 5577 5578 5579
#define PFM_PROC_SHOW_HEADER	((void *)NR_CPUS+1)

static void *
pfm_proc_start(struct seq_file *m, loff_t *pos)
Linus Torvalds's avatar
Linus Torvalds committed
5580
{
5581 5582 5583
	if (*pos == 0) {
		return PFM_PROC_SHOW_HEADER;
	}
Linus Torvalds's avatar
Linus Torvalds committed
5584

5585 5586 5587 5588 5589
	while (*pos <= NR_CPUS) {
		if (cpu_online(*pos - 1)) {
			return (void *)*pos;
		}
		++*pos;
5590
	}
5591 5592
	return NULL;
}
5593

5594 5595 5596 5597 5598 5599
static void *
pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return pfm_proc_start(m, pos);
}
Linus Torvalds's avatar
Linus Torvalds committed
5600

5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639
static void
pfm_proc_stop(struct seq_file *m, void *v)
{
}

static void
pfm_proc_show_header(struct seq_file *m)
{
	struct list_head * pos;
	pfm_buffer_fmt_t * entry;
	unsigned long flags;

 	seq_printf(m,
		"perfmon version           : %u.%u\n"
		"model                     : %s\n"
		"fastctxsw                 : %s\n"
		"expert mode               : %s\n"
		"ovfl_mask                 : 0x%lx\n"
		"PMU flags                 : 0x%x\n",
		PFM_VERSION_MAJ, PFM_VERSION_MIN,
		pmu_conf->pmu_name,
		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
		pfm_sysctl.expert_mode > 0 ? "Yes": "No",
		pmu_conf->ovfl_val,
		pmu_conf->flags);

  	LOCK_PFS(flags);

 	seq_printf(m,
 		"proc_sessions             : %u\n"
 		"sys_sessions              : %u\n"
 		"sys_use_dbregs            : %u\n"
 		"ptrace_use_dbregs         : %u\n",
 		pfm_sessions.pfs_task_sessions,
 		pfm_sessions.pfs_sys_sessions,
 		pfm_sessions.pfs_sys_use_dbregs,
 		pfm_sessions.pfs_ptrace_use_dbregs);

  	UNLOCK_PFS(flags);
5640

5641
	spin_lock(&pfm_buffer_fmt_lock);
5642

5643 5644
	list_for_each(pos, &pfm_buffer_fmt_list) {
		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662
		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
			entry->fmt_uuid[0],
			entry->fmt_uuid[1],
			entry->fmt_uuid[2],
			entry->fmt_uuid[3],
			entry->fmt_uuid[4],
			entry->fmt_uuid[5],
			entry->fmt_uuid[6],
			entry->fmt_uuid[7],
			entry->fmt_uuid[8],
			entry->fmt_uuid[9],
			entry->fmt_uuid[10],
			entry->fmt_uuid[11],
			entry->fmt_uuid[12],
			entry->fmt_uuid[13],
			entry->fmt_uuid[14],
			entry->fmt_uuid[15],
			entry->fmt_name);
5663 5664
	}
	spin_unlock(&pfm_buffer_fmt_lock);
5665

Linus Torvalds's avatar
Linus Torvalds committed
5666 5667 5668
}

static int
5669
pfm_proc_show(struct seq_file *m, void *v)
Linus Torvalds's avatar
Linus Torvalds committed
5670
{
5671 5672
	unsigned long psr;
	unsigned int i;
5673 5674 5675 5676 5677 5678
	int cpu;

	if (v == PFM_PROC_SHOW_HEADER) {
		pfm_proc_show_header(m);
		return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
5679

5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711
	/* show info for CPU (v - 1) */

	cpu = (long)v - 1;
	seq_printf(m,
		"CPU%-2d overflow intrs      : %lu\n"
		"CPU%-2d overflow cycles     : %lu\n"
		"CPU%-2d overflow min        : %lu\n"
		"CPU%-2d overflow max        : %lu\n"
		"CPU%-2d smpl handler calls  : %lu\n"
		"CPU%-2d smpl handler cycles : %lu\n"
		"CPU%-2d spurious intrs      : %lu\n"
		"CPU%-2d replay   intrs      : %lu\n"
		"CPU%-2d syst_wide           : %d\n"
		"CPU%-2d dcr_pp              : %d\n"
		"CPU%-2d exclude idle        : %d\n"
		"CPU%-2d owner               : %d\n"
		"CPU%-2d context             : %p\n"
		"CPU%-2d activations         : %lu\n",
		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
		cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
		cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
		cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
		cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
		cpu, pfm_get_cpu_data(pmu_ctx, cpu),
		cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
Linus Torvalds's avatar
Linus Torvalds committed
5712

5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733
	if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {

		psr = pfm_get_psr();

		ia64_srlz_d();

		seq_printf(m, 
			"CPU%-2d psr                 : 0x%lx\n"
			"CPU%-2d pmc0                : 0x%lx\n", 
			cpu, psr,
			cpu, ia64_get_pmc(0));

		for (i=0; PMC_IS_LAST(i) == 0;  i++) {
			if (PMC_IS_COUNTING(i) == 0) continue;
   			seq_printf(m, 
				"CPU%-2d pmc%u                : 0x%lx\n"
   				"CPU%-2d pmd%u                : 0x%lx\n", 
				cpu, i, ia64_get_pmc(i),
				cpu, i, ia64_get_pmd(i));
  		}
	}
5734 5735
	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
5736

5737 5738 5739 5740 5741 5742
struct seq_operations pfm_seq_ops = {
	.start =	pfm_proc_start,
 	.next =		pfm_proc_next,
 	.stop =		pfm_proc_stop,
 	.show =		pfm_proc_show
};
Linus Torvalds's avatar
Linus Torvalds committed
5743

5744 5745 5746 5747
static int
pfm_proc_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pfm_seq_ops);
Linus Torvalds's avatar
Linus Torvalds committed
5748 5749
}

5750

5751
/*
5752
 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
5753
 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
5754 5755
 * is active or inactive based on mode. We must rely on the value in
 * local_cpu_data->pfm_syst_info
5756
 */
David Mosberger's avatar
David Mosberger committed
5757
void
5758
pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
Linus Torvalds's avatar
Linus Torvalds committed
5759
{
5760 5761 5762
	struct pt_regs *regs;
	unsigned long dcr;
	unsigned long dcr_pp;
Linus Torvalds's avatar
Linus Torvalds committed
5763

5764
	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
Linus Torvalds's avatar
Linus Torvalds committed
5765

David Mosberger's avatar
David Mosberger committed
5766
	/*
5767
	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
5768 5769 5770
	 * on every CPU, so we can rely on the pid to identify the idle task.
	 */
	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
5771
		regs = ia64_task_regs(task);
5772 5773 5774 5775 5776
		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
		return;
	}
	/*
	 * if monitoring has started
David Mosberger's avatar
David Mosberger committed
5777
	 */
5778
	if (dcr_pp) {
5779
		dcr = ia64_getreg(_IA64_REG_CR_DCR);
5780 5781
		/*
		 * context switching in?
5782 5783 5784
		 */
		if (is_ctxswin) {
			/* mask monitoring for the idle task */
5785
			ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
5786 5787 5788 5789
			pfm_clear_psr_pp();
			ia64_srlz_i();
			return;
		}
5790
		/*
5791
		 * context switching out
5792
		 * restore monitoring for next task
5793
		 *
5794
		 * Due to inlining this odd if-then-else construction generates
5795 5796
		 * better code.
		 */
5797
		ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
5798 5799 5800
		pfm_set_psr_pp();
		ia64_srlz_i();
	}
David Mosberger's avatar
David Mosberger committed
5801
}
Linus Torvalds's avatar
Linus Torvalds committed
5802

5803
#ifdef CONFIG_SMP
5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829

static void
pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
{
	struct task_struct *task = ctx->ctx_task;

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;

	if (GET_PMU_OWNER() == task) {
		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
		SET_PMU_OWNER(NULL, NULL);
	}

	/*
	 * disconnect the task from the context and vice-versa
	 */
	PFM_SET_WORK_PENDING(task, 0);

	task->thread.pfm_context  = NULL;
	task->thread.flags       &= ~IA64_THREAD_PM_VALID;

	DPRINT(("force cleanup for [%d]\n",  task->pid));
}


5830 5831 5832
/*
 * in 2.6, interrupts are masked when we come here and the runqueue lock is held
 */
5833 5834
void
pfm_save_regs(struct task_struct *task)
David Mosberger's avatar
David Mosberger committed
5835 5836
{
	pfm_context_t *ctx;
5837
	struct thread_struct *t;
5838 5839
	unsigned long flags;
	u64 psr;
David Mosberger's avatar
David Mosberger committed
5840

David Mosberger's avatar
David Mosberger committed
5841

5842
	ctx = PFM_GET_CTX(task);
5843
	if (ctx == NULL) return;
5844
	t = &task->thread;
Linus Torvalds's avatar
Linus Torvalds committed
5845 5846

	/*
5847 5848 5849
 	 * we always come here with interrupts ALREADY disabled by
 	 * the scheduler. So we simply need to protect against concurrent
	 * access, not CPU concurrency.
Linus Torvalds's avatar
Linus Torvalds committed
5850
	 */
5851
	flags = pfm_protect_ctx_ctxsw(ctx);
5852

5853
	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
5854
		struct pt_regs *regs = ia64_task_regs(task);
5855

5856
		pfm_clear_psr_up();
David Mosberger's avatar
David Mosberger committed
5857

5858
		pfm_force_cleanup(ctx, regs);
5859

5860
		BUG_ON(ctx->ctx_smpl_hdr);
5861

5862 5863 5864
		pfm_unprotect_ctx_ctxsw(ctx, flags);

		pfm_context_free(ctx);
5865 5866 5867
		return;
	}

5868
	/*
5869
	 * sanity check
5870
	 */
5871 5872
	if (ctx->ctx_last_activation != GET_ACTIVATION()) {
		pfm_unprotect_ctx_ctxsw(ctx, flags);
Linus Torvalds's avatar
Linus Torvalds committed
5873 5874
		return;
	}
5875

Linus Torvalds's avatar
Linus Torvalds committed
5876
	/*
5877
	 * save current PSR: needed because we modify it
Linus Torvalds's avatar
Linus Torvalds committed
5878
	 */
David Mosberger's avatar
David Mosberger committed
5879
	ia64_srlz_d();
5880
	psr = pfm_get_psr();
David Mosberger's avatar
David Mosberger committed
5881

David Mosberger's avatar
David Mosberger committed
5882 5883
	BUG_ON(psr & (IA64_PSR_I));

Linus Torvalds's avatar
Linus Torvalds committed
5884
	/*
5885 5886
	 * stop monitoring:
	 * This is the last instruction which may generate an overflow
5887
	 *
5888 5889
	 * We do not need to set psr.sp because, it is irrelevant in kernel.
	 * It will be restored from ipsr when going back to user level
Linus Torvalds's avatar
Linus Torvalds committed
5890
	 */
5891
	pfm_clear_psr_up();
David Mosberger's avatar
David Mosberger committed
5892

5893
	/*
David Mosberger's avatar
David Mosberger committed
5894
	 * keep a copy of psr.up (for reload)
5895
	 */
David Mosberger's avatar
David Mosberger committed
5896 5897
	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;

David Mosberger's avatar
David Mosberger committed
5898
	/*
5899 5900 5901
	 * release ownership of this PMU.
	 * PM interrupts are masked, so nothing
	 * can happen.
David Mosberger's avatar
David Mosberger committed
5902
	 */
5903
	SET_PMU_OWNER(NULL, NULL);
David Mosberger's avatar
David Mosberger committed
5904 5905

	/*
5906 5907 5908
	 * we systematically save the PMD as we have no
	 * guarantee we will be schedule at that same
	 * CPU again.
David Mosberger's avatar
David Mosberger committed
5909
	 */
5910
	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
David Mosberger's avatar
David Mosberger committed
5911 5912

	/*
5913 5914 5915
	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
	 * we will need it on the restore path to check
	 * for pending overflow.
David Mosberger's avatar
David Mosberger committed
5916
	 */
5917
	t->pmcs[0] = ia64_get_pmc(0);
5918

David Mosberger's avatar
David Mosberger committed
5919
	/*
5920
	 * unfreeze PMU if had pending overflows
David Mosberger's avatar
David Mosberger committed
5921
	 */
5922
	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
Linus Torvalds's avatar
Linus Torvalds committed
5923

David Mosberger's avatar
David Mosberger committed
5924
	/*
5925 5926
	 * finally, allow context access.
	 * interrupts will still be masked after this call.
5927
	 */
5928
	pfm_unprotect_ctx_ctxsw(ctx, flags);
David Mosberger's avatar
David Mosberger committed
5929
}
Linus Torvalds's avatar
Linus Torvalds committed
5930

5931
#else /* !CONFIG_SMP */
Linus Torvalds's avatar
Linus Torvalds committed
5932
void
5933
pfm_save_regs(struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
5934 5935
{
	pfm_context_t *ctx;
5936
	u64 psr;
Linus Torvalds's avatar
Linus Torvalds committed
5937

5938
	ctx = PFM_GET_CTX(task);
5939
	if (ctx == NULL) return;
David Mosberger's avatar
David Mosberger committed
5940

5941 5942
	/*
	 * save current PSR: needed because we modify it
Linus Torvalds's avatar
Linus Torvalds committed
5943
	 */
5944
	psr = pfm_get_psr();
Linus Torvalds's avatar
Linus Torvalds committed
5945

5946
	BUG_ON(psr & (IA64_PSR_I));
5947

Linus Torvalds's avatar
Linus Torvalds committed
5948 5949
	/*
	 * stop monitoring:
5950 5951 5952 5953
	 * This is the last instruction which may generate an overflow
	 *
	 * We do not need to set psr.sp because, it is irrelevant in kernel.
	 * It will be restored from ipsr when going back to user level
Linus Torvalds's avatar
Linus Torvalds committed
5954
	 */
5955
	pfm_clear_psr_up();
5956

5957
	/*
David Mosberger's avatar
David Mosberger committed
5958
	 * keep a copy of psr.up (for reload)
5959
	 */
David Mosberger's avatar
David Mosberger committed
5960
	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
5961
}
5962

5963 5964 5965 5966 5967 5968
static void
pfm_lazy_save_regs (struct task_struct *task)
{
	pfm_context_t *ctx;
	struct thread_struct *t;
	unsigned long flags;
David Mosberger's avatar
David Mosberger committed
5969

5970 5971
	{ u64 psr  = pfm_get_psr();
	  BUG_ON(psr & IA64_PSR_UP);
David Mosberger's avatar
David Mosberger committed
5972
	}
Linus Torvalds's avatar
Linus Torvalds committed
5973

5974 5975 5976
	ctx = PFM_GET_CTX(task);
	t   = &task->thread;

Linus Torvalds's avatar
Linus Torvalds committed
5977
	/*
5978 5979 5980 5981 5982
	 * we need to mask PMU overflow here to
	 * make sure that we maintain pmc0 until
	 * we save it. overflow interrupts are
	 * treated as spurious if there is no
	 * owner.
Linus Torvalds's avatar
Linus Torvalds committed
5983
	 *
5984
	 * XXX: I don't think this is necessary
Linus Torvalds's avatar
Linus Torvalds committed
5985
	 */
5986
	PROTECT_CTX(ctx,flags);
Linus Torvalds's avatar
Linus Torvalds committed
5987 5988

	/*
5989 5990
	 * release ownership of this PMU.
	 * must be done before we save the registers.
Linus Torvalds's avatar
Linus Torvalds committed
5991
	 *
5992 5993
	 * after this call any PMU interrupt is treated
	 * as spurious.
Linus Torvalds's avatar
Linus Torvalds committed
5994
	 */
5995
	SET_PMU_OWNER(NULL, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
5996 5997

	/*
5998
	 * save all the pmds we use
Linus Torvalds's avatar
Linus Torvalds committed
5999
	 */
6000
	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
Linus Torvalds's avatar
Linus Torvalds committed
6001

Linus Torvalds's avatar
Linus Torvalds committed
6002
	/*
6003 6004 6005
	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
	 * it is needed to check for pended overflow
	 * on the restore path
Linus Torvalds's avatar
Linus Torvalds committed
6006
	 */
6007
	t->pmcs[0] = ia64_get_pmc(0);
Linus Torvalds's avatar
Linus Torvalds committed
6008

6009
	/*
6010
	 * unfreeze PMU if had pending overflows
6011
	 */
6012
	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
Linus Torvalds's avatar
Linus Torvalds committed
6013

6014 6015 6016 6017
	/*
	 * now get can unmask PMU interrupts, they will
	 * be treated as purely spurious and we will not
	 * lose any information
David Mosberger's avatar
David Mosberger committed
6018
	 */
6019
	UNPROTECT_CTX(ctx,flags);
Linus Torvalds's avatar
Linus Torvalds committed
6020
}
6021
#endif /* CONFIG_SMP */
Linus Torvalds's avatar
Linus Torvalds committed
6022

6023
#ifdef CONFIG_SMP
6024 6025 6026
/*
 * in 2.6, interrupts are masked when we come here and the runqueue lock is held
 */
6027 6028
void
pfm_load_regs (struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
6029
{
6030
	pfm_context_t *ctx;
6031 6032 6033
	struct thread_struct *t;
	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
	unsigned long flags;
David Mosberger's avatar
David Mosberger committed
6034
	u64 psr, psr_up;
6035
	int need_irq_resend;
6036

6037
	ctx = PFM_GET_CTX(task);
6038
	if (unlikely(ctx == NULL)) return;
6039

David Mosberger's avatar
David Mosberger committed
6040 6041
	BUG_ON(GET_PMU_OWNER());

6042
	t     = &task->thread;
6043
	/*
6044
	 * possible on unload
6045
	 */
6046
	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
Linus Torvalds's avatar
Linus Torvalds committed
6047

Linus Torvalds's avatar
Linus Torvalds committed
6048
	/*
6049 6050 6051
 	 * we always come here with interrupts ALREADY disabled by
 	 * the scheduler. So we simply need to protect against concurrent
	 * access, not CPU concurrency.
Linus Torvalds's avatar
Linus Torvalds committed
6052
	 */
6053
	flags = pfm_protect_ctx_ctxsw(ctx);
David Mosberger's avatar
David Mosberger committed
6054 6055
	psr   = pfm_get_psr();

6056 6057
	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;

David Mosberger's avatar
David Mosberger committed
6058 6059
	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	BUG_ON(psr & IA64_PSR_I);
6060

6061
	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
6062
		struct pt_regs *regs = ia64_task_regs(task);
6063

6064
		BUG_ON(ctx->ctx_smpl_hdr);
6065

6066
		pfm_force_cleanup(ctx, regs);
6067

6068
		pfm_unprotect_ctx_ctxsw(ctx, flags);
Linus Torvalds's avatar
Linus Torvalds committed
6069

6070 6071 6072 6073
		/*
		 * this one (kmalloc'ed) is fine with interrupts disabled
		 */
		pfm_context_free(ctx);
Linus Torvalds's avatar
Linus Torvalds committed
6074

6075 6076
		return;
	}
David Mosberger's avatar
David Mosberger committed
6077

6078 6079 6080 6081 6082
	/*
	 * we restore ALL the debug registers to avoid picking up
	 * stale state.
	 */
	if (ctx->ctx_fl_using_dbreg) {
6083 6084
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
6085 6086
	}
	/*
David Mosberger's avatar
David Mosberger committed
6087
	 * retrieve saved psr.up
6088
	 */
David Mosberger's avatar
David Mosberger committed
6089
	psr_up = ctx->ctx_saved_psr_up;
David Mosberger's avatar
David Mosberger committed
6090

6091 6092 6093 6094 6095
	/*
	 * if we were the last user of the PMU on that CPU,
	 * then nothing to do except restore psr
	 */
	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
David Mosberger's avatar
David Mosberger committed
6096

6097 6098 6099 6100 6101
		/*
		 * retrieve partial reload masks (due to user modifications)
		 */
		pmc_mask = ctx->ctx_reload_pmcs[0];
		pmd_mask = ctx->ctx_reload_pmds[0];
David Mosberger's avatar
David Mosberger committed
6102

6103 6104 6105 6106 6107 6108 6109 6110
	} else {
		/*
	 	 * To avoid leaking information to the user level when psr.sp=0,
	 	 * we must reload ALL implemented pmds (even the ones we don't use).
	 	 * In the kernel we only allow PFM_READ_PMDS on registers which
	 	 * we initialized or requested (sampling) so there is no risk there.
	 	 */
		pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
David Mosberger's avatar
David Mosberger committed
6111

6112 6113 6114 6115 6116 6117 6118 6119
		/*
	 	 * ALL accessible PMCs are systematically reloaded, unused registers
	 	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
	 	 * up stale configuration.
	 	 *
	 	 * PMC0 is never in the mask. It is always restored separately.
	 	 */
		pmc_mask = ctx->ctx_all_pmcs[0];
Linus Torvalds's avatar
Linus Torvalds committed
6120
	}
6121
	/*
6122 6123 6124
	 * when context is MASKED, we will restore PMC with plm=0
	 * and PMD with stale information, but that's ok, nothing
	 * will be captured.
6125
	 *
6126
	 * XXX: optimize here
6127
	 */
6128 6129
	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
6130

6131 6132 6133 6134 6135
	/*
	 * check for pending overflow at the time the state
	 * was saved.
	 */
	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
6136 6137 6138 6139 6140 6141 6142
		/*
		 * reload pmc0 with the overflow information
		 * On McKinley PMU, this will trigger a PMU interrupt
		 */
		ia64_set_pmc(0, t->pmcs[0]);
		ia64_srlz_d();
		t->pmcs[0] = 0UL;
6143

6144 6145 6146
		/*
		 * will replay the PMU interrupt
		 */
6147 6148
		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);

6149
		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
6150
	}
6151

6152 6153 6154 6155 6156
	/*
	 * we just did a reload, so we reset the partial reload fields
	 */
	ctx->ctx_reload_pmcs[0] = 0UL;
	ctx->ctx_reload_pmds[0] = 0UL;
Linus Torvalds's avatar
Linus Torvalds committed
6157

6158
	SET_LAST_CPU(ctx, smp_processor_id());
Linus Torvalds's avatar
Linus Torvalds committed
6159

6160
	/*
6161
	 * dump activation value for this PMU
6162
	 */
6163
	INC_ACTIVATION();
6164
	/*
6165
	 * record current activation for this context
6166
	 */
6167
	SET_ACTIVATION(ctx);
Linus Torvalds's avatar
Linus Torvalds committed
6168 6169

	/*
6170
	 * establish new ownership. 
Linus Torvalds's avatar
Linus Torvalds committed
6171
	 */
6172
	SET_PMU_OWNER(task, ctx);
Linus Torvalds's avatar
Linus Torvalds committed
6173

6174
	/*
6175 6176 6177 6178
	 * restore the psr.up bit. measurement
	 * is active again.
	 * no PMU interrupt can happen at this point
	 * because we still have interrupts disabled.
6179
	 */
David Mosberger's avatar
David Mosberger committed
6180
	if (likely(psr_up)) pfm_set_psr_up();
6181

6182 6183 6184 6185
	/*
	 * allow concurrent access to context
	 */
	pfm_unprotect_ctx_ctxsw(ctx, flags);
Linus Torvalds's avatar
Linus Torvalds committed
6186
}
6187 6188 6189 6190
#else /*  !CONFIG_SMP */
/*
 * reload PMU state for UP kernels
 * in 2.5 we come here with interrupts disabled
Linus Torvalds's avatar
Linus Torvalds committed
6191
 */
Linus Torvalds's avatar
Linus Torvalds committed
6192
void
6193
pfm_load_regs (struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
6194
{
6195 6196 6197 6198
	struct thread_struct *t;
	pfm_context_t *ctx;
	struct task_struct *owner;
	unsigned long pmd_mask, pmc_mask;
David Mosberger's avatar
David Mosberger committed
6199
	u64 psr, psr_up;
6200
	int need_irq_resend;
Linus Torvalds's avatar
Linus Torvalds committed
6201

David Mosberger's avatar
David Mosberger committed
6202 6203 6204 6205
	owner = GET_PMU_OWNER();
	ctx   = PFM_GET_CTX(task);
	t     = &task->thread;
	psr   = pfm_get_psr();
David Mosberger's avatar
David Mosberger committed
6206

David Mosberger's avatar
David Mosberger committed
6207 6208
	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	BUG_ON(psr & IA64_PSR_I);
David Mosberger's avatar
David Mosberger committed
6209

6210 6211 6212
	/*
	 * we restore ALL the debug registers to avoid picking up
	 * stale state.
David Mosberger's avatar
David Mosberger committed
6213
	 *
6214 6215 6216 6217 6218
	 * This must be done even when the task is still the owner
	 * as the registers may have been modified via ptrace()
	 * (not perfmon) by the previous task.
	 */
	if (ctx->ctx_fl_using_dbreg) {
6219 6220
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
David Mosberger's avatar
David Mosberger committed
6221
	}
Linus Torvalds's avatar
Linus Torvalds committed
6222

6223
	/*
David Mosberger's avatar
David Mosberger committed
6224
	 * retrieved saved psr.up
6225
	 */
David Mosberger's avatar
David Mosberger committed
6226
	psr_up = ctx->ctx_saved_psr_up;
6227
	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
David Mosberger's avatar
David Mosberger committed
6228

6229 6230 6231 6232 6233 6234 6235 6236 6237
	/*
	 * short path, our state is still there, just
	 * need to restore psr and we go
	 *
	 * we do not touch either PMC nor PMD. the psr is not touched
	 * by the overflow_handler. So we are safe w.r.t. to interrupt
	 * concurrency even without interrupt masking.
	 */
	if (likely(owner == task)) {
David Mosberger's avatar
David Mosberger committed
6238
		if (likely(psr_up)) pfm_set_psr_up();
6239
		return;
Linus Torvalds's avatar
Linus Torvalds committed
6240 6241
	}

6242 6243 6244 6245 6246 6247 6248
	/*
	 * someone else is still using the PMU, first push it out and
	 * then we'll be able to install our stuff !
	 *
	 * Upon return, there will be no owner for the current PMU
	 */
	if (owner) pfm_lazy_save_regs(owner);
Linus Torvalds's avatar
Linus Torvalds committed
6249

6250 6251 6252 6253 6254
	/*
	 * To avoid leaking information to the user level when psr.sp=0,
	 * we must reload ALL implemented pmds (even the ones we don't use).
	 * In the kernel we only allow PFM_READ_PMDS on registers which
	 * we initialized or requested (sampling) so there is no risk there.
Linus Torvalds's avatar
Linus Torvalds committed
6255
	 */
6256
	pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
David Mosberger's avatar
David Mosberger committed
6257

6258 6259 6260 6261 6262 6263 6264 6265
	/*
	 * ALL accessible PMCs are systematically reloaded, unused registers
	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
	 * up stale configuration.
	 *
	 * PMC0 is never in the mask. It is always restored separately
	 */
	pmc_mask = ctx->ctx_all_pmcs[0];
David Mosberger's avatar
David Mosberger committed
6266

6267 6268
	pfm_restore_pmds(t->pmds, pmd_mask);
	pfm_restore_pmcs(t->pmcs, pmc_mask);
David Mosberger's avatar
David Mosberger committed
6269

6270
	/*
6271 6272
	 * check for pending overflow at the time the state
	 * was saved.
6273 6274
	 */
	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
6275 6276 6277 6278 6279 6280
		/*
		 * reload pmc0 with the overflow information
		 * On McKinley PMU, this will trigger a PMU interrupt
		 */
		ia64_set_pmc(0, t->pmcs[0]);
		ia64_srlz_d();
6281

6282 6283 6284 6285 6286
		t->pmcs[0] = 0UL;

		/*
		 * will replay the PMU interrupt
		 */
6287 6288
		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);

6289 6290
		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
	}
David Mosberger's avatar
David Mosberger committed
6291

6292
	/*
6293
	 * establish new ownership. 
6294 6295
	 */
	SET_PMU_OWNER(task, ctx);
David Mosberger's avatar
David Mosberger committed
6296

6297
	/*
6298 6299 6300 6301
	 * restore the psr.up bit. measurement
	 * is active again.
	 * no PMU interrupt can happen at this point
	 * because we still have interrupts disabled.
6302
	 */
David Mosberger's avatar
David Mosberger committed
6303
	if (likely(psr_up)) pfm_set_psr_up();
David Mosberger's avatar
David Mosberger committed
6304
}
6305
#endif /* CONFIG_SMP */
David Mosberger's avatar
David Mosberger committed
6306 6307

/*
6308
 * this function assumes monitoring is stopped
David Mosberger's avatar
David Mosberger committed
6309
 */
6310 6311
static void
pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
David Mosberger's avatar
David Mosberger committed
6312
{
6313
	u64 pmc0;
6314
	unsigned long mask2, val, pmd_val, ovfl_val;
6315 6316
	int i, can_access_pmu = 0;
	int is_self;
David Mosberger's avatar
David Mosberger committed
6317

6318 6319 6320 6321 6322
	/*
	 * is the caller the task being monitored (or which initiated the
	 * session for system wide measurements)
	 */
	is_self = ctx->ctx_task == task ? 1 : 0;
David Mosberger's avatar
David Mosberger committed
6323

6324 6325 6326 6327 6328 6329 6330 6331 6332
#ifdef CONFIG_SMP
	if (task == current) {
#else
	/*
	 * in UP, the state can still be in the registers
	 */
	if (task == current || GET_PMU_OWNER() == task) {
#endif
		can_access_pmu = 1;
David Mosberger's avatar
David Mosberger committed
6333
		/*
6334 6335 6336 6337 6338 6339
		 * Mark the PMU as not owned
		 * This will cause the interrupt handler to do nothing in case an overflow
		 * interrupt was in-flight
		 * This also guarantees that pmc0 will contain the final state
		 * It virtually gives us full control on overflow processing from that point
		 * on.
David Mosberger's avatar
David Mosberger committed
6340
		 */
6341
		SET_PMU_OWNER(NULL, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
6342

6343 6344 6345 6346 6347 6348 6349
		/*
		 * read current overflow status:
		 *
		 * we are guaranteed to read the final stable state
		 */
		ia64_srlz_d();
		pmc0 = ia64_get_pmc(0); /* slow */
David Mosberger's avatar
David Mosberger committed
6350

6351 6352 6353 6354 6355 6356 6357 6358 6359
		/*
		 * reset freeze bit, overflow status information destroyed
		 */
		pfm_unfreeze_pmu();
	} else {
		pmc0 = task->thread.pmcs[0];
		/*
		 * clear whatever overflow status bits there were
		 */
6360
		task->thread.pmcs[0] = 0;
6361
	}
6362
	ovfl_val = pmu_conf->ovfl_val;
6363 6364 6365 6366 6367 6368 6369 6370
	/*
	 * we save all the used pmds
	 * we take care of overflows for counting PMDs
	 *
	 * XXX: sampling situation is not taken into account here
	 */
	mask2 = ctx->ctx_used_pmds[0];
	for (i = 0; mask2; i++, mask2>>=1) {
Linus Torvalds's avatar
Linus Torvalds committed
6371

6372 6373
		/* skip non used pmds */
		if ((mask2 & 0x1) == 0) continue;
Linus Torvalds's avatar
Linus Torvalds committed
6374 6375

		/*
6376
		 * can access PMU always true in system wide mode
Linus Torvalds's avatar
Linus Torvalds committed
6377
		 */
6378
		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
Linus Torvalds's avatar
Linus Torvalds committed
6379

6380 6381 6382 6383 6384
		if (PMD_IS_COUNTING(i)) {
			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
				task->pid,
				i,
				ctx->ctx_pmds[i].val,
6385
				val & ovfl_val));
Linus Torvalds's avatar
Linus Torvalds committed
6386 6387

			/*
6388
			 * we rebuild the full 64 bit value of the counter
Linus Torvalds's avatar
Linus Torvalds committed
6389
			 */
6390
			val = ctx->ctx_pmds[i].val + (val & ovfl_val);
Linus Torvalds's avatar
Linus Torvalds committed
6391

6392 6393 6394 6395 6396 6397
			/*
			 * now everything is in ctx_pmds[] and we need
			 * to clear the saved context from save_regs() such that
			 * pfm_read_pmds() gets the correct value
			 */
			pmd_val = 0UL;
Linus Torvalds's avatar
Linus Torvalds committed
6398

6399 6400 6401 6402
			/*
			 * take care of overflow inline
			 */
			if (pmc0 & (1UL << i)) {
6403
				val += 1 + ovfl_val;
6404 6405
				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
			}
Linus Torvalds's avatar
Linus Torvalds committed
6406
		}
6407

6408
		DPRINT(("[%d] is_self=%d ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, is_self, i, val, pmd_val));
6409

6410
		if (is_self) task->thread.pmds[i] = pmd_val;
David Mosberger's avatar
David Mosberger committed
6411

6412 6413
		ctx->ctx_pmds[i].val = val;
	}
David Mosberger's avatar
David Mosberger committed
6414
}
Linus Torvalds's avatar
Linus Torvalds committed
6415

David Mosberger's avatar
David Mosberger committed
6416
static struct irqaction perfmon_irqaction = {
6417 6418 6419
	.handler = pfm_interrupt_handler,
	.flags   = SA_INTERRUPT,
	.name    = "perfmon"
David Mosberger's avatar
David Mosberger committed
6420 6421 6422 6423 6424
};

/*
 * perfmon initialization routine, called from the initcall() table
 */
6425 6426
static int init_pfm_fs(void);

6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449
static int __init
pfm_probe_pmu(void)
{
	pmu_config_t **p;
	int family;

	family = local_cpu_data->family;
	p      = pmu_confs;

	while(*p) {
		if ((*p)->probe) {
			if ((*p)->probe() == 0) goto found;
		} else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
			goto found;
		}
		p++;
	}
	return -1;
found:
	pmu_conf = *p;
	return 0;
}

6450 6451 6452 6453 6454 6455 6456
static struct file_operations pfm_proc_fops = {
	.open		= pfm_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

David Mosberger's avatar
David Mosberger committed
6457
int __init
6458
pfm_init(void)
David Mosberger's avatar
David Mosberger committed
6459
{
6460
	unsigned int n, n_counters, i;
David Mosberger's avatar
David Mosberger committed
6461

6462 6463 6464 6465
	printk("perfmon: version %u.%u IRQ %u\n",
		PFM_VERSION_MAJ,
		PFM_VERSION_MIN,
		IA64_PERFMON_VECTOR);
David Mosberger's avatar
David Mosberger committed
6466

6467 6468 6469
	if (pfm_probe_pmu()) {
		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", 
				local_cpu_data->family);
6470 6471
		return -ENODEV;
	}
David Mosberger's avatar
David Mosberger committed
6472

6473
	/*
6474 6475
	 * compute the number of implemented PMD/PMC from the
	 * description tables
6476
	 */
6477 6478 6479
	n = 0;
	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
6480
		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
6481 6482
		n++;
	}
6483
	pmu_conf->num_pmcs = n;
6484 6485 6486 6487

	n = 0; n_counters = 0;
	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
6488
		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
6489 6490 6491
		n++;
		if (PMD_IS_COUNTING(i)) n_counters++;
	}
6492 6493
	pmu_conf->num_pmds      = n;
	pmu_conf->num_counters  = n_counters;
David Mosberger's avatar
David Mosberger committed
6494

6495 6496 6497
	/*
	 * sanity checks on the number of debug registers
	 */
6498 6499 6500 6501
	if (pmu_conf->use_rr_dbregs) {
		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
			pmu_conf = NULL;
6502 6503
			return -1;
		}
6504 6505 6506
		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs);
			pmu_conf = NULL;
6507 6508 6509 6510 6511
			return -1;
		}
	}

	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
6512 6513 6514 6515 6516
	       pmu_conf->pmu_name,
	       pmu_conf->num_pmcs,
	       pmu_conf->num_pmds,
	       pmu_conf->num_counters,
	       ffz(pmu_conf->ovfl_val));
David Mosberger's avatar
David Mosberger committed
6517 6518

	/* sanity check */
6519
	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
6520
		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
6521
		pmu_conf = NULL;
6522
		return -1;
David Mosberger's avatar
David Mosberger committed
6523 6524 6525
	}

	/*
6526
	 * create /proc/perfmon (mostly for debugging purposes)
David Mosberger's avatar
David Mosberger committed
6527
	 */
6528
 	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
6529 6530
	if (perfmon_dir == NULL) {
		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
6531
		pmu_conf = NULL;
6532
		return -1;
6533
	}
6534 6535 6536 6537
  	/*
 	 * install customized file operations for /proc/perfmon entry
 	 */
 	perfmon_dir->proc_fops = &pfm_proc_fops;
David Mosberger's avatar
David Mosberger committed
6538

6539
	/*
6540
	 * create /proc/sys/kernel/perfmon (for debugging purposes)
6541
	 */
6542 6543
	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);

6544 6545 6546
	/*
	 * initialize all our spinlocks
	 */
David Mosberger's avatar
David Mosberger committed
6547
	spin_lock_init(&pfm_sessions.pfs_lock);
6548
	spin_lock_init(&pfm_buffer_fmt_lock);
6549 6550 6551 6552

	init_pfm_fs();

	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
David Mosberger's avatar
David Mosberger committed
6553 6554 6555

	return 0;
}
6556

6557
__initcall(pfm_init);
David Mosberger's avatar
David Mosberger committed
6558

6559 6560 6561
/*
 * this function is called before pfm_init()
 */
David Mosberger's avatar
David Mosberger committed
6562
void
6563
pfm_init_percpu (void)
David Mosberger's avatar
David Mosberger committed
6564
{
6565 6566 6567 6568 6569 6570 6571
	/*
	 * make sure no measurement is active
	 * (may inherit programmed PMCs from EFI).
	 */
	pfm_clear_psr_pp();
	pfm_clear_psr_up();

6572 6573 6574 6575 6576
	/*
	 * we run with the PMU not frozen at all times
	 */
	pfm_unfreeze_pmu();

6577
	if (smp_processor_id() == 0)
6578 6579
		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);

6580
	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
David Mosberger's avatar
David Mosberger committed
6581
	ia64_srlz_d();
Linus Torvalds's avatar
Linus Torvalds committed
6582 6583
}

6584 6585 6586 6587
/*
 * used for debug purposes only
 */
void
6588
dump_pmu_state(const char *from)
6589 6590 6591
{
	struct task_struct *task;
	struct thread_struct *t;
6592
	struct pt_regs *regs;
6593
	pfm_context_t *ctx;
6594 6595 6596 6597
	unsigned long psr, dcr, info, flags;
	int i, this_cpu;

	local_irq_save(flags);
6598

6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614
	this_cpu = smp_processor_id();
	regs     = ia64_task_regs(current);
	info     = PFM_CPUINFO_GET();
	dcr      = ia64_getreg(_IA64_REG_CR_DCR);

	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
		local_irq_restore(flags);
		return;
	}

	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
		this_cpu, 
		from, 
		current->pid, 
		regs->cr_iip,
		current->comm);
6615 6616 6617 6618

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

6619
	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
Linus Torvalds's avatar
Linus Torvalds committed
6620

6621 6622
	psr = pfm_get_psr();

6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634
	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", 
		this_cpu,
		ia64_get_pmc(0),
		psr & IA64_PSR_PP ? 1 : 0,
		psr & IA64_PSR_UP ? 1 : 0,
		dcr & IA64_DCR_PP ? 1 : 0,
		info,
		ia64_psr(regs)->up,
		ia64_psr(regs)->pp);

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->pp = 0;
6635 6636 6637 6638 6639

	t = &current->thread;

	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
6640
		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
6641 6642 6643 6644
	}

	for (i=1; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
6645
		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
6646
	}
6647

6648
	if (ctx) {
6649 6650
		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n",
				this_cpu,
6651 6652 6653 6654 6655
				ctx->ctx_state,
				ctx->ctx_smpl_vaddr,
				ctx->ctx_smpl_hdr,
				ctx->ctx_msgq_head,
				ctx->ctx_msgq_tail,
David Mosberger's avatar
David Mosberger committed
6656
				ctx->ctx_saved_psr_up);
6657
	}
6658
	local_irq_restore(flags);
6659 6660 6661 6662 6663 6664 6665 6666 6667 6668
}

/*
 * called from process.c:copy_thread(). task is new child.
 */
void
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
	struct thread_struct *thread;

6669
	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680

	thread = &task->thread;

	/*
	 * cut links inherited from parent (current)
	 */
	thread->pfm_context = NULL;

	PFM_SET_WORK_PENDING(task, 0);

	/*
6681
	 * the psr bits are already set properly in copy_threads()
6682 6683
	 */
}
6684
#else  /* !CONFIG_PERFMON */
6685
asmlinkage long
6686 6687
sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
		long arg8, long stack)
Linus Torvalds's avatar
Linus Torvalds committed
6688 6689 6690
{
	return -ENOSYS;
}
6691
#endif /* CONFIG_PERFMON */