Commit 57ef300e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arc-5.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC architecture updates from Vineet Gupta:

 - Perf support for raw events

 - boot log printing: return stack, action points

 - fix memset to avoid prefetchw bleeding past end of buffer

 - do_page_fault fix for mmap_sem held while returning to userspace

 - other misc fixes

* tag 'arc-5.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARCv2: lib: memeset: fix doing prefetchw outside of buffer
  ARC: mm: do_page_fault fixes #1: relinquish mmap_sem if signal arrives while handle_mm_fault
  ARC: show_regs: lockdep: re-enable preemption
  ARC: show_regs: lockdep: avoid page allocator...
  ARC: perf: avoid kernel killing where it is possible
  ARC: perf: move HW events mapping to separate function
  ARC: perf: introduce Kernel PMU events support
  ARC: perf: trivial code cleanup
  ARC: perf: map generic branches to correct hardware condition
  ARC: adjust memblock_reserve of kernel memory
  arc: remove redundant kernel-space generic-y
  ARC: fix __ffs return value to avoid build warnings
  ARC: boot log: print Action point details
  ARCv2: boot log: BPU return stack depth
parents 49a57857 e6a72b7d
......@@ -3,23 +3,19 @@ generic-y += bugs.h
generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += extable.h
generic-y += fb.h
generic-y += ftrace.h
generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kmap_types.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += parport.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += topology.h
......
......@@ -216,6 +216,14 @@ struct bcr_fp_arcv2 {
#endif
};
struct bcr_actionpoint {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:21, min:1, num:2, ver:8;
#else
unsigned int ver:8, num:2, min:1, pad:21;
#endif
};
#include <soc/arc/timers.h>
struct bcr_bpu_arcompact {
......@@ -283,7 +291,7 @@ struct cpuinfo_arc_cache {
};
struct cpuinfo_arc_bpu {
unsigned int ver, full, num_cache, num_pred;
unsigned int ver, full, num_cache, num_pred, ret_stk;
};
struct cpuinfo_arc_ccm {
......@@ -302,7 +310,7 @@ struct cpuinfo_arc {
struct {
unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
debug:1, ap:1, smart:1, rtt:1, pad3:4,
ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
} extn;
struct bcr_mpy extn_mpy;
......
......@@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long word)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
{
if (!word)
return word;
......@@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long x)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
{
int n;
unsigned long n;
asm volatile(
" ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
......
......@@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = {
/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
/* All jump instructions that are taken */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
#ifdef CONFIG_ISA_ARCV2
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
......
This diff is collapsed.
......@@ -123,6 +123,7 @@ static void read_arc_build_cfg_regs(void)
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
const struct id_to_str *tbl;
struct bcr_isa_arcv2 isa;
struct bcr_actionpoint ap;
FIX_PTR(cpu);
......@@ -195,6 +196,7 @@ static void read_arc_build_cfg_regs(void)
cpu->bpu.full = bpu.ft;
cpu->bpu.num_cache = 256 << bpu.bce;
cpu->bpu.num_pred = 2048 << bpu.pte;
cpu->bpu.ret_stk = 4 << bpu.rse;
if (cpu->core.family >= 0x54) {
unsigned int exec_ctrl;
......@@ -207,8 +209,11 @@ static void read_arc_build_cfg_regs(void)
}
}
READ_BCR(ARC_REG_AP_BCR, bcr);
cpu->extn.ap = bcr.ver ? 1 : 0;
READ_BCR(ARC_REG_AP_BCR, ap);
if (ap.ver) {
cpu->extn.ap_num = 2 << ap.num;
cpu->extn.ap_full = !!ap.min;
}
READ_BCR(ARC_REG_SMART_BCR, bcr);
cpu->extn.smart = bcr.ver ? 1 : 0;
......@@ -216,8 +221,6 @@ static void read_arc_build_cfg_regs(void)
READ_BCR(ARC_REG_RTT_BCR, bcr);
cpu->extn.rtt = bcr.ver ? 1 : 0;
cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
/* some hacks for lack of feature BCR info in old ARC700 cores */
......@@ -299,10 +302,10 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
if (cpu->bpu.ver)
n += scnprintf(buf + n, len - n,
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
IS_AVAIL1(cpu->bpu.full, "full"),
IS_AVAIL1(!cpu->bpu.full, "partial"),
cpu->bpu.num_cache, cpu->bpu.num_pred);
cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
if (is_isa_arcv2()) {
struct bcr_lpb lpb;
......@@ -336,11 +339,17 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
if (cpu->extn.debug)
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
IS_AVAIL1(cpu->extn.smart, "smaRT "),
IS_AVAIL1(cpu->extn.rtt, "RTT "));
if (cpu->extn.ap_num) {
n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
cpu->extn.ap_num,
cpu->extn.ap_full ? "full":"min");
}
n += scnprintf(buf + n, len - n, "\n");
}
if (cpu->dccm.sz || cpu->iccm.sz)
n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
......
......@@ -18,6 +18,8 @@
#include <asm/arcregs.h>
#include <asm/irqflags.h>
#define ARC_PATH_MAX 256
/*
* Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
* -Prints 3 regs per line and a CR.
......@@ -58,11 +60,12 @@ static void show_callee_regs(struct callee_regs *cregs)
print_reg_file(&(cregs->r13), 13);
}
static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
static void print_task_path_n_nm(struct task_struct *tsk)
{
char *path_nm = NULL;
struct mm_struct *mm;
struct file *exe_file;
char buf[ARC_PATH_MAX];
mm = get_task_mm(tsk);
if (!mm)
......@@ -72,7 +75,7 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
mmput(mm);
if (exe_file) {
path_nm = file_path(exe_file, buf, 255);
path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1);
fput(exe_file);
}
......@@ -80,10 +83,9 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
}
static void show_faulting_vma(unsigned long address, char *buf)
static void show_faulting_vma(unsigned long address)
{
struct vm_area_struct *vma;
char *nm = buf;
struct mm_struct *active_mm = current->active_mm;
/* can't use print_vma_addr() yet as it doesn't check for
......@@ -96,8 +98,11 @@ static void show_faulting_vma(unsigned long address, char *buf)
* if the container VMA is not found
*/
if (vma && (vma->vm_start <= address)) {
char buf[ARC_PATH_MAX];
char *nm = "?";
if (vma->vm_file) {
nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
if (IS_ERR(nm))
nm = "?";
}
......@@ -173,13 +178,14 @@ void show_regs(struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct callee_regs *cregs;
char *buf;
buf = (char *)__get_free_page(GFP_KERNEL);
if (!buf)
return;
/*
* generic code calls us with preemption disabled, but some calls
* here could sleep, so re-enable to avoid lockdep splat
*/
preempt_enable();
print_task_path_n_nm(tsk, buf);
print_task_path_n_nm(tsk);
show_regs_print_info(KERN_INFO);
show_ecr_verbose(regs);
......@@ -189,7 +195,7 @@ void show_regs(struct pt_regs *regs)
(void *)regs->blink, (void *)regs->ret);
if (user_mode(regs))
show_faulting_vma(regs->ret, buf); /* faulting code, not data */
show_faulting_vma(regs->ret); /* faulting code, not data */
pr_info("[STAT32]: 0x%08lx", regs->status32);
......@@ -222,7 +228,7 @@ void show_regs(struct pt_regs *regs)
if (cregs)
show_callee_regs(cregs);
free_page((unsigned long)buf);
preempt_disable();
}
void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
......
......@@ -7,11 +7,39 @@
*/
#include <linux/linkage.h>
#include <asm/cache.h>
#undef PREALLOC_NOT_AVAIL
/*
* The memset implementation below is optimized to use prefetchw and prealloc
* instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
* If you want to implement optimized memset for other possible L1 data cache
* line lengths (32B and 128B) you should rewrite code carefully checking
* we don't call any prefetchw/prealloc instruction for L1 cache lines which
* don't belongs to memset area.
*/
#if L1_CACHE_SHIFT == 6
.macro PREALLOC_INSTR reg, off
prealloc [\reg, \off]
.endm
.macro PREFETCHW_INSTR reg, off
prefetchw [\reg, \off]
.endm
#else
.macro PREALLOC_INSTR
.endm
.macro PREFETCHW_INSTR
.endm
#endif
ENTRY_CFI(memset)
prefetchw [r0] ; Prefetch the write location
PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
......@@ -48,11 +76,8 @@ ENTRY_CFI(memset)
lpnz @.Lset64bytes
;; LOOP START
#ifdef PREALLOC_NOT_AVAIL
prefetchw [r3, 64] ;Prefetch the next write location
#else
prealloc [r3, 64]
#endif
PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
......@@ -85,7 +110,6 @@ ENTRY_CFI(memset)
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes
;; LOOP START
prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
......
......@@ -141,12 +141,17 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
*/
fault = handle_mm_fault(vma, address, flags);
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
if (fatal_signal_pending(current)) {
if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
up_read(&mm->mmap_sem);
if (user_mode(regs))
/*
* if fault retry, mmap_sem already relinquished by core mm
* so OK to return to user mode (with signal handled first)
*/
if (fault & VM_FAULT_RETRY) {
if (!user_mode(regs))
goto no_context;
return;
}
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
......
......@@ -119,7 +119,8 @@ void __init setup_arch_memory(void)
*/
memblock_add_node(low_mem_start, low_mem_sz, 0);
memblock_reserve(low_mem_start, __pa(_end) - low_mem_start);
memblock_reserve(CONFIG_LINUX_LINK_BASE,
__pa(_end) - CONFIG_LINUX_LINK_BASE);
#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment