Commit 5400fc22 authored by Michael Ellerman

Merge branch 'topic/ppc-kvm' into next

Merge the topic branch we share with kvm-ppc, this brings in two xive
commits, one from Paul to rework HMI handling, and a minor cleanup to
drop an unused flag.
parents 02ef6dd8 76b03dc0
...@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void); ...@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
static inline void wait_for_subcore_guest_exit(void) { } static inline void wait_for_subcore_guest_exit(void) { }
static inline void wait_for_tb_resync(void) { } static inline void wait_for_tb_resync(void) { }
#endif #endif
struct pt_regs;
/*
 * Handle an HMI that occurred as a result of a debug trigger.
 *
 * Returns -1 if this is not an HMI cause that is known about, 0 if no
 * further handling is required, and 1 if further handling is required.
 */
extern long hmi_handle_debugtrig(struct pt_regs *regs);
#endif /* __ASM_PPC64_HMI_H__ */ #endif /* __ASM_PPC64_HMI_H__ */
...@@ -241,6 +241,7 @@ ...@@ -241,6 +241,7 @@
#define H_GET_HCA_INFO 0x1B8 #define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC #define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0 #define H_MANAGE_TRACE 0x1C0
#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4 #define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8 #define H_POLL_PENDING 0x1D8
...@@ -330,6 +331,17 @@ ...@@ -330,6 +331,17 @@
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
/* >= 0 values are CPU number */ /* >= 0 values are CPU number */
/* H_GET_CPU_CHARACTERISTICS return values */
/*
 * NOTE(review): the H_CPU_CHAR_* and H_CPU_BEHAV_* masks reuse the same bit
 * positions, so they are presumably tested against two different result
 * words (cf. the 'character' and 'behaviour' members of
 * struct h_cpu_char_result) - confirm against the hcall specification.
 */
#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
/* Flag values used in H_REGISTER_PROC_TBL hcall */ /* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18 #define PROC_TABLE_OP_MASK 0x18
#define PROC_TABLE_DEREG 0x10 #define PROC_TABLE_DEREG 0x10
...@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) ...@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
} }
} }
/*
 * Result of the H_GET_CPU_CHARACTERISTICS hcall, as filled in by
 * plpar_get_cpu_characteristics().
 */
struct h_cpu_char_result {
	u64 character;	/* first word returned by the hcall (retbuf[0]) */
	u64 behaviour;	/* second word returned by the hcall (retbuf[1]) */
};
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */ #endif /* _ASM_POWERPC_HVCALL_H */
...@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu) ...@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
} }
/*
 * Issue the H_GET_CPU_CHARACTERISTICS hcall and, on success, copy the two
 * returned words into @p.
 *
 * @p: destination for the result; left untouched unless the hcall
 *     returns H_SUCCESS.
 *
 * Returns the hcall status code unchanged.
 */
static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
{
	unsigned long hcall_out[PLPAR_HCALL_BUFSIZE];
	long status = plpar_hcall(H_GET_CPU_CHARACTERISTICS, hcall_out);

	/* On failure hand the status back without touching *p. */
	if (status != H_SUCCESS)
		return status;

	p->character = hcall_out[0];
	p->behaviour = hcall_out[1];

	return status;
}
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
...@@ -431,8 +431,9 @@ ...@@ -431,8 +431,9 @@
#define SPRN_LPID 0x13F /* Logical Partition Identifier */ #define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif #endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
#define SPRN_HMER 0x150 /* Hardware m? error recovery */ #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */ #define SPRN_PCR 0x152 /* Processor compatibility register */
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
......
...@@ -9,6 +9,41 @@ ...@@ -9,6 +9,41 @@
#ifndef _ASM_POWERPC_XIVE_REGS_H #ifndef _ASM_POWERPC_XIVE_REGS_H
#define _ASM_POWERPC_XIVE_REGS_H #define _ASM_POWERPC_XIVE_REGS_H
/*
* "magic" Event State Buffer (ESB) MMIO offsets.
*
* Each interrupt source has a 2-bit state machine called ESB
* which can be controlled by MMIO. It's made of 2 bits, P and
* Q. P indicates that an interrupt is pending (has been sent
* to a queue and is waiting for an EOI). Q indicates that the
* interrupt has been triggered while pending.
*
* This acts as a coalescing mechanism in order to guarantee
* that a given interrupt only occurs at most once in a queue.
*
* When doing an EOI, the Q bit will indicate if the interrupt
* needs to be re-triggered.
*
* The following offsets into the ESB MMIO allow reading or
* manipulating the PQ bits. They must be used with an 8-byte
* load instruction. They all return the previous state of the
* interrupt (atomically).
*
* Additionally, some ESB pages support doing an EOI via a
* store (see XIVE_ESB_STORE_EOI) and some ESBs support doing
* a trigger via a separate trigger page.
*/
#define XIVE_ESB_STORE_EOI 0x400 /* Store */
#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
#define XIVE_ESB_GET 0x800 /* Load */
#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
/* /*
* Thread Management (aka "TM") registers * Thread Management (aka "TM") registers
*/ */
......
...@@ -58,6 +58,9 @@ struct xive_irq_data { ...@@ -58,6 +58,9 @@ struct xive_irq_data {
#define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_EOI_FW 0x10
#define XIVE_IRQ_FLAG_H_INT_ESB 0x20 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20
/* Special flag set by KVM for escalation interrupts */
#define XIVE_IRQ_NO_EOI 0x80
#define XIVE_INVALID_CHIP_ID -1 #define XIVE_INVALID_CHIP_ID -1
/* A queue tracking structure in a CPU */ /* A queue tracking structure in a CPU */
...@@ -72,41 +75,6 @@ struct xive_q { ...@@ -72,41 +75,6 @@ struct xive_q {
atomic_t pending_count; atomic_t pending_count;
}; };
/*
* "magic" Event State Buffer (ESB) MMIO offsets.
*
* Each interrupt source has a 2-bit state machine called ESB
* which can be controlled by MMIO. It's made of 2 bits, P and
* Q. P indicates that an interrupt is pending (has been sent
* to a queue and is waiting for an EOI). Q indicates that the
* interrupt has been triggered while pending.
*
* This acts as a coalescing mechanism in order to guarantee
* that a given interrupt only occurs at most once in a queue.
*
* When doing an EOI, the Q bit will indicate if the interrupt
* needs to be re-triggered.
*
* The following offsets into the ESB MMIO allow to read or
* manipulate the PQ bits. They must be used with an 8-bytes
* load instruction. They all return the previous state of the
* interrupt (atomically).
*
* Additionally, some ESB pages support doing an EOI via a
* store at 0 and some ESBs support doing a trigger via a
* separate trigger page.
*/
#define XIVE_ESB_STORE_EOI 0x400 /* Store */
#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
#define XIVE_ESB_GET 0x800 /* Load */
#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
/* Global enable flags for the XIVE support */ /* Global enable flags for the XIVE support */
extern bool __xive_enabled; extern bool __xive_enabled;
......
...@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs) ...@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
return handled; return handled;
} }
long hmi_exception_realmode(struct pt_regs *regs) /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
DTRIG_UNKNOWN,
DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
} hmer_debug_trig_function;
static int init_debug_trig_function(void)
{ {
__this_cpu_inc(irq_stat.hmi_exceptions); int pvr;
struct device_node *cpun;
#ifdef CONFIG_PPC_BOOK3S_64 struct property *prop = NULL;
/* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ const char *str;
if (pvr_version_is(PVR_POWER9)) {
unsigned long hmer = mfspr(SPRN_HMER); /* First look in the device tree */
preempt_disable();
/* Do we have the debug bit set */ cpun = of_get_cpu_node(smp_processor_id(), NULL);
if (hmer & PPC_BIT(17)) { if (cpun) {
hmer &= ~PPC_BIT(17); of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
mtspr(SPRN_HMER, hmer); prop, str) {
if (strcmp(str, "bit17-vector-ci-load") == 0)
/* hmer_debug_trig_function = DTRIG_VECTOR_CI;
* Now to avoid problems with soft-disable we else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
* only do the emulation if we are coming from hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
* user space
*/
if (user_mode(regs))
local_paca->hmi_p9_special_emu = 1;
/*
* Don't bother going to OPAL if that's the
* only relevant bit.
*/
if (!(hmer & mfspr(SPRN_HMEER)))
return local_paca->hmi_p9_special_emu;
} }
of_node_put(cpun);
}
preempt_enable();
/* If we found the property, don't look at PVR */
if (prop)
goto out;
pvr = mfspr(SPRN_PVR);
/* Check for POWER9 Nimbus (scale-out) */
if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
/* DD2.2 and later */
if ((pvr & 0xfff) >= 0x202)
hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
/* DD2.0 and DD2.1 - used for vector CI load emulation */
else if ((pvr & 0xfff) >= 0x200)
hmer_debug_trig_function = DTRIG_VECTOR_CI;
}
out:
switch (hmer_debug_trig_function) {
case DTRIG_VECTOR_CI:
pr_debug("HMI debug trigger used for vector CI load\n");
break;
case DTRIG_SUSPEND_ESCAPE:
pr_debug("HMI debug trigger used for TM suspend escape\n");
break;
default:
break;
} }
#endif /* CONFIG_PPC_BOOK3S_64 */ return 0;
}
__initcall(init_debug_trig_function);
/*
 * Deal with an HMI that was raised by the HMER debug trigger bit.
 *
 * @regs: interrupted register state, or NULL when none is available;
 *        only consulted to tell whether we came from user mode.
 *
 * Return values:
 *	-1 - this is not a HMI cause that we know about (or other enabled
 *	     HMI causes are still pending)
 *	 0 - no further handling is required
 *	 1 - further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* On P9 the HMER_DEBUG_TRIG bit is used for various workarounds */
	if (!(hmer & HMER_DEBUG_TRIG))
		return -1;
	if (hmer_debug_trig_function == DTRIG_UNKNOWN)
		return -1;

	/* HMER is a write-AND register, so write the complement to clear */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
	hmer &= ~HMER_DEBUG_TRIG;

	if (hmer_debug_trig_function == DTRIG_VECTOR_CI) {
		/*
		 * To avoid problems with soft-disable, the emulation is
		 * only requested when we are coming from host user space.
		 */
		if (regs && user_mode(regs)) {
			local_paca->hmi_p9_special_emu = 1;
			ret = 1;
		}
	}

	/* Any other enabled HMI cause still set means we are not done */
	return (hmer & mfspr(SPRN_HMEER)) ? -1 : ret;
}
/*
* Return values:
*/
long hmi_exception_realmode(struct pt_regs *regs)
{
int ret;
__this_cpu_inc(irq_stat.hmi_exceptions);
ret = hmi_handle_debugtrig(regs);
if (ret >= 0)
return ret;
wait_for_subcore_guest_exit(); wait_for_subcore_guest_exit();
......
...@@ -266,17 +266,19 @@ static void kvmppc_tb_resync_done(void) ...@@ -266,17 +266,19 @@ static void kvmppc_tb_resync_done(void)
* secondary threads to proceed. * secondary threads to proceed.
* - All secondary threads will eventually call opal hmi handler on * - All secondary threads will eventually call opal hmi handler on
* their exit path. * their exit path.
*
* Returns 1 if the timebase offset should be applied, 0 if not.
*/ */
long kvmppc_realmode_hmi_handler(void) long kvmppc_realmode_hmi_handler(void)
{ {
int ptid = local_paca->kvm_hstate.ptid;
bool resync_req; bool resync_req;
/* This is only called on primary thread. */
BUG_ON(ptid != 0);
__this_cpu_inc(irq_stat.hmi_exceptions); __this_cpu_inc(irq_stat.hmi_exceptions);
if (hmi_handle_debugtrig(NULL) >= 0)
return 1;
/* /*
* By now primary thread has already completed guest->host * By now primary thread has already completed guest->host
* partition switch but haven't signaled secondaries yet. * partition switch but haven't signaled secondaries yet.
......
...@@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x) ...@@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x)
} }
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags) static int global_invalidates(struct kvm *kvm)
{ {
int global; int global;
int cpu; int cpu;
...@@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, ...@@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) { if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
rb = compute_tlbie_rb(v, pte_r, pte_index); rb = compute_tlbie_rb(v, pte_r, pte_index);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/* /*
* The reference (R) and change (C) bits in a HPT * The reference (R) and change (C) bits in a HPT
* entry can be set by hardware at any time up until * entry can be set by hardware at any time up until
...@@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ...@@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
if (kvm_is_radix(kvm)) if (kvm_is_radix(kvm))
return H_FUNCTION; return H_FUNCTION;
global = global_invalidates(kvm, 0); global = global_invalidates(kvm);
for (i = 0; i < 4 && ret == H_SUCCESS; ) { for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0; n = 0;
for (; i < 4; ++i) { for (; i < 4; ++i) {
...@@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rb = compute_tlbie_rb(v, r, pte_index); rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT); HPTE_V_ABSENT);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
true);
/* Don't lose R/C bit updates done by hardware */ /* Don't lose R/C bit updates done by hardware */
r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r); hpte[1] = cpu_to_be64(r);
......
...@@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
bne 27f bne 27f
bl kvmppc_realmode_hmi_handler bl kvmppc_realmode_hmi_handler
nop nop
cmpdi r3, 0
li r12, BOOK3S_INTERRUPT_HMI li r12, BOOK3S_INTERRUPT_HMI
/* /*
* At this point kvmppc_realmode_hmi_handler would have resync-ed * At this point kvmppc_realmode_hmi_handler may have resync-ed
* the TB. Hence it is not required to subtract guest timebase * the TB, and if it has, we must not subtract the guest timebase
* offset from timebase. So, skip it. * offset from the timebase. So, skip it.
* *
* Also, do not call kvmppc_subcore_exit_guest() because it has * Also, do not call kvmppc_subcore_exit_guest() because it has
* been invoked as part of kvmppc_realmode_hmi_handler(). * been invoked as part of kvmppc_realmode_hmi_handler().
*/ */
b 30f beq 30f
27: 27:
/* Subtract timebase offset from timebase */ /* Subtract timebase offset from timebase */
......
...@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d) ...@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
* EOI the source if it hasn't been disabled and hasn't * EOI the source if it hasn't been disabled and hasn't
* been passed-through to a KVM guest * been passed-through to a KVM guest
*/ */
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd); xive_do_source_eoi(irqd_to_hwirq(d), xd);
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment