Commit a6014f1a authored by Radim Krčmář

Merge tag 'kvm-s390-next-4.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux

KVM: s390: fixes and improvements for 4.15

- Some initial preparation patches for exitless interrupts and crypto
- New capability for AIS migration
- Fixes
- Merge of the sthyi tree from the base s390 team, which moves the sthyi
  handling out of KVM into a shared function that is also usable by non-KVM code
parents 61d75077 da9a1446
@@ -4351,3 +4351,12 @@ This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
 value is used to denote the target vcpu for a SynIC interrupt. For
 compatibility, KVM initializes this msr to KVM's internal vcpu index. When this
 capability is absent, userspace can still query this msr's value.
+
+8.13 KVM_CAP_S390_AIS_MIGRATION
+
+Architectures: s390
+Parameters: none
+
+This capability indicates if the flic device will be able to get/set the
+AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
+userspace to discover this without having to create a flic device.
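A minimal userspace probe for this capability could look like the following sketch (not part of the patch; it assumes a <linux/kvm.h> that already defines KVM_CAP_S390_AIS_MIGRATION, i.e. a 4.15-or-later kernel):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	/* KVM_CHECK_EXTENSION on the /dev/kvm fd works for global capabilities */
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0)
		return 1;
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_S390_AIS_MIGRATION) > 0)
		printf("flic supports KVM_DEV_FLIC_AISM_ALL\n");
	return 0;
}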
@@ -151,8 +151,13 @@ struct kvm_s390_ais_all {
 to an ISC (MSB0 bit 0 to ISC 0 and so on). The combination of simm bit and
 nimm bit presents AIS mode for an ISC.
 
+KVM_DEV_FLIC_AISM_ALL is indicated by KVM_CAP_S390_AIS_MIGRATION.
+
 Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
 FLIC with an unknown group or attribute give the error code EINVAL (instead of
 ENXIO, as specified in the API documentation). It is not possible to conclude
 that a FLIC operation is unavailable based on the error code resulting from a
 usage attempt.
+
+Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
+schid is specified.
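As a hedged sketch of the save side of the migration flow this enables: flic_fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_FLIC, and passing the buffer size in attr.attr mirrors other flic attributes but is an assumption here, not something this hunk guarantees:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: read all AIS states from an existing flic device fd. */
void save_ais_state(int flic_fd)
{
	struct kvm_s390_ais_all ais;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_FLIC_AISM_ALL,
		.attr  = sizeof(ais),	/* assumed: size of the buffer below */
		.addr  = (__u64)(unsigned long)&ais,
	};

	if (ioctl(flic_fd, KVM_GET_DEVICE_ATTR, &attr))
		perror("KVM_GET_DEVICE_ATTR");
	/* on success, ais.simm and ais.nimm hold the per-ISC AIS modes */
}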
@@ -685,11 +685,28 @@ struct kvm_s390_crypto {
 	__u8 dea_kw;
 };
 
+#define APCB0_MASK_SIZE 1
+struct kvm_s390_apcb0 {
+	__u64 apm[APCB0_MASK_SIZE];		/* 0x0000 */
+	__u64 aqm[APCB0_MASK_SIZE];		/* 0x0008 */
+	__u64 adm[APCB0_MASK_SIZE];		/* 0x0010 */
+	__u64 reserved18;			/* 0x0018 */
+};
+
+#define APCB1_MASK_SIZE 4
+struct kvm_s390_apcb1 {
+	__u64 apm[APCB1_MASK_SIZE];		/* 0x0000 */
+	__u64 aqm[APCB1_MASK_SIZE];		/* 0x0020 */
+	__u64 adm[APCB1_MASK_SIZE];		/* 0x0040 */
+	__u64 reserved60[4];			/* 0x0060 */
+};
+
 struct kvm_s390_crypto_cb {
-	__u8 reserved00[72];			/* 0x0000 */
+	struct kvm_s390_apcb0 apcb0;		/* 0x0000 */
+	__u8 reserved20[0x0048 - 0x0020];	/* 0x0020 */
 	__u8 dea_wrapping_key_mask[24];		/* 0x0048 */
 	__u8 aes_wrapping_key_mask[32];		/* 0x0060 */
-	__u8 reserved80[128];			/* 0x0080 */
+	struct kvm_s390_apcb1 apcb1;		/* 0x0080 */
 };
 
 /*
@@ -736,7 +753,6 @@ struct kvm_arch{
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
 	struct mutex ipte_mutex;
-	struct ratelimit_state sthyi_limit;
 	spinlock_t start_stop_lock;
 	struct sie_page2 *sie_page2;
 	struct kvm_s390_cpu_model model;
......
@@ -198,4 +198,5 @@ struct service_level {
 int register_service_level(struct service_level *);
 int unregister_service_level(struct service_level *);
+int sthyi_fill(void *dst, u64 *rc);
 
 #endif /* __ASM_S390_SYSINFO_H */
#ifndef _UAPI_ASM_STHYI_H
#define _UAPI_ASM_STHYI_H
#define STHYI_FC_CP_IFL_CAP 0
#endif /* _UAPI_ASM_STHYI_H */
@@ -315,7 +315,8 @@
 #define __NR_pwritev2 377
 #define __NR_s390_guarded_storage 378
 #define __NR_statx 379
-#define NR_syscalls 380
+#define __NR_s390_sthyi 380
+#define NR_syscalls 381
 
 /*
  * There are some system calls that are not present on 64 bit, some
......
@@ -55,7 +55,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
 obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
+obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
 obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o
 
 extra-y += head.o head64.o vmlinux.lds
......
@@ -180,3 +180,4 @@ COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
 COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
 COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
 COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
+COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags);
@@ -77,6 +77,7 @@ long sys_s390_runtime_instr(int command, int signum);
 long sys_s390_guarded_storage(int command, struct gs_cb __user *);
 long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
 long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags);
 
 DECLARE_PER_CPU(u64, mt_cycles[8]);
......
@@ -8,22 +8,19 @@
  * Copyright IBM Corp. 2016
  * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
  */
-#include <linux/kvm_host.h>
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
-#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
+#include <linux/mutex.h>
 
-#include <asm/kvm_host.h>
 #include <asm/asm-offsets.h>
 #include <asm/sclp.h>
 #include <asm/diag.h>
 #include <asm/sysinfo.h>
 #include <asm/ebcdic.h>
 #include <asm/facility.h>
-#include "kvm-s390.h"
-#include "gaccess.h"
-#include "trace.h"
+#include <asm/sthyi.h>
+#include "entry.h"
 
 #define DED_WEIGHT 0xffff
 
 /*
@@ -144,6 +141,21 @@ struct lpar_cpu_inf {
 	struct cpu_inf ifl;
 };
 
+/*
+ * STHYI requires extensive locking in the higher hypervisors
+ * and is very computational/memory expensive. Therefore we
+ * cache the retrieved data, which stays valid for 1s.
+ */
+#define CACHE_VALID_JIFFIES	HZ
+
+struct sthyi_info {
+	void *info;
+	unsigned long end;
+};
+
+static DEFINE_MUTEX(sthyi_mutex);
+static struct sthyi_info sthyi_cache;
+
 static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
 {
 	return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
@@ -382,88 +394,124 @@ static void fill_diag(struct sthyi_sctns *sctns)
 	vfree(diag204_buf);
 }
 
-static int sthyi(u64 vaddr)
+static int sthyi(u64 vaddr, u64 *rc)
 {
 	register u64 code asm("0") = 0;
 	register u64 addr asm("2") = vaddr;
+	register u64 rcode asm("3");
 	int cc;
 
 	asm volatile(
 		".insn rre,0xB2560000,%[code],%[addr]\n"
 		"ipm %[cc]\n"
 		"srl %[cc],28\n"
-		: [cc] "=d" (cc)
+		: [cc] "=d" (cc), "=d" (rcode)
 		: [code] "d" (code), [addr] "a" (addr)
-		: "3", "memory", "cc");
+		: "memory", "cc");
+	*rc = rcode;
 	return cc;
 }
 
-int handle_sthyi(struct kvm_vcpu *vcpu)
+static int fill_dst(void *dst, u64 *rc)
 {
-	int reg1, reg2, r = 0;
-	u64 code, addr, cc = 0;
-	struct sthyi_sctns *sctns = NULL;
-
-	if (!test_kvm_facility(vcpu->kvm, 74))
-		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+	struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
 
 	/*
-	 * STHYI requires extensive locking in the higher hypervisors
-	 * and is very computational/memory expensive. Therefore we
-	 * ratelimit the executions per VM.
+	 * If the facility is on, we don't want to emulate the instruction.
+	 * We ask the hypervisor to provide the data.
 	 */
-	if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) {
-		kvm_s390_retry_instr(vcpu);
-		return 0;
-	}
-
-	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
-	code = vcpu->run->s.regs.gprs[reg1];
-	addr = vcpu->run->s.regs.gprs[reg2];
-
-	vcpu->stat.instruction_sthyi++;
-	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
-	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
-
-	if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
-	if (code & 0xffff) {
-		cc = 3;
-		goto out;
-	}
-
-	if (addr & ~PAGE_MASK)
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
-	sctns = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!sctns)
-		return -ENOMEM;
-
-	/*
-	 * If we are a guest, we don't want to emulate an emulated
-	 * instruction. We ask the hypervisor to provide the data.
-	 */
-	if (test_facility(74)) {
-		cc = sthyi((u64)sctns);
-		goto out;
-	}
+	if (test_facility(74))
+		return sthyi((u64)dst, rc);
 
 	fill_hdr(sctns);
 	fill_stsi(sctns);
 	fill_diag(sctns);
+	*rc = 0;
+	return 0;
+}
+
+static int sthyi_init_cache(void)
+{
+	if (sthyi_cache.info)
+		return 0;
+
+	sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!sthyi_cache.info)
+		return -ENOMEM;
+	sthyi_cache.end = jiffies - 1; /* expired */
+	return 0;
+}
+
+static int sthyi_update_cache(u64 *rc)
+{
+	int r;
+
+	memset(sthyi_cache.info, 0, PAGE_SIZE);
+	r = fill_dst(sthyi_cache.info, rc);
+	if (r)
+		return r;
+	sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+	return r;
+}
+
+/*
+ * sthyi_fill - Fill page with data returned by the STHYI instruction
+ *
+ * @dst: Pointer to zeroed page
+ * @rc:  Pointer for storing the return code of the instruction
+ *
+ * Fills the destination with system information returned by the STHYI
+ * instruction. The data is generated by emulation or execution of STHYI,
+ * if available. The return value is the condition code that would be
+ * returned, the rc parameter is the return code which is passed in
+ * register R2 + 1.
+ */
+int sthyi_fill(void *dst, u64 *rc)
+{
+	int r;
+
+	mutex_lock(&sthyi_mutex);
+	r = sthyi_init_cache();
+	if (r)
+		goto out;
 
-out:
-	if (!cc) {
-		r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
-		if (r) {
-			free_page((unsigned long)sctns);
-			return kvm_s390_inject_prog_cond(vcpu, r);
-		}
-	}
+	if (time_is_before_jiffies(sthyi_cache.end)) {
+		/* cache expired */
+		r = sthyi_update_cache(rc);
+		if (r)
+			goto out;
+	}
+	*rc = 0;
+	memcpy(dst, sthyi_cache.info, PAGE_SIZE);
+out:
+	mutex_unlock(&sthyi_mutex);
+	return r;
+}
+EXPORT_SYMBOL_GPL(sthyi_fill);
 
-	free_page((unsigned long)sctns);
-	vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0;
-	kvm_s390_set_psw_cc(vcpu, cc);
+SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
+		u64 __user *, return_code, unsigned long, flags)
+{
+	u64 sthyi_rc;
+	void *info;
+	int r;
+
+	if (flags)
+		return -EINVAL;
+	if (function_code != STHYI_FC_CP_IFL_CAP)
+		return -EOPNOTSUPP;
+	info = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	r = sthyi_fill(info, &sthyi_rc);
+	if (r < 0)
+		goto out;
+	if (return_code && put_user(sthyi_rc, return_code)) {
+		r = -EFAULT;
+		goto out;
+	}
+	if (copy_to_user(buffer, info, PAGE_SIZE))
+		r = -EFAULT;
out:
+	free_page((unsigned long)info);
 	return r;
 }
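A small userspace sketch of driving the new system call follows (not from the patch; the syscall number 380 and STHYI_FC_CP_IFL_CAP come from the hunks above, and the 4096-byte buffer size assumes the usual s390 page size since the kernel copies PAGE_SIZE bytes back):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

#define STHYI_FC_CP_IFL_CAP	0	/* from asm/sthyi.h above */
#define BUF_SIZE		4096	/* one page; the syscall fills PAGE_SIZE bytes */

int main(void)
{
	unsigned long long rc = 0;
	void *buf = calloc(1, BUF_SIZE);
	long r;

	if (!buf)
		return 1;
	/* 380 == __NR_s390_sthyi per the unistd.h hunk; flags must be 0 */
	r = syscall(380, STHYI_FC_CP_IFL_CAP, buf, &rc, 0UL);
	if (r < 0)
		perror("s390_sthyi");
	else
		printf("condition code %ld, rc %llu\n", r, rc);
	free(buf);
	return 0;
}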
@@ -388,3 +388,4 @@ SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
 SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
 SYSCALL(sys_statx,compat_sys_statx)
+SYSCALL(sys_s390_sthyi,compat_sys_s390_sthyi)
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
 
 obj-$(CONFIG_KVM) += kvm.o
@@ -18,6 +18,7 @@
 #include <asm/kvm_host.h>
 #include <asm/asm-offsets.h>
 #include <asm/irq.h>
+#include <asm/sysinfo.h>
 
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -360,6 +361,61 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }
 
+/*
+ * Handle the sthyi instruction that provides the guest with system
+ * information, like current CPU resources available at each level of
+ * the machine.
+ */
+int handle_sthyi(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2, r = 0;
+	u64 code, addr, cc = 0, rc = 0;
+	struct sthyi_sctns *sctns = NULL;
+
+	if (!test_kvm_facility(vcpu->kvm, 74))
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	code = vcpu->run->s.regs.gprs[reg1];
+	addr = vcpu->run->s.regs.gprs[reg2];
+
+	vcpu->stat.instruction_sthyi++;
+	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
+	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
+
+	if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (code & 0xffff) {
+		cc = 3;
+		rc = 4;
+		goto out;
+	}
+
+	if (addr & ~PAGE_MASK)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	sctns = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!sctns)
+		return -ENOMEM;
+
+	cc = sthyi_fill(sctns, &rc);
+
+out:
+	if (!cc) {
+		r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+		if (r) {
+			free_page((unsigned long)sctns);
+			return kvm_s390_inject_prog_cond(vcpu, r);
+		}
+	}
+
+	free_page((unsigned long)sctns);
+	vcpu->run->s.regs.gprs[reg2 + 1] = rc;
+	kvm_s390_set_psw_cc(vcpu, cc);
+	return r;
+}
+
 static int handle_operexc(struct kvm_vcpu *vcpu)
 {
 	psw_t oldpsw, newpsw;
......
@@ -213,6 +213,16 @@ static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
 		vcpu->arch.local_int.pending_irqs;
 }
 
+static inline int isc_to_irq_type(unsigned long isc)
+{
+	return IRQ_PEND_IO_ISC_0 + isc;
+}
+
+static inline int irq_type_to_isc(unsigned long irq_type)
+{
+	return irq_type - IRQ_PEND_IO_ISC_0;
+}
+
 static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
 				  unsigned long active_mask)
 {
@@ -220,7 +230,7 @@ static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
 
 	for (i = 0; i <= MAX_ISC; i++)
 		if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
-			active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+			active_mask &= ~(1UL << (isc_to_irq_type(i)));
 
 	return active_mask;
 }
@@ -901,7 +911,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 	fi = &vcpu->kvm->arch.float_int;
 	spin_lock(&fi->lock);
-	isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+	isc_list = &fi->lists[irq_type_to_isc(irq_type)];
 	inti = list_first_entry_or_null(isc_list,
 					struct kvm_s390_interrupt_info,
 					list);
@@ -1074,6 +1084,12 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 	 * in kvm_vcpu_block without having the waitqueue set (polling)
 	 */
 	vcpu->valid_wakeup = true;
+	/*
+	 * This is mostly to document that the read in swait_active() could
+	 * be moved before other stores, leading to subtle races.
+	 * All current users do not store anything or use an atomic-like update.
+	 */
+	smp_mb__after_atomic();
 	if (swait_active(&vcpu->wq)) {
 		/*
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
@@ -1395,7 +1411,7 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
 			list_del_init(&iter->list);
 			fi->counters[FIRQ_CNTR_IO] -= 1;
 			if (list_empty(isc_list))
-				clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+				clear_bit(isc_to_irq_type(isc), &fi->pending_irqs);
 			spin_unlock(&fi->lock);
 			return iter;
 		}
@@ -1522,7 +1538,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	isc = int_word_to_isc(inti->io.io_int_word);
 	list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
 	list_add_tail(&inti->list, list);
-	set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+	set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
 	spin_unlock(&fi->lock);
 	return 0;
 }
@@ -2175,6 +2191,8 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
 		return -EINVAL;
 	if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
 		return -EFAULT;
+	if (!schid)
+		return -EINVAL;
 	kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
 	/*
 	 * If userspace is conforming to the architecture, we can have at most
......
@@ -395,6 +395,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_USER_INSTR0:
 	case KVM_CAP_S390_CMMA_MIGRATION:
 	case KVM_CAP_S390_AIS:
+	case KVM_CAP_S390_AIS_MIGRATION:
 		r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
@@ -1884,8 +1885,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	rc = -ENOMEM;
 
-	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
-
 	kvm->arch.use_esca = 0; /* start with basic SCA */
 	if (!sclp.has_64bscao)
 		alloc_flags |= GFP_DMA;
......
@@ -242,6 +242,8 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
 	kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
 }
 
+int handle_sthyi(struct kvm_vcpu *vcpu);
+
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
@@ -268,9 +270,6 @@ void kvm_s390_vsie_destroy(struct kvm *kvm);
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
-/* implemented in sthyi.c */
-int handle_sthyi(struct kvm_vcpu *vcpu);
-
 /* implemented in kvm-s390.c */
 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
 				const struct kvm_s390_vm_tod_clock *gtod);
......
@@ -443,22 +443,14 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  *
  * Returns: - 0 on success
  *          - -EINVAL if the gpa is not valid guest storage
- *          - -ENOMEM if out of memory
  */
 static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 {
 	struct page *page;
-	hva_t hva;
-	int rc;
 
-	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
-	if (kvm_is_error_hva(hva))
+	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+	if (is_error_page(page))
 		return -EINVAL;
-	rc = get_user_pages_fast(hva, 1, 1, &page);
-	if (rc < 0)
-		return rc;
-	else if (rc != 1)
-		return -ENOMEM;
 	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
 	return 0;
 }
@@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
 static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
 {
-	struct page *page;
-
-	page = virt_to_page(hpa);
-	set_page_dirty_lock(page);
-	put_page(page);
+	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
 	/* mark the page always as dirty for migration */
 	mark_page_dirty(kvm, gpa_to_gfn(gpa));
 }
@@ -557,7 +545,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			rc = set_validity_icpt(scb_s, 0x003bU);
 		if (!rc) {
 			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-			if (rc == -EINVAL)
+			if (rc)
 				rc = set_validity_icpt(scb_s, 0x0034U);
 		}
 		if (rc)
@@ -574,10 +562,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		}
 		/* 256 bytes cannot cross page boundaries */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->itdba = hpa;
 	}
@@ -592,10 +580,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * if this block gets bigger, we have to shadow it.
 		 */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x1310U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->gvrd = hpa;
 	}
@@ -607,11 +595,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	}
 		/* 64 bytes cannot cross page boundaries */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x0043U);
-		/* Validity 0x0044 will be checked by SIE */
-		if (rc)
 			goto unpin;
+		}
+		/* Validity 0x0044 will be checked by SIE */
 		scb_s->riccbd = hpa;
 	}
 	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
@@ -635,10 +623,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * cross page boundaries
 		 */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x10b0U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->sdnxo = hpa | sdnxc;
 	}
 	return 0;
@@ -663,7 +651,6 @@ static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
  *
  * Returns: - 0 if the scb was pinned.
  *          - > 0 if control has to be given to guest 2
- *          - -ENOMEM if out of memory
  */
 static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		   gpa_t gpa)
@@ -672,14 +659,13 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 	int rc;
 
 	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-	if (rc == -EINVAL) {
+	if (rc) {
 		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		if (!rc)
-			rc = 1;
+		WARN_ON_ONCE(rc);
+		return 1;
 	}
-	if (!rc)
-		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
-	return rc;
+	vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	return 0;
 }
 
 /*
......
@@ -667,6 +667,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
 			       bool *writable);
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
+void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 void kvm_get_pfn(kvm_pfn_t pfn);
......
@@ -930,6 +930,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_S390_AIS_MIGRATION 150
 
 #ifdef KVM_CAP_IRQ_ROUTING
......
@@ -122,7 +122,6 @@ static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
 
 __visible bool kvm_rebooting;
@@ -1679,11 +1678,12 @@ void kvm_release_page_dirty(struct page *page)
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
 
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
+void kvm_release_pfn_dirty(kvm_pfn_t pfn)
 {
 	kvm_set_pfn_dirty(pfn);
 	kvm_release_pfn_clean(pfn);
 }
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
 
 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
......