Commit f244d910 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-s390-20140130' of...

Merge tag 'kvm-s390-20140130' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

Two new features are added by this patch set:
- The floating interrupt controller (flic) that allows us to inject,
  clear and inspect non-vcpu local interrupts. This also gives us an
  opportunity to fix deficiencies in our existing interrupt definitions.
- Support for asynchronous page faults via the pfault mechanism. Testing
  show significant guest performance improvements under host swap.
parents 4f34d683 536336c2
FLIC (floating interrupt controller)
====================================
FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
machine check interruptions. All interrupts are stored in a per-vm list of
pending interrupts. FLIC performs operations on this list.
Only one FLIC instance may be instantiated.
FLIC provides support to
- add interrupts (KVM_DEV_FLIC_ENQUEUE)
- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
- enable/disable for the guest transparent async page faults
Groups:
KVM_DEV_FLIC_ENQUEUE
Passes a buffer and length into the kernel which are then injected into
the list of pending interrupts.
attr->addr contains the pointer to the buffer and attr->attr contains
the length of the buffer.
The format of the data structure kvm_s390_irq as it is copied from userspace
is defined in usr/include/linux/kvm.h.
KVM_DEV_FLIC_GET_ALL_IRQS
Copies all floating interrupts into a buffer provided by userspace.
When the buffer is too small it returns -ENOMEM, which is the indication
for userspace to try again with a bigger buffer.
All interrupts remain pending, i.e. are not deleted from the list of
currently pending interrupts.
attr->addr contains the userspace address of the buffer into which all
interrupt data will be copied.
attr->attr contains the size of the buffer in bytes.
KVM_DEV_FLIC_CLEAR_IRQS
Simply deletes all elements from the list of currently pending floating
interrupts. No interrupts are injected into the guest.
KVM_DEV_FLIC_APF_ENABLE
Enables async page faults for the guest. So in case of a major page fault
the host is allowed to handle this async and continues the guest.
KVM_DEV_FLIC_APF_DISABLE_WAIT
Disables async page faults for the guest and waits until already pending
async page faults are done. This is necessary to trigger a completion interrupt
for every init interrupt before migrating the interrupt list.
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <asm/debug.h> #include <asm/debug.h>
#include <asm/cpu.h> #include <asm/cpu.h>
...@@ -168,18 +169,6 @@ struct kvm_vcpu_stat { ...@@ -168,18 +169,6 @@ struct kvm_vcpu_stat {
u32 diagnose_9c; u32 diagnose_9c;
}; };
struct kvm_s390_io_info {
__u16 subchannel_id; /* 0x0b8 */
__u16 subchannel_nr; /* 0x0ba */
__u32 io_int_parm; /* 0x0bc */
__u32 io_int_word; /* 0x0c0 */
};
struct kvm_s390_ext_info {
__u32 ext_params;
__u64 ext_params2;
};
#define PGM_OPERATION 0x01 #define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02 #define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03 #define PGM_EXECUTE 0x03
...@@ -188,27 +177,6 @@ struct kvm_s390_ext_info { ...@@ -188,27 +177,6 @@ struct kvm_s390_ext_info {
#define PGM_SPECIFICATION 0x06 #define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07 #define PGM_DATA 0x07
struct kvm_s390_pgm_info {
__u16 code;
};
struct kvm_s390_prefix_info {
__u32 address;
};
struct kvm_s390_extcall_info {
__u16 code;
};
struct kvm_s390_emerg_info {
__u16 code;
};
struct kvm_s390_mchk_info {
__u64 cr14;
__u64 mcic;
};
struct kvm_s390_interrupt_info { struct kvm_s390_interrupt_info {
struct list_head list; struct list_head list;
u64 type; u64 type;
...@@ -246,6 +214,7 @@ struct kvm_s390_float_interrupt { ...@@ -246,6 +214,7 @@ struct kvm_s390_float_interrupt {
unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
/ sizeof(long)]; / sizeof(long)];
struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
unsigned int irq_count;
}; };
...@@ -262,6 +231,10 @@ struct kvm_vcpu_arch { ...@@ -262,6 +231,10 @@ struct kvm_vcpu_arch {
u64 stidp_data; u64 stidp_data;
}; };
struct gmap *gmap; struct gmap *gmap;
#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
}; };
struct kvm_vm_stat { struct kvm_vm_stat {
...@@ -275,6 +248,7 @@ struct kvm_arch{ ...@@ -275,6 +248,7 @@ struct kvm_arch{
struct sca_block *sca; struct sca_block *sca;
debug_info_t *dbf; debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int; struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
struct gmap *gmap; struct gmap *gmap;
int css_support; int css_support;
}; };
...@@ -287,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr) ...@@ -287,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
return IS_ERR_VALUE(addr); return IS_ERR_VALUE(addr);
} }
#define ASYNC_PF_PER_VCPU 64
struct kvm_vcpu;
struct kvm_async_pf;
struct kvm_arch_async_pf {
unsigned long pfault_token;
};
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit; extern char sie_exit;
#endif #endif
...@@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry) ...@@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
* @table: pointer to the page directory * @table: pointer to the page directory
* @asce: address space control element for gmap page table * @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space * @crst_list: list of all crst tables used in the guest address space
* @pfault_enabled: defines if pfaults are applicable for the guest
*/ */
struct gmap { struct gmap {
struct list_head list; struct list_head list;
...@@ -775,6 +776,7 @@ struct gmap { ...@@ -775,6 +776,7 @@ struct gmap {
unsigned long asce; unsigned long asce;
void *private; void *private;
struct list_head crst_list; struct list_head crst_list;
bool pfault_enabled;
}; };
/** /**
......
...@@ -79,6 +79,7 @@ struct thread_struct { ...@@ -79,6 +79,7 @@ struct thread_struct {
unsigned long ksp; /* kernel stack pointer */ unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment; mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */ unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */ struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */ struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */ unsigned long per_flags; /* Flags to control debug behavior */
......
...@@ -16,6 +16,22 @@ ...@@ -16,6 +16,22 @@
#define __KVM_S390 #define __KVM_S390
/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
#define KVM_DEV_FLIC_ENQUEUE 2
#define KVM_DEV_FLIC_CLEAR_IRQS 3
#define KVM_DEV_FLIC_APF_ENABLE 4
#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
* There are also sclp and machine checks. This gives us
* sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
* Lets round up to 8192 pages.
*/
#define KVM_S390_MAX_FLOAT_IRQS 266250
#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
/* for KVM_GET_REGS and KVM_SET_REGS */ /* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs { struct kvm_regs {
/* general purpose regs for s390 */ /* general purpose regs for s390 */
...@@ -57,4 +73,7 @@ struct kvm_sync_regs { ...@@ -57,4 +73,7 @@ struct kvm_sync_regs {
#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) #define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) #define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
#endif #endif
...@@ -23,6 +23,8 @@ config KVM ...@@ -23,6 +23,8 @@ config KVM
select ANON_INODES select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
---help--- ---help---
Support hosting paravirtualized guest machines using the SIE Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work virtualization capability on the mainframe. This should work
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
# as published by the Free Software Foundation. # as published by the Free Software Foundation.
KVM := ../../../virt/kvm KVM := ../../../virt/kvm
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "kvm-s390.h" #include "kvm-s390.h"
#include "trace.h" #include "trace.h"
#include "trace-s390.h" #include "trace-s390.h"
#include "gaccess.h"
static int diag_release_pages(struct kvm_vcpu *vcpu) static int diag_release_pages(struct kvm_vcpu *vcpu)
{ {
...@@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) ...@@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
{
struct prs_parm {
u16 code;
u16 subcode;
u16 parm_len;
u16 parm_version;
u64 token_addr;
u64 select_mask;
u64 compare_mask;
u64 zarch;
};
struct prs_parm parm;
int rc;
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
unsigned long hva_token = KVM_HVA_ERR_BAD;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
switch (parm.subcode) {
case 0: /* TOKEN */
if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
/*
* If the pagefault handshake is already activated,
* the token must not be changed. We have to return
* decimal 8 instead, as mandated in SC24-6084.
*/
vcpu->run->s.regs.gprs[ry] = 8;
return 0;
}
if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
if (kvm_is_error_hva(hva_token))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.pfault_token = parm.token_addr;
vcpu->arch.pfault_select = parm.select_mask;
vcpu->arch.pfault_compare = parm.compare_mask;
vcpu->run->s.regs.gprs[ry] = 0;
rc = 0;
break;
case 1: /*
* CANCEL
* Specification allows to let already pending tokens survive
* the cancel, therefore to reduce code complexity, we assume
* all outstanding tokens are already pending.
*/
if (parm.token_addr || parm.select_mask ||
parm.compare_mask || parm.zarch)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
vcpu->run->s.regs.gprs[ry] = 0;
/*
* If the pfault handling was not established or is already
* canceled SC24-6084 requests to return decimal 4.
*/
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
vcpu->run->s.regs.gprs[ry] = 4;
else
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
rc = 0;
break;
default:
rc = -EOPNOTSUPP;
break;
}
return rc;
}
static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{ {
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
...@@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) ...@@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return __diag_time_slice_end(vcpu); return __diag_time_slice_end(vcpu);
case 0x9c: case 0x9c:
return __diag_time_slice_end_directed(vcpu); return __diag_time_slice_end_directed(vcpu);
case 0x258:
return __diag_page_ref_service(vcpu);
case 0x308: case 0x308:
return __diag_ipl_functions(vcpu); return __diag_ipl_functions(vcpu);
case 0x500: case 0x500:
......
...@@ -31,7 +31,7 @@ static int is_ioint(u64 type) ...@@ -31,7 +31,7 @@ static int is_ioint(u64 type)
return ((type & 0xfffe0000u) != 0xfffe0000u); return ((type & 0xfffe0000u) != 0xfffe0000u);
} }
static int psw_extint_disabled(struct kvm_vcpu *vcpu) int psw_extint_disabled(struct kvm_vcpu *vcpu)
{ {
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
} }
...@@ -78,11 +78,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, ...@@ -78,11 +78,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
return 1; return 1;
return 0; return 0;
case KVM_S390_INT_SERVICE: case KVM_S390_INT_SERVICE:
if (psw_extint_disabled(vcpu)) case KVM_S390_INT_PFAULT_INIT:
return 0; case KVM_S390_INT_PFAULT_DONE:
if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
return 1;
return 0;
case KVM_S390_INT_VIRTIO: case KVM_S390_INT_VIRTIO:
if (psw_extint_disabled(vcpu)) if (psw_extint_disabled(vcpu))
return 0; return 0;
...@@ -150,6 +147,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, ...@@ -150,6 +147,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_EMERGENCY:
case KVM_S390_INT_SERVICE: case KVM_S390_INT_SERVICE:
case KVM_S390_INT_PFAULT_INIT:
case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO: case KVM_S390_INT_VIRTIO:
if (psw_extint_disabled(vcpu)) if (psw_extint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_EXT_INT); __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
...@@ -223,6 +222,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, ...@@ -223,6 +222,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
rc |= put_guest(vcpu, inti->ext.ext_params, rc |= put_guest(vcpu, inti->ext.ext_params,
(u32 __user *)__LC_EXT_PARAMS); (u32 __user *)__LC_EXT_PARAMS);
break; break;
case KVM_S390_INT_PFAULT_INIT:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
inti->ext.ext_params2);
rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc |= put_guest(vcpu, inti->ext.ext_params2,
(u64 __user *) __LC_EXT_PARAMS2);
break;
case KVM_S390_INT_PFAULT_DONE:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
inti->ext.ext_params2);
rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc |= put_guest(vcpu, inti->ext.ext_params2,
(u64 __user *) __LC_EXT_PARAMS2);
break;
case KVM_S390_INT_VIRTIO: case KVM_S390_INT_VIRTIO:
VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
inti->ext.ext_params, inti->ext.ext_params2); inti->ext.ext_params, inti->ext.ext_params2);
...@@ -357,7 +380,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) ...@@ -357,7 +380,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
return 1; return 1;
} }
static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{ {
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
...@@ -528,6 +551,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) ...@@ -528,6 +551,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
list_for_each_entry_safe(inti, n, &fi->list, list) { list_for_each_entry_safe(inti, n, &fi->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) { if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list); list_del(&inti->list);
fi->irq_count--;
deliver = 1; deliver = 1;
break; break;
} }
...@@ -583,6 +607,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) ...@@ -583,6 +607,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
if ((inti->type == KVM_S390_MCHK) && if ((inti->type == KVM_S390_MCHK) &&
__interrupt_is_deliverable(vcpu, inti)) { __interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list); list_del(&inti->list);
fi->irq_count--;
deliver = 1; deliver = 1;
break; break;
} }
...@@ -650,8 +675,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, ...@@ -650,8 +675,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
inti = iter; inti = iter;
break; break;
} }
if (inti) if (inti) {
list_del_init(&inti->list); list_del_init(&inti->list);
fi->irq_count--;
}
if (list_empty(&fi->list)) if (list_empty(&fi->list))
atomic_set(&fi->active, 0); atomic_set(&fi->active, 0);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
...@@ -659,53 +686,98 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, ...@@ -659,53 +686,98 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
return inti; return inti;
} }
int kvm_s390_inject_vm(struct kvm *kvm, static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
struct kvm_s390_interrupt *s390int)
{ {
struct kvm_s390_local_interrupt *li; struct kvm_s390_local_interrupt *li;
struct kvm_s390_float_interrupt *fi; struct kvm_s390_float_interrupt *fi;
struct kvm_s390_interrupt_info *inti, *iter; struct kvm_s390_interrupt_info *iter;
int sigcpu; int sigcpu;
int rc = 0;
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
rc = -EINVAL;
goto unlock_fi;
}
fi->irq_count++;
if (!is_ioint(inti->type)) {
list_add_tail(&inti->list, &fi->list);
} else {
u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
/* Keep I/O interrupts sorted in isc order. */
list_for_each_entry(iter, &fi->list, list) {
if (!is_ioint(iter->type))
continue;
if (int_word_to_isc_bits(iter->io.io_int_word)
<= isc_bits)
continue;
break;
}
list_add_tail(&inti->list, &iter->list);
}
atomic_set(&fi->active, 1);
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
if (sigcpu == KVM_MAX_VCPUS) {
do {
sigcpu = fi->next_rr_cpu++;
if (sigcpu == KVM_MAX_VCPUS)
sigcpu = fi->next_rr_cpu = 0;
} while (fi->local_int[sigcpu] == NULL);
}
li = fi->local_int[sigcpu];
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
if (waitqueue_active(li->wq))
wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
unlock_fi:
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
return rc;
}
int kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int)
{
struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL); inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti) if (!inti)
return -ENOMEM; return -ENOMEM;
switch (s390int->type) { inti->type = s390int->type;
switch (inti->type) {
case KVM_S390_INT_VIRTIO: case KVM_S390_INT_VIRTIO:
VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
s390int->parm, s390int->parm64); s390int->parm, s390int->parm64);
inti->type = s390int->type;
inti->ext.ext_params = s390int->parm; inti->ext.ext_params = s390int->parm;
inti->ext.ext_params2 = s390int->parm64; inti->ext.ext_params2 = s390int->parm64;
break; break;
case KVM_S390_INT_SERVICE: case KVM_S390_INT_SERVICE:
VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
inti->type = s390int->type;
inti->ext.ext_params = s390int->parm; inti->ext.ext_params = s390int->parm;
break; break;
case KVM_S390_PROGRAM_INT: case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_SIGP_STOP: inti->type = s390int->type;
case KVM_S390_INT_EXTERNAL_CALL: inti->ext.ext_params2 = s390int->parm64;
case KVM_S390_INT_EMERGENCY: break;
kfree(inti);
return -EINVAL;
case KVM_S390_MCHK: case KVM_S390_MCHK:
VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
s390int->parm64); s390int->parm64);
inti->type = s390int->type;
inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
inti->mchk.mcic = s390int->parm64; inti->mchk.mcic = s390int->parm64;
break; break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
if (s390int->type & IOINT_AI_MASK) if (inti->type & IOINT_AI_MASK)
VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
else else
VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
s390int->type & IOINT_CSSID_MASK, s390int->type & IOINT_CSSID_MASK,
s390int->type & IOINT_SSID_MASK, s390int->type & IOINT_SSID_MASK,
s390int->type & IOINT_SCHID_MASK); s390int->type & IOINT_SCHID_MASK);
inti->type = s390int->type;
inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_id = s390int->parm >> 16;
inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
inti->io.io_int_parm = s390int->parm64 >> 32; inti->io.io_int_parm = s390int->parm64 >> 32;
...@@ -718,43 +790,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, ...@@ -718,43 +790,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
2); 2);
mutex_lock(&kvm->lock); return __inject_vm(kvm, inti);
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
if (!is_ioint(inti->type))
list_add_tail(&inti->list, &fi->list);
else {
u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
/* Keep I/O interrupts sorted in isc order. */
list_for_each_entry(iter, &fi->list, list) {
if (!is_ioint(iter->type))
continue;
if (int_word_to_isc_bits(iter->io.io_int_word)
<= isc_bits)
continue;
break;
}
list_add_tail(&inti->list, &iter->list);
}
atomic_set(&fi->active, 1);
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
if (sigcpu == KVM_MAX_VCPUS) {
do {
sigcpu = fi->next_rr_cpu++;
if (sigcpu == KVM_MAX_VCPUS)
sigcpu = fi->next_rr_cpu = 0;
} while (fi->local_int[sigcpu] == NULL);
}
li = fi->local_int[sigcpu];
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
if (waitqueue_active(li->wq))
wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
return 0;
} }
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
...@@ -814,6 +850,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, ...@@ -814,6 +850,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
inti->type = s390int->type; inti->type = s390int->type;
inti->mchk.mcic = s390int->parm64; inti->mchk.mcic = s390int->parm64;
break; break;
case KVM_S390_INT_PFAULT_INIT:
inti->type = s390int->type;
inti->ext.ext_params2 = s390int->parm64;
break;
case KVM_S390_INT_VIRTIO: case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE: case KVM_S390_INT_SERVICE:
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
...@@ -841,3 +881,232 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, ...@@ -841,3 +881,232 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
mutex_unlock(&vcpu->kvm->lock); mutex_unlock(&vcpu->kvm->lock);
return 0; return 0;
} }
static void clear_floating_interrupts(struct kvm *kvm)
{
struct kvm_s390_float_interrupt *fi;
struct kvm_s390_interrupt_info *n, *inti = NULL;
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
list_for_each_entry_safe(inti, n, &fi->list, list) {
list_del(&inti->list);
kfree(inti);
}
fi->irq_count = 0;
atomic_set(&fi->active, 0);
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
}
static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
u8 *addr)
{
struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
struct kvm_s390_irq irq = {0};
irq.type = inti->type;
switch (inti->type) {
case KVM_S390_INT_PFAULT_INIT:
case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE:
irq.u.ext = inti->ext;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
irq.u.io = inti->io;
break;
case KVM_S390_MCHK:
irq.u.mchk = inti->mchk;
break;
default:
return -EINVAL;
}
if (copy_to_user(uptr, &irq, sizeof(irq)))
return -EFAULT;
return 0;
}
static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
{
struct kvm_s390_interrupt_info *inti;
struct kvm_s390_float_interrupt *fi;
int ret = 0;
int n = 0;
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
list_for_each_entry(inti, &fi->list, list) {
if (len < sizeof(struct kvm_s390_irq)) {
/* signal userspace to try again */
ret = -ENOMEM;
break;
}
ret = copy_irq_to_user(inti, buf);
if (ret)
break;
buf += sizeof(struct kvm_s390_irq);
len -= sizeof(struct kvm_s390_irq);
n++;
}
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
return ret < 0 ? ret : n;
}
static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r;
switch (attr->group) {
case KVM_DEV_FLIC_GET_ALL_IRQS:
r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
attr->attr);
break;
default:
r = -EINVAL;
}
return r;
}
static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
u64 addr)
{
struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
void *target = NULL;
void __user *source;
u64 size;
if (get_user(inti->type, (u64 __user *)addr))
return -EFAULT;
switch (inti->type) {
case KVM_S390_INT_PFAULT_INIT:
case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE:
target = (void *) &inti->ext;
source = &uptr->u.ext;
size = sizeof(inti->ext);
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
target = (void *) &inti->io;
source = &uptr->u.io;
size = sizeof(inti->io);
break;
case KVM_S390_MCHK:
target = (void *) &inti->mchk;
source = &uptr->u.mchk;
size = sizeof(inti->mchk);
break;
default:
return -EINVAL;
}
if (copy_from_user(target, source, size))
return -EFAULT;
return 0;
}
static int enqueue_floating_irq(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
struct kvm_s390_interrupt_info *inti = NULL;
int r = 0;
int len = attr->attr;
if (len % sizeof(struct kvm_s390_irq) != 0)
return -EINVAL;
else if (len > KVM_S390_FLIC_MAX_BUFFER)
return -EINVAL;
while (len >= sizeof(struct kvm_s390_irq)) {
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
r = copy_irq_from_user(inti, attr->addr);
if (r) {
kfree(inti);
return r;
}
r = __inject_vm(dev->kvm, inti);
if (r) {
kfree(inti);
return r;
}
len -= sizeof(struct kvm_s390_irq);
attr->addr += sizeof(struct kvm_s390_irq);
}
return r;
}
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
unsigned int i;
struct kvm_vcpu *vcpu;
switch (attr->group) {
case KVM_DEV_FLIC_ENQUEUE:
r = enqueue_floating_irq(dev, attr);
break;
case KVM_DEV_FLIC_CLEAR_IRQS:
r = 0;
clear_floating_interrupts(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
dev->kvm->arch.gmap->pfault_enabled = 1;
break;
case KVM_DEV_FLIC_APF_DISABLE_WAIT:
dev->kvm->arch.gmap->pfault_enabled = 0;
/*
* Make sure no async faults are in transition when
* clearing the queues. So we don't need to worry
* about late coming workers.
*/
synchronize_srcu(&dev->kvm->srcu);
kvm_for_each_vcpu(i, vcpu, dev->kvm)
kvm_clear_async_pf_completion_queue(vcpu);
break;
default:
r = -EINVAL;
}
return r;
}
static int flic_create(struct kvm_device *dev, u32 type)
{
if (!dev)
return -EINVAL;
if (dev->kvm->arch.flic)
return -EINVAL;
dev->kvm->arch.flic = dev;
return 0;
}
static void flic_destroy(struct kvm_device *dev)
{
dev->kvm->arch.flic = NULL;
kfree(dev);
}
/* s390 floating irq controller (flic) */
struct kvm_device_ops kvm_flic_ops = {
.name = "kvm-flic",
.get_attr = flic_get_attr,
.set_attr = flic_set_attr,
.create = flic_create,
.destroy = flic_destroy,
};
...@@ -152,11 +152,13 @@ int kvm_dev_ioctl_check_extension(long ext) ...@@ -152,11 +152,13 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_KVM_S390_UCONTROL #ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL: case KVM_CAP_S390_UCONTROL:
#endif #endif
case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS: case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG: case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP: case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD: case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
r = 1; r = 1;
break; break;
case KVM_CAP_NR_VCPUS: case KVM_CAP_NR_VCPUS:
...@@ -254,6 +256,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -254,6 +256,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.gmap) if (!kvm->arch.gmap)
goto out_nogmap; goto out_nogmap;
kvm->arch.gmap->private = kvm; kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;
} }
kvm->arch.css_support = 0; kvm->arch.css_support = 0;
...@@ -271,6 +274,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) ...@@ -271,6 +274,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{ {
VCPU_EVENT(vcpu, 3, "%s", "free cpu"); VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) { if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id, clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn); (unsigned long *) &vcpu->kvm->arch.sca->mcn);
...@@ -320,6 +324,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) ...@@ -320,6 +324,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
/* Section: vcpu related */ /* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) { if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = gmap_alloc(current->mm); vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap) if (!vcpu->arch.gmap)
...@@ -380,6 +386,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) ...@@ -380,6 +386,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0; vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->gbea = 1;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
} }
...@@ -553,6 +561,18 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, ...@@ -553,6 +561,18 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
r = put_user(vcpu->arch.sie_block->ckc, r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr); (u64 __user *)reg->addr);
break; break;
case KVM_REG_S390_PFTOKEN:
r = put_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFCOMPARE:
r = put_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFSELECT:
r = put_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
break;
default: default:
break; break;
} }
...@@ -582,6 +602,18 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, ...@@ -582,6 +602,18 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
r = get_user(vcpu->arch.sie_block->ckc, r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr); (u64 __user *)reg->addr);
break; break;
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
break;
default: default:
break; break;
} }
...@@ -700,10 +732,100 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) ...@@ -700,10 +732,100 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
{
long rc;
hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
struct mm_struct *mm = current->mm;
down_read(&mm->mmap_sem);
rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
return rc;
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
{
struct kvm_s390_interrupt inti;
inti.parm64 = token;
if (start_token) {
inti.type = KVM_S390_INT_PFAULT_INIT;
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
} else {
inti.type = KVM_S390_INT_PFAULT_DONE;
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
/* s390 will always inject the page directly */
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
/*
* s390 will always inject the page directly,
* but we still want check_async_completion to cleanup
*/
return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
hva_t hva;
struct kvm_arch_async_pf arch;
int rc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
return 0;
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
return 0;
if (psw_extint_disabled(vcpu))
return 0;
if (kvm_cpu_has_interrupt(vcpu))
return 0;
if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
return 0;
if (!vcpu->arch.gmap->pfault_enabled)
return 0;
hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
return 0;
rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{ {
int rc, cpuflags; int rc, cpuflags;
/*
* On s390 notifications for arriving pages will be delivered directly
* to the guest but the house keeping for completed pfaults is
* handled outside the worker.
*/
kvm_check_async_pf_completion(vcpu);
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (need_resched()) if (need_resched())
...@@ -729,7 +851,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) ...@@ -729,7 +851,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{ {
int rc; int rc = -1;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode); vcpu->arch.sie_block->icptcode);
...@@ -743,7 +865,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) ...@@ -743,7 +865,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
current->thread.gmap_addr; current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10; vcpu->run->s390_ucontrol.pgm_code = 0x10;
rc = -EREMOTE; rc = -EREMOTE;
} else {
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu) ||
(kvm_arch_fault_in_sync(vcpu) >= 0))
rc = 0;
}
if (rc == -1) {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu); trace_kvm_s390_sie_fault(vcpu);
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
......
...@@ -159,4 +159,8 @@ void exit_sie_sync(struct kvm_vcpu *vcpu); ...@@ -159,4 +159,8 @@ void exit_sie_sync(struct kvm_vcpu *vcpu);
/* implemented in diag.c */ /* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
/* implemented in interrupt.c */
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
#endif #endif
...@@ -224,6 +224,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) ...@@ -224,6 +224,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{ {
int rc; int rc;
unsigned int i;
struct kvm_vcpu *v;
switch (parameter & 0xff) { switch (parameter & 0xff) {
case 0: case 0:
...@@ -231,6 +233,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) ...@@ -231,6 +233,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
break; break;
case 1: case 1:
case 2: case 2:
kvm_for_each_vcpu(i, v, vcpu->kvm) {
v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(v);
}
rc = SIGP_CC_ORDER_CODE_ACCEPTED; rc = SIGP_CC_ORDER_CODE_ACCEPTED;
break; break;
default: default:
......
...@@ -30,6 +30,52 @@ ...@@ -30,6 +30,52 @@
TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
__entry->pswmask, __entry->pswaddr, p_args) __entry->pswmask, __entry->pswaddr, p_args)
TRACE_EVENT(kvm_s390_major_guest_pfault,
TP_PROTO(VCPU_PROTO_COMMON),
TP_ARGS(VCPU_ARGS_COMMON),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
),
VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
);
TRACE_EVENT(kvm_s390_pfault_init,
TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(long, pfault_token)
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
__entry->pfault_token = pfault_token;
),
VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
);
TRACE_EVENT(kvm_s390_pfault_done,
TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(long, pfault_token)
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
__entry->pfault_token = pfault_token;
),
VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
);
/* /*
* Tracepoints for SIE entry and exit. * Tracepoints for SIE entry and exit.
*/ */
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADMAP 0x020000
#define VM_FAULT_BADACCESS 0x040000 #define VM_FAULT_BADACCESS 0x040000
#define VM_FAULT_SIGNAL 0x080000 #define VM_FAULT_SIGNAL 0x080000
#define VM_FAULT_PFAULT 0x100000
static unsigned long store_indication __read_mostly; static unsigned long store_indication __read_mostly;
...@@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) ...@@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
return; return;
} }
case VM_FAULT_BADCONTEXT: case VM_FAULT_BADCONTEXT:
case VM_FAULT_PFAULT:
do_no_context(regs); do_no_context(regs);
break; break;
case VM_FAULT_SIGNAL: case VM_FAULT_SIGNAL:
...@@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) ...@@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
*/ */
static inline int do_exception(struct pt_regs *regs, int access) static inline int do_exception(struct pt_regs *regs, int access)
{ {
#ifdef CONFIG_PGSTE
struct gmap *gmap;
#endif
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *mm; struct mm_struct *mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE #ifdef CONFIG_PGSTE
if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { gmap = (struct gmap *)
address = __gmap_fault(address, ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
(struct gmap *) S390_lowcore.gmap); if (gmap) {
address = __gmap_fault(address, gmap);
if (address == -EFAULT) { if (address == -EFAULT) {
fault = VM_FAULT_BADMAP; fault = VM_FAULT_BADMAP;
goto out_up; goto out_up;
...@@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
fault = VM_FAULT_OOM; fault = VM_FAULT_OOM;
goto out_up; goto out_up;
} }
if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT;
} }
#endif #endif
...@@ -371,9 +379,19 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -371,9 +379,19 @@ static inline int do_exception(struct pt_regs *regs, int access)
regs, address); regs, address);
} }
if (fault & VM_FAULT_RETRY) { if (fault & VM_FAULT_RETRY) {
#ifdef CONFIG_PGSTE
if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* FAULT_FLAG_RETRY_NOWAIT has been set,
* mmap_sem has not been released */
current->thread.gmap_pfault = 1;
fault = VM_FAULT_PFAULT;
goto out_up;
}
#endif
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */ * of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY; flags &= ~(FAULT_FLAG_ALLOW_RETRY |
FAULT_FLAG_RETRY_NOWAIT);
flags |= FAULT_FLAG_TRIED; flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
goto retry; goto retry;
......
...@@ -3328,7 +3328,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) ...@@ -3328,7 +3328,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map; arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
return kvm_setup_async_pf(vcpu, gva, gfn, &arch); return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
} }
static bool can_do_async_pf(struct kvm_vcpu *vcpu) static bool can_do_async_pf(struct kvm_vcpu *vcpu)
......
...@@ -192,7 +192,7 @@ struct kvm_async_pf { ...@@ -192,7 +192,7 @@ struct kvm_async_pf {
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch); struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif #endif
...@@ -1064,6 +1064,7 @@ extern struct kvm_device_ops kvm_mpic_ops; ...@@ -1064,6 +1064,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_xics_ops;
extern struct kvm_device_ops kvm_vfio_ops; extern struct kvm_device_ops kvm_vfio_ops;
extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
extern struct kvm_device_ops kvm_flic_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
......
...@@ -413,6 +413,8 @@ struct kvm_s390_psw { ...@@ -413,6 +413,8 @@ struct kvm_s390_psw {
#define KVM_S390_PROGRAM_INT 0xfffe0001u #define KVM_S390_PROGRAM_INT 0xfffe0001u
#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u #define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
#define KVM_S390_RESTART 0xfffe0003u #define KVM_S390_RESTART 0xfffe0003u
#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
#define KVM_S390_MCHK 0xfffe1000u #define KVM_S390_MCHK 0xfffe1000u
#define KVM_S390_INT_VIRTIO 0xffff2603u #define KVM_S390_INT_VIRTIO 0xffff2603u
#define KVM_S390_INT_SERVICE 0xffff2401u #define KVM_S390_INT_SERVICE 0xffff2401u
...@@ -434,6 +436,69 @@ struct kvm_s390_interrupt { ...@@ -434,6 +436,69 @@ struct kvm_s390_interrupt {
__u64 parm64; __u64 parm64;
}; };
struct kvm_s390_io_info {
__u16 subchannel_id;
__u16 subchannel_nr;
__u32 io_int_parm;
__u32 io_int_word;
};
struct kvm_s390_ext_info {
__u32 ext_params;
__u32 pad;
__u64 ext_params2;
};
struct kvm_s390_pgm_info {
__u64 trans_exc_code;
__u64 mon_code;
__u64 per_address;
__u32 data_exc_code;
__u16 code;
__u16 mon_class_nr;
__u8 per_code;
__u8 per_atmid;
__u8 exc_access_id;
__u8 per_access_id;
__u8 op_access_id;
__u8 pad[3];
};
struct kvm_s390_prefix_info {
__u32 address;
};
struct kvm_s390_extcall_info {
__u16 code;
};
struct kvm_s390_emerg_info {
__u16 code;
};
struct kvm_s390_mchk_info {
__u64 cr14;
__u64 mcic;
__u64 failing_storage_address;
__u32 ext_damage_code;
__u32 pad;
__u8 fixed_logout[16];
};
struct kvm_s390_irq {
__u64 type;
union {
struct kvm_s390_io_info io;
struct kvm_s390_ext_info ext;
struct kvm_s390_pgm_info pgm;
struct kvm_s390_emerg_info emerg;
struct kvm_s390_extcall_info extcall;
struct kvm_s390_prefix_info prefix;
struct kvm_s390_mchk_info mchk;
char reserved[64];
} u;
};
/* for KVM_SET_GUEST_DEBUG */ /* for KVM_SET_GUEST_DEBUG */
#define KVM_GUESTDBG_ENABLE 0x00000001 #define KVM_GUESTDBG_ENABLE 0x00000001
...@@ -855,6 +920,7 @@ struct kvm_device_attr { ...@@ -855,6 +920,7 @@ struct kvm_device_attr {
#define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_ADD 1
#define KVM_DEV_VFIO_GROUP_DEL 2 #define KVM_DEV_VFIO_GROUP_DEL 2
#define KVM_DEV_TYPE_ARM_VGIC_V2 5 #define KVM_DEV_TYPE_ARM_VGIC_V2 5
#define KVM_DEV_TYPE_FLIC 6
/* /*
* ioctls for VM fds * ioctls for VM fds
......
...@@ -22,6 +22,10 @@ config KVM_MMIO ...@@ -22,6 +22,10 @@ config KVM_MMIO
config KVM_ASYNC_PF config KVM_ASYNC_PF
bool bool
# Toggle to switch between direct notification and batch job
config KVM_ASYNC_PF_SYNC
bool
config HAVE_KVM_MSI config HAVE_KVM_MSI
bool bool
......
...@@ -28,6 +28,21 @@ ...@@ -28,6 +28,21 @@
#include "async_pf.h" #include "async_pf.h"
#include <trace/events/kvm.h> #include <trace/events/kvm.h>
static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
kvm_arch_async_page_present(vcpu, work);
#endif
}
static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
#ifndef CONFIG_KVM_ASYNC_PF_SYNC
kvm_arch_async_page_present(vcpu, work);
#endif
}
static struct kmem_cache *async_pf_cache; static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void) int kvm_async_pf_init(void)
...@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work) ...@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
kvm_async_page_present_sync(vcpu, apf);
unuse_mm(mm); unuse_mm(mm);
spin_lock(&vcpu->async_pf.lock); spin_lock(&vcpu->async_pf.lock);
...@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) ...@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.queue.next, list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue); typeof(*work), queue);
list_del(&work->queue); list_del(&work->queue);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
flush_work(&work->work);
#else
if (cancel_work_sync(&work->work)) { if (cancel_work_sync(&work->work)) {
mmdrop(work->mm); mmdrop(work->mm);
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work); kmem_cache_free(async_pf_cache, work);
} }
#endif
} }
spin_lock(&vcpu->async_pf.lock); spin_lock(&vcpu->async_pf.lock);
...@@ -138,7 +159,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) ...@@ -138,7 +159,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
} }
} }
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch) struct kvm_arch_async_pf *arch)
{ {
struct kvm_async_pf *work; struct kvm_async_pf *work;
...@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, ...@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
work->wakeup_all = false; work->wakeup_all = false;
work->vcpu = vcpu; work->vcpu = vcpu;
work->gva = gva; work->gva = gva;
work->addr = gfn_to_hva(vcpu->kvm, gfn); work->addr = hva;
work->arch = *arch; work->arch = *arch;
work->mm = current->mm; work->mm = current->mm;
atomic_inc(&work->mm->mm_count); atomic_inc(&work->mm->mm_count);
......
...@@ -2283,6 +2283,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, ...@@ -2283,6 +2283,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
case KVM_DEV_TYPE_ARM_VGIC_V2: case KVM_DEV_TYPE_ARM_VGIC_V2:
ops = &kvm_arm_vgic_v2_ops; ops = &kvm_arm_vgic_v2_ops;
break; break;
#endif
#ifdef CONFIG_S390
case KVM_DEV_TYPE_FLIC:
ops = &kvm_flic_ops;
break;
#endif #endif
default: default:
return -ENODEV; return -ENODEV;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment