Commit 63b5cf04 authored by Marcelo Tosatti's avatar Marcelo Tosatti

Merge tag 'kvm-s390-20140422' of...

Merge tag 'kvm-s390-20140422' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into queue

Lazy storage key handling
-------------------------
Linux does not use the ACC and F bits of the storage key. Newer Linux
versions also do not use the storage keys for dirty and reference
tracking. We can optimize the guest handling for those guests for faults
as well as page-in and page-out by simply not caring about the guest
visible storage key. We trap guest storage key instruction to enable
those keys only on demand.

Migration bitmap

Until now s390 never provided a proper dirty bitmap.  Let's provide a
proper migration bitmap for s390. We also change the user dirty tracking
to a fault based mechanism. This makes the host completely independent
from the storage keys. Long term this will allow us to back guest memory
with large pages.

per-VM device attributes
------------------------
To avoid the introduction of new ioctls, let's provide the
attribute semanantic also on the VM-"device".

Userspace controlled CMMA
-------------------------
The CMMA assist is changed from "always on" to "on if requested" via
per-VM device attributes. In addition a callback to reset all usage
states is provided.

Proper guest DAT handling for intercepts
----------------------------------------
While instructions handled by SIE take care of all addressing aspects,
KVM/s390 currently does not care about guest address translation of
intercepts. This worked out fine, because
- the s390 Linux kernel has a 1:1 mapping between kernel virtual<->real
 for all pages up to memory size
- intercepts happen only for a small amount of cases
- all of these intercepts happen to be in the kernel text for current
  distros

Of course we need to be better for other intercepts, kernel modules etc.
We provide the infrastructure and rework all in-kernel intercepts to work
on logical addresses (paging etc) instead of real ones. The code has
been running internally for several months now, so it is time for going
public.

GDB support
-----------
We provide breakpoints, single stepping and watchpoints.

Fixes/Cleanups
--------------
- Improve program check delivery
- Factor out the handling of transactional memory  on program checks
- Use the existing define __LC_PGM_TDB
- Several cleanups in the lowcore structure
- Documentation

NOTES
-----
- All patches touching base s390 are either ACKed or written by the s390
  maintainers
- One base KVM patch "KVM: add kvm_is_error_gpa() helper"
- One patch introduces the notion of VM device attributes
Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>

Conflicts:
	include/uapi/linux/kvm.h
parents 5c7411e2 e325fe69
......@@ -2314,8 +2314,8 @@ struct kvm_create_device {
4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
Type: device ioctl, vm ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
......@@ -2340,8 +2340,8 @@ struct kvm_device_attr {
4.81 KVM_HAS_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
Type: device ioctl, vm ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
......
Generic vm interface
====================================
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
struct kvm_device_attr as other devices, but targets VM-wide settings
and controls.
The groups and attributes per virtual machine, if any, are architecture
specific.
1. GROUP: KVM_S390_VM_MEM_CTRL
Architectures: s390
1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
Parameters: none
Returns: -EBUSY if already a vcpus is defined, otherwise 0
Enables CMMA for the virtual machine
1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
Parameteres: none
Returns: 0
Clear the CMMA status for all guest pages, so any pages the guest marked
as unused are again used any may not be reclaimed by the host.
......@@ -78,3 +78,5 @@ DIAGNOSE function code 'X'501 - KVM breakpoint
If the function code specifies 0x501, breakpoint functions may be performed.
This function code is handled by userspace.
This diagnose function code has no subfunctions and uses no parameters.
......@@ -57,6 +57,20 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
void smp_ctl_set_bit(int cr, int bit);
void smp_ctl_clear_bit(int cr, int bit);
union ctlreg0 {
unsigned long val;
struct {
#ifdef CONFIG_64BIT
unsigned long : 32;
#endif
unsigned long : 3;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
unsigned long : 23;
};
};
#ifdef CONFIG_SMP
# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
......
......@@ -39,9 +39,17 @@ struct sca_entry {
__u64 reserved2[2];
} __attribute__((packed));
union ipte_control {
unsigned long val;
struct {
unsigned long k : 1;
unsigned long kh : 31;
unsigned long kg : 32;
};
};
struct sca_block {
__u64 ipte_control;
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
__u64 reserved2;
......@@ -85,12 +93,26 @@ struct kvm_s390_sie_block {
__u8 reserved40[4]; /* 0x0040 */
#define LCTL_CR0 0x8000
#define LCTL_CR6 0x0200
#define LCTL_CR9 0x0040
#define LCTL_CR10 0x0020
#define LCTL_CR11 0x0010
#define LCTL_CR14 0x0002
__u16 lctl; /* 0x0044 */
__s16 icpua; /* 0x0046 */
#define ICTL_PINT 0x20000000
#define ICTL_LPSW 0x00400000
#define ICTL_STCTL 0x00040000
#define ICTL_ISKE 0x00004000
#define ICTL_SSKE 0x00002000
#define ICTL_RRBE 0x00001000
__u32 ictl; /* 0x0048 */
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
#define ICPT_PROGI 0x08
#define ICPT_INSTPROGI 0x0C
#define ICPT_OPEREXC 0x2C
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
__u8 icptcode; /* 0x0050 */
__u8 reserved51; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
......@@ -109,9 +131,21 @@ struct kvm_s390_sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
__u8 reservedb0[30]; /* 0x00b0 */
__u8 reservedb0[28]; /* 0x00b0 */
__u16 pgmilc; /* 0x00cc */
__u16 iprcc; /* 0x00ce */
__u8 reservedd0[48]; /* 0x00d0 */
__u32 dxc; /* 0x00d0 */
__u16 mcn; /* 0x00d4 */
__u8 perc; /* 0x00d6 */
__u8 peratmid; /* 0x00d7 */
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
__u8 reservedf0[16]; /* 0x00f0 */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
......@@ -146,6 +180,8 @@ struct kvm_vcpu_stat {
u32 exit_instruction;
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
u32 instruction_stctg;
u32 exit_program_interruption;
u32 exit_instr_and_program;
u32 deliver_external_call;
......@@ -164,6 +200,7 @@ struct kvm_vcpu_stat {
u32 instruction_stpx;
u32 instruction_stap;
u32 instruction_storage_key;
u32 instruction_ipte_interlock;
u32 instruction_stsch;
u32 instruction_chsc;
u32 instruction_stsi;
......@@ -190,6 +227,51 @@ struct kvm_vcpu_stat {
#define PGM_ADDRESSING 0x05
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
#define PGM_FIXED_POINT_OVERFLOW 0x08
#define PGM_FIXED_POINT_DIVIDE 0x09
#define PGM_DECIMAL_OVERFLOW 0x0a
#define PGM_DECIMAL_DIVIDE 0x0b
#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
#define PGM_HFP_SIGNIFICANCE 0x0e
#define PGM_HFP_DIVIDE 0x0f
#define PGM_SEGMENT_TRANSLATION 0x10
#define PGM_PAGE_TRANSLATION 0x11
#define PGM_TRANSLATION_SPEC 0x12
#define PGM_SPECIAL_OPERATION 0x13
#define PGM_OPERAND 0x15
#define PGM_TRACE_TABEL 0x16
#define PGM_SPACE_SWITCH 0x1c
#define PGM_HFP_SQUARE_ROOT 0x1d
#define PGM_PC_TRANSLATION_SPEC 0x1f
#define PGM_AFX_TRANSLATION 0x20
#define PGM_ASX_TRANSLATION 0x21
#define PGM_LX_TRANSLATION 0x22
#define PGM_EX_TRANSLATION 0x23
#define PGM_PRIMARY_AUTHORITY 0x24
#define PGM_SECONDARY_AUTHORITY 0x25
#define PGM_LFX_TRANSLATION 0x26
#define PGM_LSX_TRANSLATION 0x27
#define PGM_ALET_SPECIFICATION 0x28
#define PGM_ALEN_TRANSLATION 0x29
#define PGM_ALE_SEQUENCE 0x2a
#define PGM_ASTE_VALIDITY 0x2b
#define PGM_ASTE_SEQUENCE 0x2c
#define PGM_EXTENDED_AUTHORITY 0x2d
#define PGM_LSTE_SEQUENCE 0x2e
#define PGM_ASTE_INSTANCE 0x2f
#define PGM_STACK_FULL 0x30
#define PGM_STACK_EMPTY 0x31
#define PGM_STACK_SPECIFICATION 0x32
#define PGM_STACK_TYPE 0x33
#define PGM_STACK_OPERATION 0x34
#define PGM_ASCE_TYPE 0x38
#define PGM_REGION_FIRST_TRANS 0x39
#define PGM_REGION_SECOND_TRANS 0x3a
#define PGM_REGION_THIRD_TRANS 0x3b
#define PGM_MONITOR 0x40
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
struct kvm_s390_interrupt_info {
struct list_head list;
......@@ -229,6 +311,45 @@ struct kvm_s390_float_interrupt {
unsigned int irq_count;
};
struct kvm_hw_wp_info_arch {
unsigned long addr;
unsigned long phys_addr;
int len;
char *old_data;
};
struct kvm_hw_bp_info_arch {
unsigned long addr;
int len;
};
/*
* Only the upper 16 bits of kvm_guest_debug->control are arch specific.
* Further KVM_GUESTDBG flags which an be used from userspace can be found in
* arch/s390/include/uapi/asm/kvm.h
*/
#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
#define guestdbg_enabled(vcpu) \
(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
#define guestdbg_sstep_enabled(vcpu) \
(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
#define guestdbg_hw_bp_enabled(vcpu) \
(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
(vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
struct kvm_guestdbg_info_arch {
unsigned long cr0;
unsigned long cr9;
unsigned long cr10;
unsigned long cr11;
struct kvm_hw_bp_info_arch *hw_bp_info;
struct kvm_hw_wp_info_arch *hw_wp_info;
int nr_hw_bp;
int nr_hw_wp;
unsigned long last_bp;
};
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
......@@ -238,11 +359,13 @@ struct kvm_vcpu_arch {
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct tasklet_struct tasklet;
struct kvm_s390_pgm_info pgm;
union {
struct cpuid cpu_id;
u64 stidp_data;
};
struct gmap *gmap;
struct kvm_guestdbg_info_arch guestdbg;
#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
unsigned long pfault_token;
unsigned long pfault_select;
......@@ -285,7 +408,9 @@ struct kvm_arch{
struct gmap *gmap;
int css_support;
int use_irqchip;
int use_cmma;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
};
#define KVM_HVA_ERR_BAD (-1UL)
......
......@@ -56,13 +56,14 @@ struct _lowcore {
__u16 pgm_code; /* 0x008e */
__u32 trans_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
__u16 per_perc_atmid; /* 0x0096 */
__u8 per_code; /* 0x0096 */
__u8 per_atmid; /* 0x0097 */
__u32 per_address; /* 0x0098 */
__u32 monitor_code; /* 0x009c */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
__u8 op_access_id; /* 0x00a2 */
__u8 ar_access_id; /* 0x00a3 */
__u8 ar_mode_id; /* 0x00a3 */
__u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */
__u16 subchannel_id; /* 0x00b8 */
__u16 subchannel_nr; /* 0x00ba */
......@@ -196,12 +197,13 @@ struct _lowcore {
__u16 pgm_code; /* 0x008e */
__u32 data_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
__u16 per_perc_atmid; /* 0x0096 */
__u8 per_code; /* 0x0096 */
__u8 per_atmid; /* 0x0097 */
__u64 per_address; /* 0x0098 */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
__u8 op_access_id; /* 0x00a2 */
__u8 ar_access_id; /* 0x00a3 */
__u8 ar_mode_id; /* 0x00a3 */
__u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
__u64 trans_exc_code; /* 0x00a8 */
__u64 monitor_code; /* 0x00b0 */
......
......@@ -16,6 +16,8 @@ typedef struct {
unsigned long vdso_base;
/* The mmu context has extended page tables. */
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
unsigned int use_skey:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
......
......@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
......
......@@ -22,7 +22,8 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
bool init_skey);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq);
......
......@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x00200000UL
#define PGSTE_GR_BIT 0x00040000UL
#define PGSTE_GC_BIT 0x00020000UL
#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */
......@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */
......@@ -466,6 +468,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
#endif
return 0;
}
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
if (mm->context.use_skey)
return 1;
#endif
return 0;
}
/*
* pgd/pmd/pte query functions
*/
......@@ -699,26 +711,17 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
#endif
}
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
unsigned long address, bits, skey;
if (pte_val(*ptep) & _PAGE_INVALID)
if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
return pgste;
address = pte_val(*ptep) & PAGE_MASK;
skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
/* Transfer dirty + referenced bit to host bits in pgste */
pgste_val(pgste) |= bits << 52;
page_set_storage_key(address, skey ^ bits, 0);
} else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
(bits & _PAGE_REFERENCED)) {
/* Transfer referenced bit to host bit in pgste */
pgste_val(pgste) |= PGSTE_HR_BIT;
page_reset_referenced(address);
}
/* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
/* Copy page access key and fetch protection bit to pgste */
......@@ -729,25 +732,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
}
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
if (pte_val(*ptep) & _PAGE_INVALID)
return pgste;
/* Get referenced bit from storage key */
if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
#endif
return pgste;
}
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
unsigned long address;
unsigned long nkey;
if (pte_val(entry) & _PAGE_INVALID)
if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
return;
VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
address = pte_val(entry) & PAGE_MASK;
......@@ -757,15 +749,17 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
* key C/R to 0.
*/
nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
page_set_storage_key(address, nkey, 0);
#endif
}
static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
if (!MACHINE_HAS_ESOP &&
(pte_val(entry) & _PAGE_PRESENT) &&
(pte_val(entry) & _PAGE_WRITE)) {
if ((pte_val(entry) & _PAGE_PRESENT) &&
(pte_val(entry) & _PAGE_WRITE) &&
!(pte_val(entry) & _PAGE_INVALID)) {
if (!MACHINE_HAS_ESOP) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
......@@ -773,7 +767,12 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
pte_val(entry) |= _PAGE_DIRTY;
pte_val(entry) &= ~_PAGE_PROTECT;
}
if (!(pte_val(entry) & _PAGE_PROTECT))
/* This pte allows write access, set user-dirty */
pgste_val(pgste) |= PGSTE_UC_BIT;
}
*ptep = entry;
return pgste;
}
/**
......@@ -839,6 +838,8 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
unsigned long gmap_fault(unsigned long address, struct gmap *);
void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
void __gmap_zap(unsigned long address, struct gmap *);
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
void gmap_register_ipte_notifier(struct gmap_notifier *);
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
......@@ -870,8 +871,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
pgste_set_key(ptep, pgste, entry);
pgste_set_pte(ptep, entry);
pgste_set_key(ptep, pgste, entry, mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else {
if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
......@@ -1017,45 +1018,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
}
#endif
/*
* Get (and clear) the user dirty bit for a pte.
*/
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
pte_t *ptep)
{
pgste_t pgste;
int dirty = 0;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_all(ptep, pgste);
dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
pgste_val(pgste) &= ~PGSTE_HC_BIT;
pgste_set_unlock(ptep, pgste);
return dirty;
}
return dirty;
}
/*
* Get (and clear) the user referenced bit for a pte.
*/
static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
pte_t *ptep)
{
pgste_t pgste;
int young = 0;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_young(ptep, pgste);
young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
pgste_val(pgste) &= ~PGSTE_HR_BIT;
pgste_set_unlock(ptep, pgste);
}
return young;
}
static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
{
unsigned long pto = (unsigned long) ptep;
......@@ -1118,6 +1080,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
atomic_sub(0x10000, &mm->context.attach_count);
}
/*
* Get (and clear) the user dirty bit for a pte.
*/
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep)
{
pgste_t pgste;
pte_t pte;
int dirty;
if (!mm_has_pgste(mm))
return 0;
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_ipte_notify(mm, ptep, pgste);
__ptep_ipte(addr, ptep);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
else
pte_val(pte) |= _PAGE_INVALID;
*ptep = pte;
}
pgste_set_unlock(ptep, pgste);
return dirty;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
......@@ -1137,7 +1129,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
pte = pte_mkold(pte);
if (mm_has_pgste(vma->vm_mm)) {
pgste_set_pte(ptep, pte);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
......@@ -1182,7 +1174,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
......@@ -1205,7 +1197,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
ptep_flush_lazy(mm, address, ptep);
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, mm);
pgste_set(ptep, pgste);
}
return pte;
......@@ -1219,8 +1211,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte);
pgste_set_pte(ptep, pte);
pgste_set_key(ptep, pgste, pte, mm);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
......@@ -1246,7 +1238,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
_PGSTE_GPS_USAGE_UNUSED)
pte_val(pte) |= _PAGE_UNUSED;
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
......@@ -1278,7 +1270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pte_val(*ptep) = _PAGE_INVALID;
if (!full && mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
......@@ -1301,7 +1293,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
pte = pte_wrprotect(pte);
if (mm_has_pgste(mm)) {
pgste_set_pte(ptep, pte);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
......@@ -1326,7 +1318,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
ptep_flush_direct(vma->vm_mm, address, ptep);
if (mm_has_pgste(vma->vm_mm)) {
pgste_set_pte(ptep, entry);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else
*ptep = entry;
......@@ -1734,6 +1726,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern void s390_enable_skey(void);
/*
* No page table caches to initialise
......
......@@ -16,6 +16,50 @@
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
struct psw_bits {
unsigned long long : 1;
unsigned long long r : 1; /* PER-Mask */
unsigned long long : 3;
unsigned long long t : 1; /* DAT Mode */
unsigned long long i : 1; /* Input/Output Mask */
unsigned long long e : 1; /* External Mask */
unsigned long long key : 4; /* PSW Key */
unsigned long long : 1;
unsigned long long m : 1; /* Machine-Check Mask */
unsigned long long w : 1; /* Wait State */
unsigned long long p : 1; /* Problem State */
unsigned long long as : 2; /* Address Space Control */
unsigned long long cc : 2; /* Condition Code */
unsigned long long pm : 4; /* Program Mask */
unsigned long long ri : 1; /* Runtime Instrumentation */
unsigned long long : 6;
unsigned long long eaba : 2; /* Addressing Mode */
#ifdef CONFIG_64BIT
unsigned long long : 31;
unsigned long long ia : 64;/* Instruction Address */
#else
unsigned long long ia : 31;/* Instruction Address */
#endif
};
enum {
PSW_AMODE_24BIT = 0,
PSW_AMODE_31BIT = 1,
PSW_AMODE_64BIT = 3
};
enum {
PSW_AS_PRIMARY = 0,
PSW_AS_ACCREG = 1,
PSW_AS_SECONDARY = 2,
PSW_AS_HOME = 3
};
#define psw_bits(__psw) (*({ \
typecheck(psw_t, __psw); \
&(*(struct psw_bits *)(&(__psw))); \
}))
/*
* The pt_regs struct defines the way the registers are stored on
* the stack during a system call.
......
......@@ -28,7 +28,11 @@ struct sclp_ipl_info {
struct sclp_cpu_entry {
u8 address;
u8 reserved0[13];
u8 reserved0[2];
u8 : 3;
u8 siif : 1;
u8 : 4;
u8 reserved2[10];
u8 type;
u8 reserved1;
} __attribute__((packed));
......@@ -61,5 +65,6 @@ int sclp_pci_deconfigure(u32 fid);
int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
unsigned long sclp_get_hsa_size(void);
void sclp_early_detect(void);
int sclp_has_siif(void);
#endif /* _ASM_S390_SCLP_H */
......@@ -15,6 +15,7 @@
#include <linux/types.h>
#define __KVM_S390
#define __KVM_HAVE_GUEST_DEBUG
/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
......@@ -54,6 +55,13 @@ struct kvm_s390_io_adapter_req {
__u64 addr;
};
/* kvm attr_group on vm fd */
#define KVM_S390_VM_MEM_CTRL 0
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
#define KVM_S390_VM_MEM_CLR_CMMA 1
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
......@@ -72,11 +80,31 @@ struct kvm_fpu {
__u64 fprs[16];
};
#define KVM_GUESTDBG_USE_HW_BP 0x00010000
#define KVM_HW_BP 1
#define KVM_HW_WP_WRITE 2
#define KVM_SINGLESTEP 4
struct kvm_debug_exit_arch {
__u64 addr;
__u8 type;
__u8 pad[7]; /* Should be set to 0 */
};
struct kvm_hw_breakpoint {
__u64 addr;
__u64 phys_addr;
__u64 len;
__u8 type;
__u8 pad[7]; /* Should be set to 0 */
};
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
__u32 nr_hw_bp;
__u32 pad; /* Should be set to 0 */
struct kvm_hw_breakpoint __user *hw_bp;
};
#define KVM_SYNC_PREFIX (1UL << 0)
......
......@@ -89,16 +89,22 @@ int main(void)
DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
......@@ -156,6 +162,8 @@ int main(void)
#ifdef CONFIG_32BIT
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
#else /* CONFIG_32BIT */
DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
......
......@@ -391,8 +391,8 @@ ENTRY(pgm_check_handler)
jz pgm_kprobe
oi __TI_flags+3(%r12),_TIF_PER_TRAP
mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
l %r1,BASED(.Ljump_table)
......
......@@ -423,8 +423,8 @@ ENTRY(pgm_check_handler)
jz pgm_kprobe
oi __TI_flags+7(%r12),_TIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
......
......@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-objs += diag.o gaccess.o guestdbg.o
obj-$(CONFIG_KVM) += kvm.o
......@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
int rc;
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
unsigned long hva_token = KVM_HVA_ERR_BAD;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
......@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
if (kvm_is_error_hva(hva_token))
if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.pfault_token = parm.token_addr;
......@@ -167,17 +166,11 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
switch (subcode) {
case 0:
case 1:
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
return -EOPNOTSUPP;
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
case 4:
vcpu->run->s390_reset_flags = 0;
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
default:
return -EOPNOTSUPP;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -16,6 +16,7 @@
#include <linux/pagemap.h>
#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include "kvm-s390.h"
#include "gaccess.h"
......@@ -29,6 +30,7 @@ static const intercept_handler_t instruction_handlers[256] = {
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
[0xb6] = kvm_s390_handle_stctl,
[0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
[0xe5] = kvm_s390_handle_e5,
......@@ -109,22 +111,112 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
static void __extract_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info)
{
memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
pgm_info->code = vcpu->arch.sie_block->iprcc;
switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
case PGM_EX_TRANSLATION:
case PGM_LFX_TRANSLATION:
case PGM_LSTE_SEQUENCE:
case PGM_LSX_TRANSLATION:
case PGM_LX_TRANSLATION:
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_INSTANCE:
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
pgm_info->op_access_id = vcpu->arch.sie_block->oai;
break;
case PGM_MONITOR:
pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_DATA:
pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
break;
case PGM_PROTECTION:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
break;
default:
break;
}
if (vcpu->arch.sie_block->iprcc & PGM_PER) {
pgm_info->per_code = vcpu->arch.sie_block->perc;
pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
pgm_info->per_address = vcpu->arch.sie_block->peraddr;
pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
}
}
/*
* restore ITDB to program-interruption TDB in guest lowcore
* and set TX abort indication if required
*/
static int handle_itdb(struct kvm_vcpu *vcpu)
{
struct kvm_s390_itdb *itdb;
int rc;
if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
return 0;
if (current->thread.per_flags & PER_FLAG_NO_TE)
return 0;
itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
if (rc)
return rc;
memset(itdb, 0, sizeof(*itdb));
return 0;
}
#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
static int handle_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_pgm_info pgm_info;
int rc;
vcpu->stat.exit_program_interruption++;
/* Restore ITDB to Program-Interruption TDB in guest memory */
if (IS_TE_ENABLED(vcpu) &&
!(current->thread.per_flags & PER_FLAG_NO_TE) &&
IS_ITDB_VALID(vcpu)) {
copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
sizeof(struct kvm_s390_itdb));
memset((void *) vcpu->arch.sie_block->itdba, 0,
sizeof(struct kvm_s390_itdb));
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
kvm_s390_handle_per_event(vcpu);
/* the interrupt might have been filtered out completely */
if (vcpu->arch.sie_block->iprcc == 0)
return 0;
}
trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
rc = handle_itdb(vcpu);
if (rc)
return rc;
__extract_prog_irq(vcpu, &pgm_info);
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
......
This diff is collapsed.
This diff is collapsed.
......@@ -28,7 +28,6 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
#define TDB_ADDR 0x1800UL
#define TDB_FORMAT1 1
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
......@@ -130,6 +129,7 @@ void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
void kvm_s390_clear_float_irqs(struct kvm *kvm);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
......@@ -137,6 +137,8 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
void kvm_s390_reinject_io_int(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti);
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
......@@ -145,6 +147,7 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
......@@ -158,14 +161,64 @@ void s390_vcpu_block(struct kvm_vcpu *vcpu);
void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
void exit_sie_sync(struct kvm_vcpu *vcpu);
/* are we going to support cmma? */
bool kvm_enabled_cmma(void);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
/* is cmma enabled */
bool kvm_s390_cmma_enabled(struct kvm *kvm);
int test_vfacility(unsigned long nr);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
/* implemented in interrupt.c */
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info);
/**
* kvm_s390_inject_prog_cond - conditionally inject a program check
* @vcpu: virtual cpu
* @rc: original return/error code
*
* This function is supposed to be used after regular guest access functions
* failed, to conditionally inject a program check to a vcpu. The typical
* pattern would look like
*
* rc = write_guest(vcpu, addr, data, len);
* if (rc)
* return kvm_s390_inject_prog_cond(vcpu, rc);
*
* A negative return code from guest access functions implies an internal error
* like e.g. out of memory. In these cases no program check should be injected
* to the guest.
* A positive value implies that an exception happened while accessing a guest's
* memory. In this case all data belonging to the corresponding program check
* has been stored in vcpu->arch.pgm and can be injected with
* kvm_s390_inject_prog_irq().
*
* Returns: - the original @rc value if @rc was negative (internal error)
* - zero if @rc was already zero
* - zero or error code from injecting if @rc was positive
* (program check injected to @vcpu)
*/
static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
{
if (rc <= 0)
return rc;
return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
}
/* implemented in interrupt.c */
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
void kvm_s390_destroy_adapters(struct kvm *kvm);
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
#endif
This diff is collapsed.
......@@ -235,7 +235,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
struct kvm_vcpu *dst_vcpu = NULL;
struct kvm_s390_interrupt_info *inti;
int rc;
u8 tmp;
if (cpu_addr < KVM_MAX_VCPUS)
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
......@@ -243,10 +242,13 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
/* make sure that the new value is valid memory */
address = address & 0x7fffe000u;
if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) {
/*
* Make sure the new value is valid memory. We only need to check the
* first page, since address is 8k aligned and memory pieces are always
* at least 1MB aligned and have at least a size of 1MB.
*/
address &= 0x7fffe000u;
if (kvm_is_error_gpa(vcpu->kvm, address)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INVALID_PARAMETER;
return SIGP_CC_STATUS_STORED;
......
......@@ -30,6 +30,20 @@
TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
__entry->pswmask, __entry->pswaddr, p_args)
TRACE_EVENT(kvm_s390_skey_related_inst,
TP_PROTO(VCPU_PROTO_COMMON),
TP_ARGS(VCPU_ARGS_COMMON),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
),
VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
);
TRACE_EVENT(kvm_s390_major_guest_pfault,
TP_PROTO(VCPU_PROTO_COMMON),
TP_ARGS(VCPU_ARGS_COMMON),
......@@ -301,6 +315,31 @@ TRACE_EVENT(kvm_s390_handle_lctl,
__entry->reg1, __entry->reg3, __entry->addr)
);
TRACE_EVENT(kvm_s390_handle_stctl,
TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(int, g)
__field(int, reg1)
__field(int, reg3)
__field(u64, addr)
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
__entry->g = g;
__entry->reg1 = reg1;
__entry->reg3 = reg3;
__entry->addr = addr;
),
VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
__entry->g ? "stctg" : "stctl",
__entry->reg1, __entry->reg3, __entry->addr)
);
TRACE_EVENT(kvm_s390_handle_prefix,
TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
TP_ARGS(VCPU_ARGS_COMMON, set, address),
......
......@@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
}
spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
static inline int page_table_with_pgste(struct page *page)
{
......@@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
return table;
}
......@@ -883,8 +883,8 @@ static inline void page_table_free_pgste(unsigned long *table)
__free_page(page);
}
static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
pmd_t *pmd, unsigned long addr, unsigned long end)
static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end, bool init_skey)
{
pte_t *start_pte, *pte;
spinlock_t *ptl;
......@@ -895,6 +895,22 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
do {
pgste = pgste_get_lock(pte);
pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
if (init_skey) {
unsigned long address;
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
PGSTE_GR_BIT | PGSTE_GC_BIT);
/* skip invalid and not writable pages */
if (pte_val(*pte) & _PAGE_INVALID ||
!(pte_val(*pte) & _PAGE_WRITE)) {
pgste_set_unlock(pte, pgste);
continue;
}
address = pte_val(*pte) & PAGE_MASK;
page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
}
pgste_set_unlock(pte, pgste);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(start_pte, ptl);
......@@ -902,8 +918,8 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
return addr;
}
static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
pud_t *pud, unsigned long addr, unsigned long end)
static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end, bool init_skey)
{
unsigned long next;
pmd_t *pmd;
......@@ -913,14 +929,14 @@ static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
next = page_table_reset_pte(mm, pmd, addr, next);
next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
} while (pmd++, addr = next, addr != end);
return addr;
}
static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
pgd_t *pgd, unsigned long addr, unsigned long end)
static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end, bool init_skey)
{
unsigned long next;
pud_t *pud;
......@@ -930,14 +946,14 @@ static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
next = page_table_reset_pmd(mm, pud, addr, next);
next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
} while (pud++, addr = next, addr != end);
return addr;
}
void page_table_reset_pgste(struct mm_struct *mm,
unsigned long start, unsigned long end)
void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
unsigned long end, bool init_skey)
{
unsigned long addr, next;
pgd_t *pgd;
......@@ -949,7 +965,7 @@ void page_table_reset_pgste(struct mm_struct *mm,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
next = page_table_reset_pud(mm, pgd, addr, next);
next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
} while (pgd++, addr = next, addr != end);
up_read(&mm->mmap_sem);
}
......@@ -989,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
/* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
pgste_val(new) |= PGSTE_HC_BIT;
pgste_val(new) |= PGSTE_UC_BIT;
pgste_set_unlock(ptep, new);
pte_unmap_unlock(*ptep, ptl);
......@@ -1011,6 +1027,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
return NULL;
}
void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
unsigned long end, bool init_skey)
{
}
static inline void page_table_free_pgste(unsigned long *table)
{
}
......@@ -1357,6 +1378,50 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
*/
void s390_enable_skey(void)
{
/*
* To avoid races between multiple vcpus, ending in calling
* page_table_reset twice or more,
* the page_table_lock is taken for serialization.
*/
spin_lock(&current->mm->page_table_lock);
if (mm_use_skey(current->mm)) {
spin_unlock(&current->mm->page_table_lock);
return;
}
current->mm->context.use_skey = 1;
spin_unlock(&current->mm->page_table_lock);
page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
/*
* Test and reset if a guest page is dirty
*/
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
{
pte_t *pte;
spinlock_t *ptl;
bool dirty = false;
pte = get_locked_pte(gmap->mm, address, &ptl);
if (unlikely(!pte))
return false;
if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
dirty = true;
spin_unlock(ptl);
return dirty;
}
EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
......
......@@ -22,7 +22,8 @@ struct read_info_sccb {
u8 rnsize; /* 10 */
u8 _reserved0[16 - 11]; /* 11-15 */
u16 ncpurl; /* 16-17 */
u8 _reserved7[24 - 18]; /* 18-23 */
u16 cpuoff; /* 18-19 */
u8 _reserved7[24 - 20]; /* 20-23 */
u8 loadparm[8]; /* 24-31 */
u8 _reserved1[48 - 32]; /* 32-47 */
u64 facilities; /* 48-55 */
......@@ -45,6 +46,7 @@ static unsigned int sclp_con_has_linemode __initdata;
static unsigned long sclp_hsa_size;
static unsigned int sclp_max_cpu;
static struct sclp_ipl_info sclp_ipl_info;
static unsigned char sclp_siif;
u64 sclp_facilities;
u8 sclp_fac84;
......@@ -96,6 +98,9 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
{
struct sclp_cpu_entry *cpue;
u16 boot_cpu_address, cpu;
if (sclp_read_info_early(sccb))
return;
......@@ -116,6 +121,15 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
sclp_max_cpu = sccb->hcpua + 1;
}
boot_cpu_address = stap();
cpue = (void *)sccb + sccb->cpuoff;
for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) {
if (boot_cpu_address != cpue->address)
continue;
sclp_siif = cpue->siif;
break;
}
/* Save IPL information */
sclp_ipl_info.is_valid = 1;
if (sccb->flags & 0x2)
......@@ -148,6 +162,12 @@ unsigned int sclp_get_max_cpu(void)
return sclp_max_cpu;
}
int sclp_has_siif(void)
{
return sclp_siif;
}
EXPORT_SYMBOL(sclp_has_siif);
/*
* This function will be called after sclp_facilities_detect(), which gets
* called from early.c code. The sclp_facilities_detect() function retrieves
......
......@@ -880,6 +880,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
{
unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
return kvm_is_error_hva(hva);
}
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
......
......@@ -745,6 +745,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_ENABLE_CAP_VM 98
#define KVM_CAP_S390_IRQCHIP 99
#define KVM_CAP_IOEVENTFD_NO_LENGTH 100
#define KVM_CAP_VM_ATTRIBUTES 101
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -637,14 +637,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
#ifndef CONFIG_S390
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
if (!memslot->dirty_bitmap)
return -ENOMEM;
#endif /* !CONFIG_S390 */
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment