Commit 61ec84f1 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge branch 'kvm-ppc-next' of...

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

The highlights are:

* Enable VFIO device on PowerPC, from David Gibson
* Optimizations to speed up IPIs between vcpus in HV KVM,
  from Suresh Warrier (who is also Suresh E. Warrier)
* In-kernel handling of IOMMU hypercalls, and support for dynamic DMA
  windows (DDW), from Alexey Kardashevskiy.
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parents 433da860 58ded420
......@@ -3035,6 +3035,63 @@ Returns: 0 on success, -1 on error
Queues an SMI on the thread's vcpu.
4.97 KVM_CAP_PPC_MULTITCE
Capability: KVM_CAP_PPC_MULTITCE
Architectures: ppc
Type: vm
This capability means the kernel is capable of handling hypercalls
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
space. This significantly accelerates DMA operations for PPC KVM guests.
User space should expect that its handlers for these hypercalls
are not going to be called if user space previously registered LIOBN
in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
user space might have to advertise it for the guest. For example,
IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
present in the "ibm,hypertas-functions" device-tree property.
The hypercalls mentioned above may or may not be processed successfully
in the kernel based fast path. If they can not be handled by the kernel,
they will get passed on to user space. So user space still has to have
an implementation for these despite the in kernel acceleration.
This capability is always enabled.
4.98 KVM_CREATE_SPAPR_TCE_64
Capability: KVM_CAP_SPAPR_TCE_64
Architectures: powerpc
Type: vm ioctl
Parameters: struct kvm_create_spapr_tce_64 (in)
Returns: file descriptor for manipulating the created TCE table
This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
windows, described in 4.62 KVM_CREATE_SPAPR_TCE
This capability uses extended struct in ioctl interface:
/* for KVM_CAP_SPAPR_TCE_64 */
struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
};
The aim of extension is to support an additional bigger DMA window with
a variable page size.
KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
a bus offset of the corresponding DMA window, @size and @offset are numbers
of IOMMU pages.
@flags are not used at the moment.
The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
5. The kvm_run structure
------------------------
......
......@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
}
#endif
#define SPAPR_TCE_SHIFT 12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
......
......@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
u32 window_size;
struct rcu_head rcu;
u32 page_shift;
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct page *pages[0];
};
......
......@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
struct kvm_create_spapr_tce_64 *args);
extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
struct kvm_vcpu *vcpu, unsigned long liobn);
extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages);
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages);
extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
......@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
extern void kvmppc_alloc_host_rm_ops(void);
extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
......@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xics_ipi_action(void);
extern int h_ipi_redirect;
#else
static inline void kvmppc_alloc_host_rm_ops(void) {};
static inline void kvmppc_free_host_rm_ops(void) {};
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
......@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
/*
* Host-side operations we want to set up while running in real
* mode in the guest operating on the xics.
* Currently only VCPU wakeup is supported.
*/
union kvmppc_rm_state {
unsigned long raw;
struct {
u32 in_host;
u32 rm_action;
};
};
struct kvmppc_host_rm_core {
union kvmppc_rm_state rm_state;
void *rm_data;
char pad[112];
};
struct kvmppc_host_rm_ops {
struct kvmppc_host_rm_core *rm_core;
void (*vcpu_kick)(struct kvm_vcpu *vcpu);
};
extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
......
......@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
}
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
}
unsigned long vmalloc_to_phys(void *vmalloc_addr);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
......@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3
/* This is only used by the powernv kernel */
#define PPC_MSG_RM_HOST_ACTION 4
/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
......@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
void smp_init_pSeries(void);
......
......@@ -30,6 +30,7 @@
#ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void);
extern void icp_native_flush_interrupt(void);
extern void icp_native_cause_ipi_rm(int cpu);
#else
static inline int icp_native_init(void) { return -ENODEV; }
#endif
......
......@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
__u32 window_size;
};
/* for KVM_CAP_SPAPR_TCE_64 */
struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
};
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;
......
......@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
int messages; /* current messages */
long messages; /* current messages */
unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
......@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
......@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
*/
smp_mb();
message[msg] = 1;
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
smp_muxed_ipi_set_message(cpu, msg);
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
......@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
}
#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1 << (8 * (A)))
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
unsigned int all;
unsigned long all;
mb(); /* order any irq clear */
do {
all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
/*
* Must check for PPC_MSG_RM_HOST_ACTION messages
* before PPC_MSG_CALL_FUNCTION messages because when
* a VM is destroyed, we call kick_all_cpus_sync()
* to ensure that any pending PPC_MSG_RM_HOST_ACTION
* messages have completed before we free any VCPUs.
*/
if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
kvmppc_xics_ipi_action();
#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
......
......@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o
$(KVM)/eventfd.o $(KVM)/vfio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
......
......@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
......
......@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
......@@ -36,28 +37,69 @@
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}
static long kvmppc_stt_npages(unsigned long window_size)
static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
return ALIGN((window_size >> SPAPR_TCE_SHIFT)
* sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
(tce_pages * sizeof(struct page *));
return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
struct kvm *kvm = stt->kvm;
int i;
long ret = 0;
mutex_lock(&kvm->lock);
list_del(&stt->list);
for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
if (!current || !current->mm)
return ret; /* process exited */
down_write(&current->mm->mmap_sem);
if (inc) {
unsigned long locked, lock_limit;
locked = current->mm->locked_vm + stt_pages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
current->mm->locked_vm += stt_pages;
} else {
if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
stt_pages = current->mm->locked_vm;
current->mm->locked_vm -= stt_pages;
}
pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
inc ? '+' : '-',
stt_pages << PAGE_SHIFT,
current->mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
up_write(&current->mm->mmap_sem);
return ret;
}
static void release_spapr_tce_table(struct rcu_head *head)
{
struct kvmppc_spapr_tce_table *stt = container_of(head,
struct kvmppc_spapr_tce_table, rcu);
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
__free_page(stt->pages[i]);
kfree(stt);
mutex_unlock(&kvm->lock);
kvm_put_kvm(kvm);
kfree(stt);
}
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
......@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff];
......@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
release_spapr_tce_table(stt);
list_del_rcu(&stt->list);
kvm_put_kvm(stt->kvm);
kvmppc_account_memlimit(
kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
return 0;
}
......@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
};
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args)
struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
long npages;
unsigned long npages, size;
int ret = -ENOMEM;
int i;
if (!args->size)
return -EINVAL;
/* Check this LIOBN hasn't been previously allocated */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == args->liobn)
return -EBUSY;
}
npages = kvmppc_stt_npages(args->window_size);
size = args->size;
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret) {
stt = NULL;
goto fail;
}
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
......@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
stt->liobn = args->liobn;
stt->window_size = args->window_size;
stt->page_shift = args->page_shift;
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
for (i = 0; i < npages; i++) {
......@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kvm_get_kvm(kvm);
mutex_lock(&kvm->lock);
list_add(&stt->list, &kvm->arch.spapr_tce_tables);
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
mutex_unlock(&kvm->lock);
......@@ -148,3 +208,59 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
}
return ret;
}
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS, idx;
unsigned long entry, ua = 0;
u64 __user *tces, tce;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
entry = ioba >> stt->page_shift;
/*
* SPAPR spec says that the maximum size of the list is 512 TCEs
* so the whole table fits in 4K page
*/
if (npages > 512)
return H_PARAMETER;
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tces = (u64 __user *) ua;
for (i = 0; i < npages; ++i) {
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tce = be64_to_cpu(tce);
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
kvmppc_tce_put(stt, entry + i, tce);
}
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
This diff is collapsed.
......@@ -81,6 +81,17 @@ static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
.set = param_set_int,
.get = param_get_int,
};
module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
......@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
} /* fallthrough */
}
return RESUME_HOST;
case H_PUT_TCE:
ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_PUT_TCE_INDIRECT:
ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_STUFF_TCE:
ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
default:
return RESUME_HOST;
}
......@@ -2278,6 +2313,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
spin_unlock(&vc->lock);
}
/*
* Clear core from the list of active host cores as we are about to
* enter the guest. Only do this if it is the primary thread of the
* core (not if a subcore) that is entering the guest.
*/
static inline void kvmppc_clear_host_core(int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
return;
/*
* Memory barrier can be omitted here as we will do a smp_wmb()
* later in kvmppc_start_thread and we need ensure that state is
* visible to other CPUs only after we enter guest.
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
}
/*
* Advertise this core as an active host core since we exited the guest
* Only need to do this if it is the primary thread of the core that is
* exiting.
*/
static inline void kvmppc_set_host_core(int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
return;
/*
* Memory barrier can be omitted here because we do a spin_unlock
* immediately after this which provides the memory barrier.
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
}
/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
......@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
kvmppc_clear_host_core(pcpu);
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
......@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
kvmppc_set_host_core(pcpu);
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
......@@ -2984,6 +3063,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
}
#ifdef CONFIG_KVM_XICS
static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
void *hcpu)
{
unsigned long cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
kvmppc_set_host_core(cpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
kvmppc_clear_host_core(cpu);
break;
#endif
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block kvmppc_cpu_notifier = {
.notifier_call = kvmppc_cpu_notify,
};
/*
* Allocate a per-core structure for managing state about which cores are
* running in the host versus the guest and for exchanging data between
* real mode KVM and CPU running in the host.
* This is only done for the first VM.
* The allocated structure stays even if all VMs have stopped.
* It is only freed when the kvm-hv module is unloaded.
* It's OK for this routine to fail, we just don't support host
* core operations like redirecting H_IPI wakeups.
*/
void kvmppc_alloc_host_rm_ops(void)
{
struct kvmppc_host_rm_ops *ops;
unsigned long l_ops;
int cpu, core;
int size;
/* Not the first time here ? */
if (kvmppc_host_rm_ops_hv != NULL)
return;
ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
if (!ops)
return;
size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
ops->rm_core = kzalloc(size, GFP_KERNEL);
if (!ops->rm_core) {
kfree(ops);
return;
}
get_online_cpus();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
continue;
core = cpu >> threads_shift;
ops->rm_core[core].rm_state.in_host = 1;
}
ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
/*
* Make the contents of the kvmppc_host_rm_ops structure visible
* to other CPUs before we assign it to the global variable.
* Do an atomic assignment (no locks used here), but if someone
* beats us to it, just free our copy and return.
*/
smp_wmb();
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
put_online_cpus();
kfree(ops->rm_core);
kfree(ops);
return;
}
register_cpu_notifier(&kvmppc_cpu_notifier);
put_online_cpus();
}
void kvmppc_free_host_rm_ops(void)
{
if (kvmppc_host_rm_ops_hv) {
unregister_cpu_notifier(&kvmppc_cpu_notifier);
kfree(kvmppc_host_rm_ops_hv->rm_core);
kfree(kvmppc_host_rm_ops_hv);
kvmppc_host_rm_ops_hv = NULL;
}
}
#endif
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
......@@ -2996,6 +3183,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
kvmppc_alloc_host_rm_ops();
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
......@@ -3229,6 +3418,7 @@ static int kvmppc_book3s_init_hv(void)
static void kvmppc_book3s_exit_hv(void)
{
kvmppc_free_host_rm_ops();
kvmppc_hv_ops = NULL;
}
......
......@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
kvmhv_interrupt_vcore(vc, ee);
}
}
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
......@@ -17,12 +17,16 @@
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
int h_ipi_redirect = 1;
EXPORT_SYMBOL(h_ipi_redirect);
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
......@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
/* -- ICP routines -- */
#ifdef CONFIG_SMP
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
{
int hcpu;
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
icp_native_cause_ipi_rm(hcpu);
}
#else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
#endif
/*
* We start the search from our current CPU Id in the core map
* and go in a circle until we get back to our ID looking for a
* core that is running in host context and that hasn't already
* been targeted for another rm_host_ops.
*
* In the future, could consider using a fairer algorithm (one
* that distributes the IPIs better)
*
* Returns -1, if no CPU could be found in the host
* Else, returns a CPU Id which has been reserved for use
*/
static inline int grab_next_hostcore(int start,
struct kvmppc_host_rm_core *rm_core, int max, int action)
{
bool success;
int core;
union kvmppc_rm_state old, new;
for (core = start + 1; core < max; core++) {
old = new = READ_ONCE(rm_core[core].rm_state);
if (!old.in_host || old.rm_action)
continue;
/* Try to grab this host core if not taken already. */
new.rm_action = action;
success = cmpxchg64(&rm_core[core].rm_state.raw,
old.raw, new.raw) == old.raw;
if (success) {
/*
* Make sure that the store to the rm_action is made
* visible before we return to caller (and the
* subsequent store to rm_data) to synchronize with
* the IPI handler.
*/
smp_wmb();
return core;
}
}
return -1;
}
static inline int find_available_hostcore(int action)
{
int core;
int my_core = smp_processor_id() >> threads_shift;
struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
if (core == -1)
core = grab_next_hostcore(core, rm_core, my_core, action);
return core;
}
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
int hcore;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
......@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
/* Check if the core is loaded, if not, too hard */
/*
* Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU,
* if we can't find one, set up state to eventually return too hard.
*/
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
hcore = -1;
if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
if (hcore != -1) {
icp_send_hcore_msg(hcore, vcpu);
} else {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
}
return;
}
......@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
bail:
return check_too_hard(xics, icp);
}
/* --- Non-real mode XICS-related built-in routines --- */
/**
* Host Operations poked by RM KVM
*/
static void rm_host_ipi_action(int action, void *data)
{
switch (action) {
case XICS_RM_KICK_VCPU:
kvmppc_host_rm_ops_hv->vcpu_kick(data);
break;
default:
WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
break;
}
}
void kvmppc_xics_ipi_action(void)
{
int core;
unsigned int cpu = smp_processor_id();
struct kvmppc_host_rm_core *rm_corep;
core = cpu >> threads_shift;
rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
if (rm_corep->rm_data) {
rm_host_ipi_action(rm_corep->rm_state.rm_action,
rm_corep->rm_data);
/* Order these stores against the real mode KVM */
rm_corep->rm_data = NULL;
smp_wmb();
rm_corep->rm_state.rm_action = 0;
}
}
......@@ -2006,8 +2006,8 @@ hcall_real_table:
.long 0 /* 0x12c */
.long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
.long 0 /* 0x138 */
.long 0 /* 0x13c */
.long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
.long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
.long 0 /* 0x140 */
.long 0 /* 0x144 */
.long 0 /* 0x148 */
......
......@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce = kvmppc_get_gpr(vcpu, 6);
unsigned long npages = kvmppc_get_gpr(vcpu, 7);
long rc;
rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
tce, npages);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
unsigned long npages = kvmppc_get_gpr(vcpu, 7);
long rc;
rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
......@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
case H_PUT_TCE_INDIRECT:
return kvmppc_h_pr_put_tce_indirect(vcpu);
case H_STUFF_TCE:
return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
......
......@@ -33,6 +33,7 @@
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
#include <asm/iommu.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
......@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
unsigned int i;
struct kvm_vcpu *vcpu;
#ifdef CONFIG_KVM_XICS
/*
* We call kick_all_cpus_sync() to ensure that all
* CPUs have executed any pending IPIs before we
* continue and free VCPUs structures below.
*/
if (is_kvmppc_hv_enabled(kvm))
kick_all_cpus_sync();
#endif
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
......@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
......@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1;
break;
case KVM_CAP_SPAPR_MULTITCE:
r = 1;
break;
#endif
default:
r = 0;
......@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
goto out;
if (create_tce_64.flags) {
r = -EINVAL;
goto out;
}
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out;
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
create_tce_64.liobn = create_tce.liobn;
create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
create_tce_64.offset = 0;
create_tce_64.size = create_tce.window_size >>
IOMMU_PAGE_SHIFT_4K;
create_tce_64.flags = 0;
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_PPC_GET_SMMU_INFO: {
......
......@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
unsigned long vmalloc_to_phys(void *va)
{
unsigned long pfn = vmalloc_to_pfn(va);
BUG_ON(!pfn);
return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
}
EXPORT_SYMBOL_GPL(vmalloc_to_phys);
......@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
}
}
static unsigned long vmalloc_to_phys(void *v)
{
struct page *p = vmalloc_to_page(v);
BUG_ON(!p);
return page_to_phys(p) + offset_in_page(v);
}
/* */
struct event_uniq {
struct rb_node node;
......
......@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void icp_native_cause_ipi_rm(int cpu)
{
/*
* Currently not used to send IPIs to another CPU
* on the same core. Only caller is KVM real mode.
* Need the physical address of the XICS to be
* previously saved in kvm_hstate in the paca.
*/
unsigned long xics_phys;
/*
* Just like the cause_ipi functions, it is required to
* include a full barrier (out8 includes a sync) before
* causing the IPI.
*/
xics_phys = paca[cpu].kvm_hstate.xics_phys;
out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
}
#endif
/*
* Called when an interrupt is received on an off-line CPU to
* clear the interrupt, so that the CPU can go back to nap mode.
......
......@@ -862,6 +862,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
#define KVM_CAP_HYPERV_SYNIC 123
#define KVM_CAP_S390_RI 124
#define KVM_CAP_SPAPR_TCE_64 125
#ifdef KVM_CAP_IRQ_ROUTING
......@@ -1154,6 +1155,8 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PPC_ALLOC_HTAB */
#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
#define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \
struct kvm_create_spapr_tce_64)
/* Available with KVM_CAP_RMA */
#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
/* Available with KVM_CAP_PPC_HTAB_FD */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment