Commit 5422e951 authored by Cédric Le Goater, committed by Paul Mackerras

KVM: PPC: Book3S HV: XIVE: Replace the 'destroy' method by a 'release' method

When a P9 sPAPR VM boots, the CAS negotiation process determines which
interrupt mode to use (XICS legacy or XIVE native) and invokes a
machine reset to activate the chosen mode.

We introduce 'release' methods for the XICS-on-XIVE and the XIVE
native KVM devices which are called when the file descriptor of the
device is closed after the TIMA and ESB pages have been unmapped.
They perform the necessary cleanups: clear the vCPU interrupt
presenters that could be attached and then destroy the device. The
'release' methods replace the 'destroy' methods as 'destroy' is not
called anymore once 'release' is. Compatibility with older QEMU is
nevertheless maintained.

This is not considered a safe operation as the vCPUs are still
running and could be referencing the KVM device through their
presenters. To protect the system from any breakage, the kvmppc_xive
objects representing both KVM devices are now stored in an array under
the VM. Allocation is performed on first usage and memory is freed
only when the VM exits.

[paulus@ozlabs.org - Moved freeing of xive structures to book3s.c,
 put it under #ifdef CONFIG_KVM_XICS.]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent 2bde9b3e
...@@ -316,7 +316,11 @@ struct kvm_arch { ...@@ -316,7 +316,11 @@ struct kvm_arch {
#endif #endif
#ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics; struct kvmppc_xics *xics;
struct kvmppc_xive *xive; struct kvmppc_xive *xive; /* Current XIVE device in use */
struct {
struct kvmppc_xive *native;
struct kvmppc_xive *xics_on_xive;
} xive_devices;
struct kvmppc_passthru_irqmap *pimap; struct kvmppc_passthru_irqmap *pimap;
#endif #endif
struct kvmppc_ops *kvm_ops; struct kvmppc_ops *kvm_ops;
......
...@@ -915,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) ...@@ -915,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvmppc_rtas_tokens_free(kvm); kvmppc_rtas_tokens_free(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif #endif
#ifdef CONFIG_KVM_XICS
/*
* Free the XIVE devices which are not directly freed by the
* device 'release' method
*/
kfree(kvm->arch.xive_devices.native);
kvm->arch.xive_devices.native = NULL;
kfree(kvm->arch.xive_devices.xics_on_xive);
kvm->arch.xive_devices.xics_on_xive = NULL;
#endif /* CONFIG_KVM_XICS */
} }
int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
......
...@@ -1100,9 +1100,15 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) ...@@ -1100,9 +1100,15 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{ {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvmppc_xive *xive = xc->xive; struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
int i; int i;
if (!kvmppc_xics_enabled(vcpu))
return;
if (!xc)
return;
pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
/* Ensure no interrupt is still routed to that VP */ /* Ensure no interrupt is still routed to that VP */
...@@ -1141,6 +1147,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) ...@@ -1141,6 +1147,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
} }
/* Free the VP */ /* Free the VP */
kfree(xc); kfree(xc);
/* Cleanup the vcpu */
vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
vcpu->arch.xive_vcpu = NULL;
} }
int kvmppc_xive_connect_vcpu(struct kvm_device *dev, int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
...@@ -1158,7 +1168,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, ...@@ -1158,7 +1168,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
} }
if (xive->kvm != vcpu->kvm) if (xive->kvm != vcpu->kvm)
return -EPERM; return -EPERM;
if (vcpu->arch.irq_type) if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY; return -EBUSY;
if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
pr_devel("Duplicate !\n"); pr_devel("Duplicate !\n");
...@@ -1824,12 +1834,26 @@ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) ...@@ -1824,12 +1834,26 @@ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
} }
} }
static void kvmppc_xive_free(struct kvm_device *dev) /*
* Called when device fd is closed
*/
static void kvmppc_xive_release(struct kvm_device *dev)
{ {
struct kvmppc_xive *xive = dev->private; struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm; struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
int i; int i;
pr_devel("Releasing xive device\n");
/*
* When releasing the KVM device fd, the vCPUs can still be
* running and we should clean up the vCPU interrupt
* presenters first.
*/
kvm_for_each_vcpu(i, vcpu, kvm)
kvmppc_xive_cleanup_vcpu(vcpu);
debugfs_remove(xive->dentry); debugfs_remove(xive->dentry);
if (kvm) if (kvm)
...@@ -1846,11 +1870,42 @@ static void kvmppc_xive_free(struct kvm_device *dev) ...@@ -1846,11 +1870,42 @@ static void kvmppc_xive_free(struct kvm_device *dev)
if (xive->vp_base != XIVE_INVALID_VP) if (xive->vp_base != XIVE_INVALID_VP)
xive_native_free_vp_block(xive->vp_base); xive_native_free_vp_block(xive->vp_base);
/*
* A reference of the kvmppc_xive pointer is now kept under
* the xive_devices struct of the machine for reuse. It is
* freed when the VM is destroyed for now until we fix all the
* execution paths.
*/
kfree(xive);
kfree(dev); kfree(dev);
} }
/*
* When the guest chooses the interrupt mode (XICS legacy or XIVE
* native), the VM switches to a different KVM device. The previous
* device will be "released" before the new one is created.
*
* Until we are sure all execution paths are well protected, provide a
* fail safe (transitional) method for device destruction, in which
* the XIVE device pointer is recycled and not directly freed.
*/
/*
 * Return a zeroed kvmppc_xive structure for the given device type.
 *
 * @kvm:  VM owning the per-type device slots (kvm->arch.xive_devices).
 * @type: KVM_DEV_TYPE_XIVE selects the "native" slot; any other value
 *        selects the "xics_on_xive" slot.
 *
 * The structure is allocated the first time a slot is used and is
 * remembered in that slot; later calls hand back the same memory after
 * clearing it. Returns NULL when the first-time allocation fails, in
 * which case the slot stays NULL.
 */
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
{
	struct kvmppc_xive **slot;
	struct kvmppc_xive *xive;

	/* Pick the per-VM slot matching the requested device type. */
	if (type == KVM_DEV_TYPE_XIVE)
		slot = &kvm->arch.xive_devices.native;
	else
		slot = &kvm->arch.xive_devices.xics_on_xive;

	xive = *slot;
	if (xive) {
		/* Recycle the structure kept from a previous device. */
		memset(xive, 0, sizeof(*xive));
		return xive;
	}

	/* First use of this slot: allocate and remember the result. */
	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
	*slot = xive;

	return xive;
}
static int kvmppc_xive_create(struct kvm_device *dev, u32 type) static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
{ {
struct kvmppc_xive *xive; struct kvmppc_xive *xive;
...@@ -1859,7 +1914,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) ...@@ -1859,7 +1914,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
pr_devel("Creating xive for partition\n"); pr_devel("Creating xive for partition\n");
xive = kzalloc(sizeof(*xive), GFP_KERNEL); xive = kvmppc_xive_get_device(kvm, type);
if (!xive) if (!xive)
return -ENOMEM; return -ENOMEM;
...@@ -2024,7 +2079,7 @@ struct kvm_device_ops kvm_xive_ops = { ...@@ -2024,7 +2079,7 @@ struct kvm_device_ops kvm_xive_ops = {
.name = "kvm-xive", .name = "kvm-xive",
.create = kvmppc_xive_create, .create = kvmppc_xive_create,
.init = kvmppc_xive_init, .init = kvmppc_xive_init,
.destroy = kvmppc_xive_free, .release = kvmppc_xive_release,
.set_attr = xive_set_attr, .set_attr = xive_set_attr,
.get_attr = xive_get_attr, .get_attr = xive_get_attr,
.has_attr = xive_has_attr, .has_attr = xive_has_attr,
......
...@@ -283,6 +283,7 @@ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); ...@@ -283,6 +283,7 @@ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation); bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
#endif /* CONFIG_KVM_XICS */ #endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */ #endif /* _KVM_PPC_BOOK3S_XICS_H */
...@@ -964,15 +964,27 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, ...@@ -964,15 +964,27 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
return -ENXIO; return -ENXIO;
} }
static void kvmppc_xive_native_free(struct kvm_device *dev) /*
* Called when device fd is closed
*/
static void kvmppc_xive_native_release(struct kvm_device *dev)
{ {
struct kvmppc_xive *xive = dev->private; struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm; struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
int i; int i;
debugfs_remove(xive->dentry); debugfs_remove(xive->dentry);
pr_devel("Destroying xive native device\n"); pr_devel("Releasing xive native device\n");
/*
* When releasing the KVM device fd, the vCPUs can still be
* running and we should clean up the vCPU interrupt
* presenters first.
*/
kvm_for_each_vcpu(i, vcpu, kvm)
kvmppc_xive_native_cleanup_vcpu(vcpu);
if (kvm) if (kvm)
kvm->arch.xive = NULL; kvm->arch.xive = NULL;
...@@ -987,7 +999,13 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) ...@@ -987,7 +999,13 @@ static void kvmppc_xive_native_free(struct kvm_device *dev)
if (xive->vp_base != XIVE_INVALID_VP) if (xive->vp_base != XIVE_INVALID_VP)
xive_native_free_vp_block(xive->vp_base); xive_native_free_vp_block(xive->vp_base);
kfree(xive); /*
* A reference of the kvmppc_xive pointer is now kept under
* the xive_devices struct of the machine for reuse. It is
* freed when the VM is destroyed for now until we fix all the
* execution paths.
*/
kfree(dev); kfree(dev);
} }
...@@ -1002,7 +1020,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) ...@@ -1002,7 +1020,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
if (kvm->arch.xive) if (kvm->arch.xive)
return -EEXIST; return -EEXIST;
xive = kzalloc(sizeof(*xive), GFP_KERNEL); xive = kvmppc_xive_get_device(kvm, type);
if (!xive) if (!xive)
return -ENOMEM; return -ENOMEM;
...@@ -1182,7 +1200,7 @@ struct kvm_device_ops kvm_xive_native_ops = { ...@@ -1182,7 +1200,7 @@ struct kvm_device_ops kvm_xive_native_ops = {
.name = "kvm-xive-native", .name = "kvm-xive-native",
.create = kvmppc_xive_native_create, .create = kvmppc_xive_native_create,
.init = kvmppc_xive_native_init, .init = kvmppc_xive_native_init,
.destroy = kvmppc_xive_native_free, .release = kvmppc_xive_native_release,
.set_attr = kvmppc_xive_native_set_attr, .set_attr = kvmppc_xive_native_set_attr,
.get_attr = kvmppc_xive_native_get_attr, .get_attr = kvmppc_xive_native_get_attr,
.has_attr = kvmppc_xive_native_has_attr, .has_attr = kvmppc_xive_native_has_attr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment