Commit 75c46fa6 authored by Suresh Siddha's avatar Suresh Siddha Committed by Ingo Molnar

x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure

MSI and MSI-X support for interrupt remapping infrastructure.

MSI address register will be programmed with interrupt-remapping table
entry(IRTE) index and the IRTE will contain information about the vector,
cpu destination, etc.

For MSI-X, all the IRTE's will be consecutively allocated in the table,
and the address registers will contain the starting index to the block
and the data register will contain the subindex with in that block.

This also introduces a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this).

As MSI is edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flushing the hardware cache.
Signed-off-by: default avatarSuresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 89027d35
...@@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) ...@@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq); dynamic_irq_cleanup(irq);
#ifdef CONFIG_INTR_REMAP
free_irte(irq);
#endif
spin_lock_irqsave(&vector_lock, flags); spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq); __clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags); spin_unlock_irqrestore(&vector_lock, flags);
...@@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms ...@@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
tmp = TARGET_CPUS; tmp = TARGET_CPUS;
err = assign_irq_vector(irq, tmp); err = assign_irq_vector(irq, tmp);
if (!err) { if (err)
cpus_and(tmp, cfg->domain, tmp); return err;
dest = cpu_mask_to_apicid(tmp);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
struct irte irte;
int ir_index;
u16 sub_handle;
ir_index = map_irq_to_irte_handle(irq, &sub_handle);
BUG_ON(ir_index == -1);
memset (&irte, 0, sizeof(irte));
irte.present = 1;
irte.dst_mode = INT_DEST_MODE;
irte.trigger_mode = 0; /* edge */
irte.dlvry_mode = INT_DELIVERY_MODE;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
modify_irte(irq, &irte);
msg->address_hi = MSI_ADDR_BASE_HI;
msg->data = sub_handle;
msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
MSI_ADDR_IR_SHV |
MSI_ADDR_IR_INDEX1(ir_index) |
MSI_ADDR_IR_INDEX2(ir_index);
} else
#endif
{
msg->address_hi = MSI_ADDR_BASE_HI; msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo = msg->address_lo =
MSI_ADDR_BASE_LO | MSI_ADDR_BASE_LO |
...@@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ...@@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
write_msi_msg(irq, &msg); write_msi_msg(irq, &msg);
irq_desc[irq].affinity = mask; irq_desc[irq].affinity = mask;
} }
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
struct irq_cfg *cfg = irq_cfg + irq;
unsigned int dest;
cpumask_t tmp, cleanup_mask;
struct irte irte;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
if (get_irte(irq, &irte))
return;
if (assign_irq_vector(irq, mask))
return;
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
/*
* atomically update the IRTE with the new destination and vector.
*/
modify_irte(irq, &irte);
/*
* After this point, all the interrupts will start arriving
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
if (cfg->move_in_progress) {
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
cfg->move_in_progress = 0;
}
irq_desc[irq].affinity = mask;
}
#endif
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
/* /*
...@@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = { ...@@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = {
.retrigger = ioapic_retrigger_irq, .retrigger = ioapic_retrigger_irq,
}; };
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) #ifdef CONFIG_INTR_REMAP
static struct irq_chip msi_ir_chip = {
.name = "IR-PCI-MSI",
.unmask = unmask_msi_irq,
.mask = mask_msi_irq,
.ack = ack_x2apic_edge,
#ifdef CONFIG_SMP
.set_affinity = ir_set_msi_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
};
/*
* Map the PCI dev to the corresponding remapping hardware unit
* and allocate 'nvec' consecutive interrupt-remapping table entries
* in it.
*/
static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
{
struct intel_iommu *iommu;
int index;
iommu = map_dev_to_ir(dev);
if (!iommu) {
printk(KERN_ERR
"Unable to map PCI %s to iommu\n", pci_name(dev));
return -ENOENT;
}
index = alloc_irte(iommu, irq, nvec);
if (index < 0) {
printk(KERN_ERR
"Unable to allocate %d IRTE for PCI %s\n", nvec,
pci_name(dev));
return -ENOSPC;
}
return index;
}
#endif
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
{ {
int ret;
struct msi_msg msg; struct msi_msg msg;
ret = msi_compose_msg(dev, irq, &msg);
if (ret < 0)
return ret;
set_irq_msi(irq, desc);
write_msi_msg(irq, &msg);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
struct irq_desc *desc = irq_desc + irq;
/*
* irq migration in process context
*/
desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
} else
#endif
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
return 0;
}
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
int irq, ret; int irq, ret;
irq = create_irq(); irq = create_irq();
if (irq < 0) if (irq < 0)
return irq; return irq;
ret = msi_compose_msg(dev, irq, &msg); #ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
ret = msi_alloc_irte(dev, irq, 1);
if (ret < 0)
goto error;
no_ir:
#endif
ret = setup_msi_irq(dev, desc, irq);
if (ret < 0) { if (ret < 0) {
destroy_irq(irq); destroy_irq(irq);
return ret; return ret;
} }
return 0;
set_irq_msi(irq, desc); #ifdef CONFIG_INTR_REMAP
write_msi_msg(irq, &msg); error:
destroy_irq(irq);
return ret;
#endif
}
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int irq, ret, sub_handle;
struct msi_desc *desc;
#ifdef CONFIG_INTR_REMAP
struct intel_iommu *iommu = 0;
int index = 0;
#endif
sub_handle = 0;
list_for_each_entry(desc, &dev->msi_list, list) {
irq = create_irq();
if (irq < 0)
return irq;
#ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
if (!sub_handle) {
/*
* allocate the consecutive block of IRTE's
* for 'nvec'
*/
index = msi_alloc_irte(dev, irq, nvec);
if (index < 0) {
ret = index;
goto error;
}
} else {
iommu = map_dev_to_ir(dev);
if (!iommu) {
ret = -ENOENT;
goto error;
}
/*
* setup the mapping between the irq and the IRTE
* base index, the sub_handle pointing to the
* appropriate interrupt remap table entry.
*/
set_irte_irq(irq, iommu, index, sub_handle);
}
no_ir:
#endif
ret = setup_msi_irq(dev, desc, irq);
if (ret < 0)
goto error;
sub_handle++;
}
return 0; return 0;
error:
destroy_irq(irq);
return ret;
} }
void arch_teardown_msi_irq(unsigned int irq) void arch_teardown_msi_irq(unsigned int irq)
......
...@@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int apic) ...@@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int apic)
return NULL; return NULL;
} }
struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
{
struct dmar_drhd_unit *drhd;
drhd = dmar_find_matched_drhd_unit(dev);
if (!drhd)
return NULL;
return drhd->iommu;
}
int free_irte(int irq) int free_irte(int irq)
{ {
int index, i; int index, i;
......
...@@ -48,4 +48,8 @@ ...@@ -48,4 +48,8 @@
#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
MSI_ADDR_DEST_ID_MASK) MSI_ADDR_DEST_ID_MASK)
#define MSI_ADDR_IR_EXT_INT (1 << 4)
#define MSI_ADDR_IR_SHV (1 << 3)
#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
#endif /* ASM_MSIDEF_H */ #endif /* ASM_MSIDEF_H */
...@@ -109,6 +109,7 @@ extern int flush_irte(int irq); ...@@ -109,6 +109,7 @@ extern int flush_irte(int irq);
extern int free_irte(int irq); extern int free_irte(int irq);
extern int irq_remapped(int irq); extern int irq_remapped(int irq);
extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
extern struct intel_iommu *map_ioapic_to_ir(int apic); extern struct intel_iommu *map_ioapic_to_ir(int apic);
#else #else
#define irq_remapped(irq) (0) #define irq_remapped(irq) (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment