Commit 4a5bb973 authored by Linus Torvalds

Merge tag 'for-linus-5.10b-rc1b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:

 - A single patch to fix the Xen security issue XSA-331 (malicious
   guests can DoS dom0 by triggering NULL-pointer dereferences or access
   to stale data).

 - A larger series to fix the Xen security issue XSA-332 (malicious
   guests can DoS dom0 by sending events at high frequency leading to
   dom0's vcpus being busy in IRQ handling for elongated times). A
   sketch of the driver-side pattern this series introduces is included
   just before the diff below.

* tag 'for-linus-5.10b-rc1b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/events: block rogue events for some time
  xen/events: defer eoi in case of excessive number of events
  xen/events: use a common cpu hotplug hook for event channels
  xen/events: switch user event channels to lateeoi model
  xen/pciback: use lateeoi irq binding
  xen/pvcallsback: use lateeoi irq binding
  xen/scsiback: use lateeoi irq binding
  xen/netback: use lateeoi irq binding
  xen/blkback: use lateeoi irq binding
  xen/events: add a new "late EOI" evtchn framework
  xen/events: fix race in evtchn_fifo_unmask()
  xen/events: add a proper barrier to 2-level uevent unmasking
  xen/events: avoid removing an event channel while handling it
parents 709ebe6d 5f7f7740
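
The driver-side pattern these patches introduce is the same across all
backends: bind the event channel with a *_lateeoi variant (which leaves
the channel masked after an event has been delivered), process the shared
ring, and only then call xen_irq_lateeoi(), passing XEN_EOI_FLAG_SPURIOUS
when no request was actually found so the core can throttle misbehaving
guests. The following minimal sketch illustrates that pattern; the my_*
names are invented for the example, while the lateeoi API names are taken
from the patches below.

/*
 * Illustrative sketch only -- not code from this series.
 * my_backend, my_handle_requests and my_backend_bind are hypothetical.
 */
#include <linux/interrupt.h>
#include <xen/events.h>

struct my_backend {
    int irq;
};

/* Process the shared ring; return true if any request was handled. */
static bool my_handle_requests(struct my_backend *be)
{
    /* ... a real backend would consume requests from its ring here ... */
    return false;
}

static irqreturn_t my_backend_interrupt(int irq, void *dev_id)
{
    struct my_backend *be = dev_id;

    if (my_handle_requests(be))
        xen_irq_lateeoi(irq, 0);
    else
        /* Nothing was pending: let the core delay rogue senders. */
        xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);

    return IRQ_HANDLED;
}

static int my_backend_bind(struct my_backend *be, unsigned int domid,
                           evtchn_port_t evtchn)
{
    int err;

    /* The event channel stays masked until xen_irq_lateeoi() is called. */
    err = bind_interdomain_evtchn_to_irqhandler_lateeoi(domid, evtchn,
                my_backend_interrupt, 0, "my-backend", be);
    if (err < 0)
        return err;

    be->irq = err;
    return 0;
}
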
@@ -5970,6 +5970,14 @@
			improve timer resolution at the expense of processing
			more timer interrupts.

+	xen.event_eoi_delay=	[XEN]
+			How long to delay EOI handling in case of event
+			storms (jiffies). Default is 10.
+
+	xen.event_loop_timeout=	[XEN]
+			After which time (jiffies) the event handling loop
+			should start to delay EOI handling. Default is 2.
+
	nopv=		[X86,XEN,KVM,HYPER_V,VMWARE]
			Disables the PV optimizations forcing the guest to run
			as generic guest with no PV drivers. Currently support
...
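
Both values are given in jiffies, so their wall-clock effect depends on
CONFIG_HZ. Assuming HZ=250 (a common distro default, used here purely for
illustration), the default xen.event_loop_timeout=2 corresponds to roughly
8 ms of continuous event processing before further EOIs start being
deferred, and xen.event_eoi_delay=10 then defers each such EOI by roughly
40 ms.
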
@@ -201,7 +201,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))

-static int do_block_io_op(struct xen_blkif_ring *ring);
+static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
                                struct blkif_request *req,
                                struct pending_req *pending_req);
@@ -612,6 +612,8 @@ int xen_blkif_schedule(void *arg)
    struct xen_vbd *vbd = &blkif->vbd;
    unsigned long timeout;
    int ret;
+    bool do_eoi;
+    unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

    set_freezable();
    while (!kthread_should_stop()) {
@@ -636,16 +638,23 @@ int xen_blkif_schedule(void *arg)
        if (timeout == 0)
            goto purge_gnt_list;

+        do_eoi = ring->waiting_reqs;
+
        ring->waiting_reqs = 0;
        smp_mb(); /* clear flag *before* checking for work */

-        ret = do_block_io_op(ring);
+        ret = do_block_io_op(ring, &eoi_flags);
        if (ret > 0)
            ring->waiting_reqs = 1;
        if (ret == -EACCES)
            wait_event_interruptible(ring->shutdown_wq,
                                     kthread_should_stop());

+        if (do_eoi && !ring->waiting_reqs) {
+            xen_irq_lateeoi(ring->irq, eoi_flags);
+            eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
+        }
+
purge_gnt_list:
        if (blkif->vbd.feature_gnt_persistent &&
            time_after(jiffies, ring->next_lru)) {
@@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio *bio)
 * and transmute it to the block API to hand it over to the proper block disk.
 */
static int
-__do_block_io_op(struct xen_blkif_ring *ring)
+__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
    union blkif_back_rings *blk_rings = &ring->blk_rings;
    struct blkif_request req;
@@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring *ring)
        if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
            break;

+        /* We've seen a request, so clear spurious eoi flag. */
+        *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
        if (kthread_should_stop()) {
            more_to_do = 1;
            break;
@@ -1202,13 +1214,13 @@ __do_block_io_op(struct xen_blkif_ring *ring)
}

static int
-do_block_io_op(struct xen_blkif_ring *ring)
+do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
    union blkif_back_rings *blk_rings = &ring->blk_rings;
    int more_to_do;

    do {
-        more_to_do = __do_block_io_op(ring);
+        more_to_do = __do_block_io_op(ring, eoi_flags);
        if (more_to_do)
            break;
...
@@ -246,9 +246,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
    if (req_prod - rsp_prod > size)
        goto fail;

-    err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
-                                                xen_blkif_be_int, 0,
-                                                "blkif-backend", ring);
+    err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
+            evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
    if (err < 0)
        goto fail;
    ring->irq = err;
...
@@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
    char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
    struct xenvif *vif; /* Parent VIF */

+    /*
+     * TX/RX common EOI handling.
+     * When feature-split-event-channels = 0, interrupt handler sets
+     * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set
+     * by the RX and TX interrupt handlers.
+     * RX and TX handler threads will issue an EOI when either
+     * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or
+     * NETBK_TX_EOI) are set and they will reset those bits.
+     */
+    atomic_t eoi_pending;
+#define NETBK_RX_EOI		0x01
+#define NETBK_TX_EOI		0x02
+#define NETBK_COMMON_EOI	0x04
+
    /* Use NAPI for guest TX */
    struct napi_struct napi;

    /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
@@ -378,6 +392,7 @@ int xenvif_dealloc_kthread(void *data);

irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);

+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
void xenvif_rx_action(struct xenvif_queue *queue);
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
...
@@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif)
        !vif->disabled;
}

+static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue)
+{
+    bool rc;
+
+    rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
+    if (rc)
+        napi_schedule(&queue->napi);
+    return rc;
+}
+
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
    struct xenvif_queue *queue = dev_id;
+    int old;

-    if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
-        napi_schedule(&queue->napi);
+    old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending);
+    WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n");
+
+    if (!xenvif_handle_tx_interrupt(queue)) {
+        atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending);
+        xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+    }

    return IRQ_HANDLED;
}
@@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
    return work_done;
}

+static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue)
+{
+    bool rc;
+
+    rc = xenvif_have_rx_work(queue, false);
+    if (rc)
+        xenvif_kick_thread(queue);
+    return rc;
+}
+
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
    struct xenvif_queue *queue = dev_id;
+    int old;

-    xenvif_kick_thread(queue);
+    old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending);
+    WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n");
+
+    if (!xenvif_handle_rx_interrupt(queue)) {
+        atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending);
+        xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+    }

    return IRQ_HANDLED;
}

irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
-    xenvif_tx_interrupt(irq, dev_id);
-    xenvif_rx_interrupt(irq, dev_id);
+    struct xenvif_queue *queue = dev_id;
+    int old;
+
+    old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
+    WARN(old, "Interrupt while EOI pending\n");
+
+    /* Use bitwise or as we need to call both functions. */
+    if ((!xenvif_handle_tx_interrupt(queue) |
+         !xenvif_handle_rx_interrupt(queue))) {
+        atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
+        xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+    }

    return IRQ_HANDLED;
}
@@ -605,7 +648,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
    if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl))
        goto err_unmap;

-    err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
+    err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn);
    if (err < 0)
        goto err_unmap;
@@ -709,7 +752,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
    if (tx_evtchn == rx_evtchn) {
        /* feature-split-event-channels == 0 */
-        err = bind_interdomain_evtchn_to_irqhandler(
+        err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
            queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
            queue->name, queue);
        if (err < 0)
@@ -720,7 +763,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
        /* feature-split-event-channels == 1 */
        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
                 "%s-tx", queue->name);
-        err = bind_interdomain_evtchn_to_irqhandler(
+        err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
            queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
            queue->tx_irq_name, queue);
        if (err < 0)
@@ -730,7 +773,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
                 "%s-rx", queue->name);
-        err = bind_interdomain_evtchn_to_irqhandler(
+        err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
            queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
            queue->rx_irq_name, queue);
        if (err < 0)
...
@@ -169,6 +169,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)

    if (more_to_do)
        napi_schedule(&queue->napi);
+    else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
+                                 &queue->eoi_pending) &
+             (NETBK_TX_EOI | NETBK_COMMON_EOI))
+        xen_irq_lateeoi(queue->tx_irq, 0);
}

static void tx_add_credit(struct xenvif_queue *queue)
@@ -1643,9 +1647,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif)
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
{
    struct xenvif *vif = data;
+    unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;

-    while (xenvif_ctrl_work_todo(vif))
+    while (xenvif_ctrl_work_todo(vif)) {
        xenvif_ctrl_action(vif);
+        eoi_flag = 0;
+    }
+
+    xen_irq_lateeoi(irq, eoi_flag);

    return IRQ_HANDLED;
}
...
@@ -503,13 +503,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
    return queue->stalled && prod - cons >= 1;
}

-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
{
    return xenvif_rx_ring_slots_available(queue) ||
        (queue->vif->stall_timeout &&
         (xenvif_rx_queue_stalled(queue) ||
          xenvif_rx_queue_ready(queue))) ||
-        kthread_should_stop() ||
+        (test_kthread && kthread_should_stop()) ||
        queue->vif->disabled;
}
@@ -540,15 +540,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
    DEFINE_WAIT(wait);

-    if (xenvif_have_rx_work(queue))
+    if (xenvif_have_rx_work(queue, true))
        return;

    for (;;) {
        long ret;

        prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
-        if (xenvif_have_rx_work(queue))
+        if (xenvif_have_rx_work(queue, true))
            break;
+        if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
+                                &queue->eoi_pending) &
+            (NETBK_RX_EOI | NETBK_COMMON_EOI))
+            xen_irq_lateeoi(queue->rx_irq, 0);
        ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
        if (!ret)
            break;
...
@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(evtchn_port_t port)

    BUG_ON(!irqs_disabled());

+    smp_wmb();	/* All writes before unmask must be visible. */
+
    if (unlikely((cpu != cpu_from_evtchn(port))))
        do_hypercall = 1;
    else {
@@ -159,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
 * a bitset of words which contain pending event bits. The second
 * level is a bitset of pending events themselves.
 */
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
    int irq;
    xen_ulong_t pending_words;
@@ -240,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
            /* Process port. */
            port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
-            irq = get_evtchn_to_irq(port);
-            if (irq != -1)
-                generic_handle_irq(irq);
+            handle_irq_for_port(port, ctrl);

            bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
...
@@ -33,6 +33,10 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>

#ifdef CONFIG_X86
#include <asm/desc.h>
@@ -63,6 +67,15 @@

#include "events_internal.h"

+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
const struct evtchn_ops *evtchn_ops;

/*
@@ -71,6 +84,24 @@ const struct evtchn_ops *evtchn_ops;
 */
static DEFINE_MUTEX(irq_mapping_update_lock);
/*
* Lock protecting event handling loop against removing event channels.
* Adding of event channels is no issue as the associated IRQ becomes active
* only after everything is setup (before request_[threaded_]irq() the handler
* can't be entered for an event, as the event channel will be unmasked only
* then).
*/
static DEFINE_RWLOCK(evtchn_rwlock);
/*
* Lock hierarchy:
*
* irq_mapping_update_lock
* evtchn_rwlock
* IRQ-desc lock
* percpu eoi_list_lock
*/
static LIST_HEAD(xen_irq_list_head);

/* IRQ <-> VIRQ mapping. */
@@ -95,17 +126,20 @@ static bool (*pirq_needs_eoi)(unsigned irq);
static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];

static struct irq_chip xen_dynamic_chip;
+static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);

+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
static void clear_evtchn_to_irq_row(unsigned row)
{
    unsigned col;

    for (col = 0; col < EVTCHN_PER_ROW; col++)
-        evtchn_to_irq[row][col] = -1;
+        WRITE_ONCE(evtchn_to_irq[row][col], -1);
}

static void clear_evtchn_to_irq_all(void)
@@ -142,7 +176,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
        clear_evtchn_to_irq_row(row);
    }

-    evtchn_to_irq[row][col] = irq;
+    WRITE_ONCE(evtchn_to_irq[row][col], irq);
    return 0;
}
@@ -152,7 +186,7 @@ int get_evtchn_to_irq(evtchn_port_t evtchn)
        return -1;
    if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
        return -1;
-    return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+    return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}

/* Get info for IRQ */
@@ -261,10 +295,14 @@ static void xen_irq_info_cleanup(struct irq_info *info)
 */
evtchn_port_t evtchn_from_irq(unsigned irq)
{
-    if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
+    const struct irq_info *info = NULL;
+
+    if (likely(irq < nr_irqs))
+        info = info_for_irq(irq);
+    if (!info)
        return 0;

-    return info_for_irq(irq)->evtchn;
+    return info->evtchn;
}

unsigned int irq_from_evtchn(evtchn_port_t evtchn)
@@ -375,9 +413,157 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
struct lateeoi_work {
struct delayed_work delayed;
spinlock_t eoi_list_lock;
struct list_head eoi_list;
};
static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
static void lateeoi_list_del(struct irq_info *info)
{
struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
unsigned long flags;
spin_lock_irqsave(&eoi->eoi_list_lock, flags);
list_del_init(&info->eoi_list);
spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}
static void lateeoi_list_add(struct irq_info *info)
{
struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
struct irq_info *elem;
u64 now = get_jiffies_64();
unsigned long delay;
unsigned long flags;
if (now < info->eoi_time)
delay = info->eoi_time - now;
else
delay = 1;
spin_lock_irqsave(&eoi->eoi_list_lock, flags);
if (list_empty(&eoi->eoi_list)) {
list_add(&info->eoi_list, &eoi->eoi_list);
mod_delayed_work_on(info->eoi_cpu, system_wq,
&eoi->delayed, delay);
} else {
list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
if (elem->eoi_time <= info->eoi_time)
break;
}
list_add(&info->eoi_list, &elem->eoi_list);
}
spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}
static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
{
evtchn_port_t evtchn;
unsigned int cpu;
unsigned int delay = 0;
evtchn = info->evtchn;
if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
return;
if (spurious) {
if ((1 << info->spurious_cnt) < (HZ << 2))
info->spurious_cnt++;
if (info->spurious_cnt > 1) {
delay = 1 << (info->spurious_cnt - 2);
if (delay > HZ)
delay = HZ;
if (!info->eoi_time)
info->eoi_cpu = smp_processor_id();
info->eoi_time = get_jiffies_64() + delay;
}
} else {
info->spurious_cnt = 0;
}
cpu = info->eoi_cpu;
if (info->eoi_time &&
(info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
lateeoi_list_add(info);
return;
}
info->eoi_time = 0;
unmask_evtchn(evtchn);
}
static void xen_irq_lateeoi_worker(struct work_struct *work)
{
struct lateeoi_work *eoi;
struct irq_info *info;
u64 now = get_jiffies_64();
unsigned long flags;
eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
read_lock_irqsave(&evtchn_rwlock, flags);
while (true) {
spin_lock(&eoi->eoi_list_lock);
info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
eoi_list);
if (info == NULL || now < info->eoi_time) {
spin_unlock(&eoi->eoi_list_lock);
break;
}
list_del_init(&info->eoi_list);
spin_unlock(&eoi->eoi_list_lock);
info->eoi_time = 0;
xen_irq_lateeoi_locked(info, false);
}
if (info)
mod_delayed_work_on(info->eoi_cpu, system_wq,
&eoi->delayed, info->eoi_time - now);
read_unlock_irqrestore(&evtchn_rwlock, flags);
}
static void xen_cpu_init_eoi(unsigned int cpu)
{
struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
spin_lock_init(&eoi->eoi_list_lock);
INIT_LIST_HEAD(&eoi->eoi_list);
}
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
{
struct irq_info *info;
unsigned long flags;
read_lock_irqsave(&evtchn_rwlock, flags);
info = info_for_irq(irq);
if (info)
xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
read_unlock_irqrestore(&evtchn_rwlock, flags);
}
EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
static void xen_irq_init(unsigned irq)
{
    struct irq_info *info;

#ifdef CONFIG_SMP
    /* By default all event channels notify CPU#0. */
    cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -392,6 +578,7 @@ static void xen_irq_init(unsigned irq)

    set_info_for_irq(irq, info);

+    INIT_LIST_HEAD(&info->eoi_list);
    list_add_tail(&info->list, &xen_irq_list_head);
}
@@ -440,16 +627,24 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
    struct irq_info *info = info_for_irq(irq);
+    unsigned long flags;

    if (WARN_ON(!info))
        return;

+    write_lock_irqsave(&evtchn_rwlock, flags);
+
+    if (!list_empty(&info->eoi_list))
+        lateeoi_list_del(info);
+
    list_del(&info->list);

    set_info_for_irq(irq, NULL);

    WARN_ON(info->refcnt > 0);

+    write_unlock_irqrestore(&evtchn_rwlock, flags);
+
    kfree(info);

    /* Legacy IRQ descriptors are managed by the arch. */
@@ -841,7 +1036,7 @@ int xen_pirq_from_irq(unsigned irq)
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);

-int bind_evtchn_to_irq(evtchn_port_t evtchn)
+static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
{
    int irq;
    int ret;
@@ -858,7 +1053,7 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
    if (irq < 0)
        goto out;

-    irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+    irq_set_chip_and_handler_name(irq, chip,
                                  handle_edge_irq, "event");

    ret = xen_irq_info_evtchn_setup(irq, evtchn);
@@ -879,8 +1074,19 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
    return irq;
}
int bind_evtchn_to_irq(evtchn_port_t evtchn)
{
return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
{
return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
    struct evtchn_bind_ipi bind_ipi;
@@ -922,8 +1128,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
    return irq;
}

-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
-                                   evtchn_port_t remote_port)
+static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
+                                               evtchn_port_t remote_port,
+                                               struct irq_chip *chip)
{
    struct evtchn_bind_interdomain bind_interdomain;
    int err;
@@ -934,10 +1141,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
    err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
                                      &bind_interdomain);

-    return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+    return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
+                                           chip);
+}
+
+int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+                                   evtchn_port_t remote_port)
+{
+    return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+                                               &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
evtchn_port_t remote_port)
{
return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
&xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
{
    struct evtchn_status status;
@@ -1034,14 +1257,15 @@ static void unbind_from_irq(unsigned int irq)
    mutex_unlock(&irq_mapping_update_lock);
}

-int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
                              irq_handler_t handler,
                              unsigned long irqflags,
-                              const char *devname, void *dev_id)
+                              const char *devname, void *dev_id,
+                              struct irq_chip *chip)
{
    int irq, retval;

-    irq = bind_evtchn_to_irq(evtchn);
+    irq = bind_evtchn_to_irq_chip(evtchn, chip);
    if (irq < 0)
        return irq;

    retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1052,18 +1276,38 @@ int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
    return irq;
}
int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags,
const char *devname, void *dev_id)
{
return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
devname, dev_id,
&xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);

-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
-                                          evtchn_port_t remote_port,
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
                                      irq_handler_t handler,
                                      unsigned long irqflags,
-                                      const char *devname,
-                                      void *dev_id)
+                                      const char *devname, void *dev_id)
+{
+    return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+                                          devname, dev_id,
+                                          &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
+
+static int bind_interdomain_evtchn_to_irqhandler_chip(
+        unsigned int remote_domain, evtchn_port_t remote_port,
+        irq_handler_t handler, unsigned long irqflags,
+        const char *devname, void *dev_id, struct irq_chip *chip)
{
    int irq, retval;

-    irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+    irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+                                              chip);
    if (irq < 0)
        return irq;
@@ -1075,8 +1319,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
    return irq;
}
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id)
{
return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
remote_port, handler, irqflags, devname,
dev_id, &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id)
{
return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
remote_port, handler, irqflags, devname,
dev_id, &xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
                            irq_handler_t handler,
                            unsigned long irqflags, const char *devname, void *dev_id)
@@ -1189,7 +1458,7 @@ int evtchn_get(evtchn_port_t evtchn)
        goto done;

    err = -EINVAL;
-    if (info->refcnt <= 0)
+    if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
        goto done;

    info->refcnt++;
@@ -1228,21 +1497,81 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
    notify_remote_via_irq(irq);
}
struct evtchn_loop_ctrl {
ktime_t timeout;
unsigned count;
bool defer_eoi;
};
void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
int irq;
struct irq_info *info;
irq = get_evtchn_to_irq(port);
if (irq == -1)
return;
/*
* Check for timeout every 256 events.
* We are setting the timeout value only after the first 256
* events in order to not hurt the common case of few loop
* iterations. The 256 is basically an arbitrary value.
*
* In case we are hitting the timeout we need to defer all further
* EOIs in order to ensure to leave the event handling loop rather
* sooner than later.
*/
if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
ktime_t kt = ktime_get();
if (!ctrl->timeout) {
kt = ktime_add_ms(kt,
jiffies_to_msecs(event_loop_timeout));
ctrl->timeout = kt;
} else if (kt > ctrl->timeout) {
ctrl->defer_eoi = true;
}
}
info = info_for_irq(irq);
if (ctrl->defer_eoi) {
info->eoi_cpu = smp_processor_id();
info->irq_epoch = __this_cpu_read(irq_epoch);
info->eoi_time = get_jiffies_64() + event_eoi_delay;
}
generic_handle_irq(irq);
}
static void __xen_evtchn_do_upcall(void)
{
    struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
    int cpu = smp_processor_id();
+    struct evtchn_loop_ctrl ctrl = { 0 };
+
+    read_lock(&evtchn_rwlock);

    do {
        vcpu_info->evtchn_upcall_pending = 0;

-        xen_evtchn_handle_events(cpu);
+        xen_evtchn_handle_events(cpu, &ctrl);

        BUG_ON(!irqs_disabled());

        virt_rmb(); /* Hypervisor can set upcall pending. */

    } while (vcpu_info->evtchn_upcall_pending);
read_unlock(&evtchn_rwlock);
/*
* Increment irq_epoch only now to defer EOIs only for
* xen_irq_lateeoi() invocations occurring from inside the loop
* above.
*/
__this_cpu_inc(irq_epoch);
}

void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1606,6 +1935,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
    .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_lateeoi_chip __read_mostly = {
/* The chip name needs to contain "xen-dyn" for irqbalance to work. */
.name = "xen-dyn-lateeoi",
.irq_disable = disable_dynirq,
.irq_mask = disable_dynirq,
.irq_unmask = enable_dynirq,
.irq_ack = mask_ack_dynirq,
.irq_mask_ack = mask_ack_dynirq,
.irq_set_affinity = set_affinity_irq,
.irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_pirq_chip __read_mostly = {
    .name = "xen-pirq",
@@ -1676,12 +2020,31 @@ void xen_setup_callback_vector(void) {}
static inline void xen_alloc_callback_vector(void) {}
#endif

-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
static bool fifo_events = true;
module_param(fifo_events, bool, 0);
static int xen_evtchn_cpu_prepare(unsigned int cpu)
{
int ret = 0;
xen_cpu_init_eoi(cpu);
if (evtchn_ops->percpu_init)
ret = evtchn_ops->percpu_init(cpu);
return ret;
}
static int xen_evtchn_cpu_dead(unsigned int cpu)
{
int ret = 0;
if (evtchn_ops->percpu_deinit)
ret = evtchn_ops->percpu_deinit(cpu);
return ret;
}
void __init xen_init_IRQ(void)
{
    int ret = -EINVAL;
@@ -1692,6 +2055,12 @@ void __init xen_init_IRQ(void)
    if (ret < 0)
        xen_evtchn_2l_init();

+    xen_cpu_init_eoi(smp_processor_id());
+
+    cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+                              "xen/evtchn:prepare",
+                              xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
+
    evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
                            sizeof(*evtchn_to_irq), GFP_KERNEL);
    BUG_ON(!evtchn_to_irq);
...
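
The core of the XSA-332 mitigation above is the spurious-event back-off in
xen_irq_lateeoi_locked(): every EOI flagged XEN_EOI_FLAG_SPURIOUS increments
spurious_cnt, and from the second consecutive spurious event onwards the
unmask is delayed by 1 << (spurious_cnt - 2) jiffies, capped at one second
(HZ). The small stand-alone C program below merely replays that arithmetic
so the growth of the delay is easy to see; it is an illustration of the
formula, not kernel code, and HZ=250 is assumed for the printout.

/* Illustration of the spurious-EOI back-off from xen_irq_lateeoi_locked(). */
#include <stdio.h>

#define HZ 250	/* assumption for this example; depends on CONFIG_HZ */

int main(void)
{
    unsigned int spurious_cnt = 0;
    int event;

    for (event = 1; event <= 12; event++) {
        unsigned int delay = 0;

        /* Saturating increment, as in the patch. */
        if ((1u << spurious_cnt) < (HZ << 2))
            spurious_cnt++;

        /* From the second spurious event on, delay the unmask. */
        if (spurious_cnt > 1) {
            delay = 1u << (spurious_cnt - 2);
            if (delay > HZ)
                delay = HZ;
        }

        printf("spurious event %2d: delay %4u jiffies (%u ms)\n",
               event, delay, delay * 1000 / HZ);
    }
    return 0;
}
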
@@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(evtchn_port_t port)
    return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}

/*
- * Clear MASKED, spinning if BUSY is set.
+ * Clear MASKED if not PENDING, spinning if BUSY is set.
+ * Return true if mask was cleared.
 */
-static void clear_masked(volatile event_word_t *word)
+static bool clear_masked_cond(volatile event_word_t *word)
{
    event_word_t new, old, w;

    w = *word;

    do {
+        if (w & (1 << EVTCHN_FIFO_PENDING))
+            return false;
+
        old = w & ~(1 << EVTCHN_FIFO_BUSY);
        new = old & ~(1 << EVTCHN_FIFO_MASKED);
        w = sync_cmpxchg(word, old, new);
    } while (w != old);
+
+    return true;
}

static void evtchn_fifo_unmask(evtchn_port_t port)
@@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(evtchn_port_t port)

    BUG_ON(!irqs_disabled());

-    clear_masked(word);
-    if (evtchn_fifo_is_pending(port)) {
+    if (!clear_masked_cond(word)) {
        struct evtchn_unmask unmask = { .port = port };
        (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
    }
@@ -270,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
    return w & EVTCHN_FIFO_LINK_MASK;
}

-static void handle_irq_for_port(evtchn_port_t port)
-{
-    int irq;
-
-    irq = get_evtchn_to_irq(port);
-    if (irq != -1)
-        generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
                              struct evtchn_fifo_control_block *control_block,
-                              unsigned priority, unsigned long *ready,
-                              bool drop)
+                              unsigned priority, unsigned long *ready)
{
    struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
    uint32_t head;
@@ -315,16 +310,17 @@ static void consume_one_event(unsigned cpu,
        clear_bit(priority, ready);

    if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
-        if (unlikely(drop))
+        if (unlikely(!ctrl))
            pr_warn("Dropping pending event for port %u\n", port);
        else
-            handle_irq_for_port(port);
+            handle_irq_for_port(port, ctrl);
    }

    q->head[priority] = head;
}

-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+                                        struct evtchn_loop_ctrl *ctrl)
{
    struct evtchn_fifo_control_block *control_block;
    unsigned long ready;
@@ -336,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)

    while (ready) {
        q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
-        consume_one_event(cpu, control_block, q, &ready, drop);
+        consume_one_event(cpu, ctrl, control_block, q, &ready);
        ready |= xchg(&control_block->ready, 0);
    }
}

-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+                                      struct evtchn_loop_ctrl *ctrl)
{
-    __evtchn_fifo_handle_events(cpu, false);
+    __evtchn_fifo_handle_events(cpu, ctrl);
}

static void evtchn_fifo_resume(void)
@@ -380,21 +377,6 @@ static void evtchn_fifo_resume(void)
    event_array_pages = 0;
}
-static const struct evtchn_ops evtchn_ops_fifo = {
-    .max_channels      = evtchn_fifo_max_channels,
-    .nr_channels       = evtchn_fifo_nr_channels,
-    .setup             = evtchn_fifo_setup,
-    .bind_to_cpu       = evtchn_fifo_bind_to_cpu,
-    .clear_pending     = evtchn_fifo_clear_pending,
-    .set_pending       = evtchn_fifo_set_pending,
-    .is_pending        = evtchn_fifo_is_pending,
-    .test_and_set_mask = evtchn_fifo_test_and_set_mask,
-    .mask              = evtchn_fifo_mask,
-    .unmask            = evtchn_fifo_unmask,
-    .handle_events     = evtchn_fifo_handle_events,
-    .resume            = evtchn_fifo_resume,
-};
-
static int evtchn_fifo_alloc_control_block(unsigned cpu)
{
    void *control_block = NULL;
@@ -417,19 +399,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu)
    return ret;
}

-static int xen_evtchn_cpu_prepare(unsigned int cpu)
+static int evtchn_fifo_percpu_init(unsigned int cpu)
{
    if (!per_cpu(cpu_control_block, cpu))
        return evtchn_fifo_alloc_control_block(cpu);
    return 0;
}

-static int xen_evtchn_cpu_dead(unsigned int cpu)
+static int evtchn_fifo_percpu_deinit(unsigned int cpu)
{
-    __evtchn_fifo_handle_events(cpu, true);
+    __evtchn_fifo_handle_events(cpu, NULL);
    return 0;
}
static const struct evtchn_ops evtchn_ops_fifo = {
.max_channels = evtchn_fifo_max_channels,
.nr_channels = evtchn_fifo_nr_channels,
.setup = evtchn_fifo_setup,
.bind_to_cpu = evtchn_fifo_bind_to_cpu,
.clear_pending = evtchn_fifo_clear_pending,
.set_pending = evtchn_fifo_set_pending,
.is_pending = evtchn_fifo_is_pending,
.test_and_set_mask = evtchn_fifo_test_and_set_mask,
.mask = evtchn_fifo_mask,
.unmask = evtchn_fifo_unmask,
.handle_events = evtchn_fifo_handle_events,
.resume = evtchn_fifo_resume,
.percpu_init = evtchn_fifo_percpu_init,
.percpu_deinit = evtchn_fifo_percpu_deinit,
};
int __init xen_evtchn_fifo_init(void)
{
    int cpu = smp_processor_id();
@@ -443,9 +442,5 @@ int __init xen_evtchn_fifo_init(void)

    evtchn_ops = &evtchn_ops_fifo;

-    cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
-                              "xen/evtchn:prepare",
-                              xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
-
    return ret;
}
@@ -30,11 +30,16 @@ enum xen_irq_type {
 */
struct irq_info {
    struct list_head list;
-    int refcnt;
+    struct list_head eoi_list;
+    short refcnt;
+    short spurious_cnt;
    enum xen_irq_type type;	/* type */
    unsigned irq;
    evtchn_port_t evtchn;	/* event channel */
    unsigned short cpu;	/* cpu bound */
+    unsigned short eoi_cpu;	/* EOI must happen on this cpu */
+    unsigned int irq_epoch;	/* If eoi_cpu valid: irq_epoch of event */
+    u64 eoi_time;		/* Time in jiffies when to EOI. */

    union {
        unsigned short virq;
@@ -53,6 +58,8 @@ struct irq_info {
#define PIRQ_SHAREABLE	(1 << 1)
#define PIRQ_MSI_GROUP	(1 << 2)

+struct evtchn_loop_ctrl;
+
struct evtchn_ops {
    unsigned (*max_channels)(void);
    unsigned (*nr_channels)(void);
@@ -67,14 +74,18 @@ struct evtchn_ops {
    void (*mask)(evtchn_port_t port);
    void (*unmask)(evtchn_port_t port);

-    void (*handle_events)(unsigned cpu);
+    void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
    void (*resume)(void);

+    int (*percpu_init)(unsigned int cpu);
+    int (*percpu_deinit)(unsigned int cpu);
};

extern const struct evtchn_ops *evtchn_ops;

extern int **evtchn_to_irq;
int get_evtchn_to_irq(evtchn_port_t evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);

struct irq_info *info_for_irq(unsigned irq);
unsigned cpu_from_irq(unsigned irq);
@@ -132,9 +143,10 @@ static inline void unmask_evtchn(evtchn_port_t port)
    return evtchn_ops->unmask(port);
}

-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+                                            struct evtchn_loop_ctrl *ctrl)
{
-    return evtchn_ops->handle_events(cpu);
+    return evtchn_ops->handle_events(cpu, ctrl);
}

static inline void xen_evtchn_resume(void)
...
@@ -167,7 +167,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
         "Interrupt for port %u, but apparently not enabled; per-user %p\n",
         evtchn->port, u);

-    disable_irq_nosync(irq);
    evtchn->enabled = false;

    spin_lock(&u->ring_prod_lock);
@@ -293,7 +292,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
            evtchn = find_evtchn(u, port);
            if (evtchn && !evtchn->enabled) {
                evtchn->enabled = true;
-                enable_irq(irq_from_evtchn(port));
+                xen_irq_lateeoi(irq_from_evtchn(port), 0);
            }
        }
@@ -393,7 +392,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
    if (rc < 0)
        goto err;

-    rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
+    rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
                                           u->name, evtchn);
    if (rc < 0)
        goto err;
...
@@ -66,6 +66,7 @@ struct sock_mapping {
    atomic_t write;
    atomic_t io;
    atomic_t release;
+    atomic_t eoi;
    void (*saved_data_ready)(struct sock *sk);
    struct pvcalls_ioworker ioworker;
};
@@ -87,7 +88,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
                                       struct pvcalls_fedata *fedata,
                                       struct sock_mapping *map);

-static void pvcalls_conn_back_read(void *opaque)
+static bool pvcalls_conn_back_read(void *opaque)
{
    struct sock_mapping *map = (struct sock_mapping *)opaque;
    struct msghdr msg;
@@ -107,17 +108,17 @@ static void pvcalls_conn_back_read(void *opaque)
    virt_mb();

    if (error)
-        return;
+        return false;

    size = pvcalls_queued(prod, cons, array_size);
    if (size >= array_size)
-        return;
+        return false;
    spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
    if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
        atomic_set(&map->read, 0);
        spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
                               flags);
-        return;
+        return true;
    }
    spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
    wanted = array_size - size;
@@ -141,7 +142,7 @@ static void pvcalls_conn_back_read(void *opaque)
    ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
    WARN_ON(ret > wanted);
    if (ret == -EAGAIN) /* shouldn't happen */
-        return;
+        return true;
    if (!ret)
        ret = -ENOTCONN;
    spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
@@ -160,10 +161,10 @@ static void pvcalls_conn_back_read(void *opaque)

    virt_wmb();
    notify_remote_via_irq(map->irq);
-    return;
+    return true;
}
-static void pvcalls_conn_back_write(struct sock_mapping *map)
+static bool pvcalls_conn_back_write(struct sock_mapping *map)
{
    struct pvcalls_data_intf *intf = map->ring;
    struct pvcalls_data *data = &map->data;
@@ -180,7 +181,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
    array_size = XEN_FLEX_RING_SIZE(map->ring_order);
    size = pvcalls_queued(prod, cons, array_size);
    if (size == 0)
-        return;
+        return false;

    memset(&msg, 0, sizeof(msg));
    msg.msg_flags |= MSG_DONTWAIT;
@@ -198,12 +199,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)

    atomic_set(&map->write, 0);
    ret = inet_sendmsg(map->sock, &msg, size);
-    if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
+    if (ret == -EAGAIN) {
        atomic_inc(&map->write);
        atomic_inc(&map->io);
+        return true;
    }
-    if (ret == -EAGAIN)
-        return;

    /* write the data, then update the indexes */
    virt_wmb();
@@ -216,9 +216,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
    }

    /* update the indexes, then notify the other end */
    virt_wmb();
-    if (prod != cons + ret)
+    if (prod != cons + ret) {
        atomic_inc(&map->write);
+        atomic_inc(&map->io);
+    }

    notify_remote_via_irq(map->irq);
+
+    return true;
}
static void pvcalls_back_ioworker(struct work_struct *work)
@@ -227,6 +231,7 @@ static void pvcalls_back_ioworker(struct work_struct *work)
                                       struct pvcalls_ioworker, register_work);
    struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
            ioworker);
+    unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

    while (atomic_read(&map->io) > 0) {
        if (atomic_read(&map->release) > 0) {
@@ -234,10 +239,18 @@ static void pvcalls_back_ioworker(struct work_struct *work)
            return;
        }

-        if (atomic_read(&map->read) > 0)
-            pvcalls_conn_back_read(map);
-        if (atomic_read(&map->write) > 0)
-            pvcalls_conn_back_write(map);
+        if (atomic_read(&map->read) > 0 &&
+            pvcalls_conn_back_read(map))
+            eoi_flags = 0;
+        if (atomic_read(&map->write) > 0 &&
+            pvcalls_conn_back_write(map))
+            eoi_flags = 0;
+
+        if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
+            atomic_set(&map->eoi, 0);
+            xen_irq_lateeoi(map->irq, eoi_flags);
+            eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+        }

        atomic_dec(&map->io);
    }
...@@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_active_socket( ...@@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_active_socket(
goto out; goto out;
map->bytes = page; map->bytes = page;
ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id, ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
evtchn, fedata->dev->otherend_id, evtchn,
pvcalls_back_conn_event, pvcalls_back_conn_event, 0, "pvcalls-backend", map);
0,
"pvcalls-backend",
map);
if (ret < 0) if (ret < 0)
goto out; goto out;
map->irq = ret; map->irq = ret;
...@@ -873,15 +883,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id) ...@@ -873,15 +883,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{ {
struct xenbus_device *dev = dev_id; struct xenbus_device *dev = dev_id;
struct pvcalls_fedata *fedata = NULL; struct pvcalls_fedata *fedata = NULL;
unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
if (dev == NULL) if (dev) {
return IRQ_HANDLED;
fedata = dev_get_drvdata(&dev->dev); fedata = dev_get_drvdata(&dev->dev);
if (fedata == NULL) if (fedata) {
return IRQ_HANDLED;
pvcalls_back_work(fedata); pvcalls_back_work(fedata);
eoi_flags = 0;
}
}
xen_irq_lateeoi(irq, eoi_flags);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
...@@ -891,12 +904,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) ...@@ -891,12 +904,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
struct pvcalls_ioworker *iow; struct pvcalls_ioworker *iow;
if (map == NULL || map->sock == NULL || map->sock->sk == NULL || if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
map->sock->sk->sk_user_data != map) map->sock->sk->sk_user_data != map) {
xen_irq_lateeoi(irq, 0);
return IRQ_HANDLED; return IRQ_HANDLED;
}
iow = &map->ioworker; iow = &map->ioworker;
atomic_inc(&map->write); atomic_inc(&map->write);
atomic_inc(&map->eoi);
atomic_inc(&map->io); atomic_inc(&map->io);
queue_work(iow->wq, &iow->register_work); queue_work(iow->wq, &iow->register_work);
...@@ -932,7 +948,7 @@ static int backend_connect(struct xenbus_device *dev) ...@@ -932,7 +948,7 @@ static int backend_connect(struct xenbus_device *dev)
goto error; goto error;
} }
err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn); err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
if (err < 0) if (err < 0)
goto error; goto error;
fedata->irq = err; fedata->irq = err;
......
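The pvcalls changes above move the EOI out of interrupt context: the event handler only accounts for the notification and kicks the I/O worker, and the worker acknowledges each event once it knows whether it carried real work. A minimal sketch of that pattern follows, assuming invented names (struct my_conn, my_conn_event(), my_conn_worker(), my_conn_handle_io()) that are not part of the driver.

#include <linux/atomic.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <xen/events.h>

/* Invented per-connection state, loosely modelled on the driver's maps. */
struct my_conn {
	unsigned int irq;
	atomic_t pending;		/* events still owing an EOI */
	struct work_struct io_work;
};

static bool my_conn_handle_io(struct my_conn *conn);	/* hypothetical helper */

/* Interrupt context: account for the event and hand off to the worker. */
static irqreturn_t my_conn_event(int irq, void *dev_id)
{
	struct my_conn *conn = dev_id;

	atomic_inc(&conn->pending);
	schedule_work(&conn->io_work);
	return IRQ_HANDLED;		/* EOI is sent later by the worker */
}

/* Process context: do the real work, then send one EOI per event. */
static void my_conn_worker(struct work_struct *work)
{
	struct my_conn *conn = container_of(work, struct my_conn, io_work);
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

	while (atomic_read(&conn->pending) > 0) {
		if (my_conn_handle_io(conn))
			eoi_flags = 0;	/* the event carried real work */
		xen_irq_lateeoi(conn->irq, eoi_flags);
		eoi_flags = XEN_EOI_FLAG_SPURIOUS;
		atomic_dec(&conn->pending);
	}
}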
...@@ -734,10 +734,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, ...@@ -734,10 +734,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
wmb(); wmb();
notify_remote_via_irq(pdev->evtchn_irq); notify_remote_via_irq(pdev->evtchn_irq);
/* Enable IRQ to signal "request done". */
xen_pcibk_lateeoi(pdev, 0);
ret = wait_event_timeout(xen_pcibk_aer_wait_queue, ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *) !(test_bit(_XEN_PCIB_active, (unsigned long *)
&sh_info->flags)), 300*HZ); &sh_info->flags)), 300*HZ);
/* Enable IRQ for pcifront request if not already active. */
if (!test_bit(_PDEVF_op_active, &pdev->flags))
xen_pcibk_lateeoi(pdev, 0);
if (!ret) { if (!ret) {
if (test_bit(_XEN_PCIB_active, if (test_bit(_XEN_PCIB_active,
(unsigned long *)&sh_info->flags)) { (unsigned long *)&sh_info->flags)) {
...@@ -751,12 +758,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, ...@@ -751,12 +758,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
} }
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
if (test_bit(_XEN_PCIF_active,
(unsigned long *)&sh_info->flags)) {
dev_dbg(&psdev->dev->dev, "schedule pci_conf service\n");
xen_pcibk_test_and_schedule_op(psdev->pdev);
}
res = (pci_ers_result_t)aer_op->err; res = (pci_ers_result_t)aer_op->err;
return res; return res;
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <xen/events.h>
#include <xen/interface/io/pciif.h> #include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback" #define DRV_NAME "xen-pciback"
...@@ -27,6 +28,8 @@ struct pci_dev_entry { ...@@ -27,6 +28,8 @@ struct pci_dev_entry {
#define PDEVF_op_active (1<<(_PDEVF_op_active)) #define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1) #define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending)) #define PCIB_op_pending (1<<(_PCIB_op_pending))
#define _EOI_pending (2)
#define EOI_pending (1<<(_EOI_pending))
struct xen_pcibk_device { struct xen_pcibk_device {
void *pci_dev_data; void *pci_dev_data;
...@@ -183,10 +186,15 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) ...@@ -183,10 +186,15 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data); void xen_pcibk_do_op(struct work_struct *data);
static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
unsigned int eoi_flag)
{
if (test_and_clear_bit(_EOI_pending, &pdev->flags))
xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
}
int xen_pcibk_xenbus_register(void); int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void); void xen_pcibk_xenbus_unregister(void);
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif #endif
/* Handles shared IRQs that can to device domain and control domain. */ /* Handles shared IRQs that can to device domain and control domain. */
......
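The new xen_pcibk_lateeoi() helper pairs the event with a pending-EOI bit: the interrupt handler sets the bit, and whichever path finishes the work (the request workqueue, or the AER code further up that releases the event channel before its long wait) clears it and issues the EOI, so each event is acknowledged exactly once. A minimal sketch of that bookkeeping, with struct my_dev and the bit name invented for illustration:

#include <linux/bitops.h>
#include <linux/workqueue.h>
#include <xen/events.h>

#define _MY_EOI_PENDING	0	/* an event arrived and still owes an EOI */

struct my_dev {
	unsigned int evtchn_irq;
	unsigned long flags;
	struct work_struct op_work;	/* request processing, see the next sketch */
};

/* Safe to call from several paths and more than once per event: the EOI is
 * only sent if one is actually owed. */
static inline void my_lateeoi(struct my_dev *d, unsigned int eoi_flag)
{
	if (test_and_clear_bit(_MY_EOI_PENDING, &d->flags))
		xen_irq_lateeoi(d->evtchn_irq, eoi_flag);
}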
...@@ -276,26 +276,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, ...@@ -276,26 +276,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
return 0; return 0;
} }
#endif #endif
static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
{
return test_bit(_XEN_PCIF_active,
(unsigned long *)&pdev->sh_info->flags) &&
!test_and_set_bit(_PDEVF_op_active, &pdev->flags);
}
/* /*
* Now the same evtchn is used for both pcifront conf_read_write request * Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule * as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service for avoiding confict with aer_core * xen_pcibk conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue * do_recovery job which also use the system default work_queue
*/ */
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{ {
bool eoi = true;
/* Check that frontend is requesting an operation and that we are not /* Check that frontend is requesting an operation and that we are not
* already processing a request */ * already processing a request */
if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) if (xen_pcibk_test_op_pending(pdev)) {
&& !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
schedule_work(&pdev->op_work); schedule_work(&pdev->op_work);
eoi = false;
} }
/*_XEN_PCIB_active should have been cleared by pcifront. And also make /*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) { && test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue); wake_up(&xen_pcibk_aer_wait_queue);
eoi = false;
} }
/* EOI if there was nothing to do. */
if (eoi)
xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
} }
/* Performing the configuration space reads/writes must not be done in atomic /* Performing the configuration space reads/writes must not be done in atomic
...@@ -303,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) ...@@ -303,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
* use of semaphores). This function is intended to be called from a work * use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */ * queue in process context taking a struct xen_pcibk_device as a parameter */
void xen_pcibk_do_op(struct work_struct *data) static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
{ {
struct xen_pcibk_device *pdev =
container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev; struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op; struct xen_pci_op *op = &pdev->op;
...@@ -379,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct *data) ...@@ -379,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct *data)
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags); clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */ smp_mb__after_atomic(); /* /before/ final check for work */
}
/* Check to see if the driver domain tried to start another request in void xen_pcibk_do_op(struct work_struct *data)
* between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. {
*/ struct xen_pcibk_device *pdev =
xen_pcibk_test_and_schedule_op(pdev); container_of(data, struct xen_pcibk_device, op_work);
do {
xen_pcibk_do_one_op(pdev);
} while (xen_pcibk_test_op_pending(pdev));
xen_pcibk_lateeoi(pdev, 0);
} }
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{ {
struct xen_pcibk_device *pdev = dev_id; struct xen_pcibk_device *pdev = dev_id;
bool eoi;
/* IRQs might come in before pdev->evtchn_irq is written. */
if (unlikely(pdev->evtchn_irq != irq))
pdev->evtchn_irq = irq;
eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
WARN(eoi, "IRQ while EOI pending\n");
xen_pcibk_test_and_schedule_op(pdev); xen_pcibk_test_and_schedule_op(pdev);
......
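With the rework above, the workqueue drains every request the frontend managed to post before acknowledging the event, which closes the window between clearing the active flags and the EOI. A simplified sketch of handler plus worker, continuing the hypothetical struct my_dev / my_lateeoi() from the previous sketch (the real driver additionally serialises requests with an op-active flag):

#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <xen/events.h>

static void my_do_one_op(struct my_dev *d);	/* handle one request (hypothetical) */
static bool my_op_pending(struct my_dev *d);	/* frontend posted another request? */

static void my_op_work(struct work_struct *work)
{
	struct my_dev *d = container_of(work, struct my_dev, op_work);

	do {
		my_do_one_op(d);
	} while (my_op_pending(d));

	/* Every queued request has been handled; acknowledge the event. */
	my_lateeoi(d, 0);
}

static irqreturn_t my_handle_event(int irq, void *dev_id)
{
	struct my_dev *d = dev_id;

	set_bit(_MY_EOI_PENDING, &d->flags);		/* an EOI is now owed */
	if (my_op_pending(d))
		schedule_work(&d->op_work);
	else
		my_lateeoi(d, XEN_EOI_FLAG_SPURIOUS);	/* nothing to do */
	return IRQ_HANDLED;
}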
...@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, ...@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr; pdev->sh_info = vaddr;
err = bind_interdomain_evtchn_to_irqhandler( err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev); 0, DRV_NAME, pdev);
if (err < 0) { if (err < 0) {
......
...@@ -91,7 +91,6 @@ struct vscsibk_info { ...@@ -91,7 +91,6 @@ struct vscsibk_info {
unsigned int irq; unsigned int irq;
struct vscsiif_back_ring ring; struct vscsiif_back_ring ring;
int ring_error;
spinlock_t ring_lock; spinlock_t ring_lock;
atomic_t nr_unreplied_reqs; atomic_t nr_unreplied_reqs;
...@@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info, ...@@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info,
return pending_req; return pending_req;
} }
static int scsiback_do_cmd_fn(struct vscsibk_info *info) static int scsiback_do_cmd_fn(struct vscsibk_info *info,
unsigned int *eoi_flags)
{ {
struct vscsiif_back_ring *ring = &info->ring; struct vscsiif_back_ring *ring = &info->ring;
struct vscsiif_request ring_req; struct vscsiif_request ring_req;
...@@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) ...@@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
rc = ring->rsp_prod_pvt; rc = ring->rsp_prod_pvt;
pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
info->domid, rp, rc, rp - rc); info->domid, rp, rc, rp - rc);
info->ring_error = 1; return -EINVAL;
return 0;
} }
while ((rc != rp)) { while ((rc != rp)) {
*eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
break; break;
...@@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) ...@@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
static irqreturn_t scsiback_irq_fn(int irq, void *dev_id) static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
{ {
struct vscsibk_info *info = dev_id; struct vscsibk_info *info = dev_id;
int rc;
unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
if (info->ring_error) while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0)
return IRQ_HANDLED;
while (scsiback_do_cmd_fn(info))
cond_resched(); cond_resched();
/* In case of a ring error we keep the event channel masked. */
if (!rc)
xen_irq_lateeoi(irq, eoi_flags);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
...@@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, ...@@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
sring = (struct vscsiif_sring *)area; sring = (struct vscsiif_sring *)area;
BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
err = bind_interdomain_evtchn_to_irq(info->domid, evtchn); err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn);
if (err < 0) if (err < 0)
goto unmap_page; goto unmap_page;
...@@ -1253,7 +1257,6 @@ static int scsiback_probe(struct xenbus_device *dev, ...@@ -1253,7 +1257,6 @@ static int scsiback_probe(struct xenbus_device *dev,
info->domid = dev->otherend_id; info->domid = dev->otherend_id;
spin_lock_init(&info->ring_lock); spin_lock_init(&info->ring_lock);
info->ring_error = 0;
atomic_set(&info->nr_unreplied_reqs, 0); atomic_set(&info->nr_unreplied_reqs, 0);
init_waitqueue_head(&info->waiting_to_free); init_waitqueue_head(&info->waiting_to_free);
info->dev = dev; info->dev = dev;
......
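scsiback no longer latches a ring_error flag: a corrupted ring makes the command loop return an error, and the interrupt handler then simply skips the EOI so the rogue event channel stays masked. A condensed sketch of that control flow, with my_do_cmd_fn() standing in for scsiback_do_cmd_fn():

#include <linux/interrupt.h>
#include <linux/sched.h>
#include <xen/events.h>

/* Returns <0 on a corrupted ring, >0 while more requests may be pending,
 * 0 once the ring has been drained (hypothetical stand-in). */
static int my_do_cmd_fn(void *info, unsigned int *eoi_flags);

static irqreturn_t my_ring_irq(int irq, void *dev_id)
{
	int rc;
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

	while ((rc = my_do_cmd_fn(dev_id, &eoi_flags)) > 0)
		cond_resched();

	/* On a ring error the EOI is skipped, leaving the channel masked. */
	if (!rc)
		xen_irq_lateeoi(irq, eoi_flags);
	return IRQ_HANDLED;
}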
...@@ -15,10 +15,15 @@ ...@@ -15,10 +15,15 @@
unsigned xen_evtchn_nr_channels(void); unsigned xen_evtchn_nr_channels(void);
int bind_evtchn_to_irq(evtchn_port_t evtchn); int bind_evtchn_to_irq(evtchn_port_t evtchn);
int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler, irq_handler_t handler,
unsigned long irqflags, const char *devname, unsigned long irqflags, const char *devname,
void *dev_id); void *dev_id);
int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
void *dev_id);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler, irq_handler_t handler,
...@@ -32,12 +37,20 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, ...@@ -32,12 +37,20 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
void *dev_id); void *dev_id);
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
evtchn_port_t remote_port); evtchn_port_t remote_port);
int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
evtchn_port_t remote_port);
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
evtchn_port_t remote_port, evtchn_port_t remote_port,
irq_handler_t handler, irq_handler_t handler,
unsigned long irqflags, unsigned long irqflags,
const char *devname, const char *devname,
void *dev_id); void *dev_id);
int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id);
/* /*
* Common unbind function for all event sources. Takes IRQ to unbind from. * Common unbind function for all event sources. Takes IRQ to unbind from.
...@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, ...@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
*/ */
void unbind_from_irqhandler(unsigned int irq, void *dev_id); void unbind_from_irqhandler(unsigned int irq, void *dev_id);
/*
* Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi
* functions above.
*/
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags);
/* Signal an event was spurious, i.e. there was no action resulting from it. */
#define XEN_EOI_FLAG_SPURIOUS 0x00000001
#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX #define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN #define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
......
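The header additions above are the whole of the new lateeoi interface: bind with one of the *_lateeoi variants, and acknowledge each event explicitly with xen_irq_lateeoi(), passing XEN_EOI_FLAG_SPURIOUS when the event carried no work so the core can throttle a misbehaving frontend. A minimal usage sketch; my_process_ring(), my_backend_connect() and the "my-backend" name are illustrative only:

#include <linux/interrupt.h>
#include <xen/events.h>

static bool my_process_ring(void *priv);	/* hypothetical ring handler */

static irqreturn_t my_evtchn_handler(int irq, void *dev_id)
{
	bool did_work = my_process_ring(dev_id);

	/* The EOI is no longer implicit in IRQ handling; report events that
	 * carried no work as spurious so they can be rate limited. */
	xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
	return IRQ_HANDLED;
}

static int my_backend_connect(unsigned int remote_domain,
			      evtchn_port_t evtchn, void *priv)
{
	int irq = bind_interdomain_evtchn_to_irqhandler_lateeoi(remote_domain,
			evtchn, my_evtchn_handler, 0, "my-backend", priv);

	if (irq < 0)
		return irq;
	/* Store irq; tear down with unbind_from_irqhandler(irq, priv). */
	return 0;
}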