Commit b96ea616 authored by Michael Ellerman

Merge VAS page fault handling into next

As described by Haren:

On Power9, Virtual Accelerator Switchboard (VAS) allows user space or
kernel to communicate with Nest Accelerator (NX) directly using
COPY/PASTE instructions. NX provides various functions such as
compression and encryption, but only compression (842 and GZIP
formats) is supported in the Linux kernel on Power9.

842 compression driver (drivers/crypto/nx/nx-842-powernv.c) is already
included in Linux. Only GZIP support will be available from user
space.

Applications can issue GZIP compression / decompression requests to NX
with COPY/PASTE instructions. While processing these requests, NX
can hit a fault on the request buffer (page not in memory). It then
issues an interrupt and pastes the fault CRB into the fault FIFO, and
expects the kernel to handle this fault and return credits for both
the send and fault windows after processing.

This patch series adds IRQ and fault window setup, and NX fault
handling:
  - Alloc IRQ and trigger port address, and configure IRQ per VAS
    instance.
  - Set port# for each window to generate an interrupt when a fault
    is noticed.
  - Set up the fault window and the FIFO into which NX pastes fault CRBs.
  - Setup IRQ thread fault handler per VAS instance.
  - When receiving an interrupt, read CRBs from the fault FIFO and update
    the coprocessor_status_block (CSB) in the corresponding CRB with
    translation failure (CSB_CC_TRANSLATION). After issuing NX
    requests, the process polls on the CSB address. When it sees a
    translation error, it can touch the request buffer to bring the
    page into memory and reissue the NX request.
  - If copy_to_user fails on the user space CSB address, the OS sends
    a SEGV signal.
parents ae83d0b4 c420644c
...@@ -116,6 +116,9 @@ typedef struct { ...@@ -116,6 +116,9 @@ typedef struct {
/* Number of users of the external (Nest) MMU */ /* Number of users of the external (Nest) MMU */
atomic_t copros; atomic_t copros;
/* Number of user space windows opened in process mm_context */
atomic_t vas_windows;
struct hash_mm_context *hash_context; struct hash_mm_context *hash_context;
unsigned long vdso_base; unsigned long vdso_base;
......
...@@ -108,6 +108,17 @@ struct data_descriptor_entry { ...@@ -108,6 +108,17 @@ struct data_descriptor_entry {
__be64 address; __be64 address;
} __packed __aligned(DDE_ALIGN); } __packed __aligned(DDE_ALIGN);
/* 4.3.2 NX-stamped Fault CRB */
/* Alignment, in bytes, required for the NX fault stamp (0x10 = 16). */
#define NX_STAMP_ALIGN (0x10)
/*
 * Fault information that NX stamps into a CRB when it pastes the CRB
 * into the fault FIFO. The structure is packed and occupies 16 bytes
 * (8 + 2 + 1 + 1 + 4), matching the 16-byte "stamp" area it overlays
 * in struct coprocessor_request_block. Multi-byte fields are big-endian.
 */
struct nx_fault_stamp {
/* presumably the storage address NX faulted on - TODO confirm with NX spec */
__be64 fault_storage_addr;
__be16 reserved;
/* NOTE(review): bit meanings of flags/fault_status are not visible here */
__u8 flags;
__u8 fault_status;
/* Partition Send Window ID pasted by NX; used to look up the window */
__be32 pswid;
} __packed __aligned(NX_STAMP_ALIGN);
/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */ /* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
...@@ -135,10 +146,15 @@ struct coprocessor_request_block { ...@@ -135,10 +146,15 @@ struct coprocessor_request_block {
struct coprocessor_completion_block ccb; struct coprocessor_completion_block ccb;
u8 reserved[48]; union {
struct nx_fault_stamp nx;
u8 reserved[16];
} stamp;
u8 reserved[32];
struct coprocessor_status_block csb; struct coprocessor_status_block csb;
} __packed __aligned(CRB_ALIGN); } __packed;
/* RFC02167 Initiate Coprocessor Instructions document /* RFC02167 Initiate Coprocessor Instructions document
......
...@@ -185,11 +185,41 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) ...@@ -185,11 +185,41 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
dec_mm_active_cpus(mm); dec_mm_active_cpus(mm);
} }
} }
/*
 * The vas_windows counter holds the number of open user space windows
 * in the mm context. During context switch, this counter is checked
 * to decide whether to clear the foreign real address mapping
 * (CP_ABORT) for a thread / process that intends to use COPY/PASTE.
 * When a process closes all its windows the counter drops to zero and
 * CP_ABORT, which is expensive to run, is no longer issued.
 *
 * For user context, register a copro so that TLBIs are seen by the
 * nest MMU. mm_context_add/remove_vas_window() are used only for user
 * space windows.
 */
/*
 * Account for one more open user space VAS window in @mm: bump the
 * vas_windows counter, then register a coprocessor user on the mm.
 */
static inline void mm_context_add_vas_window(struct mm_struct *mm)
{
atomic_inc(&mm->context.vas_windows);
mm_context_add_copro(mm);
}
/*
 * Undo mm_context_add_vas_window() for @mm: drop the coprocessor
 * registration first, then decrement the vas_windows counter.
 */
static inline void mm_context_remove_vas_window(struct mm_struct *mm)
{
	int remaining;

	mm_context_remove_copro(mm);

	/*
	 * The counter is only decremented while it is positive. A negative
	 * result therefore means a remove without a matching add.
	 */
	remaining = atomic_dec_if_positive(&mm->context.vas_windows);
	WARN_ON(remaining < 0);
}
#else #else
static inline void inc_mm_active_cpus(struct mm_struct *mm) { } static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
static inline void dec_mm_active_cpus(struct mm_struct *mm) { } static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
static inline void mm_context_add_copro(struct mm_struct *mm) { } static inline void mm_context_add_copro(struct mm_struct *mm) { }
static inline void mm_context_remove_copro(struct mm_struct *mm) { } static inline void mm_context_remove_copro(struct mm_struct *mm) { }
/*
 * No-op stubs for configurations without the real implementation.
 * The names must match the real helpers, mm_context_add_vas_window()
 * and mm_context_remove_vas_window() (singular), so that callers build
 * in either configuration.
 */
static inline void mm_context_add_vas_window(struct mm_struct *mm) { }
static inline void mm_context_remove_vas_window(struct mm_struct *mm) { }
/* Original (misspelled, plural) stub names, retained for compatibility. */
static inline void mm_context_add_vas_windows(struct mm_struct *mm) { }
static inline void mm_context_remove_vas_windows(struct mm_struct *mm) { }
#endif #endif
......
...@@ -272,7 +272,6 @@ struct thread_struct { ...@@ -272,7 +272,6 @@ struct thread_struct {
unsigned mmcr0; unsigned mmcr0;
unsigned used_ebb; unsigned used_ebb;
unsigned int used_vas;
#endif #endif
}; };
......
...@@ -102,8 +102,6 @@ static inline void clear_task_ebb(struct task_struct *t) ...@@ -102,8 +102,6 @@ static inline void clear_task_ebb(struct task_struct *t)
#endif #endif
} }
extern int set_thread_uses_vas(void);
extern int set_thread_tidr(struct task_struct *t); extern int set_thread_tidr(struct task_struct *t);
#endif /* _ASM_POWERPC_SWITCH_TO_H */ #endif /* _ASM_POWERPC_SWITCH_TO_H */
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#ifndef _ASM_POWERPC_XIVE_H #ifndef _ASM_POWERPC_XIVE_H
#define _ASM_POWERPC_XIVE_H #define _ASM_POWERPC_XIVE_H
#include <asm/opal-api.h>
#define XIVE_INVALID_VP 0xffffffff #define XIVE_INVALID_VP 0xffffffff
#ifdef CONFIG_PPC_XIVE #ifdef CONFIG_PPC_XIVE
...@@ -108,7 +110,6 @@ void xive_native_free_vp_block(u32 vp_base); ...@@ -108,7 +110,6 @@ void xive_native_free_vp_block(u32 vp_base);
int xive_native_populate_irq_data(u32 hw_irq, int xive_native_populate_irq_data(u32 hw_irq,
struct xive_irq_data *data); struct xive_irq_data *data);
void xive_cleanup_irq_data(struct xive_irq_data *xd); void xive_cleanup_irq_data(struct xive_irq_data *xd);
u32 xive_native_alloc_irq(void);
void xive_native_free_irq(u32 irq); void xive_native_free_irq(u32 irq);
int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
...@@ -137,6 +138,12 @@ int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, ...@@ -137,6 +138,12 @@ int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
u32 qindex); u32 qindex);
int xive_native_get_vp_state(u32 vp_id, u64 *out_state); int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
bool xive_native_has_queue_state_support(void); bool xive_native_has_queue_state_support(void);
/* Allocate a XIVE hardware interrupt on the chip identified by @chip_id. */
extern u32 xive_native_alloc_irq_on_chip(u32 chip_id);
/*
 * Allocate a XIVE hardware interrupt without a chip preference by
 * passing OPAL_XIVE_ANY_CHIP to xive_native_alloc_irq_on_chip().
 * Returns whatever that function returns; callers in this change treat
 * a return value of 0 as "allocation failed".
 */
static inline u32 xive_native_alloc_irq(void)
{
return xive_native_alloc_irq_on_chip(OPAL_XIVE_ANY_CHIP);
}
#else #else
......
...@@ -1228,7 +1228,8 @@ struct task_struct *__switch_to(struct task_struct *prev, ...@@ -1228,7 +1228,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
* mappings, we must issue a cp_abort to clear any state and * mappings, we must issue a cp_abort to clear any state and
* prevent snooping, corruption or a covert channel. * prevent snooping, corruption or a covert channel.
*/ */
if (current->thread.used_vas) if (current->mm &&
atomic_read(&current->mm->context.vas_windows))
asm volatile(PPC_CP_ABORT); asm volatile(PPC_CP_ABORT);
} }
#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_BOOK3S_64 */
...@@ -1467,27 +1468,6 @@ void arch_setup_new_exec(void) ...@@ -1467,27 +1468,6 @@ void arch_setup_new_exec(void)
} }
#endif #endif
/*
 * Mark the current thread as a VAS user.
 *
 * On CONFIG_PPC_BOOK3S_64 builds: fails with -EINVAL when the CPU lacks
 * CPU_FTR_ARCH_300, otherwise sets current->thread.used_vas and issues
 * one CP_ABORT immediately. On other builds it does nothing.
 *
 * Returns 0 on success, -EINVAL if the CPU feature is missing.
 */
int set_thread_uses_vas(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return -EINVAL;
current->thread.used_vas = 1;
/*
* Even a process that has no foreign real address mapping can use
* an unpaired COPY instruction (to no real effect). Issue CP_ABORT
* to clear any pending COPY and prevent a covert channel.
*
* __switch_to() will issue CP_ABORT on future context switches.
*/
asm volatile(PPC_CP_ABORT);
#endif /* CONFIG_PPC_BOOK3S_64 */
return 0;
}
#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC64
/** /**
* Assign a TIDR (thread ID) for task @t and set it in the thread * Assign a TIDR (thread ID) for task @t and set it in the thread
......
...@@ -17,7 +17,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o ...@@ -17,7 +17,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_OCXL_BASE) += ocxl.o
obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
...@@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private) ...@@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private)
seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
window->tx_win ? "Send" : "Receive"); window->tx_win ? "Send" : "Receive");
seq_printf(s, "Pid : %d\n", window->pid); seq_printf(s, "Pid : %d\n", vas_window_pid(window));
unlock: unlock:
mutex_unlock(&vas_mutex); mutex_unlock(&vas_mutex);
......
This diff is collapsed.
This diff is collapsed.
...@@ -14,7 +14,10 @@ ...@@ -14,7 +14,10 @@
#include <linux/of_platform.h> #include <linux/of_platform.h>
#include <linux/of_address.h> #include <linux/of_address.h>
#include <linux/of.h> #include <linux/of.h>
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/prom.h> #include <asm/prom.h>
#include <asm/xive.h>
#include "vas.h" #include "vas.h"
...@@ -23,12 +26,37 @@ static LIST_HEAD(vas_instances); ...@@ -23,12 +26,37 @@ static LIST_HEAD(vas_instances);
static DEFINE_PER_CPU(int, cpu_vas_id); static DEFINE_PER_CPU(int, cpu_vas_id);
static int vas_irq_fault_window_setup(struct vas_instance *vinst)
{
char devname[64];
int rc = 0;
snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id);
rc = request_threaded_irq(vinst->virq, vas_fault_handler,
vas_fault_thread_fn, 0, devname, vinst);
if (rc) {
pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n",
vinst->vas_id, vinst->virq, rc);
goto out;
}
rc = vas_setup_fault_window(vinst);
if (rc)
free_irq(vinst->virq, vinst);
out:
return rc;
}
static int init_vas_instance(struct platform_device *pdev) static int init_vas_instance(struct platform_device *pdev)
{ {
int rc, cpu, vasid;
struct resource *res;
struct vas_instance *vinst;
struct device_node *dn = pdev->dev.of_node; struct device_node *dn = pdev->dev.of_node;
struct vas_instance *vinst;
struct xive_irq_data *xd;
uint32_t chipid, hwirq;
struct resource *res;
int rc, cpu, vasid;
rc = of_property_read_u32(dn, "ibm,vas-id", &vasid); rc = of_property_read_u32(dn, "ibm,vas-id", &vasid);
if (rc) { if (rc) {
...@@ -36,6 +64,12 @@ static int init_vas_instance(struct platform_device *pdev) ...@@ -36,6 +64,12 @@ static int init_vas_instance(struct platform_device *pdev)
return -ENODEV; return -ENODEV;
} }
rc = of_property_read_u32(dn, "ibm,chip-id", &chipid);
if (rc) {
pr_err("No ibm,chip-id property for %s?\n", pdev->name);
return -ENODEV;
}
if (pdev->num_resources != 4) { if (pdev->num_resources != 4) {
pr_err("Unexpected DT configuration for [%s, %d]\n", pr_err("Unexpected DT configuration for [%s, %d]\n",
pdev->name, vasid); pdev->name, vasid);
...@@ -69,9 +103,32 @@ static int init_vas_instance(struct platform_device *pdev) ...@@ -69,9 +103,32 @@ static int init_vas_instance(struct platform_device *pdev)
vinst->paste_win_id_shift = 63 - res->end; vinst->paste_win_id_shift = 63 - res->end;
pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, " hwirq = xive_native_alloc_irq_on_chip(chipid);
"paste_win_id_shift 0x%llx\n", pdev->name, vasid, if (!hwirq) {
vinst->paste_base_addr, vinst->paste_win_id_shift); pr_err("Inst%d: Unable to allocate global irq for chip %d\n",
vinst->vas_id, chipid);
return -ENOENT;
}
vinst->virq = irq_create_mapping(NULL, hwirq);
if (!vinst->virq) {
pr_err("Inst%d: Unable to map global irq %d\n",
vinst->vas_id, hwirq);
return -EINVAL;
}
xd = irq_get_handler_data(vinst->virq);
if (!xd) {
pr_err("Inst%d: Invalid virq %d\n",
vinst->vas_id, vinst->virq);
return -EINVAL;
}
vinst->irq_port = xd->trig_page;
pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n",
pdev->name, vasid, vinst->paste_base_addr,
vinst->paste_win_id_shift, vinst->virq,
vinst->irq_port);
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn)) if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
...@@ -82,6 +139,22 @@ static int init_vas_instance(struct platform_device *pdev) ...@@ -82,6 +139,22 @@ static int init_vas_instance(struct platform_device *pdev)
list_add(&vinst->node, &vas_instances); list_add(&vinst->node, &vas_instances);
mutex_unlock(&vas_mutex); mutex_unlock(&vas_mutex);
spin_lock_init(&vinst->fault_lock);
/*
* IRQ and fault handling setup is needed only for user space
* send windows.
*/
if (vinst->virq) {
rc = vas_irq_fault_window_setup(vinst);
/*
 * Fault window is used only for user space send windows.
 * So if vinst->virq is 0, tx_win_open returns -ENODEV
 * for user space.
 */
if (rc)
vinst->virq = 0;
}
vas_instance_init_dbgdir(vinst); vas_instance_init_dbgdir(vinst);
dev_set_drvdata(&pdev->dev, vinst); dev_set_drvdata(&pdev->dev, vinst);
......
...@@ -101,11 +101,9 @@ ...@@ -101,11 +101,9 @@
/* /*
* Initial per-process credits. * Initial per-process credits.
* Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED) * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED)
* Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
* *
* TODO: Needs tuning for per-process credits * TODO: Needs tuning for per-process credits
*/ */
#define VAS_RX_WCREDS_MAX ((64 << 10) - 1)
#define VAS_TX_WCREDS_MAX ((4 << 10) - 1) #define VAS_TX_WCREDS_MAX ((4 << 10) - 1)
#define VAS_WCREDS_DEFAULT (1 << 10) #define VAS_WCREDS_DEFAULT (1 << 10)
...@@ -295,6 +293,22 @@ enum vas_notify_after_count { ...@@ -295,6 +293,22 @@ enum vas_notify_after_count {
VAS_NOTIFY_AFTER_2 VAS_NOTIFY_AFTER_2
}; };
/*
 * NX can generate an interrupt for multiple faults and expects the
 * kernel to process all of them. So read all valid CRB entries until
 * an invalid one is found. Use pswid, which is pasted by NX, and
 * ccw[0] (reserved bit in BE) to check for a valid CRB. CCW[0] will
 * not be touched by user space. The application gets a CRB format
 * error if it updates this bit.
 *
 * Invalidate the FIFO during allocation and process all entries from
 * the last successful read until invalid pswid and ccw[0] values are
 * found. After reading each CRB entry from the fault FIFO, the kernel
 * invalidates it by updating pswid with FIFO_INVALID_ENTRY and CCW[0]
 * with CCW0_INVALID.
 */
#define FIFO_INVALID_ENTRY 0xffffffff
#define CCW0_INVALID 1
/* /*
* One per instance of VAS. Each instance will have a separate set of * One per instance of VAS. Each instance will have a separate set of
* receive windows, one per coprocessor type. * receive windows, one per coprocessor type.
...@@ -313,6 +327,15 @@ struct vas_instance { ...@@ -313,6 +327,15 @@ struct vas_instance {
u64 paste_base_addr; u64 paste_base_addr;
u64 paste_win_id_shift; u64 paste_win_id_shift;
u64 irq_port;
int virq;
int fault_crbs;
int fault_fifo_size;
int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */
spinlock_t fault_lock; /* Protects fifo_in_progress update */
void *fault_fifo;
struct vas_window *fault_win; /* Fault window */
struct mutex mutex; struct mutex mutex;
struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *rxwin[VAS_COP_TYPE_MAX];
struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
...@@ -333,7 +356,9 @@ struct vas_window { ...@@ -333,7 +356,9 @@ struct vas_window {
bool user_win; /* True if user space window */ bool user_win; /* True if user space window */
void *hvwc_map; /* HV window context */ void *hvwc_map; /* HV window context */
void *uwc_map; /* OS/User window context */ void *uwc_map; /* OS/User window context */
pid_t pid; /* Linux process id of owner */ struct pid *pid; /* Linux process id of owner */
struct pid *tgid; /* Thread group ID of owner */
struct mm_struct *mm; /* Linux process mm_struct */
int wcreds_max; /* Window credits */ int wcreds_max; /* Window credits */
char *dbgname; char *dbgname;
...@@ -406,6 +431,17 @@ extern void vas_init_dbgdir(void); ...@@ -406,6 +431,17 @@ extern void vas_init_dbgdir(void);
extern void vas_instance_init_dbgdir(struct vas_instance *vinst); extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
extern void vas_window_init_dbgdir(struct vas_window *win); extern void vas_window_init_dbgdir(struct vas_window *win);
extern void vas_window_free_dbgdir(struct vas_window *win); extern void vas_window_free_dbgdir(struct vas_window *win);
extern int vas_setup_fault_window(struct vas_instance *vinst);
extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
extern void vas_return_credit(struct vas_window *window, bool tx);
extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
uint32_t pswid);
/*
 * Return the numeric process id of the owner of @window.
 *
 * window->pid is a struct pid pointer rather than a raw pid_t, so it is
 * converted to a number with pid_vnr() (used, for example, when
 * printing "Pid" in the debugfs info output).
 */
static inline int vas_window_pid(struct vas_window *window)
{
return pid_vnr(window->pid);
}
static inline void vas_log_write(struct vas_window *win, char *name, static inline void vas_log_write(struct vas_window *win, char *name,
void *regptr, u64 val) void *regptr, u64 val)
...@@ -444,6 +480,21 @@ static inline u64 read_hvwc_reg(struct vas_window *win, ...@@ -444,6 +480,21 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
return in_be64(win->hvwc_map+reg); return in_be64(win->hvwc_map+reg);
} }
/*
 * Encode/decode the Partition Send Window ID (PSWID) for a window in
 * a way that we can uniquely identify any window in the system. i.e.
 * we should be able to locate the 'struct vas_window' given the PSWID.
 *
 * Bit positions below use big-endian numbering (bit 0 is the most
 * significant bit of the 32-bit value).
 *
 *	Bits	Usage
 *	0:7	VAS id (8 bits)
 *	8:15	Unused, 0 (8 bits)
 *	16:31	Window id (16 bits)
 */
static inline u32 encode_pswid(int vasid, int winid)
{
	/*
	 * Convert to u32 before shifting: shifting a signed int left by 24
	 * overflows (undefined behaviour) for any vasid >= 128.
	 */
	return ((u32)winid | ((u32)vasid << (31 - 7)));
}
static inline void decode_pswid(u32 pswid, int *vasid, int *winid) static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
{ {
if (vasid) if (vasid)
......
...@@ -280,12 +280,12 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) ...@@ -280,12 +280,12 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
u32 xive_native_alloc_irq(void) u32 xive_native_alloc_irq_on_chip(u32 chip_id)
{ {
s64 rc; s64 rc;
for (;;) { for (;;) {
rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP); rc = opal_xive_allocate_irq(chip_id);
if (rc != OPAL_BUSY) if (rc != OPAL_BUSY)
break; break;
msleep(OPAL_BUSY_DELAY_MS); msleep(OPAL_BUSY_DELAY_MS);
...@@ -294,7 +294,7 @@ u32 xive_native_alloc_irq(void) ...@@ -294,7 +294,7 @@ u32 xive_native_alloc_irq(void)
return 0; return 0;
return rc; return rc;
} }
EXPORT_SYMBOL_GPL(xive_native_alloc_irq); EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip);
void xive_native_free_irq(u32 irq) void xive_native_free_irq(u32 irq)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment