Commit dcf3c935 authored by Linus Torvalds

Merge tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml

Pull UML updates from Richard Weinberger:

 - Support for optimized routines based on the host CPU

 - Support for PCI via virtio

 - Various fixes

* tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  um: remove unneeded semicolon in um_arch.c
  um: Remove the repeated declaration
  um: fix error return code in winch_tramp()
  um: fix error return code in slip_open()
  um: Fix stack pointer alignment
  um: implement flush_cache_vmap/flush_cache_vunmap
  um: add a UML specific futex implementation
  um: enable the use of optimized xor routines in UML
  um: Add support for host CPU flags and alignment
  um: allow not setting extra rpaths in the linux binary
  um: virtio/pci: enable suspend/resume
  um: add PCI over virtio emulation driver
  um: irqs: allow invoking time-travel handler multiple times
  um: time-travel/signals: fix ndelay() in interrupt
  um: expose time-travel mode to userspace side
  um: export signals_enabled directly
  um: remove unused smp_sigio_handler() declaration
  lib: add iomem emulation (logic_iomem)
  um: allow disabling NO_IOMEM
parents 7a400bf2 1aee0201
@@ -15,7 +15,7 @@ config UML
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_BUGVERBOSE
-	select NO_DMA
+	select NO_DMA if !UML_DMA_EMULATION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select HAVE_GCC_PLUGINS
@@ -26,7 +26,22 @@ config MMU
 	bool
 	default y

+config UML_DMA_EMULATION
+	bool
+
 config NO_IOMEM
-	def_bool y
+	bool "disable IOMEM" if EXPERT
+	depends on !INDIRECT_IOMEM
+	default y
+
+config UML_IOMEM_EMULATION
+	bool
+	select INDIRECT_IOMEM
+	select GENERIC_PCI_IOMAP
+	select GENERIC_IOMAP
+	select NO_GENERIC_PCI_IOPORT_MAP

 config NO_IOPORT_MAP
 	def_bool y

 config ISA
@@ -61,6 +76,9 @@ config NR_CPUS
 	range 1 1
 	default 1

+config ARCH_HAS_CACHE_LINE_SIZE
+	def_bool y
+
 source "arch/$(HEADER_ARCH)/um/Kconfig"

 config MAY_HAVE_RUNTIME_DEPS
@@ -91,6 +109,19 @@ config LD_SCRIPT_DYN
 	depends on !LD_SCRIPT_STATIC
 	select MODULE_REL_CRCS if MODVERSIONS

+config LD_SCRIPT_DYN_RPATH
+	bool "set rpath in the binary" if EXPERT
+	default y
+	depends on LD_SCRIPT_DYN
+	help
+	  Add /lib (and /lib64 for 64-bit) to the linux binary's rpath
+	  explicitly.
+
+	  You may need to turn this off if compiling for nix systems
+	  that have their libraries in random /nix directories and
+	  might otherwise unexpectedly use libraries from /lib or /lib64
+	  instead of the desired ones.
+
 config HOSTFS
 	tristate "Host filesystem"
 	help
......
@@ -118,7 +118,8 @@ archprepare:
 	$(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h

 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
+LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
+LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib

 CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
 	-fno-stack-protector $(call cc-option, -fno-stack-protector-all)
......
@@ -357,3 +357,23 @@ config UML_RTC
 	  rtcwake, especially in time-travel mode. This driver enables that
 	  by providing a fake RTC clock that causes a wakeup at the right
 	  time.
+
+config UML_PCI_OVER_VIRTIO
+	bool "Enable PCI over VIRTIO device simulation"
+	# in theory, just VIRTIO is enough, but that causes recursion
+	depends on VIRTIO_UML
+	select FORCE_PCI
+	select UML_IOMEM_EMULATION
+	select UML_DMA_EMULATION
+	select PCI_MSI
+	select PCI_MSI_IRQ_DOMAIN
+	select PCI_LOCKLESS_CONFIG
+
+config UML_PCI_OVER_VIRTIO_DEVICE_ID
+	int "set the virtio device ID for PCI emulation"
+	default -1
+	depends on UML_PCI_OVER_VIRTIO
+	help
+	  There's no official device ID assigned (yet); set the one you
+	  wish to use for experimentation here. The default of -1 is
+	  not valid and will cause the driver to fail at probe.
@@ -64,6 +64,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
 obj-$(CONFIG_UML_RTC) += rtc.o
+obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o

 # pcap_user.o must be added explicitly.
 USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
......
@@ -256,7 +256,8 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
 		goto out_close;
 	}

-	if (os_set_fd_block(*fd_out, 0)) {
+	err = os_set_fd_block(*fd_out, 0);
+	if (err) {
 		printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd "
 		       "non-blocking.\n");
 		goto out_close;
......
@@ -145,7 +145,8 @@ static int slip_open(void *data)
 	}
 	sfd = err;

-	if (set_up_tty(sfd))
+	err = set_up_tty(sfd);
+	if (err)
 		goto out_close2;

 	pri->slave = sfd;
......
@@ -1242,8 +1242,7 @@ static int __init ubd_driver_init(void){
 		 * enough. So use anyway the io thread. */
 	}
 	stack = alloc_stack(0, 0);
-	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
-				 &thread_fd);
+	io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);

 	if(io_pid < 0){
 		printk(KERN_ERR
 			"ubd : Failed to start I/O thread (errno = %d) - "
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/logic_iomem.h>
#include <linux/irqdomain.h>
#include <linux/virtio_pcidev.h>
#include <linux/virtio-uml.h>
#include <linux/delay.h>
#include <linux/msi.h>
#include <asm/unaligned.h>
#include <irq_kern.h>
#define MAX_DEVICES 8
#define MAX_MSI_VECTORS 32
#define CFG_SPACE_SIZE 4096
/* for MSI-X we have a 32-bit payload */
#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
#define NUM_IRQ_MSGS 10
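/*
 * Buffer ownership is encoded in the virtqueue token: posted writes use
 * heap-allocated copies that are freed on completion, while caller-owned
 * (typically on-stack) buffers get the low pointer bit set so that the
 * completion path knows not to kfree() them.
 */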
#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1))
#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1)
struct um_pci_device {
struct virtio_device *vdev;
/* for now just standard BARs */
u8 resptr[PCI_STD_NUM_BARS];
struct virtqueue *cmd_vq, *irq_vq;
#define UM_PCI_STAT_WAITING 0
unsigned long status;
int irq;
};
struct um_pci_device_reg {
struct um_pci_device *dev;
void __iomem *iomem;
};
static struct pci_host_bridge *bridge;
static DEFINE_MUTEX(um_pci_mtx);
static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
static struct fwnode_handle *um_pci_fwnode;
static struct irq_domain *um_pci_inner_domain;
static struct irq_domain *um_pci_msi_domain;
static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
#define UM_VIRT_PCI_MAXDELAY 40000
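/* upper bound, in 1 us udelay() steps, on busy-polling for a command response */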
static int um_pci_send_cmd(struct um_pci_device *dev,
struct virtio_pcidev_msg *cmd,
unsigned int cmd_size,
const void *extra, unsigned int extra_size,
void *out, unsigned int out_size)
{
struct scatterlist out_sg, extra_sg, in_sg;
struct scatterlist *sgs_list[] = {
[0] = &out_sg,
[1] = extra ? &extra_sg : &in_sg,
[2] = extra ? &in_sg : NULL,
};
int delay_count = 0;
int ret, len;
bool posted;
if (WARN_ON(cmd_size < sizeof(*cmd)))
return -EINVAL;
switch (cmd->op) {
case VIRTIO_PCIDEV_OP_CFG_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
/* in PCI, writes are posted, so don't wait */
posted = !out;
WARN_ON(!posted);
break;
default:
posted = false;
break;
}
if (posted) {
u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
if (ncmd) {
memcpy(ncmd, cmd, cmd_size);
if (extra)
memcpy(ncmd + cmd_size, extra, extra_size);
cmd = (void *)ncmd;
cmd_size += extra_size;
extra = NULL;
extra_size = 0;
} else {
/* try without allocating memory */
posted = false;
}
}
sg_init_one(&out_sg, cmd, cmd_size);
if (extra)
sg_init_one(&extra_sg, extra, extra_size);
if (out)
sg_init_one(&in_sg, out, out_size);
/* add to internal virtio queue */
ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
extra ? 2 : 1,
out ? 1 : 0,
posted ? cmd : HANDLE_NO_FREE(cmd),
GFP_ATOMIC);
if (ret)
return ret;
if (posted) {
virtqueue_kick(dev->cmd_vq);
return 0;
}
/* kick and poll for getting a response on the queue */
set_bit(UM_PCI_STAT_WAITING, &dev->status);
virtqueue_kick(dev->cmd_vq);
while (1) {
void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
if (completed == HANDLE_NO_FREE(cmd))
break;
if (completed && !HANDLE_IS_NO_FREE(completed))
kfree(completed);
if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
++delay_count > UM_VIRT_PCI_MAXDELAY,
"um virt-pci delay: %d", delay_count)) {
ret = -EIO;
break;
}
udelay(1);
}
clear_bit(UM_PCI_STAT_WAITING, &dev->status);
return ret;
}
static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
int size)
{
struct um_pci_device_reg *reg = priv;
struct um_pci_device *dev = reg->dev;
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_READ,
.size = size,
.addr = offset,
};
/* maximum size - we may only use parts of it */
u8 data[8];
if (!dev)
return ~0ULL;
memset(data, 0xff, sizeof(data));
switch (size) {
case 1:
case 2:
case 4:
#ifdef CONFIG_64BIT
case 8:
#endif
break;
default:
WARN(1, "invalid config space read size %d\n", size);
return ~0ULL;
}
if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0,
data, sizeof(data)))
return ~0ULL;
switch (size) {
case 1:
return data[0];
case 2:
return le16_to_cpup((void *)data);
case 4:
return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
case 8:
return le64_to_cpup((void *)data);
#endif
default:
return ~0ULL;
}
}
static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
unsigned long val)
{
struct um_pci_device_reg *reg = priv;
struct um_pci_device *dev = reg->dev;
struct {
struct virtio_pcidev_msg hdr;
/* maximum size - we may only use parts of it */
u8 data[8];
} msg = {
.hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
.size = size,
.addr = offset,
},
};
if (!dev)
return;
switch (size) {
case 1:
msg.data[0] = (u8)val;
break;
case 2:
put_unaligned_le16(val, (void *)msg.data);
break;
case 4:
put_unaligned_le32(val, (void *)msg.data);
break;
#ifdef CONFIG_64BIT
case 8:
put_unaligned_le64(val, (void *)msg.data);
break;
#endif
default:
WARN(1, "invalid config space write size %d\n", size);
return;
}
WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
}
static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
.read = um_pci_cfgspace_read,
.write = um_pci_cfgspace_write,
};
static void um_pci_bar_copy_from(void *priv, void *buffer,
unsigned int offset, int size)
{
u8 *resptr = priv;
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_MMIO_READ,
.bar = *resptr,
.size = size,
.addr = offset,
};
memset(buffer, 0xff, size);
um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
}
static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
int size)
{
/* maximum size - we may only use parts of it */
u8 data[8];
switch (size) {
case 1:
case 2:
case 4:
#ifdef CONFIG_64BIT
case 8:
#endif
break;
default:
WARN(1, "invalid config space read size %d\n", size);
return ~0ULL;
}
um_pci_bar_copy_from(priv, data, offset, size);
switch (size) {
case 1:
return data[0];
case 2:
return le16_to_cpup((void *)data);
case 4:
return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
case 8:
return le64_to_cpup((void *)data);
#endif
default:
return ~0ULL;
}
}
static void um_pci_bar_copy_to(void *priv, unsigned int offset,
const void *buffer, int size)
{
u8 *resptr = priv;
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
.bar = *resptr,
.size = size,
.addr = offset,
};
um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
}
static void um_pci_bar_write(void *priv, unsigned int offset, int size,
unsigned long val)
{
/* maximum size - we may only use parts of it */
u8 data[8];
switch (size) {
case 1:
data[0] = (u8)val;
break;
case 2:
put_unaligned_le16(val, (void *)data);
break;
case 4:
put_unaligned_le32(val, (void *)data);
break;
#ifdef CONFIG_64BIT
case 8:
put_unaligned_le64(val, (void *)data);
break;
#endif
default:
WARN(1, "invalid config space write size %d\n", size);
return;
}
um_pci_bar_copy_to(priv, offset, data, size);
}
static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
{
u8 *resptr = priv;
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
struct {
struct virtio_pcidev_msg hdr;
u8 data;
} msg = {
.hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
.bar = *resptr,
.size = size,
.addr = offset,
},
.data = value,
};
um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
}
static const struct logic_iomem_ops um_pci_device_bar_ops = {
.read = um_pci_bar_read,
.write = um_pci_bar_write,
.set = um_pci_bar_set,
.copy_from = um_pci_bar_copy_from,
.copy_to = um_pci_bar_copy_to,
};
static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
int where)
{
struct um_pci_device_reg *dev;
unsigned int busn = bus->number;
if (busn > 0)
return NULL;
/* not allowing functions for now ... */
if (devfn % 8)
return NULL;
if (devfn / 8 >= ARRAY_SIZE(um_pci_devices))
return NULL;
dev = &um_pci_devices[devfn / 8];
if (!dev)
return NULL;
return (void __iomem *)((unsigned long)dev->iomem + where);
}
static struct pci_ops um_pci_ops = {
.map_bus = um_pci_map_bus,
.read = pci_generic_config_read,
.write = pci_generic_config_write,
};
static void um_pci_rescan(void)
{
pci_lock_rescan_remove();
pci_rescan_bus(bridge->bus);
pci_unlock_rescan_remove();
}
static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
{
struct scatterlist sg[1];
sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
kfree(buf);
else if (kick)
virtqueue_kick(vq);
}
static void um_pci_handle_irq_message(struct virtqueue *vq,
struct virtio_pcidev_msg *msg)
{
struct virtio_device *vdev = vq->vdev;
struct um_pci_device *dev = vdev->priv;
/* we should properly chain interrupts, but on ARCH=um we don't care */
switch (msg->op) {
case VIRTIO_PCIDEV_OP_INT:
generic_handle_irq(dev->irq);
break;
case VIRTIO_PCIDEV_OP_MSI:
/* our MSI message is just the interrupt number */
if (msg->size == sizeof(u32))
generic_handle_irq(le32_to_cpup((void *)msg->data));
else
generic_handle_irq(le16_to_cpup((void *)msg->data));
break;
case VIRTIO_PCIDEV_OP_PME:
/* nothing to do - we already woke up due to the message */
break;
default:
dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
break;
}
}
static void um_pci_cmd_vq_cb(struct virtqueue *vq)
{
struct virtio_device *vdev = vq->vdev;
struct um_pci_device *dev = vdev->priv;
void *cmd;
int len;
if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
return;
while ((cmd = virtqueue_get_buf(vq, &len))) {
if (WARN_ON(HANDLE_IS_NO_FREE(cmd)))
continue;
kfree(cmd);
}
}
static void um_pci_irq_vq_cb(struct virtqueue *vq)
{
struct virtio_pcidev_msg *msg;
int len;
while ((msg = virtqueue_get_buf(vq, &len))) {
if (len >= sizeof(*msg))
um_pci_handle_irq_message(vq, msg);
/* recycle the message buffer */
um_pci_irq_vq_addbuf(vq, msg, true);
}
}
static int um_pci_init_vqs(struct um_pci_device *dev)
{
struct virtqueue *vqs[2];
static const char *const names[2] = { "cmd", "irq" };
vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb };
int err, i;
err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL);
if (err)
return err;
dev->cmd_vq = vqs[0];
dev->irq_vq = vqs[1];
for (i = 0; i < NUM_IRQ_MSGS; i++) {
void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
if (msg)
um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
}
virtqueue_kick(dev->irq_vq);
return 0;
}
static int um_pci_virtio_probe(struct virtio_device *vdev)
{
struct um_pci_device *dev;
int i, free = -1;
int err = -ENOSPC;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
dev->vdev = vdev;
vdev->priv = dev;
mutex_lock(&um_pci_mtx);
for (i = 0; i < MAX_DEVICES; i++) {
if (um_pci_devices[i].dev)
continue;
free = i;
break;
}
if (free < 0)
goto error;
err = um_pci_init_vqs(dev);
if (err)
goto error;
dev->irq = irq_alloc_desc(numa_node_id());
if (dev->irq < 0) {
err = dev->irq;
goto error;
}
um_pci_devices[free].dev = dev;
vdev->priv = dev;
mutex_unlock(&um_pci_mtx);
device_set_wakeup_enable(&vdev->dev, true);
/*
* In order to do suspend-resume properly, don't allow VQs
* to be suspended.
*/
virtio_uml_set_no_vq_suspend(vdev, true);
um_pci_rescan();
return 0;
error:
mutex_unlock(&um_pci_mtx);
kfree(dev);
return err;
}
static void um_pci_virtio_remove(struct virtio_device *vdev)
{
struct um_pci_device *dev = vdev->priv;
int i;
/* Stop all virtqueues */
vdev->config->reset(vdev);
vdev->config->del_vqs(vdev);
device_set_wakeup_enable(&vdev->dev, false);
mutex_lock(&um_pci_mtx);
for (i = 0; i < MAX_DEVICES; i++) {
if (um_pci_devices[i].dev != dev)
continue;
um_pci_devices[i].dev = NULL;
irq_free_desc(dev->irq);
}
mutex_unlock(&um_pci_mtx);
um_pci_rescan();
kfree(dev);
}
static struct virtio_device_id id_table[] = {
{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
{ 0 },
};
MODULE_DEVICE_TABLE(virtio, id_table);
static struct virtio_driver um_pci_virtio_driver = {
.driver.name = "virtio-pci",
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = um_pci_virtio_probe,
.remove = um_pci_virtio_remove,
};
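/*
 * Emulated address layout: the top 256 MiB of the 32-bit space
 * (0xf0000000-0xffffffff) serves as the PCI iomem window, and one
 * CFG_SPACE_SIZE page of config space per device sits directly below it.
 */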
static struct resource virt_cfgspace_resource = {
.name = "PCI config space",
.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
.end = 0xf0000000 - 1,
.flags = IORESOURCE_MEM,
};
static long um_pci_map_cfgspace(unsigned long offset, size_t size,
const struct logic_iomem_ops **ops,
void **priv)
{
if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
return -EINVAL;
if (offset / CFG_SPACE_SIZE < MAX_DEVICES) {
*ops = &um_pci_device_cfgspace_ops;
*priv = &um_pci_devices[offset / CFG_SPACE_SIZE];
return 0;
}
WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
return -ENOENT;
}
static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
.map = um_pci_map_cfgspace,
};
static struct resource virt_iomem_resource = {
.name = "PCI iomem",
.start = 0xf0000000,
.end = 0xffffffff,
.flags = IORESOURCE_MEM,
};
struct um_pci_map_iomem_data {
unsigned long offset;
size_t size;
const struct logic_iomem_ops **ops;
void **priv;
long ret;
};
static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
{
struct um_pci_map_iomem_data *data = _data;
struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
struct um_pci_device *dev;
int i;
if (!reg->dev)
return 0;
for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
struct resource *r = &pdev->resource[i];
if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
continue;
/*
* must be the whole or part of the resource,
* not allowed to only overlap
*/
if (data->offset < r->start || data->offset > r->end)
continue;
if (data->offset + data->size - 1 > r->end)
continue;
dev = reg->dev;
*data->ops = &um_pci_device_bar_ops;
dev->resptr[i] = i;
*data->priv = &dev->resptr[i];
data->ret = data->offset - r->start;
/* no need to continue */
return 1;
}
return 0;
}
static long um_pci_map_iomem(unsigned long offset, size_t size,
const struct logic_iomem_ops **ops,
void **priv)
{
struct um_pci_map_iomem_data data = {
/* we want the full address here */
.offset = offset + virt_iomem_resource.start,
.size = size,
.ops = ops,
.priv = priv,
.ret = -ENOENT,
};
pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data);
return data.ret;
}
static const struct logic_iomem_region_ops um_pci_iomem_ops = {
.map = um_pci_map_iomem,
};
static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
/*
* This is a very low address and not actually valid 'physical' memory
* in UML, so we can simply map MSI(-X) vectors to there, it cannot be
* legitimately written to by the device in any other way.
* We use the (virtual) IRQ number here as the message to simplify the
* code that receives the message, where for now we simply trust the
* device to send the correct message.
*/
msg->address_hi = 0;
msg->address_lo = 0xa0000;
msg->data = data->irq;
}
static struct irq_chip um_pci_msi_bottom_irq_chip = {
.name = "UM virtio MSI",
.irq_compose_msi_msg = um_pci_compose_msi_msg,
};
static int um_pci_inner_domain_alloc(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs,
void *args)
{
unsigned long bit;
WARN_ON(nr_irqs != 1);
mutex_lock(&um_pci_mtx);
bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
if (bit >= MAX_MSI_VECTORS) {
mutex_unlock(&um_pci_mtx);
return -ENOSPC;
}
set_bit(bit, um_pci_msi_used);
mutex_unlock(&um_pci_mtx);
irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip,
domain->host_data, handle_simple_irq,
NULL, NULL);
return 0;
}
static void um_pci_inner_domain_free(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
mutex_lock(&um_pci_mtx);
if (!test_bit(d->hwirq, um_pci_msi_used))
pr_err("trying to free unused MSI#%lu\n", d->hwirq);
else
__clear_bit(d->hwirq, um_pci_msi_used);
mutex_unlock(&um_pci_mtx);
}
static const struct irq_domain_ops um_pci_inner_domain_ops = {
.alloc = um_pci_inner_domain_alloc,
.free = um_pci_inner_domain_free,
};
static struct irq_chip um_pci_msi_irq_chip = {
.name = "UM virtio PCIe MSI",
.irq_mask = pci_msi_mask_irq,
.irq_unmask = pci_msi_unmask_irq,
};
static struct msi_domain_info um_pci_msi_domain_info = {
.flags = MSI_FLAG_USE_DEF_DOM_OPS |
MSI_FLAG_USE_DEF_CHIP_OPS |
MSI_FLAG_PCI_MSIX,
.chip = &um_pci_msi_irq_chip,
};
static struct resource busn_resource = {
.name = "PCI busn",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUS,
};
static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
if (WARN_ON(!reg->dev))
return -EINVAL;
/* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
return reg->dev->irq;
}
void *pci_root_bus_fwnode(struct pci_bus *bus)
{
return um_pci_fwnode;
}
int um_pci_init(void)
{
int err, i;
WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
&um_pci_cfgspace_ops));
WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
&um_pci_iomem_ops));
if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
"No virtio device ID configured for PCI - no PCI support\n"))
return 0;
bridge = pci_alloc_host_bridge(0);
if (!bridge)
return -ENOMEM;
um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
if (!um_pci_fwnode) {
err = -ENOMEM;
goto free;
}
um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS,
MAX_MSI_VECTORS, 0,
&um_pci_inner_domain_ops, NULL);
if (!um_pci_inner_domain) {
err = -ENOMEM;
goto free;
}
um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode,
&um_pci_msi_domain_info,
um_pci_inner_domain);
if (!um_pci_msi_domain) {
err = -ENOMEM;
goto free;
}
pci_add_resource(&bridge->windows, &virt_iomem_resource);
pci_add_resource(&bridge->windows, &busn_resource);
bridge->ops = &um_pci_ops;
bridge->map_irq = um_pci_map_irq;
for (i = 0; i < MAX_DEVICES; i++) {
resource_size_t start;
start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
err = -ENOMEM;
goto free;
}
}
err = pci_host_probe(bridge);
if (err)
goto free;
err = register_virtio_driver(&um_pci_virtio_driver);
if (err)
goto free;
return 0;
free:
if (um_pci_inner_domain)
irq_domain_remove(um_pci_inner_domain);
if (um_pci_fwnode)
irq_domain_free_fwnode(um_pci_fwnode);
pci_free_resource_list(&bridge->windows);
pci_free_host_bridge(bridge);
return err;
}
module_init(um_pci_init);
void um_pci_exit(void)
{
unregister_virtio_driver(&um_pci_virtio_driver);
irq_domain_remove(um_pci_msi_domain);
irq_domain_remove(um_pci_inner_domain);
pci_free_resource_list(&bridge->windows);
pci_free_host_bridge(bridge);
}
module_exit(um_pci_exit);
@@ -56,6 +56,7 @@ struct virtio_uml_device {
 	u8 status;
 	u8 registered:1;
 	u8 suspended:1;
+	u8 no_vq_suspend:1;

 	u8 config_changed_irq:1;
 	uint64_t vq_irq_vq_map;
@@ -1098,6 +1099,19 @@ static void virtio_uml_release_dev(struct device *d)
 	kfree(vu_dev);
 }

+void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
+				  bool no_vq_suspend)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	if (WARN_ON(vdev->config != &virtio_uml_config_ops))
+		return;
+
+	vu_dev->no_vq_suspend = no_vq_suspend;
+	dev_info(&vdev->dev, "%sabled VQ suspend\n",
+		 no_vq_suspend ? "dis" : "en");
+}
+
 /* Platform device */

 static int virtio_uml_probe(struct platform_device *pdev)
@@ -1302,13 +1316,16 @@ MODULE_DEVICE_TABLE(of, virtio_uml_match);
 static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
-	struct virtqueue *vq;

-	virtio_device_for_each_vq((&vu_dev->vdev), vq) {
-		struct virtio_uml_vq_info *info = vq->priv;
-		info->suspended = true;
-		vhost_user_set_vring_enable(vu_dev, vq->index, false);
+	if (!vu_dev->no_vq_suspend) {
+		struct virtqueue *vq;
+
+		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+			struct virtio_uml_vq_info *info = vq->priv;
+
+			info->suspended = true;
+			vhost_user_set_vring_enable(vu_dev, vq->index, false);
+		}
 	}

 	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
@@ -1322,13 +1339,16 @@ static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
 static int virtio_uml_resume(struct platform_device *pdev)
 {
 	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
-	struct virtqueue *vq;

-	virtio_device_for_each_vq((&vu_dev->vdev), vq) {
-		struct virtio_uml_vq_info *info = vq->priv;
-		info->suspended = false;
-		vhost_user_set_vring_enable(vu_dev, vq->index, true);
+	if (!vu_dev->no_vq_suspend) {
+		struct virtqueue *vq;
+
+		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+			struct virtio_uml_vq_info *info = vq->priv;
+
+			info->suspended = false;
+			vhost_user_set_vring_enable(vu_dev, vq->index, true);
+		}
 	}

 	vu_dev->suspended = false;
......
@@ -7,8 +7,8 @@ generic-y += device.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
+generic-y += fb.h
 generic-y += ftrace.h
-generic-y += futex.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
@@ -17,7 +17,6 @@ generic-y += mcs_spinlock.h
 generic-y += mmiowb.h
 generic-y += module.lds.h
 generic-y += param.h
-generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += softirq_stack.h
@@ -27,3 +26,4 @@ generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += kprobes.h
 generic-y += mm_hooks.h
+generic-y += vga.h
#ifndef __UM_ASM_CACHEFLUSH_H
#define __UM_ASM_CACHEFLUSH_H
#include <asm/tlbflush.h>
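/*
 * UML has no hardware caches to flush, but newly vmalloc()ed/vmap()ed
 * ranges must still be made visible in the host process address space,
 * so both hooks are implemented as kernel TLB range flushes.
 */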
#define flush_cache_vmap flush_tlb_kernel_range
#define flush_cache_vunmap flush_tlb_kernel_range
#include <asm-generic/cacheflush.h>
#endif /* __UM_ASM_CACHEFLUSH_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_UM_CPUFEATURE_H
#define _ASM_UM_CPUFEATURE_H
#include <asm/processor.h>
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
#include <asm/asm.h>
#include <linux/bitops.h>
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
#define X86_CAP_FMT "%s"
#define x86_cap_flag(flag) x86_cap_flags[flag]
/*
* In order to save room, we index into this array by doing
* X86_BUG_<name> - NCAPINTS*32.
*/
extern const char * const x86_bug_flags[NBUGINTS*32];
#define test_cpu_cap(c, bit) \
test_bit(bit, (unsigned long *)((c)->x86_capability))
/*
* There are 32 bits/features in each mask word. The high bits
* (selected with (bit>>5) give us the word number and the low 5
* bits give us the bit/feature number inside the word.
* (1UL<<((bit)&31) gives us a mask for the feature_bit so we can
* see if it is set in the mask word.
*/
#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
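/* e.g. bit 34: (34 >> 5) == 1 selects word 1, (1UL << (34 & 31)) == BIT(2) */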
#define cpu_has(c, bit) \
test_cpu_cap(c, bit)
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
x86_this_cpu_test_bit(bit, \
(unsigned long __percpu *)&cpu_info.x86_capability))
/*
* This macro is for detection of features which need kernel
* infrastructure to be used. It may *not* directly test the CPU
* itself. Use the cpu_has() family if you want true runtime
* testing of CPU features, like in hypervisor code where you are
* supporting a possible guest feature where host support for it
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
extern void setup_clear_cpu_cap(unsigned int bit);
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO)
/*
* Workaround for the sake of BPF compilation which utilizes kernel
* headers, but clang does not support ASM GOTO and fails the build.
*/
#ifndef __BPF_TRACING__
#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
#endif
#define static_cpu_has(bit) boot_cpu_has(bit)
#else
/*
* Static testing of CPU features. Used the same as boot_cpu_has(). It
* statically patches the target code for additional performance. Use
* static_cpu_has() only in fast paths, where every cycle counts. Which
* means that the boot_cpu_has() variant is already fast enough for the
* majority of cases and you should stick to using it as it is generally
* only two instructions: a RIP-relative MOV and a TEST.
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
"3:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 4f - .\n" /* repl offset */
" .word %P[always]\n" /* always replace */
" .byte 3b - 1b\n" /* src len */
" .byte 5f - 4f\n" /* repl len */
" .byte 3b - 2b\n" /* pad len */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"4: jmp %l[t_no]\n"
"5:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */
" .word %P[feature]\n" /* feature bit */
" .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
".section .altinstr_aux,\"ax\"\n"
"6:\n"
" testb %[bitnum],%[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".previous\n"
: : [feature] "i" (bit),
[always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
t_no:
return false;
}
#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
_static_cpu_has(bit) \
)
#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)
#define cpu_have_feature boot_cpu_has
#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X"
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
#endif /* _ASM_UM_CPUFEATURE_H */
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_UM_FPU_API_H
#define _ASM_UM_FPU_API_H
/* Copyright (c) 2020 Cambridge Greys Ltd
* Copyright (c) 2020 Red Hat Inc.
* A set of "dummy" defines to allow the direct inclusion
* of x86 optimized copy, xor, etc routines into the
* UML code tree. */
#define kernel_fpu_begin() (void)0
#define kernel_fpu_end() (void)0
static inline bool irq_fpu_usable(void)
{
return true;
}
#endif
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_UM_FUTEX_H
#define _ASM_UM_FUTEX_H
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/errno.h>
int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval);
#endif
@@ -3,16 +3,23 @@
 #define _ASM_UM_IO_H
 #include <linux/types.h>

+/* get emulated iomem (if desired) */
+#include <asm-generic/logic_io.h>
+
+#ifndef ioremap
 #define ioremap ioremap
 static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
 {
 	return NULL;
 }
+#endif /* ioremap */

+#ifndef iounmap
 #define iounmap iounmap
 static inline void iounmap(void __iomem *addr)
 {
 }
+#endif /* iounmap */

 #include <asm-generic/io.h>
......
@@ -31,7 +31,13 @@
 #endif

-#define NR_IRQS 64
+#define UM_LAST_SIGNAL_IRQ 64
+
+/* If we have (simulated) PCI MSI, allow 64 more interrupt numbers for it */
+#ifdef CONFIG_PCI_MSI
+#define NR_IRQS (UM_LAST_SIGNAL_IRQ + 64)
+#else
+#define NR_IRQS UM_LAST_SIGNAL_IRQ
+#endif /* CONFIG_PCI_MSI */

 #include <asm-generic/irq.h>

 #endif
@@ -2,15 +2,15 @@
 #ifndef __UM_IRQFLAGS_H
 #define __UM_IRQFLAGS_H

-extern int get_signals(void);
-extern int set_signals(int enable);
-extern void block_signals(void);
-extern void unblock_signals(void);
+extern int signals_enabled;
+int set_signals(int enable);
+void block_signals(void);
+void unblock_signals(void);

 #define arch_local_save_flags arch_local_save_flags
 static inline unsigned long arch_local_save_flags(void)
 {
-	return get_signals();
+	return signals_enabled;
 }

 #define arch_local_irq_restore arch_local_irq_restore
......
#include <asm-generic/msi.h>
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_UM_PCI_H
#define __ASM_UM_PCI_H
#include <linux/types.h>
#include <asm/io.h>
#define PCIBIOS_MIN_IO 0
#define PCIBIOS_MIN_MEM 0
#define pcibios_assign_all_busses() 1
extern int isa_dma_bridge_buggy;
#ifdef CONFIG_PCI
static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
{
/* no legacy IRQs */
return -ENODEV;
}
#endif
#ifdef CONFIG_PCI_DOMAINS
static inline int pci_proc_domain(struct pci_bus *bus)
{
/* always show the domain in /proc */
return 1;
}
#endif /* CONFIG_PCI_DOMAINS */
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
/*
* This is a bit of an annoying hack, and it assumes we only have
* the virt-pci (if anything). Which is true, but still.
*/
void *pci_root_bus_fwnode(struct pci_bus *bus);
#define pci_root_bus_fwnode pci_root_bus_fwnode
#endif
#endif /* __ASM_UM_PCI_H */
@@ -16,6 +16,8 @@ struct task_struct;

 #include <linux/prefetch.h>

+#include <asm/cpufeatures.h>
+
 struct mm_struct;

 struct thread_struct {
@@ -90,12 +92,18 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry,
 struct cpuinfo_um {
 	unsigned long loops_per_jiffy;
 	int ipi_pipe[2];
+	int cache_alignment;
+	union {
+		__u32 x86_capability[NCAPINTS + NBUGINTS];
+		unsigned long x86_capability_alignment;
+	};
 };

 extern struct cpuinfo_um boot_cpu_data;

 #define cpu_data (&boot_cpu_data)
 #define current_cpu_data boot_cpu_data
+#define cache_line_size()	(boot_cpu_data.cache_alignment)

 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
 extern unsigned long get_wchan(struct task_struct *p);
......
@@ -5,7 +5,7 @@
 #include <linux/mm.h>

 #include <asm/tlbflush.h>
-#include <asm-generic/cacheflush.h>
+#include <asm/cacheflush.h>
 #include <asm-generic/tlb.h>

 #endif
 /* SPDX-License-Identifier: GPL-2.0 */
-#include <asm-generic/xor.h>
+#ifndef _ASM_UM_XOR_H
+#define _ASM_UM_XOR_H
+
+#ifdef CONFIG_64BIT
+#undef CONFIG_X86_32
+#else
+#define CONFIG_X86_32 1
+#endif
+
+#include <asm/cpufeature.h>
+#include <../../x86/include/asm/xor.h>
 #include <linux/time-internal.h>

+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#undef XOR_SELECT_TEMPLATE
 /* pick an arbitrary one - measuring isn't possible with inf-cpu */
 #define XOR_SELECT_TEMPLATE(x)	\
 	(time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL)
+#endif
+
+#endif
@@ -8,17 +8,11 @@
 #define __TIMER_INTERNAL_H__

 #include <linux/list.h>
 #include <asm/bug.h>
+#include <shared/timetravel.h>

 #define TIMER_MULTIPLIER 256
 #define TIMER_MIN_DELTA  500

-enum time_travel_mode {
-	TT_MODE_OFF,
-	TT_MODE_BASIC,
-	TT_MODE_INFCPU,
-	TT_MODE_EXTERNAL,
-};
-
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
 struct time_travel_event {
 	unsigned long long time;
@@ -27,8 +21,6 @@ struct time_travel_event {
 	bool pending, onstack;
 };

-extern enum time_travel_mode time_travel_mode;
-
 void time_travel_sleep(void);

 static inline void
@@ -62,8 +54,6 @@ bool time_travel_del_event(struct time_travel_event *e);
 struct time_travel_event {
 };

-#define time_travel_mode TT_MODE_OFF
-
 static inline void time_travel_sleep(void)
 {
 }
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#ifndef __VIRTIO_UML_H__
#define __VIRTIO_UML_H__
void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
bool no_vq_suspend);
#endif /* __VIRTIO_UML_H__ */
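The export is intended to be called from a virtio driver's probe routine. A minimal sketch of the call pattern, mirroring what virt-pci.c does above (the driver and its probe function here are hypothetical):

static int my_virtio_probe(struct virtio_device *vdev)	/* hypothetical */
{
	/* allow this device to wake the system ... */
	device_set_wakeup_enable(&vdev->dev, true);
	/* ... and keep its vqs running across suspend/resume */
	virtio_uml_set_no_vq_suspend(vdev, true);
	return 0;
}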
@@ -17,6 +17,7 @@ enum um_irq_type {
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
+void sigio_run_timetravel_handlers(void);
 extern void free_irq_by_fd(int fd);
 extern void deactivate_fd(int fd, int irqnum);
 extern int deactivate_all_fds(void);
......
@@ -33,7 +33,6 @@ extern int handle_page_fault(unsigned long address, unsigned long ip,
 			     int is_write, int is_user, int *code_out);

 extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
-extern int smp_sigio_handler(void);
 extern void initial_thread_cb(void (*proc)(void *), void *arg);
 extern int is_syscall(unsigned long addr);
......
@@ -5,6 +5,7 @@
 #include <sysdep/archsetjmp.h>
 #include <os.h>

+extern int signals_enabled;
 extern int setjmp(jmp_buf);
 extern void longjmp(jmp_buf, int);
@@ -12,13 +13,12 @@ extern void longjmp(jmp_buf, int);
 		longjmp(*buf, val);	\
 	} while(0)

 #define UML_SETJMP(buf) ({ \
-	int n; \
-	volatile int enable; \
-	enable = get_signals(); \
+	int n, enable; \
+	enable = *(volatile int *)&signals_enabled; \
 	n = setjmp(*buf); \
 	if(n != 0) \
 		set_signals_trace(enable); \
 	n; })

 #endif
@@ -187,6 +187,9 @@ int os_poll(unsigned int n, const int *fds);
 extern void os_early_checks(void);
 extern void os_check_bugs(void);
 extern void check_host_supports_tls(int *supports_tls, int *tls_min);
+extern void get_host_cpu_features(
+	void (*flags_helper_func)(char *line),
+	void (*cache_helper_func)(char *line));

 /* mem.c */
 extern int create_mem_file(unsigned long long len);
@@ -211,7 +214,6 @@ extern int os_protect_memory(void *addr, unsigned long len,
 extern int os_unmap_memory(void *addr, int len);
 extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
-extern void os_flush_stdout(void);
 extern int os_mincore(void *addr, unsigned long len);

 /* execvp.c */
@@ -237,12 +239,14 @@ extern void send_sigio_to_self(void);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
-extern int get_signals(void);
 extern int set_signals(int enable);
 extern int set_signals_trace(int enable);
 extern int os_is_signal_stack(void);
 extern void deliver_alarm(void);
 extern void register_pm_wake_signal(void);
+extern void block_signals_hard(void);
+extern void unblock_signals_hard(void);
+extern void mark_sigio_pending(void);

 /* util.c */
 extern void stack_protections(unsigned long address);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2019-2021 Intel Corporation
*/
#ifndef _UM_TIME_TRAVEL_H_
#define _UM_TIME_TRAVEL_H_
enum time_travel_mode {
TT_MODE_OFF,
TT_MODE_BASIC,
TT_MODE_INFCPU,
TT_MODE_EXTERNAL,
};
#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
extern enum time_travel_mode time_travel_mode;
#else
#define time_travel_mode TT_MODE_OFF
#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */
#endif /* _UM_TIME_TRAVEL_H_ */
@@ -17,18 +17,19 @@ extra-y := vmlinux.lds
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
 	signal.o syscall.o sysrq.o time.o tlb.o trap.o \
-	um_arch.o umid.o maccess.o kmsg_dump.o skas/
+	um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/

 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o

 USER_OBJS := config.o

 include arch/um/scripts/Makefile.rules

-targets := config.c config.tmp
+targets := config.c config.tmp capflags.c

 # Be careful with the below Sed code - sed is pitfall-rich!
 # We use sed to lower build requirements, for "embedded" builders for instance.
@@ -43,6 +44,15 @@ quiet_cmd_quote1 = QUOTE $@
 $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE
 	$(call if_changed,quote2)

+quiet_cmd_mkcapflags = MKCAP   $@
+      cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^
+
+cpufeature = $(src)/../../x86/include/asm/cpufeatures.h
+vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h
+
+$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE
+	$(call if_changed,mkcapflags)
+
 quiet_cmd_quote2 = QUOTE $@
       cmd_quote2 = sed -e '/CONFIG/{' \
 		-e 's/"CONFIG"//' \
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <asm/iomap.h>
#include <asm-generic/pci_iomap.h>
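/* UML has no I/O port space, so PCI I/O BARs can never be mapped */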
void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
unsigned int nr)
{
return NULL;
}
@@ -56,7 +56,7 @@ struct irq_entry {
 static DEFINE_SPINLOCK(irq_lock);
 static LIST_HEAD(active_fds);
-static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
+static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
 static bool irqs_suspended;

 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
@@ -101,10 +101,12 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
 	if (!reg->timetravel_handler)
 		return false;

-	/* prevent nesting - we'll get it again later when we SIGIO ourselves */
-	if (reg->pending_on_resume)
-		return true;
+	/*
+	 * Handle all messages - we might get multiple even while
+	 * interrupts are already suspended, due to suspend order
+	 * etc. Note that time_travel_add_irq_event() will not add
+	 * an event twice, if it's pending already "first wins".
+	 */
 	reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);

 	if (!reg->event.pending)
@@ -123,7 +125,8 @@
 #endif

 static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
-			      struct uml_pt_regs *regs)
+			      struct uml_pt_regs *regs,
+			      bool timetravel_handlers_only)
 {
 	struct irq_reg *reg = &entry->reg[t];
@@ -136,18 +139,29 @@
 	if (irq_do_timetravel_handler(entry, t))
 		return;

-	if (irqs_suspended)
+	/*
+	 * If we're called to only run time-travel handlers then don't
+	 * actually proceed but mark sigio as pending (if applicable).
+	 * For suspend/resume, timetravel_handlers_only may be true
+	 * despite time-travel not being configured and used.
+	 */
+	if (timetravel_handlers_only) {
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+		mark_sigio_pending();
+#endif
 		return;
+	}

 	irq_io_loop(reg, regs);
 }

-void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+static void _sigio_handler(struct uml_pt_regs *regs,
+			   bool timetravel_handlers_only)
 {
 	struct irq_entry *irq_entry;
 	int n, i;

-	if (irqs_suspended && !um_irq_timetravel_handler_used())
+	if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
 		return;

 	while (1) {
@@ -172,14 +186,20 @@
 			irq_entry = os_epoll_get_data_pointer(i);

 			for (t = 0; t < NUM_IRQ_TYPES; t++)
-				sigio_reg_handler(i, irq_entry, t, regs);
+				sigio_reg_handler(i, irq_entry, t, regs,
+						  timetravel_handlers_only);
 		}
 	}

-	if (!irqs_suspended)
+	if (!timetravel_handlers_only)
 		free_irqs();
 }

+void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+{
+	_sigio_handler(regs, irqs_suspended);
+}
+
 static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
 	struct irq_entry *walk;
@@ -399,7 +419,8 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
 void um_free_irq(int irq, void *dev)
 {
-	if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq))
+	if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
+		 "freeing invalid irq %d", irq))
 		return;

 	free_irq_by_irq_and_dev(irq, dev);
@@ -467,6 +488,11 @@ int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
 			       devname, dev_id, timetravel_handler);
 }
 EXPORT_SYMBOL(um_request_irq_tt);
+
+void sigio_run_timetravel_handlers(void)
+{
+	_sigio_handler(NULL, true);
+}
 #endif

 #ifdef CONFIG_PM_SLEEP
@@ -623,7 +649,7 @@ void __init init_IRQ(void)
 	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

-	for (i = 1; i < NR_IRQS; i++)
+	for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
 	/* Initialize EPOLL Loop */
 	os_setup_epoll();
......
@@ -7,7 +7,7 @@
 #include <os.h>

 EXPORT_SYMBOL(set_signals);
-EXPORT_SYMBOL(get_signals);
+EXPORT_SYMBOL(signals_enabled);

 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
......
@@ -29,7 +29,7 @@ stub_clone_handler(void)
 	long err;

 	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
-			    (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
+			    (unsigned long)data + UM_KERN_PAGE_SIZE / 2);
 	if (err) {
 		data->parent_err = err;
 		goto done;
......
@@ -11,6 +11,7 @@
 #include <asm/current.h>
 #include <asm/page.h>
 #include <kern_util.h>
+#include <asm/futex.h>
 #include <os.h>

 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
@@ -248,3 +249,138 @@ long __strnlen_user(const void __user *str, long len)
 	return 0;
 }
 EXPORT_SYMBOL(__strnlen_user);
/**
* arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
* argument and comparison of the previous
* futex value with another constant.
*
* @op: operation to execute
* @oparg: argument to the operation
* @oval: location to store the old value of @uaddr
* @uaddr: pointer to user space address
*
* Return:
* 0 - On success
* -EFAULT - User access resulted in a page fault
* -EAGAIN - Atomic operation was unable to complete due to contention
* -ENOSYS - Operation not supported
*/
int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
{
int oldval, ret;
struct page *page;
unsigned long addr = (unsigned long) uaddr;
pte_t *pte;
ret = -EFAULT;
if (!access_ok(uaddr, sizeof(*uaddr)))
return -EFAULT;
preempt_disable();
pte = maybe_map(addr, 1);
if (pte == NULL)
goto out_inuser;
page = pte_page(*pte);
#ifdef CONFIG_64BIT
pagefault_disable();
addr = (unsigned long) page_address(page) +
(((unsigned long) addr) & ~PAGE_MASK);
#else
addr = (unsigned long) kmap_atomic(page) +
((unsigned long) addr & ~PAGE_MASK);
#endif
uaddr = (u32 *) addr;
oldval = *uaddr;
ret = 0;
switch (op) {
case FUTEX_OP_SET:
*uaddr = oparg;
break;
case FUTEX_OP_ADD:
*uaddr += oparg;
break;
case FUTEX_OP_OR:
*uaddr |= oparg;
break;
case FUTEX_OP_ANDN:
*uaddr &= ~oparg;
break;
case FUTEX_OP_XOR:
*uaddr ^= oparg;
break;
default:
ret = -ENOSYS;
}
#ifdef CONFIG_64BIT
pagefault_enable();
#else
kunmap_atomic((void *)addr);
#endif
out_inuser:
preempt_enable();
if (ret == 0)
*oval = oldval;
return ret;
}
EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
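For context, a condensed sketch of how the generic futex code (kernel/futex.c) drives this helper on the FUTEX_WAKE_OP path; the decoding below is simplified (sign extension and the FUTEX_OP_OPARG_SHIFT flag are elided) and the function name is illustrative:
static int futex_wake_op_sketch(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op = (encoded_op & 0x70000000) >> 28;	/* FUTEX_OP_* */
	int oparg = (encoded_op & 0x00fff000) >> 12;	/* constant argument */
	int oldval, ret;

	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;
	/* the caller then compares oldval against the encoded cmparg */
	return oldval;
}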
/**
* futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
* uaddr with newval if the current value is
* oldval.
* @uval: pointer to store content of @uaddr
* @uaddr: pointer to user space address
* @oldval: old value
* @newval: new value to store to @uaddr
*
* Return:
* 0 - On success
* -EFAULT - User access resulted in a page fault
* -EAGAIN - Atomic operation was unable to complete due to contention
* -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
*/
int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
struct page *page;
pte_t *pte;
int ret = -EFAULT;
if (!access_ok(uaddr, sizeof(*uaddr)))
return -EFAULT;
preempt_disable();
pte = maybe_map((unsigned long) uaddr, 1);
if (pte == NULL)
goto out_inatomic;
page = pte_page(*pte);
#ifdef CONFIG_64BIT
pagefault_disable();
uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
#else
uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK);
#endif
*uval = *uaddr;
ret = cmpxchg(uaddr, oldval, newval);
#ifdef CONFIG_64BIT
pagefault_enable();
#else
kunmap_atomic(uaddr);
#endif
ret = 0;
out_inatomic:
preempt_enable();
return ret;
}
EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);
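A hedged usage sketch: callers typically treat a successful call whose returned old value differs from the expected one as a lost race to retry; the helper name here is illustrative:
static int try_swap_futex(u32 __user *uaddr, u32 oldval, u32 newval)
{
	u32 curval;
	int err = futex_atomic_cmpxchg_inatomic(&curval, uaddr, oldval, newval);

	if (err)
		return err;	/* e.g. -EFAULT */
	if (curval != oldval)
		return -EAGAIN;	/* lost the race; caller retries */
	return 0;
}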
...@@ -68,23 +68,15 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, ...@@ -68,23 +68,15 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
int ret; int ret;
/* /*
* Poll outside the locked section (if we're not called to only read * We can't unlock here, but interrupt signals with a timetravel_handler
* the response) so we can get interrupts for e.g. virtio while we're * (see um_request_irq_tt) get to the timetravel_handler anyway.
* here, but then we need to lock to not get interrupted between the
* read of the message and write of the ACK.
*/ */
if (mode != TTMH_READ) { if (mode != TTMH_READ) {
bool disabled = irqs_disabled(); BUG_ON(mode == TTMH_IDLE && !irqs_disabled());
BUG_ON(mode == TTMH_IDLE && !disabled);
if (disabled)
local_irq_enable();
while (os_poll(1, &time_travel_ext_fd) != 0) { while (os_poll(1, &time_travel_ext_fd) != 0) {
/* nothing */ /* nothing */
} }
if (disabled)
local_irq_disable();
} }
ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
...@@ -123,15 +115,15 @@ static u64 time_travel_ext_req(u32 op, u64 time) ...@@ -123,15 +115,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
.time = time, .time = time,
.seq = mseq, .seq = mseq,
}; };
unsigned long flags;
/* /*
* We need to save interrupts here and only restore when we * We need to block even the timetravel handlers of SIGIO here and
* got the ACK - otherwise we can get interrupted and send * only restore their use when we got the ACK - otherwise we may
* another request while we're still waiting for an ACK, but * (will) get interrupted by that, try to queue the IRQ for future
* the peer doesn't know we got interrupted and will send * processing and thus send another request while we're still waiting
* the ACKs in the same order as the message, but we'd need * for an ACK, but the peer doesn't know we got interrupted and will
* to see them in the opposite order ... * send the ACKs in the same order as the message, but we'd need to
* see them in the opposite order ...
* *
* This wouldn't matter *too* much, but some ACKs carry the * This wouldn't matter *too* much, but some ACKs carry the
* current time (for UM_TIMETRAVEL_GET) and getting another * current time (for UM_TIMETRAVEL_GET) and getting another
...@@ -140,7 +132,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) ...@@ -140,7 +132,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
* The sequence number assignment that happens here lets us * The sequence number assignment that happens here lets us
* debug such message handling issues more easily. * debug such message handling issues more easily.
*/ */
local_irq_save(flags); block_signals_hard();
os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
while (msg.op != UM_TIMETRAVEL_ACK) while (msg.op != UM_TIMETRAVEL_ACK)
...@@ -152,7 +144,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) ...@@ -152,7 +144,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
if (op == UM_TIMETRAVEL_GET) if (op == UM_TIMETRAVEL_GET)
time_travel_set_time(msg.time); time_travel_set_time(msg.time);
local_irq_restore(flags); unblock_signals_hard();
return msg.time; return msg.time;
} }
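For reference, the message exchanged over time_travel_ext_fd follows include/uapi/linux/um_timetravel.h (field comments condensed here); @seq is echoed in the UM_TIMETRAVEL_ACK, which is what the sequence-number debugging mentioned above relies on:
struct um_timetravel_msg {
	__u32 op;	/* UM_TIMETRAVEL_ACK/START/GET/UPDATE/RUN/WAIT/... */
	__u32 seq;	/* sequence number, reflected in the ACK */
	__u64 time;	/* time in nanoseconds */
};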
...@@ -352,9 +344,6 @@ void deliver_time_travel_irqs(void) ...@@ -352,9 +344,6 @@ void deliver_time_travel_irqs(void)
while ((e = list_first_entry_or_null(&time_travel_irqs, while ((e = list_first_entry_or_null(&time_travel_irqs,
struct time_travel_event, struct time_travel_event,
list))) { list))) {
WARN(e->time != time_travel_time,
"time moved from %lld to %lld before IRQ delivery\n",
time_travel_time, e->time);
list_del(&e->list); list_del(&e->list);
e->pending = false; e->pending = false;
e->fn(e); e->fn(e);
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/ctype.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/panic_notifier.h> #include <linux/panic_notifier.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
...@@ -17,6 +18,7 @@ ...@@ -17,6 +18,7 @@
#include <linux/suspend.h> #include <linux/suspend.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <as-layout.h> #include <as-layout.h>
...@@ -51,9 +53,13 @@ static void __init add_arg(char *arg) ...@@ -51,9 +53,13 @@ static void __init add_arg(char *arg)
*/ */
struct cpuinfo_um boot_cpu_data = { struct cpuinfo_um boot_cpu_data = {
.loops_per_jiffy = 0, .loops_per_jiffy = 0,
.ipi_pipe = { -1, -1 } .ipi_pipe = { -1, -1 },
.cache_alignment = L1_CACHE_BYTES,
.x86_capability = { 0 }
}; };
EXPORT_SYMBOL(boot_cpu_data);
union thread_union cpu0_irqstack union thread_union cpu0_irqstack
__section(".data..init_irqstack") = __section(".data..init_irqstack") =
{ .thread_info = INIT_THREAD_INFO(init_task) }; { .thread_info = INIT_THREAD_INFO(init_task) };
...@@ -63,17 +69,25 @@ static char host_info[(__NEW_UTS_LEN + 1) * 5]; ...@@ -63,17 +69,25 @@ static char host_info[(__NEW_UTS_LEN + 1) * 5];
static int show_cpuinfo(struct seq_file *m, void *v) static int show_cpuinfo(struct seq_file *m, void *v)
{ {
int index = 0; int i = 0;
seq_printf(m, "processor\t: %d\n", index); seq_printf(m, "processor\t: %d\n", i);
seq_printf(m, "vendor_id\t: User Mode Linux\n"); seq_printf(m, "vendor_id\t: User Mode Linux\n");
seq_printf(m, "model name\t: UML\n"); seq_printf(m, "model name\t: UML\n");
seq_printf(m, "mode\t\t: skas\n"); seq_printf(m, "mode\t\t: skas\n");
seq_printf(m, "host\t\t: %s\n", host_info); seq_printf(m, "host\t\t: %s\n", host_info);
seq_printf(m, "bogomips\t: %lu.%02lu\n\n", seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no");
seq_printf(m, "flags\t\t:");
for (i = 0; i < 32*NCAPINTS; i++)
if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))
seq_printf(m, " %s", x86_cap_flags[i]);
seq_printf(m, "\n");
seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment);
seq_printf(m, "bogomips\t: %lu.%02lu\n",
loops_per_jiffy/(500000/HZ), loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100); (loops_per_jiffy/(5000/HZ)) % 100);
return 0; return 0;
} }
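For illustration, the guest's /proc/cpuinfo rendered by this function now looks roughly as follows (host string, flag list, and numbers depend on the host; the values here are made up):
processor	: 0
vendor_id	: User Mode Linux
model name	: UML
mode		: skas
host		: Linux 5.13.0 x86_64
fpu		: yes
flags		: fpu vme de pse tsc ...
cache_alignment	: 64
bogomips	: 4600.00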
...@@ -262,6 +276,30 @@ EXPORT_SYMBOL(end_iomem); ...@@ -262,6 +276,30 @@ EXPORT_SYMBOL(end_iomem);
#define MIN_VMALLOC (32 * 1024 * 1024) #define MIN_VMALLOC (32 * 1024 * 1024)
static void parse_host_cpu_flags(char *line)
{
int i;
for (i = 0; i < 32*NCAPINTS; i++) {
if ((x86_cap_flags[i] != NULL) && strstr(line, x86_cap_flags[i]))
set_cpu_cap(&boot_cpu_data, i);
}
}
static void parse_cache_line(char *line)
{
unsigned long res;
char *to_parse = strstr(line, ":");
if (to_parse) {
to_parse++;
while (*to_parse != 0 && isspace(*to_parse)) {
to_parse++;
}
if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res))
boot_cpu_data.cache_alignment = res;
else
boot_cpu_data.cache_alignment = L1_CACHE_BYTES;
}
}
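A worked example of what parse_cache_line() consumes, assuming a typical x86 host (the exact whitespace after the colon does not matter, since it is skipped):
char line[] = "cache_alignment\t: 64\n";

parse_cache_line(line);	/* boot_cpu_data.cache_alignment = 64 */
/* a malformed or non-power-of-2 value falls back to L1_CACHE_BYTES */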
int __init linux_main(int argc, char **argv) int __init linux_main(int argc, char **argv)
{ {
unsigned long avail, diff; unsigned long avail, diff;
...@@ -298,6 +336,8 @@ int __init linux_main(int argc, char **argv) ...@@ -298,6 +336,8 @@ int __init linux_main(int argc, char **argv)
/* OS sanity checks that need to happen before the kernel runs */ /* OS sanity checks that need to happen before the kernel runs */
os_early_checks(); os_early_checks();
get_host_cpu_features(parse_host_cpu_flags, parse_cache_line);
brk_start = (unsigned long) sbrk(0); brk_start = (unsigned long) sbrk(0);
/* /*
......
...@@ -64,7 +64,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) ...@@ -64,7 +64,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
goto out_close; goto out_close;
} }
sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); sp = stack + UM_KERN_PAGE_SIZE;
data.pre_exec = pre_exec; data.pre_exec = pre_exec;
data.pre_data = pre_data; data.pre_data = pre_data;
data.argv = argv; data.argv = argv;
...@@ -120,7 +120,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, ...@@ -120,7 +120,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
if (stack == 0) if (stack == 0)
return -ENOMEM; return -ENOMEM;
sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); sp = stack + UM_KERN_PAGE_SIZE;
pid = clone(proc, (void *) sp, flags, arg); pid = clone(proc, (void *) sp, flags, arg);
if (pid < 0) { if (pid < 0) {
err = -errno; err = -errno;
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <sysdep/mcontext.h> #include <sysdep/mcontext.h>
#include <um_malloc.h> #include <um_malloc.h>
#include <sys/ucontext.h> #include <sys/ucontext.h>
#include <timetravel.h>
void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
[SIGTRAP] = relay_signal, [SIGTRAP] = relay_signal,
...@@ -62,17 +63,30 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) ...@@ -62,17 +63,30 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGALRM_BIT 1 #define SIGALRM_BIT 1
#define SIGALRM_MASK (1 << SIGALRM_BIT) #define SIGALRM_MASK (1 << SIGALRM_BIT)
static int signals_enabled; int signals_enabled;
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
static int signals_blocked;
#else
#define signals_blocked false
#endif
static unsigned int signals_pending; static unsigned int signals_pending;
static unsigned int signals_active = 0; static unsigned int signals_active = 0;
void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{ {
int enabled; int enabled = signals_enabled;
enabled = signals_enabled; if ((signals_blocked || !enabled) && (sig == SIGIO)) {
if (!enabled && (sig == SIGIO)) { /*
signals_pending |= SIGIO_MASK; * In TT_MODE_EXTERNAL, need to still call time-travel
* handlers unless signals are also blocked for the
* external time message processing. This will mark
* signals_pending by itself (only if necessary.)
*/
if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
sigio_run_timetravel_handlers();
else
signals_pending |= SIGIO_MASK;
return; return;
} }
...@@ -129,7 +143,7 @@ void set_sigstack(void *sig_stack, int size) ...@@ -129,7 +143,7 @@ void set_sigstack(void *sig_stack, int size)
stack_t stack = { stack_t stack = {
.ss_flags = 0, .ss_flags = 0,
.ss_sp = sig_stack, .ss_sp = sig_stack,
.ss_size = size - sizeof(void *) .ss_size = size
}; };
if (sigaltstack(&stack, NULL) != 0) if (sigaltstack(&stack, NULL) != 0)
...@@ -334,11 +348,6 @@ void unblock_signals(void) ...@@ -334,11 +348,6 @@ void unblock_signals(void)
} }
} }
int get_signals(void)
{
return signals_enabled;
}
int set_signals(int enable) int set_signals(int enable)
{ {
int ret; int ret;
...@@ -368,6 +377,39 @@ int set_signals_trace(int enable) ...@@ -368,6 +377,39 @@ int set_signals_trace(int enable)
return ret; return ret;
} }
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
void mark_sigio_pending(void)
{
signals_pending |= SIGIO_MASK;
}
void block_signals_hard(void)
{
if (signals_blocked)
return;
signals_blocked = 1;
barrier();
}
void unblock_signals_hard(void)
{
if (!signals_blocked)
return;
/* Must be set to 0 before we check the pending bits etc. */
signals_blocked = 0;
barrier();
if (signals_pending && signals_enabled) {
/* this is a bit inefficient, but that's not really important */
block_signals();
unblock_signals();
} else if (signals_pending & SIGIO_MASK) {
/* we need to run time-travel handlers even if not enabled */
sigio_run_timetravel_handlers();
}
}
#endif
int os_is_signal_stack(void) int os_is_signal_stack(void)
{ {
stack_t ss; stack_t ss;
......
...@@ -327,7 +327,7 @@ int start_userspace(unsigned long stub_stack) ...@@ -327,7 +327,7 @@ int start_userspace(unsigned long stub_stack)
} }
/* set stack pointer to the end of the stack page, so it can grow downwards */ /* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
flags = CLONE_FILES | SIGCHLD; flags = CLONE_FILES | SIGCHLD;
......
...@@ -321,6 +321,38 @@ static void __init check_coredump_limit(void) ...@@ -321,6 +321,38 @@ static void __init check_coredump_limit(void)
os_info("%llu\n", (unsigned long long)lim.rlim_max); os_info("%llu\n", (unsigned long long)lim.rlim_max);
} }
void __init get_host_cpu_features(
void (*flags_helper_func)(char *line),
void (*cache_helper_func)(char *line))
{
FILE *cpuinfo;
char *line = NULL;
size_t len = 0;
int done_parsing = 0;
cpuinfo = fopen("/proc/cpuinfo", "r");
if (cpuinfo == NULL) {
os_info("Failed to get host CPU features\n");
} else {
while ((getline(&line, &len, cpuinfo)) != -1) {
if (strstr(line, "flags")) {
flags_helper_func(line);
done_parsing++;
}
if (strstr(line, "cache_alignment")) {
cache_helper_func(line);
done_parsing++;
}
free(line);
line = NULL;
if (done_parsing > 1)
break;
}
fclose(cpuinfo);
}
}
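A minimal usage sketch (this is the userspace side of UML, hence stdio): the callbacks receive the whole /proc/cpuinfo line, prefix included, and must pick out what they need; the print_* helper names are illustrative only:
#include <stdio.h>

static void print_flags(char *line) { printf("host %s", line); }
static void print_cache(char *line) { printf("host %s", line); }

/* e.g. from early boot code: */
get_host_cpu_features(print_flags, print_cache);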
void __init os_early_checks(void) void __init os_early_checks(void)
{ {
int pid; int pid;
......
...@@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64 ...@@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64
# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64
LINK-y += -m64 LINK-y += -m64
endif endif
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
# #
menu "Input device support" menu "Input device support"
depends on !UML
config INPUT config INPUT
tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# #
config GAMEPORT config GAMEPORT
tristate "Gameport support" tristate "Gameport support"
depends on !UML
help help
Gameport support is for the standard 15-pin PC gameport. If you Gameport support is for the standard 15-pin PC gameport. If you
have a joystick, gamepad, gameport card, a soundcard with a gameport have a joystick, gamepad, gameport card, a soundcard with a gameport
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# #
menuconfig INPUT_JOYSTICK menuconfig INPUT_JOYSTICK
bool "Joysticks/Gamepads" bool "Joysticks/Gamepads"
depends on !UML
help help
If you have a joystick, 6dof controller, gamepad, steering wheel, If you have a joystick, 6dof controller, gamepad, steering wheel,
weapon control system or something like that you can say Y here weapon control system or something like that you can say Y here
......
...@@ -12,9 +12,8 @@ if TTY ...@@ -12,9 +12,8 @@ if TTY
config VT config VT
bool "Virtual terminal" if EXPERT bool "Virtual terminal" if EXPERT
depends on !UML
select INPUT select INPUT
default y default y if !UML
help help
If you say Y here, you will get support for terminal devices with If you say Y here, you will get support for terminal devices with
display and keyboard devices. These are called "virtual" because you display and keyboard devices. These are called "virtual" because you
...@@ -78,7 +77,7 @@ config VT_CONSOLE_SLEEP ...@@ -78,7 +77,7 @@ config VT_CONSOLE_SLEEP
config HW_CONSOLE config HW_CONSOLE
bool bool
depends on VT && !UML depends on VT
default y default y
config VT_HW_CONSOLE_BINDING config VT_HW_CONSOLE_BINDING
......
...@@ -9,7 +9,7 @@ config VGA_CONSOLE ...@@ -9,7 +9,7 @@ config VGA_CONSOLE
bool "VGA text console" if EXPERT || !X86 bool "VGA text console" if EXPERT || !X86
depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC && !SUPERH && \ depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC && !SUPERH && \
(!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \ (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
!ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32 && !S390 !ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32 && !S390 && !UML
default y default y
help help
Saying Y here will allow you to use Linux in text mode through a Saying Y here will allow you to use Linux in text mode through a
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021 Intel Corporation
* Author: johannes@sipsolutions.net
*/
#ifndef _LOGIC_IO_H
#define _LOGIC_IO_H
#include <linux/types.h>
/* include this file into asm/io.h */
#ifdef CONFIG_INDIRECT_IOMEM
#ifdef CONFIG_INDIRECT_IOMEM_FALLBACK
/*
* If you want emulated IO memory to fall back to 'normal' IO memory
* if a region wasn't registered as emulated, then you need to have
* all of the real_* functions implemented.
*/
#if !defined(real_ioremap) || !defined(real_iounmap) || \
!defined(real_raw_readb) || !defined(real_raw_writeb) || \
!defined(real_raw_readw) || !defined(real_raw_writew) || \
!defined(real_raw_readl) || !defined(real_raw_writel) || \
(defined(CONFIG_64BIT) && \
(!defined(real_raw_readq) || !defined(real_raw_writeq))) || \
!defined(real_memset_io) || \
!defined(real_memcpy_fromio) || \
!defined(real_memcpy_toio)
#error "Must provide fallbacks for real IO memory access"
#endif /* defined ... */
#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */
#define ioremap ioremap
void __iomem *ioremap(phys_addr_t offset, size_t size);
#define iounmap iounmap
void iounmap(void __iomem *addr);
#define __raw_readb __raw_readb
u8 __raw_readb(const volatile void __iomem *addr);
#define __raw_readw __raw_readw
u16 __raw_readw(const volatile void __iomem *addr);
#define __raw_readl __raw_readl
u32 __raw_readl(const volatile void __iomem *addr);
#ifdef CONFIG_64BIT
#define __raw_readq __raw_readq
u64 __raw_readq(const volatile void __iomem *addr);
#endif /* CONFIG_64BIT */
#define __raw_writeb __raw_writeb
void __raw_writeb(u8 value, volatile void __iomem *addr);
#define __raw_writew __raw_writew
void __raw_writew(u16 value, volatile void __iomem *addr);
#define __raw_writel __raw_writel
void __raw_writel(u32 value, volatile void __iomem *addr);
#ifdef CONFIG_64BIT
#define __raw_writeq __raw_writeq
void __raw_writeq(u64 value, volatile void __iomem *addr);
#endif /* CONFIG_64BIT */
#define memset_io memset_io
void memset_io(volatile void __iomem *addr, int value, size_t size);
#define memcpy_fromio memcpy_fromio
void memcpy_fromio(void *buffer, const volatile void __iomem *addr,
size_t size);
#define memcpy_toio memcpy_toio
void memcpy_toio(volatile void __iomem *addr, const void *buffer, size_t size);
#endif /* CONFIG_INDIRECT_IOMEM */
#endif /* _LOGIC_IO_H */
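A minimal sketch of what an architecture's asm/io.h might define before the #include when CONFIG_INDIRECT_IOMEM_FALLBACK is set; the arch_* helpers are hypothetical stand-ins for whatever the architecture already uses for real MMIO:
#define real_ioremap(offset, size)	arch_phys_ioremap(offset, size)
#define real_iounmap(addr)		arch_phys_iounmap(addr)
#define real_raw_readb(addr)		arch_mmio_read8(addr)
#define real_raw_writeb(val, addr)	arch_mmio_write8(val, addr)
/* ...readw/readl (and readq on 64-bit), memset_io, memcpy_fromio,
 * memcpy_toio counterparts likewise... */
#include <linux/logic_io.h>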
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021 Intel Corporation
* Author: johannes@sipsolutions.net
*/
#ifndef __LOGIC_IOMEM_H
#define __LOGIC_IOMEM_H
#include <linux/types.h>
#include <linux/ioport.h>
/**
* struct logic_iomem_ops - emulated IO memory ops
* @read: read an 8, 16, 32 or 64 bit quantity from the given offset,
* size is given in bytes (1, 2, 4 or 8)
* (64-bit only necessary if CONFIG_64BIT is set)
* @write: write an 8, 16 32 or 64 bit quantity to the given offset,
* size is given in bytes (1, 2, 4 or 8)
* (64-bit only necessary if CONFIG_64BIT is set)
* @set: optional, for memset_io()
* @copy_from: optional, for memcpy_fromio()
* @copy_to: optional, for memcpy_toio()
* @unmap: optional, this region is getting unmapped
*/
struct logic_iomem_ops {
unsigned long (*read)(void *priv, unsigned int offset, int size);
void (*write)(void *priv, unsigned int offset, int size,
unsigned long val);
void (*set)(void *priv, unsigned int offset, u8 value, int size);
void (*copy_from)(void *priv, void *buffer, unsigned int offset,
int size);
void (*copy_to)(void *priv, unsigned int offset, const void *buffer,
int size);
void (*unmap)(void *priv);
};
/**
* struct logic_iomem_region_ops - ops for an IO memory handler
* @map: map a range in the registered IO memory region, must
* fill *ops with the ops and may fill *priv to be passed
* to the ops. The offset is given as the offset into the
* registered resource region.
* The return value is negative for errors, or >= 0 for
* success. On success, the return value is added to the
* offset for later ops, to allow for partial mappings.
*/
struct logic_iomem_region_ops {
long (*map)(unsigned long offset, size_t size,
const struct logic_iomem_ops **ops,
void **priv);
};
/**
* logic_iomem_add_region - register an IO memory region
* @resource: the resource description for this region
* @ops: the IO memory mapping ops for this resource
*/
int logic_iomem_add_region(struct resource *resource,
const struct logic_iomem_region_ops *ops);
#endif /* __LOGIC_IOMEM_H */
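A hedged, self-contained sketch of registering a 4 KiB emulated region; everything named demo_* (including the address range) is illustrative, only the structs and logic_iomem_add_region() come from this header:
#include <linux/ioport.h>
#include <linux/logic_iomem.h>

static unsigned long demo_read(void *priv, unsigned int offset, int size)
{
	return 0;	/* all reads return zero */
}

static void demo_write(void *priv, unsigned int offset, int size,
		       unsigned long val)
{
	/* discard writes */
}

static const struct logic_iomem_ops demo_ops = {
	.read = demo_read,
	.write = demo_write,
};

static long demo_map(unsigned long offset, size_t size,
		     const struct logic_iomem_ops **ops, void **priv)
{
	*ops = &demo_ops;
	return 0;	/* map at offset 0 into the area */
}

static const struct logic_iomem_region_ops demo_region_ops = {
	.map = demo_map,
};

static struct resource demo_res = {
	.name	= "demo",
	.start	= 0x10000000,
	.end	= 0x10000fff,
	.flags	= IORESOURCE_MEM,
};

/* after logic_iomem_add_region(&demo_res, &demo_region_ops), an
 * ioremap(0x10000000, 0x1000) cookie routes readl()/writel() to demo_ops */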
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* Copyright (C) 2021 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#ifndef _UAPI_LINUX_VIRTIO_PCIDEV_H
#define _UAPI_LINUX_VIRTIO_PCIDEV_H
#include <linux/types.h>
/**
* enum virtio_pcidev_ops - virtual PCI device operations
* @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8;
* the @data field should be filled in by the device (in little endian).
* @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8;
* the @data field contains the data to write (in little endian).
 * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable;
 * the @data field should be filled in by the device (in little endian).
 * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable;
* the @data field contains the data to write (in little endian).
* @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but
* the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE)
* @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for
* the number
* @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports
* the 16- or 32-bit write that would otherwise be done into memory,
* analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above
* @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be
* all zeroes) to signal the PME# pin.
*/
enum virtio_pcidev_ops {
VIRTIO_PCIDEV_OP_RESERVED = 0,
VIRTIO_PCIDEV_OP_CFG_READ,
VIRTIO_PCIDEV_OP_CFG_WRITE,
VIRTIO_PCIDEV_OP_MMIO_READ,
VIRTIO_PCIDEV_OP_MMIO_WRITE,
VIRTIO_PCIDEV_OP_MMIO_MEMSET,
VIRTIO_PCIDEV_OP_INT,
VIRTIO_PCIDEV_OP_MSI,
VIRTIO_PCIDEV_OP_PME,
};
/**
* struct virtio_pcidev_msg - virtio PCI device operation
* @op: the operation to do
* @bar: the bar (only with BAR read/write messages)
* @reserved: reserved
* @size: the size of the read/write (in bytes)
* @addr: the address to read/write
* @data: the data, normally @size long, but just one byte for
* %VIRTIO_PCIDEV_OP_MMIO_MEMSET
*
* Note: the fields are all in native (CPU) endian, however, the
* @data values will often be in little endian (see the ops above.)
*/
struct virtio_pcidev_msg {
__u8 op;
__u8 bar;
__u16 reserved;
__u32 size;
__u64 addr;
__u8 data[];
};
#endif /* _UAPI_LINUX_VIRTIO_PCIDEV_H */
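A short illustrative encoding, assuming the usual 4-byte read of the vendor/device ID words at the start of config space:
struct virtio_pcidev_msg msg = {
	.op	= VIRTIO_PCIDEV_OP_CFG_READ,
	.size	= 4,
	.addr	= 0,	/* config space offset */
};
/* the device answers by filling four little-endian bytes of @data */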
...@@ -102,6 +102,20 @@ config INDIRECT_PIO ...@@ -102,6 +102,20 @@ config INDIRECT_PIO
When in doubt, say N. When in doubt, say N.
config INDIRECT_IOMEM
bool
help
This is selected by other options/architectures to provide the
emulated iomem accessors.
config INDIRECT_IOMEM_FALLBACK
bool
depends on INDIRECT_IOMEM
help
If INDIRECT_IOMEM is selected, this enables falling back to plain
mmio accesses when the IO memory address is not a registered
emulated region.
config CRC_CCITT config CRC_CCITT
tristate "CRC-CCITT functions" tristate "CRC-CCITT functions"
help help
......
...@@ -148,6 +148,8 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o ...@@ -148,6 +148,8 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
lib-y += logic_pio.o lib-y += logic_pio.o
lib-$(CONFIG_INDIRECT_IOMEM) += logic_iomem.o
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_BTREE) += btree.o
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/logic_iomem.h>
struct logic_iomem_region {
const struct resource *res;
const struct logic_iomem_region_ops *ops;
struct list_head list;
};
struct logic_iomem_area {
const struct logic_iomem_ops *ops;
void *priv;
};
#define AREA_SHIFT 24
#define MAX_AREA_SIZE (1 << AREA_SHIFT)
#define MAX_AREAS ((1ULL<<32) / MAX_AREA_SIZE)
#define AREA_BITS ((MAX_AREAS - 1) << AREA_SHIFT)
#define AREA_MASK (MAX_AREA_SIZE - 1)
#ifdef CONFIG_64BIT
#define IOREMAP_BIAS 0xDEAD000000000000UL
#define IOREMAP_MASK 0xFFFFFFFF00000000UL
#else
#define IOREMAP_BIAS 0
#define IOREMAP_MASK 0
#endif
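/*
 * ioremap() hands out an opaque cookie rather than a usable pointer:
 * on 64-bit the high 32 bits carry the 0xDEAD... bias (so stray plain
 * dereferences fault recognizably), bits 31..24 (AREA_BITS) index one
 * of the MAX_AREAS mapped areas, and the low 24 bits (AREA_MASK) are
 * the offset within that area.
 */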
static DEFINE_MUTEX(regions_mtx);
static LIST_HEAD(regions_list);
static struct logic_iomem_area mapped_areas[MAX_AREAS];
int logic_iomem_add_region(struct resource *resource,
const struct logic_iomem_region_ops *ops)
{
struct logic_iomem_region *rreg;
int err;
if (WARN_ON(!resource || !ops))
return -EINVAL;
if (WARN_ON((resource->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM))
return -EINVAL;
rreg = kzalloc(sizeof(*rreg), GFP_KERNEL);
if (!rreg)
return -ENOMEM;
err = request_resource(&iomem_resource, resource);
if (err) {
kfree(rreg);
return err;
}
mutex_lock(&regions_mtx);
rreg->res = resource;
rreg->ops = ops;
list_add_tail(&rreg->list, &regions_list);
mutex_unlock(&regions_mtx);
return 0;
}
EXPORT_SYMBOL(logic_iomem_add_region);
#ifndef CONFIG_INDIRECT_IOMEM_FALLBACK
static void __iomem *real_ioremap(phys_addr_t offset, size_t size)
{
WARN(1, "invalid ioremap(0x%llx, 0x%zx)\n",
(unsigned long long)offset, size);
return NULL;
}
static void real_iounmap(void __iomem *addr)
{
WARN(1, "invalid iounmap for addr 0x%llx\n",
(unsigned long long)addr);
}
#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */
void __iomem *ioremap(phys_addr_t offset, size_t size)
{
void __iomem *ret = NULL;
struct logic_iomem_region *rreg, *found = NULL;
int i;
mutex_lock(&regions_mtx);
list_for_each_entry(rreg, &regions_list, list) {
if (rreg->res->start > offset)
continue;
if (rreg->res->end < offset + size - 1)
continue;
found = rreg;
break;
}
if (!found)
goto out;
for (i = 0; i < MAX_AREAS; i++) {
long offs;
if (mapped_areas[i].ops)
continue;
offs = rreg->ops->map(offset - found->res->start,
size, &mapped_areas[i].ops,
&mapped_areas[i].priv);
if (offs < 0) {
mapped_areas[i].ops = NULL;
break;
}
if (WARN_ON(!mapped_areas[i].ops)) {
mapped_areas[i].ops = NULL;
break;
}
ret = (void __iomem *)(IOREMAP_BIAS + (i << AREA_SHIFT) + offs);
break;
}
out:
mutex_unlock(&regions_mtx);
if (ret)
return ret;
return real_ioremap(offset, size);
}
EXPORT_SYMBOL(ioremap);
static inline struct logic_iomem_area *
get_area(const volatile void __iomem *addr)
{
unsigned long a = (unsigned long)addr;
unsigned int idx;
if (WARN_ON((a & IOREMAP_MASK) != IOREMAP_BIAS))
return NULL;
idx = (a & AREA_BITS) >> AREA_SHIFT;
if (mapped_areas[idx].ops)
return &mapped_areas[idx];
return NULL;
}
void iounmap(void __iomem *addr)
{
struct logic_iomem_area *area = get_area(addr);
if (!area) {
real_iounmap(addr);
return;
}
if (area->ops->unmap)
area->ops->unmap(area->priv);
mutex_lock(&regions_mtx);
area->ops = NULL;
area->priv = NULL;
mutex_unlock(&regions_mtx);
}
EXPORT_SYMBOL(iounmap);
#ifndef CONFIG_INDIRECT_IOMEM_FALLBACK
#define MAKE_FALLBACK(op, sz) \
static u##sz real_raw_read ## op(const volatile void __iomem *addr) \
{ \
WARN(1, "Invalid read" #op " at address %llx\n", \
(unsigned long long)addr); \
return (u ## sz)~0ULL; \
} \
\
static void real_raw_write ## op(u ## sz val, volatile void __iomem *addr) \
{ \
WARN(1, "Invalid write" #op " of 0x%llx at address %llx\n", \
(unsigned long long)val, (unsigned long long)addr); \
}
MAKE_FALLBACK(b, 8);
MAKE_FALLBACK(w, 16);
MAKE_FALLBACK(l, 32);
#ifdef CONFIG_64BIT
MAKE_FALLBACK(q, 64);
#endif
static void real_memset_io(volatile void __iomem *addr, int value, size_t size)
{
WARN(1, "Invalid memset_io at address 0x%llx\n",
(unsigned long long)addr);
}
static void real_memcpy_fromio(void *buffer, const volatile void __iomem *addr,
size_t size)
{
WARN(1, "Invalid memcpy_fromio at address 0x%llx\n",
(unsigned long long)addr);
memset(buffer, 0xff, size);
}
static void real_memcpy_toio(volatile void __iomem *addr, const void *buffer,
size_t size)
{
WARN(1, "Invalid memcpy_toio at address 0x%llx\n",
(unsigned long long)addr);
}
#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */
#define MAKE_OP(op, sz) \
u##sz __raw_read ## op(const volatile void __iomem *addr) \
{ \
struct logic_iomem_area *area = get_area(addr); \
\
if (!area) \
return real_raw_read ## op(addr); \
\
return (u ## sz) area->ops->read(area->priv, \
(unsigned long)addr & AREA_MASK,\
sz / 8); \
} \
EXPORT_SYMBOL(__raw_read ## op); \
\
void __raw_write ## op(u ## sz val, volatile void __iomem *addr) \
{ \
struct logic_iomem_area *area = get_area(addr); \
\
if (!area) { \
real_raw_write ## op(val, addr); \
return; \
} \
\
area->ops->write(area->priv, \
(unsigned long)addr & AREA_MASK, \
sz / 8, val); \
} \
EXPORT_SYMBOL(__raw_write ## op)
MAKE_OP(b, 8);
MAKE_OP(w, 16);
MAKE_OP(l, 32);
#ifdef CONFIG_64BIT
MAKE_OP(q, 64);
#endif
void memset_io(volatile void __iomem *addr, int value, size_t size)
{
struct logic_iomem_area *area = get_area(addr);
unsigned long offs, start;
if (!area) {
real_memset_io(addr, value, size);
return;
}
start = (unsigned long)addr & AREA_MASK;
if (area->ops->set) {
area->ops->set(area->priv, start, value, size);
return;
}
for (offs = 0; offs < size; offs++)
area->ops->write(area->priv, start + offs, 1, value);
}
EXPORT_SYMBOL(memset_io);
void memcpy_fromio(void *buffer, const volatile void __iomem *addr,
size_t size)
{
struct logic_iomem_area *area = get_area(addr);
u8 *buf = buffer;
unsigned long offs, start;
if (!area) {
real_memcpy_fromio(buffer, addr, size);
return;
}
start = (unsigned long)addr & AREA_MASK;
if (area->ops->copy_from) {
area->ops->copy_from(area->priv, buffer, start, size);
return;
}
for (offs = 0; offs < size; offs++)
buf[offs] = area->ops->read(area->priv, start + offs, 1);
}
EXPORT_SYMBOL(memcpy_fromio);
void memcpy_toio(volatile void __iomem *addr, const void *buffer, size_t size)
{
struct logic_iomem_area *area = get_area(addr);
const u8 *buf = buffer;
unsigned long offs, start;
if (!area) {
real_memcpy_toio(addr, buffer, size);
return;
}
start = (unsigned long)addr & AREA_MASK;
if (area->ops->copy_to) {
area->ops->copy_to(area->priv, start, buffer, size);
return;
}
for (offs = 0; offs < size; offs++)
area->ops->write(area->priv, start + offs, 1, buf[offs]);
}
EXPORT_SYMBOL(memcpy_toio);