Commit 64145482 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - vdpa sim refactoring

 - virtio mem: Big Block Mode support

 - misc cleanups, fixes

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (61 commits)
  vdpa: Use simpler version of ida allocation
  vdpa: Add missing comment for virtqueue count
  uapi: virtio_ids: add missing device type IDs from OASIS spec
  uapi: virtio_ids.h: consistent indentions
  vhost scsi: fix error return code in vhost_scsi_set_endpoint()
  virtio_ring: Fix two use after free bugs
  virtio_net: Fix error code in probe()
  virtio_ring: Cut and paste bugs in vring_create_virtqueue_packed()
  tools/virtio: add barrier for aarch64
  tools/virtio: add krealloc_array
  tools/virtio: include asm/bug.h
  vdpa/mlx5: Use write memory barrier after updating CQ index
  vdpa: split vdpasim to core and net modules
  vdpa_sim: split vdpasim_virtqueue's iov field in out_iov and in_iov
  vdpa_sim: make vdpasim->buffer size configurable
  vdpa_sim: use kvmalloc to allocate vdpasim->buffer
  vdpa_sim: set vringh notify callback
  vdpa_sim: add set_config callback in vdpasim_dev_attr
  vdpa_sim: add get_config callback in vdpasim_dev_attr
  vdpa_sim: make 'config' generic and usable for any device type
  ...
parents 58cf05f5 418eddef
...@@ -3072,6 +3072,7 @@ static int virtnet_probe(struct virtio_device *vdev) ...@@ -3072,6 +3072,7 @@ static int virtnet_probe(struct virtio_device *vdev)
dev_err(&vdev->dev, dev_err(&vdev->dev,
"device MTU appears to have changed it is now %d < %d", "device MTU appears to have changed it is now %d < %d",
mtu, dev->min_mtu); mtu, dev->min_mtu);
err = -EINVAL;
goto free; goto free;
} }
......
...@@ -9,21 +9,24 @@ menuconfig VDPA ...@@ -9,21 +9,24 @@ menuconfig VDPA
if VDPA if VDPA
config VDPA_SIM config VDPA_SIM
tristate "vDPA device simulator" tristate "vDPA device simulator core"
depends on RUNTIME_TESTING_MENU && HAS_DMA depends on RUNTIME_TESTING_MENU && HAS_DMA
select DMA_OPS select DMA_OPS
select VHOST_RING select VHOST_RING
help
Enable this module to support vDPA device simulators. These devices
are used for testing, prototyping and development of vDPA.
config VDPA_SIM_NET
tristate "vDPA simulator for networking device"
depends on VDPA_SIM
select GENERIC_NET_UTILS select GENERIC_NET_UTILS
default n
help help
vDPA networking device simulator which loop TX traffic back vDPA networking device simulator which loops TX traffic back to RX.
to RX. This device is used for testing, prototyping and
development of vDPA.
config IFCVF config IFCVF
tristate "Intel IFC VF vDPA driver" tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI depends on PCI_MSI
default n
help help
This kernel module can drive Intel IFC VF NIC to offload This kernel module can drive Intel IFC VF NIC to offload
virtio dataplane traffic to hardware. virtio dataplane traffic to hardware.
...@@ -42,7 +45,6 @@ config MLX5_VDPA_NET ...@@ -42,7 +45,6 @@ config MLX5_VDPA_NET
tristate "vDPA driver for ConnectX devices" tristate "vDPA driver for ConnectX devices"
select MLX5_VDPA select MLX5_VDPA
depends on MLX5_CORE depends on MLX5_CORE
default n
help help
VDPA network driver for ConnectX6 and newer. Provides offloading VDPA network driver for ConnectX6 and newer. Provides offloading
of virtio net datapath such that descriptors put on the ring will of virtio net datapath such that descriptors put on the ring will
......
...@@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return ret; return ret;
} }
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) { if (ret) {
IFCVF_ERR(pdev, "No usable DMA confiugration\n"); IFCVF_ERR(pdev, "No usable DMA configuration\n");
return ret;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret) {
IFCVF_ERR(pdev,
"No usable coherent DMA confiugration\n");
return ret; return ret;
} }
......
...@@ -479,6 +479,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) ...@@ -479,6 +479,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{ {
mlx5_cq_set_ci(&mvq->cq.mcq); mlx5_cq_set_ci(&mvq->cq.mcq);
/* make sure CQ consumer update is visible to the hardware before updating
 * RX doorbell record.
 */
dma_wmb();
rx_post(&mvq->vqqp, num); rx_post(&mvq->vqqp, num);
if (mvq->event_cb.callback) if (mvq->event_cb.callback)
mvq->event_cb.callback(mvq->event_cb.private); mvq->event_cb.callback(mvq->event_cb.private);
......
...@@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, ...@@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
if (!vdev) if (!vdev)
goto err; goto err;
err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); err = ida_alloc(&vdpa_index_ida, GFP_KERNEL);
if (err < 0) if (err < 0)
goto err_ida; goto err_ida;
......
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
*/
#ifndef _VDPA_SIM_H
#define _VDPA_SIM_H
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
#include <linux/vhost_iotlb.h>
#include <uapi/linux/virtio_config.h>
#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \
(1ULL << VIRTIO_F_VERSION_1) | \
(1ULL << VIRTIO_F_ACCESS_PLATFORM))
struct vdpasim;
/* Simulated virtqueue: a vringh-backed ring plus the state programmed by
 * the vDPA bus driver (ring addresses, size, readiness, callback).
 */
struct vdpasim_virtqueue {
	struct vringh vring;
	struct vringh_kiov in_iov;	/* device-writable buffers (e.g. RX) */
	struct vringh_kiov out_iov;	/* device-readable buffers (e.g. TX) */
	unsigned short head;		/* head index of the descriptor in flight */
	bool ready;			/* true once the driver enabled this vq */
	u64 desc_addr;			/* guest addresses of the three vring areas */
	u64 device_addr;
	u64 driver_addr;
	u32 num;			/* queue size (number of descriptors) */
	void *private;
	irqreturn_t (*cb)(void *data);	/* interrupt callback into the driver */
};
/* Per-device-type attributes a concrete simulator (net, ...) passes to
 * vdpasim_create() to instantiate a device on top of the common core.
 */
struct vdpasim_dev_attr {
	u64 supported_features;	/* feature bits offered to the driver */
	size_t config_size;	/* size of the device config space */
	size_t buffer_size;	/* size of the vdpasim->buffer bounce buffer */
	int nvqs;		/* number of virtqueues */
	u32 id;			/* virtio device ID (VIRTIO_ID_*) */
	work_func_t work_fn;	/* datapath work function */

	/* copy device config out to / in from @config (device-type specific) */
	void (*get_config)(struct vdpasim *vdpasim, void *config);
	void (*set_config)(struct vdpasim *vdpasim, const void *config);
};
/* State of each vdpasim device */
struct vdpasim {
	struct vdpa_device vdpa;
	struct vdpasim_virtqueue *vqs;	/* array of dev_attr.nvqs queues */
	struct work_struct work;	/* datapath work item */
	struct vdpasim_dev_attr dev_attr;	/* device-type attributes */
	/* spinlock to synchronize virtqueue state */
	spinlock_t lock;
	/* virtio config according to device type */
	void *config;
	struct vhost_iotlb *iommu;
	void *buffer;		/* bounce buffer of dev_attr.buffer_size bytes */
	u32 status;		/* virtio device status bits */
	u32 generation;		/* config generation counter */
	u64 features;		/* features negotiated with the driver */
	/* spinlock to synchronize iommu table */
	spinlock_t iommu_lock;
};
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr);
/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
{
return virtio_legacy_is_little_endian() ||
(vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
}
/* Convert a 16-bit value from device endianness to CPU endianness. */
static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
{
	bool le = vdpasim_is_little_endian(vdpasim);

	return __virtio16_to_cpu(le, val);
}
/* Convert a 16-bit value from CPU endianness to device endianness. */
static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
{
	bool le = vdpasim_is_little_endian(vdpasim);

	return __cpu_to_virtio16(le, val);
}
/* Convert a 32-bit value from device endianness to CPU endianness. */
static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val)
{
	bool le = vdpasim_is_little_endian(vdpasim);

	return __virtio32_to_cpu(le, val);
}
/* Convert a 32-bit value from CPU endianness to device endianness. */
static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val)
{
	bool le = vdpasim_is_little_endian(vdpasim);

	return __cpu_to_virtio32(le, val);
}
/* Convert a 64-bit value from device endianness to CPU endianness. */
static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val)
{
	bool le = vdpasim_is_little_endian(vdpasim);

	return __virtio64_to_cpu(le, val);
}
/* Convert a 64-bit value from CPU endianness to device endianness. */
static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val)
{
	bool le = vdpasim_is_little_endian(vdpasim);

	return __cpu_to_virtio64(le, val);
}
#endif
// SPDX-License-Identifier: GPL-2.0-only
/*
* VDPA simulator for networking device.
*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
#include "vdpa_sim.h"
#define DRV_VERSION "0.1"
#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
#define DRV_DESC "vDPA Device Simulator for networking device"
#define DRV_LICENSE "GPL v2"
#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \
(1ULL << VIRTIO_NET_F_MAC))
#define VDPASIM_NET_VQ_NUM 2
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");

/* MAC address actually used by the device (parsed from the module parameter
 * or randomly generated). static: module-private, keep it off the global
 * symbol namespace.
 */
static u8 macaddr_buf[ETH_ALEN];

static struct vdpasim *vdpasim_net_dev;
/* Datapath worker: moves each packet queued on the TX vq (vqs[1]) into a
 * buffer posted on the RX vq (vqs[0]), i.e. loops TX traffic back to RX.
 * Processes a small batch per invocation and reschedules itself.
 */
static void vdpasim_net_work(struct work_struct *work)
{
	struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
	struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
	struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
	ssize_t read, write;
	size_t total_write;
	int pkts = 0;
	int err;

	spin_lock(&vdpasim->lock);

	/* Nothing to do until the driver is up and both queues are ready. */
	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!txq->ready || !rxq->ready)
		goto out;

	while (true) {
		total_write = 0;
		err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
					   &txq->head, GFP_ATOMIC);
		if (err <= 0)
			break;

		err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
					   &rxq->head, GFP_ATOMIC);
		if (err <= 0) {
			/* No RX buffer available: drop the TX packet. */
			vringh_complete_iotlb(&txq->vring, txq->head, 0);
			break;
		}

		/* Copy TX -> RX through the bounce buffer, one page at a time. */
		while (true) {
			read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
						     vdpasim->buffer,
						     PAGE_SIZE);
			if (read <= 0)
				break;

			write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
						      vdpasim->buffer, read);
			if (write <= 0)
				break;

			total_write += write;
		}

		/* Make sure data is written before advancing index */
		smp_wmb();

		vringh_complete_iotlb(&txq->vring, txq->head, 0);
		vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);

		/* Make sure used is visible before raising the interrupt. */
		smp_wmb();

		local_bh_disable();
		if (vringh_need_notify_iotlb(&txq->vring) > 0)
			vringh_notify(&txq->vring);
		if (vringh_need_notify_iotlb(&rxq->vring) > 0)
			vringh_notify(&rxq->vring);
		local_bh_enable();

		/* Yield after a small batch; reschedule to continue later. */
		if (++pkts > 4) {
			schedule_work(&vdpasim->work);
			goto out;
		}
	}

out:
	spin_unlock(&vdpasim->lock);
}
/* Fill the virtio-net config space: fixed 1500 MTU, link up, and the
 * module-level MAC address, all in device (virtio) endianness.
 */
static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_net_config *net_config =
		(struct virtio_net_config *)config;

	net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
	net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
	memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
}
/* Module init: resolve the MAC address (module parameter or random),
 * describe the net device via vdpasim_dev_attr, create the simulator
 * instance and register it on the vDPA bus.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int __init vdpasim_net_init(void)
{
	struct vdpasim_dev_attr dev_attr = {};
	int ret;

	if (macaddr) {
		/* mac_pton() returns false on parse failure and may leave
		 * the buffer only partially written; check its return value
		 * instead of relying solely on is_valid_ether_addr().
		 */
		if (!mac_pton(macaddr, macaddr_buf) ||
		    !is_valid_ether_addr(macaddr_buf)) {
			ret = -EADDRNOTAVAIL;
			goto out;
		}
	} else {
		eth_random_addr(macaddr_buf);
	}

	dev_attr.id = VIRTIO_ID_NET;
	dev_attr.supported_features = VDPASIM_NET_FEATURES;
	dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
	dev_attr.config_size = sizeof(struct virtio_net_config);
	dev_attr.get_config = vdpasim_net_get_config;
	dev_attr.work_fn = vdpasim_net_work;
	dev_attr.buffer_size = PAGE_SIZE;

	vdpasim_net_dev = vdpasim_create(&dev_attr);
	if (IS_ERR(vdpasim_net_dev)) {
		ret = PTR_ERR(vdpasim_net_dev);
		goto out;
	}

	ret = vdpa_register_device(&vdpasim_net_dev->vdpa);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	/* Registration failed: drop the reference taken by vdpasim_create(). */
	put_device(&vdpasim_net_dev->vdpa.dev);
out:
	return ret;
}
/* Module exit: unregister the simulated net device from the vDPA bus
 * (this drops the device reference and tears the instance down).
 */
static void __exit vdpasim_net_exit(void)
{
	vdpa_unregister_device(&vdpasim_net_dev->vdpa);
}
module_init(vdpasim_net_init);
module_exit(vdpasim_net_exit);
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
...@@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ...@@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
if (!vhost_vq_is_setup(vq)) if (!vhost_vq_is_setup(vq))
continue; continue;
if (vhost_scsi_setup_vq_cmds(vq, vq->num)) ret = vhost_scsi_setup_vq_cmds(vq, vq->num);
if (ret)
goto destroy_vq_cmds; goto destroy_vq_cmds;
} }
......
...@@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, ...@@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return -EFAULT; return -EFAULT;
if (vhost_vdpa_config_validate(v, &config)) if (vhost_vdpa_config_validate(v, &config))
return -EINVAL; return -EINVAL;
buf = kvzalloc(config.len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
if (copy_from_user(buf, c->buf, config.len)) { buf = vmemdup_user(c->buf, config.len);
kvfree(buf); if (IS_ERR(buf))
return -EFAULT; return PTR_ERR(buf);
}
ops->set_config(vdpa, config.off, buf, config.len); ops->set_config(vdpa, config.off, buf, config.len);
......
This diff is collapsed.
...@@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
vq->num_added = 0; vq->num_added = 0;
vq->packed_ring = true; vq->packed_ring = true;
vq->use_dma_api = vring_use_dma_api(vdev); vq->use_dma_api = vring_use_dma_api(vdev);
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG #ifdef DEBUG
vq->in_use = false; vq->in_use = false;
vq->last_add_time_valid = false; vq->last_add_time_valid = false;
...@@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
cpu_to_le16(vq->packed.event_flags_shadow); cpu_to_le16(vq->packed.event_flags_shadow);
} }
list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq; return &vq->vq;
err_desc_extra: err_desc_extra:
...@@ -1676,9 +1676,9 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1676,9 +1676,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
err_desc_state: err_desc_state:
kfree(vq); kfree(vq);
err_vq: err_vq:
vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device: err_device:
vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver: err_driver:
vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring: err_ring:
...@@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->last_used_idx = 0; vq->last_used_idx = 0;
vq->num_added = 0; vq->num_added = 0;
vq->use_dma_api = vring_use_dma_api(vdev); vq->use_dma_api = vring_use_dma_api(vdev);
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG #ifdef DEBUG
vq->in_use = false; vq->in_use = false;
vq->last_add_time_valid = false; vq->last_add_time_valid = false;
...@@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
memset(vq->split.desc_state, 0, vring.num * memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split)); sizeof(struct vring_desc_state_split));
list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq; return &vq->vq;
} }
EXPORT_SYMBOL_GPL(__vring_new_virtqueue); EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
......
...@@ -42,6 +42,7 @@ struct vdpa_vq_state { ...@@ -42,6 +42,7 @@ struct vdpa_vq_state {
* @config: the configuration ops for this device. * @config: the configuration ops for this device.
* @index: device index * @index: device index
* @features_valid: were features initialized? for legacy guests * @features_valid: were features initialized? for legacy guests
* @nvqs: maximum number of supported virtqueues
*/ */
struct vdpa_device { struct vdpa_device {
struct device dev; struct device dev;
......
...@@ -34,15 +34,21 @@ ...@@ -34,15 +34,21 @@
#define VIRTIO_ID_CONSOLE 3 /* virtio console */ #define VIRTIO_ID_CONSOLE 3 /* virtio console */
#define VIRTIO_ID_RNG 4 /* virtio rng */ #define VIRTIO_ID_RNG 4 /* virtio rng */
#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ #define VIRTIO_ID_BALLOON 5 /* virtio balloon */
#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */
#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */
#define VIRTIO_ID_SCSI 8 /* virtio scsi */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */
#define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_9P 9 /* 9p virtio console */
#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
#define VIRTIO_ID_CAIF 12 /* Virtio caif */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */
#define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */
#define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */
#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#define VIRTIO_ID_MEM 24 /* virtio mem */ #define VIRTIO_ID_MEM 24 /* virtio mem */
#define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_FS 26 /* virtio filesystem */
......
...@@ -1784,39 +1784,112 @@ int remove_memory(int nid, u64 start, u64 size) ...@@ -1784,39 +1784,112 @@ int remove_memory(int nid, u64 start, u64 size)
} }
EXPORT_SYMBOL_GPL(remove_memory); EXPORT_SYMBOL_GPL(remove_memory);
/*
 * walk_memory_blocks() callback: offline one memory block, recording in
 * the caller-supplied array (advanced via *arg) how it was online so
 * try_reonline_memory_block() can roll back on failure.
 */
static int try_offline_memory_block(struct memory_block *mem, void *arg)
{
	uint8_t online_type = MMOP_ONLINE_KERNEL;
	uint8_t **online_types = arg;
	struct page *page;
	int rc;

	/*
	 * Sense the online_type via the zone of the memory block. Offlining
	 * with multiple zones within one memory block will be rejected
	 * by offlining code ... so we don't care about that.
	 */
	page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
	if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
		online_type = MMOP_ONLINE_MOVABLE;

	rc = device_offline(&mem->dev);
	/*
	 * Default is MMOP_OFFLINE - change it only if offlining succeeded,
	 * so try_reonline_memory_block() can do the right thing.
	 */
	if (!rc)
		**online_types = online_type;

	(*online_types)++;
	/* Ignore if already offline. */
	return rc < 0 ? rc : 0;
}
/*
 * walk_memory_blocks() callback: re-online one memory block using the
 * online_type recorded by try_offline_memory_block(); entries still set
 * to MMOP_OFFLINE were never offlined and are skipped.
 */
static int try_reonline_memory_block(struct memory_block *mem, void *arg)
{
	uint8_t **online_types = arg;
	int rc;

	if (**online_types != MMOP_OFFLINE) {
		mem->online_type = **online_types;
		rc = device_online(&mem->dev);
		if (rc < 0)
			pr_warn("%s: Failed to re-online memory: %d",
				__func__, rc);
	}

	/* Continue processing all remaining memory blocks. */
	(*online_types)++;
	return 0;
}
/* /*
* Try to offline and remove a memory block. Might take a long time to * Try to offline and remove memory. Might take a long time to finish in case
* finish in case memory is still in use. Primarily useful for memory devices * memory is still in use. Primarily useful for memory devices that logically
* that logically unplugged all memory (so it's no longer in use) and want to * unplugged all memory (so it's no longer in use) and want to offline + remove
* offline + remove the memory block. * that memory.
*/ */
int offline_and_remove_memory(int nid, u64 start, u64 size) int offline_and_remove_memory(int nid, u64 start, u64 size)
{ {
struct memory_block *mem; const unsigned long mb_count = size / memory_block_size_bytes();
int rc = -EINVAL; uint8_t *online_types, *tmp;
int rc;
if (!IS_ALIGNED(start, memory_block_size_bytes()) || if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
size != memory_block_size_bytes()) !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
return rc; return -EINVAL;
/*
* We'll remember the old online type of each memory block, so we can
* try to revert whatever we did when offlining one memory block fails
* after offlining some others succeeded.
*/
online_types = kmalloc_array(mb_count, sizeof(*online_types),
GFP_KERNEL);
if (!online_types)
return -ENOMEM;
/*
* Initialize all states to MMOP_OFFLINE, so when we abort processing in
* try_offline_memory_block(), we'll skip all unprocessed blocks in
* try_reonline_memory_block().
*/
memset(online_types, MMOP_OFFLINE, mb_count);
lock_device_hotplug(); lock_device_hotplug();
mem = find_memory_block(__pfn_to_section(PFN_DOWN(start)));
if (mem) tmp = online_types;
rc = device_offline(&mem->dev); rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
/* Ignore if the device is already offline. */
if (rc > 0)
rc = 0;
/* /*
* In case we succeeded to offline the memory block, remove it. * In case we succeeded to offline all memory, remove it.
* This cannot fail as it cannot get onlined in the meantime. * This cannot fail as it cannot get onlined in the meantime.
*/ */
if (!rc) { if (!rc) {
rc = try_remove_memory(nid, start, size); rc = try_remove_memory(nid, start, size);
WARN_ON_ONCE(rc); if (rc)
pr_err("%s: Failed to remove memory: %d", __func__, rc);
}
/*
* Rollback what we did. While memory onlining might theoretically fail
* (nacked by a notifier), it barely ever happens.
*/
if (rc) {
tmp = online_types;
walk_memory_blocks(start, size, &tmp,
try_reonline_memory_block);
} }
unlock_device_hotplug(); unlock_device_hotplug();
kfree(online_types);
return rc; return rc;
} }
EXPORT_SYMBOL_GPL(offline_and_remove_memory); EXPORT_SYMBOL_GPL(offline_and_remove_memory);
......
...@@ -16,6 +16,16 @@ ...@@ -16,6 +16,16 @@
# define mb() abort() # define mb() abort()
# define dma_rmb() abort() # define dma_rmb() abort()
# define dma_wmb() abort() # define dma_wmb() abort()
#elif defined(__aarch64__)
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define virt_mb() __sync_synchronize()
#define virt_rmb() dmb(ishld)
#define virt_wmb() dmb(ishst)
#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0)
/* Weak barriers should be used. If not - it's a bug */
# define mb() abort()
# define dma_rmb() abort()
# define dma_wmb() abort()
#else #else
#error Please fill in barrier macros #error Please fill in barrier macros
#endif #endif
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#ifndef BUG_H #ifndef BUG_H
#define BUG_H #define BUG_H
#include <asm/bug.h>
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
#define BUILD_BUG_ON(x) #define BUILD_BUG_ON(x)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/overflow.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/printk.h> #include <linux/printk.h>
#include <linux/bug.h> #include <linux/bug.h>
...@@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr) ...@@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr)
# define unlikely(x) (__builtin_expect(!!(x), 0)) # define unlikely(x) (__builtin_expect(!!(x), 0))
# endif # endif
/*
 * Userspace counterpart of the kernel's krealloc_array(): reallocate an
 * array of new_n elements of new_size bytes each, returning NULL (and
 * leaving the original allocation untouched) if new_n * new_size would
 * overflow size_t.
 */
static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
		return NULL;

	return krealloc(p, bytes, gfp);
}
#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#ifdef DEBUG #ifdef DEBUG
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
...@@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr) ...@@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr)
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0)
#define min(x, y) ({ \ #define min(x, y) ({ \
typeof(x) _min1 = (x); \ typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \ typeof(y) _min2 = (y); \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment