Commit 27f85279, authored by David Hildenbrand, committed by Linus Torvalds

virtio-mem: don't special-case ZONE_MOVABLE

When introducing virtio-mem, the semantics of ZONE_MOVABLE were rather
unclear, which is why we special-cased ZONE_MOVABLE such that partially
plugged blocks would never end up in ZONE_MOVABLE.

Now that the semantics are much clearer (and will be documented in a
follow-up patch including the new virtio-mem behavior), let's allow
onlining partially plugged memory blocks to ZONE_MOVABLE and also consider
memory blocks that were onlined to ZONE_MOVABLE when unplugging memory.
While unplugged memory pages are, in general, unmovable, they can be
skipped when offlining memory.

virtio-mem only unplugs fairly big chunks (in the megabyte range) and
tries to shrink the memory region rather than randomly choosing memory.
In theory, if all other pages in the movable zone were movable,
virtio-mem would only shrink that zone and not create any kind of
fragmentation.

In the future, we might want to remember the zone again and use the
information when (un)plugging memory.  For now, let's keep it simple.

Note: Support for defragmentation is planned, to deal with fragmentation
after unplug due to memory chunks within memory blocks that could not get
unplugged before (e.g., somebody pinning pages within ZONE_MOVABLE for a
longer time).
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Qian Cai <cai@lca.pw>
Link: http://lkml.kernel.org/r/20200816125333.7434-6-david@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1c31cb49
...@@ -36,18 +36,10 @@ enum virtio_mem_mb_state { ...@@ -36,18 +36,10 @@ enum virtio_mem_mb_state {
VIRTIO_MEM_MB_STATE_OFFLINE, VIRTIO_MEM_MB_STATE_OFFLINE,
/* Partially plugged, fully added to Linux, offline. */ /* Partially plugged, fully added to Linux, offline. */
VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL, VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL,
/* Fully plugged, fully added to Linux, online (!ZONE_MOVABLE). */ /* Fully plugged, fully added to Linux, online. */
VIRTIO_MEM_MB_STATE_ONLINE, VIRTIO_MEM_MB_STATE_ONLINE,
/* Partially plugged, fully added to Linux, online (!ZONE_MOVABLE). */ /* Partially plugged, fully added to Linux, online. */
VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL, VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL,
/*
* Fully plugged, fully added to Linux, online (ZONE_MOVABLE).
* We are not allowed to allocate (unplug) parts of this block that
* are not movable (similar to gigantic pages). We will never allow
* to online OFFLINE_PARTIAL to ZONE_MOVABLE (as they would contain
* unmovable parts).
*/
VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE,
VIRTIO_MEM_MB_STATE_COUNT VIRTIO_MEM_MB_STATE_COUNT
}; };
...@@ -526,21 +518,10 @@ static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id) ...@@ -526,21 +518,10 @@ static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id)
} }
static int virtio_mem_notify_going_online(struct virtio_mem *vm, static int virtio_mem_notify_going_online(struct virtio_mem *vm,
unsigned long mb_id, unsigned long mb_id)
enum zone_type zone)
{ {
switch (virtio_mem_mb_get_state(vm, mb_id)) { switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
/*
* We won't allow to online a partially plugged memory block
* to the MOVABLE zone - it would contain unmovable parts.
*/
if (zone == ZONE_MOVABLE) {
dev_warn_ratelimited(&vm->vdev->dev,
"memory block has holes, MOVABLE not supported\n");
return NOTIFY_BAD;
}
return NOTIFY_OK;
case VIRTIO_MEM_MB_STATE_OFFLINE: case VIRTIO_MEM_MB_STATE_OFFLINE:
return NOTIFY_OK; return NOTIFY_OK;
default: default:
...@@ -560,7 +541,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, ...@@ -560,7 +541,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,
VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
break; break;
case VIRTIO_MEM_MB_STATE_ONLINE: case VIRTIO_MEM_MB_STATE_ONLINE:
case VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE:
virtio_mem_mb_set_state(vm, mb_id, virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_OFFLINE); VIRTIO_MEM_MB_STATE_OFFLINE);
break; break;
...@@ -579,24 +559,17 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, ...@@ -579,24 +559,17 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,
virtio_mem_retry(vm); virtio_mem_retry(vm);
} }
static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id, static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
enum zone_type zone)
{ {
unsigned long nb_offline; unsigned long nb_offline;
switch (virtio_mem_mb_get_state(vm, mb_id)) { switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
BUG_ON(zone == ZONE_MOVABLE);
virtio_mem_mb_set_state(vm, mb_id, virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
break; break;
case VIRTIO_MEM_MB_STATE_OFFLINE: case VIRTIO_MEM_MB_STATE_OFFLINE:
if (zone == ZONE_MOVABLE) virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE);
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE);
else
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE);
break; break;
default: default:
BUG(); BUG();
...@@ -675,7 +648,6 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, ...@@ -675,7 +648,6 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
const unsigned long start = PFN_PHYS(mhp->start_pfn); const unsigned long start = PFN_PHYS(mhp->start_pfn);
const unsigned long size = PFN_PHYS(mhp->nr_pages); const unsigned long size = PFN_PHYS(mhp->nr_pages);
const unsigned long mb_id = virtio_mem_phys_to_mb_id(start); const unsigned long mb_id = virtio_mem_phys_to_mb_id(start);
enum zone_type zone;
int rc = NOTIFY_OK; int rc = NOTIFY_OK;
if (!virtio_mem_overlaps_range(vm, start, size)) if (!virtio_mem_overlaps_range(vm, start, size))
...@@ -717,8 +689,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, ...@@ -717,8 +689,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
break; break;
} }
vm->hotplug_active = true; vm->hotplug_active = true;
zone = page_zonenum(pfn_to_page(mhp->start_pfn)); rc = virtio_mem_notify_going_online(vm, mb_id);
rc = virtio_mem_notify_going_online(vm, mb_id, zone);
break; break;
case MEM_OFFLINE: case MEM_OFFLINE:
virtio_mem_notify_offline(vm, mb_id); virtio_mem_notify_offline(vm, mb_id);
...@@ -726,8 +697,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, ...@@ -726,8 +697,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
mutex_unlock(&vm->hotplug_mutex); mutex_unlock(&vm->hotplug_mutex);
break; break;
case MEM_ONLINE: case MEM_ONLINE:
zone = page_zonenum(pfn_to_page(mhp->start_pfn)); virtio_mem_notify_online(vm, mb_id);
virtio_mem_notify_online(vm, mb_id, zone);
vm->hotplug_active = false; vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex); mutex_unlock(&vm->hotplug_mutex);
break; break;
...@@ -1906,8 +1876,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) ...@@ -1906,8 +1876,7 @@ static void virtio_mem_remove(struct virtio_device *vdev)
if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] || if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) {
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) {
dev_warn(&vdev->dev, "device still has system memory added\n"); dev_warn(&vdev->dev, "device still has system memory added\n");
} else { } else {
virtio_mem_delete_resource(vm); virtio_mem_delete_resource(vm);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment