Commit fe514739 authored by Dan Williams

libnvdimm: fix blk free space accounting

Commit a1f3e4d6 "libnvdimm, region: update nd_region_available_dpa()
for multi-pmem support" reworked blk dpa (DIMM Physical Address)
accounting to comprehend multiple pmem namespace allocations aliasing
with a given blk-dpa range.

The following call trace is a result of failing to account for allocated
blk capacity.

 WARNING: CPU: 1 PID: 2433 at tools/testing/nvdimm/../../../drivers/nvdimm/names
4 size_store+0x6f3/0x930 [libnvdimm]
 nd_region region5: allocation underrun: 0x0 of 0x1000000 bytes
 [..]
 Call Trace:
  dump_stack+0x86/0xc3
  __warn+0xcb/0xf0
  warn_slowpath_fmt+0x5f/0x80
  size_store+0x6f3/0x930 [libnvdimm]
  dev_attr_store+0x18/0x30

If a given blk-dpa allocation does not alias with any pmem ranges then
the full allocation should be accounted as busy space, not the size of
the current pmem contribution to the region.
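
As an aside for illustration (not part of the patch): a minimal userspace model of that rule, using invented sizes and a stand-in dpa_resource type. Every blk-dpa allocation is charged as busy in full, mirroring the simplified nd_blk_available_dpa() loop in the diff below; the pmem contribution is handled separately by alias_dpa_busy().

  #include <stdio.h>
  #include <string.h>

  struct dpa_resource {
      const char *name;          /* allocation label, e.g. "pmem-1" or "blk-1" */
      unsigned long long start;
      unsigned long long end;    /* inclusive, like struct resource */
  };

  static unsigned long long resource_size(const struct dpa_resource *res)
  {
      return res->end - res->start + 1;
  }

  int main(void)
  {
      /* invented DIMM with 64MiB of blk-capable capacity */
      unsigned long long available = 64ULL << 20;
      struct dpa_resource allocs[] = {
          { "pmem-1", 0,           (16ULL << 20) - 1 },
          { "blk-1",  48ULL << 20, (64ULL << 20) - 1 },
      };

      for (unsigned int i = 0; i < sizeof(allocs) / sizeof(allocs[0]); i++) {
          /* a blk allocation is busy space in full, aliased or not */
          if (strncmp(allocs[i].name, "blk", 3) != 0)
              continue;
          available -= resource_size(&allocs[i]);
      }

      printf("available blk dpa: %llu MiB\n", available >> 20); /* prints 48 */
      return 0;
  }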

The thinko that led to this confusion was not realizing that struct
resource management already guarantees no collisions between pmem
allocations and blk allocations on the same dimm. Also, we do not try to
support blk allocations in aliased pmem holes.
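
To make that concrete, here is a small standalone sketch (again with invented sizes, not kernel code) of the simplified alias_dpa_busy() scan: blk allocations are ignored outright, and only aliasing pmem allocations advance blk_start toward the end of the interleave-set mapping.

  #include <stdio.h>
  #include <string.h>

  struct dpa_resource {
      const char *name;
      unsigned long long start;
      unsigned long long end;    /* inclusive */
  };

  static unsigned long long max_u64(unsigned long long a, unsigned long long b)
  {
      return a > b ? a : b;
  }

  static unsigned long long min_u64(unsigned long long a, unsigned long long b)
  {
      return a < b ? a : b;
  }

  int main(void)
  {
      /* invented interleave-set mapping covering [0, 32MiB) */
      unsigned long long map_end = (32ULL << 20) - 1;
      unsigned long long blk_start = 0, new;
      struct dpa_resource allocs[] = {
          { "pmem-0", 0,           (8ULL << 20) - 1 },
          { "blk-0",  24ULL << 20, (32ULL << 20) - 1 },
          { "pmem-1", 8ULL << 20,  (12ULL << 20) - 1 },
      };

  retry:
      for (unsigned int i = 0; i < sizeof(allocs) / sizeof(allocs[0]); i++) {
          struct dpa_resource *res = &allocs[i];

          /* blk allocations cannot collide with pmem, so skip them */
          if (strncmp(res->name, "pmem", 4) != 0)
              continue;
          if ((res->start >= blk_start && res->start < map_end)
                  || (res->end >= blk_start && res->end <= map_end)) {
              new = max_u64(blk_start, min_u64(map_end + 1, res->end + 1));
              if (new != blk_start) {
                  blk_start = new;
                  goto retry;
              }
          }
      }

      /* prints 12: pmem claims the first 12MiB, blk space begins after it */
      printf("blk_start = %llu MiB\n", blk_start >> 20);
      return 0;
  }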

This patch also fixes a case where the available blk capacity could go negative.

Cc: <stable@vger.kernel.org>
Fixes: a1f3e4d6 ("libnvdimm, region: update nd_region_available_dpa() for multi-pmem support")
Reported-by: Dariusz Dokupil <dariusz.dokupil@intel.com>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent b03b99a3
@@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create);
 
 int alias_dpa_busy(struct device *dev, void *data)
 {
-        resource_size_t map_end, blk_start, new, busy;
+        resource_size_t map_end, blk_start, new;
         struct blk_alloc_info *info = data;
         struct nd_mapping *nd_mapping;
         struct nd_region *nd_region;
@@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data)
  retry:
         /*
          * Find the free dpa from the end of the last pmem allocation to
-         * the end of the interleave-set mapping that is not already
-         * covered by a blk allocation.
+         * the end of the interleave-set mapping.
          */
-        busy = 0;
         for_each_dpa_resource(ndd, res) {
+                if (strncmp(res->name, "pmem", 4) != 0)
+                        continue;
                 if ((res->start >= blk_start && res->start < map_end)
                                 || (res->end >= blk_start
                                         && res->end <= map_end)) {
-                        if (strncmp(res->name, "pmem", 4) == 0) {
-                                new = max(blk_start, min(map_end + 1,
-                                                        res->end + 1));
-                                if (new != blk_start) {
-                                        blk_start = new;
-                                        goto retry;
-                                }
-                        } else
-                                busy += min(map_end, res->end)
-                                        - max(nd_mapping->start, res->start) + 1;
-                } else if (nd_mapping->start > res->start
-                                && map_end < res->end) {
-                        /* total eclipse of the PMEM region mapping */
-                        busy += nd_mapping->size;
-                        break;
+                        new = max(blk_start, min(map_end + 1, res->end + 1));
+                        if (new != blk_start) {
+                                blk_start = new;
+                                goto retry;
+                        }
                 }
         }
 
@@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data)
                 return 1;
         }
 
-        info->available -= blk_start - nd_mapping->start + busy;
+        info->available -= blk_start - nd_mapping->start;
 
         return 0;
 }
 
-static int blk_dpa_busy(struct device *dev, void *data)
-{
-        struct blk_alloc_info *info = data;
-        struct nd_mapping *nd_mapping;
-        struct nd_region *nd_region;
-        resource_size_t map_end;
-        int i;
-
-        if (!is_nd_pmem(dev))
-                return 0;
-
-        nd_region = to_nd_region(dev);
-        for (i = 0; i < nd_region->ndr_mappings; i++) {
-                nd_mapping = &nd_region->mapping[i];
-                if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
-                        break;
-        }
-
-        if (i >= nd_region->ndr_mappings)
-                return 0;
-
-        map_end = nd_mapping->start + nd_mapping->size - 1;
-        if (info->res->start >= nd_mapping->start
-                        && info->res->start < map_end) {
-                if (info->res->end <= map_end) {
-                        info->busy = 0;
-                        return 1;
-                } else {
-                        info->busy -= info->res->end - map_end;
-                        return 0;
-                }
-        } else if (info->res->end >= nd_mapping->start
-                        && info->res->end <= map_end) {
-                info->busy -= nd_mapping->start - info->res->start;
-                return 0;
-        } else {
-                info->busy -= nd_mapping->size;
-                return 0;
-        }
-}
-
 /**
  * nd_blk_available_dpa - account the unused dpa of BLK region
  * @nd_mapping: container of dpa-resource-root + labels
@@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
         for_each_dpa_resource(ndd, res) {
                 if (strncmp(res->name, "blk", 3) != 0)
                         continue;
-
-                info.res = res;
-                info.busy = resource_size(res);
-                device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
-                info.available -= info.busy;
+                info.available -= resource_size(res);
         }
 
         return info.available;