Commit 264015f8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'libnvdimm-for-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "Outside of the new ACPI-NFIT hot-add support this pull request is more
  notable for what it does not contain, than what it does.  There were a
  handful of development topics this cycle, dax get_user_pages, dax
  fsync, and raw block dax, that need more more iteration and will wait
  for 4.5.

  The patches to make devm and the pmem driver NUMA aware have been in
  -next for several weeks.  The hot-add support has not, but is
  contained to the NFIT driver and is passing unit tests.  The coredump
  support is straightforward and was looked over by Jeff.  All of it has
  received a 0day build success notification across 107 configs.

  Summary:

   - Add support for the ACPI 6.0 NFIT hot add mechanism to process
     updates of the NFIT at runtime.

   - Teach the coredump implementation how to filter out DAX mappings.

   - Introduce NUMA hints for allocations made by the pmem driver, and
     as a side effect all devm allocations now hint their NUMA node by
     default"

* tag 'libnvdimm-for-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  coredump: add DAX filtering for FDPIC ELF coredumps
  coredump: add DAX filtering for ELF coredumps
  acpi: nfit: Add support for hot-add
  nfit: in acpi_nfit_init, break on a 0-length table
  pmem, memremap: convert to numa aware allocations
  devm_memremap_pages: use numa_mem_id
  devm: make allocations numa aware by default
  devm_memremap: convert to return ERR_PTR
  devm_memunmap: use devres_release()
  pmem: kill memremap_pmem()
  x86, mm: quiet arch_add_memory()
parents d55fc378 ab27a8d0
......@@ -1605,16 +1605,16 @@ Documentation/accounting.
---------------------------------------------------------------
When a process is dumped, all anonymous memory is written to a core file as
long as the size of the core file isn't limited. But sometimes we don't want
to dump some memory segments, for example, huge shared memory. Conversely,
sometimes we want to save file-backed memory segments into a core file, not
only the individual files.
to dump some memory segments, for example, huge shared memory or DAX.
Conversely, sometimes we want to save file-backed memory segments into a core
file, not only the individual files.
/proc/<pid>/coredump_filter allows you to customize which memory segments
will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
of memory types. If a bit of the bitmask is set, memory segments of the
corresponding memory type are dumped, otherwise they are not dumped.
The following 7 memory types are supported:
The following 9 memory types are supported:
- (bit 0) anonymous private memory
- (bit 1) anonymous shared memory
- (bit 2) file-backed private memory
......@@ -1623,20 +1623,22 @@ The following 7 memory types are supported:
effective only if the bit 2 is cleared)
- (bit 5) hugetlb private memory
- (bit 6) hugetlb shared memory
- (bit 7) DAX private memory
- (bit 8) DAX shared memory
Note that MMIO pages such as frame buffer are never dumped and vDSO pages
are always dumped regardless of the bitmask status.
Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only
effected by bit 5-6.
Note that bits 0-4 don't affect hugetlb or DAX memory. hugetlb memory is
only affected by bit 5-6, and DAX is only affected by bits 7-8.
Default value of coredump_filter is 0x23; this means all anonymous memory
segments and hugetlb private memory are dumped.
The default value of coredump_filter is 0x33; this means all anonymous memory
segments, ELF header pages and hugetlb private memory are dumped.
If you don't want to dump all shared memory segments attached to pid 1234,
write 0x21 to the process's proc file.
write 0x31 to the process's proc file.
$ echo 0x21 > /proc/1234/coredump_filter
$ echo 0x31 > /proc/1234/coredump_filter
When a new process is created, the process inherits the bitmask status from its
parent. It is useful to set up coredump_filter before the program runs.
......
......@@ -354,7 +354,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
}
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
pr_debug(" [mem %#010lx-%#010lx] page %s\n",
mr[i].start, mr[i].end - 1,
page_size_string(&mr[i]));
......@@ -401,7 +401,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
unsigned long ret = 0;
int nr_range, i;
pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n",
pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n",
start, end - 1);
memset(mr, 0, sizeof(mr));
......
......@@ -1270,7 +1270,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
/* check to see if we have contiguous blocks */
if (p_end != p || node_start != node) {
if (p_start)
printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
addr_start, addr_end-1, p_start, p_end-1, node_start);
addr_start = addr;
node_start = node;
......@@ -1368,7 +1368,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
void __meminit vmemmap_populate_print_last(void)
{
if (p_start) {
printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
addr_start, addr_end-1, p_start, p_end-1, node_start);
p_start = NULL;
p_end = NULL;
......
......@@ -33,6 +33,15 @@ static bool force_enable_dimms;
module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
struct nfit_table_prev {
struct list_head spas;
struct list_head memdevs;
struct list_head dcrs;
struct list_head bdws;
struct list_head idts;
struct list_head flushes;
};
static u8 nfit_uuid[NFIT_UUID_MAX][16];
const u8 *to_nfit_uuid(enum nfit_uuids id)
......@@ -221,12 +230,20 @@ static int nfit_spa_type(struct acpi_nfit_system_address *spa)
}
static bool add_spa(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_system_address *spa)
{
struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa),
GFP_KERNEL);
struct nfit_spa *nfit_spa;
list_for_each_entry(nfit_spa, &prev->spas, list) {
if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
list_move_tail(&nfit_spa->list, &acpi_desc->spas);
return true;
}
}
nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL);
if (!nfit_spa)
return false;
INIT_LIST_HEAD(&nfit_spa->list);
......@@ -239,12 +256,19 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
}
static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_memory_map *memdev)
{
struct device *dev = acpi_desc->dev;
struct nfit_memdev *nfit_memdev = devm_kzalloc(dev,
sizeof(*nfit_memdev), GFP_KERNEL);
struct nfit_memdev *nfit_memdev;
list_for_each_entry(nfit_memdev, &prev->memdevs, list)
if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
return true;
}
nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL);
if (!nfit_memdev)
return false;
INIT_LIST_HEAD(&nfit_memdev->list);
......@@ -257,12 +281,19 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
}
static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_control_region *dcr)
{
struct device *dev = acpi_desc->dev;
struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr),
GFP_KERNEL);
struct nfit_dcr *nfit_dcr;
list_for_each_entry(nfit_dcr, &prev->dcrs, list)
if (memcmp(nfit_dcr->dcr, dcr, sizeof(*dcr)) == 0) {
list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
return true;
}
nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL);
if (!nfit_dcr)
return false;
INIT_LIST_HEAD(&nfit_dcr->list);
......@@ -274,12 +305,19 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
}
static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_data_region *bdw)
{
struct device *dev = acpi_desc->dev;
struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw),
GFP_KERNEL);
struct nfit_bdw *nfit_bdw;
list_for_each_entry(nfit_bdw, &prev->bdws, list)
if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
return true;
}
nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL);
if (!nfit_bdw)
return false;
INIT_LIST_HEAD(&nfit_bdw->list);
......@@ -291,12 +329,19 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
}
static bool add_idt(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_interleave *idt)
{
struct device *dev = acpi_desc->dev;
struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt),
GFP_KERNEL);
struct nfit_idt *nfit_idt;
list_for_each_entry(nfit_idt, &prev->idts, list)
if (memcmp(nfit_idt->idt, idt, sizeof(*idt)) == 0) {
list_move_tail(&nfit_idt->list, &acpi_desc->idts);
return true;
}
nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL);
if (!nfit_idt)
return false;
INIT_LIST_HEAD(&nfit_idt->list);
......@@ -308,12 +353,19 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
}
static bool add_flush(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_flush_address *flush)
{
struct device *dev = acpi_desc->dev;
struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush),
GFP_KERNEL);
struct nfit_flush *nfit_flush;
list_for_each_entry(nfit_flush, &prev->flushes, list)
if (memcmp(nfit_flush->flush, flush, sizeof(*flush)) == 0) {
list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
return true;
}
nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL);
if (!nfit_flush)
return false;
INIT_LIST_HEAD(&nfit_flush->list);
......@@ -324,8 +376,8 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
return true;
}
static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
const void *end)
static void *add_table(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev, void *table, const void *end)
{
struct device *dev = acpi_desc->dev;
struct acpi_nfit_header *hdr;
......@@ -335,29 +387,35 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
return NULL;
hdr = table;
if (!hdr->length) {
dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
hdr->type);
return NULL;
}
switch (hdr->type) {
case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
if (!add_spa(acpi_desc, table))
if (!add_spa(acpi_desc, prev, table))
return err;
break;
case ACPI_NFIT_TYPE_MEMORY_MAP:
if (!add_memdev(acpi_desc, table))
if (!add_memdev(acpi_desc, prev, table))
return err;
break;
case ACPI_NFIT_TYPE_CONTROL_REGION:
if (!add_dcr(acpi_desc, table))
if (!add_dcr(acpi_desc, prev, table))
return err;
break;
case ACPI_NFIT_TYPE_DATA_REGION:
if (!add_bdw(acpi_desc, table))
if (!add_bdw(acpi_desc, prev, table))
return err;
break;
case ACPI_NFIT_TYPE_INTERLEAVE:
if (!add_idt(acpi_desc, table))
if (!add_idt(acpi_desc, prev, table))
return err;
break;
case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
if (!add_flush(acpi_desc, table))
if (!add_flush(acpi_desc, prev, table))
return err;
break;
case ACPI_NFIT_TYPE_SMBIOS:
......@@ -802,12 +860,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
if (nvdimm) {
/*
* If for some reason we find multiple DCRs the
* first one wins
*/
dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n",
nvdimm_name(nvdimm));
dimm_count++;
continue;
}
......@@ -1476,6 +1529,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct resource res;
int count = 0, rc;
if (nfit_spa->is_registered)
return 0;
if (spa->range_index == 0) {
dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
__func__);
......@@ -1529,6 +1585,8 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc))
return -ENOMEM;
}
nfit_spa->is_registered = 1;
return 0;
}
......@@ -1545,71 +1603,101 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
return 0;
}
static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev)
{
struct device *dev = acpi_desc->dev;
if (!list_empty(&prev->spas) ||
!list_empty(&prev->memdevs) ||
!list_empty(&prev->dcrs) ||
!list_empty(&prev->bdws) ||
!list_empty(&prev->idts) ||
!list_empty(&prev->flushes)) {
dev_err(dev, "new nfit deletes entries (unsupported)\n");
return -ENXIO;
}
return 0;
}
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
{
struct device *dev = acpi_desc->dev;
struct nfit_table_prev prev;
const void *end;
u8 *data;
int rc;
INIT_LIST_HEAD(&acpi_desc->spa_maps);
INIT_LIST_HEAD(&acpi_desc->spas);
INIT_LIST_HEAD(&acpi_desc->dcrs);
INIT_LIST_HEAD(&acpi_desc->bdws);
INIT_LIST_HEAD(&acpi_desc->idts);
INIT_LIST_HEAD(&acpi_desc->flushes);
INIT_LIST_HEAD(&acpi_desc->memdevs);
INIT_LIST_HEAD(&acpi_desc->dimms);
mutex_init(&acpi_desc->spa_map_mutex);
mutex_lock(&acpi_desc->init_mutex);
INIT_LIST_HEAD(&prev.spas);
INIT_LIST_HEAD(&prev.memdevs);
INIT_LIST_HEAD(&prev.dcrs);
INIT_LIST_HEAD(&prev.bdws);
INIT_LIST_HEAD(&prev.idts);
INIT_LIST_HEAD(&prev.flushes);
list_cut_position(&prev.spas, &acpi_desc->spas,
acpi_desc->spas.prev);
list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
acpi_desc->memdevs.prev);
list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
acpi_desc->dcrs.prev);
list_cut_position(&prev.bdws, &acpi_desc->bdws,
acpi_desc->bdws.prev);
list_cut_position(&prev.idts, &acpi_desc->idts,
acpi_desc->idts.prev);
list_cut_position(&prev.flushes, &acpi_desc->flushes,
acpi_desc->flushes.prev);
data = (u8 *) acpi_desc->nfit;
end = data + sz;
data += sizeof(struct acpi_table_nfit);
while (!IS_ERR_OR_NULL(data))
data = add_table(acpi_desc, data, end);
data = add_table(acpi_desc, &prev, data, end);
if (IS_ERR(data)) {
dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
PTR_ERR(data));
return PTR_ERR(data);
rc = PTR_ERR(data);
goto out_unlock;
}
if (nfit_mem_init(acpi_desc) != 0)
return -ENOMEM;
rc = acpi_nfit_check_deletions(acpi_desc, &prev);
if (rc)
goto out_unlock;
if (nfit_mem_init(acpi_desc) != 0) {
rc = -ENOMEM;
goto out_unlock;
}
acpi_nfit_init_dsms(acpi_desc);
rc = acpi_nfit_register_dimms(acpi_desc);
if (rc)
return rc;
goto out_unlock;
rc = acpi_nfit_register_regions(acpi_desc);
return acpi_nfit_register_regions(acpi_desc);
out_unlock:
mutex_unlock(&acpi_desc->init_mutex);
return rc;
}
EXPORT_SYMBOL_GPL(acpi_nfit_init);
static int acpi_nfit_add(struct acpi_device *adev)
static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev)
{
struct nvdimm_bus_descriptor *nd_desc;
struct acpi_nfit_desc *acpi_desc;
struct device *dev = &adev->dev;
struct acpi_table_header *tbl;
acpi_status status = AE_OK;
acpi_size sz;
int rc;
status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz);
if (ACPI_FAILURE(status)) {
dev_err(dev, "failed to find NFIT\n");
return -ENXIO;
}
acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
if (!acpi_desc)
return -ENOMEM;
return ERR_PTR(-ENOMEM);
dev_set_drvdata(dev, acpi_desc);
acpi_desc->dev = dev;
acpi_desc->nfit = (struct acpi_table_nfit *) tbl;
acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
nd_desc = &acpi_desc->nd_desc;
nd_desc->provider_name = "ACPI.NFIT";
......@@ -1617,8 +1705,57 @@ static int acpi_nfit_add(struct acpi_device *adev)
nd_desc->attr_groups = acpi_nfit_attribute_groups;
acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc);
if (!acpi_desc->nvdimm_bus)
return -ENXIO;
if (!acpi_desc->nvdimm_bus) {
devm_kfree(dev, acpi_desc);
return ERR_PTR(-ENXIO);
}
INIT_LIST_HEAD(&acpi_desc->spa_maps);
INIT_LIST_HEAD(&acpi_desc->spas);
INIT_LIST_HEAD(&acpi_desc->dcrs);
INIT_LIST_HEAD(&acpi_desc->bdws);
INIT_LIST_HEAD(&acpi_desc->idts);
INIT_LIST_HEAD(&acpi_desc->flushes);
INIT_LIST_HEAD(&acpi_desc->memdevs);
INIT_LIST_HEAD(&acpi_desc->dimms);
mutex_init(&acpi_desc->spa_map_mutex);
mutex_init(&acpi_desc->init_mutex);
return acpi_desc;
}
static int acpi_nfit_add(struct acpi_device *adev)
{
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_nfit_desc *acpi_desc;
struct device *dev = &adev->dev;
struct acpi_table_header *tbl;
acpi_status status = AE_OK;
acpi_size sz;
int rc;
status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz);
if (ACPI_FAILURE(status)) {
/* This is ok, we could have an nvdimm hotplugged later */
dev_dbg(dev, "failed to find NFIT at startup\n");
return 0;
}
acpi_desc = acpi_nfit_desc_init(adev);
if (IS_ERR(acpi_desc)) {
dev_err(dev, "%s: error initializing acpi_desc: %ld\n",
__func__, PTR_ERR(acpi_desc));
return PTR_ERR(acpi_desc);
}
acpi_desc->nfit = (struct acpi_table_nfit *) tbl;
/* Evaluate _FIT and override with that if present */
status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
if (ACPI_SUCCESS(status) && buf.length > 0) {
acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer;
sz = buf.length;
}
rc = acpi_nfit_init(acpi_desc, sz);
if (rc) {
......@@ -1636,6 +1773,54 @@ static int acpi_nfit_remove(struct acpi_device *adev)
return 0;
}
static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_table_nfit *nfit_saved;
struct device *dev = &adev->dev;
acpi_status status;
int ret;
dev_dbg(dev, "%s: event: %d\n", __func__, event);
device_lock(dev);
if (!dev->driver) {
/* dev->driver may be null if we're being removed */
dev_dbg(dev, "%s: no driver found for dev\n", __func__);
return;
}
if (!acpi_desc) {
acpi_desc = acpi_nfit_desc_init(adev);
if (IS_ERR(acpi_desc)) {
dev_err(dev, "%s: error initializing acpi_desc: %ld\n",
__func__, PTR_ERR(acpi_desc));
goto out_unlock;
}
}
/* Evaluate _FIT */
status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
if (ACPI_FAILURE(status)) {
dev_err(dev, "failed to evaluate _FIT\n");
goto out_unlock;
}
nfit_saved = acpi_desc->nfit;
acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer;
ret = acpi_nfit_init(acpi_desc, buf.length);
if (!ret) {
/* Merge failed, restore old nfit, and exit */
acpi_desc->nfit = nfit_saved;
dev_err(dev, "failed to merge updated NFIT\n");
}
kfree(buf.pointer);
out_unlock:
device_unlock(dev);
}
static const struct acpi_device_id acpi_nfit_ids[] = {
{ "ACPI0012", 0 },
{ "", 0 },
......@@ -1648,6 +1833,7 @@ static struct acpi_driver acpi_nfit_driver = {
.ops = {
.add = acpi_nfit_add,
.remove = acpi_nfit_remove,
.notify = acpi_nfit_notify,
},
};
......
......@@ -48,6 +48,7 @@ enum {
struct nfit_spa {
struct acpi_nfit_system_address *spa;
struct list_head list;
int is_registered;
};
struct nfit_dcr {
......@@ -97,6 +98,7 @@ struct acpi_nfit_desc {
struct nvdimm_bus_descriptor nd_desc;
struct acpi_table_nfit *nfit;
struct mutex spa_map_mutex;
struct mutex init_mutex;
struct list_head spa_maps;
struct list_head memdevs;
struct list_head flushes;
......
......@@ -82,12 +82,12 @@ static struct devres_group * node_to_group(struct devres_node *node)
}
static __always_inline struct devres * alloc_dr(dr_release_t release,
size_t size, gfp_t gfp)
size_t size, gfp_t gfp, int nid)
{
size_t tot_size = sizeof(struct devres) + size;
struct devres *dr;
dr = kmalloc_track_caller(tot_size, gfp);
dr = kmalloc_node_track_caller(tot_size, gfp, nid);
if (unlikely(!dr))
return NULL;
......@@ -106,24 +106,25 @@ static void add_dr(struct device *dev, struct devres_node *node)
}
#ifdef CONFIG_DEBUG_DEVRES
void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
void * __devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid,
const char *name)
{
struct devres *dr;
dr = alloc_dr(release, size, gfp | __GFP_ZERO);
dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid);
if (unlikely(!dr))
return NULL;
set_node_dbginfo(&dr->node, name, size);
return dr->data;
}
EXPORT_SYMBOL_GPL(__devres_alloc);
EXPORT_SYMBOL_GPL(__devres_alloc_node);
#else
/**
* devres_alloc - Allocate device resource data
* @release: Release function devres will be associated with
* @size: Allocation size
* @gfp: Allocation flags
* @nid: NUMA node
*
* Allocate devres of @size bytes. The allocated area is zeroed, then
* associated with @release. The returned pointer can be passed to
......@@ -132,16 +133,16 @@ EXPORT_SYMBOL_GPL(__devres_alloc);
* RETURNS:
* Pointer to allocated devres on success, NULL on failure.
*/
void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
void * devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid)
{
struct devres *dr;
dr = alloc_dr(release, size, gfp | __GFP_ZERO);
dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid);
if (unlikely(!dr))
return NULL;
return dr->data;
}
EXPORT_SYMBOL_GPL(devres_alloc);
EXPORT_SYMBOL_GPL(devres_alloc_node);
#endif
/**
......@@ -776,7 +777,7 @@ void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
struct devres *dr;
/* use raw alloc_dr for kmalloc caller tracing */
dr = alloc_dr(devm_kmalloc_release, size, gfp);
dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
if (unlikely(!dr))
return NULL;
......
......@@ -150,18 +150,15 @@ static struct pmem_device *pmem_alloc(struct device *dev,
return ERR_PTR(-EBUSY);
}
if (pmem_should_map_pages(dev)) {
void *addr = devm_memremap_pages(dev, res);
if (pmem_should_map_pages(dev))
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
else
pmem->virt_addr = (void __pmem *) devm_memremap(dev,
pmem->phys_addr, pmem->size,
ARCH_MEMREMAP_PMEM);
if (IS_ERR(addr))
return addr;
pmem->virt_addr = (void __pmem *) addr;
} else {
pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr,
pmem->size);
if (!pmem->virt_addr)
return ERR_PTR(-ENXIO);
}
if (IS_ERR(pmem->virt_addr))
return (void __force *) pmem->virt_addr;
return pmem;
}
......@@ -179,9 +176,10 @@ static void pmem_detach_disk(struct pmem_device *pmem)
static int pmem_attach_disk(struct device *dev,
struct nd_namespace_common *ndns, struct pmem_device *pmem)
{
int nid = dev_to_node(dev);
struct gendisk *disk;
pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
if (!pmem->pmem_queue)
return -ENOMEM;
......@@ -191,7 +189,7 @@ static int pmem_attach_disk(struct device *dev,
blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
disk = alloc_disk(0);
disk = alloc_disk_node(0, nid);
if (!disk) {
blk_cleanup_queue(pmem->pmem_queue);
return -ENOMEM;
......@@ -363,8 +361,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
/* establish pfn range for lookup, and switch to direct map */
pmem = dev_get_drvdata(dev);
memunmap_pmem(dev, pmem->virt_addr);
pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res);
devm_memunmap(dev, (void __force *) pmem->virt_addr);
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
if (IS_ERR(pmem->virt_addr)) {
rc = PTR_ERR(pmem->virt_addr);
goto err;
......
......@@ -35,6 +35,7 @@
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/dax.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
......@@ -1236,6 +1237,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
if (vma->vm_flags & VM_DONTDUMP)
return 0;
/* support for DAX */
if (vma_is_dax(vma)) {
if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
goto whole;
if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
goto whole;
return 0;
}
/* Hugetlb memory check */
if (vma->vm_flags & VM_HUGETLB) {
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
......
......@@ -35,6 +35,7 @@
#include <linux/elf-fdpic.h>
#include <linux/elfcore.h>
#include <linux/coredump.h>
#include <linux/dax.h>
#include <asm/uaccess.h>
#include <asm/param.h>
......@@ -1231,6 +1232,20 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
return 0;
}
/* support for DAX */
if (vma_is_dax(vma)) {
if (vma->vm_flags & VM_SHARED) {
dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags);
kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start,
vma->vm_flags, dump_ok ? "yes" : "no");
} else {
dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags);
kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start,
vma->vm_flags, dump_ok ? "yes" : "no");
}
return dump_ok;
}
/* By default, dump shared memory if mapped from an anonymous file. */
if (vma->vm_flags & VM_SHARED) {
if (file_inode(vma->vm_file)->i_nlink == 0) {
......
......@@ -604,13 +604,21 @@ typedef void (*dr_release_t)(struct device *dev, void *res);
typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data);
#ifdef CONFIG_DEBUG_DEVRES
extern void *__devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
const char *name);
extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp,
int nid, const char *name);
#define devres_alloc(release, size, gfp) \
__devres_alloc(release, size, gfp, #release)
__devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release)
#define devres_alloc_node(release, size, gfp, nid) \
__devres_alloc_node(release, size, gfp, nid, #release)
#else
extern void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp);
extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp,
int nid);
static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
{
return devres_alloc_node(release, size, gfp, NUMA_NO_NODE);
}
#endif
extern void devres_for_each_res(struct device *dev, dr_release_t release,
dr_match_t match, void *match_data,
void (*fn)(struct device *, void *, void *),
......
......@@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si
memcpy(dst, (void __force const *) src, size);
}
static inline void memunmap_pmem(struct device *dev, void __pmem *addr)
{
devm_memunmap(dev, (void __force *) addr);
}
static inline bool arch_has_pmem_api(void)
{
return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
......@@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void)
* These defaults seek to offer decent performance and minimize the
* window between i/o completion and writes being durable on media.
* However, it is undefined / architecture specific whether
* default_memremap_pmem + default_memcpy_to_pmem is sufficient for
* ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
* making data durable relative to i/o completion.
*/
static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
......@@ -116,25 +111,6 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
memset((void __force *)addr, 0, size);
}
/**
* memremap_pmem - map physical persistent memory for pmem api
* @offset: physical address of persistent memory
* @size: size of the mapping
*
* Establish a mapping of the architecture specific memory type expected
* by memcpy_to_pmem() and wmb_pmem(). For example, it may be
* the case that an uncacheable or writethrough mapping is sufficient,
* or a writeback mapping provided memcpy_to_pmem() and
* wmb_pmem() arrange for the data to be written through the
* cache to persistent media.
*/
static inline void __pmem *memremap_pmem(struct device *dev,
resource_size_t offset, unsigned long size)
{
return (void __pmem *) devm_memremap(dev, offset, size,
ARCH_MEMREMAP_PMEM);
}
/**
* memcpy_to_pmem - copy data to persistent memory
* @dst: destination buffer for the copy
......
......@@ -484,9 +484,11 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_DUMP_ELF_HEADERS 6
#define MMF_DUMP_HUGETLB_PRIVATE 7
#define MMF_DUMP_HUGETLB_SHARED 8
#define MMF_DUMP_DAX_PRIVATE 9
#define MMF_DUMP_DAX_SHARED 10
#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
#define MMF_DUMP_FILTER_BITS 7
#define MMF_DUMP_FILTER_BITS 9
#define MMF_DUMP_FILTER_MASK \
(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
#define MMF_DUMP_FILTER_DEFAULT \
......
......@@ -124,9 +124,10 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
{
void **ptr, *addr;
ptr = devres_alloc(devm_memremap_release, sizeof(*ptr), GFP_KERNEL);
ptr = devres_alloc_node(devm_memremap_release, sizeof(*ptr), GFP_KERNEL,
dev_to_node(dev));
if (!ptr)
return NULL;
return ERR_PTR(-ENOMEM);
addr = memremap(offset, size, flags);
if (addr) {
......@@ -141,9 +142,8 @@ EXPORT_SYMBOL(devm_memremap);
void devm_memunmap(struct device *dev, void *addr)
{
WARN_ON(devres_destroy(dev, devm_memremap_release, devm_memremap_match,
addr));
memunmap(addr);
WARN_ON(devres_release(dev, devm_memremap_release,
devm_memremap_match, addr));
}
EXPORT_SYMBOL(devm_memunmap);
......@@ -176,8 +176,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
if (is_ram == REGION_INTERSECTS)
return __va(res->start);
page_map = devres_alloc(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL);
page_map = devres_alloc_node(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
if (!page_map)
return ERR_PTR(-ENOMEM);
......@@ -185,7 +185,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
nid = dev_to_node(dev);
if (nid < 0)
nid = 0;
nid = numa_mem_id();
error = arch_add_memory(nid, res->start, resource_size(res), true);
if (error) {
......
......@@ -17,8 +17,10 @@
#include <linux/vmalloc.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ndctl.h>
#include <linux/sizes.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <nfit.h>
#include <nd.h>
......@@ -44,6 +46,15 @@
* +------+ | blk5.0 | pm1.0 | 3 region5
* +-------------------------+----------+-+-------+
*
* +--+---+
* | cpu1 |
* +--+---+ (Hotplug DIMM)
* | +----------------------------------------------+
* +--+---+ | blk6.0/pm7.0 | 4 region6/7
* | imc0 +--+----------------------------------------------+
* +------+
*
*
* *) In this layout we have four dimms and two memory controllers in one
* socket. Each unique interface (BLK or PMEM) to DPA space
* is identified by a region device with a dynamically assigned id.
......@@ -85,8 +96,8 @@
* reference an NVDIMM.
*/
enum {
NUM_PM = 2,
NUM_DCR = 4,
NUM_PM = 3,
NUM_DCR = 5,
NUM_BDW = NUM_DCR,
NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
......@@ -115,6 +126,7 @@ static u32 handle[NUM_DCR] = {
[1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
[2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
[3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
};
struct nfit_test {
......@@ -138,6 +150,7 @@ struct nfit_test {
dma_addr_t *dcr_dma;
int (*alloc)(struct nfit_test *t);
void (*setup)(struct nfit_test *t);
int setup_hotplug;
};
static struct nfit_test *to_nfit_test(struct device *dev)
......@@ -428,6 +441,10 @@ static int nfit_test0_alloc(struct nfit_test *t)
if (!t->spa_set[1])
return -ENOMEM;
t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
if (!t->spa_set[2])
return -ENOMEM;
for (i = 0; i < NUM_DCR; i++) {
t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
if (!t->dimm[i])
......@@ -950,6 +967,126 @@ static void nfit_test0_setup(struct nfit_test *t)
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[3];
if (t->setup_hotplug) {
offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
/* dcr-descriptor4 */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 4+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~handle[4];
dcr->windows = 1;
dcr->window_size = DCR_SIZE;
dcr->command_offset = 0;
dcr->command_size = 8;
dcr->status_offset = 8;
dcr->status_size = 4;
offset = offset + sizeof(struct acpi_nfit_control_region);
/* bdw4 (spa/dcr4, dimm4) */
bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
bdw->header.length = sizeof(struct acpi_nfit_data_region);
bdw->region_index = 4+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
bdw->capacity = DIMM_SIZE;
bdw->start_address = 0;
offset = offset + sizeof(struct acpi_nfit_data_region);
/* spa10 (dcr4) dimm4 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 10+1;
spa->address = t->dcr_dma[4];
spa->length = DCR_SIZE;
/*
* spa11 (single-dimm interleave for hotplug, note storage
* does not actually alias the related block-data-window
* regions)
*/
spa = nfit_buf + offset + sizeof(*spa);
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 11+1;
spa->address = t->spa_set_dma[2];
spa->length = SPA0_SIZE;
/* spa12 (bdw for dcr4) dimm4 */
spa = nfit_buf + offset + sizeof(*spa) * 2;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 12+1;
spa->address = t->dimm_dma[4];
spa->length = DIMM_SIZE;
offset = offset + sizeof(*spa) * 3;
/* mem-region14 (spa/dcr4, dimm4) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[4];
memdev->physical_id = 4;
memdev->region_id = 0;
memdev->range_index = 10+1;
memdev->region_index = 4+1;
memdev->region_size = 0;
memdev->region_offset = 0;
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
/* mem-region15 (spa0, dimm4) */
memdev = nfit_buf + offset +
sizeof(struct acpi_nfit_memory_map);
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[4];
memdev->physical_id = 4;
memdev->region_id = 0;
memdev->range_index = 11+1;
memdev->region_index = 4+1;
memdev->region_size = SPA0_SIZE;
memdev->region_offset = t->spa_set_dma[2];
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
/* mem-region16 (spa/dcr4, dimm4) */
memdev = nfit_buf + offset +
sizeof(struct acpi_nfit_memory_map) * 2;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[4];
memdev->physical_id = 4;
memdev->region_id = 0;
memdev->range_index = 12+1;
memdev->region_index = 4+1;
memdev->region_size = 0;
memdev->region_offset = 0;
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
/* flush3 (dimm4) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = sizeof(struct acpi_nfit_flush_address);
flush->device_handle = handle[4];
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[4];
}
acpi_desc = &t->acpi_desc;
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
......@@ -1108,6 +1245,29 @@ static int nfit_test_probe(struct platform_device *pdev)
if (!acpi_desc->nvdimm_bus)
return -ENXIO;
INIT_LIST_HEAD(&acpi_desc->spa_maps);
INIT_LIST_HEAD(&acpi_desc->spas);
INIT_LIST_HEAD(&acpi_desc->dcrs);
INIT_LIST_HEAD(&acpi_desc->bdws);
INIT_LIST_HEAD(&acpi_desc->idts);
INIT_LIST_HEAD(&acpi_desc->flushes);
INIT_LIST_HEAD(&acpi_desc->memdevs);
INIT_LIST_HEAD(&acpi_desc->dimms);
mutex_init(&acpi_desc->spa_map_mutex);
mutex_init(&acpi_desc->init_mutex);
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
if (rc) {
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
return rc;
}
if (nfit_test->setup != nfit_test0_setup)
return 0;
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
if (rc) {
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment