Commit d1a5f2b4 authored by Dan Williams's avatar Dan Williams

block: use DAX for partition table reads

Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: default avatarRoss Zwisler <ross.zwisler@linux.intel.com>
Tested-by: default avatarRoss Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: default avatarMatthew Wilcox <willy@linux.intel.com>
Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 9f4736fe
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/kmod.h> #include <linux/kmod.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/genhd.h> #include <linux/genhd.h>
#include <linux/dax.h>
#include <linux/blktrace_api.h> #include <linux/blktrace_api.h>
#include "partitions/check.h" #include "partitions/check.h"
...@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) ...@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
return 0; return 0;
} }
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{ {
struct address_space *mapping = bdev->bd_inode->i_mapping; struct address_space *mapping = bdev->bd_inode->i_mapping;
struct page *page;
page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
NULL); NULL);
}
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
struct page *page;
/* don't populate page cache for dax capable devices */
if (IS_DAX(bdev->bd_inode))
page = read_dax_sector(bdev, n);
else
page = read_pagecache_sector(bdev, n);
if (!IS_ERR(page)) { if (!IS_ERR(page)) {
if (PageError(page)) if (PageError(page))
goto fail; goto fail;
......
...@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev, ...@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
blk_queue_exit(bdev->bd_queue); blk_queue_exit(bdev->bd_queue);
} }
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
struct page *page = alloc_pages(GFP_KERNEL, 0);
struct blk_dax_ctl dax = {
.size = PAGE_SIZE,
.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
};
long rc;
if (!page)
return ERR_PTR(-ENOMEM);
rc = dax_map_atomic(bdev, &dax);
if (rc < 0)
return ERR_PTR(rc);
memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
dax_unmap_atomic(bdev, &dax);
return page;
}
/* /*
* dax_clear_blocks() is called from within transaction context from XFS, * dax_clear_blocks() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS * and hence this means the stack from this point must follow GFP_NOFS
......
...@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, ...@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t); dax_iodone_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t); dax_iodone_t);
#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
#else
static inline struct page *read_dax_sector(struct block_device *bdev,
sector_t n)
{
return ERR_PTR(-ENXIO);
}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
unsigned int flags, get_block_t, dax_iodone_t); unsigned int flags, get_block_t, dax_iodone_t);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment