Commit 4759d386 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull DAX updates from Dan Williams:
 "The completion of Jan's DAX work for 4.10.

  As I mentioned in the libnvdimm-for-4.10 pull request, these are some
  final fixes for the DAX dirty-cacheline-tracking invalidation work
  that was merged through the -mm, ext4, and xfs trees in -rc1. These
  patches were prepared prior to the merge window, but we waited for
  4.10-rc1 to have a stable merge base after all the prerequisites were
  merged.

  Quoting Jan on the overall changes in these patches:

     "So I'd like all these 6 patches to go for rc2. The first three
      patches fix invalidation of exceptional DAX entries (a bug which
      is there for a long time) - without these patches data loss can
      occur on power failure even though user called fsync(2). The other
      three patches change locking of DAX faults so that ->iomap_begin()
      is called in a more relaxed locking context and we are safe to
      start a transaction there for ext4"

  These have received a build success notification from the kbuild
  robot, and pass the latest libnvdimm unit tests. There have not been
  any -next releases since -rc1, so they have not appeared there"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  ext4: Simplify DAX fault path
  dax: Call ->iomap_begin without entry lock during dax fault
  dax: Finish fault completely when loading holes
  dax: Avoid page invalidation races and unnecessary radix tree traversals
  mm: Invalidate DAX radix tree entries only if appropriate
  ext2: Return BH_New buffers for zeroed blocks
parents 238d1d0f 1db17542
This diff is collapsed.
...@@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode, ...@@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock(&ei->truncate_mutex); mutex_unlock(&ei->truncate_mutex);
goto cleanup; goto cleanup;
} }
} else {
*new = true;
} }
*new = true;
ext2_splice_branch(inode, iblock, partial, indirect_blks, count); ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
mutex_unlock(&ei->truncate_mutex); mutex_unlock(&ei->truncate_mutex);
......
...@@ -258,7 +258,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -258,7 +258,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
int result; int result;
handle_t *handle = NULL;
struct inode *inode = file_inode(vma->vm_file); struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE; bool write = vmf->flags & FAULT_FLAG_WRITE;
...@@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (write) { if (write) {
sb_start_pagefault(sb); sb_start_pagefault(sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem); }
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, down_read(&EXT4_I(inode)->i_mmap_sem);
EXT4_DATA_TRANS_BLOCKS(sb)); result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
} else up_read(&EXT4_I(inode)->i_mmap_sem);
down_read(&EXT4_I(inode)->i_mmap_sem); if (write)
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb); sb_end_pagefault(sb);
} else
up_read(&EXT4_I(inode)->i_mmap_sem);
return result; return result;
} }
...@@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, ...@@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, unsigned int flags) pmd_t *pmd, unsigned int flags)
{ {
int result; int result;
handle_t *handle = NULL;
struct inode *inode = file_inode(vma->vm_file); struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
bool write = flags & FAULT_FLAG_WRITE; bool write = flags & FAULT_FLAG_WRITE;
...@@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, ...@@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) { if (write) {
sb_start_pagefault(sb); sb_start_pagefault(sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
ext4_chunk_trans_blocks(inode,
PMD_SIZE / PAGE_SIZE));
} else
down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else {
result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
&ext4_iomap_ops);
} }
down_read(&EXT4_I(inode)->i_mmap_sem);
if (write) { result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
if (!IS_ERR(handle)) &ext4_iomap_ops);
ext4_journal_stop(handle); up_read(&EXT4_I(inode)->i_mmap_sem);
up_read(&EXT4_I(inode)->i_mmap_sem); if (write)
sb_end_pagefault(sb); sb_end_pagefault(sb);
} else
up_read(&EXT4_I(inode)->i_mmap_sem);
return result; return result;
} }
......
...@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap_ops *ops); struct iomap_ops *ops);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping, void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, void *entry, bool wake_all); pgoff_t index, void *entry, bool wake_all);
......
...@@ -24,20 +24,12 @@ ...@@ -24,20 +24,12 @@
#include <linux/rmap.h> #include <linux/rmap.h>
#include "internal.h" #include "internal.h"
static void clear_exceptional_entry(struct address_space *mapping, static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
pgoff_t index, void *entry) void *entry)
{ {
struct radix_tree_node *node; struct radix_tree_node *node;
void **slot; void **slot;
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return;
if (dax_mapping(mapping)) {
dax_delete_mapping_entry(mapping, index);
return;
}
spin_lock_irq(&mapping->tree_lock); spin_lock_irq(&mapping->tree_lock);
/* /*
* Regular page slots are stabilized by the page lock even * Regular page slots are stabilized by the page lock even
...@@ -55,6 +47,56 @@ static void clear_exceptional_entry(struct address_space *mapping, ...@@ -55,6 +47,56 @@ static void clear_exceptional_entry(struct address_space *mapping,
spin_unlock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock);
} }
/*
* Unconditionally remove exceptional entry. Usually called from truncate path.
*/
static void truncate_exceptional_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return;
if (dax_mapping(mapping)) {
dax_delete_mapping_entry(mapping, index);
return;
}
clear_shadow_entry(mapping, index, entry);
}
/*
* Invalidate exceptional entry if easily possible. This handles exceptional
* entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
* clean entries.
*/
static int invalidate_exceptional_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return 1;
if (dax_mapping(mapping))
return dax_invalidate_mapping_entry(mapping, index);
clear_shadow_entry(mapping, index, entry);
return 1;
}
/*
* Invalidate exceptional entry if clean. This handles exceptional entries for
* invalidate_inode_pages2() so for DAX it evicts only clean entries.
*/
static int invalidate_exceptional_entry2(struct address_space *mapping,
pgoff_t index, void *entry)
{
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return 1;
if (dax_mapping(mapping))
return dax_invalidate_mapping_entry_sync(mapping, index);
clear_shadow_entry(mapping, index, entry);
return 1;
}
/** /**
* do_invalidatepage - invalidate part or all of a page * do_invalidatepage - invalidate part or all of a page
* @page: the page which is affected * @page: the page which is affected
...@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
break; break;
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); truncate_exceptional_entry(mapping, index,
page);
continue; continue;
} }
...@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
} }
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); truncate_exceptional_entry(mapping, index,
page);
continue; continue;
} }
...@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, ...@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
break; break;
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); invalidate_exceptional_entry(mapping, index,
page);
continue; continue;
} }
...@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, ...@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
break; break;
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); if (!invalidate_exceptional_entry2(mapping,
index, page))
ret = -EBUSY;
continue; continue;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment