• Huang Ying's avatar
    migrate: fix syscall move_pages() return value for failure · a7504ed1
    Huang Ying authored
    Patch series "migrate_pages(): fix several bugs in error path", v3.
    
    During review the code of migrate_pages() and build a test program for
    it.  Several bugs in error path are identified and fixed in this
    series.
    
    Most patches are tested via
    
    - Apply error-inject.patch in Linux kernel
    - Compile test-migrate.c (with -lnuma)
    - Test with test-migrate.sh
    
    error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
    It turns out that error injection is an important tool to fix bugs in
    error path.
    
    
    This patch (of 8):
    
    The return value of move_pages() syscall is incorrect when counting
    the remaining pages to be migrated.  For example, for the following
    test program,
    
    "
     #define _GNU_SOURCE
    
     #include <stdbool.h>
     #include <stdio.h>
     #include <string.h>
     #include <stdlib.h>
     #include <errno.h>
    
     #include <fcntl.h>
     #include <sys/uio.h>
     #include <sys/mman.h>
     #include <sys/types.h>
     #include <unistd.h>
     #include <numaif.h>
     #include <numa.h>
    
     #ifndef MADV_FREE
     #define MADV_FREE	8		/* free pages only if memory pressure */
     #endif
    
     #define ONE_MB		(1024 * 1024)
     #define MAP_SIZE	(16 * ONE_MB)
     #define THP_SIZE	(2 * ONE_MB)
     #define THP_MASK	(THP_SIZE - 1)
    
     #define ERR_EXIT_ON(cond, msg)					\
    	 do {							\
    		 int __cond_in_macro = (cond);			\
    		 if (__cond_in_macro)				\
    			 error_exit(__cond_in_macro, (msg));	\
    	 } while (0)
    
     void error_msg(int ret, int nr, int *status, const char *msg)
     {
    	 int i;
    
    	 fprintf(stderr, "Error: %s, ret : %d, error: %s\n",
    		 msg, ret, strerror(errno));
    
    	 if (!nr)
    		 return;
    	 fprintf(stderr, "status: ");
    	 for (i = 0; i < nr; i++)
    		 fprintf(stderr, "%d ", status[i]);
    	 fprintf(stderr, "\n");
     }
    
     void error_exit(int ret, const char *msg)
     {
    	 error_msg(ret, 0, NULL, msg);
    	 exit(1);
     }
    
     int page_size;
    
     bool do_vmsplice;
     bool do_thp;
    
     static int pipe_fds[2];
     void *addr;
     char *pn;
     char *pn1;
     void *pages[2];
     int status[2];
    
     void prepare()
     {
    	 int ret;
    	 struct iovec iov;
    
    	 if (addr) {
    		 munmap(addr, MAP_SIZE);
    		 close(pipe_fds[0]);
    		 close(pipe_fds[1]);
    	 }
    
    	 ret = pipe(pipe_fds);
    	 ERR_EXIT_ON(ret, "pipe");
    
    	 addr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
    		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    	 ERR_EXIT_ON(addr == MAP_FAILED, "mmap");
    	 if (do_thp) {
    		 ret = madvise(addr, MAP_SIZE, MADV_HUGEPAGE);
    		 ERR_EXIT_ON(ret, "advise hugepage");
    	 }
    
    	 pn = (char *)(((unsigned long)addr + THP_SIZE) & ~THP_MASK);
    	 pn1 = pn + THP_SIZE;
    	 pages[0] = pn;
    	 pages[1] = pn1;
    	 *pn = 1;
    
    	 if (do_vmsplice) {
    		 iov.iov_base = pn;
    		 iov.iov_len = page_size;
    		 ret = vmsplice(pipe_fds[1], &iov, 1, 0);
    		 ERR_EXIT_ON(ret < 0, "vmsplice");
    	 }
    
    	 status[0] = status[1] = 1024;
     }
    
     void test_migrate()
     {
    	 int ret;
    	 int nodes[2] = { 1, 1 };
    	 pid_t pid = getpid();
    
    	 prepare();
    	 ret = move_pages(pid, 1, pages, nodes, status, MPOL_MF_MOVE_ALL);
    	 error_msg(ret, 1, status, "move 1 page");
    
    	 prepare();
    	 ret = move_pages(pid, 2, pages, nodes, status, MPOL_MF_MOVE_ALL);
    	 error_msg(ret, 2, status, "move 2 pages, page 1 not mapped");
    
    	 prepare();
    	 *pn1 = 1;
    	 ret = move_pages(pid, 2, pages, nodes, status, MPOL_MF_MOVE_ALL);
    	 error_msg(ret, 2, status, "move 2 pages");
    
    	 prepare();
    	 *pn1 = 1;
    	 nodes[1] = 0;
    	 ret = move_pages(pid, 2, pages, nodes, status, MPOL_MF_MOVE_ALL);
    	 error_msg(ret, 2, status, "move 2 pages, page 1 to node 0");
     }
    
     int main(int argc, char *argv[])
     {
    	 numa_run_on_node(0);
    	 page_size = getpagesize();
    
    	 test_migrate();
    
    	 fprintf(stderr, "\nMake page 0 cannot be migrated:\n");
    	 do_vmsplice = true;
    	 test_migrate();
    
    	 fprintf(stderr, "\nTest THP:\n");
    	 do_thp = true;
    	 do_vmsplice = false;
    	 test_migrate();
    
    	 fprintf(stderr, "\nTHP: make page 0 cannot be migrated:\n");
    	 do_vmsplice = true;
    	 test_migrate();
    
    	 return 0;
     }
    "
    
    The output of the current kernel is,
    
    "
    Error: move 1 page, ret : 0, error: Success
    status: 1
    Error: move 2 pages, page 1 not mapped, ret : 0, error: Success
    status: 1 -14
    Error: move 2 pages, ret : 0, error: Success
    status: 1 1
    Error: move 2 pages, page 1 to node 0, ret : 0, error: Success
    status: 1 0
    
    Make page 0 cannot be migrated:
    Error: move 1 page, ret : 0, error: Success
    status: 1024
    Error: move 2 pages, page 1 not mapped, ret : 1, error: Success
    status: 1024 -14
    Error: move 2 pages, ret : 0, error: Success
    status: 1024 1024
    Error: move 2 pages, page 1 to node 0, ret : 1, error: Success
    status: 1024 1024
    "
    
    While the expected output is,
    
    "
    Error: move 1 page, ret : 0, error: Success
    status: 1
    Error: move 2 pages, page 1 not mapped, ret : 0, error: Success
    status: 1 -14
    Error: move 2 pages, ret : 0, error: Success
    status: 1 1
    Error: move 2 pages, page 1 to node 0, ret : 0, error: Success
    status: 1 0
    
    Make page 0 cannot be migrated:
    Error: move 1 page, ret : 1, error: Success
    status: 1024
    Error: move 2 pages, page 1 not mapped, ret : 1, error: Success
    status: 1024 -14
    Error: move 2 pages, ret : 1, error: Success
    status: 1024 1024
    Error: move 2 pages, page 1 to node 0, ret : 2, error: Success
    status: 1024 1024
    "
    
    Fix this via correcting the remaining pages counting.  With the fix,
    the output for the test program as above is expected.
    
    Link: https://lkml.kernel.org/r/20220817081408.513338-1-ying.huang@intel.com
    Link: https://lkml.kernel.org/r/20220817081408.513338-2-ying.huang@intel.com
    Fixes: 5984fabb ("mm: move_pages: report the number of non-attempted pages")
    Signed-off-by: default avatar"Huang, Ying" <ying.huang@intel.com>
    Reviewed-by: default avatarOscar Salvador <osalvador@suse.de>
    Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
    Cc: Zi Yan <ziy@nvidia.com>
    Cc: Yang Shi <shy828301@gmail.com>
    Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
    a7504ed1
migrate.c 69.2 KB