1. 11 Apr, 2018 9 commits
    • Steven Rostedt's avatar
      mm, vmscan, tracing: use pointer to reclaim_stat struct in trace event · d51d1e64
      Steven Rostedt authored
      The trace event trace_mm_vmscan_lru_shrink_inactive() currently has 12
      parameters! Seven of them are from the reclaim_stat structure.  This
      structure is currently local to mm/vmscan.c.  By moving it to the global
      vmstat.h header, we can also reference it from the vmscan tracepoints.
      In moving it, it brings down the overhead of passing so many arguments
      to the trace event.  In the future, we may limit the number of arguments
      that a trace event may pass (ideally just 6, but more realistically it
      may be 8).
      
      Before this patch, the code to call the trace event is this:
      
       0f 83 aa fe ff ff       jae    ffffffff811e6261 <shrink_inactive_list+0x1e1>
       48 8b 45 a0             mov    -0x60(%rbp),%rax
       45 8b 64 24 20          mov    0x20(%r12),%r12d
       44 8b 6d d4             mov    -0x2c(%rbp),%r13d
       8b 4d d0                mov    -0x30(%rbp),%ecx
       44 8b 75 cc             mov    -0x34(%rbp),%r14d
       44 8b 7d c8             mov    -0x38(%rbp),%r15d
       48 89 45 90             mov    %rax,-0x70(%rbp)
       8b 83 b8 fe ff ff       mov    -0x148(%rbx),%eax
       8b 55 c0                mov    -0x40(%rbp),%edx
       8b 7d c4                mov    -0x3c(%rbp),%edi
       8b 75 b8                mov    -0x48(%rbp),%esi
       89 45 80                mov    %eax,-0x80(%rbp)
       65 ff 05 e4 f7 e2 7e    incl   %gs:0x7ee2f7e4(%rip)        # 15bd0 <__preempt_count>
       48 8b 05 75 5b 13 01    mov    0x1135b75(%rip),%rax        # ffffffff8231bf68 <__tracepoint_mm_vmscan_lru_shrink_inactive+0x28>
       48 85 c0                test   %rax,%rax
       74 72                   je     ffffffff811e646a <shrink_inactive_list+0x3ea>
       48 89 c3                mov    %rax,%rbx
       4c 8b 10                mov    (%rax),%r10
       89 f8                   mov    %edi,%eax
       48 89 85 68 ff ff ff    mov    %rax,-0x98(%rbp)
       89 f0                   mov    %esi,%eax
       48 89 85 60 ff ff ff    mov    %rax,-0xa0(%rbp)
       89 c8                   mov    %ecx,%eax
       48 89 85 78 ff ff ff    mov    %rax,-0x88(%rbp)
       89 d0                   mov    %edx,%eax
       48 89 85 70 ff ff ff    mov    %rax,-0x90(%rbp)
       8b 45 8c                mov    -0x74(%rbp),%eax
       48 8b 7b 08             mov    0x8(%rbx),%rdi
       48 83 c3 18             add    $0x18,%rbx
       50                      push   %rax
       41 54                   push   %r12
       41 55                   push   %r13
       ff b5 78 ff ff ff       pushq  -0x88(%rbp)
       41 56                   push   %r14
       41 57                   push   %r15
       ff b5 70 ff ff ff       pushq  -0x90(%rbp)
       4c 8b 8d 68 ff ff ff    mov    -0x98(%rbp),%r9
       4c 8b 85 60 ff ff ff    mov    -0xa0(%rbp),%r8
       48 8b 4d 98             mov    -0x68(%rbp),%rcx
       48 8b 55 90             mov    -0x70(%rbp),%rdx
       8b 75 80                mov    -0x80(%rbp),%esi
       41 ff d2                callq  *%r10
      
      After the patch:
      
       0f 83 a8 fe ff ff       jae    ffffffff811e626d <shrink_inactive_list+0x1cd>
       8b 9b b8 fe ff ff       mov    -0x148(%rbx),%ebx
       45 8b 64 24 20          mov    0x20(%r12),%r12d
       4c 8b 6d a0             mov    -0x60(%rbp),%r13
       65 ff 05 f5 f7 e2 7e    incl   %gs:0x7ee2f7f5(%rip)        # 15bd0 <__preempt_count>
       4c 8b 35 86 5b 13 01    mov    0x1135b86(%rip),%r14        # ffffffff8231bf68 <__tracepoint_mm_vmscan_lru_shrink_inactive+0x28>
       4d 85 f6                test   %r14,%r14
       74 2a                   je     ffffffff811e6411 <shrink_inactive_list+0x371>
       49 8b 06                mov    (%r14),%rax
       8b 4d 8c                mov    -0x74(%rbp),%ecx
       49 8b 7e 08             mov    0x8(%r14),%rdi
       49 83 c6 18             add    $0x18,%r14
       4c 89 ea                mov    %r13,%rdx
       45 89 e1                mov    %r12d,%r9d
       4c 8d 45 b8             lea    -0x48(%rbp),%r8
       89 de                   mov    %ebx,%esi
       51                      push   %rcx
       48 8b 4d 98             mov    -0x68(%rbp),%rcx
       ff d0                   callq  *%rax
      
      Link: http://lkml.kernel.org/r/2559d7cb-ec60-1200-2362-04fa34fd02bb@fb.com
      Link: http://lkml.kernel.org/r/20180322121003.4177af15@gandalf.local.home
      Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
      Reported-by: Alexei Starovoitov <ast@fb.com>
      Acked-by: David Rientjes <rientjes@google.com>
      Acked-by: Michal Hocko <mhocko@suse.com>
      Cc: Mel Gorman <mgorman@suse.de>
      Cc: Vlastimil Babka <vbabka@suse.cz>
      Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
      Cc: Alexei Starovoitov <ast@fb.com>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      d51d1e64
    • Andrey Ryabinin's avatar
      mm/vmscan: don't mess with pgdat->flags in memcg reclaim · e3c1ac58
      Andrey Ryabinin authored
      memcg reclaim may alter pgdat->flags based on the state of LRU lists in
      cgroup and its children.  PGDAT_WRITEBACK may force kswapd to sleep in
      congestion_wait(), PGDAT_DIRTY may force kswapd to writeback filesystem
      pages.  But the worst here is PGDAT_CONGESTED, since it may force all
      direct reclaims to stall in wait_iff_congested().  Note that only kswapd
      has the power to clear any of these bits.  This might just never happen if
      cgroup limits are configured that way.  So all direct reclaims will stall as
      long as we have some congested bdi in the system.
      
      Leave all pgdat->flags manipulations to kswapd.  kswapd scans the whole
      pgdat, only kswapd can clear pgdat->flags once node is balanced, thus
      it's reasonable to leave all decisions about node state to kswapd.
      
      Why only kswapd? Why not allow global direct reclaim to change these
      flags? It is because currently only kswapd can clear these flags.  I'm
      less worried about the case when PGDAT_CONGESTED is falsely not set, and
      more worried about the case when it is falsely set.  If direct reclaimer
      sets PGDAT_CONGESTED, do we have guarantee that after the congestion
      problem is sorted out, kswapd will be woken up and clear the flag? It
      seems like there is no such guarantee.  E.g.  direct reclaimers may
      eventually balance pgdat and kswapd simply won't wake up (see
      wakeup_kswapd()).
      
      Moving pgdat->flags manipulation to kswapd, means that cgroup2 reclaim
      now loses its congestion throttling mechanism.  Add per-cgroup
      congestion state and throttle cgroup2 reclaimers if memcg is in
      congestion state.
      
      Currently there is no need in per-cgroup PGDAT_WRITEBACK and PGDAT_DIRTY
      bits since they alter only kswapd behavior.
      
      The problem could be easily demonstrated by creating heavy congestion in
      one cgroup:
      
          echo "+memory" > /sys/fs/cgroup/cgroup.subtree_control
          mkdir -p /sys/fs/cgroup/congester
          echo 512M > /sys/fs/cgroup/congester/memory.max
          echo $$ > /sys/fs/cgroup/congester/cgroup.procs
          /* generate a lot of dirty data on slow HDD */
          while true; do dd if=/dev/zero of=/mnt/sdb/zeroes bs=1M count=1024; done &
          ....
          while true; do dd if=/dev/zero of=/mnt/sdb/zeroes bs=1M count=1024; done &
      
      and some job in another cgroup:
      
          mkdir /sys/fs/cgroup/victim
          echo 128M > /sys/fs/cgroup/victim/memory.max
      
          # time cat /dev/sda > /dev/null
          real    10m15.054s
          user    0m0.487s
          sys     1m8.505s
      
      According to the tracepoint in wait_iff_congested(), the 'cat' spent 50%
      of the time sleeping there.
      
      With the patch, cat doesn't waste time anymore:
      
          # time cat /dev/sda > /dev/null
          real    5m32.911s
          user    0m0.411s
          sys     0m56.664s
      
      [aryabinin@virtuozzo.com: congestion state should be per-node]
        Link: http://lkml.kernel.org/r/20180406135215.10057-1-aryabinin@virtuozzo.com
      [aryabinin@virtuozzo.com: make congestion state per-cgroup-per-node instead of just per-cgroup]
        Link: http://lkml.kernel.org/r/20180406180254.8970-2-aryabinin@virtuozzo.com
      Link: http://lkml.kernel.org/r/20180323152029.11084-5-aryabinin@virtuozzo.com
      Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
      Reviewed-by: Shakeel Butt <shakeelb@google.com>
      Acked-by: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Michal Hocko <mhocko@kernel.org>
      Cc: Steven Rostedt <rostedt@goodmis.org>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      e3c1ac58
    • Andrey Ryabinin's avatar
      mm/vmscan: don't change pgdat state on base of a single LRU list state · d108c772
      Andrey Ryabinin authored
      We have separate LRU list for each memory cgroup.  Memory reclaim
      iterates over cgroups and calls shrink_inactive_list() on every inactive
      LRU list.  Based on the state of a single LRU shrink_inactive_list() may
      flag the whole node as dirty,congested or under writeback.  This is
      obviously wrong and hurtful.  It's especially hurtful when we have
      possibly small congested cgroup in system.  Then *all* direct reclaims
      waste time by sleeping in wait_iff_congested().  And the more memcgs in
      the system we have the longer memory allocation stall is, because
      wait_iff_congested() called on each lru-list scan.
      
      Sum reclaim stats across all visited LRUs on node and flag node as
      dirty, congested or under writeback based on that sum.  Also call
      congestion_wait(), wait_iff_congested() once per pgdat scan, instead of
      once per lru-list scan.
      
      This only fixes the problem for global reclaim case.  Per-cgroup reclaim
      may alter global pgdat flags too, which is wrong.  But that is separate
      issue and will be addressed in the next patch.
      
      This change will not have any effect on systems with all workload
      concentrated in a single cgroup.
      
      [aryabinin@virtuozzo.com: check nr_writeback against all nr_taken, not just file]
        Link: http://lkml.kernel.org/r/20180406180254.8970-1-aryabinin@virtuozzo.com
      Link: http://lkml.kernel.org/r/20180323152029.11084-4-aryabinin@virtuozzo.com
      Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
      Reviewed-by: Shakeel Butt <shakeelb@google.com>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Michal Hocko <mhocko@kernel.org>
      Cc: Steven Rostedt <rostedt@goodmis.org>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      d108c772
    • Andrey Ryabinin's avatar
      mm/vmscan: remove redundant current_may_throttle() check · c4fd4fa5
      Andrey Ryabinin authored
      Only kswapd can have non-zero nr_immediate, and current_may_throttle()
      is always true for kswapd (PF_LESS_THROTTLE bit is never set) thus it's
      enough to check stat.nr_immediate only.
      
      Link: http://lkml.kernel.org/r/20180315164553.17856-4-aryabinin@virtuozzo.com
      Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
      Acked-by: Michal Hocko <mhocko@suse.com>
      Cc: Shakeel Butt <shakeelb@google.com>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      c4fd4fa5
    • Andrey Ryabinin's avatar
      mm/vmscan: update stale comments · 894befec
      Andrey Ryabinin authored
      Update some comments that became stale since transition from per-zone to
      per-node reclaim.
      
      Link: http://lkml.kernel.org/r/20180315164553.17856-2-aryabinin@virtuozzo.com
      Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
      Acked-by: Michal Hocko <mhocko@suse.com>
      Cc: Shakeel Butt <shakeelb@google.com>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      894befec
    • Roman Gushchin's avatar
      mm: treat indirectly reclaimable memory as free in overcommit logic · d79f7aa4
      Roman Gushchin authored
      Indirectly reclaimable memory can consume a significant part of total
      memory and it's actually reclaimable (it will be released under actual
      memory pressure).
      
      So, the overcommit logic should treat it as free.
      
      Otherwise, it's possible to cause random system-wide memory allocation
      failures by consuming a significant amount of memory by indirectly
      reclaimable memory, e.g.  dentry external names.
      
      If overcommit policy GUESS is used, it might be used for denial of
      service attack under some conditions.
      
      The following program illustrates the approach.  It causes the kernel to
      allocate an unreclaimable kmalloc-256 chunk for each stat() call, so
      that at some point the overcommit logic may start blocking large
      allocation system-wide.
      
        int main()
        {
        	char buf[256];
        	unsigned long i;
        	struct stat statbuf;
      
        	buf[0] = '/';
        	for (i = 1; i < sizeof(buf); i++)
        		buf[i] = '_';
      
        	for (i = 0; 1; i++) {
        		sprintf(&buf[248], "%8lu", i);
        		stat(buf, &statbuf);
        	}
      
        	return 0;
        }
      
      This patch in combination with related indirectly reclaimable memory
      patches closes this issue.
      
      Link: http://lkml.kernel.org/r/20180313130041.8078-1-guro@fb.com
      Signed-off-by: Roman Gushchin <guro@fb.com>
      Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
      Cc: Alexander Viro <viro@zeniv.linux.org.uk>
      Cc: Michal Hocko <mhocko@suse.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      d79f7aa4
    • Roman Gushchin's avatar
      dcache: account external names as indirectly reclaimable memory · f1782c9b
      Roman Gushchin authored
      I received a report about suspicious growth of unreclaimable slabs on
      some machines.  I've found that it happens on machines with low memory
      pressure, and these unreclaimable slabs are external names attached to
      dentries.
      
      External names are allocated using generic kmalloc() function, so they
      are accounted as unreclaimable.  But they are held by dentries, which
      are reclaimable, and they will be reclaimed under the memory pressure.
      
      In particular, this breaks MemAvailable calculation, as it doesn't take
      unreclaimable slabs into account.  This leads to a silly situation, when
      a machine is almost idle, has no memory pressure and therefore has a big
      dentry cache.  And the resulting MemAvailable is too low to start a new
      workload.
      
      To address the issue, the NR_INDIRECTLY_RECLAIMABLE_BYTES counter is
      used to track the amount of memory, consumed by external names.  The
      counter is increased in the dentry allocation path, if an external name
      structure is allocated; and it's decreased in the dentry freeing path.
      
      To reproduce the problem I've used the following Python script:
      
        import os
      
        for iter in range (0, 10000000):
            try:
                name = ("/some_long_name_%d" % iter) + "_" * 220
                os.stat(name)
            except Exception:
                pass
      
      Without this patch:
        $ cat /proc/meminfo | grep MemAvailable
        MemAvailable:    7811688 kB
        $ python indirect.py
        $ cat /proc/meminfo | grep MemAvailable
        MemAvailable:    2753052 kB
      
      With the patch:
        $ cat /proc/meminfo | grep MemAvailable
        MemAvailable:    7809516 kB
        $ python indirect.py
        $ cat /proc/meminfo | grep MemAvailable
        MemAvailable:    7749144 kB
      
      [guro@fb.com: fix indirectly reclaimable memory accounting for CONFIG_SLOB]
        Link: http://lkml.kernel.org/r/20180312194140.19517-1-guro@fb.com
      [guro@fb.com: fix indirectly reclaimable memory accounting]
        Link: http://lkml.kernel.org/r/20180313125701.7955-1-guro@fb.com
      Link: http://lkml.kernel.org/r/20180305133743.12746-5-guro@fb.com
      Signed-off-by: Roman Gushchin <guro@fb.com>
      Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
      Cc: Alexander Viro <viro@zeniv.linux.org.uk>
      Cc: Michal Hocko <mhocko@suse.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      f1782c9b
    • Roman Gushchin's avatar
      mm: treat indirectly reclaimable memory as available in MemAvailable · 034ebf65
      Roman Gushchin authored
      Adjust /proc/meminfo MemAvailable calculation by adding the amount of
      indirectly reclaimable memory (rounded to the PAGE_SIZE).
      
      Link: http://lkml.kernel.org/r/20180305133743.12746-4-guro@fb.com
      Signed-off-by: Roman Gushchin <guro@fb.com>
      Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
      Cc: Alexander Viro <viro@zeniv.linux.org.uk>
      Cc: Michal Hocko <mhocko@suse.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      034ebf65
    • Roman Gushchin's avatar
      mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES · eb592546
      Roman Gushchin authored
      Patch series "indirectly reclaimable memory", v2.
      
      This patchset introduces the concept of indirectly reclaimable memory
      and applies it to fix the issue of when a big number of dentries with
      external names can significantly affect the MemAvailable value.
      
      This patch (of 3):
      
      Introduce a concept of indirectly reclaimable memory and add the
      corresponding memory counter and /proc/vmstat item.
      
      Indirectly reclaimable memory is any sort of memory, used by the kernel
      (except for reclaimable slabs), which is actually reclaimable, i.e.  will
      be released under memory pressure.
      
      The counter is in bytes, as it's not always possible to count such
      objects in pages.  The name contains BYTES by analogy to
      NR_KERNEL_STACK_KB.
      
      Link: http://lkml.kernel.org/r/20180305133743.12746-2-guro@fb.com
      Signed-off-by: Roman Gushchin <guro@fb.com>
      Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
      Cc: Alexander Viro <viro@zeniv.linux.org.uk>
      Cc: Michal Hocko <mhocko@suse.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      eb592546
  2. 10 Apr, 2018 13 commits
    • Linus Torvalds's avatar
      Merge tag 'for-linus' of git://linux-c6x.org/git/projects/linux-c6x-upstreaming · f77cfbe6
      Linus Torvalds authored
      Pull c6x updates from Mark Salter.
      
      * tag 'for-linus' of git://linux-c6x.org/git/projects/linux-c6x-upstreaming:
        c6x: pass endianness info to sparse
        c6x: fix platforms/plldata.c get_coreid build error
        c6x: remove unused KTHREAD_SIZE definition
      f77cfbe6
    • Linus Torvalds's avatar
      Merge tag 'mips_4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/mips · 948869fa
      Linus Torvalds authored
      Pull MIPS updates from James Hogan:
       "These are the main MIPS changes for 4.17. Rough overview:
      
         (1) generic platform: Add support for Microsemi Ocelot SoCs
      
         (2) crypto: Add CRC32 and CRC32C HW acceleration module
      
         (3) Various cleanups and misc improvements
      
        More detailed summary:
      
        Miscellaneous:
         - hang more efficiently on halt/powerdown/restart
         - pm-cps: Block system suspend when a JTAG probe is present
         - expand make help text for generic defconfigs
         - refactor handling of legacy defconfigs
         - determine the entry point from the ELF file header to fix microMIPS
           for certain toolchains
         - introduce isa-rev.h for MIPS_ISA_REV and use to simplify other code
      
        Minor cleanups:
         - DTS: boston/ci20: Unit name cleanups and correction
         - kdump: Make the default for PHYSICAL_START always 64-bit
         - constify gpio_led in Alchemy, AR7, and TXX9
         - silence a couple of W=1 warnings
         - remove duplicate includes
      
        Platform support:
        Generic platform:
         - add support for Microsemi Ocelot
         - dt-bindings: Add vendor prefix for Microsemi Corporation
         - dt-bindings: Add bindings for Microsemi SoCs
         - add ocelot SoC & PCB123 board DTS files
         - MAINTAINERS: Add entry for Microsemi MIPS SoCs
         - enable crc32-mips on r6 configs
      
        ath79:
         - fix AR724X_PLL_REG_PCIE_CONFIG offset
      
        BCM47xx:
         - firmware: Use mac_pton() for MAC address parsing
         - add Luxul XAP1500/XWR1750 WiFi LEDs
         - use standard reset button for Luxul XWR-1750
      
        BMIPS:
         - enable CONFIG_BRCMSTB_PM in bmips_stb_defconfig for build coverage
         - add STB PM, wake-up timer, watchdog DT nodes
      
        Octeon:
         - drop '.' after newlines in printk calls
      
        ralink:
         - pci-mt7621: Enable PCIe on MT7688"
      
      * tag 'mips_4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/mips: (37 commits)
        MIPS: BCM47XX: Use standard reset button for Luxul XWR-1750
        MIPS: BCM47XX: Add Luxul XAP1500/XWR1750 WiFi LEDs
        MIPS: Make the default for PHYSICAL_START always 64-bit
        MIPS: Use the entry point from the ELF file header
        MAINTAINERS: Add entry for Microsemi MIPS SoCs
        MIPS: generic: Add support for Microsemi Ocelot
        MIPS: mscc: Add ocelot PCB123 device tree
        MIPS: mscc: Add ocelot dtsi
        dt-bindings: mips: Add bindings for Microsemi SoCs
        dt-bindings: Add vendor prefix for Microsemi Corporation
        MIPS: ath79: Fix AR724X_PLL_REG_PCIE_CONFIG offset
        MIPS: pci-mt7620: Enable PCIe on MT7688
        MIPS: pm-cps: Block system suspend when a JTAG probe is present
        MIPS: VDSO: Replace __mips_isa_rev with MIPS_ISA_REV
        MIPS: BPF: Replace __mips_isa_rev with MIPS_ISA_REV
        MIPS: cpu-features.h: Replace __mips_isa_rev with MIPS_ISA_REV
        MIPS: Introduce isa-rev.h to define MIPS_ISA_REV
        MIPS: Hang more efficiently on halt/powerdown/restart
        FIRMWARE: bcm47xx_nvram: Replace mac address parsing
        MIPS: BMIPS: Add Broadcom STB watchdog nodes
        ...
      948869fa
    • Linus Torvalds's avatar
      Merge tag 'trace-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace · 2a56bb59
      Linus Torvalds authored
      Pull tracing updates from Steven Rostedt:
       "New features:
      
         - Tom Zanussi's extended histogram work.
      
           This adds the synthetic events to have histograms from multiple
           event data. Adds triggers "onmatch" and "onmax" to call the
           synthetic events. Several updates to the histogram code from this.
      
         - Allow way to nest ring buffer calls in the same context
      
         - Allow absolute time stamps in ring buffer
      
         - Rewrite of filter code parsing based on Al Viro's suggestions
      
         - Setting of trace_clock to global if TSC is unstable (on boot)
      
         - Better OOM handling when allocating large ring buffers
      
         - Added initcall tracepoints (consolidated initcall_debug code with
           them)
      
        And other various fixes and clean ups"
      
      * tag 'trace-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (68 commits)
        init: Have initcall_debug still work without CONFIG_TRACEPOINTS
        init, tracing: Have printk come through the trace events for initcall_debug
        init, tracing: instrument security and console initcall trace events
        init, tracing: Add initcall trace events
        tracing: Add rcu dereference annotation for test func that touches filter->prog
        tracing: Add rcu dereference annotation for filter->prog
        tracing: Fixup logic inversion on setting trace_global_clock defaults
        tracing: Hide global trace clock from lockdep
        ring-buffer: Add set/clear_current_oom_origin() during allocations
        ring-buffer: Check if memory is available before allocation
        lockdep: Add print_irqtrace_events() to __warn
        vsprintf: Do not preprocess non-dereferenced pointers for bprintf (%px and %pK)
        tracing: Uninitialized variable in create_tracing_map_fields()
        tracing: Make sure variable string fields are NULL-terminated
        tracing: Add action comparisons when testing matching hist triggers
        tracing: Don't add flag strings when displaying variable references
        tracing: Fix display of hist trigger expressions containing timestamps
        ftrace: Drop a VLA in module_exists()
        tracing: Mention trace_clock=global when warning about unstable clocks
        tracing: Default to using trace_global_clock if sched_clock is unstable
        ...
      2a56bb59
    • Linus Torvalds's avatar
      Merge tag 'libnvdimm-for-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm · 9f3a0941
      Linus Torvalds authored
      Pull libnvdimm updates from Dan Williams:
       "This cycle was not something I ever want to repeat as there were
        several late changes that have only now just settled.
      
        Half of the branch up to commit d2c997c0 ("fs, dax: use
        page->mapping to warn...") have been in -next for several releases.
        The of_pmem driver and the address range scrub rework were late
        arrivals, and the dax work was scaled back at the last moment.
      
        The of_pmem driver missed a previous merge window due to an oversight.
        A sense of obligation to rectify that miss is why it is included for
        4.17. It has acks from PowerPC folks. Stephen reported a build failure
        that only occurs when merging it with your latest tree, for now I have
        fixed that up by disabling modular builds of of_pmem. A test merge
        with your tree has received a build success report from the 0day robot
        over 156 configs.
      
        An initial version of the ARS rework was submitted before the merge
        window. It is self contained to libnvdimm, a net code reduction, and
        passing all unit tests.
      
        The filesystem-dax changes are based on the wait_var_event()
        functionality from tip/sched/core. However, late review feedback
        showed that those changes regressed truncate performance to a large
        degree. The branch was rewound to drop the truncate behavior change
        and now only includes preparation patches and cleanups (with full acks
        and reviews). The finalization of this dax-dma-vs-truncate work will
        need to wait for 4.18.
      
        Summary:
      
         - A rework of the filesystem-dax implementation provides for detection
           of unmap operations (truncate / hole punch) colliding with
           in-progress device-DMA. A fix for these collisions remains a
           work-in-progress pending resolution of truncate latency and
           starvation regressions.
      
         - The of_pmem driver expands the users of libnvdimm outside of x86
           and ACPI to describe an implementation of persistent memory on
           PowerPC with Open Firmware / Device tree.
      
         - Address Range Scrub (ARS) handling is completely rewritten to
           account for the fact that ARS may run for 100s of seconds and there
           is no platform defined way to cancel it. ARS will now no longer
           block namespace initialization.
      
         - The NVDIMM Namespace Label implementation is updated to handle
           label areas as small as 1K, down from 128K.
      
         - Miscellaneous cleanups and updates to unit test infrastructure"
      
      * tag 'libnvdimm-for-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (39 commits)
        libnvdimm, of_pmem: workaround OF_NUMA=n build error
        nfit, address-range-scrub: add module option to skip initial ars
        nfit, address-range-scrub: rework and simplify ARS state machine
        nfit, address-range-scrub: determine one platform max_ars value
        powerpc/powernv: Create platform devs for nvdimm buses
        doc/devicetree: Persistent memory region bindings
        libnvdimm: Add device-tree based driver
        libnvdimm: Add of_node to region and bus descriptors
        libnvdimm, region: quiet region probe
        libnvdimm, namespace: use a safe lookup for dimm device name
        libnvdimm, dimm: fix dpa reservation vs uninitialized label area
        libnvdimm, testing: update the default smart ctrl_temperature
        libnvdimm, testing: Add emulation for smart injection commands
        nfit, address-range-scrub: introduce nfit_spa->ars_state
        libnvdimm: add an api to cast a 'struct nd_region' to its 'struct device'
        nfit, address-range-scrub: fix scrub in-progress reporting
        dax, dm: allow device-mapper to operate without dax support
        dax: introduce CONFIG_DAX_DRIVER
        fs, dax: use page->mapping to warn if truncate collides with a busy page
        ext2, dax: introduce ext2_dax_aops
        ...
      9f3a0941
    • Linus Torvalds's avatar
      Merge tag 'rtc-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux · fbe173e3
      Linus Torvalds authored
      Pull RTC updates from Alexandre Belloni:
       "This contains a few series that have been in preparation for a while
        and that will help systems with RTCs that will fail in 2038, 2069 or
        2100.
      
        Subsystem:
         - Add tracepoints
         - Rework of the RTC/nvmem API to allow drivers to discard struct
           nvmem_config after registration
         - New range API, drivers can now expose the useful range of the RTC
         - New offset API the core is now able to add an offset to the RTC
           time, modifying the supported range.
         - Multiple rtc_time64_to_tm fixes
         - Handle time_t overflow on 32 bit platforms in the core instead of
           letting drivers do crazy things.
         - remove rtc_control API
      
        New driver:
         - Intersil ISL12026
      
        Drivers:
         - Drivers exposing the RTC non volatile memory have been converted to
           use nvmem
         - Removed useless time and date validation
         - Removed an indirection pattern that was a cargo cult from ancient
           drivers
         - Removed VLA usage
         - Fixed a possible race condition in probe functions
         - AB8540 support is dropped from ab8500
         - pcf85363 now has alarm support"
      
      * tag 'rtc-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux: (128 commits)
        rtc: snvs: Fix usage of snvs_rtc_enable
        rtc: mt7622: fix module autoloading for OF platform drivers
        rtc: isl12022: use true and false for boolean values
        rtc: ab8500: Drop AB8540 support
        rtc: remove a warning during scripts/kernel-doc step
        rtc: 88pm860x: remove artificial limitation
        rtc: 88pm80x: remove artificial limitation
        rtc: st-lpc: remove artificial limitation
        rtc: mrst: remove artificial limitation
        rtc: mv: remove artificial limitation
        rtc: hctosys: Ensure system time doesn't overflow time_t
        parisc: time: stop validating rtc_time in .read_time
        rtc: pcf85063: fix clearing bits in pcf85063_start_clock
        rtc: at91sam9: Set name of regmap_config
        rtc: s5m: Remove VLA usage
        rtc: s5m: Move enum from rtc.h to rtc-s5m.c
        rtc: remove VLA usage
        rtc: Add useful timestamp definitions
        rtc: Add one offset seconds to expand RTC range
        rtc: Factor out the RTC range validation into rtc_valid_range()
        ...
      fbe173e3
    • Linus Torvalds's avatar
      Merge tag 'fbdev-v4.17' of git://github.com/bzolnier/linux · 5e630afd
      Linus Torvalds authored
      Pull fbdev updates from Bartlomiej Zolnierkiewicz:
       "There is nothing really major here, just a couple of small bugfixes,
        improvements and cleanups:
      
         - make it possible to load radeonfb driver when offb driver is loaded
           first (Mathieu Malaterre)
      
         - fix memory leak in offb driver (Mathieu Malaterre)
      
         - fix unaligned access in udlfb driver (Ladislav Michl)
      
         - convert atmel_lcdfb driver to use GPIO descriptors (Ludovic
           Desroches)
      
         - avoid mismatched prototypes in sisfb driver (Arnd Bergmann)
      
         - remove VLA usage from viafb driver (Gustavo A. R. Silva)
      
         - add missing help text to FB_I810_I2 config option (Ulf Magnusson)
      
         - misc fixes (Gustavo A. R. Silva, Colin Ian King, Markus Elfring)
      
         - remove dead code from s3c-fb driver for Exynos and S5PV210
           platforms
      
         - misc cleanups (Corentin Labbe, Ladislav Michl, Ulf Magnusson,
           Vladimir Zapolskiy, Markus Elfring)"
      
      * tag 'fbdev-v4.17' of git://github.com/bzolnier/linux: (32 commits)
        video: fbdev: s3c-fb: remove dead platform code for Exynos and S5PV210 platforms
        video: au1100fb: Delete an unnecessary variable initialisation in au1100fb_drv_probe()
        video: au1100fb: Improve a size determination in au1100fb_drv_probe()
        video: au1100fb: Delete an error message for a failed memory allocation in au1100fb_drv_probe()
        video/console/sticore: Delete an error message for a failed memory allocation in sti_try_rom_generic()
        video: ARM CLCD: Improve a size determination in clcdfb_probe()
        video: ARM CLCD: Delete an error message for a failed memory allocation in clcdfb_probe()
        video: matroxfb: Delete an error message for a failed memory allocation in matroxfb_crtc2_probe()
        video: s3c-fb: Improve a size determination in s3c_fb_probe()
        video: s3c-fb: Delete an error message for a failed memory allocation in s3c_fb_probe()
        video: fsl-diu-fb: Delete an error message for a failed memory allocation in fsl_diu_init()
        video: ssd1307fb: Improve a size determination in ssd1307fb_probe()
        video: smscufx: Delete an error message for a failed memory allocation in ufx_realloc_framebuffer()
        video: smscufx: Return an error code only as a constant in ufx_realloc_framebuffer()
        video: smscufx: Less checks in ufx_usb_probe() after error detection
        video: udlfb: Return an error code only as a constant in dlfb_realloc_framebuffer()
        video/fbdev/stifb: Delete an error message for a failed memory allocation in stifb_init_fb()
        video/fbdev/stifb: Return -ENOMEM after a failed kzalloc() in stifb_init_fb()
        video: fbdev: aty128fb: use true and false for boolean values
        fbdev: aty: fix missing indentation in if statement
        ...
      5e630afd
    • Linus Torvalds's avatar
      Merge tag 'sound-fix-4.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound · 7aa1cf25
      Linus Torvalds authored
      Pull sound fixes from Takashi Iwai:
       "The main purpose of this pull request is a fix for a regression in the
        recent PCM OSS emulation code that may lead to RCU stall. Since
        syzkaller hits this too often, I send the pull request now with a
        minimal collection. Possibly another pull request may follow before
        RC1.
      
        The other fixes here are for USB-audio class 2 and 3 to improve the
        parser for the clock descriptors. These are rather cleanups but good
        for security, too.
      
        Last but not least, another included fix is the trivial one to remove
        superfluous WARN_ON() that annoyed syzbot"
      
      * tag 'sound-fix-4.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound:
        ALSA: pcm: Remove WARN_ON() at snd_pcm_hw_params() error
        ALSA: pcm: Fix endless loop for XRUN recovery in OSS emulation
        ALSA: usb-audio: Add sanity checks in UAC3 clock parsers
        ALSA: usb-audio: More strict sanity checks for clock parsers
        ALSA: usb-audio: Refactor clock finder helpers
      7aa1cf25
    • Linus Torvalds's avatar
      Merge tag 'media/v4.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media · d3626005
      Linus Torvalds authored
      Pull media fixes from Mauro Carvalho Chehab:
       "A series of media updates/fixes for 4.17.
      
        There are two important core fix patches in this series:
      
         - A regression fix on Kernel 4.16 which causes it to not work with
           some input devices that depend on media core
      
         - A fix at compat32 bits which causes it to OOPS on overlay, and
           affects the Kernels where the CVE-2017-13166 was backported
      
        The remaining ones are other random fixes at the documentation and on
        drivers.
      
        The biggest part of this series is a set of 18 patches for the Intel
        atomisp driver. Currently, it produces hundreds of warnings/errors on
        sparse/smatch, causing me to sometimes ignore new warnings on other
        drivers that are not so broken. This driver is on really poor state,
        even for staging standards: it has several layers of abstraction on
        it, and it supports two different hardware. Selecting between them
        requires adding a define (there isn't even a Kconfig option for such
        purpose). Just on this smatch cleanup, I could easily get rid of 8
        "do-nothing" files. So, I'm seriously considering its removal from
        upstream, if I don't see any real work on addressing the problems
        there along this year"
      
      * tag 'media/v4.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media: (48 commits)
        media: v4l2-core: fix size of devnode_nums[] bitarray
        media: v4l2-compat-ioctl32: don't oops on overlay
        media: i2c: adv748x: afe: fix sparse warning
        media: extended-controls.rst: transmitter -> receiver
        media: staging: atomisp: stop duplicating input format types
        media: staging: atomisp: get rid of an unused var
        media: staging: atomisp: stop mixing enum types
        media: staging: atomisp: get rid of some static warnings
        media: staging: atomisp: use %p to print pointers
        media: staging: atomisp: remove an useless check
        media: staging: atomisp: avoid a warning if 32 bits build
        media: staging: atomisp: don't access a NULL var
        media: staging: atomisp: Get rid of *default.host.[ch]
        media: staging: atomisp: get rid of an unused function
        media: staging: atomisp: remove unused set_pd_base()
        media: staging: atomisp: fix endianess issues
        media: staging: atomisp: add a missing include
        media: staging: atomisp: get rid of stupid statements
        media: staging: atomisp: declare static vars as such
        media: staging: atomisp: ia_css_output.host: don't use var before check
        ...
      d3626005
    • Luc Van Oostenryck's avatar
      c6x: pass endianness info to sparse · 85fa2cc5
      Luc Van Oostenryck authored
      c6x depends on the macro '_BIG_ENDIAN' being defined or not
      to correctly select or define endian-specific macros, structures
      or pieces of code.
      
      This macro is predefined by the compiler but sparse knows nothing
      about it and thus may pre-process files differently from what
      gcc would.
      
      Fix this by passing '-D_BIG_ENDIAN' when compiling a big-endian
      kernel, like GCC would have done.
      
      To: Mark Salter <msalter@redhat.com>
      To: Aurelien Jacquiot <a-jacquiot@ti.com>
      CC: linux-c6x-dev@linux-c6x.org
      Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
      Signed-off-by: Mark Salter <msalter@redhat.com>
      85fa2cc5
    • Randy Dunlap's avatar
      c6x: fix platforms/plldata.c get_coreid build error · 319938bd
      Randy Dunlap authored
      Fix build error reported by the 0day bot by including the header
      file for that macro.
      
      Fixes this build error: (should fix; not tested)
      arch/c6x/platforms/plldata.c: In function 'c6472_setup_clocks':
      arch/c6x/platforms/plldata.c:279:33: error: implicit declaration of function 'get_coreid'; did you mean 'get_order'? [-Werror=implicit-function-declaration]
            c6x_core_clk.parent = &sysclks[get_coreid() + 1];
      Reported-by: Fengguang Wu <fengguang.wu@intel.com>
      Cc: Mark Salter <msalter@redhat.com>
      Cc: Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
      Cc: linux-c6x-dev@linux-c6x.org
      Cc: Ingo Molnar <mingo@kernel.org>
      Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
      Signed-off-by: Mark Salter <msalter@redhat.com>
      319938bd
    • Jérémy Lefaure's avatar
      c6x: remove unused KTHREAD_SIZE definition · f5ad907e
      Jérémy Lefaure authored
      KTHREAD_SIZE has never been used since it has been defined for c6x arch.
      Let's remove this useless definition.
      Signed-off-by: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
      Signed-off-by: Mark Salter <msalter@redhat.com>
      f5ad907e
    • Linus Torvalds's avatar
      Merge tag 'hwmon-for-linus-v4.17' of... · 71219b34
      Linus Torvalds authored
      Merge tag 'hwmon-for-linus-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
      
      Pull hwmon updates from Guenter Roeck:
      
       - added chip support: new Centaur CPUs, ADM1272, NCT6796D
      
       - ucd9000: added debugfs attributes, gpio support
      
       - cleanup and minor bug fixes
      
      * tag 'hwmon-for-linus-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging:
        hwmon: (via-cputemp) support new centaur CPUs
        hwmon: (nct6775) Fix writing pwmX_mode
        hwmon: (lm92) Add max6635 to lm92_id[]
        hwmon: (pmbus/adm1275) Add support for ADM1272
        hwmon: (lm92) Do not try to detect MAX6635
        hwmon: (ucd9000) Add debugfs attributes to provide mfr_status
        hwmon: (ucd9000) Add gpio chip interface
        hwmon: (nct6775) Add support for NCT6796D
        hwmon: (nct6775) Initialize boolean variables with declaration
        hwmon: (nct6775) Improve fan6/pwm6 support
        hwmon: (nct6775) Use NUM_FAN consistently
        hwmon: (g762) handle cleanup with devm_add_action
        hwmon: (sht3x) Update data sheet URL
        hwmon: (sht21) Update data sheet URLs
        hwmon: (pmbus/adm1275) Accept negative page register values
        hwmon: (pmbus/max8688) Accept negative page register values
      71219b34
    • Linus Torvalds's avatar
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net · c18bb396
      Linus Torvalds authored
      Pull networking fixes from David Miller:
      
       1) The sockmap code has to free socket memory on close if there is
          corked data, from John Fastabend.
      
       2) Tunnel names coming from userspace need to be length validated. From
          Eric Dumazet.
      
       3) arp_filter() has to take VRFs properly into account, from Miguel
          Fadon Perlines.
      
       4) Fix oops in error path of tcf_bpf_init(), from Davide Caratti.
      
       5) Missing idr_remove() in u32_delete_key(), from Cong Wang.
      
       6) More syzbot stuff. Several use of uninitialized value fixes all
          over, from Eric Dumazet.
      
       7) Do not leak kernel memory to userspace in sctp, also from Eric
          Dumazet.
      
       8) Discard frames from unused ports in DSA, from Andrew Lunn.
      
       9) Fix DMA mapping and reset/failover problems in ibmvnic, from Thomas
          Falcon.
      
      10) Do not access dp83640 PHY registers prematurely after reset, from
          Esben Haabendal.
      
      * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (46 commits)
        vhost-net: set packet weight of tx polling to 2 * vq size
        net: thunderx: rework mac addresses list to u64 array
        inetpeer: fix uninit-value in inet_getpeer
        dp83640: Ensure against premature access to PHY registers after reset
        devlink: convert occ_get op to separate registration
        ARM: dts: ls1021a: Specify TBIPA register address
        net/fsl_pq_mdio: Allow explicit speficition of TBIPA address
        ibmvnic: Do not reset CRQ for Mobility driver resets
        ibmvnic: Fix failover case for non-redundant configuration
        ibmvnic: Fix reset scheduler error handling
        ibmvnic: Zero used TX descriptor counter on reset
        ibmvnic: Fix DMA mapping mistakes
        tipc: use the right skb in tipc_sk_fill_sock_diag()
        sctp: sctp_sockaddr_af must check minimal addr length for AF_INET6
        net: dsa: Discard frames from unused ports
        sctp: do not leak kernel memory to user space
        soreuseport: initialise timewait reuseport field
        ipv4: fix uninit-value in ip_route_output_key_hash_rcu()
        dccp: initialize ireq->ir_mark
        net: fix uninit-value in __hw_addr_add_ex()
        ...
      c18bb396
  3. 09 Apr, 2018 18 commits
    • Linus Torvalds's avatar
      Merge branch 'work.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs · fd3b36d2
      Linus Torvalds authored
      Pull vfs namei updates from Al Viro:
      
       - make lookup_one_len() safe with parent locked only shared (incoming
         afs series wants that)
      
       - fix of getname_kernel() regression from 2015 (-stable fodder, that
         one).
      
      * 'work.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
        getname_kernel() needs to make sure that ->name != ->iname in long case
        make lookup_one_len() safe to use with directory locked shared
        new helper: __lookup_slow()
        merge common parts of lookup_one_len{,_unlocked} into common helper
      fd3b36d2
    • Linus Torvalds's avatar
      Merge tag 'for-linus-4.17-ofs' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux · 8ea4a5d8
      Linus Torvalds authored
      Pull orangefs updates from Mike Marshall:
       "Fixes and cleanups:
      
         - Documentation cleanups
      
         - removal of unused code
      
         - make some structs static
      
         - implement Orangefs vm_operations fault callout
      
         - eliminate two single-use functions and put their cleaned up code in
           line.
      
         - replace a vmalloc/memset instance with vzalloc
      
         - fix a race condition bug in wait code"
      
      * tag 'for-linus-4.17-ofs' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux:
        Orangefs: documentation updates
        orangefs: document package install and xfstests procedure
        orangefs: remove unused code
        orangefs: make several *_operations structs static
        orangefs: implement vm_ops->fault
        orangefs: open code short single-use functions
        orangefs: replace vmalloc and memset with vzalloc
        orangefs: bug fix for a race condition when getting a slot
      8ea4a5d8
    • Linus Torvalds's avatar
      Merge tag 'pstore-v4.17-rc1-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux · 190f2ace
      Linus Torvalds authored
      Pull pstore fix from Kees Cook:
       "Fix another compression Kconfig combination missed in testing (Tobias
        Regnery)"
      
      * tag 'pstore-v4.17-rc1-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
        pstore: fix crypto dependencies without compression
      190f2ace
    • Stephen Smalley's avatar
      selinux: fix missing dput() before selinuxfs unmount · fd40ffc7
      Stephen Smalley authored
      Commit 0619f0f5 ("selinux: wrap selinuxfs state") triggers a BUG
      when SELinux is runtime-disabled (i.e. systemd or equivalent disables
      SELinux before initial policy load via /sys/fs/selinux/disable based on
      /etc/selinux/config SELINUX=disabled).
      
      This does not manifest if SELinux is disabled via kernel command line
      argument or if SELinux is enabled (permissive or enforcing).
      
      Before:
        SELinux:  Disabled at runtime.
        BUG: Dentry 000000006d77e5c7{i=17,n=null}  still in use (1) [unmount of selinuxfs selinuxfs]
      
      After:
        SELinux:  Disabled at runtime.
      
      Fixes: 0619f0f5 ("selinux: wrap selinuxfs state")
      Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
      Reported-by: Dmitry Vyukov <dvyukov@google.com>
      Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      fd40ffc7
    • Linus Torvalds's avatar
      Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm · d8312a3f
      Linus Torvalds authored
      Pull kvm updates from Paolo Bonzini:
       "ARM:
         - VHE optimizations
      
         - EL2 address space randomization
      
         - speculative execution mitigations ("variant 3a", aka execution past
           invalid privilege register access)
      
         - bugfixes and cleanups
      
        PPC:
         - improvements for the radix page fault handler for HV KVM on POWER9
      
        s390:
         - more kvm stat counters
      
         - virtio gpu plumbing
      
         - documentation
      
         - facilities improvements
      
        x86:
         - support for VMware magic I/O port and pseudo-PMCs
      
         - AMD pause loop exiting
      
         - support for AMD core performance extensions
      
         - support for synchronous register access
      
         - expose nVMX capabilities to userspace
      
         - support for Hyper-V signaling via eventfd
      
         - use Enlightened VMCS when running on Hyper-V
      
         - allow userspace to disable MWAIT/HLT/PAUSE vmexits
      
         - usual roundup of optimizations and nested virtualization bugfixes
      
        Generic:
         - API selftest infrastructure (though the only tests are for x86 as
           of now)"
      
      * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (174 commits)
        kvm: x86: fix a prototype warning
        kvm: selftests: add sync_regs_test
        kvm: selftests: add API testing infrastructure
        kvm: x86: fix a compile warning
        KVM: X86: Add Force Emulation Prefix for "emulate the next instruction"
        KVM: X86: Introduce handle_ud()
        KVM: vmx: unify adjacent #ifdefs
        x86: kvm: hide the unused 'cpu' variable
        KVM: VMX: remove bogus WARN_ON in handle_ept_misconfig
        Revert "KVM: X86: Fix SMRAM accessing even if VM is shutdown"
        kvm: Add emulation for movups/movupd
        KVM: VMX: raise internal error for exception during invalid protected mode state
        KVM: nVMX: Optimization: Dont set KVM_REQ_EVENT when VMExit with nested_run_pending
        KVM: nVMX: Require immediate-exit when event reinjected to L2 and L1 event pending
        KVM: x86: Fix misleading comments on handling pending exceptions
        KVM: x86: Rename interrupt.pending to interrupt.injected
        KVM: VMX: No need to clear pending NMI/interrupt on inject realmode interrupt
        x86/kvm: use Enlightened VMCS when running on Hyper-V
        x86/hyper-v: detect nested features
        x86/hyper-v: define struct hv_enlightened_vmcs and clean field bits
        ...
      d8312a3f
    • Dan Williams's avatar
      e13e75b8
    • Dan Williams's avatar
      1ed41b56
    • Linus Torvalds's avatar
      Fix subtle macro variable shadowing in min_not_zero() · e9092d0d
      Linus Torvalds authored
      Commit 3c8ba0d6 ("kernel.h: Retain constant expression output for
      max()/min()") rewrote our min/max macros to be very clever, but in the
      meantime resurrected a variable name shadow issue that we had
      previously fixed in commit 589a9785 ("min/max: remove sparse
      warnings when they're nested").
      
      That commit talks about the sparse warnings that this shadowing causes,
      which we ignored as just a minor annoyance.  But it turns out that the
      sparse warning is the least of our problems.  We actually have a real
      bug due to the shadowing through the interaction with "min_not_zero()",
      which ends up doing
      
         min(__x, __y)
      
      internally, and then the new declaration of "__x" and "__y" as new
      variables in __cmp_once() results in a complete mess of an expression,
      and "min_not_zero()" doesn't work at all.
      
      For some odd reason, this only ever caused (reported) problems on s390,
      even though it is a generic issue and most of the (obviously successful)
      testing of the problematic commit had happened on other architectures.
      
      Quoting Sebastian Ott:
       "What happened is that the bio built by the partition detection code
        was attempted to be split by the block layer because the block queue
        had a max_sector setting of 0. blk_queue_max_hw_sectors uses
        min_not_zero."
      
      So re-introduce the use of __UNIQUE_ID() to make sure that the min/max
      macros do not have these kinds of clashes.
      
      [ That said, __UNIQUE_ID() itself has several issues that make it less
        than wonderful.
      
        In particular, the "uniqueness" has a fallback on the line number,
        which means that it's not actually unique in more complex cases if you
        don't build with gcc or clang (which have working unique counters that
        aren't tied to line numbers).
      
        That historical broken fallback also means that we have that pointless
        "prefix" argument that doesn't actually make much sense _except_ for
        the known-broken case. Oh well. ]
      
      Fixes: 3c8ba0d6 ("kernel.h: Retain constant expression output for max()/min()")
      Reported-and-tested-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
      Cc: Kees Cook <keescook@chromium.org>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      e9092d0d
    • Linus Torvalds's avatar
      Merge branch 'for-linus-sa1100' of git://git.armlinux.org.uk/~rmk/linux-arm · 7886e8aa
      Linus Torvalds authored
      Pull ARM SA1100 updates from Russell King:
       "We have support for arbitrary MMIO registers providing platform GPIOs,
        which allows us to abstract some of the SA11x0 CF support.
      
        This set of updates makes that change"
      
      * 'for-linus-sa1100' of git://git.armlinux.org.uk/~rmk/linux-arm:
        ARM: sa1100/simpad: switch simpad CF to use gpiod APIs
        ARM: sa1100/shannon: convert to generic CF sockets
        ARM: sa1100/nanoengine: convert to generic CF sockets
        ARM: sa1100/h3xxx: switch h3xxx PCMCIA to use gpiod APIs
        ARM: sa1100/cerf: convert to generic CF sockets
        ARM: sa1100/assabet: convert to generic CF sockets
        ARM: sa1100: provide infrastructure to support generic CF sockets
        pcmcia: sa1100: provide generic CF support
      7886e8aa
    • Linus Torvalds's avatar
      Merge branch 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm · 4a1e0052
      Linus Torvalds authored
      Pull ARM updates from Russell King:
       "A number of core ARM changes:
      
         - Refactoring linker script by Nicolas Pitre
      
         - Enable source fortification
      
         - Add support for Cortex R8"
      
      * 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm:
        ARM: decompressor: fix warning introduced in fortify patch
        ARM: 8751/1: Add support for Cortex-R8 processor
        ARM: 8749/1: Kconfig: Add ARCH_HAS_FORTIFY_SOURCE
        ARM: simplify and fix linker script for TCM
        ARM: linker script: factor out TCM bits
        ARM: linker script: factor out vectors and stubs
        ARM: linker script: factor out unwinding table sections
        ARM: linker script: factor out stuff for the .text section
        ARM: linker script: factor out stuff for the DISCARD section
        ARM: linker script: factor out some common definitions between XIP and non-XIP
      4a1e0052
    • Linus Torvalds's avatar
      Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu · 2025fef0
      Linus Torvalds authored
      Pull m68knommu update from Greg Ungerer:
       "Only a single fix to set the DMA masks in the ColdFire FEC platform
        data structure.
      
        This stops the warning from dma-mapping.h at boot time"
      
      * 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu:
        m68k: set dma and coherent masks for platform FEC ethernets
      2025fef0
    • Linus Torvalds's avatar
      Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha · 5148408a
      Linus Torvalds authored
      Pull alpha updates from Matt Turner:
       "A few small changes for alpha"
      
      * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha:
        alpha: io: reorder barriers to guarantee writeX() and iowriteX() ordering
        alpha: Implement CPU vulnerabilities sysfs functions.
        alpha: rtc: stop validating rtc_time in .read_time
        alpha: rtc: remove unused set_mmss ops
      5148408a
    • Dan Williams's avatar
      libnvdimm, of_pmem: workaround OF_NUMA=n build error · 291717b6
      Dan Williams authored
      Stephen reports that an x86 allmodconfig build fails to build the
      of_pmem driver due to a missing definition of of_node_to_nid(). That
      helper is currently only exported in the OF_NUMA=y case. In other cases,
      ppc and sparc, it is a weak symbol, and outside of those platforms it is
      a static inline.
      
      Until an OF_NUMA=n configuration can reliably support usage of
      of_node_to_nid() in modules across architectures, mark this driver as
      'bool' instead of 'tristate'.
      
      Cc: Rob Herring <robh@kernel.org>
      Cc: Oliver O'Halloran <oohall@gmail.com>
      Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
      Signed-off-by: Dan Williams <dan.j.williams@intel.com>
      291717b6
    • Linus Torvalds's avatar
      Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux · becdce1c
      Linus Torvalds authored
      Pull s390 updates from Martin Schwidefsky:
      
       - Improvements for the spectre defense:
          * The spectre related code is consolidated to a single file
            nospec-branch.c
          * Automatic enable/disable for the spectre v2 defenses (expoline vs.
            nobp)
          * Syslog messages for spectre v2 are added
          * Enable CONFIG_GENERIC_CPU_VULNERABILITIES and define the attribute
            functions for spectre v1 and v2
      
       - Add helper macros for assembler alternatives and use them to shorten
         the code in entry.S.
      
       - Add support for persistent configuration data via the SCLP Store Data
         interface. The H/W interface requires a page table that uses 4K pages
         only, the code to setup such an address space is added as well.
      
       - Enable virtio GPU emulation in QEMU. To do this the depends
         statements for a few common Kconfig options are modified.
      
       - Add support for format-3 channel path descriptors and add a binary
         sysfs interface to export the associated utility strings.
      
       - Add a sysfs attribute to control the IFCC handling in case of
         constant channel errors.
      
       - The vfio-ccw changes from Cornelia.
      
       - Bug fixes and cleanups.
      
      * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (40 commits)
        s390/kvm: improve stack frame constants in entry.S
        s390/lpp: use assembler alternatives for the LPP instruction
        s390/entry.S: use assembler alternatives
        s390: add assembler macros for CPU alternatives
        s390: add sysfs attributes for spectre
        s390: report spectre mitigation via syslog
        s390: add automatic detection of the spectre defense
        s390: move nobp parameter functions to nospec-branch.c
        s390/cio: add util_string sysfs attribute
        s390/chsc: query utility strings via fmt3 channel path descriptor
        s390/cio: rename struct channel_path_desc
        s390/cio: fix unbind of io_subchannel_driver
        s390/qdio: split up CCQ handling for EQBS / SQBS
        s390/qdio: don't retry EQBS after CCQ 96
        s390/qdio: restrict buffer merging to eligible devices
        s390/qdio: don't merge ERROR output buffers
        s390/qdio: simplify math in get_*_buffer_frontier()
        s390/decompressor: trim uncompressed image head during the build
        s390/crypto: Fix kernel crash on aes_s390 module remove.
        s390/defkeymap: fix global init to zero
        ...
      becdce1c
    • Takashi Iwai's avatar
      ALSA: pcm: Remove WARN_ON() at snd_pcm_hw_params() error · e1a3a981
      Takashi Iwai authored
      snd_pcm_hw_params() (more exactly snd_pcm_hw_params_choose()) contains
      a check of the return error from snd_pcm_hw_param_first() and _last()
      with snd_BUG_ON() -- i.e. it may trigger WARN_ON() depending on the
      kconfig.
      
      This was a valid check in the past, as these functions shouldn't
      return any error if the parameters have been already refined via
      snd_pcm_hw_refine() beforehand.  However, the recent rewrite
      introduced a kmalloc() in snd_pcm_hw_refine() for removing VLA, and
      this brought a possibility to trigger an error.  As a result, syzbot
      caught lots of superfluous kernel WARN_ON() and panicked via fault
      injection.
      
      As the WARN_ON() is no longer valid with the introduction of
      kmalloc(), let's drop the snd_BUG_ON() check, in order to make the
      world a peaceful place again.
      
      Reported-by: syzbot+803e0047ac3a3096bb4f@syzkaller.appspotmail.com
      Fixes: 5730f9f7 ("ALSA: pcm: Remove VLA usage")
      Signed-off-by: Takashi Iwai <tiwai@suse.de>
      e1a3a981
    • haibinzhang(张海斌)'s avatar
      vhost-net: set packet weight of tx polling to 2 * vq size · a2ac9990
      haibinzhang(张海斌) authored
      handle_tx will delay rx for tens or even hundreds of milliseconds when tx is
      busy polling UDP packets of small length (e.g. 1-byte UDP payload), because
      VHOST_NET_WEIGHT takes into account only the bytes sent, not the number of packets.
      
      Ping-Latencies shown below were tested between two Virtual Machines using
      netperf (UDP_STREAM, len=1), and then another machine pinged the client:
      
      vq size=256
      Packet-Weight   Ping-Latencies(millisecond)
                         min      avg       max
      Origin           3.319   18.489    57.303
      64               1.643    2.021     2.552
      128              1.825    2.600     3.224
      256              1.997    2.710     4.295
      512              1.860    3.171     4.631
      1024             2.002    4.173     9.056
      2048             2.257    5.650     9.688
      4096             2.093    8.508    15.943
      
      vq size=512
      Packet-Weight   Ping-Latencies(millisecond)
                         min      avg       max
      Origin           6.537   29.177    66.245
      64               2.798    3.614     4.403
      128              2.861    3.820     4.775
      256              3.008    4.018     4.807
      512              3.254    4.523     5.824
      1024             3.079    5.335     7.747
      2048             3.944    8.201    12.762
      4096             4.158   11.057    19.985
      
      Seems pretty consistent, a small dip at 2 VQ sizes.
      Ring size is a hint from device about a burst size it can tolerate. Based on
      benchmarks, set the weight to 2 * vq size.
      
      To evaluate this change, further tests were done using netperf (RR, TX) between
      two machines with Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz, and vq size was
      tweaked through qemu. The results shown below do not show obvious changes.
      
      vq size=256 TCP_RR                vq size=512 TCP_RR
      size/sessions/+thu%/+normalize%   size/sessions/+thu%/+normalize%
         1/       1/  -7%/        -2%      1/       1/   0%/        -2%
         1/       4/  +1%/         0%      1/       4/  +1%/         0%
         1/       8/  +1%/        -2%      1/       8/   0%/        +1%
        64/       1/  -6%/         0%     64/       1/  +7%/        +3%
        64/       4/   0%/        +2%     64/       4/  -1%/        +1%
        64/       8/   0%/         0%     64/       8/  -1%/        -2%
       256/       1/  -3%/        -4%    256/       1/  -4%/        -2%
       256/       4/  +3%/        +4%    256/       4/  +1%/        +2%
       256/       8/  +2%/         0%    256/       8/  +1%/        -1%
      
      vq size=256 UDP_RR                vq size=512 UDP_RR
      size/sessions/+thu%/+normalize%   size/sessions/+thu%/+normalize%
         1/       1/  -5%/        +1%      1/       1/  -3%/        -2%
         1/       4/  +4%/        +1%      1/       4/  -2%/        +2%
         1/       8/  -1%/        -1%      1/       8/  -1%/         0%
        64/       1/  -2%/        -3%     64/       1/  +1%/        +1%
        64/       4/  -5%/        -1%     64/       4/  +2%/         0%
        64/       8/   0%/        -1%     64/       8/  -2%/        +1%
       256/       1/  +7%/        +1%    256/       1/  -7%/         0%
       256/       4/  +1%/        +1%    256/       4/  -3%/        -4%
       256/       8/  +2%/        +2%    256/       8/  +1%/        +1%
      
      vq size=256 TCP_STREAM            vq size=512 TCP_STREAM
      size/sessions/+thu%/+normalize%   size/sessions/+thu%/+normalize%
        64/       1/   0%/        -3%     64/       1/   0%/         0%
        64/       4/  +3%/        -1%     64/       4/  -2%/        +4%
        64/       8/  +9%/        -4%     64/       8/  -1%/        +2%
       256/       1/  +1%/        -4%    256/       1/  +1%/        +1%
       256/       4/  -1%/        -1%    256/       4/  -3%/         0%
       256/       8/  +7%/        +5%    256/       8/  -3%/         0%
       512/       1/  +1%/         0%    512/       1/  -1%/        -1%
       512/       4/  +1%/        -1%    512/       4/   0%/         0%
       512/       8/  +7%/        -5%    512/       8/  +6%/        -1%
      1024/       1/   0%/        -1%   1024/       1/   0%/        +1%
      1024/       4/  +3%/         0%   1024/       4/  +1%/         0%
      1024/       8/  +8%/        +5%   1024/       8/  -1%/         0%
      2048/       1/  +2%/        +2%   2048/       1/  -1%/         0%
      2048/       4/  +1%/         0%   2048/       4/   0%/        -1%
      2048/       8/  -2%/         0%   2048/       8/  +5%/        -1%
      4096/       1/  -2%/         0%   4096/       1/  -2%/         0%
      4096/       4/  +2%/         0%   4096/       4/   0%/         0%
      4096/       8/  +9%/        -2%   4096/       8/  -5%/        -1%
      Acked-by: Michael S. Tsirkin <mst@redhat.com>
      Signed-off-by: Haibin Zhang <haibinzhang@tencent.com>
      Signed-off-by: Yunfang Tai <yunfangtai@tencent.com>
      Signed-off-by: Lidong Chen <lidongchen@tencent.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      a2ac9990
    • Vadim Lomovtsev's avatar
      net: thunderx: rework mac addresses list to u64 array · 9b5c4dfb
      Vadim Lomovtsev authored
      It is too expensive to pass u64 values via a linked list; instead,
      allocate an array for them sized by the overall number of MAC addresses
      from netdev.
      
      This eventually removes multiple kmalloc() calls, avoids memory
      fragmentation and allows a single null check on the kmalloc()
      return value in order to prevent a potential null pointer dereference.
      
      Addresses-Coverity-ID: 1467429 ("Dereference null return value")
      Fixes: 37c3347e ("net: thunderx: add ndo_set_rx_mode callback implementation for VF")
      Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
      Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      9b5c4dfb
    • Eric Dumazet's avatar
      inetpeer: fix uninit-value in inet_getpeer · b6a37e5e
      Eric Dumazet authored
      syzbot/KMSAN reported that p->dtime was read while it was
      not yet initialized in:
      
      	delta = (__u32)jiffies - p->dtime;
      	if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
      		gc_stack[i] = NULL;
      
      This is a false positive, because the inetpeer won't be erased
      from the rb-tree if refcount_dec_if_one(&p->refcnt) does not
      succeed. And this won't happen before the first inet_putpeer() call
      for this inetpeer has been done, and the ->dtime field is written
      exactly before the refcount_dec_and_test(&p->refcnt).
      
      The KMSAN report was:
      
      BUG: KMSAN: uninit-value in inet_peer_gc net/ipv4/inetpeer.c:163 [inline]
      BUG: KMSAN: uninit-value in inet_getpeer+0x1567/0x1e70 net/ipv4/inetpeer.c:228
      CPU: 0 PID: 9494 Comm: syz-executor5 Not tainted 4.16.0+ #82
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      Call Trace:
       __dump_stack lib/dump_stack.c:17 [inline]
       dump_stack+0x185/0x1d0 lib/dump_stack.c:53
       kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
       __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676
       inet_peer_gc net/ipv4/inetpeer.c:163 [inline]
       inet_getpeer+0x1567/0x1e70 net/ipv4/inetpeer.c:228
       inet_getpeer_v4 include/net/inetpeer.h:110 [inline]
       icmpv4_xrlim_allow net/ipv4/icmp.c:330 [inline]
       icmp_send+0x2b44/0x3050 net/ipv4/icmp.c:725
       ip_options_compile+0x237c/0x29f0 net/ipv4/ip_options.c:472
       ip_rcv_options net/ipv4/ip_input.c:284 [inline]
       ip_rcv_finish+0xda8/0x16d0 net/ipv4/ip_input.c:365
       NF_HOOK include/linux/netfilter.h:288 [inline]
       ip_rcv+0x119d/0x16f0 net/ipv4/ip_input.c:493
       __netif_receive_skb_core+0x47cf/0x4a80 net/core/dev.c:4562
       __netif_receive_skb net/core/dev.c:4627 [inline]
       netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
       netif_receive_skb+0x230/0x240 net/core/dev.c:4725
       tun_rx_batched drivers/net/tun.c:1555 [inline]
       tun_get_user+0x6d88/0x7580 drivers/net/tun.c:1962
       tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
       do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
       do_iter_write+0x30d/0xd40 fs/read_write.c:932
       vfs_writev fs/read_write.c:977 [inline]
       do_writev+0x3c9/0x830 fs/read_write.c:1012
       SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
       SyS_writev+0x56/0x80 fs/read_write.c:1082
       do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x3d/0xa2
      RIP: 0033:0x455111
      RSP: 002b:00007fae0365cba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014
      RAX: ffffffffffffffda RBX: 000000000000002e RCX: 0000000000455111
      RDX: 0000000000000001 RSI: 00007fae0365cbf0 RDI: 00000000000000fc
      RBP: 0000000020000040 R08: 00000000000000fc R09: 0000000000000000
      R10: 000000000000002e R11: 0000000000000293 R12: 00000000ffffffff
      R13: 0000000000000658 R14: 00000000006fc8e0 R15: 0000000000000000
      
      Uninit was created at:
       kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
       kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
       kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
       kmem_cache_alloc+0xaab/0xb90 mm/slub.c:2756
       inet_getpeer+0xed8/0x1e70 net/ipv4/inetpeer.c:210
       inet_getpeer_v4 include/net/inetpeer.h:110 [inline]
       ip4_frag_init+0x4d1/0x740 net/ipv4/ip_fragment.c:153
       inet_frag_alloc net/ipv4/inet_fragment.c:369 [inline]
       inet_frag_create net/ipv4/inet_fragment.c:385 [inline]
       inet_frag_find+0x7da/0x1610 net/ipv4/inet_fragment.c:418
       ip_find net/ipv4/ip_fragment.c:275 [inline]
       ip_defrag+0x448/0x67a0 net/ipv4/ip_fragment.c:676
       ip_check_defrag+0x775/0xda0 net/ipv4/ip_fragment.c:724
       packet_rcv_fanout+0x2a8/0x8d0 net/packet/af_packet.c:1447
       deliver_skb net/core/dev.c:1897 [inline]
       deliver_ptype_list_skb net/core/dev.c:1912 [inline]
       __netif_receive_skb_core+0x314a/0x4a80 net/core/dev.c:4545
       __netif_receive_skb net/core/dev.c:4627 [inline]
       netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
       netif_receive_skb+0x230/0x240 net/core/dev.c:4725
       tun_rx_batched drivers/net/tun.c:1555 [inline]
       tun_get_user+0x6d88/0x7580 drivers/net/tun.c:1962
       tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
       do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
       do_iter_write+0x30d/0xd40 fs/read_write.c:932
       vfs_writev fs/read_write.c:977 [inline]
       do_writev+0x3c9/0x830 fs/read_write.c:1012
       SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
       SyS_writev+0x56/0x80 fs/read_write.c:1082
       do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x3d/0xa2
      Signed-off-by: Eric Dumazet <edumazet@google.com>
      Reported-by: syzbot <syzkaller@googlegroups.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      b6a37e5e