1. 21 Dec, 2018 28 commits
  2. 17 Dec, 2018 12 commits
    • Greg Kroah-Hartman's avatar
      Linux 4.14.89 · 3beeb261
      Greg Kroah-Hartman authored
      3beeb261
    • Eric Dumazet's avatar
      tcp: lack of available data can also cause TSO defer · 4465b31b
      Eric Dumazet authored
      commit f9bfe4e6 upstream.
      
      tcp_tso_should_defer() can return true in three different cases :
      
       1) We are cwnd-limited
       2) We are rwnd-limited
       3) We are application limited.
      
      Neal pointed out that my recent fix went too far, since
      it assumed that if we were not in 1) case, we must be rwnd-limited
      
      Fix this by properly populating the is_cwnd_limited and
      is_rwnd_limited booleans.
      
      After this change, we can finally move the silly check for FIN
      flag only for the application-limited case.
      
      The same move for EOR bit will be handled in net-next,
      since commit 1c09f7d0 ("tcp: do not try to defer skbs
      with eor mark (MSG_EOR)") is scheduled for linux-4.21
      
      Tested by running 200 concurrent netperf -t TCP_RR -- -r 60000,100
      and checking none of them was rwnd_limited in the chrono_stat
      output from "ss -ti" command.
      
      Fixes: 41727549 ("tcp: Do not underestimate rwnd_limited")
      Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
      Suggested-by: default avatarNeal Cardwell <ncardwell@google.com>
      Reviewed-by: default avatarNeal Cardwell <ncardwell@google.com>
      Acked-by: default avatarSoheil Hassas Yeganeh <soheil@google.com>
      Reviewed-by: default avatarYuchung Cheng <ycheng@google.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      4465b31b
    • Piotr Stankiewicz's avatar
      IB/hfi1: Fix an out-of-bounds access in get_hw_stats · 01a16601
      Piotr Stankiewicz authored
      commit 36d84219 upstream.
      
      When running with KASAN, the following trace is produced:
      
      [   62.535888]
      
      ==================================================================
      [   62.544930] BUG: KASAN: slab-out-of-bounds in
      gut_hw_stats+0x122/0x230 [hfi1]
      [   62.553856] Write of size 8 at addr ffff88080e8d6330 by task
      kworker/0:1/14
      
      [   62.565333] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted
      4.19.0-test-build-kasan+ #8
      [   62.575087] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS
      SE5C610.86B.01.01.0019.101220160604 10/12/2016
      [   62.587951] Workqueue: events work_for_cpu_fn
      [   62.594050] Call Trace:
      [   62.598023]  dump_stack+0xc6/0x14c
      [   62.603089]  ? dump_stack_print_info.cold.1+0x2f/0x2f
      [   62.610041]  ? kmsg_dump_rewind_nolock+0x59/0x59
      [   62.616615]  ? get_hw_stats+0x122/0x230 [hfi1]
      [   62.622985]  print_address_description+0x6c/0x23c
      [   62.629744]  ? get_hw_stats+0x122/0x230 [hfi1]
      [   62.636108]  kasan_report.cold.6+0x241/0x308
      [   62.642365]  get_hw_stats+0x122/0x230 [hfi1]
      [   62.648703]  ? hfi1_alloc_rn+0x40/0x40 [hfi1]
      [   62.655088]  ? __kmalloc+0x110/0x240
      [   62.660695]  ? hfi1_alloc_rn+0x40/0x40 [hfi1]
      [   62.667142]  setup_hw_stats+0xd8/0x430 [ib_core]
      [   62.673972]  ? show_hfi+0x50/0x50 [hfi1]
      [   62.680026]  ib_device_register_sysfs+0x165/0x180 [ib_core]
      [   62.687995]  ib_register_device+0x5a2/0xa10 [ib_core]
      [   62.695340]  ? show_hfi+0x50/0x50 [hfi1]
      [   62.701421]  ? ib_unregister_device+0x2e0/0x2e0 [ib_core]
      [   62.709222]  ? __vmalloc_node_range+0x2d0/0x380
      [   62.716131]  ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt]
      [   62.723735]  ? vmalloc_node+0x5c/0x70
      [   62.729697]  ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt]
      [   62.737347]  ? rvt_driver_mr_init+0x1f5/0x2d0 [rdmavt]
      [   62.744998]  ? __rvt_alloc_mr+0x110/0x110 [rdmavt]
      [   62.752315]  ? rvt_rc_error+0x140/0x140 [rdmavt]
      [   62.759434]  ? rvt_vma_open+0x30/0x30 [rdmavt]
      [   62.766364]  ? mutex_unlock+0x1d/0x40
      [   62.772445]  ? kmem_cache_create_usercopy+0x15d/0x230
      [   62.780115]  rvt_register_device+0x1f6/0x360 [rdmavt]
      [   62.787823]  ? rvt_get_port_immutable+0x180/0x180 [rdmavt]
      [   62.796058]  ? __get_txreq+0x400/0x400 [hfi1]
      [   62.802969]  ? memcpy+0x34/0x50
      [   62.808611]  hfi1_register_ib_device+0xde6/0xeb0 [hfi1]
      [   62.816601]  ? hfi1_get_npkeys+0x10/0x10 [hfi1]
      [   62.823760]  ? hfi1_init+0x89f/0x9a0 [hfi1]
      [   62.830469]  ? hfi1_setup_eagerbufs+0xad0/0xad0 [hfi1]
      [   62.838204]  ? pcie_capability_clear_and_set_word+0xcd/0xe0
      [   62.846429]  ? pcie_capability_read_word+0xd0/0xd0
      [   62.853791]  ? hfi1_pcie_init+0x187/0x4b0 [hfi1]
      [   62.860958]  init_one+0x67f/0xae0 [hfi1]
      [   62.867301]  ? hfi1_init+0x9a0/0x9a0 [hfi1]
      [   62.873876]  ? wait_woken+0x130/0x130
      [   62.879860]  ? read_word_at_a_time+0xe/0x20
      [   62.886329]  ? strscpy+0x14b/0x280
      [   62.891998]  ? hfi1_init+0x9a0/0x9a0 [hfi1]
      [   62.898405]  local_pci_probe+0x70/0xd0
      [   62.904295]  ? pci_device_shutdown+0x90/0x90
      [   62.910833]  work_for_cpu_fn+0x29/0x40
      [   62.916750]  process_one_work+0x584/0x960
      [   62.922974]  ? rcu_work_rcufn+0x40/0x40
      [   62.928991]  ? __schedule+0x396/0xdc0
      [   62.934806]  ? __sched_text_start+0x8/0x8
      [   62.941020]  ? pick_next_task_fair+0x68b/0xc60
      [   62.947674]  ? run_rebalance_domains+0x260/0x260
      [   62.954471]  ? __list_add_valid+0x29/0xa0
      [   62.960607]  ? move_linked_works+0x1c7/0x230
      [   62.967077]  ?
      trace_event_raw_event_workqueue_execute_start+0x140/0x140
      [   62.976248]  ? mutex_lock+0xa6/0x100
      [   62.982029]  ? __mutex_lock_slowpath+0x10/0x10
      [   62.988795]  ? __switch_to+0x37a/0x710
      [   62.994731]  worker_thread+0x62e/0x9d0
      [   63.000602]  ? max_active_store+0xf0/0xf0
      [   63.006828]  ? __switch_to_asm+0x40/0x70
      [   63.012932]  ? __switch_to_asm+0x34/0x70
      [   63.019013]  ? __switch_to_asm+0x40/0x70
      [   63.025042]  ? __switch_to_asm+0x34/0x70
      [   63.031030]  ? __switch_to_asm+0x40/0x70
      [   63.037006]  ? __schedule+0x396/0xdc0
      [   63.042660]  ? kmem_cache_alloc_trace+0xf3/0x1f0
      [   63.049323]  ? kthread+0x59/0x1d0
      [   63.054594]  ? ret_from_fork+0x35/0x40
      [   63.060257]  ? __sched_text_start+0x8/0x8
      [   63.066212]  ? schedule+0xcf/0x250
      [   63.071529]  ? __wake_up_common+0x110/0x350
      [   63.077794]  ? __schedule+0xdc0/0xdc0
      [   63.083348]  ? wait_woken+0x130/0x130
      [   63.088963]  ? finish_task_switch+0x1f1/0x520
      [   63.095258]  ? kasan_unpoison_shadow+0x30/0x40
      [   63.101792]  ? __init_waitqueue_head+0xa0/0xd0
      [   63.108183]  ? replenish_dl_entity.cold.60+0x18/0x18
      [   63.115151]  ? _raw_spin_lock_irqsave+0x25/0x50
      [   63.121754]  ? max_active_store+0xf0/0xf0
      [   63.127753]  kthread+0x1ae/0x1d0
      [   63.132894]  ? kthread_bind+0x30/0x30
      [   63.138422]  ret_from_fork+0x35/0x40
      
      [   63.146973] Allocated by task 14:
      [   63.152077]  kasan_kmalloc+0xbf/0xe0
      [   63.157471]  __kmalloc+0x110/0x240
      [   63.162804]  init_cntrs+0x34d/0xdf0 [hfi1]
      [   63.168883]  hfi1_init_dd+0x29a3/0x2f90 [hfi1]
      [   63.175244]  init_one+0x551/0xae0 [hfi1]
      [   63.181065]  local_pci_probe+0x70/0xd0
      [   63.186759]  work_for_cpu_fn+0x29/0x40
      [   63.192310]  process_one_work+0x584/0x960
      [   63.198163]  worker_thread+0x62e/0x9d0
      [   63.203843]  kthread+0x1ae/0x1d0
      [   63.208874]  ret_from_fork+0x35/0x40
      
      [   63.217203] Freed by task 1:
      [   63.221844]  __kasan_slab_free+0x12e/0x180
      [   63.227844]  kfree+0x92/0x1a0
      [   63.232570]  single_release+0x3a/0x60
      [   63.238024]  __fput+0x1d9/0x480
      [   63.242911]  task_work_run+0x139/0x190
      [   63.248440]  exit_to_usermode_loop+0x191/0x1a0
      [   63.254814]  do_syscall_64+0x301/0x330
      [   63.260283]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
      
      [   63.270199] The buggy address belongs to the object at
      ffff88080e8d5500
       which belongs to the cache kmalloc-4096 of size 4096
      [   63.287247] The buggy address is located 3632 bytes inside of
       4096-byte region [ffff88080e8d5500, ffff88080e8d6500)
      [   63.303564] The buggy address belongs to the page:
      [   63.310447] page:ffffea00203a3400 count:1 mapcount:0
      mapping:ffff88081380e840 index:0x0 compound_mapcount: 0
      [   63.323102] flags: 0x2fffff80008100(slab|head)
      [   63.329775] raw: 002fffff80008100 0000000000000000 0000000100000001
      ffff88081380e840
      [   63.340175] raw: 0000000000000000 0000000000070007 00000001ffffffff
      0000000000000000
      [   63.350564] page dumped because: kasan: bad access detected
      
      [   63.361974] Memory state around the buggy address:
      [   63.369137]  ffff88080e8d6200: 00 00 00 00 00 00 00 00 00 00 00 00 00
      00 00 00
      [   63.379082]  ffff88080e8d6280: 00 00 00 00 00 00 00 00 00 00 00 00 00
      00 00 00
      [   63.389032] >ffff88080e8d6300: 00 00 00 00 00 00 fc fc fc fc fc fc fc
      fc fc fc
      [   63.398944]                                      ^
      [   63.406141]  ffff88080e8d6380: fc fc fc fc fc fc fc fc fc fc fc fc fc
      fc fc fc
      [   63.416109]  ffff88080e8d6400: fc fc fc fc fc fc fc fc fc fc fc fc fc
      fc fc fc
      [   63.426099]
      ==================================================================
      
      The trace happens because get_hw_stats() assumes there is room in the
      memory allocated in init_cntrs() to accommodate the driver counters.
      Unfortunately, that routine only allocated space for the device
      counters.
      
      Fix by insuring the allocation has room for the additional driver
      counters.
      
      Cc: <Stable@vger.kernel.org> # v4.14+
      Fixes: b7481944 ("IB/hfi1: Show statistics counters under IB stats interface")
      Reviewed-by: default avatarMike Marciniczyn <mike.marciniszyn@intel.com>
      Reviewed-by: default avatarMike Ruhl <michael.j.ruhl@intel.com>
      Signed-off-by: default avatarPiotr Stankiewicz <piotr.stankiewicz@intel.com>
      Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
      Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      01a16601
    • Kailang Yang's avatar
      ALSA: hda/realtek - Fixed headphone issue for ALC700 · d655a1a6
      Kailang Yang authored
      commit bde1a745 upstream.
      
      If it plugged headphone or headset into the jack, then
      do the reboot, it will have a chance to cause headphone no sound.
      It just need to run the headphone mode procedure after boot time.
      The issue will be fixed.
      It also suitable for ALC234 ALC274 and ALC294.
      Signed-off-by: default avatarKailang Yang <kailang@realtek.com>
      Cc: <stable@vger.kernel.org>
      Signed-off-by: default avatarTakashi Iwai <tiwai@suse.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      d655a1a6
    • Takashi Sakamoto's avatar
      ALSA: fireface: fix reference to wrong register for clock configuration · 62711dc6
      Takashi Sakamoto authored
      commit fa9c98e4 upstream.
      
      In an initial commit, 'SYNC_STATUS' register is referred to get
      clock configuration, however this is wrong, according to my local
      note at hand for reverse-engineering about packet dump. It should
      be 'CLOCK_CONFIG' register. Actually, ff400_dump_clock_config()
      is correctly programmed.
      
      This commit fixes the bug.
      
      Cc: <stable@vger.kernel.org> # v4.12+
      Fixes: 76fdb3a9 ('ALSA: fireface: add support for Fireface 400')
      Signed-off-by: default avatarTakashi Sakamoto <o-takashi@sakamocchi.jp>
      Signed-off-by: default avatarTakashi Iwai <tiwai@suse.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      62711dc6
    • Guenter Roeck's avatar
      staging: speakup: Replace strncpy with memcpy · 16906e5a
      Guenter Roeck authored
      commit fd29edc7 upstream.
      
      gcc 8.1.0 generates the following warnings.
      
      drivers/staging/speakup/kobjects.c: In function 'punc_store':
      drivers/staging/speakup/kobjects.c:522:2: warning:
      	'strncpy' output truncated before terminating nul
      	copying as many bytes from a string as its length
      drivers/staging/speakup/kobjects.c:504:6: note: length computed here
      
      drivers/staging/speakup/kobjects.c: In function 'synth_store':
      drivers/staging/speakup/kobjects.c:391:2: warning:
      	'strncpy' output truncated before terminating nul
      	copying as many bytes from a string as its length
      drivers/staging/speakup/kobjects.c:388:8: note: length computed here
      
      Using strncpy() is indeed less than perfect since the length of data to
      be copied has already been determined with strlen(). Replace strncpy()
      with memcpy() to address the warning and optimize the code a little.
      Signed-off-by: default avatarGuenter Roeck <linux@roeck-us.net>
      Reviewed-by: default avatarSamuel Thibault <samuel.thibault@ens-lyon.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      16906e5a
    • Tigran Mkrtchyan's avatar
      flexfiles: enforce per-mirror stateid only for v4 DSes · 5d2cc520
      Tigran Mkrtchyan authored
      commit 320f35b7 upstream.
      
      Since commit bb21ce0a we always enforce per-mirror stateid.
      However, this makes sense only for v4+ servers.
      Signed-off-by: default avatarTigran Mkrtchyan <tigran.mkrtchyan@desy.de>
      Signed-off-by: default avatarTrond Myklebust <trond.myklebust@hammerspace.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      5d2cc520
    • Davidlohr Bueso's avatar
      lib/rbtree-test: lower default params · 891e5a89
      Davidlohr Bueso authored
      commit 0b548e33 upstream.
      
      Fengguang reported soft lockups while running the rbtree and interval
      tree test modules.  The logic for these tests all occur in init phase,
      and we currently are pounding with the default values for number of
      nodes and number of iterations of each test.  Reduce the latter by two
      orders of magnitude.  This does not influence the value of the tests in
      that one thousand times by default is enough to get the picture.
      
      Link: http://lkml.kernel.org/r/20171109161715.xai2dtwqw2frhkcm@linux-n805Signed-off-by: default avatarDavidlohr Bueso <dbueso@suse.de>
      Reported-by: default avatarFengguang Wu <fengguang.wu@intel.com>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      Cc: Guenter Roeck <linux@roeck-us.net>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      891e5a89
    • Petr Mladek's avatar
      printk: Wake klogd when passing console_lock owner · 16c9a316
      Petr Mladek authored
      [ Upstream commit c14376de ]
      
      wake_klogd is a local variable in console_unlock(). The information
      is lost when the console_lock owner using the busy wait added by
      the commit dbdda842 ("printk: Add console owner and waiter
      logic to load balance console writes"). The following race is
      possible:
      
      CPU0				CPU1
      console_unlock()
      
        for (;;)
           /* calling console for last message */
      
      				printk()
      				  log_store()
      				    log_next_seq++;
      
           /* see new message */
           if (seen_seq != log_next_seq) {
      	wake_klogd = true;
      	seen_seq = log_next_seq;
           }
      
           console_lock_spinning_enable();
      
      				  if (console_trylock_spinning())
      				     /* spinning */
      
           if (console_lock_spinning_disable_and_check()) {
      	printk_safe_exit_irqrestore(flags);
      	return;
      
      				  console_unlock()
      				    if (seen_seq != log_next_seq) {
      				    /* already seen */
      				    /* nothing to do */
      
      Result: Nobody would wakeup klogd.
      
      One solution would be to make a global variable from wake_klogd.
      But then we would need to manipulate it under a lock or so.
      
      This patch wakes klogd also when console_lock is passed to the
      spinning waiter. It looks like the right way to go. Also userspace
      should have a chance to see and store any "flood" of messages.
      
      Note that the very late klogd wake up was a historic solution.
      It made sense on single CPU systems or when sys_syslog() operations
      were synchronized using the big kernel lock like in v2.1.113.
      But it is questionable these days.
      
      Fixes: dbdda842 ("printk: Add console owner and waiter logic to load balance console writes")
      Link: http://lkml.kernel.org/r/20180226155734.dzwg3aovqnwtvkoy@pathway.suse.cz
      Cc: Steven Rostedt <rostedt@goodmis.org>
      Cc: linux-kernel@vger.kernel.org
      Cc: Tejun Heo <tj@kernel.org>
      Suggested-by: default avatarSergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Reviewed-by: default avatarSergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Signed-off-by: default avatarPetr Mladek <pmladek@suse.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      16c9a316
    • Sergey Senozhatsky's avatar
      printk: Never set console_may_schedule in console_trylock() · 08b7a8f8
      Sergey Senozhatsky authored
      [ Upstream commit fd5f7cde ]
      
      This patch, basically, reverts commit 6b97a20d ("printk:
      set may_schedule for some of console_trylock() callers").
      That commit was a mistake, it introduced a big dependency
      on the scheduler, by enabling preemption under console_sem
      in printk()->console_unlock() path, which is rather too
      critical. The patch did not significantly reduce the
      possibilities of printk() lockups, but made it possible to
      stall printk(), as has been reported by Tetsuo Handa [1].
      
      Another issues is that preemption under console_sem also
      messes up with Steven Rostedt's hand off scheme, by making
      it possible to sleep with console_sem both in console_unlock()
      and in vprintk_emit(), after acquiring the console_sem
      ownership (anywhere between printk_safe_exit_irqrestore() in
      console_trylock_spinning() and printk_safe_enter_irqsave()
      in console_unlock()). This makes hand off less likely and,
      at the same time, may result in a significant amount of
      pending logbuf messages. Preempted console_sem owner makes
      it impossible for other CPUs to emit logbuf messages, but
      does not make it impossible for other CPUs to append new
      messages to the logbuf.
      
      Reinstate the old behavior and make printk() non-preemptible.
      Should any printk() lockup reports arrive they must be handled
      in a different way.
      
      [1] http://lkml.kernel.org/r/201603022101.CAH73907.OVOOMFHFFtQJSL%20()%20I-love%20!%20SAKURA%20!%20ne%20!%20jp
      Fixes: 6b97a20d ("printk: set may_schedule for some of console_trylock() callers")
      Link: http://lkml.kernel.org/r/20180116044716.GE6607@jagdpanzerIV
      To: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
      Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: akpm@linux-foundation.org
      Cc: linux-mm@kvack.org
      Cc: Cong Wang <xiyou.wangcong@gmail.com>
      Cc: Dave Hansen <dave.hansen@intel.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@suse.de>
      Cc: Michal Hocko <mhocko@kernel.org>
      Cc: Vlastimil Babka <vbabka@suse.cz>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Jan Kara <jack@suse.cz>
      Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
      Cc: Byungchul Park <byungchul.park@lge.com>
      Cc: Pavel Machek <pavel@ucw.cz>
      Cc: linux-kernel@vger.kernel.org
      Signed-off-by: default avatarSergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Reported-by: default avatarTetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
      Reviewed-by: default avatarSteven Rostedt (VMware) <rostedt@goodmis.org>
      Signed-off-by: default avatarPetr Mladek <pmladek@suse.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      08b7a8f8
    • Petr Mladek's avatar
      printk: Hide console waiter logic into helpers · ef433725
      Petr Mladek authored
      [ Upstream commit c162d5b4 ]
      
      The commit ("printk: Add console owner and waiter logic to load balance
      console writes") made vprintk_emit() and console_unlock() even more
      complicated.
      
      This patch extracts the new code into 3 helper functions. They should
      help to keep it rather self-contained. It will be easier to use and
      maintain.
      
      This patch just shuffles the existing code. It does not change
      the functionality.
      
      Link: http://lkml.kernel.org/r/20180112160837.GD24497@linux.suse
      Cc: akpm@linux-foundation.org
      Cc: linux-mm@kvack.org
      Cc: Cong Wang <xiyou.wangcong@gmail.com>
      Cc: Dave Hansen <dave.hansen@intel.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@suse.de>
      Cc: Michal Hocko <mhocko@kernel.org>
      Cc: Vlastimil Babka <vbabka@suse.cz>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Jan Kara <jack@suse.cz>
      Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
      Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
      Cc: rostedt@home.goodmis.org
      Cc: Byungchul Park <byungchul.park@lge.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Pavel Machek <pavel@ucw.cz>
      Cc: linux-kernel@vger.kernel.org
      Reviewed-by: default avatarSteven Rostedt (VMware) <rostedt@goodmis.org>
      Acked-by: default avatarSergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Signed-off-by: default avatarPetr Mladek <pmladek@suse.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      ef433725
    • Steven Rostedt (VMware)'s avatar
      printk: Add console owner and waiter logic to load balance console writes · 59423114
      Steven Rostedt (VMware) authored
      [ Upstream commit dbdda842 ]
      
      This patch implements what I discussed in Kernel Summit. I added
      lockdep annotation (hopefully correctly), and it hasn't had any splats
      (since I fixed some bugs in the first iterations). It did catch
      problems when I had the owner covering too much. But now that the owner
      is only set when actively calling the consoles, lockdep has stayed
      quiet.
      
      Here's the design again:
      
      I added a "console_owner" which is set to a task that is actively
      writing to the consoles. It is *not* the same as the owner of the
      console_lock. It is only set when doing the calls to the console
      functions. It is protected by a console_owner_lock which is a raw spin
      lock.
      
      There is a console_waiter. This is set when there is an active console
      owner that is not current, and waiter is not set. This too is protected
      by console_owner_lock.
      
      In printk() when it tries to write to the consoles, we have:
      
      	if (console_trylock())
      		console_unlock();
      
      Now I added an else, which will check if there is an active owner, and
      no current waiter. If that is the case, then console_waiter is set, and
      the task goes into a spin until it is no longer set.
      
      When the active console owner finishes writing the current message to
      the consoles, it grabs the console_owner_lock and sees if there is a
      waiter, and clears console_owner.
      
      If there is a waiter, then it breaks out of the loop, clears the waiter
      flag (because that will release the waiter from its spin), and exits.
      Note, it does *not* release the console semaphore. Because it is a
      semaphore, there is no owner. Another task may release it. This means
      that the waiter is guaranteed to be the new console owner! Which it
      becomes.
      
      Then the waiter calls console_unlock() and continues to write to the
      consoles.
      
      If another task comes along and does a printk() it too can become the
      new waiter, and we wash rinse and repeat!
      
      By Petr Mladek about possible new deadlocks:
      
      The thing is that we move console_sem only to printk() call
      that normally calls console_unlock() as well. It means that
      the transferred owner should not bring new type of dependencies.
      As Steven said somewhere: "If there is a deadlock, it was
      there even before."
      
      We could look at it from this side. The possible deadlock would
      look like:
      
      CPU0                            CPU1
      
      console_unlock()
      
        console_owner = current;
      
      				spin_lockA()
      				  printk()
      				    spin = true;
      				    while (...)
      
          call_console_drivers()
            spin_lockA()
      
      This would be a deadlock. CPU0 would wait for the lock A.
      While CPU1 would own the lockA and would wait for CPU0
      to finish calling the console drivers and pass the console_sem
      owner.
      
      But if the above is true than the following scenario was
      already possible before:
      
      CPU0
      
      spin_lockA()
        printk()
          console_unlock()
            call_console_drivers()
      	spin_lockA()
      
      By other words, this deadlock was there even before. Such
      deadlocks are prevented by using printk_deferred() in
      the sections guarded by the lock A.
      
      By Steven Rostedt:
      
      To demonstrate the issue, this module has been shown to lock up a
      system with 4 CPUs and a slow console (like a serial console). It is
      also able to lock up a 8 CPU system with only a fast (VGA) console, by
      passing in "loops=100". The changes in this commit prevent this module
      from locking up the system.
      
       #include <linux/module.h>
       #include <linux/delay.h>
       #include <linux/sched.h>
       #include <linux/mutex.h>
       #include <linux/workqueue.h>
       #include <linux/hrtimer.h>
      
       static bool stop_testing;
       static unsigned int loops = 1;
      
       static void preempt_printk_workfn(struct work_struct *work)
       {
       	int i;
      
       	while (!READ_ONCE(stop_testing)) {
       		for (i = 0; i < loops && !READ_ONCE(stop_testing); i++) {
       			preempt_disable();
       			pr_emerg("%5d%-75s\n", smp_processor_id(),
       				 " XXX NOPREEMPT");
       			preempt_enable();
       		}
       		msleep(1);
       	}
       }
      
       static struct work_struct __percpu *works;
      
       static void finish(void)
       {
       	int cpu;
      
       	WRITE_ONCE(stop_testing, true);
       	for_each_online_cpu(cpu)
       		flush_work(per_cpu_ptr(works, cpu));
       	free_percpu(works);
       }
      
       static int __init test_init(void)
       {
       	int cpu;
      
       	works = alloc_percpu(struct work_struct);
       	if (!works)
       		return -ENOMEM;
      
       	/*
       	 * This is just a test module. This will break if you
       	 * do any CPU hot plugging between loading and
       	 * unloading the module.
       	 */
      
       	for_each_online_cpu(cpu) {
       		struct work_struct *work = per_cpu_ptr(works, cpu);
      
       		INIT_WORK(work, &preempt_printk_workfn);
       		schedule_work_on(cpu, work);
       	}
      
       	return 0;
       }
      
       static void __exit test_exit(void)
       {
       	finish();
       }
      
       module_param(loops, uint, 0);
       module_init(test_init);
       module_exit(test_exit);
       MODULE_LICENSE("GPL");
      
      Link: http://lkml.kernel.org/r/20180110132418.7080-2-pmladek@suse.com
      Cc: akpm@linux-foundation.org
      Cc: linux-mm@kvack.org
      Cc: Cong Wang <xiyou.wangcong@gmail.com>
      Cc: Dave Hansen <dave.hansen@intel.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Mel Gorman <mgorman@suse.de>
      Cc: Michal Hocko <mhocko@kernel.org>
      Cc: Vlastimil Babka <vbabka@suse.cz>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Jan Kara <jack@suse.cz>
      Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
      Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
      Cc: Byungchul Park <byungchul.park@lge.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Pavel Machek <pavel@ucw.cz>
      Cc: linux-kernel@vger.kernel.org
      Signed-off-by: default avatarSteven Rostedt (VMware) <rostedt@goodmis.org>
      [pmladek@suse.com: Commit message about possible deadlocks]
      Acked-by: default avatarSergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Signed-off-by: default avatarPetr Mladek <pmladek@suse.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      59423114