1. 15 Feb, 2017 1 commit
    • Sergey Senozhatsky's avatar
      timekeeping: Use deferred printk() in debug code · f222449c
      Sergey Senozhatsky authored
      We cannot do printk() from tk_debug_account_sleep_time(), because
      tk_debug_account_sleep_time() is called under tk_core seq lock.
      The reason why printk() is unsafe there is that console_sem may
      invoke scheduler (up()->wake_up_process()->activate_task()), which,
      in turn, can return back to timekeeping code, for instance, via
      get_time()->ktime_get(), deadlocking the system on tk_core seq lock.
      
      [   48.950592] ======================================================
      [   48.950622] [ INFO: possible circular locking dependency detected ]
      [   48.950622] 4.10.0-rc7-next-20170213+ #101 Not tainted
      [   48.950622] -------------------------------------------------------
      [   48.950622] kworker/0:0/3 is trying to acquire lock:
      [   48.950653]  (tk_core){----..}, at: [<c01cc624>] retrigger_next_event+0x4c/0x90
      [   48.950683]
                     but task is already holding lock:
      [   48.950683]  (hrtimer_bases.lock){-.-...}, at: [<c01cc610>] retrigger_next_event+0x38/0x90
      [   48.950714]
                     which lock already depends on the new lock.
      
      [   48.950714]
                     the existing dependency chain (in reverse order) is:
      [   48.950714]
                     -> #5 (hrtimer_bases.lock){-.-...}:
      [   48.950744]        _raw_spin_lock_irqsave+0x50/0x64
      [   48.950775]        lock_hrtimer_base+0x28/0x58
      [   48.950775]        hrtimer_start_range_ns+0x20/0x5c8
      [   48.950775]        __enqueue_rt_entity+0x320/0x360
      [   48.950805]        enqueue_rt_entity+0x2c/0x44
      [   48.950805]        enqueue_task_rt+0x24/0x94
      [   48.950836]        ttwu_do_activate+0x54/0xc0
      [   48.950836]        try_to_wake_up+0x248/0x5c8
      [   48.950836]        __setup_irq+0x420/0x5f0
      [   48.950836]        request_threaded_irq+0xdc/0x184
      [   48.950866]        devm_request_threaded_irq+0x58/0xa4
      [   48.950866]        omap_i2c_probe+0x530/0x6a0
      [   48.950897]        platform_drv_probe+0x50/0xb0
      [   48.950897]        driver_probe_device+0x1f8/0x2cc
      [   48.950897]        __driver_attach+0xc0/0xc4
      [   48.950927]        bus_for_each_dev+0x6c/0xa0
      [   48.950927]        bus_add_driver+0x100/0x210
      [   48.950927]        driver_register+0x78/0xf4
      [   48.950958]        do_one_initcall+0x3c/0x16c
      [   48.950958]        kernel_init_freeable+0x20c/0x2d8
      [   48.950958]        kernel_init+0x8/0x110
      [   48.950988]        ret_from_fork+0x14/0x24
      [   48.950988]
                     -> #4 (&rt_b->rt_runtime_lock){-.-...}:
      [   48.951019]        _raw_spin_lock+0x40/0x50
      [   48.951019]        rq_offline_rt+0x9c/0x2bc
      [   48.951019]        set_rq_offline.part.2+0x2c/0x58
      [   48.951049]        rq_attach_root+0x134/0x144
      [   48.951049]        cpu_attach_domain+0x18c/0x6f4
      [   48.951049]        build_sched_domains+0xba4/0xd80
      [   48.951080]        sched_init_smp+0x68/0x10c
      [   48.951080]        kernel_init_freeable+0x160/0x2d8
      [   48.951080]        kernel_init+0x8/0x110
      [   48.951080]        ret_from_fork+0x14/0x24
      [   48.951110]
                     -> #3 (&rq->lock){-.-.-.}:
      [   48.951110]        _raw_spin_lock+0x40/0x50
      [   48.951141]        task_fork_fair+0x30/0x124
      [   48.951141]        sched_fork+0x194/0x2e0
      [   48.951141]        copy_process.part.5+0x448/0x1a20
      [   48.951171]        _do_fork+0x98/0x7e8
      [   48.951171]        kernel_thread+0x2c/0x34
      [   48.951171]        rest_init+0x1c/0x18c
      [   48.951202]        start_kernel+0x35c/0x3d4
      [   48.951202]        0x8000807c
      [   48.951202]
                     -> #2 (&p->pi_lock){-.-.-.}:
      [   48.951232]        _raw_spin_lock_irqsave+0x50/0x64
      [   48.951232]        try_to_wake_up+0x30/0x5c8
      [   48.951232]        up+0x4c/0x60
      [   48.951263]        __up_console_sem+0x2c/0x58
      [   48.951263]        console_unlock+0x3b4/0x650
      [   48.951263]        vprintk_emit+0x270/0x474
      [   48.951293]        vprintk_default+0x20/0x28
      [   48.951293]        printk+0x20/0x30
      [   48.951324]        kauditd_hold_skb+0x94/0xb8
      [   48.951324]        kauditd_thread+0x1a4/0x56c
      [   48.951324]        kthread+0x104/0x148
      [   48.951354]        ret_from_fork+0x14/0x24
      [   48.951354]
                     -> #1 ((console_sem).lock){-.....}:
      [   48.951385]        _raw_spin_lock_irqsave+0x50/0x64
      [   48.951385]        down_trylock+0xc/0x2c
      [   48.951385]        __down_trylock_console_sem+0x24/0x80
      [   48.951385]        console_trylock+0x10/0x8c
      [   48.951416]        vprintk_emit+0x264/0x474
      [   48.951416]        vprintk_default+0x20/0x28
      [   48.951416]        printk+0x20/0x30
      [   48.951446]        tk_debug_account_sleep_time+0x5c/0x70
      [   48.951446]        __timekeeping_inject_sleeptime.constprop.3+0x170/0x1a0
      [   48.951446]        timekeeping_resume+0x218/0x23c
      [   48.951477]        syscore_resume+0x94/0x42c
      [   48.951477]        suspend_enter+0x554/0x9b4
      [   48.951477]        suspend_devices_and_enter+0xd8/0x4b4
      [   48.951507]        enter_state+0x934/0xbd4
      [   48.951507]        pm_suspend+0x14/0x70
      [   48.951507]        state_store+0x68/0xc8
      [   48.951538]        kernfs_fop_write+0xf4/0x1f8
      [   48.951538]        __vfs_write+0x1c/0x114
      [   48.951538]        vfs_write+0xa0/0x168
      [   48.951568]        SyS_write+0x3c/0x90
      [   48.951568]        __sys_trace_return+0x0/0x10
      [   48.951568]
                     -> #0 (tk_core){----..}:
      [   48.951599]        lock_acquire+0xe0/0x294
      [   48.951599]        ktime_get_update_offsets_now+0x5c/0x1d4
      [   48.951629]        retrigger_next_event+0x4c/0x90
      [   48.951629]        on_each_cpu+0x40/0x7c
      [   48.951629]        clock_was_set_work+0x14/0x20
      [   48.951660]        process_one_work+0x2b4/0x808
      [   48.951660]        worker_thread+0x3c/0x550
      [   48.951660]        kthread+0x104/0x148
      [   48.951690]        ret_from_fork+0x14/0x24
      [   48.951690]
                     other info that might help us debug this:
      
      [   48.951690] Chain exists of:
                       tk_core --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock
      
      [   48.951721]  Possible unsafe locking scenario:
      
      [   48.951721]        CPU0                    CPU1
      [   48.951721]        ----                    ----
      [   48.951721]   lock(hrtimer_bases.lock);
      [   48.951751]                                lock(&rt_b->rt_runtime_lock);
      [   48.951751]                                lock(hrtimer_bases.lock);
      [   48.951751]   lock(tk_core);
      [   48.951782]
                      *** DEADLOCK ***
      
      [   48.951782] 3 locks held by kworker/0:0/3:
      [   48.951782]  #0:  ("events"){.+.+.+}, at: [<c0156590>] process_one_work+0x1f8/0x808
      [   48.951812]  #1:  (hrtimer_work){+.+...}, at: [<c0156590>] process_one_work+0x1f8/0x808
      [   48.951843]  #2:  (hrtimer_bases.lock){-.-...}, at: [<c01cc610>] retrigger_next_event+0x38/0x90
      [   48.951843]   stack backtrace:
      [   48.951873] CPU: 0 PID: 3 Comm: kworker/0:0 Not tainted 4.10.0-rc7-next-20170213+
      [   48.951904] Workqueue: events clock_was_set_work
      [   48.951904] [<c0110208>] (unwind_backtrace) from [<c010c224>] (show_stack+0x10/0x14)
      [   48.951934] [<c010c224>] (show_stack) from [<c04ca6c0>] (dump_stack+0xac/0xe0)
      [   48.951934] [<c04ca6c0>] (dump_stack) from [<c019b5cc>] (print_circular_bug+0x1d0/0x308)
      [   48.951965] [<c019b5cc>] (print_circular_bug) from [<c019d2a8>] (validate_chain+0xf50/0x1324)
      [   48.951965] [<c019d2a8>] (validate_chain) from [<c019ec18>] (__lock_acquire+0x468/0x7e8)
      [   48.951995] [<c019ec18>] (__lock_acquire) from [<c019f634>] (lock_acquire+0xe0/0x294)
      [   48.951995] [<c019f634>] (lock_acquire) from [<c01d0ea0>] (ktime_get_update_offsets_now+0x5c/0x1d4)
      [   48.952026] [<c01d0ea0>] (ktime_get_update_offsets_now) from [<c01cc624>] (retrigger_next_event+0x4c/0x90)
      [   48.952026] [<c01cc624>] (retrigger_next_event) from [<c01e4e24>] (on_each_cpu+0x40/0x7c)
      [   48.952056] [<c01e4e24>] (on_each_cpu) from [<c01cafc4>] (clock_was_set_work+0x14/0x20)
      [   48.952056] [<c01cafc4>] (clock_was_set_work) from [<c015664c>] (process_one_work+0x2b4/0x808)
      [   48.952087] [<c015664c>] (process_one_work) from [<c0157774>] (worker_thread+0x3c/0x550)
      [   48.952087] [<c0157774>] (worker_thread) from [<c015d644>] (kthread+0x104/0x148)
      [   48.952087] [<c015d644>] (kthread) from [<c0107830>] (ret_from_fork+0x14/0x24)
      
      Replace printk() with printk_deferred(), which does not call into
      the scheduler.
      
      Fixes: 0bf43f15 ("timekeeping: Prints the amounts of time spent during suspend")
      Reported-and-tested-by: default avatarTony Lindgren <tony@atomide.com>
      Signed-off-by: default avatarSergey Senozhatsky <sergey.senozhatsky@gmail.com>
      Cc: Petr Mladek <pmladek@suse.com>
      Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: "Rafael J . Wysocki" <rjw@rjwysocki.net>
      Cc: Steven Rostedt <rostedt@goodmis.org>
      Cc: John Stultz <john.stultz@linaro.org>
      Cc: "[4.9+]" <stable@vger.kernel.org>
      Link: http://lkml.kernel.org/r/20170215044332.30449-1-sergey.senozhatsky@gmail.comSigned-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
      f222449c
  2. 13 Feb, 2017 1 commit
    • Mike Galbraith's avatar
      tick/broadcast: Prevent deadlock on tick_broadcast_lock · 202461e2
      Mike Galbraith authored
      tick_broadcast_lock is taken from interrupt context, but the following call
      chain takes the lock without disabling interrupts:
      
      [   12.703736]  _raw_spin_lock+0x3b/0x50
      [   12.703738]  tick_broadcast_control+0x5a/0x1a0
      [   12.703742]  intel_idle_cpu_online+0x22/0x100
      [   12.703744]  cpuhp_invoke_callback+0x245/0x9d0
      [   12.703752]  cpuhp_thread_fun+0x52/0x110
      [   12.703754]  smpboot_thread_fn+0x276/0x320
      
      So the following deadlock can happen:
      
         lock(tick_broadcast_lock);
         <Interrupt>
            lock(tick_broadcast_lock);
      
      intel_idle_cpu_online() is the only place which violates the calling
      convention of tick_broadcast_control(). This was caused by the removal of
      the smp function call in course of the cpu hotplug rework.
      
      Instead of slapping local_irq_disable/enable() at the call site, we can
      relax the calling convention and handle it in the core code, which makes
      the whole machinery more robust.
      
      Fixes: 29d7bbad ("intel_idle: Remove superfluous SMP fuction call")
      Reported-by: default avatarGabriel C <nix.or.die@gmail.com>
      Signed-off-by: default avatarMike Galbraith <efault@gmx.de>
      Cc: Ruslan Ruslichenko <rruslich@cisco.com>
      Cc: Jiri Slaby <jslaby@suse.cz>
      Cc: Greg KH <gregkh@linuxfoundation.org>
      Cc: Borislav Petkov <bp@alien8.de>
      Cc: lwn@lwn.net
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Anna-Maria Gleixner <anna-maria@linutronix.de>
      Cc: Sebastian Siewior <bigeasy@linutronix.de>
      Cc: stable <stable@vger.kernel.org>
      Link: http://lkml.kernel.org/r/1486953115.5912.4.camel@gmx.deSigned-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
      202461e2
  3. 12 Feb, 2017 1 commit
  4. 11 Feb, 2017 8 commits
  5. 10 Feb, 2017 25 commits
  6. 09 Feb, 2017 4 commits