Commit 19b4a8d5 authored by Linus Torvalds

Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp()
  rcu: Remove rcu_needs_cpu_flush() to avoid false quiescent states
  rcu: Wire up RCU_BOOST_PRIO for rcutree
  rcu: Make rcu_torture_boost() exit loops at end of test
  rcu: Make rcu_torture_fqs() exit loops at end of test
  rcu: Permit rt_mutex_unlock() with irqs disabled
  rcu: Avoid having just-onlined CPU resched itself when RCU is idle
  rcu: Suppress NMI backtraces when stall ends before dump
  rcu: Prohibit grace periods during early boot
  rcu: Simplify unboosting checks
  rcu: Prevent early boot set_need_resched() from __rcu_pending()
  rcu: Dump local stack if cannot dump all CPUs' stacks
  rcu: Move __rcu_read_unlock()'s barrier() within if-statement
  rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation
  rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier
  rcu: Make rcu_implicit_dynticks_qs() locals be correct size
  rcu: Eliminate in_irq() checks in rcu_enter_nohz()
  nohz: Remove nohz_cpu_mask
  rcu: Document interpretation of RCU-lockdep splats
  rcu: Allow rcutorture's stat_interval parameter to be changed at runtime
  ...
parents 3cfef952 048b7180
......@@ -95,7 +95,7 @@ not to return until all ongoing NMI handlers exit. It is therefore safe
to free up the handler's data as soon as synchronize_sched() returns.
Important note: for this to work, the architecture in question must
invoke irq_enter() and irq_exit() on NMI entry and exit, respectively.
invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
Answer to Quick Quiz
......
Lockdep-RCU was added to the Linux kernel in early 2010
(http://lwn.net/Articles/371986/). This facility checks for some common
misuses of the RCU API, most notably using one of the rcu_dereference()
family to access an RCU-protected pointer without the proper protection.
When such misuse is detected, a lockdep-RCU splat is emitted.
The usual cause of a lockdep-RCU splat is someone accessing an
RCU-protected data structure without either (1) being in the right kind of
RCU read-side critical section or (2) holding the right update-side lock.
This problem can therefore be serious: it might result in random memory
overwriting or worse. There can of course be false positives, this
being the real world and all that.
So let's look at an example RCU lockdep splat from 3.0-rc5, one that
has long since been fixed:
===============================
[ INFO: suspicious RCU usage. ]
-------------------------------
block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 0
3 locks held by scsi_scan_6/1552:
#0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>]
scsi_scan_host_selected+0x5a/0x150
#1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>]
elevator_exit+0x22/0x60
#2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>]
cfq_exit_queue+0x43/0x190
stack backtrace:
Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
Call Trace:
[<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
[<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
[<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
[<ffffffff812a5046>] elevator_exit+0x36/0x60
[<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
[<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
[<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
[<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
[<ffffffff817da069>] ? error_exit+0x29/0xb0
[<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
[<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
[<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
[<ffffffff817da069>] ? error_exit+0x29/0xb0
[<ffffffff812bcc60>] ? kobject_del+0x40/0x40
[<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
[<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
[<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
[<ffffffff8145f170>] do_scan_async+0x20/0x160
[<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
[<ffffffff810975b6>] kthread+0xa6/0xb0
[<ffffffff817db154>] kernel_thread_helper+0x4/0x10
[<ffffffff81066430>] ? finish_task_switch+0x80/0x110
[<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
[<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70
[<ffffffff817db150>] ? gs_change+0xb/0xb
Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
if (rcu_dereference(ioc->ioc_data) == cic) {
This form says that it must be in a plain vanilla RCU read-side critical
section, but the "other info" list above shows that this is not the
case. Instead, we hold three locks, one of which might be RCU related.
And maybe that lock really does protect this reference. If so, the fix
is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
take the struct request_queue "q" from cfq_exit_queue() as an argument,
which would permit us to invoke rcu_dereference_protected as follows:
if (rcu_dereference_protected(ioc->ioc_data,
lockdep_is_held(&q->queue_lock)) == cic) {
With this change, there would be no lockdep-RCU splat emitted if this
code was invoked either from within an RCU read-side critical section
or with the ->queue_lock held. In particular, this would have suppressed
the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
list above).
On the other hand, perhaps we really do need an RCU read-side critical
section. In this case, the critical section must span the use of the
return value from rcu_dereference(), or at least until there is some
reference count incremented or some such. One way to handle this is to
add rcu_read_lock() and rcu_read_unlock() as follows:
rcu_read_lock();
if (rcu_dereference(ioc->ioc_data) == cic) {
spin_lock(&ioc->lock);
rcu_assign_pointer(ioc->ioc_data, NULL);
spin_unlock(&ioc->lock);
}
rcu_read_unlock();
With this change, the rcu_dereference() is always within an RCU
read-side critical section, which again would have suppressed the
above lockdep-RCU splat.
But in this particular case, we don't actually dereference the pointer
returned from rcu_dereference(). Instead, that pointer is just compared
to the cic pointer, which means that the rcu_dereference() can be replaced
by rcu_access_pointer() as follows:
if (rcu_access_pointer(ioc->ioc_data) == cic) {
Because it is legal to invoke rcu_access_pointer() without protection,
this change would also suppress the above lockdep-RCU splat.
......@@ -32,9 +32,27 @@ checking of rcu_dereference() primitives:
srcu_dereference(p, sp):
Check for SRCU read-side critical section.
rcu_dereference_check(p, c):
Use explicit check expression "c". This is useful in
code that is invoked by both readers and updaters.
rcu_dereference_raw(p)
Use explicit check expression "c" along with
rcu_read_lock_held(). This is useful in code that is
invoked by both RCU readers and updaters.
rcu_dereference_bh_check(p, c):
Use explicit check expression "c" along with
rcu_read_lock_bh_held(). This is useful in code that
is invoked by both RCU-bh readers and updaters.
rcu_dereference_sched_check(p, c):
Use explicit check expression "c" along with
rcu_read_lock_sched_held(). This is useful in code that
is invoked by both RCU-sched readers and updaters.
srcu_dereference_check(p, c):
Use explicit check expression "c" along with
srcu_read_lock_held(). This is useful in code that
is invoked by both SRCU readers and updaters.
rcu_dereference_index_check(p, c):
Use explicit check expression "c", but the caller
must supply one of the rcu_read_lock_held() functions.
This is useful in code that uses RCU-protected arrays
and that is invoked by both RCU readers and updaters.
rcu_dereference_raw(p):
Don't check. (Use sparingly, if at all.)
rcu_dereference_protected(p, c):
Use explicit check expression "c", and omit all barriers
......@@ -48,13 +66,11 @@ checking of rcu_dereference() primitives:
value of the pointer itself, for example, against NULL.
The rcu_dereference_check() check expression can be any boolean
expression, but would normally include one of the rcu_read_lock_held()
family of functions and a lockdep expression. However, any boolean
expression can be used. For a moderately ornate example, consider
the following:
expression, but would normally include a lockdep expression. However,
any boolean expression can be used. For a moderately ornate example,
consider the following:
file = rcu_dereference_check(fdt->fd[fd],
rcu_read_lock_held() ||
lockdep_is_held(&files->file_lock) ||
atomic_read(&files->count) == 1);
......@@ -62,7 +78,7 @@ This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
and, if CONFIG_PROVE_RCU is configured, verifies that this expression
is used in:
1. An RCU read-side critical section, or
1. An RCU read-side critical section (implicit), or
2. with files->file_lock held, or
3. on an unshared files_struct.
......
......@@ -42,7 +42,7 @@ fqs_holdoff Holdoff time (in microseconds) between consecutive calls
fqs_stutter Wait time (in seconds) between consecutive bursts
of calls to force_quiescent_state().
irqreaders Says to invoke RCU readers from irq level. This is currently
irqreader Says to invoke RCU readers from irq level. This is currently
done via timers. Defaults to "1" for variants of RCU that
permit this. (Or, more accurately, variants of RCU that do
-not- permit this know to ignore this variable.)
......@@ -79,19 +79,68 @@ stutter The length of time to run the test before pausing for this
Specifying "stutter=0" causes the test to run continuously
without pausing, which is the old default behavior.
test_boost Whether or not to test the ability of RCU to do priority
boosting. Defaults to "test_boost=1", which performs
RCU priority-inversion testing only if the selected
RCU implementation supports priority boosting. Specifying
"test_boost=0" never performs RCU priority-inversion
testing. Specifying "test_boost=2" performs RCU
priority-inversion testing even if the selected RCU
implementation does not support RCU priority boosting,
which can be used to test rcutorture's ability to
carry out RCU priority-inversion testing.
test_boost_interval
The number of seconds in an RCU priority-inversion test
cycle. Defaults to "test_boost_interval=7". It is
usually wise for this value to be relatively prime to
the value selected for "stutter".
test_boost_duration
The number of seconds to do RCU priority-inversion testing
within any given "test_boost_interval". Defaults to
"test_boost_duration=4".
test_no_idle_hz Whether or not to test the ability of RCU to operate in
a kernel that disables the scheduling-clock interrupt to
idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
Defaults to omitting this test.
torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API,
"rcu_sync" for rcu_read_lock() with synchronous reclamation,
"rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for
rcu_read_lock_bh() with synchronous reclamation, "srcu" for
the "srcu_read_lock()" API, "sched" for the use of
preempt_disable() together with synchronize_sched(),
and "sched_expedited" for the use of preempt_disable()
with synchronize_sched_expedited().
torture_type The type of RCU to test, with string values as follows:
"rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu().
"rcu_sync": rcu_read_lock(), rcu_read_unlock(), and
synchronize_rcu().
"rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and
synchronize_rcu_expedited().
"rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
call_rcu_bh().
"rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(),
and synchronize_rcu_bh().
"rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(),
and synchronize_rcu_bh_expedited().
"srcu": srcu_read_lock(), srcu_read_unlock() and
synchronize_srcu().
"srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
synchronize_srcu_expedited().
"sched": preempt_disable(), preempt_enable(), and
call_rcu_sched().
"sched_sync": preempt_disable(), preempt_enable(), and
synchronize_sched().
"sched_expedited": preempt_disable(), preempt_enable(), and
synchronize_sched_expedited().
Defaults to "rcu".
verbose Enable debug printk()s. Default is disabled.
......@@ -100,12 +149,12 @@ OUTPUT
The statistics output is as follows:
rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0
rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915
rcu-torture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0
rcu-torture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0
rcu-torture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0
rcu-torture: --- End of test
rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
The command "dmesg | grep torture:" will extract this information on
most systems. On more esoteric configurations, it may be necessary to
......@@ -113,26 +162,55 @@ use other commands to access the output of the printk()s used by
the RCU torture test. The printk()s use KERN_ALERT, so they should
be evident. ;-)
The first and last lines show the rcutorture module parameters, and the
last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
automatic determination as to whether RCU operated correctly.
The entries are as follows:
o "rtc": The hexadecimal address of the structure currently visible
to readers.
o "ver": The number of times since boot that the rcutw writer task
o "ver": The number of times since boot that the RCU writer task
has changed the structure visible to readers.
o "tfle": If non-zero, indicates that the "torture freelist"
containing structure to be placed into the "rtc" area is empty.
containing structures to be placed into the "rtc" area is empty.
This condition is important, since it can fool you into thinking
that RCU is working when it is not. :-/
o "rta": Number of structures allocated from the torture freelist.
o "rtaf": Number of allocations from the torture freelist that have
failed due to the list being empty.
failed due to the list being empty. It is not unusual for this
to be non-zero, but it is bad for it to be a large fraction of
the value indicated by "rta".
o "rtf": Number of frees into the torture freelist.
o "rtmbe": A non-zero value indicates that rcutorture believes that
rcu_assign_pointer() and rcu_dereference() are not working
correctly. This value should be zero.
o "rtbke": rcutorture was unable to create the real-time kthreads
used to force RCU priority inversion. This value should be zero.
o "rtbre": Although rcutorture successfully created the kthreads
used to force RCU priority inversion, it was unable to set them
to the real-time priority level of 1. This value should be zero.
o "rtbf": The number of times that RCU priority boosting failed
to resolve RCU priority inversion.
o "rtb": The number of times that rcutorture attempted to force
an RCU priority inversion condition. If you are testing RCU
priority boosting via the "test_boost" module parameter, this
value should be non-zero.
o "nt": The number of times rcutorture ran RCU read-side code from
within a timer handler. This value should be non-zero only
if you specified the "irqreader" module parameter.
o "Reader Pipe": Histogram of "ages" of structures seen by readers.
If any entries past the first two are non-zero, RCU is broken.
And rcutorture prints the error flag string "!!!" to make sure
......@@ -162,26 +240,15 @@ o "Free-Block Circulation": Shows the number of torture structures
somehow gets incremented farther than it should.
Different implementations of RCU can provide implementation-specific
additional information. For example, SRCU provides the following:
additional information. For example, SRCU provides the following
additional line:
srcu-torture: rtc: f8cf46a8 ver: 355 tfle: 0 rta: 356 rtaf: 0 rtf: 346 rtmbe: 0
srcu-torture: Reader Pipe: 559738 939 0 0 0 0 0 0 0 0 0
srcu-torture: Reader Batch: 560434 243 0 0 0 0 0 0 0 0
srcu-torture: Free-Block Circulation: 355 354 353 352 351 350 349 348 347 346 0
srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1)
The first four lines are similar to those for RCU. The last line shows
the per-CPU counter state. The numbers in parentheses are the values
of the "old" and "current" counters for the corresponding CPU. The
"idx" value maps the "old" and "current" values to the underlying array,
and is useful for debugging.
Similarly, sched_expedited RCU provides the following:
sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319
sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
This line shows the per-CPU counter state. The numbers in parentheses are
the values of the "old" and "current" counters for the corresponding CPU.
The "idx" value maps the "old" and "current" values to the underlying
array, and is useful for debugging.
USAGE
......
......@@ -33,23 +33,23 @@ rcu/rcuboost:
The output of "cat rcu/rcudata" looks as follows:
rcu_sched:
0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
rcu_bh:
0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
The first section lists the rcu_data structures for rcu_sched, the second
for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
......@@ -84,7 +84,7 @@ o "pq" indicates that this CPU has passed through a quiescent state
CPU has not yet reported that fact, (2) some other CPU has not
yet reported for this grace period, or (3) both.
o "pqc" indicates which grace period the last-observed quiescent
o "pgp" indicates which grace period the last-observed quiescent
state for this CPU corresponds to. This is important for handling
the race between CPU 0 reporting an extended dynticks-idle
quiescent state for CPU 1 and CPU 1 suddenly waking up and
......@@ -184,10 +184,14 @@ o "kt" is the per-CPU kernel-thread state. The digit preceding
The number after the final slash is the CPU that the kthread
is actually running on.
This field is displayed only for CONFIG_RCU_BOOST kernels.
o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
the number of times that this CPU's per-CPU kthread has gone
through its loop servicing invoke_rcu_cpu_kthread() requests.
This field is displayed only for CONFIG_RCU_BOOST kernels.
o "b" is the batch limit for this CPU. If more than this number
of RCU callbacks is ready to invoke, then the remainder will
be deferred.
......
......@@ -548,7 +548,7 @@ do { \
#endif
#ifdef CONFIG_PROVE_RCU
extern void lockdep_rcu_dereference(const char *file, const int line);
void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
#endif
#endif /* __LINUX_LOCKDEP_H */
This diff is collapsed.
......@@ -27,9 +27,23 @@
#include <linux/cache.h>
/*
 * rcu_init(): when CONFIG_RCU_BOOST is defined, this header supplies an
 * empty inline definition; otherwise it only declares the function, so
 * the definition must come from elsewhere.
 */
#ifdef CONFIG_RCU_BOOST
static inline void rcu_init(void)
{
}
#else /* #ifdef CONFIG_RCU_BOOST */
void rcu_init(void);
#endif /* #else #ifdef CONFIG_RCU_BOOST */
/*
 * rcu_barrier_bh() - implemented here by handing call_rcu_bh to
 * wait_rcu_gp().  Takes no arguments and returns nothing.
 * NOTE(review): wait_rcu_gp() is defined outside this view; presumably it
 * blocks until a callback queued via the supplied function has been
 * invoked -- confirm against its definition.
 */
static inline void rcu_barrier_bh(void)
{
wait_rcu_gp(call_rcu_bh);
}
/*
 * rcu_barrier_sched() - implemented here by handing call_rcu_sched to
 * wait_rcu_gp().  Takes no arguments and returns nothing.
 * NOTE(review): wait_rcu_gp() is defined outside this view; presumably it
 * blocks until a callback queued via the supplied function has been
 * invoked -- confirm against its definition.
 */
static inline void rcu_barrier_sched(void)
{
wait_rcu_gp(call_rcu_sched);
}
#ifdef CONFIG_TINY_RCU
......@@ -45,9 +59,13 @@ static inline void rcu_barrier(void)
#else /* #ifdef CONFIG_TINY_RCU */
void rcu_barrier(void);
void synchronize_rcu_expedited(void);
static inline void rcu_barrier(void)
{
wait_rcu_gp(call_rcu);
}
#endif /* #else #ifdef CONFIG_TINY_RCU */
static inline void synchronize_rcu_bh(void)
......
......@@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void)
}
extern void rcu_barrier(void);
extern void rcu_barrier_bh(void);
extern void rcu_barrier_sched(void);
extern unsigned long rcutorture_testseq;
extern unsigned long rcutorture_vernum;
......
......@@ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(int cpu);
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern void select_nohz_load_balancer(int stop_tick);
extern int get_nohz_timer_target(void);
......@@ -1260,9 +1259,6 @@ struct task_struct {
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
int rcu_boosted;
#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
......
......@@ -238,6 +238,16 @@ struct ustat {
char f_fpack[6];
};
/**
* struct rcu_head - callback structure for use with RCU
* @next: pointer to the next rcu_head, linking update requests into a list
* @func: update function to call after the grace period; it receives a
*        pointer to a struct rcu_head as its only argument.
*/
struct rcu_head {
struct rcu_head *next;
void (*func)(struct rcu_head *head);
};
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_TYPES_H */
This diff is collapsed.
......@@ -391,7 +391,7 @@ config TREE_RCU
config TREE_PREEMPT_RCU
bool "Preemptible tree-based hierarchical RCU"
depends on PREEMPT
depends on PREEMPT && SMP
help
This option selects the RCU implementation that is
designed for very large SMP systems with hundreds or
......@@ -401,7 +401,7 @@ config TREE_PREEMPT_RCU
config TINY_RCU
bool "UP-only small-memory-footprint RCU"
depends on !SMP
depends on !PREEMPT && !SMP
help
This option selects the RCU implementation that is
designed for UP systems from which real-time response
......@@ -410,7 +410,7 @@ config TINY_RCU
config TINY_PREEMPT_RCU
bool "Preemptible UP-only small-memory-footprint RCU"
depends on !SMP && PREEMPT
depends on PREEMPT && !SMP
help
This option selects the RCU implementation that is designed
for real-time UP systems. This option greatly reduces the
......
......@@ -1145,10 +1145,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
if (debug_locks_silent)
return 0;
printk("\n=======================================================\n");
printk( "[ INFO: possible circular locking dependency detected ]\n");
printk("\n");
printk("======================================================\n");
printk("[ INFO: possible circular locking dependency detected ]\n");
print_kernel_version();
printk( "-------------------------------------------------------\n");
printk("-------------------------------------------------------\n");
printk("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(check_src);
......@@ -1482,11 +1483,12 @@ print_bad_irq_dependency(struct task_struct *curr,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
printk("\n======================================================\n");
printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
printk("\n");
printk("======================================================\n");
printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
irqclass, irqclass);
print_kernel_version();
printk( "------------------------------------------------------\n");
printk("------------------------------------------------------\n");
printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
......@@ -1711,10 +1713,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
printk("\n=============================================\n");
printk( "[ INFO: possible recursive locking detected ]\n");
printk("\n");
printk("=============================================\n");
printk("[ INFO: possible recursive locking detected ]\n");
print_kernel_version();
printk( "---------------------------------------------\n");
printk("---------------------------------------------\n");
printk("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(next);
......@@ -2217,10 +2220,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
printk("\n=================================\n");
printk( "[ INFO: inconsistent lock state ]\n");
printk("\n");
printk("=================================\n");
printk("[ INFO: inconsistent lock state ]\n");
print_kernel_version();
printk( "---------------------------------\n");
printk("---------------------------------\n");
printk("inconsistent {%s} -> {%s} usage.\n",
usage_str[prev_bit], usage_str[new_bit]);
......@@ -2281,10 +2285,11 @@ print_irq_inversion_bug(struct task_struct *curr,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
printk("\n=========================================================\n");
printk( "[ INFO: possible irq lock inversion dependency detected ]\n");
printk("\n");
printk("=========================================================\n");
printk("[ INFO: possible irq lock inversion dependency detected ]\n");
print_kernel_version();
printk( "---------------------------------------------------------\n");
printk("---------------------------------------------------------\n");
printk("%s/%d just changed the state of lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(this);
......@@ -3161,9 +3166,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
if (debug_locks_silent)
return 0;
printk("\n=====================================\n");
printk( "[ BUG: bad unlock balance detected! ]\n");
printk( "-------------------------------------\n");
printk("\n");
printk("=====================================\n");
printk("[ BUG: bad unlock balance detected! ]\n");
printk("-------------------------------------\n");
printk("%s/%d is trying to release lock (",
curr->comm, task_pid_nr(curr));
print_lockdep_cache(lock);
......@@ -3604,9 +3610,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
if (debug_locks_silent)
return 0;
printk("\n=================================\n");
printk( "[ BUG: bad contention detected! ]\n");
printk( "---------------------------------\n");
printk("\n");
printk("=================================\n");
printk("[ BUG: bad contention detected! ]\n");
printk("---------------------------------\n");
printk("%s/%d is trying to contend lock (",
curr->comm, task_pid_nr(curr));
print_lockdep_cache(lock);
......@@ -3977,9 +3984,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
if (debug_locks_silent)
return;
printk("\n=========================\n");
printk( "[ BUG: held lock freed! ]\n");
printk( "-------------------------\n");
printk("\n");
printk("=========================\n");
printk("[ BUG: held lock freed! ]\n");
printk("-------------------------\n");
printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
print_lock(hlock);
......@@ -4033,9 +4041,10 @@ static void print_held_locks_bug(struct task_struct *curr)
if (debug_locks_silent)
return;
printk("\n=====================================\n");
printk( "[ BUG: lock held at task exit time! ]\n");
printk( "-------------------------------------\n");
printk("\n");
printk("=====================================\n");
printk("[ BUG: lock held at task exit time! ]\n");
printk("-------------------------------------\n");
printk("%s/%d is exiting with locks still held!\n",
curr->comm, task_pid_nr(curr));
lockdep_print_held_locks(curr);
......@@ -4129,16 +4138,17 @@ void lockdep_sys_exit(void)
if (unlikely(curr->lockdep_depth)) {
if (!debug_locks_off())
return;
printk("\n================================================\n");
printk( "[ BUG: lock held when returning to user space! ]\n");
printk( "------------------------------------------------\n");
printk("\n");
printk("================================================\n");
printk("[ BUG: lock held when returning to user space! ]\n");
printk("------------------------------------------------\n");
printk("%s/%d is leaving the kernel with locks still held!\n",
curr->comm, curr->pid);
lockdep_print_held_locks(curr);
}
}
void lockdep_rcu_dereference(const char *file, const int line)
void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
{
struct task_struct *curr = current;
......@@ -4147,15 +4157,15 @@ void lockdep_rcu_dereference(const char *file, const int line)
return;
#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
/* Note: the following can be executed concurrently, so be careful. */
printk("\n===================================================\n");
printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
printk( "---------------------------------------------------\n");
printk("%s:%d invoked rcu_dereference_check() without protection!\n",
file, line);
printk("\n");
printk("===============================\n");
printk("[ INFO: suspicious RCU usage. ]\n");
printk("-------------------------------\n");
printk("%s:%d %s!\n", file, line, s);
printk("\nother info that might help us debug this:\n\n");
printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
lockdep_print_held_locks(curr);
printk("\nstack backtrace:\n");
dump_stack();
}
EXPORT_SYMBOL_GPL(lockdep_rcu_dereference);
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
......@@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task);
*/
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
rcu_lockdep_assert(rcu_read_lock_held());
rcu_lockdep_assert(rcu_read_lock_held(),
"find_task_by_pid_ns() needs rcu_read_lock()"
" protection");
return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
}
......
/*
* Read-Copy Update definitions shared among RCU implementations.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright IBM Corporation, 2011
*
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#ifndef __LINUX_RCU_H
#define __LINUX_RCU_H
#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/*
* debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
* by call_rcu() and RCU callback execution, and are therefore not part of the
* RCU API. They live in this shared header because they are used by all
* RCU flavors.
*/
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
# define STATE_RCU_HEAD_READY 0
# define STATE_RCU_HEAD_QUEUED 1
extern struct debug_obj_descr rcuhead_debug_descr;
/*
 * Debug-objects hook run when an rcu_head is being queued (variant built
 * when CONFIG_DEBUG_OBJECTS_RCU_HEAD is set).
 *
 * Warns once if @head has either of its two low address bits set, then
 * activates the object and passes STATE_RCU_HEAD_READY followed by
 * STATE_RCU_HEAD_QUEUED to debug_object_active_state().
 * NOTE(review): presumably those two arguments are (expected state,
 * next state) -- confirm against the debugobjects API.
 */
static inline void debug_rcu_head_queue(struct rcu_head *head)
{
/* Nonzero low two bits mean @head is not 4-byte aligned. */
WARN_ON_ONCE((unsigned long)head & 0x3);
debug_object_activate(head, &rcuhead_debug_descr);
debug_object_active_state(head, &rcuhead_debug_descr,
STATE_RCU_HEAD_READY,
STATE_RCU_HEAD_QUEUED);
}
/*
 * Debug-objects hook run when an rcu_head is taken off the queue (variant
 * built when CONFIG_DEBUG_OBJECTS_RCU_HEAD is set).
 *
 * Mirror image of debug_rcu_head_queue(): passes STATE_RCU_HEAD_QUEUED
 * followed by STATE_RCU_HEAD_READY to debug_object_active_state(), then
 * deactivates the object.
 * NOTE(review): presumably those two arguments are (expected state,
 * next state) -- confirm against the debugobjects API.
 */
static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
debug_object_active_state(head, &rcuhead_debug_descr,
STATE_RCU_HEAD_QUEUED,
STATE_RCU_HEAD_READY);
debug_object_deactivate(head, &rcuhead_debug_descr);
}
#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
/* No-op stand-in used when CONFIG_DEBUG_OBJECTS_RCU_HEAD is not set. */
static inline void debug_rcu_head_queue(struct rcu_head *head)
{
}
/* No-op stand-in used when CONFIG_DEBUG_OBJECTS_RCU_HEAD is not set. */
static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
extern void kfree(const void *);
/*
 * Dispose of one RCU callback.
 *
 * @rn:   name handed to the RCU trace events (only referenced when
 *        RCU_TRACE() expands to its argument).
 * @head: the callback being processed.
 *
 * The ->func field is first viewed as an integer.  If
 * __is_kfree_rcu_offset() accepts that value, it is treated as the offset
 * of @head within its enclosing object and that object is passed to
 * kfree(); otherwise ->func is called as an ordinary callback function.
 */
static inline void __rcu_reclaim(char *rn, struct rcu_head *head)
{
	unsigned long func_as_offset = (unsigned long)head->func;

	/* Ordinary callback: trace it and invoke it. */
	if (!__is_kfree_rcu_offset(func_as_offset)) {
		RCU_TRACE(trace_rcu_invoke_callback(rn, head));
		head->func(head);
		return;
	}

	/* Encoded offset: free the object that contains @head. */
	RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, func_as_offset));
	kfree((void *)head - func_as_offset);
}
#endif /* __LINUX_RCU_H */
......@@ -46,6 +46,11 @@
#include <linux/module.h>
#include <linux/hardirq.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rcu.h>
#include "rcu.h"
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
......@@ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
 * Pairs an RCU callback with a completion so that a task can sleep until
 * the callback has been invoked.
 */
struct rcu_synchronize {
/* Callback handle; wakeme_after_rcu() receives a pointer to this. */
struct rcu_head head;
/* Completed by wakeme_after_rcu(). */
struct completion completion;
};
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
*/
void wakeme_after_rcu(struct rcu_head *head)
static void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
......@@ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head *head)
complete(&rcu->completion);
}
/*
 * Post wakeme_after_rcu() through the callback-registration function @crf
 * and sleep until that callback has been invoked.
 *
 * @crf: function used to register the callback; it is called with an
 *       on-stack rcu_head and wakeme_after_rcu as its arguments.
 */
void wait_rcu_gp(call_rcu_func_t crf)
{
	struct rcu_synchronize rs;

	init_rcu_head_on_stack(&rs.head);
	init_completion(&rs.completion);

	/* Register the wakeup callback with the caller-selected function. */
	crf(&rs.head, wakeme_after_rcu);

	/* Block until wakeme_after_rcu() completes rs.completion. */
	wait_for_completion(&rs.completion);

	destroy_rcu_head_on_stack(&rs.head);
}
EXPORT_SYMBOL_GPL(wait_rcu_gp);
#ifdef CONFIG_PROVE_RCU
/*
* wrapper function to avoid #include problems.
......
......@@ -37,16 +37,17 @@
#include <linux/cpu.h>
#include <linux/prefetch.h>
/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
static struct task_struct *rcu_kthread_task;
static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
static unsigned long have_rcu_kthread_work;
#ifdef CONFIG_RCU_TRACE
#include <trace/events/rcu.h>
#endif /* #else #ifdef CONFIG_RCU_TRACE */
#include "rcu.h"
/* Forward declarations for rcutiny_plugin.h. */
struct rcu_ctrlblk;
static void invoke_rcu_kthread(void);
static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static int rcu_kthread(void *arg);
static void invoke_rcu_callbacks(void);
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static void rcu_process_callbacks(struct softirq_action *unused);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);
......@@ -95,16 +96,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
return 0;
}
/*
* Wake up rcu_kthread() to process callbacks now eligible for invocation
* or to boost readers.
*/
static void invoke_rcu_kthread(void)
{
have_rcu_kthread_work = 1;
wake_up(&rcu_kthread_wq);
}
/*
* Record an rcu quiescent state. And an rcu_bh quiescent state while we
* are at it, given that any rcu quiescent state is also an rcu_bh
......@@ -117,7 +108,7 @@ void rcu_sched_qs(int cpu)
local_irq_save(flags);
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
invoke_rcu_kthread();
invoke_rcu_callbacks();
local_irq_restore(flags);
}
......@@ -130,7 +121,7 @@ void rcu_bh_qs(int cpu)
local_irq_save(flags);
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
invoke_rcu_kthread();
invoke_rcu_callbacks();
local_irq_restore(flags);
}
......@@ -154,18 +145,23 @@ void rcu_check_callbacks(int cpu, int user)
* Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
* whose grace period has elapsed.
*/
static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
char *rn = NULL;
struct rcu_head *next, *list;
unsigned long flags;
RCU_TRACE(int cb_count = 0);
/* If no RCU callbacks ready to invoke, just return. */
if (&rcp->rcucblist == rcp->donetail)
if (&rcp->rcucblist == rcp->donetail) {
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
RCU_TRACE(trace_rcu_batch_end(rcp->name, 0));
return;
}
/* Move the ready-to-invoke callbacks to a local list. */
local_irq_save(flags);
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
list = rcp->rcucblist;
rcp->rcucblist = *rcp->donetail;
*rcp->donetail = NULL;
......@@ -176,49 +172,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
local_irq_restore(flags);
/* Invoke the callbacks on the local list. */
RCU_TRACE(rn = rcp->name);
while (list) {
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
local_bh_disable();
__rcu_reclaim(list);
__rcu_reclaim(rn, list);
local_bh_enable();
list = next;
RCU_TRACE(cb_count++);
}
RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count));
}
/*
* This kthread invokes RCU callbacks whose grace periods have
* elapsed. It is awakened as needed, and takes the place of the
* RCU_SOFTIRQ that was used previously for this purpose.
* This is a kthread, but it is never stopped, at least not until
* the system goes down.
*/
static int rcu_kthread(void *arg)
static void rcu_process_callbacks(struct softirq_action *unused)
{
unsigned long work;
unsigned long morework;
unsigned long flags;
for (;;) {
wait_event_interruptible(rcu_kthread_wq,
have_rcu_kthread_work != 0);
morework = rcu_boost();
local_irq_save(flags);
work = have_rcu_kthread_work;
have_rcu_kthread_work = morework;
local_irq_restore(flags);
if (work) {
rcu_process_callbacks(&rcu_sched_ctrlblk);
rcu_process_callbacks(&rcu_bh_ctrlblk);
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
rcu_preempt_process_callbacks();
}
schedule_timeout_interruptible(1); /* Leave CPU for others. */
}
return 0; /* Not reached, but needed to shut gcc up. */
}
/*
......@@ -280,45 +253,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
__call_rcu(head, func, &rcu_bh_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
void rcu_barrier_bh(void)
{
struct rcu_synchronize rcu;
init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
void rcu_barrier_sched(void)
{
struct rcu_synchronize rcu;
init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
/*
* Spawn the kthread that invokes RCU callbacks.
*/
static int __init rcu_spawn_kthreads(void)
{
struct sched_param sp;
rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
sp.sched_priority = RCU_BOOST_PRIO;
sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
return 0;
}
early_initcall(rcu_spawn_kthreads);
......@@ -26,29 +26,26 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/* Global control variables for rcupdate callback mechanism. */
/*
 * Per-flavor callback bookkeeping: one singly linked list of rcu_head
 * structures plus two tail pointers into it.  The qlen and name members
 * exist only when RCU_TRACE() expands to its argument.
 */
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
struct rcu_head **curtail; /* ->next pointer of last CB. */
RCU_TRACE(long qlen); /* Number of pending CBs. */
RCU_TRACE(char *name); /* Name of RCU type. */
};
/* Definition for rcupdate control block. */
/*
 * Control block for the rcu_sched flavor.  Both tail pointers start out
 * referencing the list head itself; rcucblist is left to static zero
 * initialization.
 */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
.donetail = &rcu_sched_ctrlblk.rcucblist,
.curtail = &rcu_sched_ctrlblk.rcucblist,
RCU_TRACE(.name = "rcu_sched")
};
/*
 * Control block for the rcu_bh flavor.  Both tail pointers start out
 * referencing the list head itself; rcucblist is left to static zero
 * initialization.
 */
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
RCU_TRACE(.name = "rcu_bh")
};
#ifdef CONFIG_DEBUG_LOCK_ALLOC
......@@ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
.rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
.nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
.blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
RCU_TRACE(.rcb.name = "rcu_preempt")
};
static int rcu_preempted_readers_exp(void);
......@@ -247,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m)
#include "rtmutex_common.h"
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
/* Controls for rcu_kthread() kthread. */
static struct task_struct *rcu_kthread_task;
static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
static unsigned long have_rcu_kthread_work;
/*
* Carry out RCU priority boosting on the task indicated by ->boost_tasks,
* and advance ->boost_tasks to the next task in the ->blkd_tasks list.
......@@ -334,7 +339,7 @@ static int rcu_initiate_boost(void)
if (rcu_preempt_ctrlblk.exp_tasks == NULL)
rcu_preempt_ctrlblk.boost_tasks =
rcu_preempt_ctrlblk.gp_tasks;
invoke_rcu_kthread();
invoke_rcu_callbacks();
} else
RCU_TRACE(rcu_initiate_boost_trace());
return 1;
......@@ -352,14 +357,6 @@ static void rcu_preempt_boost_start_gp(void)
#else /* #ifdef CONFIG_RCU_BOOST */
/*
* If there is no RCU priority boosting, we don't boost.
*/
static int rcu_boost(void)
{
return 0;
}
/*
* If there is no RCU priority boosting, we don't initiate boosting,
* but we do indicate whether there are blocked readers blocking the
......@@ -427,7 +424,7 @@ static void rcu_preempt_cpu_qs(void)
/* If there are done callbacks, cause them to be invoked. */
if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
invoke_rcu_kthread();
invoke_rcu_callbacks();
}
/*
......@@ -648,7 +645,7 @@ static void rcu_preempt_check_callbacks(void)
rcu_preempt_cpu_qs();
if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
rcu_preempt_ctrlblk.rcb.donetail)
invoke_rcu_kthread();
invoke_rcu_callbacks();
if (rcu_preempt_gp_in_progress() &&
rcu_cpu_blocking_cur_gp() &&
rcu_preempt_running_reader())
......@@ -674,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
*/
static void rcu_preempt_process_callbacks(void)
{
rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
__rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
}
/*
......@@ -697,20 +694,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu);
void rcu_barrier(void)
{
struct rcu_synchronize rcu;
init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
* synchronize_rcu - wait until a grace period has elapsed.
*
......@@ -863,15 +846,6 @@ static void show_tiny_preempt_stats(struct seq_file *m)
#endif /* #ifdef CONFIG_RCU_TRACE */
/*
* Because preemptible RCU does not exist, it is never necessary to
* boost preempted RCU readers.
*/
static int rcu_boost(void)
{
return 0;
}
/*
* Because preemptible RCU does not exist, it never has any callbacks
* to check.
......@@ -898,6 +872,78 @@ static void rcu_preempt_process_callbacks(void)
#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
/*
* Wake up rcu_kthread() to process callbacks now eligible for invocation
* or to boost readers.
*/
static void invoke_rcu_callbacks(void)
{
/*
 * Set the flag before issuing the wakeup: rcu_kthread() waits on
 * rcu_kthread_wq for have_rcu_kthread_work to become nonzero.
 */
have_rcu_kthread_work = 1;
wake_up(&rcu_kthread_wq);
}
/*
* This kthread invokes RCU callbacks whose grace periods have
* elapsed. It is awakened as needed, and takes the place of the
* RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
* This is a kthread, but it is never stopped, at least not until
* the system goes down.
*/
static int rcu_kthread(void *arg)
{
unsigned long work;
unsigned long morework;
unsigned long flags;
for (;;) {
/* Sleep until invoke_rcu_callbacks() sets have_rcu_kthread_work. */
wait_event_interruptible(rcu_kthread_wq,
have_rcu_kthread_work != 0);
/* rcu_boost()'s return value becomes the next value of the work flag. */
morework = rcu_boost();
/*
 * Snapshot the flag and replace it with morework while interrupts
 * are disabled.
 * NOTE(review): presumably this keeps an interrupt-context
 * invoke_rcu_callbacks() from landing between the read and the
 * write -- confirm.
 */
local_irq_save(flags);
work = have_rcu_kthread_work;
have_rcu_kthread_work = morework;
local_irq_restore(flags);
/* The softirq_action argument is unused, hence NULL. */
if (work)
rcu_process_callbacks(NULL);
schedule_timeout_interruptible(1); /* Leave CPU for others. */
}
return 0; /* Not reached, but needed to shut gcc up. */
}
/*
 * Spawn the kthread that invokes RCU callbacks and switch it to
 * SCHED_FIFO at priority RCU_BOOST_PRIO.
 *
 * Returns 0 on success, or the negative errno encoded in kthread_run()'s
 * return value if the kthread could not be created.
 */
static int __init rcu_spawn_kthreads(void)
{
	struct sched_param sp;

	rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
	/*
	 * kthread_run() reports failure with an ERR_PTR() value.  That value
	 * must not be handed to sched_setscheduler_nocheck() as if it were a
	 * task pointer, so complain loudly and bail out instead.
	 */
	if (WARN_ON(IS_ERR(rcu_kthread_task)))
		return PTR_ERR(rcu_kthread_task);

	sp.sched_priority = RCU_BOOST_PRIO;
	sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
	return 0;
}
early_initcall(rcu_spawn_kthreads);
#else /* #ifdef CONFIG_RCU_BOOST */
/*
 * Start up softirq processing of callbacks by raising RCU_SOFTIRQ.
 *
 * Marked static so that the definition agrees with this function's static
 * forward declaration and with the CONFIG_RCU_BOOST variant.
 */
static void invoke_rcu_callbacks(void)
{
	raise_softirq(RCU_SOFTIRQ);
}
/*
 * Register rcu_process_callbacks() as the RCU_SOFTIRQ handler.  This is the
 * !CONFIG_RCU_BOOST configuration, in which invoke_rcu_callbacks() raises
 * that softirq.
 */
void rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#include <linux/kernel_stat.h>
......@@ -913,12 +959,6 @@ void __init rcu_scheduler_starting(void)
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_RCU_BOOST
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
#else /* #ifdef CONFIG_RCU_BOOST */
#define RCU_BOOST_PRIO 1
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_BOOST
......
......@@ -73,7 +73,7 @@ module_param(nreaders, int, 0444);
MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
module_param(nfakewriters, int, 0444);
MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
module_param(stat_interval, int, 0444);
module_param(stat_interval, int, 0644);
MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
module_param(verbose, bool, 0444);
MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
......@@ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
}
struct rcu_bh_torture_synchronize {
struct rcu_head head;
struct completion completion;
};
static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head)
{
struct rcu_bh_torture_synchronize *rcu;
rcu = container_of(head, struct rcu_bh_torture_synchronize, head);
complete(&rcu->completion);
}
static void rcu_bh_torture_synchronize(void)
{
struct rcu_bh_torture_synchronize rcu;
init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
wait_for_completion(&rcu.completion);
destroy_rcu_head_on_stack(&rcu.head);
}
static struct rcu_torture_ops rcu_bh_ops = {
.init = NULL,
.cleanup = NULL,
......@@ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.sync = synchronize_rcu_bh,
.cb_barrier = rcu_barrier_bh,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
......@@ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.sync = synchronize_rcu_bh,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
......@@ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.name = "rcu_bh_sync"
};
/*
 * rcutorture operations for the "rcu_bh_expedited" test type: rcu_bh
 * read-side hooks combined with synchronize_rcu_bh_expedited() as the
 * ->sync operation.  No ->cb_barrier, ->cleanup or ->stats hooks are
 * supplied.
 */
static struct rcu_torture_ops rcu_bh_expedited_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_bh_expedited,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh_expedited"
};
/*
* Definitions for srcu torture testing.
*/
......@@ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
}
static void sched_torture_synchronize(void)
{
synchronize_sched();
}
static struct rcu_torture_ops sched_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
......@@ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = {
.readunlock = sched_torture_read_unlock,
.completed = rcu_no_completed,
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.sync = synchronize_sched,
.cb_barrier = rcu_barrier_sched,
.fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
......@@ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = {
.readunlock = sched_torture_read_unlock,
.completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.sync = synchronize_sched,
.cb_barrier = NULL,
.fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
......@@ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg)
do {
/* Wait for the next test interval. */
oldstarttime = boost_starttime;
while (jiffies - oldstarttime > ULONG_MAX / 2) {
while (ULONG_CMP_LT(jiffies, oldstarttime)) {
schedule_timeout_uninterruptible(1);
rcu_stutter_wait("rcu_torture_boost");
if (kthread_should_stop() ||
......@@ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg)
/* Do one boost-test interval. */
endtime = oldstarttime + test_boost_duration * HZ;
call_rcu_time = jiffies;
while (jiffies - endtime > ULONG_MAX / 2) {
while (ULONG_CMP_LT(jiffies, endtime)) {
/* If we don't have a callback in flight, post one. */
if (!rbi.inflight) {
smp_mb(); /* RCU core before ->inflight = 1. */
......@@ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg)
* interval. Besides, we are running at RT priority,
* so delays should be relatively rare.
*/
while (oldstarttime == boost_starttime) {
while (oldstarttime == boost_starttime &&
!kthread_should_stop()) {
if (mutex_trylock(&boost_mutex)) {
boost_starttime = jiffies +
test_boost_interval * HZ;
......@@ -809,11 +797,11 @@ checkwait: rcu_stutter_wait("rcu_torture_boost");
/* Clean up and exit. */
VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
destroy_rcu_head_on_stack(&rbi.rcu);
rcutorture_shutdown_absorb("rcu_torture_boost");
while (!kthread_should_stop() || rbi.inflight)
schedule_timeout_uninterruptible(1);
smp_mb(); /* order accesses to ->inflight before stack-frame death. */
destroy_rcu_head_on_stack(&rbi.rcu);
return 0;
}
......@@ -831,11 +819,13 @@ rcu_torture_fqs(void *arg)
VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
do {
fqs_resume_time = jiffies + fqs_stutter * HZ;
while (jiffies - fqs_resume_time > LONG_MAX) {
while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
!kthread_should_stop()) {
schedule_timeout_interruptible(1);
}
fqs_burst_remaining = fqs_duration;
while (fqs_burst_remaining > 0) {
while (fqs_burst_remaining > 0 &&
!kthread_should_stop()) {
cur_ops->fqs();
udelay(fqs_holdoff);
fqs_burst_remaining -= fqs_holdoff;
......@@ -1280,7 +1270,8 @@ static int rcutorture_booster_init(int cpu)
/* Don't allow time recalculation while creating a new task. */
mutex_lock(&boost_mutex);
VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
cpu_to_node(cpu),
"rcu_torture_boost");
if (IS_ERR(boost_tasks[cpu])) {
retval = PTR_ERR(boost_tasks[cpu]);
......@@ -1424,7 +1415,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
&srcu_ops, &srcu_expedited_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
......
This diff is collapsed.
......@@ -230,9 +230,9 @@ struct rcu_data {
/* in order to detect GP end. */
unsigned long gpnum; /* Highest gp number that this CPU */
/* is aware of having started. */
unsigned long passed_quiesc_completed;
/* Value of completed at time of qs. */
bool passed_quiesc; /* User-mode/idle loop etc. */
unsigned long passed_quiesce_gpnum;
/* gpnum at time of quiescent state. */
bool passed_quiesce; /* User-mode/idle loop etc. */
bool qs_pending; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
bool preemptible; /* Preemptible RCU? */
......@@ -299,6 +299,7 @@ struct rcu_data {
unsigned long n_rp_need_nothing;
int cpu;
struct rcu_state *rsp;
};
/* Values for signaled field in struct rcu_state. */
......@@ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state;
DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DECLARE_PER_CPU(char, rcu_cpu_has_work);
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifndef RCU_TREE_NONCORE
/* Forward declarations for rcutree_plugin.h */
......@@ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
static void rcu_stop_cpu_kthread(int cpu);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static void rcu_print_task_stall(struct rcu_node *rnp);
static int rcu_print_task_stall(struct rcu_node *rnp);
static void rcu_preempt_stall_reset(void);
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
......@@ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu);
static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_send_cbs_to_online(void);
static void __init __rcu_init_preempt(void);
static void rcu_needs_cpu_flush(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
......
This diff is collapsed.
......@@ -48,11 +48,6 @@
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DECLARE_PER_CPU(char, rcu_cpu_has_work);
static char convert_kthread_status(unsigned int kthread_status)
{
if (kthread_status > RCU_KTHREAD_MAX)
......@@ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d",
seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->completed, rdp->gpnum,
rdp->passed_quiesc, rdp->passed_quiesc_completed,
rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, " dt=%d/%d/%d df=%lu",
......@@ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
rdp->cpu,
cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
rdp->completed, rdp->gpnum,
rdp->passed_quiesc, rdp->passed_quiesc_completed,
rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, ",%d,%d,%d,%lu",
......@@ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
static int show_rcudata_csv(struct seq_file *m, void *unused)
{
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
#ifdef CONFIG_NO_HZ
seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
#endif /* #ifdef CONFIG_NO_HZ */
......
......@@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct rt_mutex_waiter *waiter)
{
int ret = 0;
int was_disabled;
for (;;) {
/* Try to acquire the lock: */
......@@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
raw_spin_unlock(&lock->wait_lock);
was_disabled = irqs_disabled();
if (was_disabled)
local_irq_enable();
debug_rt_mutex_print_deadlock(waiter);
schedule_rt_mutex(lock);
if (was_disabled)
local_irq_disable();
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
}
......
......@@ -4213,6 +4213,7 @@ static inline void schedule_debug(struct task_struct *prev)
*/
if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
__schedule_bug(prev);
rcu_sleep_check();
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
......@@ -5954,15 +5955,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
ftrace_graph_init_idle_task(idle, cpu);
}
/*
* In a system that switches off the HZ timer nohz_cpu_mask
* indicates which cpus entered this state. This is used
* in the rcu update to wait only for active cpus. For system
* which do not switch off the HZ timer nohz_cpu_mask should
* always be CPU_BITS_NONE.
*/
cpumask_var_t nohz_cpu_mask;
/*
* Increase the granularity value when there are more CPUs,
* because with more CPUs the 'effective latency' as visible
......@@ -8175,8 +8167,6 @@ void __init sched_init(void)
*/
current->sched_class = &fair_sched_class;
/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
#ifdef CONFIG_SMP
zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
#ifdef CONFIG_NO_HZ
......@@ -8206,6 +8196,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
{
static unsigned long prev_jiffy; /* ratelimiting */
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
system_state != SYSTEM_RUNNING || oops_in_progress)
return;
......
......@@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now)
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long flags;
cpumask_clear_cpu(cpu, nohz_cpu_mask);
ts->idle_waketime = now;
local_irq_save(flags);
......@@ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle)
else
expires.tv64 = KTIME_MAX;
if (delta_jiffies > 1)
cpumask_set_cpu(cpu, nohz_cpu_mask);
/* Skip reprogram of event if its not changed */
if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
goto out;
......@@ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle)
* softirq.
*/
tick_do_update_jiffies64(ktime_get());
cpumask_clear_cpu(cpu, nohz_cpu_mask);
}
raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
......@@ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void)
/* Update jiffies first */
select_nohz_load_balancer(0);
tick_do_update_jiffies64(now);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment