Kirill Smelkov / linux / Commits
Commit a5acbfbd, authored Mar 10, 2016 by Rafael J. Wysocki
Merge branch 'pm-cpufreq-governor' into pm-cpufreq
Parents: edd4a893 adaf9fcd
Showing 17 changed files with 1031 additions and 1148 deletions (+1031, -1148)
 drivers/cpufreq/Kconfig                  +1    -0
 drivers/cpufreq/amd_freq_sensitivity.c   +4    -4
 drivers/cpufreq/cpufreq.c               +67   -98
 drivers/cpufreq/cpufreq_conservative.c  +124  -152
 drivers/cpufreq/cpufreq_governor.c      +403  -363
 drivers/cpufreq/cpufreq_governor.h       +84  -177
 drivers/cpufreq/cpufreq_ondemand.c      +151  -284
 drivers/cpufreq/cpufreq_ondemand.h       +30    -0
 drivers/cpufreq/intel_pstate.c           +39   -64
 include/linux/cpufreq.h                   +0    -5
 include/linux/sched.h                     +9    -0
 kernel/sched/Makefile                     +1    -0
 kernel/sched/cpufreq.c                   +37    -0
 kernel/sched/deadline.c                   +4    -0
 kernel/sched/fair.c                      +25    -1
 kernel/sched/rt.c                         +4    -0
 kernel/sched/sched.h                     +48    -0
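The headline change in this merge is visible in the diffstat: governor plumbing grows a scheduler side (the new kernel/sched/cpufreq.c plus hooks in fair.c, rt.c and deadline.c), and the dbs governors stop using per-CPU deferrable timers in favour of a utilization-update callback invoked from the scheduler. As a rough, hypothetical sketch of how a governor hooks in under the new scheme (names such as my_util_handler and my_gov_start are illustrative; only struct update_util_data and cpufreq_set_update_util_data() come from the diffs below):

	#include <linux/cpufreq.h>
	#include <linux/percpu-defs.h>
	#include <linux/sched.h>	/* struct update_util_data, cpufreq_set_update_util_data() */

	static DEFINE_PER_CPU(struct update_util_data, my_update_util);

	/* Invoked by the scheduler on utilization updates instead of a timer firing. */
	static void my_util_handler(struct update_util_data *data, u64 time,
				    unsigned long util, unsigned long max)
	{
		/* decide here whether enough time has passed to take a new load sample */
	}

	static void my_gov_start(struct cpufreq_policy *policy)
	{
		int cpu;

		for_each_cpu(cpu, policy->cpus) {
			struct update_util_data *ud = &per_cpu(my_update_util, cpu);

			ud->func = my_util_handler;
			cpufreq_set_update_util_data(cpu, ud);
		}
	}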
drivers/cpufreq/Kconfig
@@ -19,6 +19,7 @@ config CPU_FREQ
 if CPU_FREQ

 config CPU_FREQ_GOV_COMMON
+	select IRQ_WORK
 	bool

 config CPU_FREQ_BOOST_SW
drivers/cpufreq/amd_freq_sensitivity.c
@@ -21,7 +21,7 @@
 #include <asm/msr.h>
 #include <asm/cpufeature.h>

-#include "cpufreq_governor.h"
+#include "cpufreq_ondemand.h"

 #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL	0xc0010080
 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE	0xc0010081
@@ -45,10 +45,10 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
 	long d_actual, d_reference;
 	struct msr actual, reference;
 	struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
-	struct dbs_data *od_data = policy->governor_data;
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+	struct dbs_data *od_data = policy_dbs->dbs_data;
 	struct od_dbs_tuners *od_tuners = od_data->tuners;
-	struct od_cpu_dbs_info_s *od_info =
-		od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
+	struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs);

 	if (!od_info->freq_table)
 		return freq_next;
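The amd_freq_sensitivity.c hunk shows the new data layout in miniature: policy->governor_data no longer points at the tunable set (struct dbs_data) directly but at a per-policy struct policy_dbs_info, and the shared tunables are reached through one extra hop. A minimal sketch of that indirection, using only fields shown in this merge (the helper name is hypothetical):

	static unsigned int example_up_threshold(struct cpufreq_policy *policy)
	{
		struct policy_dbs_info *policy_dbs = policy->governor_data;	/* per-policy governor state */
		struct dbs_data *dbs_data = policy_dbs->dbs_data;		/* shared tunable set */

		return dbs_data->up_threshold;
	}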
drivers/cpufreq/cpufreq.c
@@ -64,7 +64,6 @@ static LIST_HEAD(cpufreq_governor_list);
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
-DEFINE_MUTEX(cpufreq_governor_lock);

 /* Flag to suspend/resume CPUFreq governors */
 static bool cpufreq_suspended;
@@ -75,10 +74,8 @@ static inline bool has_target(void)
 }

 /* internal prototypes */
-static int __cpufreq_governor(struct cpufreq_policy *policy,
-		unsigned int event);
+static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
-static void handle_update(struct work_struct *work);

 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -955,30 +952,38 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
 	if (cpumask_test_cpu(cpu, policy->cpus))
 		return 0;

+	down_write(&policy->rwsem);
 	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
-			return ret;
+			goto unlock;
 		}
 	}

-	down_write(&policy->rwsem);
 	cpumask_set_cpu(cpu, policy->cpus);
-	up_write(&policy->rwsem);

 	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
 		if (!ret)
-			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+			ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);

-		if (ret) {
+		if (ret)
 			pr_err("%s: Failed to start governor\n", __func__);
-			return ret;
-		}
 	}

-	return 0;
+unlock:
+	up_write(&policy->rwsem);
+	return ret;
+}
+
+static void handle_update(struct work_struct *work)
+{
+	struct cpufreq_policy *policy =
+		container_of(work, struct cpufreq_policy, update);
+	unsigned int cpu = policy->cpu;
+	pr_debug("handle_update for cpu %u called\n", cpu);
+	cpufreq_update_policy(cpu);
 }

 static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
@@ -1267,9 +1272,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	return ret;
 }

-static void cpufreq_offline_prepare(unsigned int cpu)
+static void cpufreq_offline(unsigned int cpu)
 {
 	struct cpufreq_policy *policy;
+	int ret;

 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
@@ -1279,13 +1285,13 @@ static void cpufreq_offline_prepare(unsigned int cpu)
 		return;
 	}

+	down_write(&policy->rwsem);
 	if (has_target()) {
-		int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret)
 			pr_err("%s: Failed to stop governor\n", __func__);
 	}

-	down_write(&policy->rwsem);
 	cpumask_clear_cpu(cpu, policy->cpus);

 	if (policy_is_inactive(policy)) {
@@ -1298,39 +1304,27 @@ static void cpufreq_offline_prepare(unsigned int cpu)
 		/* Nominate new CPU */
 		policy->cpu = cpumask_any(policy->cpus);
 	}
-	up_write(&policy->rwsem);

 	/* Start governor again for active policy */
 	if (!policy_is_inactive(policy)) {
 		if (has_target()) {
-			int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
-
+			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
 			if (!ret)
-				ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+				ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);

 			if (ret)
 				pr_err("%s: Failed to start governor\n", __func__);
 		}
-	} else if (cpufreq_driver->stop_cpu) {
-		cpufreq_driver->stop_cpu(policy);
-	}
-}

-static void cpufreq_offline_finish(unsigned int cpu)
-{
-	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	if (!policy) {
-		pr_debug("%s: No cpu_data found\n", __func__);
-		return;
+		goto unlock;
 	}

-	/* Only proceed for inactive policies */
-	if (!policy_is_inactive(policy))
-		return;
+	if (cpufreq_driver->stop_cpu)
+		cpufreq_driver->stop_cpu(policy);

 	/* If cpu is last user of policy, free policy */
 	if (has_target()) {
-		int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
-
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 		if (ret)
 			pr_err("%s: Failed to exit governor\n", __func__);
 	}
@@ -1344,6 +1338,9 @@ static void cpufreq_offline_finish(unsigned int cpu)
 		cpufreq_driver->exit(policy);
 		policy->freq_table = NULL;
 	}
+
+unlock:
+	up_write(&policy->rwsem);
 }

 /**
@@ -1359,10 +1356,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 	if (!policy)
 		return;

-	if (cpu_online(cpu)) {
-		cpufreq_offline_prepare(cpu);
-		cpufreq_offline_finish(cpu);
-	}
+	if (cpu_online(cpu))
+		cpufreq_offline(cpu);

 	cpumask_clear_cpu(cpu, policy->real_cpus);
 	remove_cpu_dev_symlink(policy, cpu);
@@ -1371,15 +1366,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 		cpufreq_policy_free(policy, true);
 }

-static void handle_update(struct work_struct *work)
-{
-	struct cpufreq_policy *policy =
-		container_of(work, struct cpufreq_policy, update);
-	unsigned int cpu = policy->cpu;
-	pr_debug("handle_update for cpu %u called\n", cpu);
-	cpufreq_update_policy(cpu);
-}
-
 /**
  * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're
  * in deep trouble.
@@ -1542,6 +1528,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend);
 void cpufreq_suspend(void)
 {
 	struct cpufreq_policy *policy;
+	int ret;

 	if (!cpufreq_driver)
 		return;
@@ -1552,7 +1539,11 @@ void cpufreq_suspend(void)
 	pr_debug("%s: Suspending Governors\n", __func__);

 	for_each_active_policy(policy) {
-		if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
+		down_write(&policy->rwsem);
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+		up_write(&policy->rwsem);
+
+		if (ret)
 			pr_err("%s: Failed to stop governor for policy: %p\n",
 				__func__, policy);
 		else if (cpufreq_driver->suspend
@@ -1574,6 +1565,7 @@ void cpufreq_suspend(void)
 void cpufreq_resume(void)
 {
 	struct cpufreq_policy *policy;
+	int ret;

 	if (!cpufreq_driver)
 		return;
@@ -1586,13 +1578,20 @@ void cpufreq_resume(void)
 	pr_debug("%s: Resuming Governors\n", __func__);

 	for_each_active_policy(policy) {
-		if (cpufreq_driver->resume && cpufreq_driver->resume(policy))
+		if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) {
 			pr_err("%s: Failed to resume driver: %p\n", __func__,
 				policy);
-		else if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
-		    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
-			pr_err("%s: Failed to start governor for policy: %p\n",
-				__func__, policy);
+		} else {
+			down_write(&policy->rwsem);
+			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
+			if (!ret)
+				cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+			up_write(&policy->rwsem);
+
+			if (ret)
+				pr_err("%s: Failed to start governor for policy: %p\n",
+					__func__, policy);
+		}
 	}

 	/*
@@ -1878,8 +1877,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void)
 	return NULL;
 }

-static int __cpufreq_governor(struct cpufreq_policy *policy,
-					unsigned int event)
+static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
 {
 	int ret;
@@ -1913,21 +1911,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,

 	pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event);

-	mutex_lock(&cpufreq_governor_lock);
-	if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
-	    || (!policy->governor_enabled
-	    && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
-		mutex_unlock(&cpufreq_governor_lock);
-		return -EBUSY;
-	}
-
-	if (event == CPUFREQ_GOV_STOP)
-		policy->governor_enabled = false;
-	else if (event == CPUFREQ_GOV_START)
-		policy->governor_enabled = true;
-
-	mutex_unlock(&cpufreq_governor_lock);
-
 	ret = policy->governor->governor(policy, event);

 	if (!ret) {
@@ -1935,14 +1918,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 			policy->governor->initialized++;
 		else if (event == CPUFREQ_GOV_POLICY_EXIT)
 			policy->governor->initialized--;
-	} else {
-		/* Restore original values */
-		mutex_lock(&cpufreq_governor_lock);
-		if (event == CPUFREQ_GOV_STOP)
-			policy->governor_enabled = true;
-		else if (event == CPUFREQ_GOV_START)
-			policy->governor_enabled = false;
-		mutex_unlock(&cpufreq_governor_lock);
 	}

 	if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
@@ -2097,7 +2072,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	old_gov = policy->governor;
 	/* end old governor */
 	if (old_gov) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			/* This can happen due to race with other operations */
 			pr_debug("%s: Failed to Stop Governor: %s (%d)\n",
@@ -2105,10 +2080,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 			return ret;
 		}

-		up_write(&policy->rwsem);
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
-		down_write(&policy->rwsem);
-
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 		if (ret) {
 			pr_err("%s: Failed to Exit Governor: %s (%d)\n",
 			       __func__, old_gov->name, ret);
@@ -2118,32 +2090,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	/* start new governor */
 	policy->governor = new_policy->governor;
-	ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
+	ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
 	if (!ret) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+		ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
 		if (!ret)
 			goto out;

-		up_write(&policy->rwsem);
-		__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
-		down_write(&policy->rwsem);
+		cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 	}

 	/* new governor failed, so re-start old one */
 	pr_debug("starting governor %s failed\n", policy->governor->name);
 	if (old_gov) {
 		policy->governor = old_gov;
-		if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
+		if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
 			policy->governor = NULL;
 		else
-			__cpufreq_governor(policy, CPUFREQ_GOV_START);
+			cpufreq_governor(policy, CPUFREQ_GOV_START);
 	}

 	return ret;

 out:
 	pr_debug("governor: change or update limits\n");
-	return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 }

@@ -2210,11 +2180,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 		break;

 	case CPU_DOWN_PREPARE:
-		cpufreq_offline_prepare(cpu);
-		break;
-
-	case CPU_POST_DEAD:
-		cpufreq_offline_finish(cpu);
+		cpufreq_offline(cpu);
 		break;

 	case CPU_DOWN_FAILED:
@@ -2247,8 +2213,11 @@ static int cpufreq_boost_set_sw(int state)
 				       __func__);
 				break;
 			}
+
+			down_write(&policy->rwsem);
 			policy->user_policy.max = policy->max;
-			__cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+			cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+			up_write(&policy->rwsem);
 		}
 	}
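A recurring pattern in the cpufreq.c changes above: the __cpufreq_governor() wrapper and the governor_enabled/cpufreq_governor_lock bookkeeping are gone, and callers now serialize governor events themselves by taking policy->rwsem around cpufreq_governor(), as cpufreq_suspend(), cpufreq_resume() and cpufreq_boost_set_sw() do after this merge. Sketched as a hypothetical caller inside cpufreq.c (not part of the patch):

	static int example_restart_governor(struct cpufreq_policy *policy)
	{
		int ret;

		down_write(&policy->rwsem);	/* the caller provides the serialization now */
		ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
		if (!ret)
			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
		if (!ret)
			ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
		up_write(&policy->rwsem);
		return ret;
	}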
drivers/cpufreq/cpufreq_conservative.c
@@ -14,6 +14,22 @@
 #include <linux/slab.h>
 #include "cpufreq_governor.h"

+struct cs_policy_dbs_info {
+	struct policy_dbs_info policy_dbs;
+	unsigned int down_skip;
+	unsigned int requested_freq;
+};
+
+static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
+{
+	return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs);
+}
+
+struct cs_dbs_tuners {
+	unsigned int down_threshold;
+	unsigned int freq_step;
+};
+
 /* Conservative governor macros */
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
 #define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
@@ -21,18 +37,6 @@
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
 #define MAX_SAMPLING_DOWN_FACTOR		(10)

-static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info);
-
-static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
-				   unsigned int event);
-
-static struct cpufreq_governor cpufreq_gov_conservative = {
-	.name			= "conservative",
-	.governor		= cs_cpufreq_governor_dbs,
-	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
-	.owner			= THIS_MODULE,
-};
-
 static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
 					   struct cpufreq_policy *policy)
 {
@@ -54,27 +58,28 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
  * Any frequency increase takes it to the maximum frequency. Frequency reduction
  * happens at minimum steps of 5% (default) of maximum frequency
  */
-static void cs_check_cpu(int cpu, unsigned int load)
+static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
 {
-	struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
-	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
-	struct dbs_data *dbs_data = policy->governor_data;
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+	struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
+	struct dbs_data *dbs_data = policy_dbs->dbs_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+	unsigned int load = dbs_update(policy);

 	/*
 	 * break out if we 'cannot' reduce the speed as the user might
 	 * want freq_step to be zero
 	 */
 	if (cs_tuners->freq_step == 0)
-		return;
+		goto out;

 	/* Check for frequency increase */
-	if (load > cs_tuners->up_threshold) {
+	if (load > dbs_data->up_threshold) {
 		dbs_info->down_skip = 0;

 		/* if we are already at full speed then break out early */
 		if (dbs_info->requested_freq == policy->max)
-			return;
+			goto out;

 		dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
@@ -83,12 +88,12 @@ static void cs_check_cpu(int cpu, unsigned int load)
 		__cpufreq_driver_target(policy, dbs_info->requested_freq,
 			CPUFREQ_RELATION_H);
-		return;
+		goto out;
 	}

 	/* if sampling_down_factor is active break out early */
-	if (++dbs_info->down_skip < cs_tuners->sampling_down_factor)
-		return;
+	if (++dbs_info->down_skip < dbs_data->sampling_down_factor)
+		goto out;

 	dbs_info->down_skip = 0;

 	/* Check for frequency decrease */
@@ -98,7 +103,7 @@ static void cs_check_cpu(int cpu, unsigned int load)
 	 * if we cannot reduce the frequency anymore, break out early
 	 */
 	if (policy->cur == policy->min)
-		return;
+		goto out;

 	freq_target = get_freq_target(cs_tuners, policy);
 	if (dbs_info->requested_freq > freq_target)
@@ -108,58 +113,25 @@ static void cs_check_cpu(int cpu, unsigned int load)
 		__cpufreq_driver_target(policy, dbs_info->requested_freq,
 				CPUFREQ_RELATION_L);
-		return;
 	}
-}
-
-static unsigned int cs_dbs_timer(struct cpufreq_policy *policy,
-				 bool modify_all)
-{
-	struct dbs_data *dbs_data = policy->governor_data;
-	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-
-	if (modify_all)
-		dbs_check_cpu(dbs_data, policy->cpu);

-	return delay_for_sampling_rate(cs_tuners->sampling_rate);
+ out:
+	return dbs_data->sampling_rate;
 }

 static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-		void *data)
-{
-	struct cpufreq_freqs *freq = data;
-	struct cs_cpu_dbs_info_s *dbs_info =
-					&per_cpu(cs_cpu_dbs_info, freq->cpu);
-	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
-
-	if (!policy)
-		return 0;
-
-	/* policy isn't governed by conservative governor */
-	if (policy->governor != &cpufreq_gov_conservative)
-		return 0;
-
-	/*
-	 * we only care if our internally tracked freq moves outside the 'valid'
-	 * ranges of frequency available to us otherwise we do not change it
-	 */
-	if (dbs_info->requested_freq > policy->max ||
-	    dbs_info->requested_freq < policy->min)
-		dbs_info->requested_freq = freq->new;
-
-	return 0;
-}
+		void *data);

 static struct notifier_block cs_cpufreq_notifier_block = {
 	.notifier_call = dbs_cpufreq_notifier,
 };

 /************************** sysfs interface ************************/
-static struct common_dbs_data cs_dbs_cdata;
+static struct dbs_governor cs_dbs_gov;

 static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
 					  const char *buf, size_t count)
 {
-	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
 	ret = sscanf(buf, "%u", &input);
@@ -167,22 +139,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
 	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
 		return -EINVAL;

-	cs_tuners->sampling_down_factor = input;
-	return count;
-}
-
-static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
-				   size_t count)
-{
-	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-	unsigned int input;
-	int ret;
-	ret = sscanf(buf, "%u", &input);
-
-	if (ret != 1)
-		return -EINVAL;
-
-	cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate);
+	dbs_data->sampling_down_factor = input;
 	return count;
 }
@@ -197,7 +154,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
 	if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold)
 		return -EINVAL;

-	cs_tuners->up_threshold = input;
+	dbs_data->up_threshold = input;
 	return count;
 }
@@ -211,7 +168,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
 	/* cannot be lower than 11 otherwise freq will not fall */
 	if (ret != 1 || input < 11 || input > 100 ||
-	    input >= cs_tuners->up_threshold)
+	    input >= dbs_data->up_threshold)
 		return -EINVAL;

 	cs_tuners->down_threshold = input;
@@ -221,8 +178,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
 static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
 				      const char *buf, size_t count)
 {
-	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-	unsigned int input, j;
+	unsigned int input;
 	int ret;

 	ret = sscanf(buf, "%u", &input);
@@ -232,21 +188,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
 	if (input > 1)
 		input = 1;

-	if (input == cs_tuners->ignore_nice_load) /* nothing to do */
+	if (input == dbs_data->ignore_nice_load) /* nothing to do */
 		return count;

-	cs_tuners->ignore_nice_load = input;
+	dbs_data->ignore_nice_load = input;

 	/* we need to re-evaluate prev_cpu_idle */
-	for_each_online_cpu(j) {
-		struct cs_cpu_dbs_info_s *dbs_info;
-		dbs_info = &per_cpu(cs_cpu_dbs_info, j);
-		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
-					&dbs_info->cdbs.prev_cpu_wall, 0);
-		if (cs_tuners->ignore_nice_load)
-			dbs_info->cdbs.prev_cpu_nice =
-				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-	}
+	gov_update_cpu_data(dbs_data);
+
 	return count;
 }
@@ -272,55 +221,47 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf,
 	return count;
 }

-show_store_one(cs, sampling_rate);
-show_store_one(cs, sampling_down_factor);
-show_store_one(cs, up_threshold);
-show_store_one(cs, down_threshold);
-show_store_one(cs, ignore_nice_load);
-show_store_one(cs, freq_step);
-declare_show_sampling_rate_min(cs);
-
-gov_sys_pol_attr_rw(sampling_rate);
-gov_sys_pol_attr_rw(sampling_down_factor);
-gov_sys_pol_attr_rw(up_threshold);
-gov_sys_pol_attr_rw(down_threshold);
-gov_sys_pol_attr_rw(ignore_nice_load);
-gov_sys_pol_attr_rw(freq_step);
-gov_sys_pol_attr_ro(sampling_rate_min);
-
-static struct attribute *dbs_attributes_gov_sys[] = {
-	&sampling_rate_min_gov_sys.attr,
-	&sampling_rate_gov_sys.attr,
-	&sampling_down_factor_gov_sys.attr,
-	&up_threshold_gov_sys.attr,
-	&down_threshold_gov_sys.attr,
-	&ignore_nice_load_gov_sys.attr,
-	&freq_step_gov_sys.attr,
+gov_show_one_common(sampling_rate);
+gov_show_one_common(sampling_down_factor);
+gov_show_one_common(up_threshold);
+gov_show_one_common(ignore_nice_load);
+gov_show_one_common(min_sampling_rate);
+gov_show_one(cs, down_threshold);
+gov_show_one(cs, freq_step);
+
+gov_attr_rw(sampling_rate);
+gov_attr_rw(sampling_down_factor);
+gov_attr_rw(up_threshold);
+gov_attr_rw(ignore_nice_load);
+gov_attr_ro(min_sampling_rate);
+gov_attr_rw(down_threshold);
+gov_attr_rw(freq_step);
+
+static struct attribute *cs_attributes[] = {
+	&min_sampling_rate.attr,
+	&sampling_rate.attr,
+	&sampling_down_factor.attr,
+	&up_threshold.attr,
+	&down_threshold.attr,
+	&ignore_nice_load.attr,
+	&freq_step.attr,
 	NULL
 };

-static struct attribute_group cs_attr_group_gov_sys = {
-	.attrs = dbs_attributes_gov_sys,
-	.name = "conservative",
-};
-
-static struct attribute *dbs_attributes_gov_pol[] = {
-	&sampling_rate_min_gov_pol.attr,
-	&sampling_rate_gov_pol.attr,
-	&sampling_down_factor_gov_pol.attr,
-	&up_threshold_gov_pol.attr,
-	&down_threshold_gov_pol.attr,
-	&ignore_nice_load_gov_pol.attr,
-	&freq_step_gov_pol.attr,
-	NULL
-};
-
-static struct attribute_group cs_attr_group_gov_pol = {
-	.attrs = dbs_attributes_gov_pol,
-	.name = "conservative",
-};
-
 /************************** sysfs end ************************/

+static struct policy_dbs_info *cs_alloc(void)
+{
+	struct cs_policy_dbs_info *dbs_info;
+
+	dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
+	return dbs_info ? &dbs_info->policy_dbs : NULL;
+}
+
+static void cs_free(struct policy_dbs_info *policy_dbs)
+{
+	kfree(to_dbs_info(policy_dbs));
+}
+
 static int cs_init(struct dbs_data *dbs_data, bool notify)
 {
@@ -332,11 +273,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify)
 		return -ENOMEM;
 	}

-	tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
 	tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD;
-	tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
-	tuners->ignore_nice_load = 0;
 	tuners->freq_step = DEF_FREQUENCY_STEP;
+	dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
+	dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
+	dbs_data->ignore_nice_load = 0;

 	dbs_data->tuners = tuners;
 	dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
@@ -358,35 +299,66 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify)
 	kfree(dbs_data->tuners);
 }

-define_get_cpu_dbs_routines(cs_cpu_dbs_info);
+static void cs_start(struct cpufreq_policy *policy)
+{
+	struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
+
+	dbs_info->down_skip = 0;
+	dbs_info->requested_freq = policy->cur;
+}

-static struct common_dbs_data cs_dbs_cdata = {
-	.governor = GOV_CONSERVATIVE,
-	.attr_group_gov_sys = &cs_attr_group_gov_sys,
-	.attr_group_gov_pol = &cs_attr_group_gov_pol,
-	.get_cpu_cdbs = get_cpu_cdbs,
-	.get_cpu_dbs_info_s = get_cpu_dbs_info_s,
+static struct dbs_governor cs_dbs_gov = {
+	.gov = {
+		.name = "conservative",
+		.governor = cpufreq_governor_dbs,
+		.max_transition_latency = TRANSITION_LATENCY_LIMIT,
+		.owner = THIS_MODULE,
+	},
+	.kobj_type = { .default_attrs = cs_attributes },
 	.gov_dbs_timer = cs_dbs_timer,
-	.gov_check_cpu = cs_check_cpu,
+	.alloc = cs_alloc,
+	.free = cs_free,
 	.init = cs_init,
 	.exit = cs_exit,
-	.mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex),
+	.start = cs_start,
 };

-static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
-				   unsigned int event)
+#define CPU_FREQ_GOV_CONSERVATIVE	(&cs_dbs_gov.gov)
+
+static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
 {
-	return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event);
+	struct cpufreq_freqs *freq = data;
+	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
+	struct cs_policy_dbs_info *dbs_info;
+
+	if (!policy)
+		return 0;
+
+	/* policy isn't governed by conservative governor */
+	if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE)
+		return 0;
+
+	dbs_info = to_dbs_info(policy->governor_data);
+	/*
+	 * we only care if our internally tracked freq moves outside the 'valid'
+	 * ranges of frequency available to us otherwise we do not change it
+	 */
+	if (dbs_info->requested_freq > policy->max ||
+	    dbs_info->requested_freq < policy->min)
+		dbs_info->requested_freq = freq->new;
+
+	return 0;
 }

 static int __init cpufreq_gov_dbs_init(void)
 {
-	return cpufreq_register_governor(&cpufreq_gov_conservative);
+	return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE);
 }

 static void __exit cpufreq_gov_dbs_exit(void)
 {
-	cpufreq_unregister_governor(&cpufreq_gov_conservative);
+	cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE);
 }

 MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
@@ -398,7 +370,7 @@ MODULE_LICENSE("GPL");
 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 struct cpufreq_governor *cpufreq_default_governor(void)
 {
-	return &cpufreq_gov_conservative;
+	return CPU_FREQ_GOV_CONSERVATIVE;
 }

 fs_initcall(cpufreq_gov_dbs_init);
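The conservative governor's tunables now go through the common gov_attr_*/gov_show_one* helpers and a per-dbs_data kobject (see governor_show(), governor_store() and governor_sysfs_ops in the cpufreq_governor.c diff below) instead of the old gov_sys/gov_pol attribute-group macros. The helper definitions themselves live in cpufreq_governor.h, which is only partially shown here; as an assumption-labelled sketch, what the diff uses amounts to roughly this:

	/* Sketch only; not copied from the patch. The show/store signatures are
	 * taken from governor_show()/governor_store() below, the macro body is
	 * a plausible reading of gov_attr_rw() as used in the conservative diff. */
	struct governor_attr {
		struct attribute attr;
		ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
		ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, size_t count);
	};

	#define gov_attr_rw(_name)					\
	static struct governor_attr _name =				\
	__ATTR(_name, 0644, show_##_name, store_##_name)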
drivers/cpufreq/cpufreq_governor.c
View file @
a5acbfbd
...
...
@@ -18,95 +18,193 @@
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "cpufreq_governor.h"
static
struct
attribute_group
*
get_sysfs_attr
(
struct
dbs_data
*
dbs_data
)
{
if
(
have_governor_per_policy
())
return
dbs_data
->
cdata
->
attr_group_gov_pol
;
else
return
dbs_data
->
cdata
->
attr_group_gov_sys
;
}
static
DEFINE_PER_CPU
(
struct
cpu_dbs_info
,
cpu_dbs
);
static
DEFINE_MUTEX
(
gov_dbs_data_mutex
);
void
dbs_check_cpu
(
struct
dbs_data
*
dbs_data
,
int
cpu
)
/* Common sysfs tunables */
/**
* store_sampling_rate - update sampling rate effective immediately if needed.
*
* If new rate is smaller than the old, simply updating
* dbs.sampling_rate might not be appropriate. For example, if the
* original sampling_rate was 1 second and the requested new sampling rate is 10
* ms because the user needs immediate reaction from ondemand governor, but not
* sure if higher frequency will be required or not, then, the governor may
* change the sampling rate too late; up to 1 second later. Thus, if we are
* reducing the sampling rate, we need to make the new value effective
* immediately.
*
* This must be called with dbs_data->mutex held, otherwise traversing
* policy_dbs_list isn't safe.
*/
ssize_t
store_sampling_rate
(
struct
dbs_data
*
dbs_data
,
const
char
*
buf
,
size_t
count
)
{
struct
cpu_dbs_info
*
cdbs
=
dbs_data
->
cdata
->
get_cpu_cdbs
(
cpu
);
struct
od_dbs_tuners
*
od_tuners
=
dbs_data
->
tuners
;
struct
cs_dbs_tuners
*
cs_tuners
=
dbs_data
->
tuners
;
struct
cpufreq_policy
*
policy
=
cdbs
->
shared
->
policy
;
unsigned
int
sampling_rate
;
unsigned
int
max_load
=
0
;
unsigned
int
ignore_nice
;
unsigned
int
j
;
struct
policy_dbs_info
*
policy_dbs
;
unsigned
int
rate
;
int
ret
;
ret
=
sscanf
(
buf
,
"%u"
,
&
rate
);
if
(
ret
!=
1
)
return
-
EINVAL
;
if
(
dbs_data
->
cdata
->
governor
==
GOV_ONDEMAND
)
{
struct
od_cpu_dbs_info_s
*
od_dbs_info
=
dbs_data
->
cdata
->
get_cpu_dbs_info_s
(
cpu
);
dbs_data
->
sampling_rate
=
max
(
rate
,
dbs_data
->
min_sampling_rate
);
/*
* We are operating under dbs_data->mutex and so the list and its
* entries can't be freed concurrently.
*/
list_for_each_entry
(
policy_dbs
,
&
dbs_data
->
policy_dbs_list
,
list
)
{
mutex_lock
(
&
policy_dbs
->
timer_mutex
);
/*
* Sometimes, the ondemand governor uses an additional
* multiplier to give long delays. So apply this multiplier to
* the 'sampling_rate', so as to keep the wake-up-from-idle
* detection logic a bit conservative.
* On 32-bit architectures this may race with the
* sample_delay_ns read in dbs_update_util_handler(), but that
* really doesn't matter. If the read returns a value that's
* too big, the sample will be skipped, but the next invocation
* of dbs_update_util_handler() (when the update has been
* completed) will take a sample.
*
* If this runs in parallel with dbs_work_handler(), we may end
* up overwriting the sample_delay_ns value that it has just
* written, but it will be corrected next time a sample is
* taken, so it shouldn't be significant.
*/
sampling_rate
=
od_tuners
->
sampling_rate
;
sampling_rate
*=
od_dbs_info
->
rate_mult
;
gov_update_sample_delay
(
policy_dbs
,
0
);
mutex_unlock
(
&
policy_dbs
->
timer_mutex
);
}
ignore_nice
=
od_tuners
->
ignore_nice_load
;
}
else
{
sampling_rate
=
cs_tuners
->
sampling_rate
;
ignore_nice
=
cs_tuners
->
ignore_nice_load
;
return
count
;
}
EXPORT_SYMBOL_GPL
(
store_sampling_rate
);
/**
* gov_update_cpu_data - Update CPU load data.
* @dbs_data: Top-level governor data pointer.
*
* Update CPU load data for all CPUs in the domain governed by @dbs_data
* (that may be a single policy or a bunch of them if governor tunables are
* system-wide).
*
* Call under the @dbs_data mutex.
*/
void
gov_update_cpu_data
(
struct
dbs_data
*
dbs_data
)
{
struct
policy_dbs_info
*
policy_dbs
;
list_for_each_entry
(
policy_dbs
,
&
dbs_data
->
policy_dbs_list
,
list
)
{
unsigned
int
j
;
for_each_cpu
(
j
,
policy_dbs
->
policy
->
cpus
)
{
struct
cpu_dbs_info
*
j_cdbs
=
&
per_cpu
(
cpu_dbs
,
j
);
j_cdbs
->
prev_cpu_idle
=
get_cpu_idle_time
(
j
,
&
j_cdbs
->
prev_cpu_wall
,
dbs_data
->
io_is_busy
);
if
(
dbs_data
->
ignore_nice_load
)
j_cdbs
->
prev_cpu_nice
=
kcpustat_cpu
(
j
).
cpustat
[
CPUTIME_NICE
];
}
}
}
EXPORT_SYMBOL_GPL
(
gov_update_cpu_data
);
static
inline
struct
dbs_data
*
to_dbs_data
(
struct
kobject
*
kobj
)
{
return
container_of
(
kobj
,
struct
dbs_data
,
kobj
);
}
static
inline
struct
governor_attr
*
to_gov_attr
(
struct
attribute
*
attr
)
{
return
container_of
(
attr
,
struct
governor_attr
,
attr
);
}
static
ssize_t
governor_show
(
struct
kobject
*
kobj
,
struct
attribute
*
attr
,
char
*
buf
)
{
struct
dbs_data
*
dbs_data
=
to_dbs_data
(
kobj
);
struct
governor_attr
*
gattr
=
to_gov_attr
(
attr
);
return
gattr
->
show
(
dbs_data
,
buf
);
}
static
ssize_t
governor_store
(
struct
kobject
*
kobj
,
struct
attribute
*
attr
,
const
char
*
buf
,
size_t
count
)
{
struct
dbs_data
*
dbs_data
=
to_dbs_data
(
kobj
);
struct
governor_attr
*
gattr
=
to_gov_attr
(
attr
);
int
ret
=
-
EBUSY
;
mutex_lock
(
&
dbs_data
->
mutex
);
if
(
dbs_data
->
usage_count
)
ret
=
gattr
->
store
(
dbs_data
,
buf
,
count
);
mutex_unlock
(
&
dbs_data
->
mutex
);
return
ret
;
}
/*
* Sysfs Ops for accessing governor attributes.
*
* All show/store invocations for governor specific sysfs attributes, will first
* call the below show/store callbacks and the attribute specific callback will
* be called from within it.
*/
static
const
struct
sysfs_ops
governor_sysfs_ops
=
{
.
show
=
governor_show
,
.
store
=
governor_store
,
};
unsigned
int
dbs_update
(
struct
cpufreq_policy
*
policy
)
{
struct
policy_dbs_info
*
policy_dbs
=
policy
->
governor_data
;
struct
dbs_data
*
dbs_data
=
policy_dbs
->
dbs_data
;
unsigned
int
ignore_nice
=
dbs_data
->
ignore_nice_load
;
unsigned
int
max_load
=
0
;
unsigned
int
sampling_rate
,
io_busy
,
j
;
/*
* Sometimes governors may use an additional multiplier to increase
* sample delays temporarily. Apply that multiplier to sampling_rate
* so as to keep the wake-up-from-idle detection logic a bit
* conservative.
*/
sampling_rate
=
dbs_data
->
sampling_rate
*
policy_dbs
->
rate_mult
;
/*
* For the purpose of ondemand, waiting for disk IO is an indication
* that you're performance critical, and not that the system is actually
* idle, so do not add the iowait time to the CPU idle time then.
*/
io_busy
=
dbs_data
->
io_is_busy
;
/* Get Absolute Load */
for_each_cpu
(
j
,
policy
->
cpus
)
{
struct
cpu_dbs_info
*
j_cdbs
;
struct
cpu_dbs_info
*
j_cdbs
=
&
per_cpu
(
cpu_dbs
,
j
)
;
u64
cur_wall_time
,
cur_idle_time
;
unsigned
int
idle_time
,
wall_time
;
unsigned
int
load
;
int
io_busy
=
0
;
j_cdbs
=
dbs_data
->
cdata
->
get_cpu_cdbs
(
j
);
/*
* For the purpose of ondemand, waiting for disk IO is
* an indication that you're performance critical, and
* not that the system is actually idle. So do not add
* the iowait time to the cpu idle time.
*/
if
(
dbs_data
->
cdata
->
governor
==
GOV_ONDEMAND
)
io_busy
=
od_tuners
->
io_is_busy
;
cur_idle_time
=
get_cpu_idle_time
(
j
,
&
cur_wall_time
,
io_busy
);
wall_time
=
(
unsigned
int
)
(
cur_wall_time
-
j_cdbs
->
prev_cpu_wall
);
wall_time
=
cur_wall_time
-
j_cdbs
->
prev_cpu_wall
;
j_cdbs
->
prev_cpu_wall
=
cur_wall_time
;
if
(
cur_idle_time
<
j_cdbs
->
prev_cpu_idle
)
cur_idle_time
=
j_cdbs
->
prev_cpu_idle
;
idle_time
=
(
unsigned
int
)
(
cur_idle_time
-
j_cdbs
->
prev_cpu_idle
)
;
j_cdbs
->
prev_cpu_idle
=
cur_idle_time
;
if
(
cur_idle_time
<
=
j_cdbs
->
prev_cpu_idle
)
{
idle_time
=
0
;
}
else
{
idle_time
=
cur_idle_time
-
j_cdbs
->
prev_cpu_idle
;
j_cdbs
->
prev_cpu_idle
=
cur_idle_time
;
}
if
(
ignore_nice
)
{
u64
cur_nice
;
unsigned
long
cur_nice_jiffies
;
cur_nice
=
kcpustat_cpu
(
j
).
cpustat
[
CPUTIME_NICE
]
-
cdbs
->
prev_cpu_nice
;
/*
* Assumption: nice time between sampling periods will
* be less than 2^32 jiffies for 32 bit sys
*/
cur_nice_jiffies
=
(
unsigned
long
)
cputime64_to_jiffies64
(
cur_nice
);
u64
cur_nice
=
kcpustat_cpu
(
j
).
cpustat
[
CPUTIME_NICE
];
cdbs
->
prev_cpu_nice
=
kcpustat_cpu
(
j
).
cpustat
[
CPUTIME_NICE
];
idle_time
+=
jiffies_to_usecs
(
cur_nice_jiffies
);
idle_time
+=
cputime_to_usecs
(
cur_nice
-
j_cdbs
->
prev_cpu_nice
);
j_cdbs
->
prev_cpu_nice
=
cur_nice
;
}
if
(
unlikely
(
!
wall_time
||
wall_time
<
idle_time
))
...
...
@@ -128,10 +226,10 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
* dropped down. So we perform the copy only once, upon the
* first wake-up from idle.)
*
* Detecting this situation is easy: the governor's
deferrable
*
timer would not have fired during CPU-idle periods. Hence
*
an unusually large 'wall_time' (as compared to the sampling
* rate) indicates this scenario.
* Detecting this situation is easy: the governor's
utilization
*
update handler would not have run during CPU-idle periods.
*
Hence, an unusually large 'wall_time' (as compared to the
*
sampling
rate) indicates this scenario.
*
* prev_load can be zero in two cases and we must recalculate it
* for both cases:
...
...
@@ -156,222 +254,224 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
if
(
load
>
max_load
)
max_load
=
load
;
}
dbs_data
->
cdata
->
gov_check_cpu
(
cpu
,
max_load
);
return
max_load
;
}
EXPORT_SYMBOL_GPL
(
dbs_
check_cpu
);
EXPORT_SYMBOL_GPL
(
dbs_
update
);
void
gov_add_timers
(
struct
cpufreq_policy
*
policy
,
unsigned
int
delay
)
static
void
gov_set_update_util
(
struct
policy_dbs_info
*
policy_dbs
,
unsigned
int
delay_us
)
{
struct
dbs_data
*
dbs_data
=
policy
->
governor_data
;
struct
cpu_dbs_info
*
cdbs
;
struct
cpufreq_policy
*
policy
=
policy_dbs
->
policy
;
int
cpu
;
gov_update_sample_delay
(
policy_dbs
,
delay_us
);
policy_dbs
->
last_sample_time
=
0
;
for_each_cpu
(
cpu
,
policy
->
cpus
)
{
cdbs
=
dbs_data
->
cdata
->
get_cpu_cdbs
(
cpu
);
cdbs
->
timer
.
expires
=
jiffies
+
delay
;
add_timer_on
(
&
cdbs
->
timer
,
cpu
);
struct
cpu_dbs_info
*
cdbs
=
&
per_cpu
(
cpu_dbs
,
cpu
);
cpufreq_set_update_util_data
(
cpu
,
&
cdbs
->
update_util
);
}
}
EXPORT_SYMBOL_GPL
(
gov_add_timers
);
static
inline
void
gov_c
ancel_timers
(
struct
cpufreq_policy
*
policy
)
static
inline
void
gov_c
lear_update_util
(
struct
cpufreq_policy
*
policy
)
{
struct
dbs_data
*
dbs_data
=
policy
->
governor_data
;
struct
cpu_dbs_info
*
cdbs
;
int
i
;
for_each_cpu
(
i
,
policy
->
cpus
)
{
cdbs
=
dbs_data
->
cdata
->
get_cpu_cdbs
(
i
);
del_timer_sync
(
&
cdbs
->
timer
);
}
}
for_each_cpu
(
i
,
policy
->
cpus
)
cpufreq_set_update_util_data
(
i
,
NULL
);
void
gov_cancel_work
(
struct
cpu_common_dbs_info
*
shared
)
{
/* Tell dbs_timer_handler() to skip queuing up work items. */
atomic_inc
(
&
shared
->
skip_work
);
/*
* If dbs_timer_handler() is already running, it may not notice the
* incremented skip_work, so wait for it to complete to prevent its work
* item from being queued up after the cancel_work_sync() below.
*/
gov_cancel_timers
(
shared
->
policy
);
/*
* In case dbs_timer_handler() managed to run and spawn a work item
* before the timers have been canceled, wait for that work item to
* complete and then cancel all of the timers set up by it. If
* dbs_timer_handler() runs again at that point, it will see the
* positive value of skip_work and won't spawn any more work items.
*/
cancel_work_sync
(
&
shared
->
work
);
gov_cancel_timers
(
shared
->
policy
);
atomic_set
(
&
shared
->
skip_work
,
0
);
synchronize_sched
();
}
EXPORT_SYMBOL_GPL
(
gov_cancel_work
);
/* Will return if we need to evaluate cpu load again or not */
static
bool
need_load_eval
(
struct
cpu_common_dbs_info
*
shared
,
unsigned
int
sampling_rate
)
static
void
gov_cancel_work
(
struct
cpufreq_policy
*
policy
)
{
if
(
policy_is_shared
(
shared
->
policy
))
{
ktime_t
time_now
=
ktime_get
();
s64
delta_us
=
ktime_us_delta
(
time_now
,
shared
->
time_stamp
);
/* Do nothing if we recently have sampled */
if
(
delta_us
<
(
s64
)(
sampling_rate
/
2
))
return
false
;
else
shared
->
time_stamp
=
time_now
;
}
struct
policy_dbs_info
*
policy_dbs
=
policy
->
governor_data
;
return
true
;
gov_clear_update_util
(
policy_dbs
->
policy
);
irq_work_sync
(
&
policy_dbs
->
irq_work
);
cancel_work_sync
(
&
policy_dbs
->
work
);
atomic_set
(
&
policy_dbs
->
work_count
,
0
);
policy_dbs
->
work_in_progress
=
false
;
}
static
void
dbs_work_handler
(
struct
work_struct
*
work
)
{
struct
cpu_common_dbs_info
*
shared
=
container_of
(
work
,
struct
cpu_common_dbs_info
,
work
);
struct
policy_dbs_info
*
policy_dbs
;
struct
cpufreq_policy
*
policy
;
struct
dbs_data
*
dbs_data
;
unsigned
int
sampling_rate
,
delay
;
bool
eval_load
;
policy
=
shared
->
policy
;
dbs_data
=
policy
->
governor_data
;
struct
dbs_governor
*
gov
;
/* Kill all timers */
gov_cancel_timers
(
policy
);
policy_dbs
=
container_of
(
work
,
struct
policy_dbs_info
,
work
);
policy
=
policy_dbs
->
policy
;
gov
=
dbs_governor_of
(
policy
);
if
(
dbs_data
->
cdata
->
governor
==
GOV_CONSERVATIVE
)
{
struct
cs_dbs_tuners
*
cs_tuners
=
dbs_data
->
tuners
;
sampling_rate
=
cs_tuners
->
sampling_rate
;
}
else
{
struct
od_dbs_tuners
*
od_tuners
=
dbs_data
->
tuners
;
sampling_rate
=
od_tuners
->
sampling_rate
;
}
eval_load
=
need_load_eval
(
shared
,
sampling_rate
);
/*
* Make sure cpufreq_governor_limits() isn't evaluating load or the
* ondemand governor isn't updating the sampling rate in parallel.
*/
mutex_lock
(
&
policy_dbs
->
timer_mutex
);
gov_update_sample_delay
(
policy_dbs
,
gov
->
gov_dbs_timer
(
policy
));
mutex_unlock
(
&
policy_dbs
->
timer_mutex
);
/* Allow the utilization update handler to queue up more work. */
atomic_set
(
&
policy_dbs
->
work_count
,
0
);
/*
* Make sure cpufreq_governor_limits() isn't evaluating load in
* parallel.
* If the update below is reordered with respect to the sample delay
* modification, the utilization update handler may end up using a stale
* sample delay value.
*/
mutex_lock
(
&
shared
->
timer_mutex
);
delay
=
dbs_data
->
cdata
->
gov_dbs_timer
(
policy
,
eval_load
)
;
mutex_unlock
(
&
shared
->
timer_mutex
);
smp_wmb
(
);
policy_dbs
->
work_in_progress
=
false
;
}
atomic_dec
(
&
shared
->
skip_work
);
static
void
dbs_irq_work
(
struct
irq_work
*
irq_work
)
{
struct
policy_dbs_info
*
policy_dbs
;
gov_add_timers
(
policy
,
delay
);
policy_dbs
=
container_of
(
irq_work
,
struct
policy_dbs_info
,
irq_work
);
schedule_work
(
&
policy_dbs
->
work
);
}
static
void
dbs_timer_handler
(
unsigned
long
data
)
static
void
dbs_update_util_handler
(
struct
update_util_data
*
data
,
u64
time
,
unsigned
long
util
,
unsigned
long
max
)
{
struct
cpu_dbs_info
*
cdbs
=
(
struct
cpu_dbs_info
*
)
data
;
struct
cpu_common_dbs_info
*
shared
=
cdbs
->
shared
;
struct
cpu_dbs_info
*
cdbs
=
container_of
(
data
,
struct
cpu_dbs_info
,
update_util
);
struct
policy_dbs_info
*
policy_dbs
=
cdbs
->
policy_dbs
;
u64
delta_ns
,
lst
;
/*
* Timer handler may not be allowed to queue the work at the moment,
* because:
* - Another timer handler has done that
* - We are stopping the governor
* - Or we are updating the sampling rate of the ondemand governor
* The work may not be allowed to be queued up right now.
* Possible reasons:
* - Work has already been queued up or is in progress.
* - It is too early (too little time from the previous sample).
*/
if
(
atomic_inc_return
(
&
shared
->
skip_work
)
>
1
)
atomic_dec
(
&
shared
->
skip_work
);
else
queue_work
(
system_wq
,
&
shared
->
work
);
}
if
(
policy_dbs
->
work_in_progress
)
return
;
static
void
set_sampling_rate
(
struct
dbs_data
*
dbs_data
,
unsigned
int
sampling_rate
)
{
if
(
dbs_data
->
cdata
->
governor
==
GOV_CONSERVATIVE
)
{
struct
cs_dbs_tuners
*
cs_tuners
=
dbs_data
->
tuners
;
cs_tuners
->
sampling_rate
=
sampling_rate
;
}
else
{
struct
od_dbs_tuners
*
od_tuners
=
dbs_data
->
tuners
;
od_tuners
->
sampling_rate
=
sampling_rate
;
/*
* If the reads below are reordered before the check above, the value
* of sample_delay_ns used in the computation may be stale.
*/
smp_rmb
();
lst
=
READ_ONCE
(
policy_dbs
->
last_sample_time
);
delta_ns
=
time
-
lst
;
if
((
s64
)
delta_ns
<
policy_dbs
->
sample_delay_ns
)
return
;
/*
* If the policy is not shared, the irq_work may be queued up right away
* at this point. Otherwise, we need to ensure that only one of the
* CPUs sharing the policy will do that.
*/
if
(
policy_dbs
->
is_shared
)
{
if
(
!
atomic_add_unless
(
&
policy_dbs
->
work_count
,
1
,
1
))
return
;
/*
* If another CPU updated last_sample_time in the meantime, we
* shouldn't be here, so clear the work counter and bail out.
*/
if
(
unlikely
(
lst
!=
READ_ONCE
(
policy_dbs
->
last_sample_time
)))
{
atomic_set
(
&
policy_dbs
->
work_count
,
0
);
return
;
}
}
policy_dbs
->
last_sample_time
=
time
;
policy_dbs
->
work_in_progress
=
true
;
irq_work_queue
(
&
policy_dbs
->
irq_work
);
}
static
int
alloc_common
_dbs_info
(
struct
cpufreq_policy
*
policy
,
struct
common_dbs_data
*
cdata
)
static
struct
policy_dbs_info
*
alloc_policy
_dbs_info
(
struct
cpufreq_policy
*
policy
,
struct
dbs_governor
*
gov
)
{
struct
cpu_common_dbs_info
*
shared
;
struct
policy_dbs_info
*
policy_dbs
;
int
j
;
/* Allocate memory for
the common information for policy->cpus
*/
shared
=
kzalloc
(
sizeof
(
*
shared
),
GFP_KERNEL
);
if
(
!
shared
)
return
-
ENOMEM
;
/* Allocate memory for
per-policy governor data.
*/
policy_dbs
=
gov
->
alloc
(
);
if
(
!
policy_dbs
)
return
NULL
;
/* Set shared for all CPUs, online+offline */
for_each_cpu
(
j
,
policy
->
related_cpus
)
cdata
->
get_cpu_cdbs
(
j
)
->
shared
=
shared
;
policy_dbs
->
policy
=
policy
;
mutex_init
(
&
policy_dbs
->
timer_mutex
);
atomic_set
(
&
policy_dbs
->
work_count
,
0
);
init_irq_work
(
&
policy_dbs
->
irq_work
,
dbs_irq_work
);
INIT_WORK
(
&
policy_dbs
->
work
,
dbs_work_handler
);
mutex_init
(
&
shared
->
timer_mutex
);
atomic_set
(
&
shared
->
skip_work
,
0
);
INIT_WORK
(
&
shared
->
work
,
dbs_work_handler
);
return
0
;
/* Set policy_dbs for all CPUs, online+offline */
for_each_cpu
(
j
,
policy
->
related_cpus
)
{
struct
cpu_dbs_info
*
j_cdbs
=
&
per_cpu
(
cpu_dbs
,
j
);
j_cdbs
->
policy_dbs
=
policy_dbs
;
j_cdbs
->
update_util
.
func
=
dbs_update_util_handler
;
}
return
policy_dbs
;
}
static
void
free_
common_dbs_info
(
struct
cpufreq_policy
*
policy
,
struct
common_dbs_data
*
cdata
)
static
void
free_
policy_dbs_info
(
struct
policy_dbs_info
*
policy_dbs
,
struct
dbs_governor
*
gov
)
{
struct
cpu_dbs_info
*
cdbs
=
cdata
->
get_cpu_cdbs
(
policy
->
cpu
);
struct
cpu_common_dbs_info
*
shared
=
cdbs
->
shared
;
int
j
;
mutex_destroy
(
&
shared
->
timer_mutex
);
mutex_destroy
(
&
policy_dbs
->
timer_mutex
);
for_each_cpu
(
j
,
policy
->
cpus
)
cdata
->
get_cpu_cdbs
(
j
)
->
shared
=
NULL
;
for_each_cpu
(
j
,
policy
_dbs
->
policy
->
related_cpus
)
{
struct
cpu_dbs_info
*
j_cdbs
=
&
per_cpu
(
cpu_dbs
,
j
)
;
kfree
(
shared
);
j_cdbs
->
policy_dbs
=
NULL
;
j_cdbs
->
update_util
.
func
=
NULL
;
}
gov
->
free
(
policy_dbs
);
}
static
int
cpufreq_governor_init
(
struct
cpufreq_policy
*
policy
,
struct
dbs_data
*
dbs_data
,
struct
common_dbs_data
*
cdata
)
static
int
cpufreq_governor_init
(
struct
cpufreq_policy
*
policy
)
{
struct
dbs_governor
*
gov
=
dbs_governor_of
(
policy
);
struct
dbs_data
*
dbs_data
;
struct
policy_dbs_info
*
policy_dbs
;
unsigned
int
latency
;
int
ret
;
int
ret
=
0
;
/* State should be equivalent to EXIT */
if
(
policy
->
governor_data
)
return
-
EBUSY
;
if
(
dbs_data
)
{
if
(
WARN_ON
(
have_governor_per_policy
())
)
return
-
EINVAL
;
policy_dbs
=
alloc_policy_dbs_info
(
policy
,
gov
);
if
(
!
policy_dbs
)
return
-
ENOMEM
;
ret
=
alloc_common_dbs_info
(
policy
,
cdata
);
if
(
ret
)
return
ret
;
/* Protect gov->gdbs_data against concurrent updates. */
mutex_lock
(
&
gov_dbs_data_mutex
);
dbs_data
=
gov
->
gdbs_data
;
if
(
dbs_data
)
{
if
(
WARN_ON
(
have_governor_per_policy
()))
{
ret
=
-
EINVAL
;
goto
free_policy_dbs_info
;
}
policy_dbs
->
dbs_data
=
dbs_data
;
policy
->
governor_data
=
policy_dbs
;
mutex_lock
(
&
dbs_data
->
mutex
);
dbs_data
->
usage_count
++
;
policy
->
governor_data
=
dbs_data
;
return
0
;
list_add
(
&
policy_dbs
->
list
,
&
dbs_data
->
policy_dbs_list
);
mutex_unlock
(
&
dbs_data
->
mutex
);
goto
out
;
}
dbs_data
=
kzalloc
(
sizeof
(
*
dbs_data
),
GFP_KERNEL
);
if
(
!
dbs_data
)
return
-
ENOMEM
;
ret
=
alloc_common_dbs_info
(
policy
,
cdata
);
if
(
ret
)
goto
free_dbs_data
;
if
(
!
dbs_data
)
{
ret
=
-
ENOMEM
;
goto
free_policy_dbs_info
;
}
dbs_data
->
cdata
=
cdata
;
dbs_data
->
usage_count
=
1
;
INIT_LIST_HEAD
(
&
dbs_data
->
policy_dbs_list
)
;
mutex_init
(
&
dbs_data
->
mutex
)
;
ret
=
cdata
->
init
(
dbs_data
,
!
policy
->
governor
->
initialized
);
ret
=
gov
->
init
(
dbs_data
,
!
policy
->
governor
->
initialized
);
if
(
ret
)
goto
free_
common
_dbs_info
;
goto
free_
policy
_dbs_info
;
/* policy latency is in ns. Convert it to us first */
latency
=
policy
->
cpuinfo
.
transition_latency
/
1000
;
...
...
@@ -381,216 +481,156 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
/* Bring kernel and HW constraints together */
dbs_data
->
min_sampling_rate
=
max
(
dbs_data
->
min_sampling_rate
,
MIN_LATENCY_MULTIPLIER
*
latency
);
set_sampling_rate
(
dbs_data
,
max
(
dbs_data
->
min_sampling_rate
,
latency
*
LATENCY_MULTIPLIER
)
);
dbs_data
->
sampling_rate
=
max
(
dbs_data
->
min_sampling_rate
,
LATENCY_MULTIPLIER
*
latency
);
if
(
!
have_governor_per_policy
())
cdata
->
gdbs_data
=
dbs_data
;
gov
->
gdbs_data
=
dbs_data
;
policy
->
governor_data
=
dbs_data
;
policy
->
governor_data
=
policy_dbs
;
ret
=
sysfs_create_group
(
get_governor_parent_kobj
(
policy
),
get_sysfs_attr
(
dbs_data
));
if
(
ret
)
goto
reset_gdbs_data
;
policy_dbs
->
dbs_data
=
dbs_data
;
dbs_data
->
usage_count
=
1
;
list_add
(
&
policy_dbs
->
list
,
&
dbs_data
->
policy_dbs_list
);
return
0
;
gov
->
kobj_type
.
sysfs_ops
=
&
governor_sysfs_ops
;
ret
=
kobject_init_and_add
(
&
dbs_data
->
kobj
,
&
gov
->
kobj_type
,
get_governor_parent_kobj
(
policy
),
"%s"
,
gov
->
gov
.
name
);
if
(
!
ret
)
goto
out
;
/* Failure, so roll back. */
pr_err
(
"cpufreq: Governor initialization failed (dbs_data kobject init error %d)
\n
"
,
ret
);
reset_gdbs_data:
policy
->
governor_data
=
NULL
;
if
(
!
have_governor_per_policy
())
cdata
->
gdbs_data
=
NULL
;
cdata
->
exit
(
dbs_data
,
!
policy
->
governor
->
initialized
);
free_common_dbs_info:
free_common_dbs_info
(
policy
,
cdata
);
free_dbs_data:
gov
->
gdbs_data
=
NULL
;
gov
->
exit
(
dbs_data
,
!
policy
->
governor
->
initialized
);
kfree
(
dbs_data
);
free_policy_dbs_info:
free_policy_dbs_info
(
policy_dbs
,
gov
);
out:
mutex_unlock
(
&
gov_dbs_data_mutex
);
return
ret
;
}
static
int
cpufreq_governor_exit
(
struct
cpufreq_policy
*
policy
,
struct
dbs_data
*
dbs_data
)
static
int
cpufreq_governor_exit
(
struct
cpufreq_policy
*
policy
)
{
struct
common_dbs_data
*
cdata
=
dbs_data
->
cdata
;
struct
cpu_dbs_info
*
cdbs
=
cdata
->
get_cpu_cdbs
(
policy
->
cpu
);
struct
dbs_governor
*
gov
=
dbs_governor_of
(
policy
);
struct
policy_dbs_info
*
policy_dbs
=
policy
->
governor_data
;
struct
dbs_data
*
dbs_data
=
policy_dbs
->
dbs_data
;
int
count
;
/* State should be equivalent to INIT */
if
(
!
cdbs
->
shared
||
cdbs
->
shared
->
policy
)
return
-
EBUSY
;
/* Protect gov->gdbs_data against concurrent updates. */
mutex_lock
(
&
gov_dbs_data_mutex
);
mutex_lock
(
&
dbs_data
->
mutex
);
list_del
(
&
policy_dbs
->
list
);
count
=
--
dbs_data
->
usage_count
;
mutex_unlock
(
&
dbs_data
->
mutex
);
if
(
!--
dbs_data
->
usage_count
)
{
sysfs_remove_group
(
get_governor_parent_kobj
(
policy
),
get_sysfs_attr
(
dbs_data
));
if
(
!
count
)
{
kobject_put
(
&
dbs_data
->
kobj
);
policy
->
governor_data
=
NULL
;
if
(
!
have_governor_per_policy
())
cdata
->
gdbs_data
=
NULL
;
gov
->
gdbs_data
=
NULL
;
cdata
->
exit
(
dbs_data
,
policy
->
governor
->
initialized
==
1
);
gov
->
exit
(
dbs_data
,
policy
->
governor
->
initialized
==
1
);
mutex_destroy
(
&
dbs_data
->
mutex
);
kfree
(
dbs_data
);
}
else
{
policy
->
governor_data
=
NULL
;
}
free_common_dbs_info
(
policy
,
cdata
);
free_policy_dbs_info
(
policy_dbs
,
gov
);
mutex_unlock
(
&
gov_dbs_data_mutex
);
return
0
;
}
static
int
cpufreq_governor_start
(
struct
cpufreq_policy
*
policy
,
struct
dbs_data
*
dbs_data
)
static
int
cpufreq_governor_start
(
struct
cpufreq_policy
*
policy
)
{
struct
common_dbs_data
*
cdata
=
dbs_data
->
cdata
;
unsigned
int
sampling_rate
,
ignore_nice
,
j
,
cpu
=
policy
->
cpu
;
struct
cpu_dbs_info
*
cdbs
=
cdata
->
get_cpu_cdbs
(
cpu
)
;
struct
cpu_common_dbs_info
*
shared
=
cdbs
->
shared
;
int
io_busy
=
0
;
struct
dbs_governor
*
gov
=
dbs_governor_of
(
policy
)
;
struct
policy_dbs_info
*
policy_dbs
=
policy
->
governor_data
;
struct
dbs_data
*
dbs_data
=
policy_dbs
->
dbs_data
;
unsigned
int
sampling_rate
,
ignore_nice
,
j
;
unsigned
int
io_busy
;
if
(
!
policy
->
cur
)
return
-
EINVAL
;
/* State should be equivalent to INIT */
if
(
!
shared
||
shared
->
policy
)
return
-
EBUSY
;
policy_dbs
->
is_shared
=
policy_is_shared
(
policy
);
policy_dbs
->
rate_mult
=
1
;
if
(
cdata
->
governor
==
GOV_CONSERVATIVE
)
{
struct
cs_dbs_tuners
*
cs_tuners
=
dbs_data
->
tuners
;
sampling_rate
=
cs_tuners
->
sampling_rate
;
ignore_nice
=
cs_tuners
->
ignore_nice_load
;
}
else
{
struct
od_dbs_tuners
*
od_tuners
=
dbs_data
->
tuners
;
sampling_rate
=
od_tuners
->
sampling_rate
;
ignore_nice
=
od_tuners
->
ignore_nice_load
;
io_busy
=
od_tuners
->
io_is_busy
;
}
shared
->
policy
=
policy
;
shared
->
time_stamp
=
ktime_get
();
sampling_rate
=
dbs_data
->
sampling_rate
;
ignore_nice
=
dbs_data
->
ignore_nice_load
;
io_busy
=
dbs_data
->
io_is_busy
;
for_each_cpu
(
j
,
policy
->
cpus
)
{
struct
cpu_dbs_info
*
j_cdbs
=
cdata
->
get_cpu_cdbs
(
j
);
struct
cpu_dbs_info
*
j_cdbs
=
&
per_cpu
(
cpu_dbs
,
j
);
unsigned
int
prev_load
;
j_cdbs
->
prev_cpu_idle
=
get_cpu_idle_time
(
j
,
&
j_cdbs
->
prev_cpu_wall
,
io_busy
);
j_cdbs
->
prev_cpu_idle
=
get_cpu_idle_time
(
j
,
&
j_cdbs
->
prev_cpu_wall
,
io_busy
);
prev_load
=
(
unsigned
int
)(
j_cdbs
->
prev_cpu_wall
-
j_cdbs
->
prev_cpu_idle
);
j_cdbs
->
prev_load
=
100
*
prev_load
/
(
unsigned
int
)
j_cdbs
->
prev_cpu_wall
;
prev_load
=
j_cdbs
->
prev_cpu_wall
-
j_cdbs
->
prev_cpu_idle
;
j_cdbs
->
prev_load
=
100
*
prev_load
/
(
unsigned
int
)
j_cdbs
->
prev_cpu_wall
;
if
(
ignore_nice
)
j_cdbs
->
prev_cpu_nice
=
kcpustat_cpu
(
j
).
cpustat
[
CPUTIME_NICE
];
__setup_timer
(
&
j_cdbs
->
timer
,
dbs_timer_handler
,
(
unsigned
long
)
j_cdbs
,
TIMER_DEFERRABLE
|
TIMER_IRQSAFE
);
}
if
(
cdata
->
governor
==
GOV_CONSERVATIVE
)
{
struct
cs_cpu_dbs_info_s
*
cs_dbs_info
=
cdata
->
get_cpu_dbs_info_s
(
cpu
);
cs_dbs_info
->
down_skip
=
0
;
cs_dbs_info
->
requested_freq
=
policy
->
cur
;
}
else
{
struct
od_ops
*
od_ops
=
cdata
->
gov_ops
;
struct
od_cpu_dbs_info_s
*
od_dbs_info
=
cdata
->
get_cpu_dbs_info_s
(
cpu
);
od_dbs_info
->
rate_mult
=
1
;
od_dbs_info
->
sample_type
=
OD_NORMAL_SAMPLE
;
od_ops
->
powersave_bias_init_cpu
(
cpu
);
}
gov
->
start
(
policy
);
gov_
add_timers
(
policy
,
delay_for_sampling_rate
(
sampling_rate
)
);
gov_
set_update_util
(
policy_dbs
,
sampling_rate
);
return
0
;
}
static
int
cpufreq_governor_stop
(
struct
cpufreq_policy
*
policy
,
struct
dbs_data
*
dbs_data
)
static
int
cpufreq_governor_stop
(
struct
cpufreq_policy
*
policy
)
{
struct
cpu_dbs_info
*
cdbs
=
dbs_data
->
cdata
->
get_cpu_cdbs
(
policy
->
cpu
);
struct
cpu_common_dbs_info
*
shared
=
cdbs
->
shared
;
/* State should be equivalent to START */
if
(
!
shared
||
!
shared
->
policy
)
return
-
EBUSY
;
gov_cancel_work
(
shared
);
shared
->
policy
=
NULL
;
gov_cancel_work
(
policy
);
return
0
;
}
static int cpufreq_governor_limits(struct cpufreq_policy *policy,
				   struct dbs_data *dbs_data)
static int cpufreq_governor_limits(struct cpufreq_policy *policy)
{
	struct common_dbs_data *cdata = dbs_data->cdata;
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
	struct policy_dbs_info *policy_dbs = policy->governor_data;

	/* State should be equivalent to START */
	if (!cdbs->shared || !cdbs->shared->policy)
		return -EBUSY;

	mutex_lock(&policy_dbs->timer_mutex);
	if (policy->max < policy->cur)
		__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
	else if (policy->min > policy->cur)
		__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
	gov_update_sample_delay(policy_dbs, 0);

	mutex_lock(&cdbs->shared->timer_mutex);
	if (policy->max < cdbs->shared->policy->cur)
		__cpufreq_driver_target(cdbs->shared->policy, policy->max, CPUFREQ_RELATION_H);
	else if (policy->min > cdbs->shared->policy->cur)
		__cpufreq_driver_target(cdbs->shared->policy, policy->min, CPUFREQ_RELATION_L);
	dbs_check_cpu(dbs_data, cpu);
	mutex_unlock(&cdbs->shared->timer_mutex);
	mutex_unlock(&policy_dbs->timer_mutex);

	return 0;
}
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
			 struct common_dbs_data *cdata, unsigned int event)
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
	struct dbs_data *dbs_data;
	int ret;

	/* Lock governor to block concurrent initialization of governor */
	mutex_lock(&cdata->mutex);

	if (have_governor_per_policy())
		dbs_data = policy->governor_data;
	else
		dbs_data = cdata->gdbs_data;

	if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) {
		ret = -EINVAL;
		goto unlock;
	}

	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
		ret = cpufreq_governor_init(policy, dbs_data, cdata);
		break;
	case CPUFREQ_GOV_POLICY_EXIT:
		ret = cpufreq_governor_exit(policy, dbs_data);
		break;
	case CPUFREQ_GOV_START:
		ret = cpufreq_governor_start(policy, dbs_data);
		break;
	case CPUFREQ_GOV_STOP:
		ret = cpufreq_governor_stop(policy, dbs_data);
		break;
	case CPUFREQ_GOV_LIMITS:
		ret = cpufreq_governor_limits(policy, dbs_data);
		break;
	default:
		ret = -EINVAL;
	if (event == CPUFREQ_GOV_POLICY_INIT) {
		return cpufreq_governor_init(policy);
	} else if (policy->governor_data) {
		switch (event) {
		case CPUFREQ_GOV_POLICY_EXIT:
			return cpufreq_governor_exit(policy);
		case CPUFREQ_GOV_START:
			return cpufreq_governor_start(policy);
		case CPUFREQ_GOV_STOP:
			return cpufreq_governor_stop(policy);
		case CPUFREQ_GOV_LIMITS:
			return cpufreq_governor_limits(policy);
		}
	}

unlock:
	mutex_unlock(&cdata->mutex);

	return ret;
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
drivers/cpufreq/cpufreq_governor.h
View file @
a5acbfbd
...
...
@@ -18,6 +18,7 @@
#define _CPUFREQ_GOVERNOR_H
#include <linux/atomic.h>
#include <linux/irq_work.h>
#include <linux/cpufreq.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
...
...
@@ -41,96 +42,68 @@
enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
/*
* Macro for creating governors sysfs routines
*
* - gov_sys: One governor instance per whole system
* - gov_pol: One governor instance per policy
* Abbreviations:
* dbs: used as a shortform for demand based switching It helps to keep variable
* names smaller, simpler
* cdbs: common dbs
* od_*: On-demand governor
* cs_*: Conservative governor
*/
/* Create attributes */
#define gov_sys_attr_ro(_name) \
static struct global_attr _name##_gov_sys = \
__ATTR(_name, 0444, show_##_name##_gov_sys, NULL)
#define gov_sys_attr_rw(_name) \
static struct global_attr _name##_gov_sys = \
__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys)
#define gov_pol_attr_ro(_name) \
static struct freq_attr _name##_gov_pol = \
__ATTR(_name, 0444, show_##_name##_gov_pol, NULL)
#define gov_pol_attr_rw(_name) \
static struct freq_attr _name##_gov_pol = \
__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
/* Governor demand based switching data (per-policy or global). */
struct dbs_data {
	int usage_count;
	void *tuners;
	unsigned int min_sampling_rate;
	unsigned int ignore_nice_load;
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int io_is_busy;

#define gov_sys_pol_attr_rw(_name)					\
	gov_sys_attr_rw(_name);						\
	gov_pol_attr_rw(_name)

	struct kobject kobj;
	struct list_head policy_dbs_list;
	/*
	 * Protect concurrent updates to governor tunables from sysfs,
	 * policy_dbs_list and usage_count.
	 */
	struct mutex mutex;
};
#define gov_sys_pol_attr_ro(_name) \
gov_sys_attr_ro(_name); \
gov_pol_attr_ro(_name)
/* Governor's specific attributes */
struct dbs_data;

struct governor_attr {
	struct attribute attr;
	ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
	ssize_t (*store)(struct dbs_data *dbs_data, const char *buf,
			 size_t count);
};
/* Create show/store routines */
#define show_one(_gov, file_name) \
static ssize_t show_##file_name##_gov_sys \
(struct kobject *kobj, struct attribute *attr, char *buf) \
#define gov_show_one(_gov, file_name) \
static ssize_t show_##file_name \
(struct dbs_data *dbs_data, char *buf) \
{ \
struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \
return sprintf(buf, "%u\n", tuners->file_name); \
} \
\
static ssize_t show_##file_name##_gov_pol \
(struct cpufreq_policy *policy, char *buf) \
{ \
struct dbs_data *dbs_data = policy->governor_data; \
struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \
return sprintf(buf, "%u\n", tuners->file_name); \
}
#define store_one(_gov, file_name) \
static ssize_t store_##file_name##_gov_sys \
(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \
{ \
struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \
return store_##file_name(dbs_data, buf, count); \
} \
\
static ssize_t store_##file_name##_gov_pol \
(struct cpufreq_policy *policy, const char *buf, size_t count) \
#define gov_show_one_common(file_name) \
static ssize_t show_##file_name \
(struct dbs_data *dbs_data, char *buf) \
{ \
struct dbs_data *dbs_data = policy->governor_data; \
return store_##file_name(dbs_data, buf, count); \
return sprintf(buf, "%u\n", dbs_data->file_name); \
}
#define show_store_one(_gov, file_name)					\
show_one(_gov, file_name);						\
store_one(_gov, file_name)

#define gov_attr_ro(_name)						\
static struct governor_attr _name =					\
__ATTR(_name, 0444, show_##_name, NULL)
/* create helper routines */
#define define_get_cpu_dbs_routines(_dbs_info) \
static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \
{ \
return &per_cpu(_dbs_info, cpu).cdbs; \
} \
\
static void *get_cpu_dbs_info_s(int cpu) \
{ \
return &per_cpu(_dbs_info, cpu); \
}
/*
* Abbreviations:
* dbs: used as a shortform for demand based switching It helps to keep variable
* names smaller, simpler
* cdbs: common dbs
* od_*: On-demand governor
* cs_*: Conservative governor
*/
#define gov_attr_rw(_name) \
static struct governor_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)
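To make the new attribute plumbing concrete, here is a hand-expanded sketch of what gov_show_one_common(sampling_rate) and gov_attr_rw(sampling_rate) from the ondemand patch below roughly produce; it is written out from the macro definitions above rather than taken from preprocessor output, and it assumes the store_sampling_rate() helper declared later in this header:

/* Rough expansion of gov_show_one_common(sampling_rate); */
static ssize_t show_sampling_rate(struct dbs_data *dbs_data, char *buf)
{
	/* Common tunables now live directly in struct dbs_data. */
	return sprintf(buf, "%u\n", dbs_data->sampling_rate);
}

/* Rough expansion of gov_attr_rw(sampling_rate); one governor_attr per tunable. */
static struct governor_attr sampling_rate =
	__ATTR(sampling_rate, 0644, show_sampling_rate, store_sampling_rate);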
/* Common to all CPUs of a policy */
struct cpu_common_dbs_info {
struct policy_dbs_info {
	struct cpufreq_policy *policy;
/*
* Per policy mutex that serializes load evaluation from limit-change
...
...
@@ -138,11 +111,27 @@ struct cpu_common_dbs_info {
 */
	struct mutex timer_mutex;
	ktime_t time_stamp;
	atomic_t skip_work;
	u64 last_sample_time;
	s64 sample_delay_ns;
	atomic_t work_count;
	struct irq_work irq_work;
	struct work_struct work;
	/* dbs_data may be shared between multiple policy objects */
	struct dbs_data *dbs_data;
	struct list_head list;
	/* Multiplier for increasing sample delay temporarily. */
	unsigned int rate_mult;
	/* Status indicators */
	bool is_shared;		/* This object is used by multiple CPUs */
	bool work_in_progress;	/* Work is being queued up or in progress */
};

static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
					   unsigned int delay_us)
{
	policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC;
}

/* Per cpu structures */
struct cpu_dbs_info {
	u64 prev_cpu_idle;
...
...
@@ -155,54 +144,14 @@ struct cpu_dbs_info {
* wake-up from idle.
*/
	unsigned int prev_load;
	struct timer_list timer;
	struct cpu_common_dbs_info *shared;
};

struct od_cpu_dbs_info_s {
	struct cpu_dbs_info cdbs;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
	unsigned int rate_mult;
	unsigned int sample_type:1;
};

struct cs_cpu_dbs_info_s {
	struct cpu_dbs_info cdbs;
	unsigned int down_skip;
	unsigned int requested_freq;
};

/* Per policy Governors sysfs tunables */
struct od_dbs_tuners {
	unsigned int ignore_nice_load;
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int powersave_bias;
	unsigned int io_is_busy;
};

struct cs_dbs_tuners {
	unsigned int ignore_nice_load;
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int down_threshold;
	unsigned int freq_step;
	struct update_util_data update_util;
	struct policy_dbs_info *policy_dbs;
};

/* Common Governor data across policies */
struct dbs_data;
struct common_dbs_data {
	/* Common across governors */
#define GOV_ONDEMAND		0
#define GOV_CONSERVATIVE	1
	int governor;
	struct attribute_group *attr_group_gov_sys; /* one governor - system */
	struct attribute_group *attr_group_gov_pol; /* one governor - policy */
struct dbs_governor {
	struct cpufreq_governor gov;
	struct kobj_type kobj_type;
/*
* Common data for platforms that don't set
...
...
@@ -210,74 +159,32 @@ struct common_dbs_data {
 */
	struct dbs_data *gdbs_data;

	struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
	void *(*get_cpu_dbs_info_s)(int cpu);
	unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy,
				      bool modify_all);
	void (*gov_check_cpu)(int cpu, unsigned int load);
	unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy);
	struct policy_dbs_info *(*alloc)(void);
	void (*free)(struct policy_dbs_info *policy_dbs);
	int (*init)(struct dbs_data *dbs_data, bool notify);
	void (*exit)(struct dbs_data *dbs_data, bool notify);

	/* Governor specific ops, see below */
	void *gov_ops;

	/*
	 * Protects governor's data (struct dbs_data and struct common_dbs_data)
	 */
	struct mutex mutex;
	void (*start)(struct cpufreq_policy *policy);
};

/* Governor Per policy data */
struct dbs_data {
	struct common_dbs_data *cdata;
	unsigned int min_sampling_rate;
	int usage_count;
	void *tuners;
};

static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy)
{
	return container_of(policy->governor, struct dbs_governor, gov);
}

/* Governor specific ops, will be passed to dbs_data->gov_ops */
/* Governor specific operations */
struct od_ops {
	void (*powersave_bias_init_cpu)(int cpu);
	unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
			unsigned int freq_next, unsigned int relation);
	void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq);
};

static inline int delay_for_sampling_rate(unsigned int sampling_rate)
{
	int delay = usecs_to_jiffies(sampling_rate);

	/* We want all CPUs to do sampling nearly on same jiffy */
	if (num_online_cpus() > 1)
		delay -= jiffies % delay;

	return delay;
}
#define declare_show_sampling_rate_min(_gov) \
static ssize_t show_sampling_rate_min_gov_sys \
(struct kobject *kobj, struct attribute *attr, char *buf) \
{ \
struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \
return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
} \
\
static ssize_t show_sampling_rate_min_gov_pol \
(struct cpufreq_policy *policy, char *buf) \
{ \
struct dbs_data *dbs_data = policy->governor_data; \
return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
}
extern struct mutex cpufreq_governor_lock;

void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay);
void gov_cancel_work(struct cpu_common_dbs_info *shared);
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
		struct common_dbs_data *cdata, unsigned int event);
unsigned int dbs_update(struct cpufreq_policy *policy);
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
void od_register_powersave_bias_handler(unsigned int (*f)
		(struct cpufreq_policy *, unsigned int, unsigned int),
		unsigned int powersave_bias);
void od_unregister_powersave_bias_handler(void);
ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
			    size_t count);
void gov_update_cpu_data(struct dbs_data *dbs_data);
#endif /* _CPUFREQ_GOVERNOR_H */
drivers/cpufreq/cpufreq_ondemand.c
View file @
a5acbfbd
...
...
@@ -16,7 +16,8 @@
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include "cpufreq_governor.h"
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
#define DEF_FREQUENCY_UP_THRESHOLD (80)
...
...
@@ -27,22 +28,10 @@
#define MIN_FREQUENCY_UP_THRESHOLD (11)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);

static struct od_ops od_ops;

static struct cpufreq_governor cpufreq_gov_ondemand;

static unsigned int default_powersave_bias;

static void ondemand_powersave_bias_init_cpu(int cpu)
{
	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
	dbs_info->freq_lo = 0;
}
/*
* Not all CPUs want IO time to be accounted as busy; this depends on how
* efficient idling at a higher frequency/voltage is.
...
...
@@ -68,8 +57,8 @@ static int should_io_be_busy(void)
/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 * Returns the freq_hi to be used right now and will set freq_hi_delay_us,
 * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs.
 */
static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
		unsigned int freq_next, unsigned int relation)
...
...
@@ -77,15 +66,15 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
	struct dbs_data *dbs_data = policy->governor_data;
	unsigned int delay_hi_us;
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		dbs_info->freq_lo_delay_us = 0;
		return freq_next;
	}
...
...
@@ -108,31 +97,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		dbs_info->freq_lo_delay_us = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate;
	delay_hi_us += (freq_hi - freq_lo) / 2;
	delay_hi_us /= freq_hi - freq_lo;
	dbs_info->freq_hi_delay_us = delay_hi_us;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us;
	return freq_hi;
}
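To see what the new delay_us bookkeeping works out to, take some hypothetical numbers (an illustration, not taken from the patch): freq_lo = 1,000,000 kHz, freq_hi = 2,000,000 kHz, freq_avg = 1,250,000 kHz and a sampling_rate of 10,000 us. Then:

	delay_hi_us = (1250000 - 1000000) * 10000;	/* 2,500,000,000 */
	delay_hi_us += (2000000 - 1000000) / 2;		/* rounding term */
	delay_hi_us /= 2000000 - 1000000;		/* = 2500 us at freq_hi */
	freq_lo_delay_us = 10000 - 2500;		/* = 7500 us at freq_lo */

so the CPU spends a quarter of each sampling period at freq_hi and the remainder at freq_lo, which averages out to the requested freq_avg.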
static void ondemand_powersave_bias_init(void)
static void ondemand_powersave_bias_init(struct cpufreq_policy *policy)
{
	int i;
	for_each_online_cpu(i) {
		ondemand_powersave_bias_init_cpu(i);
	}
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);

	dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu);
	dbs_info->freq_lo = 0;
}
static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
{
	struct dbs_data *dbs_data = policy->governor_data;
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;

	if (od_tuners->powersave_bias)
...
...
@@ -150,21 +138,21 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
* (default), then we try to increase frequency. Else, we adjust the frequency
* proportional to load.
*/
static void od_check_cpu(int cpu, unsigned int load)
static void od_update(struct cpufreq_policy *policy)
{
	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
	struct dbs_data *dbs_data = policy->governor_data;
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	unsigned int load = dbs_update(policy);

	dbs_info->freq_lo = 0;

	/* Check for frequency increase */
	if (load > od_tuners->up_threshold) {
	if (load > dbs_data->up_threshold) {
		/* If switching to max speed, apply sampling_down_factor */
		if (policy->cur < policy->max)
			dbs_info->rate_mult = od_tuners->sampling_down_factor;
			policy_dbs->rate_mult = dbs_data->sampling_down_factor;
		dbs_freq_increase(policy, policy->max);
	} else {
		/* Calculate the next frequency proportional to load */
...
...
@@ -175,177 +163,70 @@ static void od_check_cpu(int cpu, unsigned int load)
		freq_next = min_f + load * (max_f - min_f) / 100;

		/* No longer fully busy, reset rate_mult */
		dbs_info->rate_mult = 1;
		policy_dbs->rate_mult = 1;

		if (!od_tuners->powersave_bias) {
			__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
			return;
		}
		if (od_tuners->powersave_bias)
			freq_next = od_ops.powersave_bias_target(policy,
								 freq_next,
								 CPUFREQ_RELATION_L);

		freq_next = od_ops.powersave_bias_target(policy, freq_next,
							 CPUFREQ_RELATION_L);
		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
	}
}
static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
static unsigned int od_dbs_timer(struct cpufreq_policy *policy)
{
	struct dbs_data *dbs_data = policy->governor_data;
	unsigned int cpu = policy->cpu;
	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	int delay = 0, sample_type = dbs_info->sample_type;

	if (!modify_all)
		goto max_delay;
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
	int sample_type = dbs_info->sample_type;

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = OD_NORMAL_SAMPLE;
	if (sample_type == OD_SUB_SAMPLE) {
		delay = dbs_info->freq_lo_jiffies;
	/*
	 * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore
	 * it then.
	 */
	if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
		__cpufreq_driver_target(policy, dbs_info->freq_lo,
					CPUFREQ_RELATION_H);
	} else {
		dbs_check_cpu(dbs_data, cpu);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
			dbs_info->sample_type = OD_SUB_SAMPLE;
			delay = dbs_info->freq_hi_jiffies;
		}
		return dbs_info->freq_lo_delay_us;
	}

max_delay:
	if (!delay)
		delay = delay_for_sampling_rate(od_tuners->sampling_rate
				* dbs_info->rate_mult);

	return delay;
}

/************************** sysfs interface ************************/
static struct common_dbs_data od_dbs_cdata;
	od_update(policy);
/**
* update_sampling_rate - update sampling rate effective immediately if needed.
* @new_rate: new sampling rate
*
* If new rate is smaller than the old, simply updating
* dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
* original sampling_rate was 1 second and the requested new sampling rate is 10
* ms because the user needs immediate reaction from ondemand governor, but not
* sure if higher frequency will be required or not, then, the governor may
* change the sampling rate too late; up to 1 second later. Thus, if we are
* reducing the sampling rate, we need to make the new value effective
* immediately.
*/
static void update_sampling_rate(struct dbs_data *dbs_data,
		unsigned int new_rate)
{
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct cpumask cpumask;
	int cpu;

	od_tuners->sampling_rate = new_rate = max(new_rate,
			dbs_data->min_sampling_rate);

	/*
	 * Lock governor so that governor start/stop can't execute in parallel.
	 */
	mutex_lock(&od_dbs_cdata.mutex);

	cpumask_copy(&cpumask, cpu_online_mask);

	for_each_cpu(cpu, &cpumask) {
		struct cpufreq_policy *policy;
		struct od_cpu_dbs_info_s *dbs_info;
		struct cpu_dbs_info *cdbs;
		struct cpu_common_dbs_info *shared;
		unsigned long next_sampling, appointed_at;

		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
		cdbs = &dbs_info->cdbs;
		shared = cdbs->shared;

		/*
		 * A valid shared and shared->policy means governor hasn't
		 * stopped or exited yet.
		 */
		if (!shared || !shared->policy)
			continue;

		policy = shared->policy;

		/* clear all CPUs of this policy */
		cpumask_andnot(&cpumask, &cpumask, policy->cpus);

		/*
		 * Update sampling rate for CPUs whose policy is governed by
		 * dbs_data. In case of governor_per_policy, only a single
		 * policy will be governed by dbs_data, otherwise there can be
		 * multiple policies that are governed by the same dbs_data.
		 */
		if (dbs_data != policy->governor_data)
			continue;

		/*
		 * Checking this for any CPU should be fine, timers for all of
		 * them are scheduled together.
		 */
		next_sampling = jiffies + usecs_to_jiffies(new_rate);
		appointed_at = dbs_info->cdbs.timer.expires;

		if (time_before(next_sampling, appointed_at)) {
			gov_cancel_work(shared);
			gov_add_timers(policy, usecs_to_jiffies(new_rate));
		}
	if (dbs_info->freq_lo) {
		/* Setup timer for SUB_SAMPLE */
		dbs_info->sample_type = OD_SUB_SAMPLE;
		return dbs_info->freq_hi_delay_us;
	}

	mutex_unlock(&od_dbs_cdata.mutex);
	return dbs_data->sampling_rate * policy_dbs->rate_mult;
}
static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
				   size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	update_sampling_rate(dbs_data, input);
	return count;
}
/************************** sysfs interface ************************/
static struct dbs_governor od_dbs_gov;

static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
				size_t count)
{
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	unsigned int input;
	int ret;
	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	od_tuners->io_is_busy = !!input;
	dbs_data->io_is_busy = !!input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, j);

		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
	}
	gov_update_cpu_data(dbs_data);

	return count;
}
static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
				  size_t count)
{
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
...
...
@@ -355,40 +236,43 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
		return -EINVAL;
	}

	od_tuners->up_threshold = input;
	dbs_data->up_threshold = input;
	return count;
}

static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
		const char *buf, size_t count)
{
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	unsigned int input, j;
	struct policy_dbs_info *policy_dbs;
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	od_tuners->sampling_down_factor = input;
	dbs_data->sampling_down_factor = input;

	/* Reset down sampling multiplier in case it was active */
	for_each_online_cpu(j) {
		struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, j);

		dbs_info->rate_mult = 1;
	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
		/*
		 * Doing this without locking might lead to using different
		 * rate_mult values in od_update() and od_dbs_timer().
		 */
		mutex_lock(&policy_dbs->timer_mutex);
		policy_dbs->rate_mult = 1;
		mutex_unlock(&policy_dbs->timer_mutex);
	}

	return count;
}
static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
				      const char *buf, size_t count)
{
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	unsigned int input;
	int ret;
	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
...
...
@@ -396,22 +280,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
	if (input > 1)
		input = 1;

	if (input == od_tuners->ignore_nice_load) { /* nothing to do */
	if (input == dbs_data->ignore_nice_load) { /* nothing to do */
		return count;
	}
	od_tuners->ignore_nice_load = input;
	dbs_data->ignore_nice_load = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct od_cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
		if (od_tuners->ignore_nice_load)
			dbs_info->cdbs.prev_cpu_nice =
				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
	gov_update_cpu_data(dbs_data);

	}
	return count;
}
...
...
@@ -419,6 +295,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
		size_t count)
{
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct policy_dbs_info *policy_dbs;
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
...
...
@@ -430,59 +307,54 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
		input = 1000;

	od_tuners->powersave_bias = input;
	ondemand_powersave_bias_init();

	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list)
		ondemand_powersave_bias_init(policy_dbs->policy);

	return count;
}
show_store_one(od, sampling_rate);
show_store_one(od, io_is_busy);
show_store_one(od, up_threshold);
show_store_one(od, sampling_down_factor);
show_store_one(od, ignore_nice_load);
show_store_one(od, powersave_bias);
declare_show_sampling_rate_min(od);

gov_sys_pol_attr_rw(sampling_rate);
gov_sys_pol_attr_rw(io_is_busy);
gov_sys_pol_attr_rw(up_threshold);
gov_sys_pol_attr_rw(sampling_down_factor);
gov_sys_pol_attr_rw(ignore_nice_load);
gov_sys_pol_attr_rw(powersave_bias);
gov_sys_pol_attr_ro(sampling_rate_min);

static struct attribute *dbs_attributes_gov_sys[] = {
	&sampling_rate_min_gov_sys.attr,
	&sampling_rate_gov_sys.attr,
	&up_threshold_gov_sys.attr,
	&sampling_down_factor_gov_sys.attr,
	&ignore_nice_load_gov_sys.attr,
	&powersave_bias_gov_sys.attr,
	&io_is_busy_gov_sys.attr,
gov_show_one_common(sampling_rate);
gov_show_one_common(up_threshold);
gov_show_one_common(sampling_down_factor);
gov_show_one_common(ignore_nice_load);
gov_show_one_common(min_sampling_rate);
gov_show_one_common(io_is_busy);
gov_show_one(od, powersave_bias);

gov_attr_rw(sampling_rate);
gov_attr_rw(io_is_busy);
gov_attr_rw(up_threshold);
gov_attr_rw(sampling_down_factor);
gov_attr_rw(ignore_nice_load);
gov_attr_rw(powersave_bias);
gov_attr_ro(min_sampling_rate);

static struct attribute *od_attributes[] = {
	&min_sampling_rate.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
	&sampling_down_factor.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	&io_is_busy.attr,
	NULL
};

static struct attribute_group od_attr_group_gov_sys = {
	.attrs = dbs_attributes_gov_sys,
	.name = "ondemand",
};

/************************** sysfs end ************************/

static struct attribute *dbs_attributes_gov_pol[] = {
	&sampling_rate_min_gov_pol.attr,
	&sampling_rate_gov_pol.attr,
	&up_threshold_gov_pol.attr,
	&sampling_down_factor_gov_pol.attr,
	&ignore_nice_load_gov_pol.attr,
	&powersave_bias_gov_pol.attr,
	&io_is_busy_gov_pol.attr,
	NULL
};

static struct policy_dbs_info *od_alloc(void)
{
	struct od_policy_dbs_info *dbs_info;

static struct attribute_group od_attr_group_gov_pol = {
	.attrs = dbs_attributes_gov_pol,
	.name = "ondemand",
};

	dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
	return dbs_info ? &dbs_info->policy_dbs : NULL;
}

/************************** sysfs end ************************/
static void od_free(struct policy_dbs_info *policy_dbs)
{
	kfree(to_dbs_info(policy_dbs));
}

static int od_init(struct dbs_data *dbs_data, bool notify)
{
...
...
@@ -501,7 +373,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify)
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
/*
* In nohz/micro accounting case we set the minimum frequency
* not depending on HZ, but fixed (very low). The deferred
...
...
@@ -509,17 +381,17 @@ static int od_init(struct dbs_data *dbs_data, bool notify)
		 */
		dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	} else {
		tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
		dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;

		/* For correct statistics, we need 10 ticks for each measure */
		dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
			jiffies_to_usecs(10);
	}

	tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
	tuners->ignore_nice_load = 0;
	dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
	dbs_data->ignore_nice_load = 0;
	tuners->powersave_bias = default_powersave_bias;
	tuners->io_is_busy = should_io_be_busy();
	dbs_data->io_is_busy = should_io_be_busy();

	dbs_data->tuners = tuners;
	return 0;
...
...
@@ -530,46 +402,38 @@ static void od_exit(struct dbs_data *dbs_data, bool notify)
	kfree(dbs_data->tuners);
}

define_get_cpu_dbs_routines(od_cpu_dbs_info);

static void od_start(struct cpufreq_policy *policy)
{
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);

	dbs_info->sample_type = OD_NORMAL_SAMPLE;
	ondemand_powersave_bias_init(policy);
}

static struct od_ops od_ops = {
	.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
	.powersave_bias_target = generic_powersave_bias_target,
	.freq_increase = dbs_freq_increase,
};

static struct common_dbs_data od_dbs_cdata = {
	.governor = GOV_ONDEMAND,
	.attr_group_gov_sys = &od_attr_group_gov_sys,
	.attr_group_gov_pol = &od_attr_group_gov_pol,
	.get_cpu_cdbs = get_cpu_cdbs,
	.get_cpu_dbs_info_s = get_cpu_dbs_info_s,
static struct dbs_governor od_dbs_gov = {
	.gov = {
		.name = "ondemand",
		.governor = cpufreq_governor_dbs,
		.max_transition_latency = TRANSITION_LATENCY_LIMIT,
		.owner = THIS_MODULE,
	},
	.kobj_type = { .default_attrs = od_attributes },
	.gov_dbs_timer = od_dbs_timer,
	.gov_check_cpu = od_check_cpu,
	.gov_ops = &od_ops,
	.alloc = od_alloc,
	.free = od_free,
	.init = od_init,
	.exit = od_exit,
	.mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex),
	.start = od_start,
};
static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
		unsigned int event)
{
	return cpufreq_governor_dbs(policy, &od_dbs_cdata, event);
}

static struct cpufreq_governor cpufreq_gov_ondemand = {
	.name = "ondemand",
	.governor = od_cpufreq_governor_dbs,
	.max_transition_latency = TRANSITION_LATENCY_LIMIT,
	.owner = THIS_MODULE,
};
#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov)
static void od_set_powersave_bias(unsigned int powersave_bias)
{
	struct cpufreq_policy *policy;
	struct dbs_data *dbs_data;
	struct od_dbs_tuners *od_tuners;
	unsigned int cpu;
	cpumask_t done;
...
...
@@ -578,22 +442,25 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct cpu_common_dbs_info *shared;
		struct cpufreq_policy *policy;
		struct policy_dbs_info *policy_dbs;
		struct dbs_data *dbs_data;
		struct od_dbs_tuners *od_tuners;

		if (cpumask_test_cpu(cpu, &done))
			continue;

		shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared;
		if (!shared)
		policy = cpufreq_cpu_get_raw(cpu);
		if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND)
			continue;

		policy = shared->policy;
		cpumask_or(&done, &done, policy->cpus);

		if (policy->governor != &cpufreq_gov_ondemand)
		policy_dbs = policy->governor_data;
		if (!policy_dbs)
			continue;

		dbs_data = policy->governor_data;
		cpumask_or(&done, &done, policy->cpus);

		dbs_data = policy_dbs->dbs_data;
		od_tuners = dbs_data->tuners;
		od_tuners->powersave_bias = default_powersave_bias;
	}
...
...
@@ -618,12 +485,12 @@ EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_ondemand);
	return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
	cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND);
}
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
...
...
@@ -635,7 +502,7 @@ MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &cpufreq_gov_ondemand;
	return CPU_FREQ_GOV_ONDEMAND;
}

fs_initcall(cpufreq_gov_dbs_init);
...
...
drivers/cpufreq/cpufreq_ondemand.h
0 → 100644
View file @
a5acbfbd
/*
* Header file for CPUFreq ondemand governor and related code.
*
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "cpufreq_governor.h"
struct od_policy_dbs_info {
	struct policy_dbs_info policy_dbs;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_delay_us;
	unsigned int freq_hi_delay_us;
	unsigned int sample_type:1;
};

static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
{
	return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs);
}

struct od_dbs_tuners {
	unsigned int powersave_bias;
};
drivers/cpufreq/intel_pstate.c
View file @
a5acbfbd
...
...
@@ -71,7 +71,7 @@ struct sample {
	u64 mperf;
	u64 tsc;
	int freq;
	ktime_t time;
	u64 time;
};

struct pstate_data {
...
...
@@ -103,13 +103,13 @@ struct _pid {
struct cpudata {
	int cpu;

	struct timer_list timer;
	struct update_util_data update_util;

	struct pstate_data pstate;
	struct vid_data vid;
	struct _pid pid;

	ktime_t last_sample_time;
	u64 last_sample_time;
	u64 prev_aperf;
	u64 prev_mperf;
	u64 prev_tsc;
...
...
@@ -120,6 +120,7 @@ struct cpudata {
static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
	int sample_rate_ms;
	s64 sample_rate_ns;
	int deadband;
	int setpoint;
	int p_gain_pct;
...
...
@@ -718,7 +719,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
	if (limits->no_turbo && !limits->turbo_disabled)
		val |= (u64)1 << 32;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
	wrmsrl(MSR_IA32_PERF_CTL, val);
}

static int knl_get_turbo_pstate(void)
...
...
@@ -889,7 +890,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
	sample->core_pct_busy = (int32_t)core_pct;
}

static inline void intel_pstate_sample(struct cpudata *cpu)
static inline void intel_pstate_sample(struct cpudata *cpu, u64 time)
{
	u64 aperf, mperf;
	unsigned long flags;
...
...
@@ -906,7 +907,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = ktime_get();
	cpu->sample.time = time;
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
...
...
@@ -921,22 +922,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
	cpu->prev_tsc = tsc;
}

static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
{
	int delay;

	delay = msecs_to_jiffies(50);
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
	int delay;

	delay = msecs_to_jiffies(pid_params.sample_rate_ms);
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
...
...
@@ -976,8 +961,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
	s64 duration_us;
	u32 sample_time;
	u64 duration_ns;
/*
* core_busy is the ratio of actual performance to max
...
...
@@ -996,18 +980,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

	/*
	 * Since we have a deferred timer, it will not fire unless
	 * we are in C0. So, determine if the actual elapsed time
	 * is significantly greater (3x) than our sample interval. If it
	 * is, then we were idle for a long enough period of time
	 * to adjust our busyness.
	 * Since our utilization update callback will not run unless we are
	 * in C0, check if the actual elapsed time is significantly greater (3x)
	 * than our sample interval. If it is, then we were idle for a long
	 * enough period of time to adjust our busyness.
	 */
	sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
	duration_us = ktime_us_delta(cpu->sample.time, cpu->last_sample_time);
	if (duration_us > sample_time * 3) {
		sample_ratio = div_fp(int_tofp(sample_time),
				      int_tofp(duration_us));
	duration_ns = cpu->sample.time - cpu->last_sample_time;
	if ((s64)duration_ns > pid_params.sample_rate_ns * 3
	    && cpu->last_sample_time > 0) {
		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
				      int_tofp(duration_ns));
		core_busy = mul_fp(core_busy, sample_ratio);
	}
...
...
@@ -1037,23 +1019,17 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
		sample->freq);
}

static void intel_hwp_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);
	intel_hwp_set_sample_time(cpu);
}

static void intel_pstate_timer_func(unsigned long __data)
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned long util, unsigned long max)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns = time - cpu->sample.time;

	intel_pstate_adjust_busy_pstate(cpu);
	intel_pstate_set_sample_time(cpu);
	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
		intel_pstate_sample(cpu, time);
		if (!hwp_active)
			intel_pstate_adjust_busy_pstate(cpu);
	}
}
#define ICPU(model, policy) \
...
...
@@ -1101,24 +1077,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
	cpu->cpu = cpunum;

	if (hwp_active)
	if (hwp_active) {
		intel_pstate_hwp_enable(cpu);
		pid_params.sample_rate_ms = 50;
		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
	}

	intel_pstate_get_cpu_pstates(cpu);

	init_timer_deferrable(&cpu->timer);
	cpu->timer.data = (unsigned long)cpu;
	cpu->timer.expires = jiffies + HZ/100;

	if (!hwp_active)
		cpu->timer.function = intel_pstate_timer_func;
	else
		cpu->timer.function = intel_hwp_timer_func;

	intel_pstate_busy_pid_reset(cpu);
	intel_pstate_sample(cpu);
	intel_pstate_sample(cpu, 0);

	add_timer_on(&cpu->timer, cpunum);
	cpu->update_util.func = intel_pstate_update_util;
	cpufreq_set_update_util_data(cpunum, &cpu->update_util);

	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
...
...
@@ -1202,7 +1173,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);

	del_timer_sync(&all_cpu_data[cpu_num]->timer);
	cpufreq_set_update_util_data(cpu_num, NULL);
	synchronize_sched();

	if (hwp_active)
		return;
...
...
@@ -1266,6 +1239,7 @@ static int intel_pstate_msrs_not_valid(void)
static void copy_pid_params(struct pstate_adjust_policy *policy)
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
...
...
@@ -1467,7 +1441,8 @@ static int __init intel_pstate_init(void)
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			del_timer_sync(&all_cpu_data[cpu]->timer);
			cpufreq_set_update_util_data(cpu, NULL);
			synchronize_sched();
			kfree(all_cpu_data[cpu]);
		}
	}
...
...
include/linux/cpufreq.h
View file @
a5acbfbd
...
...
@@ -80,7 +80,6 @@ struct cpufreq_policy {
	unsigned int last_policy;	/* policy before unplug */
	struct cpufreq_governor *governor;	/* see below */
	void *governor_data;
	bool governor_enabled;		/* governor start/stop flag */
	char last_governor[CPUFREQ_NAME_LEN];	/* last governor used */

	struct work_struct update;	/* if update_policy() needs to be
...
...
@@ -100,10 +99,6 @@ struct cpufreq_policy {
* - Any routine that will write to the policy structure and/or may take away
* the policy altogether (eg. CPU hotplug), will hold this lock in write
* mode before doing so.
*
* Additional rules:
* - Lock should not be held across
* __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
*/
	struct rw_semaphore rwsem;
...
...
include/linux/sched.h
View file @
a5acbfbd
...
...
@@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit)
	return task_rlimit_max(current, limit);
}

#ifdef CONFIG_CPU_FREQ
struct update_util_data {
	void (*func)(struct update_util_data *data,
		     u64 time, unsigned long util, unsigned long max);
};

void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
#endif /* CONFIG_CPU_FREQ */

#endif
kernel/sched/Makefile
View file @
a5acbfbd
...
...
@@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
kernel/sched/cpufreq.c
0 → 100644
View file @
a5acbfbd
/*
* Scheduler code and data structures related to cpufreq.
*
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "sched.h"
DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);

/**
 * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
 * @cpu: The CPU to set the pointer for.
 * @data: New pointer value.
 *
 * Set and publish the update_util_data pointer for the given CPU.  That pointer
 * points to a struct update_util_data object containing a callback function
 * to call from cpufreq_update_util().  That function will be called from an RCU
 * read-side critical section, so it must not sleep.
 *
 * Callers must use RCU-sched callbacks to free any memory that might be
 * accessed via the old update_util_data pointer or invoke synchronize_sched()
 * right after this function to avoid use-after-free.
 */
void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
{
	if (WARN_ON(data && !data->func))
		return;

	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
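As a usage sketch (hypothetical driver names, modeled on the intel_pstate changes elsewhere in this commit), a driver that wants these per-CPU utilization callbacks embeds a struct update_util_data in its per-CPU state, registers it when it starts managing a CPU, and pairs the NULL write with synchronize_sched() before that state may be reused or freed:

struct my_cpu {
	struct update_util_data update_util;	/* must stay valid while registered */
};
static DEFINE_PER_CPU(struct my_cpu, my_cpus);

static void my_update_util(struct update_util_data *data, u64 time,
			   unsigned long util, unsigned long max)
{
	struct my_cpu *mc = container_of(data, struct my_cpu, update_util);

	/* Runs from scheduler context in an RCU-sched section; must not sleep. */
	(void)mc;
}

static void my_start(int cpu)
{
	struct my_cpu *mc = &per_cpu(my_cpus, cpu);

	mc->update_util.func = my_update_util;
	cpufreq_set_update_util_data(cpu, &mc->update_util);
}

static void my_stop(int cpu)
{
	cpufreq_set_update_util_data(cpu, NULL);
	synchronize_sched();	/* wait for in-flight callbacks */
}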
kernel/sched/deadline.c
View file @
a5acbfbd
...
...
@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
	if (!dl_task(curr) || !on_dl_rq(dl_se))
		return;

	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
	if (cpu_of(rq) == smp_processor_id())
		cpufreq_trigger_update(rq_clock(rq));
/*
* Consumed budget is computed considering the time as
* observed by schedulable tasks (excluding time spent
...
...
kernel/sched/fair.c
View file @
a5acbfbd
...
...
@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
{
	struct cfs_rq *cfs_rq = cfs_rq_of(se);
	u64 now = cfs_rq_clock_task(cfs_rq);
	int cpu = cpu_of(rq_of(cfs_rq));
	struct rq *rq = rq_of(cfs_rq);
	int cpu = cpu_of(rq);
/*
* Track task load average for carrying it to new CPU after migrated, and
...
...
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
		update_tg_load_avg(cfs_rq, 0);

	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
		unsigned long max = rq->cpu_capacity_orig;

		/*
		 * There are a few boundary cases this might miss but it should
		 * get called often enough that that should (hopefully) not be
		 * a real problem -- added to that it only calls on the local
		 * CPU, so if we enqueue remotely we'll miss an update, but
		 * the next tick/schedule should update.
		 *
		 * It will not get called when we go idle, because the idle
		 * thread is a different class (!fair), nor will the utilization
		 * number include things like RT tasks.
		 *
		 * As is, the util number is not freq-invariant (we'd have to
		 * implement arch_scale_freq_capacity() for that).
		 *
		 * See cpu_util().
		 */
		cpufreq_update_util(rq_clock(rq),
				    min(cfs_rq->avg.util_avg, max), max);
	}
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
...
...
kernel/sched/rt.c
View file @
a5acbfbd
...
...
@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
	if (curr->sched_class != &rt_sched_class)
		return;

	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
	if (cpu_of(rq) == smp_processor_id())
		cpufreq_trigger_update(rq_clock(rq));

	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;
...
...
kernel/sched/sched.h
View file @
a5acbfbd
...
...
@@ -1738,3 +1738,51 @@ static inline u64 irq_time_read(int cpu)
}
#endif /* CONFIG_64BIT */
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);

/**
 * cpufreq_update_util - Take a note about CPU utilization changes.
 * @time: Current time.
 * @util: Current utilization.
 * @max: Utilization ceiling.
 *
 * This function is called by the scheduler on every invocation of
 * update_load_avg() on the CPU whose utilization is being updated.
 *
 * It can only be called from RCU-sched read-side critical sections.
 */
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
{
	struct update_util_data *data;

	data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
	if (data)
		data->func(data, time, util, max);
}

/**
 * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
 * @time: Current time.
 *
 * The way cpufreq is currently arranged requires it to evaluate the CPU
 * performance state (frequency/voltage) on a regular basis to prevent it from
 * being stuck in a completely inadequate performance level for too long.
 * That is not guaranteed to happen if the updates are only triggered from CFS,
 * though, because they may not be coming in if RT or deadline tasks are active
 * all the time (or there are RT and DL tasks only).
 *
 * As a workaround for that issue, this function is called by the RT and DL
 * sched classes to trigger extra cpufreq updates to prevent it from stalling,
 * but that really is a band-aid.  Going forward it should be replaced with
 * solutions targeted more specifically at RT and DL tasks.
 */
static inline void cpufreq_trigger_update(u64 time)
{
	cpufreq_update_util(time, ULONG_MAX, 0);
}
#else
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
static inline void cpufreq_trigger_update(u64 time) {}
#endif /* CONFIG_CPU_FREQ */