Commit 901cb599, authored by Evan Quan, committed by Alex Deucher

drm/amd/powerplay: support temperature emergency max values

These new interfaces (temp1_emergency, temp2_emergency,
temp3_emergency) are supported on SOC15 dGPUs only.
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 437ccd17
...@@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal { ...@@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal {
int min_temp; int min_temp;
/* high temperature threshold */ /* high temperature threshold */
int max_temp; int max_temp;
/* edge max emergency(shutdown) temp */
int max_edge_emergency_temp;
/* hotspot low temperature threshold */ /* hotspot low temperature threshold */
int min_hotspot_temp; int min_hotspot_temp;
/* hotspot high temperature critical threshold */ /* hotspot high temperature critical threshold */
int max_hotspot_crit_temp; int max_hotspot_crit_temp;
/* hotspot max emergency(shutdown) temp */
int max_hotspot_emergency_temp;
/* memory low temperature threshold */ /* memory low temperature threshold */
int min_mem_temp; int min_mem_temp;
/* memory high temperature critical threshold */ /* memory high temperature critical threshold */
int max_mem_crit_temp; int max_mem_crit_temp;
/* memory max emergency(shutdown) temp */
int max_mem_emergency_temp;
/* was last interrupt low to high or high to low */ /* was last interrupt low to high or high to low */
bool high_to_low; bool high_to_low;
/* interrupt source */ /* interrupt source */
......
...@@ -1446,6 +1446,32 @@ static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, ...@@ -1446,6 +1446,32 @@ static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", temp); return snprintf(buf, PAGE_SIZE, "%d\n", temp);
} }
/**
 * amdgpu_hwmon_show_temp_emergency - sysfs show handler for temp[1-3]_emergency
 * @dev: hwmon device; its drvdata is the owning struct amdgpu_device
 * @attr: sensor attribute whose index selects the temperature channel
 * @buf: sysfs output buffer (at most PAGE_SIZE bytes are written)
 *
 * Prints the emergency maximum temperature stored in adev->pm.dpm.thermal
 * for the channel encoded in the attribute index (PP_TEMP_EDGE,
 * PP_TEMP_JUNCTION or PP_TEMP_MEM) as a decimal number followed by a newline.
 *
 * Return: the snprintf() result on success, or -EINVAL when the index is
 * PP_TEMP_MAX or larger.
 */
static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
						struct device_attribute *attr,
						char *buf)
{
	struct amdgpu_device *adev = dev_get_drvdata(dev);
	int sensor = to_sensor_dev_attr(attr)->index;
	int limit;

	if (sensor >= PP_TEMP_MAX)
		return -EINVAL;

	if (sensor == PP_TEMP_JUNCTION)
		limit = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
	else if (sensor == PP_TEMP_EDGE)
		limit = adev->pm.dpm.thermal.max_edge_emergency_temp;
	else if (sensor == PP_TEMP_MEM)
		limit = adev->pm.dpm.thermal.max_mem_emergency_temp;
	else
		/* Any other index below PP_TEMP_MAX reports 0. */
		limit = 0;

	return snprintf(buf, PAGE_SIZE, "%d\n", limit);
}
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,
char *buf) char *buf)
...@@ -2023,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, ...@@ -2023,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
* - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
* - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
* *
* - temp[1-3]_emergency: temperature emergency max value (ASIC shutdown) in millidegrees Celsius
* - these are supported on SOC15 dGPUs only
*
* hwmon interfaces for GPU voltage: * hwmon interfaces for GPU voltage:
* *
* - in0_input: the voltage on the GPU in millivolts * - in0_input: the voltage on the GPU in millivolts
...@@ -2072,10 +2101,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, ...@@ -2072,10 +2101,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
...@@ -2106,6 +2138,9 @@ static struct attribute *hwmon_attributes[] = { ...@@ -2106,6 +2138,9 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp3_crit.dev_attr.attr, &sensor_dev_attr_temp3_crit.dev_attr.attr,
&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp1_emergency.dev_attr.attr,
&sensor_dev_attr_temp2_emergency.dev_attr.attr,
&sensor_dev_attr_temp3_emergency.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_min.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr,
...@@ -2234,7 +2269,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, ...@@ -2234,7 +2269,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
(attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr)) attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
return 0; return 0;
return effective_mode; return effective_mode;
......
...@@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) ...@@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
struct PP_TemperatureRange range = { struct PP_TemperatureRange range = {
TEMP_RANGE_MIN, TEMP_RANGE_MIN,
TEMP_RANGE_MAX, TEMP_RANGE_MAX,
TEMP_RANGE_MAX,
TEMP_RANGE_MIN, TEMP_RANGE_MIN,
TEMP_RANGE_MAX, TEMP_RANGE_MAX,
TEMP_RANGE_MAX,
TEMP_RANGE_MIN, TEMP_RANGE_MIN,
TEMP_RANGE_MAX,
TEMP_RANGE_MAX}; TEMP_RANGE_MAX};
struct amdgpu_device *adev = hwmgr->adev; struct amdgpu_device *adev = hwmgr->adev;
...@@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) ...@@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.min_temp = range.min;
adev->pm.dpm.thermal.max_temp = range.max; adev->pm.dpm.thermal.max_temp = range.max;
adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
adev->pm.dpm.thermal.min_mem_temp = range.mem_min; adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
return ret; return ret;
} }
......
...@@ -4859,10 +4859,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, ...@@ -4859,10 +4859,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
thermal_data->max = pp_table->TedgeLimit * thermal_data->max = pp_table->TedgeLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->mem_crit_max = pp_table->ThbmLimit * thermal_data->mem_crit_max = pp_table->ThbmLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0; return 0;
} }
......
...@@ -2534,10 +2534,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, ...@@ -2534,10 +2534,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
thermal_data->max = pp_table->TedgeLimit * thermal_data->max = pp_table->TedgeLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->mem_crit_max = pp_table->ThbmLimit * thermal_data->mem_crit_max = pp_table->ThbmLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0; return 0;
} }
......
...@@ -3982,10 +3982,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, ...@@ -3982,10 +3982,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
thermal_data->max = pp_table->TedgeLimit * thermal_data->max = pp_table->TedgeLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->mem_crit_max = pp_table->ThbmLimit * thermal_data->mem_crit_max = pp_table->ThbmLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0; return 0;
} }
......
...@@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock { ...@@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
struct PP_TemperatureRange { struct PP_TemperatureRange {
int min; int min;
int max; int max;
int edge_emergency_max;
int hotspot_min; int hotspot_min;
int hotspot_crit_max; int hotspot_crit_max;
int hotspot_emergency_max;
int mem_min; int mem_min;
int mem_crit_max; int mem_crit_max;
int mem_emergency_max;
}; };
struct PP_StateValidationBlock { struct PP_StateValidationBlock {
......
...@@ -27,14 +27,18 @@ ...@@ -27,14 +27,18 @@
static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
{ {
{-273150, 99000, -273150, 99000, -273150, 99000}, {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
{ 120000, 120000, 120000, 120000, 120000, 120000}, { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
}; };
static const struct PP_TemperatureRange SMU7ThermalPolicy[] = static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
{ {
{-273150, 99000, -273150, 99000, -273150, 99000}, {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
{ 120000, 120000, 120000, 120000, 120000, 120000}, { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
}; };
#define CTF_OFFSET_EDGE 5
#define CTF_OFFSET_HOTSPOT 5
#define CTF_OFFSET_HBM 5
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment