Commit 66a76401 authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay

habanalabs: add 'needs reset' state in driver

The new state indicates that device should be reset in order
to re-gain funcionality.
This unique state can occur if reset_on_lockup is disabled
and an actual lockup has occurred.
Signed-off-by: default avatarOfir Bitton <obitton@habana.ai>
Reviewed-by: default avatarOded Gabbay <ogabbay@kernel.org>
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent f2d032ee
...@@ -379,13 +379,14 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -379,13 +379,14 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{ {
union hl_cb_args *args = data; union hl_cb_args *args = data;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
enum hl_device_status status;
u64 handle = 0; u64 handle = 0;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't execute CB IOCTL\n", "Device is %s. Can't execute CB IOCTL\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); hdev->status[status]);
return -EBUSY; return -EBUSY;
} }
......
...@@ -427,6 +427,8 @@ static void cs_timedout(struct work_struct *work) ...@@ -427,6 +427,8 @@ static void cs_timedout(struct work_struct *work)
if (hdev->reset_on_lockup) if (hdev->reset_on_lockup)
hl_device_reset(hdev, false, false); hl_device_reset(hdev, false, false);
else
hdev->needs_reset = true;
} }
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
...@@ -689,12 +691,13 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) ...@@ -689,12 +691,13 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx; struct hl_ctx *ctx = hpriv->ctx;
u32 cs_type_flags, num_chunks; u32 cs_type_flags, num_chunks;
enum hl_device_status status;
enum hl_cs_type cs_type; enum hl_cs_type cs_type;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't submit new CS\n", "Device is %s. Can't submit new CS\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); hdev->status[status]);
return -EBUSY; return -EBUSY;
} }
......
...@@ -24,7 +24,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, ...@@ -24,7 +24,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
struct cpucp_packet pkt; struct cpucp_packet pkt;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -EBUSY; return -EBUSY;
memset(&pkt, 0, sizeof(pkt)); memset(&pkt, 0, sizeof(pkt));
...@@ -50,7 +50,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, ...@@ -50,7 +50,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
struct cpucp_packet pkt; struct cpucp_packet pkt;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -EBUSY; return -EBUSY;
memset(&pkt, 0, sizeof(pkt)); memset(&pkt, 0, sizeof(pkt));
...@@ -76,7 +76,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) ...@@ -76,7 +76,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
struct cpucp_packet pkt; struct cpucp_packet pkt;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return; return;
memset(&pkt, 0, sizeof(pkt)); memset(&pkt, 0, sizeof(pkt));
......
...@@ -15,14 +15,6 @@ ...@@ -15,14 +15,6 @@
#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) #define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
{
if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
return true;
else
return false;
}
enum hl_device_status hl_device_status(struct hl_device *hdev) enum hl_device_status hl_device_status(struct hl_device *hdev)
{ {
enum hl_device_status status; enum hl_device_status status;
...@@ -31,12 +23,34 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) ...@@ -31,12 +23,34 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
status = HL_DEVICE_STATUS_MALFUNCTION; status = HL_DEVICE_STATUS_MALFUNCTION;
else if (atomic_read(&hdev->in_reset)) else if (atomic_read(&hdev->in_reset))
status = HL_DEVICE_STATUS_IN_RESET; status = HL_DEVICE_STATUS_IN_RESET;
else if (hdev->needs_reset)
status = HL_DEVICE_STATUS_NEEDS_RESET;
else else
status = HL_DEVICE_STATUS_OPERATIONAL; status = HL_DEVICE_STATUS_OPERATIONAL;
return status; return status;
} }
bool hl_device_operational(struct hl_device *hdev,
enum hl_device_status *status)
{
enum hl_device_status current_status;
current_status = hl_device_status(hdev);
if (status)
*status = current_status;
switch (current_status) {
case HL_DEVICE_STATUS_IN_RESET:
case HL_DEVICE_STATUS_MALFUNCTION:
case HL_DEVICE_STATUS_NEEDS_RESET:
return false;
case HL_DEVICE_STATUS_OPERATIONAL:
default:
return true;
}
}
static void hpriv_release(struct kref *ref) static void hpriv_release(struct kref *ref)
{ {
struct hl_fpriv *hpriv; struct hl_fpriv *hpriv;
...@@ -411,7 +425,7 @@ static void hl_device_heartbeat(struct work_struct *work) ...@@ -411,7 +425,7 @@ static void hl_device_heartbeat(struct work_struct *work)
struct hl_device *hdev = container_of(work, struct hl_device, struct hl_device *hdev = container_of(work, struct hl_device,
work_heartbeat.work); work_heartbeat.work);
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
goto reschedule; goto reschedule;
if (!hdev->asic_funcs->send_heartbeat(hdev)) if (!hdev->asic_funcs->send_heartbeat(hdev))
...@@ -1091,6 +1105,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, ...@@ -1091,6 +1105,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
} }
atomic_set(&hdev->in_reset, 0); atomic_set(&hdev->in_reset, 0);
hdev->needs_reset = false;
if (hard_reset) if (hard_reset)
hdev->hard_reset_cnt++; hdev->hard_reset_cnt++;
......
...@@ -1432,6 +1432,10 @@ struct hl_dbg_device_entry { ...@@ -1432,6 +1432,10 @@ struct hl_dbg_device_entry {
* DEVICES * DEVICES
*/ */
#define HL_STR_MAX 32
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
* x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
*/ */
...@@ -1706,6 +1710,7 @@ struct hl_mmu_funcs { ...@@ -1706,6 +1710,7 @@ struct hl_mmu_funcs {
* @hwmon_dev: H/W monitor device. * @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile. * @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information. * @hl_chip_info: ASIC's sensors information.
* @device_status_description: device status description.
* @hl_debugfs: device's debugfs manager. * @hl_debugfs: device's debugfs manager.
* @cb_pool: list of preallocated CBs. * @cb_pool: list of preallocated CBs.
* @cb_pool_lock: protects the CB pool. * @cb_pool_lock: protects the CB pool.
...@@ -1774,6 +1779,8 @@ struct hl_mmu_funcs { ...@@ -1774,6 +1779,8 @@ struct hl_mmu_funcs {
* @supports_coresight: is CoreSight supported. * @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported. * @supports_soft_reset: is soft reset supported.
* @supports_cb_mapping: is mapping a CB to the device's MMU supported. * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
* @needs_reset: true if reset_on_lockup is false and device should be reset
* due to lockup.
*/ */
struct hl_device { struct hl_device {
struct pci_dev *pdev; struct pci_dev *pdev;
...@@ -1786,7 +1793,8 @@ struct hl_device { ...@@ -1786,7 +1793,8 @@ struct hl_device {
struct device *dev_ctrl; struct device *dev_ctrl;
struct delayed_work work_freq; struct delayed_work work_freq;
struct delayed_work work_heartbeat; struct delayed_work work_heartbeat;
char asic_name[32]; char asic_name[HL_STR_MAX];
char status[HL_DEV_STS_MAX][HL_STR_MAX];
enum hl_asic_type asic_type; enum hl_asic_type asic_type;
struct hl_cq *completion_queue; struct hl_cq *completion_queue;
struct workqueue_struct **cq_wq; struct workqueue_struct **cq_wq;
...@@ -1876,6 +1884,7 @@ struct hl_device { ...@@ -1876,6 +1884,7 @@ struct hl_device {
u8 supports_coresight; u8 supports_coresight;
u8 supports_soft_reset; u8 supports_soft_reset;
u8 supports_cb_mapping; u8 supports_cb_mapping;
u8 needs_reset;
/* Parameters for bring-up */ /* Parameters for bring-up */
u64 nic_ports_mask; u64 nic_ports_mask;
...@@ -1978,7 +1987,8 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, ...@@ -1978,7 +1987,8 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
int hl_device_open(struct inode *inode, struct file *filp); int hl_device_open(struct inode *inode, struct file *filp);
int hl_device_open_ctrl(struct inode *inode, struct file *filp); int hl_device_open_ctrl(struct inode *inode, struct file *filp);
bool hl_device_disabled_or_in_reset(struct hl_device *hdev); bool hl_device_operational(struct hl_device *hdev,
enum hl_device_status *status);
enum hl_device_status hl_device_status(struct hl_device *hdev); enum hl_device_status hl_device_status(struct hl_device *hdev);
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev, int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
......
...@@ -92,6 +92,7 @@ static enum hl_asic_type get_asic_type(u16 device) ...@@ -92,6 +92,7 @@ static enum hl_asic_type get_asic_type(u16 device)
*/ */
int hl_device_open(struct inode *inode, struct file *filp) int hl_device_open(struct inode *inode, struct file *filp)
{ {
enum hl_device_status status;
struct hl_device *hdev; struct hl_device *hdev;
struct hl_fpriv *hpriv; struct hl_fpriv *hpriv;
int rc; int rc;
...@@ -124,10 +125,10 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -124,10 +125,10 @@ int hl_device_open(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_list_lock); mutex_lock(&hdev->fpriv_list_lock);
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
dev_err_ratelimited(hdev->dev, dev_err_ratelimited(hdev->dev,
"Can't open %s because it is disabled or in reset\n", "Can't open %s because it is %s\n",
dev_name(hdev->dev)); dev_name(hdev->dev), hdev->status[status]);
rc = -EPERM; rc = -EPERM;
goto out_err; goto out_err;
} }
...@@ -204,7 +205,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) ...@@ -204,7 +205,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_list_lock); mutex_lock(&hdev->fpriv_list_lock);
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
dev_err_ratelimited(hdev->dev_ctrl, dev_err_ratelimited(hdev->dev_ctrl,
"Can't open %s because it is disabled or in reset\n", "Can't open %s because it is disabled or in reset\n",
dev_name(hdev->dev_ctrl)); dev_name(hdev->dev_ctrl));
...@@ -287,6 +288,14 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -287,6 +288,14 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->asic_type = asic_type; hdev->asic_type = asic_type;
} }
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
"disabled", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
"in reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
"needs reset", HL_STR_MAX);
hdev->major = hl_major; hdev->major = hl_major;
hdev->reset_on_lockup = reset_on_lockup; hdev->reset_on_lockup = reset_on_lockup;
hdev->memory_scrub = memory_scrub; hdev->memory_scrub = memory_scrub;
......
...@@ -406,8 +406,10 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, ...@@ -406,8 +406,10 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev) struct device *dev)
{ {
enum hl_device_status status;
struct hl_info_args *args = data; struct hl_info_args *args = data;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
int rc; int rc;
/* /*
...@@ -428,10 +430,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, ...@@ -428,10 +430,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
break; break;
} }
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(dev, dev_warn_ratelimited(dev,
"Device is %s. Can't execute INFO IOCTL\n", "Device is %s. Can't execute INFO IOCTL\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); hdev->status[status]);
return -EBUSY; return -EBUSY;
} }
...@@ -501,12 +503,14 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -501,12 +503,14 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
{ {
struct hl_debug_args *args = data; struct hl_debug_args *args = data;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
enum hl_device_status status;
int rc = 0; int rc = 0;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't execute DEBUG IOCTL\n", "Device is %s. Can't execute DEBUG IOCTL\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); hdev->status[status]);
return -EBUSY; return -EBUSY;
} }
......
...@@ -515,6 +515,7 @@ static void init_signal_wait_cs(struct hl_cs *cs) ...@@ -515,6 +515,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
*/ */
int hl_hw_queue_schedule_cs(struct hl_cs *cs) int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{ {
enum hl_device_status status;
struct hl_cs_counters_atomic *cntr; struct hl_cs_counters_atomic *cntr;
struct hl_ctx *ctx = cs->ctx; struct hl_ctx *ctx = cs->ctx;
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
...@@ -527,11 +528,10 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) ...@@ -527,11 +528,10 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
hdev->asic_funcs->hw_queues_lock(hdev); hdev->asic_funcs->hw_queues_lock(hdev);
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt); atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
atomic64_inc(&cntr->device_in_reset_drop_cnt);
dev_err(hdev->dev, dev_err(hdev->dev,
"device is disabled or in reset, CS rejected!\n"); "device is %s, CS rejected!\n", hdev->status[status]);
rc = -EPERM; rc = -EPERM;
goto out; goto out;
} }
......
...@@ -114,7 +114,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, ...@@ -114,7 +114,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
switch (type) { switch (type) {
...@@ -192,7 +192,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, ...@@ -192,7 +192,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
{ {
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
switch (type) { switch (type) {
......
...@@ -1237,6 +1237,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) ...@@ -1237,6 +1237,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{ {
enum hl_device_status status;
union hl_mem_args *args = data; union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx; struct hl_ctx *ctx = hpriv->ctx;
...@@ -1244,10 +1245,10 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1244,10 +1245,10 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
u32 handle = 0; u32 handle = 0;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't execute MEMORY IOCTL\n", "Device is %s. Can't execute MEMORY IOCTL\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); hdev->status[status]);
return -EBUSY; return -EBUSY;
} }
......
...@@ -276,6 +276,8 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr, ...@@ -276,6 +276,8 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
str = "In reset"; str = "In reset";
else if (hdev->disabled) else if (hdev->disabled)
str = "Malfunction"; str = "Malfunction";
else if (hdev->needs_reset)
str = "Needs Reset";
else else
str = "Operational"; str = "Operational";
...@@ -304,7 +306,7 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, ...@@ -304,7 +306,7 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long val; long val;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
val = hl_get_max_power(hdev); val = hl_get_max_power(hdev);
...@@ -319,7 +321,7 @@ static ssize_t max_power_store(struct device *dev, ...@@ -319,7 +321,7 @@ static ssize_t max_power_store(struct device *dev,
unsigned long value; unsigned long value;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto out; goto out;
} }
...@@ -347,7 +349,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, ...@@ -347,7 +349,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
char *data; char *data;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
if (!max_size) if (!max_size)
......
...@@ -20,7 +20,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) ...@@ -20,7 +20,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{ {
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, MME_PLL, false); value = hl_get_frequency(hdev, MME_PLL, false);
...@@ -54,7 +54,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev, ...@@ -54,7 +54,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
struct gaudi_device *gaudi = hdev->asic_specific; struct gaudi_device *gaudi = hdev->asic_specific;
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, MME_PLL, false); value = hl_get_frequency(hdev, MME_PLL, false);
...@@ -72,7 +72,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev, ...@@ -72,7 +72,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
int rc; int rc;
u64 value; u64 value;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto fail; goto fail;
} }
...@@ -97,7 +97,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, ...@@ -97,7 +97,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, MME_PLL, true); value = hl_get_frequency(hdev, MME_PLL, true);
......
...@@ -36,7 +36,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) ...@@ -36,7 +36,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{ {
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, MME_PLL, false); value = hl_get_frequency(hdev, MME_PLL, false);
...@@ -69,7 +69,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, ...@@ -69,7 +69,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, MME_PLL, false); value = hl_get_frequency(hdev, MME_PLL, false);
...@@ -88,7 +88,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr, ...@@ -88,7 +88,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
int rc; int rc;
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto fail; goto fail;
} }
...@@ -118,7 +118,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr, ...@@ -118,7 +118,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, TPC_PLL, false); value = hl_get_frequency(hdev, TPC_PLL, false);
...@@ -137,7 +137,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr, ...@@ -137,7 +137,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
int rc; int rc;
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto fail; goto fail;
} }
...@@ -167,7 +167,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr, ...@@ -167,7 +167,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, IC_PLL, false); value = hl_get_frequency(hdev, IC_PLL, false);
...@@ -186,7 +186,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr, ...@@ -186,7 +186,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
int rc; int rc;
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto fail; goto fail;
} }
...@@ -216,7 +216,7 @@ static ssize_t mme_clk_curr_show(struct device *dev, ...@@ -216,7 +216,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, MME_PLL, true); value = hl_get_frequency(hdev, MME_PLL, true);
...@@ -233,7 +233,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev, ...@@ -233,7 +233,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, TPC_PLL, true); value = hl_get_frequency(hdev, TPC_PLL, true);
...@@ -250,7 +250,7 @@ static ssize_t ic_clk_curr_show(struct device *dev, ...@@ -250,7 +250,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
long value; long value;
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
value = hl_get_frequency(hdev, IC_PLL, true); value = hl_get_frequency(hdev, IC_PLL, true);
...@@ -266,7 +266,7 @@ static ssize_t pm_mng_profile_show(struct device *dev, ...@@ -266,7 +266,7 @@ static ssize_t pm_mng_profile_show(struct device *dev,
{ {
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
return sprintf(buf, "%s\n", return sprintf(buf, "%s\n",
...@@ -280,7 +280,7 @@ static ssize_t pm_mng_profile_store(struct device *dev, ...@@ -280,7 +280,7 @@ static ssize_t pm_mng_profile_store(struct device *dev,
{ {
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto out; goto out;
} }
...@@ -335,7 +335,7 @@ static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr, ...@@ -335,7 +335,7 @@ static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr,
{ {
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev)) if (!hl_device_operational(hdev, NULL))
return -ENODEV; return -ENODEV;
return sprintf(buf, "%u\n", hdev->high_pll); return sprintf(buf, "%u\n", hdev->high_pll);
...@@ -348,7 +348,7 @@ static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr, ...@@ -348,7 +348,7 @@ static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr,
long value; long value;
int rc; int rc;
if (hl_device_disabled_or_in_reset(hdev)) { if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV; count = -ENODEV;
goto out; goto out;
} }
......
...@@ -242,7 +242,8 @@ enum gaudi_engine_id { ...@@ -242,7 +242,8 @@ enum gaudi_engine_id {
enum hl_device_status { enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET, HL_DEVICE_STATUS_IN_RESET,
HL_DEVICE_STATUS_MALFUNCTION HL_DEVICE_STATUS_MALFUNCTION,
HL_DEVICE_STATUS_NEEDS_RESET
}; };
/* Opcode for management ioctl /* Opcode for management ioctl
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment