Commit 4dee7a71 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2022-02-28' of...

Merge tag 'misc-habanalabs-next-2022-02-28' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.18:

- Add new feature of recording time-stamp when a completion
  queue counter reaches a target value as determined by the
  userspace application. This is used by the graph compiler
  to accurately measure the time it takes for certain workloads
  to execute, which helps to fine-tune future compilations.

- Add two new attributes to sysfs that expose the VRM and
  f/w OS version

- Add a delay to the reset path that allows the driver to
  receive and handle additional events from the f/w before
  doing the reset. This can help when debugging why a reset
  event was received from the f/w.

- Re-factor some of the sysfs code in the driver. Mainly,
  move functions from hwmgr.c to more relevant files and
  totally remove hwmgr.c file.

- Fix multiple bugs such as races, use-after-free, ignoring
  error codes, etc.

- As usual, multiple minor changes and small fixes.

* tag 'misc-habanalabs-next-2022-02-28' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (50 commits)
  habanalabs: remove deprecated firmware states
  habanalabs: add an option to delay a device reset
  habanalabs: Add check for pci_enable_device
  habanalabs: Fix reset upon device release bug
  habanalabs: make sure device mem alloc is page aligned
  habanalabs/gaudi: add missing handling of NIC related events
  habanalabs/gaudi: handle axi errors from NIC engines
  habanalabs: allow user to set allocation page size
  habanalabs: use kernel-doc for memory ioctl documentation
  habanalabs: avoid using an uninitialized variable
  habanalabs: set max power on device init per ASIC
  habanalabs: use proper max_power variable for device utilization
  habanalabs: enable stop-on-error debugfs setting per ASIC
  habanalabs: change function to static
  habanalabs: add missing include of vmalloc.h
  habanalabs: fix use-after-free bug
  habanalabs: rephrase error messages in PCI initialization
  habanalabs: fix spelling mistake
  habanalabs: Timestamps buffers registration
  habanalabs: fix race when waiting on encaps signal
  ...
parents 0245107a 655221c5
......@@ -12,24 +12,7 @@ What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
Date: May 2020
KernelVersion: 5.8
Contact: ogabbay@kernel.org
Description: Allow the root user to disable/enable in runtime the clock
gating mechanism in Gaudi. Due to how Gaudi is built, the
clock gating needs to be disabled in order to access the
registers of the TPC and MME engines. This is sometimes needed
during debug by the user and hence the user needs this option.
The user can supply a bitmask value, each bit represents
a different engine to disable/enable its clock gating feature.
The bitmask is composed of 20 bits:
======= ============
0 - 7 DMA channels
8 - 11 MME engines
12 - 19 TPC engines
======= ============
The bit's location of a specific engine can be determined
using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
are defined in uapi habanalabs.h file in enum gaudi_engine_id
Description: This setting is now deprecated as clock gating is handled solely by the f/w
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date: Jan 2019
......@@ -239,6 +222,7 @@ KernelVersion: 5.6
Contact: ogabbay@kernel.org
Description: Sets the stop-on-error option for the device engines. Value of
"0" is for disable, otherwise enable.
Relevant only for GOYA and GAUDI.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date: Sep 2021
......
......@@ -69,6 +69,12 @@ KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the device's version from the eFuse
What: /sys/class/habanalabs/hl<n>/fw_os_ver
Date: Dec 2021
KernelVersion: 5.18
Contact: ogabbay@kernel.org
Description: Version of the firmware OS running on the device's CPU
What: /sys/class/habanalabs/hl<n>/hard_reset
Date: Jan 2019
KernelVersion: 5.1
......@@ -115,7 +121,7 @@ What: /sys/class/habanalabs/hl<n>/infineon_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's power supply F/W code
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
What: /sys/class/habanalabs/hl<n>/max_power
Date: Jan 2019
......@@ -221,3 +227,9 @@ Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the u-boot running on the device's CPU
What: /sys/class/habanalabs/hl<n>/vrm_ver
Date: Jan 2022
KernelVersion: 5.18
Contact: ogabbay@kernel.org
Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
......@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
common/state_dump.o common/hwmgr.o
common/state_dump.o
......@@ -424,8 +424,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_cb_args *args = data;
struct hl_device *hdev = hpriv->hdev;
u64 handle = 0, device_va = 0;
enum hl_device_status status;
u64 handle = 0, device_va;
u32 usage_cnt = 0;
int rc;
......@@ -464,6 +464,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
args->in.flags,
&usage_cnt,
&device_va);
if (rc)
break;
memset(&args->out, 0, sizeof(args->out));
......
......@@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
pci_set_power_state(hdev->pdev, PCI_D0);
pci_restore_state(hdev->pdev);
rc = pci_enable_device(hdev->pdev);
if (rc < 0)
return rc;
} else if (value == 2) {
pci_save_state(hdev->pdev);
pci_disable_device(hdev->pdev);
......@@ -1054,42 +1056,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
/*
 * debugfs read handler of the clk_gate entry.
 *
 * Formats the device's clock gating mask as a hexadecimal string and copies
 * it to the user buffer. A read at a non-zero offset returns 0.
 */
static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *dbg_entry = file_inode(f)->i_private;
	char mask_str[200];
	size_t mask_len;

	if (*ppos != 0)
		return 0;

	sprintf(mask_str, "0x%llx\n", dbg_entry->hdev->clock_gating_mask);

	/* The terminating NUL byte is made available to the reader as well */
	mask_len = strlen(mask_str) + 1;

	return simple_read_from_buffer(buf, count, ppos, mask_str, mask_len);
}
/*
 * debugfs write handler of the clk_gate entry.
 *
 * Parses a base-16 bitmask from the user buffer, stores it as the device's
 * clock gating mask and calls the ASIC set_clock_gating() callback.
 * While the device is in reset the write is rejected with a rate-limited
 * warning and 0 is returned.
 */
static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *dbg_entry = file_inode(f)->i_private;
	struct hl_device *hdev = dbg_entry->hdev;
	u64 new_mask;
	ssize_t err;

	if (hdev->reset_info.in_reset) {
		dev_warn_ratelimited(hdev->dev,
			"Can't change clock gating during reset\n");
		return 0;
	}

	err = kstrtoull_from_user(buf, count, 16, &new_mask);
	if (err)
		return err;

	hdev->clock_gating_mask = new_mask;
	hdev->asic_funcs->set_clock_gating(hdev);

	/* Report the whole buffer as consumed */
	return count;
}
......@@ -1101,6 +1073,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
char tmp_buf[200];
ssize_t rc;
if (!hdev->asic_prop.configurable_stop_on_err)
return -EOPNOTSUPP;
if (*ppos)
return 0;
......@@ -1119,6 +1094,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
u32 value;
ssize_t rc;
if (!hdev->asic_prop.configurable_stop_on_err)
return -EOPNOTSUPP;
if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't change stop on error during reset\n");
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -13,6 +13,8 @@
#include <linux/pci.h>
#include <linux/hwmon.h>
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
......@@ -145,6 +147,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
hl_release_pending_user_interrupts(hpriv->hdev);
hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
if (!hl_hpriv_put(hpriv))
......@@ -209,6 +212,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
case HL_MMAP_TYPE_BLOCK:
return hl_hw_block_mmap(hpriv, vma);
case HL_MMAP_TYPE_TS_BUFF:
return hl_ts_mmap(hpriv, vma);
}
return -EINVAL;
......@@ -410,10 +416,10 @@ static int device_early_init(struct hl_device *hdev)
goto free_cq_wq;
}
hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
if (!hdev->sob_reset_wq) {
hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
if (!hdev->ts_free_obj_wq) {
dev_err(hdev->dev,
"Failed to allocate SOB reset workqueue\n");
"Failed to allocate Timestamp registration free workqueue\n");
rc = -ENOMEM;
goto free_eq_wq;
}
......@@ -422,7 +428,7 @@ static int device_early_init(struct hl_device *hdev)
GFP_KERNEL);
if (!hdev->hl_chip_info) {
rc = -ENOMEM;
goto free_sob_reset_wq;
goto free_ts_free_wq;
}
rc = hl_mmu_if_set_funcs(hdev);
......@@ -461,8 +467,8 @@ static int device_early_init(struct hl_device *hdev)
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
kfree(hdev->hl_chip_info);
free_sob_reset_wq:
destroy_workqueue(hdev->sob_reset_wq);
free_ts_free_wq:
destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
destroy_workqueue(hdev->eq_wq);
free_cq_wq:
......@@ -501,7 +507,7 @@ static void device_early_fini(struct hl_device *hdev)
kfree(hdev->hl_chip_info);
destroy_workqueue(hdev->sob_reset_wq);
destroy_workqueue(hdev->ts_free_obj_wq);
destroy_workqueue(hdev->eq_wq);
destroy_workqueue(hdev->device_reset_work.wq);
......@@ -610,7 +616,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
u64 max_power, curr_power, dc_power, dividend;
int rc;
max_power = hdev->asic_prop.max_power_default;
max_power = hdev->max_power;
dc_power = hdev->asic_prop.dc_power_default;
rc = hl_fw_cpucp_power_get(hdev, &curr_power);
......@@ -644,9 +650,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
hdev->in_debug = 0;
if (!hdev->reset_info.hard_reset_pending)
hdev->asic_funcs->set_clock_gating(hdev);
goto out;
}
......@@ -657,7 +660,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
goto out;
}
hdev->asic_funcs->disable_clock_gating(hdev);
hdev->in_debug = 1;
out:
......@@ -685,7 +687,8 @@ static void take_release_locks(struct hl_device *hdev)
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
bool skip_wq_flush)
{
if (hard_reset)
device_late_fini(hdev);
......@@ -698,7 +701,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r
hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
hl_cs_rollback_all(hdev, skip_wq_flush);
/* Release all pending user interrupts, each pending user interrupt
* holds a reference to user context
......@@ -978,7 +981,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
reset_upon_device_release = false, schedule_hard_reset = false;
reset_upon_device_release = false, schedule_hard_reset = false,
skip_wq_flush, delay_reset;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
struct hl_ctx *ctx;
int i, rc;
......@@ -991,6 +995,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hard_reset = !!(flags & HL_DRV_RESET_HARD);
from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
delay_reset = !!(flags & HL_DRV_RESET_DELAY);
if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
hard_instead_soft = true;
......@@ -1040,6 +1046,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->reset_info.in_reset = 1;
spin_unlock(&hdev->reset_info.lock);
if (delay_reset)
usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
handle_reset_trigger(hdev, flags);
/* This still allows the completion of some KDMA ops */
......@@ -1076,7 +1085,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return 0;
}
cleanup_resources(hdev, hard_reset, fw_reset);
cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
kill_processes:
if (hard_reset) {
......@@ -1232,7 +1241,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
goto out_err;
}
hl_set_max_power(hdev);
hl_fw_set_max_power(hdev);
} else {
rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
if (rc) {
......@@ -1297,11 +1306,14 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->reset_info.hard_reset_cnt++;
} else if (reset_upon_device_release) {
dev_err(hdev->dev, "Failed to reset device after user release\n");
flags |= HL_DRV_RESET_HARD;
flags &= ~HL_DRV_RESET_DEV_RELEASE;
hard_reset = true;
goto again;
} else {
dev_err(hdev->dev, "Failed to do soft-reset\n");
hdev->reset_info.soft_reset_cnt++;
flags |= HL_DRV_RESET_HARD;
hard_reset = true;
goto again;
}
......@@ -1538,7 +1550,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
/* Need to call this again because the max power might change,
* depending on card type for certain ASICs
*/
hl_set_max_power(hdev);
if (hdev->asic_prop.set_max_power_on_device_init)
hl_fw_set_max_power(hdev);
/*
* hl_hwmon_init() must be called after device_late_init(), because only
......@@ -1682,7 +1695,7 @@ void hl_device_fini(struct hl_device *hdev)
hl_hwmon_fini(hdev);
cleanup_resources(hdev, true, false);
cleanup_resources(hdev, true, false, false);
/* Kill processes here after CS rollback. This is because the process
* can't really exit until all its CSs are done, which is what we
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -214,7 +214,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
dma_addr_t pkt_dma_addr;
struct hl_bd *sent_bd;
u32 tmp, expected_ack_val, pi;
int rc = 0;
int rc;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
&pkt_dma_addr);
......@@ -228,8 +228,11 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
mutex_lock(&hdev->send_cpu_message_lock);
if (hdev->disabled)
/* CPU-CP messages can be sent during soft-reset */
if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
rc = 0;
goto out;
}
if (hdev->device_cpu_disabled) {
rc = -EIO;
......@@ -958,15 +961,17 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc)
if (rc) {
dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
return rc;
}
pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
return rc;
return 0;
}
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
......@@ -1202,8 +1207,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
hdev,
cpu_boot_status_reg,
status,
(status == CPU_BOOT_STATUS_IN_UBOOT) ||
(status == CPU_BOOT_STATUS_DRAM_RDY) ||
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
......@@ -2682,3 +2685,138 @@ int hl_fw_init_cpu(struct hl_device *hdev)
hl_fw_dynamic_init_cpu(hdev, fw_loader) :
hl_fw_static_init_cpu(hdev, fw_loader);
}
void hl_fw_set_pll_profile(struct hl_device *hdev)
{
hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
}
/**
 * hl_fw_get_clk_rate() - retrieve the current and maximum clock rates
 * @hdev: pointer to the habanalabs device structure
 * @cur_clk: filled with the current frequency value divided by 1,000,000
 * @max_clk: filled with the maximum frequency value divided by 1,000,000
 *
 * When the device has no PCI device attached (hdev->pdev is NULL) both
 * outputs are set to 0 and no query is sent.
 *
 * Return: 0 on success, -ENODEV if the device is not operational, otherwise
 *         the negative value returned by hl_fw_get_frequency(). Note that
 *         @max_clk may already be written when the second query fails.
 */
int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
	long max_freq, cur_freq;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	if (!hdev->pdev) {
		*max_clk = 0;
		*cur_clk = 0;
		return 0;
	}

	/* First query: the maximum clock (curr == false) */
	max_freq = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
	if (max_freq < 0) {
		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", max_freq);
		return max_freq;
	}

	*max_clk = (max_freq / 1000 / 1000);

	/* Second query: the current clock (curr == true) */
	cur_freq = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
	if (cur_freq < 0) {
		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", cur_freq);
		return cur_freq;
	}

	*cur_clk = (cur_freq / 1000 / 1000);

	return 0;
}
/**
 * hl_fw_get_frequency() - query a PLL frequency through a CPU-CP packet
 * @hdev: pointer to the habanalabs device structure
 * @pll_index: PLL index, translated by get_used_pll_index() before it is sent
 * @curr: true to send CPUCP_PACKET_FREQUENCY_CURR_GET, false to send
 *        CPUCP_PACKET_FREQUENCY_GET
 *
 * Return: the result reported for the packet on success, otherwise the
 *         non-zero error code of the step that failed.
 */
long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
	u32 fw_pll_idx, opcode;
	struct cpucp_packet pkt;
	u64 fw_reply;
	int err;

	err = get_used_pll_index(hdev, pll_index, &fw_pll_idx);
	if (err)
		return err;

	opcode = curr ? CPUCP_PACKET_FREQUENCY_CURR_GET : CPUCP_PACKET_FREQUENCY_GET;

	memset(&pkt, 0, sizeof(pkt));
	pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.pll_index = cpu_to_le32(fw_pll_idx);

	err = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &fw_reply);
	if (err) {
		dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
			fw_pll_idx, err);
		return err;
	}

	return (long) fw_reply;
}
/**
 * hl_fw_set_frequency() - send a CPUCP_PACKET_FREQUENCY_SET packet for a PLL
 * @hdev: pointer to the habanalabs device structure
 * @pll_index: PLL index, translated by get_used_pll_index() before it is sent
 * @freq: value placed in the packet's value field
 *
 * Nothing is returned to the caller: a failed index translation is silent
 * and a failed send is only logged.
 */
void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
	struct cpucp_packet pkt;
	u32 fw_pll_idx;
	int err;

	if (get_used_pll_index(hdev, pll_index, &fw_pll_idx))
		return;

	memset(&pkt, 0, sizeof(pkt));
	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.pll_index = cpu_to_le32(fw_pll_idx);
	pkt.value = cpu_to_le64(freq);

	err = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
	if (!err)
		return;

	dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
		fw_pll_idx, err);
}
long hl_fw_get_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
if (rc) {
dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
return rc;
}
return result;
}
void hl_fw_set_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
int rc;
/* TODO: remove this after simulator supports this packet */
if (!hdev->pdev)
return;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(hdev->max_power);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
if (rc)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
This diff is collapsed.
......@@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_cb_mgr_init(&hpriv->cb_mgr);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
hl_ts_mgr_init(&hpriv->ts_mem_mgr);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
......@@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->restore_phase_mutex);
......@@ -256,7 +258,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
hdev->cpu_queues_enable = 1;
hdev->heartbeat = 1;
hdev->mmu_enable = 1;
hdev->clock_gating_mask = ULONG_MAX;
hdev->sram_scrambler_enable = 1;
hdev->dram_scrambler_enable = 1;
hdev->bmc_enable = 1;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -92,8 +92,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
hw_ip.first_available_interrupt_id =
prop->first_available_user_msix_interrupt;
hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt;
hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
hw_ip.server_type = prop->server_type;
return copy_to_user(out, &hw_ip,
......@@ -251,13 +251,12 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
&clk_rate.max_clk_rate_mhz);
rc = hl_fw_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz);
if (rc)
return rc;
return copy_to_user(out, &clk_rate,
min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
return copy_to_user(out, &clk_rate, min_t(size_t, max_size, sizeof(clk_rate)))
? -EFAULT : 0;
}
static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2019-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
}
/**
 * hl_get_clk_rate() - retrieve the current and maximum clock rates
 * @hdev: pointer to the habanalabs device structure
 * @cur_clk: filled with the current frequency value divided by 1,000,000
 * @max_clk: filled with the maximum frequency value divided by 1,000,000
 *
 * Return: 0 on success, -ENODEV if the device is not operational, otherwise
 *         the negative value returned by hl_get_frequency(). Note that
 *         @max_clk may already be written when the second query fails.
 */
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
	long max_freq, cur_freq;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	/* First query: the maximum clock (curr == false) */
	max_freq = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
	if (max_freq < 0) {
		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
			max_freq);
		return max_freq;
	}

	*max_clk = (max_freq / 1000 / 1000);

	/* Second query: the current clock (curr == true) */
	cur_freq = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
	if (cur_freq < 0) {
		dev_err(hdev->dev,
			"Failed to retrieve device current clock %ld\n",
			cur_freq);
		return cur_freq;
	}

	*cur_clk = (cur_freq / 1000 / 1000);

	return 0;
}
/*
 * sysfs show handler of clk_max_freq_mhz.
 *
 * Queries the maximum frequency of the PLL selected by
 * asic_prop.clk_pll_index, caches it in asic_prop.max_freq_value and prints
 * it divided by 1,000,000.
 *
 * Returns the number of bytes written to @buf, -ENODEV if the device is not
 * operational, or the negative value returned by hl_get_frequency().
 */
static ssize_t clk_max_freq_mhz_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	long value;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);

	/*
	 * A negative value is an error code (hl_get_clk_rate() treats it the
	 * same way). Propagate it instead of caching it as the max frequency
	 * and printing it as a huge unsigned number.
	 */
	if (value < 0)
		return value;

	hdev->asic_prop.max_freq_value = value;

	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
/*
 * sysfs store handler of clk_max_freq_mhz.
 *
 * Parses an unsigned number from @buf, multiplies it by 1,000,000, caches the
 * result in asic_prop.max_freq_value and passes it to hl_set_frequency() for
 * the PLL selected by asic_prop.clk_pll_index.
 *
 * Returns @count on success, -ENODEV if the device is not operational, or
 * -EINVAL if the input cannot be parsed.
 */
static ssize_t clk_max_freq_mhz_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	u64 freq_mhz;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	/* Every parsing failure is reported to the writer as -EINVAL */
	if (kstrtoull(buf, 0, &freq_mhz))
		return -EINVAL;

	hdev->asic_prop.max_freq_value = freq_mhz * 1000 * 1000;

	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
				hdev->asic_prop.max_freq_value);

	return count;
}
/*
 * sysfs show handler of clk_cur_freq_mhz.
 *
 * Queries the current frequency of the PLL selected by
 * asic_prop.clk_pll_index and prints it divided by 1,000,000.
 *
 * Returns the number of bytes written to @buf, -ENODEV if the device is not
 * operational, or the negative value returned by hl_get_frequency().
 */
static ssize_t clk_cur_freq_mhz_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	long value;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);

	/*
	 * A negative value is an error code (hl_get_clk_rate() treats it the
	 * same way). Propagate it instead of printing it as a huge unsigned
	 * number.
	 */
	if (value < 0)
		return value;

	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
/* Read/write attribute backed by clk_max_freq_mhz_show() and clk_max_freq_mhz_store() */
static DEVICE_ATTR_RW(clk_max_freq_mhz);
/* Read-only attribute backed by clk_cur_freq_mhz_show() */
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
/* NULL-terminated attribute list handed out by hl_add_device_attr() */
static struct attribute *hl_dev_attrs[] = {
&dev_attr_clk_max_freq_mhz.attr,
&dev_attr_clk_cur_freq_mhz.attr,
NULL,
};
/*
 * Point the caller's attribute group at the clock frequency attributes
 * (hl_dev_attrs). Only the attrs field of @dev_attr_grp is written; @hdev is
 * not used.
 */
void hl_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp)
{
	dev_attr_grp->attrs = hl_dev_attrs;
}
......@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
return IRQ_HANDLED;
}
/*
 * hl_ts_free_objects - work handler of the timestamp free-objects workqueue.
 * @work: work item embedded in a struct timestamp_reg_work_obj
 *
 * Walks the list attached to the job and, for every node, drops the
 * references on its ts_buff and cq_cb objects and frees the node. The list
 * head and the job itself are freed afterwards.
 */
static void hl_ts_free_objects(struct work_struct *work)
{
	struct timestamp_reg_work_obj *job =
			container_of(work, struct timestamp_reg_work_obj, free_obj);
	struct list_head *pending_list = job->free_obj_head;
	struct timestamp_reg_free_node *node, *next_node;
	struct hl_device *hdev = job->hdev;

	/* _safe iteration is required since each node is freed while walking */
	list_for_each_entry_safe(node, next_node, pending_list, free_objects_node) {
		dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
			node->ts_buff, node->cq_cb);

		hl_ts_put(node->ts_buff);
		hl_cb_put(node->cq_cb);
		kfree(node);
	}

	kfree(pending_list);
	kfree(job);
}
/*
 * handle_registration_node - complete one timestamp registration node.
 * @hdev: pointer to the habanalabs device structure
 * @pend: the pending registration node to complete
 * @free_list: in/out pointer to the list of deferred "put" nodes; the list
 *             head is allocated here on first use when *free_list is NULL
 *
 * Called by handle_user_cq() with the wait_list_lock spinlock held.
 * Writes the current timestamp to the node's timestamp address, removes the
 * node from the wait list and marks it as no longer in use.
 *
 * The references on the ts_buff and cq_cb objects cannot be dropped here:
 * we are under a spinlock and the release functions are long and might
 * sleep, which is not allowed in irq context. Instead, a free node holding
 * both objects is added to *free_list, and the caller sends that list to a
 * dedicated workqueue which performs the actual put.
 *
 * Return: 0 on success, -ENOMEM if the list head or the free node cannot be
 *         allocated. On failure @pend is left untouched.
 */
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list)
{
struct timestamp_reg_free_node *free_node;
u64 timestamp;
if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*free_list))
return -ENOMEM;
INIT_LIST_HEAD(*free_list);
}
/* Allocate before touching pend, so a failure leaves the node on the wait list */
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
timestamp = ktime_get_ns();
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
list_del(&pend->wait_list_node);
/* Mark kernel CB node as free */
pend->ts_reg_info.in_use = 0;
/* Putting the refcount for ts_buff and cq_cb objects will be handled
 * in workqueue context, just add job to free_list.
 */
free_node->ts_buff = pend->ts_reg_info.ts_buff;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
list_add(&free_node->free_objects_node, *free_list);
return 0;
}
/*
 * handle_user_cq - handle the pending nodes of a user completion queue interrupt.
 *
 * Under wait_list_lock, goes over the wait list. Every node whose CQ counter
 * reached its target value, or which has no cq_kernel_addr, is either handled
 * as a timestamp registration node (when ts_reg_info.ts_buff is set) or has
 * its fence completed with the time sampled on entry.
 * After the lock is released, the collected free list (if any) is queued to
 * hdev->ts_free_obj_wq; otherwise the pre-allocated job is freed.
 *
 * NOTE(review): this span appears to contain both the pre-change and the
 * post-change lines of the diff (duplicate 'pend' declaration, two loop
 * headers) - confirm against the real source file.
 */
static void handle_user_cq(struct hl_device *hdev,
struct hl_user_interrupt *user_cq)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
ktime_t now = ktime_get();
int rc;
/* For registration nodes:
 * As part of handling the registration nodes, we should put the refcount
 * of some objects. The problem is that we cannot do that under a spinlock
 * or in irq handler context at all (since the release functions are long
 * and might sleep), so that part is handled in workqueue context.
 * To avoid handling a kmalloc failure, which would compel us to roll back
 * actions and move the nodes hanging on the free list back to the
 * interrupt wait list, we always allocate the WQ job at the beginning.
 */
job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return;
spin_lock(&user_cq->wait_list_lock);
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
if ((pend->cq_kernel_addr &&
*(pend->cq_kernel_addr) >= pend->cq_target_value) ||
list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
if (pend->ts_reg_info.ts_buff) {
/* After the first failure, the remaining registration nodes are left on the wait list */
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head);
if (rc)
reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = now;
complete_all(&pend->fence.completion);
}
}
}
spin_unlock(&user_cq->wait_list_lock);
/* The job is only needed when at least the free list head was allocated */
if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head;
job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else {
kfree(job);
}
}
/**
......
This diff is collapsed.
......@@ -662,3 +662,58 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
return rc;
}
/*
 * Extract the next hop address from a PTE.
 *
 * Returns the PTE bits selected by HOP_PHYS_ADDR_MASK when the
 * PAGE_PRESENT_MASK bit is set, otherwise ULLONG_MAX. @ctx is not used.
 */
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (!(curr_pte & PAGE_PRESENT_MASK))
		return ULLONG_MAX;

	return curr_pte & HOP_PHYS_ADDR_MASK;
}
/**
 * hl_mmu_get_hop_pte_phys_addr() - compute the address of a PTE inside a HOP
 * @ctx: pointer to the context structure (used to reach the device).
 * @mmu_prop: MMU properties holding the number of hops and the per-hop
 *            mask/shift values.
 * @hop_idx: HOP index.
 * @hop_addr: HOP address.
 * @virt_addr: virtual address for the translation.
 *
 * Return: @hop_addr plus the PTE offset selected by @virt_addr on success,
 *         otherwise U64_MAX (when @hop_idx is not below mmu_prop->num_hops).
 */
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u8 hop_idx, u64 hop_addr, u64 virt_addr)
{
	u64 hop_mask, hop_shift, pte_idx;

	if (hop_idx >= mmu_prop->num_hops) {
		dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx);
		return U64_MAX;
	}

	/* currently max number of HOPs is 6 */
	switch (hop_idx) {
	case 0:
		hop_mask = mmu_prop->hop0_mask;
		hop_shift = mmu_prop->hop0_shift;
		break;
	case 1:
		hop_mask = mmu_prop->hop1_mask;
		hop_shift = mmu_prop->hop1_shift;
		break;
	case 2:
		hop_mask = mmu_prop->hop2_mask;
		hop_shift = mmu_prop->hop2_shift;
		break;
	case 3:
		hop_mask = mmu_prop->hop3_mask;
		hop_shift = mmu_prop->hop3_shift;
		break;
	case 4:
		hop_mask = mmu_prop->hop4_mask;
		hop_shift = mmu_prop->hop4_shift;
		break;
	default:
		/* Every remaining valid index uses the hop5 properties */
		hop_mask = mmu_prop->hop5_mask;
		hop_shift = mmu_prop->hop5_shift;
		break;
	}

	/* Index of the PTE inside the hop, taken from the virtual address bits */
	pte_idx = (virt_addr & hop_mask) >> hop_shift;

	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * pte_idx;
}
......@@ -217,18 +217,10 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
mmu_prop->hop4_shift);
}
/*
 * Extract the next hop address from a PTE: the bits selected by
 * HOP_PHYS_ADDR_MASK when the PAGE_PRESENT_MASK bit is set, otherwise
 * ULLONG_MAX. @ctx is not used.
 */
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
}
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
bool *is_new_hop)
{
u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr == ULLONG_MAX) {
hop_addr = alloc_hop(ctx);
......@@ -467,7 +459,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
......@@ -546,7 +538,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
hop1_addr = get_next_hop_addr(ctx, curr_pte);
hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
......@@ -555,7 +547,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
hop2_addr = get_next_hop_addr(ctx, curr_pte);
hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
......@@ -564,7 +556,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = get_next_hop_addr(ctx, curr_pte);
hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
......@@ -582,7 +574,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
}
if (!is_huge) {
hop4_addr = get_next_hop_addr(ctx, curr_pte);
hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
......@@ -845,27 +837,6 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
}
/*
 * Dispatch to the per-hop PTE address helper matching @hop_num (0 to 4).
 * Any other hop number yields U64_MAX.
 */
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
				struct hl_mmu_properties *mmu_prop,
				int hop_num, u64 hop_addr, u64 virt_addr)
{
	u64 pte_addr = U64_MAX;

	switch (hop_num) {
	case 0:
		pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
		break;
	case 1:
		pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
		break;
	case 2:
		pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
		break;
	case 3:
		pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
		break;
	case 4:
		pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
		break;
	default:
		/* Unsupported hop number: keep the U64_MAX error value */
		break;
	}

	return pte_addr;
}
static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops)
{
......@@ -906,7 +877,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
hops->hop_info[0].hop_pte_addr =
get_hop_pte_addr(ctx, mmu_prop, 0,
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hops->hop_info[0].hop_addr, virt_addr);
hops->hop_info[0].hop_pte_val =
hdev->asic_funcs->read_pte(hdev,
......@@ -914,13 +885,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
for (i = 1 ; i < used_hops ; i++) {
hops->hop_info[i].hop_addr =
get_next_hop_addr(ctx,
hl_mmu_get_next_hop_addr(ctx,
hops->hop_info[i - 1].hop_pte_val);
if (hops->hop_info[i].hop_addr == ULLONG_MAX)
return -EFAULT;
hops->hop_info[i].hop_pte_addr =
get_hop_pte_addr(ctx, mmu_prop, i,
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
hops->hop_info[i].hop_addr,
virt_addr);
hops->hop_info[i].hop_pte_val =
......
......@@ -338,9 +338,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
lower_32_bits(outbound_region_end_address));
rc |= hl_pci_iatu_write(hdev, 0x014, 0);
if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
else
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
rc |= hl_pci_iatu_write(hdev, 0x020,
......@@ -411,13 +408,13 @@ int hl_pci_init(struct hl_device *hdev)
rc = hdev->asic_funcs->pci_bars_map(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
dev_err(hdev->dev, "Failed to map PCI BAR addresses\n");
goto disable_device;
}
rc = hdev->asic_funcs->init_iatu(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize iATU\n");
dev_err(hdev->dev, "PCI controller was not initialized successfully\n");
goto unmap_pci_bars;
}
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -9,105 +9,91 @@
#include <linux/pci.h>
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct cpucp_packet pkt;
u32 used_pll_idx;
u64 result;
int rc;
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
if (rc)
return rc;
memset(&pkt, 0, sizeof(pkt));
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
if (curr)
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
else
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
if (value < 0)
return value;
if (rc) {
dev_err(hdev->dev,
"Failed to get frequency of PLL %d, error %d\n",
used_pll_idx, rc);
return rc;
}
hdev->asic_prop.max_freq_value = value;
return (long) result;
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct cpucp_packet pkt;
u32 used_pll_idx;
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
u64 value;
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
if (rc)
return;
if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV;
goto fail;
}
memset(&pkt, 0, sizeof(pkt));
rc = kstrtoull(buf, 0, &value);
if (rc) {
count = -EINVAL;
goto fail;
}
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
pkt.value = cpu_to_le64(freq);
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
if (rc)
dev_err(hdev->dev,
"Failed to set frequency to PLL %d, error %d\n",
used_pll_idx, rc);
fail:
return count;
}
u64 hl_get_max_power(struct hl_device *hdev)
static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct cpucp_packet pkt;
u64 result;
int rc;
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
memset(&pkt, 0, sizeof(pkt));
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
if (value < 0)
return value;
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
if (rc) {
dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
return (u64) rc;
}
static DEVICE_ATTR_RW(clk_max_freq_mhz);
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
return result;
}
static struct attribute *hl_dev_clk_attrs[] = {
&dev_attr_clk_max_freq_mhz.attr,
&dev_attr_clk_cur_freq_mhz.attr,
};
void hl_set_max_power(struct hl_device *hdev)
static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct cpucp_packet pkt;
int rc;
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;
memset(&pkt, 0, sizeof(pkt));
cpucp_info = &hdev->asic_prop.cpucp_info;
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(hdev->max_power);
if (cpucp_info->infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
le32_to_cpu(cpucp_info->infineon_second_stage_version));
else
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
static DEVICE_ATTR_RO(vrm_ver);
if (rc)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
static struct attribute *hl_dev_vrm_attrs[] = {
&dev_attr_vrm_ver.attr,
};
static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
char *buf)
......@@ -158,20 +144,6 @@ static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
}
static ssize_t infineon_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hdev->asic_prop.cpucp_info.infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x\n",
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version),
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version));
else
return sprintf(buf, "%#04x\n",
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
}
static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
......@@ -188,6 +160,14 @@ static ssize_t thermal_ver_show(struct device *dev,
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
}
static ssize_t fw_os_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.fw_os_version);
}
static ssize_t preboot_btl_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
......@@ -323,7 +303,9 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
val = hl_get_max_power(hdev);
val = hl_fw_get_max_power(hdev);
if (val < 0)
return val;
return sprintf(buf, "%lu\n", val);
}
......@@ -348,7 +330,7 @@ static ssize_t max_power_store(struct device *dev,
}
hdev->max_power = value;
hl_set_max_power(hdev);
hl_fw_set_max_power(hdev);
out:
return count;
......@@ -394,7 +376,6 @@ static DEVICE_ATTR_RO(device_type);
static DEVICE_ATTR_RO(fuse_ver);
static DEVICE_ATTR_WO(hard_reset);
static DEVICE_ATTR_RO(hard_reset_cnt);
static DEVICE_ATTR_RO(infineon_ver);
static DEVICE_ATTR_RW(max_power);
static DEVICE_ATTR_RO(pci_addr);
static DEVICE_ATTR_RO(preboot_btl_ver);
......@@ -403,6 +384,7 @@ static DEVICE_ATTR_RO(soft_reset_cnt);
static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver);
static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
......@@ -420,13 +402,13 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_fuse_ver.attr,
&dev_attr_hard_reset.attr,
&dev_attr_hard_reset_cnt.attr,
&dev_attr_infineon_ver.attr,
&dev_attr_max_power.attr,
&dev_attr_pci_addr.attr,
&dev_attr_preboot_btl_ver.attr,
&dev_attr_status.attr,
&dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr,
&dev_attr_fw_os_ver.attr,
NULL,
};
......@@ -441,10 +423,12 @@ static struct attribute_group hl_dev_attr_group = {
};
static struct attribute_group hl_dev_clks_attr_group;
static struct attribute_group hl_dev_vrm_attr_group;
static const struct attribute_group *hl_dev_attr_groups[] = {
&hl_dev_attr_group,
&hl_dev_clks_attr_group,
&hl_dev_vrm_attr_group,
NULL,
};
......@@ -463,13 +447,23 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = {
NULL,
};
/*
 * hl_sysfs_add_dev_clk_attr() - install the common clock attributes in a group.
 * @hdev: habanalabs device structure (not referenced by this function).
 * @dev_clk_attr_grp: attribute group whose ->attrs is set to hl_dev_clk_attrs.
 */
void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp)
{
dev_clk_attr_grp->attrs = hl_dev_clk_attrs;
}
/*
 * hl_sysfs_add_dev_vrm_attr() - install the common VRM attributes in a group.
 * @hdev: habanalabs device structure (not referenced by this function).
 * @dev_vrm_attr_grp: attribute group whose ->attrs is set to hl_dev_vrm_attrs.
 */
void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp)
{
dev_vrm_attr_grp->attrs = hl_dev_vrm_attrs;
}
int hl_sysfs_init(struct hl_device *hdev)
{
int rc;
hdev->max_power = hdev->asic_prop.max_power_default;
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group, &hl_dev_vrm_attr_group);
rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
if (rc) {
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2019-2020 HabanaLabs, Ltd.
* Copyright 2019-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -177,7 +177,6 @@
#define HW_CAP_MSI BIT(6)
#define HW_CAP_CPU_Q BIT(7)
#define HW_CAP_HBM_DMA BIT(8)
#define HW_CAP_CLK_GATE BIT(9)
#define HW_CAP_SRAM_SCRAMBLER BIT(10)
#define HW_CAP_HBM_SCRAMBLER BIT(11)
......@@ -313,8 +312,6 @@ struct gaudi_internal_qman_info {
* struct gaudi_device - ASIC specific manage structure.
* @cpucp_info_get: get information on device from CPU-CP
* @hw_queues_lock: protects the H/W queues from concurrent access.
* @clk_gate_mutex: protects code areas that require clock gating to be disabled
* temporarily
* @internal_qmans: Internal QMANs information. The array size is larger than
* the actual number of internal queues because they are not in
* consecutive order.
......@@ -337,7 +334,6 @@ struct gaudi_device {
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
struct mutex clk_gate_mutex;
struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE];
......@@ -355,8 +351,6 @@ struct gaudi_device {
void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -430,6 +430,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
prop->dmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
......@@ -438,6 +441,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
......@@ -477,6 +483,10 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->use_get_power_for_reset_history = true;
prop->configurable_stop_on_err = true;
prop->set_max_power_on_device_init = true;
return 0;
}
......@@ -893,7 +903,7 @@ int goya_late_init(struct hl_device *hdev)
goya->pm_mng_profile = PM_AUTO;
hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
goya_set_pll_profile(hdev, PLL_LOW);
schedule_delayed_work(&goya->goya_work->work_freq,
usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
......@@ -2700,8 +2710,7 @@ int goya_mmu_init(struct hl_device *hdev)
WREG32_AND(mmSTLB_STLB_FEATURE_EN,
(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
WREG32(mmMMU_MMU_ENABLE, 1);
WREG32(mmMMU_SPI_MASK, 0xF);
......@@ -5341,7 +5350,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
/* Treat as invalidate all because there is no range invalidation
* in Goya
*/
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
return hl_mmu_invalidate_cache(hdev, is_hard, flags);
}
int goya_send_heartbeat(struct hl_device *hdev)
......@@ -5391,16 +5400,6 @@ int goya_cpucp_info_get(struct hl_device *hdev)
return 0;
}
/* goya_set_clock_gating() - empty stub; performs no operation on @hdev. */
static void goya_set_clock_gating(struct hl_device *hdev)
{
/* clock gating not supported in Goya */
}
/* goya_disable_clock_gating() - empty stub; performs no operation on @hdev. */
static void goya_disable_clock_gating(struct hl_device *hdev)
{
/* clock gating not supported in Goya */
}
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s)
{
......@@ -5564,16 +5563,7 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
{
if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
HL_POWER9_HOST_MAGIC) {
dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
hdev->power9_64bit_dma_enable = 1;
hdev->dma_mask = 64;
} else {
dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
hdev->power9_64bit_dma_enable = 0;
hdev->dma_mask = 48;
}
}
u64 goya_get_device_time(struct hl_device *hdev)
......@@ -5727,15 +5717,12 @@ static const struct hl_asic_funcs goya_funcs = {
.debugfs_read_dma = goya_debugfs_read_dma,
.add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile,
.get_events_stat = goya_get_events_stat,
.read_pte = goya_read_pte,
.write_pte = goya_write_pte,
.mmu_invalidate_cache = goya_mmu_invalidate_cache,
.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
.send_heartbeat = goya_send_heartbeat,
.set_clock_gating = goya_set_clock_gating,
.disable_clock_gating = goya_disable_clock_gating,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.non_hard_reset_late_init = goya_non_hard_reset_late_init,
......@@ -5751,7 +5738,6 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
......@@ -5778,6 +5764,7 @@ static const struct hl_asic_funcs goya_funcs = {
.get_sob_addr = &goya_get_sob_addr,
.set_pci_memory_regions = goya_set_pci_memory_regions,
.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
.is_valid_dram_page_size = NULL
};
/*
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -217,8 +217,8 @@ u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp);
int goya_cpucp_info_get(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -11,21 +11,24 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
struct goya_device *goya = hdev->asic_specific;
if (!hdev->pdev)
return;
switch (freq) {
case PLL_HIGH:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
break;
case PLL_LOW:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
break;
case PLL_LAST:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
break;
default:
dev_err(hdev->dev, "unknown frequency setting\n");
......@@ -41,7 +44,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, false);
if (value < 0)
return value;
......@@ -74,7 +77,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
hl_set_frequency(hdev, HL_GOYA_MME_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, value);
goya->mme_clk = value;
fail:
......@@ -90,7 +93,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
if (value < 0)
return value;
......@@ -123,7 +126,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
goya->tpc_clk = value;
fail:
......@@ -139,7 +142,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, false);
if (value < 0)
return value;
......@@ -172,7 +175,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
hl_set_frequency(hdev, HL_GOYA_IC_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, value);
goya->ic_clk = value;
fail:
......@@ -188,7 +191,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, true);
if (value < 0)
return value;
......@@ -205,7 +208,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
if (value < 0)
return value;
......@@ -222,7 +225,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, true);
if (value < 0)
return value;
......@@ -347,7 +350,7 @@ static DEVICE_ATTR_RW(pm_mng_profile);
static DEVICE_ATTR_RW(tpc_clk);
static DEVICE_ATTR_RO(tpc_clk_curr);
static struct attribute *goya_dev_attrs[] = {
static struct attribute *goya_clk_dev_attrs[] = {
&dev_attr_high_pll.attr,
&dev_attr_ic_clk.attr,
&dev_attr_ic_clk_curr.attr,
......@@ -356,11 +359,27 @@ static struct attribute *goya_dev_attrs[] = {
&dev_attr_pm_mng_profile.attr,
&dev_attr_tpc_clk.attr,
&dev_attr_tpc_clk_curr.attr,
NULL,
};
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp)
/*
 * infineon_ver_show() - sysfs show callback for the Infineon version on Goya.
 *
 * Prints infineon_version from the device's cpucp_info, converted from
 * little-endian, and returns the sprintf() result.
 */
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	return sprintf(buf, "%#04x\n",
			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
}
static DEVICE_ATTR_RO(infineon_ver);
static struct attribute *goya_vrm_dev_attrs[] = {
&dev_attr_infineon_ver.attr,
};
void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp)
{
dev_attr_grp->attrs = goya_dev_attrs;
dev_clk_attr_grp->attrs = goya_clk_dev_attrs;
dev_vrm_attr_grp->attrs = goya_vrm_dev_attrs;
}
......@@ -780,6 +780,7 @@ struct cpucp_security_info {
* (0 = functional 1 = binned)
* @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
* (0 = functional 1 = binned)
* @fw_os_version: Firmware OS Version
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
......@@ -807,6 +808,7 @@ struct cpucp_info {
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
__le64 mme_binning_mask;
__u8 fw_os_version[VERSION_MAX_LEN];
};
struct cpucp_mac_addr {
......
......@@ -33,6 +33,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
......@@ -111,6 +112,9 @@ enum cpu_boot_err {
*
* CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed.
*
* CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature
* sensor.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
......@@ -134,6 +138,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
......
......@@ -311,6 +311,16 @@ enum gaudi_async_event_id {
GAUDI_EVENT_FW_ALIVE_S = 645,
GAUDI_EVENT_DEV_RESET_REQ = 646,
GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_STATUS_NIC0_ENG0 = 648,
GAUDI_EVENT_STATUS_NIC0_ENG1 = 649,
GAUDI_EVENT_STATUS_NIC1_ENG0 = 650,
GAUDI_EVENT_STATUS_NIC1_ENG1 = 651,
GAUDI_EVENT_STATUS_NIC2_ENG0 = 652,
GAUDI_EVENT_STATUS_NIC2_ENG1 = 653,
GAUDI_EVENT_STATUS_NIC3_ENG0 = 654,
GAUDI_EVENT_STATUS_NIC3_ENG1 = 655,
GAUDI_EVENT_STATUS_NIC4_ENG0 = 656,
GAUDI_EVENT_STATUS_NIC4_ENG1 = 657,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,
GAUDI_EVENT_FIX_POWER_ENV_E = 659,
GAUDI_EVENT_FIX_THERMAL_ENV_S = 660,
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
* Copyright 2016-2020 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -30,6 +30,9 @@
*/
#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72
/* Max number of elements in timestamps registration buffers */
#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1M (2^20) elements, not bytes */
/*
* Goya queue Numbering
*
......@@ -404,6 +407,8 @@ enum hl_server_type {
* @cpucp_version: The CPUCP f/w version.
* @card_name: The card name as passed by the f/w.
* @dram_page_size: The DRAM physical page size.
* @number_of_user_interrupts: The number of interrupts that are available to the userspace
* application to use. Relevant for Gaudi2 and later.
*/
struct hl_info_hw_ip_info {
__u64 sram_base_address;
......@@ -428,6 +433,9 @@ struct hl_info_hw_ip_info {
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
__u64 reserved2;
__u64 dram_page_size;
__u32 reserved3;
__u16 number_of_user_interrupts;
__u16 pad2;
};
struct hl_info_dram_usage {
......@@ -690,10 +698,12 @@ struct hl_cb_in {
__u64 cb_handle;
/* HL_CB_OP_* */
__u32 op;
/* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
* will be allocated, regardless of this parameter's value, is PAGE_SIZE
*/
__u32 cb_size;
/* Context ID - Currently not in use */
__u32 ctx_id;
/* HL_CB_FLAGS_* */
......@@ -959,6 +969,7 @@ union hl_cs_args {
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10
#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
......@@ -1031,6 +1042,20 @@ struct hl_wait_cs_in {
* relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
*/
__u64 cq_counters_offset;
/*
* timestamp_handle is the handle of the timestamps buffer.
* relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
*/
__u64 timestamp_handle;
/*
* Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above.
* upon interrupt, if the cq reached the target value then driver will write
* timestamp to this offset.
* relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
*/
__u64 timestamp_offset;
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
......@@ -1077,100 +1102,119 @@ union hl_wait_cs_args {
*/
#define HL_MEM_OP_EXPORT_DMABUF_FD 5
/* Opcode to create a timestamps pool for user interrupts registration support.
* The memory is allocated by the kernel driver: a timestamp buffer, for which
* the user receives a handle to use with mmap, and another internal buffer
* used by the driver for registration management.
* The memory is freed when the user closes the file descriptor (ctx close).
*/
#define HL_MEM_OP_TS_ALLOC 6
/* Memory flags */
#define HL_MEM_CONTIGUOUS 0x1
#define HL_MEM_SHARED 0x2
#define HL_MEM_USERPTR 0x4
#define HL_MEM_FORCE_HINT 0x8
/**
* struct hl_mem_in - structure that handles input args for memory IOCTL
* @union arg: union of structures to be used based on the input operation
* @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions).
* @flags: flags for the memory operation (one of the HL_MEM_* definitions).
* For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags.
* @ctx_id: context ID - currently not in use.
* @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode.
*/
struct hl_mem_in {
union {
/* HL_MEM_OP_ALLOC- allocate device memory */
/**
* structure for device memory allocation (used with the HL_MEM_OP_ALLOC op)
* @mem_size: memory size to allocate
* @page_size: page size to use on allocation. when the value is 0 the default page
* size will be taken.
*/
struct {
/* Size to alloc */
__u64 mem_size;
__u64 page_size;
} alloc;
/* HL_MEM_OP_FREE - free device memory */
/**
* structure for free-ing device memory (used with the HL_MEM_OP_FREE op)
* @handle: handle returned from HL_MEM_OP_ALLOC
*/
struct {
/* Handle returned from HL_MEM_OP_ALLOC */
__u64 handle;
} free;
/* HL_MEM_OP_MAP - map device memory */
struct {
/*
* Requested virtual address of mapped memory.
* The driver will try to map the requested region to
* this hint address, as long as the address is valid
* and not already mapped. The user should check the
* returned address of the IOCTL to make sure he got
* the hint address. Passing 0 here means that the
* driver will choose the address itself.
/**
* structure for mapping device memory (used with the HL_MEM_OP_MAP op)
* @hint_addr: requested virtual address of mapped memory.
* the driver will try to map the requested region to this hint
* address, as long as the address is valid and not already mapped.
* the user should check the returned address of the IOCTL to make
* sure he got the hint address.
* passing 0 here means that the driver will choose the address itself.
* @handle: handle returned from HL_MEM_OP_ALLOC.
*/
struct {
__u64 hint_addr;
/* Handle returned from HL_MEM_OP_ALLOC */
__u64 handle;
} map_device;
/* HL_MEM_OP_MAP - map host memory */
/**
* structure for mapping host memory (used with the HL_MEM_OP_MAP op)
* @host_virt_addr: address of allocated host memory.
* @hint_addr: requested virtual address of mapped memory.
* the driver will try to map the requested region to this hint
* address, as long as the address is valid and not already mapped.
* the user should check the returned address of the IOCTL to make
* sure he got the hint address.
* passing 0 here means that the driver will choose the address itself.
* @mem_size: size of allocated host memory.
*/
struct {
/* Address of allocated host memory */
__u64 host_virt_addr;
/*
* Requested virtual address of mapped memory.
* The driver will try to map the requested region to
* this hint address, as long as the address is valid
* and not already mapped. The user should check the
* returned address of the IOCTL to make sure he got
* the hint address. Passing 0 here means that the
* driver will choose the address itself.
*/
__u64 hint_addr;
/* Size of allocated host memory */
__u64 mem_size;
} map_host;
/* HL_MEM_OP_MAP_BLOCK - map a hw block */
struct {
/*
* HW block address to map, a handle and size will be
* returned to the user and will be used to mmap the
* relevant block. Only addresses from configuration
* space are allowed.
/**
* structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op)
* @block_addr: HW block address to map, a handle and size will be returned
* to the user and will be used to mmap the relevant block.
* only addresses from configuration space are allowed.
*/
struct {
__u64 block_addr;
} map_block;
/* HL_MEM_OP_UNMAP - unmap host memory */
/**
* structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op)
* @device_virt_addr: virtual address returned from HL_MEM_OP_MAP
*/
struct {
/* Virtual address returned from HL_MEM_OP_MAP */
__u64 device_virt_addr;
} unmap;
/* HL_MEM_OP_EXPORT_DMABUF_FD */
struct {
/* Handle returned from HL_MEM_OP_ALLOC. In Gaudi,
* where we don't have MMU for the device memory, the
* driver expects a physical address (instead of
* a handle) in the device memory space.
/**
* structure for exporting DMABUF object (used with
* the HL_MEM_OP_EXPORT_DMABUF_FD op)
* @handle: handle returned from HL_MEM_OP_ALLOC.
* in Gaudi, where we don't have MMU for the device memory, the
* driver expects a physical address (instead of a handle) in the
* device memory space.
* @mem_size: size of memory allocation. Relevant only for GAUDI
*/
struct {
__u64 handle;
/* Size of memory allocation. Relevant only for GAUDI */
__u64 mem_size;
} export_dmabuf_fd;
};
/* HL_MEM_OP_* */
__u32 op;
/* HL_MEM_* flags.
* For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
* DMA-BUF file/FD flags.
*/
__u32 flags;
/* Context ID - Currently not in use */
__u32 ctx_id;
__u32 pad;
__u32 num_of_elements;
};
struct hl_mem_out {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment